Commit 0f2320eb authored by incardon's avatar incardon

Fixing CUDA util

parent 26461cc0
......@@ -11,11 +11,7 @@
#include "memory/memory.hpp"
#include "Memleak_check.hpp"
#ifdef __NVCC__
#else
#define __host__
#define __device__
#endif
#include "util/cuda_util.hpp"
/*!
*
......
......@@ -8,11 +8,7 @@
#ifndef OPENFPM_DATA_SRC_UTIL_BOOST_MULTI_ARRAY_OPENFPM_HPP_
#define OPENFPM_DATA_SRC_UTIL_BOOST_MULTI_ARRAY_OPENFPM_HPP_
#ifdef __NVCC__
#else
#define __host__
#define __device__
#endif
#include "util/cuda_util.hpp"
//
// multi_array.hpp - contains the multi_array class template
......
......@@ -8,6 +8,9 @@
#ifndef OFP_CONTEXT_HXX_
#define OFP_CONTEXT_HXX_
#include <iostream>
#ifdef __NVCC__
#include "util/cuda/moderngpu/context.hxx"
namespace mgpu
......@@ -121,8 +124,153 @@ namespace mgpu
cudaEventElapsedTime(&ms, _timer[0], _timer[1]);
return ms / 1.0e3;
}
// Return the ordinal of the CUDA device that is current for the calling host
// thread, as reported by cudaGetDevice(). The status of the runtime call is
// not inspected; if the call does not write the output, 0 is returned.
virtual int getDevice()
{
	int current_device = 0;

	cudaGetDevice(&current_device);

	return current_device;
}
};
}
#else
#include "util/cuda/moderngpu/context_reduced.hxx"
namespace mgpu
{
////////////////////////////////////////////////////////////////////////////////
// standard_context_t is a trivial implementation of context_t. Users can
// derive this type to provide a custom allocator.
class ofp_context_t : public context_t
{
protected:
cudaDeviceProp _props;
cudaStream_t _stream;
cudaEvent_t _timer[2];
cudaEvent_t _event;
// Making this a template argument means we won't generate an instance
// of dummy_k for each translation unit.
template<int dummy_arg = 0>
void init(int dev_num)
{
cudaFuncAttributes attr;
int num_dev;
cudaGetDeviceCount(&num_dev);
cudaSetDevice(dev_num % num_dev);
int ord;
cudaGetDevice(&ord);
cudaGetDeviceProperties(&_props, ord);
cudaEventCreate(&_timer[0]);
cudaEventCreate(&_timer[1]);
cudaEventCreate(&_event);
}
public:
ofp_context_t(bool print_prop = true, int dev_num = 0, cudaStream_t stream_ = 0)
:context_t(), _stream(stream_)
{
init(dev_num);
if(print_prop)
{
printf("%s\n", device_prop_string(_props).c_str());
}
}
~ofp_context_t()
{
cudaEventDestroy(_timer[0]);
cudaEventDestroy(_timer[1]);
cudaEventDestroy(_event);
}
virtual const cudaDeviceProp& props() const
{
return _props;
}
virtual int ptx_version() const
{
std::cout << __FILE__ << ":" << __LINE__ << " error to use this function you must compile the class ofp_context_t with NVCC" << std::endl;
return 0;
}
virtual cudaStream_t stream() { return _stream; }
// Alloc GPU memory.
virtual void* alloc(size_t size, memory_space_t space)
{
void* p = nullptr;
if(size)
{
cudaError_t result = (memory_space_device == space) ?cudaMalloc(&p, size) : cudaMallocHost(&p, size);
if(cudaSuccess != result) throw cuda_exception_t(result);
}
return p;
}
virtual void free(void* p, memory_space_t space)
{
if(p)
{
cudaError_t result = (memory_space_device == space) ? cudaFree(p) : cudaFreeHost(p);
if(cudaSuccess != result) throw cuda_exception_t(result);
}
}
virtual void synchronize()
{
cudaError_t result = _stream ?
cudaStreamSynchronize(_stream) :
cudaDeviceSynchronize();
if(cudaSuccess != result) throw cuda_exception_t(result);
}
virtual cudaEvent_t event()
{
return _event;
}
virtual void timer_begin()
{
cudaEventRecord(_timer[0], _stream);
}
virtual double timer_end()
{
cudaEventRecord(_timer[1], _stream);
cudaEventSynchronize(_timer[1]);
float ms;
cudaEventElapsedTime(&ms, _timer[0], _timer[1]);
return ms / 1.0e3;
}
virtual int getDevice()
{
int dev = 0;
cudaGetDevice(&dev);
return dev;
}
};
}
#endif
#endif /* OFP_CONTEXT_HXX_ */
......@@ -9,39 +9,27 @@
#define OPENFPM_DATA_SRC_UTIL_CUDA_UTIL_HPP_
#include "config.h"
#ifdef CUDA_GPU
#include <cuda_runtime.h>
#endif
#ifdef CUDA_GPU
#ifndef __NVCC__
#include "util/cuda/ofp_context.hxx"
#define __host__
#define __device__
#ifndef __NVCC__
// Plain aggregate of three unsigned integer components.
// NOTE(review): appears to stand in for CUDA's built-in uint3 when the CUDA
// vector types are not available - confirm against the surrounding guards.
struct uint3
{
	unsigned int x;
	unsigned int y;
	unsigned int z;
};
// Three-component unsigned extent (x, y, z). Every component defaults to 1,
// so dim3(n) describes an n x 1 x 1 extent.
// NOTE(review): appears to stand in for CUDA's built-in dim3 when the CUDA
// vector types are not available - confirm against the surrounding guards.
struct dim3
{
	unsigned int x, y, z;
#if defined(__cplusplus)
	// Build from up to three components; omitted components are 1
	__host__ __device__ dim3(unsigned int vx = 1, unsigned int vy = 1, unsigned int vz = 1) : x(vx), y(vy), z(vz) {}

	// Build from a uint3, copying the components one to one
	__host__ __device__ dim3(uint3 v) : x(v.x), y(v.y), z(v.z) {}

	// Convert to uint3. Declared const so that const dim3 objects (and const
	// references to dim3) can be converted as well; the object is not modified.
	__host__ __device__ operator uint3(void) const { uint3 t; t.x = x; t.y = y; t.z = z; return t; }
#endif /* __cplusplus */
};
namespace mgpu
{
	// Empty placeholder for the modern gpu context: it holds no state and its
	// constructor accepts, and discards, the print_prop and dev_num arguments.
	struct ofp_context_t
	{
		ofp_context_t(bool print_prop = true, int dev_num = 0)
		{
			// Intentionally unused
			(void)print_prop;
			(void)dev_num;
		}
	};
}
// namespace mgpu
// {
// // Stub class for modern gpu
//
// struct ofp_context_t
// {
// ofp_context_t(bool print_prop = true, int dev_num = 0)
// {}
// };
// }
#else
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment