Commit a9632484 authored by incardon's avatar incardon

Adding function to cell-list for expanded point add

parent 9f134a74
......@@ -233,7 +233,7 @@ public:
* \return the shift vector
*
*/
inline const Point<dim,T> & getOrig() const
__device__ __host__ inline const Point<dim,T> & getOrig() const
{
return sh;
}
......@@ -587,6 +587,34 @@ class CellDecomposer_sm
return id;
}
/*! \brief Convert the coordinates into id with negative machine precision expansion
*
* \param x point
* \param s dimension
*
*/
inline size_t ConvertToID_me(const Point<dim,T> & x ,size_t s, size_t sc = 0) const
{
T cc = t.transform(x,s) / box_unit.getHigh(s) - 0.015625;
size_t id = openfpm::math::size_t_floor(cc) + off[s];
id = (id >= gr_cell.size(s))?(gr_cell.size(s)-1-cell_shift.get(s)):id-cell_shift.get(s);
return id;
}
/*! \brief Convert the coordinates into id with positive machine precision expansion
*
* \param x point
* \param s dimension
*
*/
inline size_t ConvertToID_pe(const Point<dim,T> & x ,size_t s, size_t sc = 0) const
{
T cc = t.transform(x,s) / box_unit.getHigh(s) + 0.015625;
size_t id = openfpm::math::size_t_floor(cc) + off[s];
id = (id >= gr_cell.size(s))?(gr_cell.size(s)-1-cell_shift.get(s)):id-cell_shift.get(s);
return id;
}
/*! \brief Convert the coordinates into id without apply shift
*
* \param x coordinate
......@@ -932,6 +960,82 @@ public:
return cell_id;
}
/*! \brief Get the cell-ids with negative machine precision expansion
*
* Convert the point coordinates into the cell ids (Careful it include padding)
*
* \param pos Point position
*
* \return the cell-ids ad a grid_key_dx<dim>
*
*/
inline grid_key_dx<dim> getCellGrid_me(const Point<dim,T> & pos) const
{
#ifdef SE_CLASS1
if (tot_n_cell == 0)
{
std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " using an uninitialized CellDecomposer" << std::endl;
ACTION_ON_ERROR(CELL_DECOMPOSER);
}
#endif
grid_key_dx<dim> key;
key.set_d(0,ConvertToID_me(pos,0));
for (size_t s = 1 ; s < dim ; s++)
{
#ifdef SE_CLASS1
if ((size_t)(t.transform(pos,s) / box_unit.getHigh(s)) + off[s] < 0)
{
std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " point is not inside the cell space" << std::endl;
ACTION_ON_ERROR(CELL_DECOMPOSER);
}
#endif
/* coverity[dead_error_line] */
key.set_d(s,ConvertToID_me(pos,s));
}
return key;
}
/*! \brief Get the cell-ids with positive machine precision expansion
*
* Convert the point coordinates into the cell ids (Careful it include padding)
*
* \param pos Point position
*
* \return the cell-ids ad a grid_key_dx<dim>
*
*/
inline grid_key_dx<dim> getCellGrid_pe(const Point<dim,T> & pos) const
{
#ifdef SE_CLASS1
if (tot_n_cell == 0)
{
std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " using an uninitialized CellDecomposer" << std::endl;
ACTION_ON_ERROR(CELL_DECOMPOSER);
}
#endif
grid_key_dx<dim> key;
key.set_d(0,ConvertToID_pe(pos,0));
for (size_t s = 1 ; s < dim ; s++)
{
#ifdef SE_CLASS1
if ((size_t)(t.transform(pos,s) / box_unit.getHigh(s)) + off[s] < 0)
{
std::cerr << "Error: " << __FILE__ << ":" << __LINE__ << " point is not inside the cell space" << std::endl;
ACTION_ON_ERROR(CELL_DECOMPOSER);
}
#endif
/* coverity[dead_error_line] */
key.set_d(s,ConvertToID_pe(pos,s));
}
return key;
}
/*! \brief Get the cell-ids
*
* Convert the point coordinates into the cell ids (Careful it include padding)
......
......@@ -13,7 +13,7 @@
template<unsigned int dim, typename cnt_type, typename ids_type, typename transform>
struct cid_
{
static inline __device__ cnt_type get_cid(openfpm::array<ids_type,1,cnt_type> & div_c , ids_type * e)
static inline __device__ __host__ cnt_type get_cid(openfpm::array<ids_type,1,cnt_type> & div_c , ids_type * e)
{
cnt_type id = e[dim-1];
......@@ -24,7 +24,7 @@ struct cid_
return id;
}
static inline __device__ cnt_type get_cid(openfpm::array<ids_type,dim,cnt_type> & div_c , const grid_key_dx<1,cnt_type> & e)
static inline __device__ __host__ cnt_type get_cid(openfpm::array<ids_type,dim,cnt_type> & div_c , const grid_key_dx<1,cnt_type> & e)
{
cnt_type id = e.get(dim-1);
......@@ -35,7 +35,7 @@ struct cid_
return id;
}
template<typename T> static inline __device__ cnt_type get_cid(openfpm::array<ids_type,dim,cnt_type> & div_c,
template<typename T> static inline __device__ __host__ cnt_type get_cid(openfpm::array<ids_type,dim,cnt_type> & div_c,
openfpm::array<T,dim,cnt_type> & spacing,
const transform & t,
const Point<dim,T> & p)
......@@ -53,17 +53,17 @@ struct cid_
template<typename cnt_type, typename ids_type, typename transform>
struct cid_<1,cnt_type,ids_type, transform>
{
static inline __device__ cnt_type get_cid(openfpm::array<ids_type,1,cnt_type> & div_c, ids_type * e)
static inline __device__ __host__ cnt_type get_cid(openfpm::array<ids_type,1,cnt_type> & div_c, ids_type * e)
{
return e[0];
}
static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,1,cnt_type> & div_c, const grid_key_dx<1,cnt_type> & e)
static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,1,cnt_type> & div_c, const grid_key_dx<1,cnt_type> & e)
{
return e.get(0);
}
template<typename T> static inline __device__ cnt_type get_cid(openfpm::array<ids_type,1,cnt_type> & div_c,
template<typename T> static inline __device__ __host__ cnt_type get_cid(openfpm::array<ids_type,1,cnt_type> & div_c,
openfpm::array<T,1,cnt_type> & spacing,
const transform & t,
const Point<1,T> & p)
......@@ -75,17 +75,17 @@ struct cid_<1,cnt_type,ids_type, transform>
template<typename cnt_type, typename ids_type, typename transform>
struct cid_<2,cnt_type,ids_type,transform>
{
static inline __device__ cnt_type get_cid(openfpm::array<ids_type,2,cnt_type> & div_c, ids_type * e)
static inline __device__ __host__ cnt_type get_cid(openfpm::array<ids_type,2,cnt_type> & div_c, ids_type * e)
{
return e[0] + div_c[0] * e[1];
}
static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c, const grid_key_dx<2,cnt_type> & e)
static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c, const grid_key_dx<2,cnt_type> & e)
{
return e.get(0) + div_c[0] * e.get(1);
}
template<typename T> static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c,
template<typename T> static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c,
const openfpm::array<T,2,cnt_type> & spacing,
const openfpm::array<ids_type,2,cnt_type> & off,
const transform & t,
......@@ -96,7 +96,7 @@ struct cid_<2,cnt_type,ids_type,transform>
(openfpm::math::uint_floor(t.transform(p,1)/spacing[1]) + off[1])*div_c[0];
}
template<typename T> static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c,
template<typename T> static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c,
const openfpm::array<T,2,cnt_type> & spacing,
const openfpm::array<ids_type,2,cnt_type> & off,
const transform & t,
......@@ -109,7 +109,7 @@ struct cid_<2,cnt_type,ids_type,transform>
return e[0] + e[1]*div_c[0];
}
template<typename T> static inline __device__ grid_key_dx<2,ids_type> get_cid_key(const openfpm::array<T,2,cnt_type> & spacing,
template<typename T> static inline __device__ __host__ grid_key_dx<2,ids_type> get_cid_key(const openfpm::array<T,2,cnt_type> & spacing,
const openfpm::array<ids_type,2,cnt_type> & off,
const transform & t,
const Point<2,T> & p)
......@@ -123,7 +123,7 @@ struct cid_<2,cnt_type,ids_type,transform>
}
template <typename U = cnt_type, typename sfinae=typename std::enable_if<std::is_same<ids_type,U>::value >::type >
static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c,
static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,2,cnt_type> & div_c,
const grid_key_dx<2,cnt_type> & e)
{
return e.get(0) + e.get(1)*div_c[0];
......@@ -135,19 +135,19 @@ template<typename cnt_type, typename ids_type,typename transform>
struct cid_<3,cnt_type,ids_type,transform>
{
static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
const ids_type * e)
{
return e[0] + (e[1] + e[2]*div_c[1])*div_c[0];
}
static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
const grid_key_dx<3,ids_type> & e)
{
return e.get(0) + (e.get(1) + e.get(2)*div_c[1])*div_c[0];
}
template<typename T> static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
template<typename T> static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
const openfpm::array<T,3,cnt_type> & spacing,
const openfpm::array<ids_type,3,cnt_type> & off,
const transform & t,
......@@ -159,7 +159,7 @@ struct cid_<3,cnt_type,ids_type,transform>
(openfpm::math::uint_floor(t.transform(p,2)/spacing[2]) + off[2])*div_c[1])*div_c[0];
}
template<typename T> static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
template<typename T> static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
const openfpm::array<T,3,cnt_type> & spacing,
const openfpm::array<ids_type,3,cnt_type> & off,
const transform & t,
......@@ -173,7 +173,7 @@ struct cid_<3,cnt_type,ids_type,transform>
return e[0] + (e[1] + e[2]*div_c[1])*div_c[0];
}
template<typename T> static inline __device__ grid_key_dx<3,ids_type> get_cid_key(const openfpm::array<T,3,cnt_type> & spacing,
template<typename T> static inline __device__ __host__ grid_key_dx<3,ids_type> get_cid_key(const openfpm::array<T,3,cnt_type> & spacing,
const openfpm::array<ids_type,3,cnt_type> & off,
const transform & t,
const Point<3,T> & p)
......@@ -188,7 +188,7 @@ struct cid_<3,cnt_type,ids_type,transform>
}
template <typename U = cnt_type, typename sfinae=typename std::enable_if<std::is_same<ids_type,U>::value >::type >
static inline __device__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
static inline __device__ __host__ cnt_type get_cid(const openfpm::array<ids_type,3,cnt_type> & div_c,
const grid_key_dx<3,cnt_type> & e)
{
return e.get(0) + (e.get(1) + e.get(2)*div_c[1])*div_c[0];
......
......@@ -15,6 +15,9 @@ __global__ void merge_add_prp_device_impl(vector_src_type v_src, vector_dst_type
{
int i = threadIdx.x + blockIdx.x * blockDim.x;
if (i >= v_src.size())
{return;}
// write the object in the last element
object_s_di<decltype(v_src.get(i)),decltype(v_dst.get(old_sz+i)),OBJ_ENCAP,args...>(v_src.get(i),v_dst.get(old_sz+i));
}
......
......@@ -116,8 +116,8 @@ template<typename ... Args>int error_arg(void * ptr, int prp, Args ... args)
#include <boost/algorithm/string.hpp>
#ifdef SE_CLASS1
#define CHECK_SE_CLASS1_PRE int dev_mem[] = {0,0,0,0,0,0,0,0,0,0,0};
//#define CHECK_SE_CLASS1_POST(...) cudaMemcpyFromSymbol(dev_mem,global_cuda_error_array,sizeof(dev_mem));
#define CUDA_LAUNCH_ERROR_OBJECT std::runtime_error("Runtime vector error");
#define CHECK_SE_CLASS1_PRE int dev_mem[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
#define CHECK_SE_CLASS1_POST(kernel_call,...) cudaMemcpyFromSymbol(dev_mem,global_cuda_error_array,sizeof(dev_mem)); \
if (dev_mem[0] != 0)\
{\
......@@ -127,7 +127,12 @@ template<typename ... Args>int error_arg(void * ptr, int prp, Args ... args)
std::string args_s( #__VA_ARGS__ );\
std::vector<std::string> results;\
boost::split(results, args_s, [](char c){return c == ',';});\
std::cout << __FILE__ << ":" << __LINE__ << " Overflow detected in Kernel: " << kernel_call << " from the structure " << results[ea] << " property: " << prp_err << " index:(" ;\
std::string data_s;\
if (ea >= results.size())\
{data_s = "Internal";}\
else\
{data_s = results[ea];}\
std::cout << __FILE__ << ":" << __LINE__ << " Overflow detected in Kernel: " << kernel_call << " from the structure: " << data_s << " property: " << prp_err << " index:(" ;\
int i = 0; \
for ( ; i < dev_mem[4]-1 ; i++)\
{\
......@@ -136,6 +141,7 @@ template<typename ... Args>int error_arg(void * ptr, int prp, Args ... args)
std::cout << dev_mem[5+i];\
std::cout << ")";\
std::cout << " thread: " << "(" << dev_mem[6+i] << "," << dev_mem[7+i] << "," << dev_mem[8+i] << ")*(" << dev_mem[9+i] << "," << dev_mem[10+i] << "," << dev_mem[11+i] << ")+(" << dev_mem[12+i] << "," << dev_mem[13+i] << "," << dev_mem[14+i] << ")" << std::endl;\
ACTION_ON_ERROR(CUDA_LAUNCH_ERROR_OBJECT);\
}
#else
#define CHECK_SE_CLASS1_PRE
......
......@@ -21,13 +21,13 @@
cuda_call<<<(grid_size),(block_size)>>>(__VA_ARGS__); \
cudaDeviceSynchronize(); \
{\
CHECK_SE_CLASS1_POST(#cuda_call,__VA_ARGS__)\
cudaError_t e = cudaGetLastError();\
if (e != cudaSuccess)\
{\
std::string error = cudaGetErrorString(e);\
std::cout << "Cuda Error in: " << __FILE__ << ":" << __LINE__ << " " << error << std::endl;\
}\
CHECK_SE_CLASS1_POST(#cuda_call,__VA_ARGS__)\
}\
}
#else
......
......@@ -8,6 +8,8 @@
#include "Vector/map_vector.hpp"
#include "util/cuda/moderngpu/kernel_load_balance.hxx"
#include "util/cuda/moderngpu/kernel_mergesort.hxx"
#include "util/cuda/moderngpu/kernel_reduce.hxx"
BOOST_AUTO_TEST_SUITE( modern_gpu_tests )
......@@ -90,5 +92,46 @@ BOOST_AUTO_TEST_CASE( modern_gpu_sort )
// Test the cell list
}
BOOST_AUTO_TEST_CASE( modern_gpu_reduce )
{
std::cout << "Test modern gpu reduce" << "\n";
mgpu::standard_context_t context(false);
int count = 200030;
openfpm::vector_gpu<aggregate<int>> vgpu;
vgpu.resize(count);
for (size_t i = 0 ; i < count ; i++)
{
vgpu.template get<0>(i) = ((float)rand() / RAND_MAX) * 17;
}
vgpu.hostToDevice<0>();
CudaMemory mem;
mem.allocate(sizeof(int));
mgpu::reduce((int *)vgpu.template getDeviceBuffer<0>(), count, (int *)mem.getDevicePointer(), mgpu::plus_t<int>(), context);
mem.deviceToHost();
int red_p = *(int *)mem.getPointer();
// print
int red = 0;
for (int i = 0 ; i < count ; i++)
{
red += vgpu.template get<0>(i);
}
BOOST_REQUIRE_EQUAL(red,red_p);
std::cout << "End test modern gpu test reduce" << "\n";
// Test the cell list
}
BOOST_AUTO_TEST_SUITE_END()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment