Commit 7bc57fc2 authored by incardon

Latest

parent 34e4e85b
openfpm_data @ 8adda2f7
Subproject commit 6238d10785c35dcc711dc4e5fd0c2d3a0ea8882e
Subproject commit 8adda2f7cdf8aedd7e79ccb1a2bad36cd098ada1
#define BOOST_TEST_DYN_LINK
#include <boost/test/unit_test.hpp>
#include "VCluster/VCluster.hpp"
#include "Decomposition/CartDecomposition.hpp"
#define SUB_UNIT_FACTOR 1024
BOOST_AUTO_TEST_SUITE( decomposition_to_gpu_test )
BOOST_AUTO_TEST_CASE( decomposition_to_gpu_test_use )
{
// Vcluster
Vcluster & vcl = create_vcluster();
//! [Create CartDecomposition]
CartDecomposition<3, float> dec(vcl);
// Physical domain
Box<3, float> box( { 0.0, 0.0, 0.0 }, { 1.0, 1.0, 1.0 });
size_t div[3];
// Get the number of processors and calculate the number of sub-domains
// for each processor (SUB_UNIT_FACTOR=1024)
size_t n_proc = vcl.getProcessingUnits();
size_t n_sub = n_proc * SUB_UNIT_FACTOR;
// Set the number of sub-domains in each dimension (in a scalable way)
for (int i = 0; i < 3; i++)
{ div[i] = openfpm::math::round_big_2(pow(n_sub,1.0/3));}
// Define ghost
Ghost<3, float> g(0.01);
// Boundary conditions
size_t bc[] = { PERIODIC, PERIODIC, PERIODIC };
// Decompose
dec.setParameters(div,box,bc,g);
dec.decompose();
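// Get the device (kernel-side) representation of the decomposition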
dec.toKernel();
}
BOOST_AUTO_TEST_SUITE_END()
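For context, a minimal sketch of how the object returned by dec.toKernel() is meant to be consumed on the device. The kernel below and the processorIDBC() query it calls are assumptions for illustration only, not the actual CartDecomposition device API introduced by this commit.

// Hypothetical sketch (CUDA): pass the kernel-side decomposition by value into a __global__ kernel.
// processorIDBC() is an assumed device-side query, used only to illustrate the intended pattern.
template<typename dec_kernel_type>
__global__ void assign_processor(dec_kernel_type dec_k, Point<3,float> * pts, int * proc, int n)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= n) {return;}

    // query which processor owns this position (assumed API)
    proc[i] = dec_k.processorIDBC(pts[i]);
}

// Host side, inside the test after dec.decompose() (illustrative launch):
// assign_processor<<<(n + 127) / 128, 128>>>(dec.toKernel(), pts_dev, proc_dev, n);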
@@ -3,7 +3,7 @@ LINKLIBS = $(HDF5_LDFLAGS) $(HDF5_LIBS) $(OPENMP_LDFLAGS) $(LIBHILBERT_LIB) $(
FLAGS_NVCC = $(NVCCFLAGS) $(INCLUDES_PATH) $(HDF5_CPPFLAGS) $(BOOST_CPPFLAGS) $(MPI_INC_PATH) $(PETSC_INCLUDE) $(LIBHILBERT_INCLUDE) $(PARMETIS_INCLUDE) $(METIS_INCLUDE) -g --expt-extended-lambda
noinst_PROGRAMS = pdata
pdata_SOURCES = main.cpp pdata_performance.cpp Vector/vector_dist_gpu_unit_tests.cu Grid/grid_dist_id_unit_test.cpp lib/pdata.cpp test_multiple_o.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/CudaMemory.cu ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
pdata_SOURCES = main.cpp pdata_performance.cpp Decomposition/cuda/decomposition_cuda_tests.cpp Vector/vector_dist_gpu_unit_tests.cu Grid/grid_dist_id_unit_test.cpp lib/pdata.cpp test_multiple_o.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/CudaMemory.cu ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
pdata_CXXFLAGS = $(HDF5_CPPFLAGS) $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(LIBHILBERT_INCLUDE) $(PETSC_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(PARMETIS_INCLUDE) $(METIS_INCLUDE) $(BOOST_CPPFLAGS) $(H5PART_INCLUDE) -DPARALLEL_IO -Wno-unused-local-typedefs
pdata_CFLAGS = $(CUDA_CFLAGS)
pdata_LDADD = $(LINKLIBS) -lparmetis -lmetis
......
@@ -1706,11 +1706,12 @@ public:
*
* \tparam prp properties to communicate
*
* \param opt options
*
*/
template<unsigned int ... prp> void map_list()
template<unsigned int ... prp> void map_list(size_t opt = 0)
{
this->template map_list_<prp...>(v_pos,v_prp,g_m);
this->template map_list_<prp...>(v_pos,v_prp,g_m,opt);
}
@@ -1722,15 +1723,16 @@ public:
* elements outside the local processor, or just after initialization if each processor
* contains non-local particles
*
* \param opt options
*
*/
template<typename obp = KillParticle> void map()
template<typename obp = KillParticle> void map(size_t opt = 0)
{
#ifdef SE_CLASS3
se3.map_pre();
#endif
this->template map_<obp>(v_pos,v_prp,g_m);
this->template map_<obp>(v_pos,v_prp,g_m,opt);
#ifdef SE_CLASS3
se3.map_post();
......
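For reference, this is how the new opt argument is expected to be used from application code. vd is assumed to be an existing vector_dist instance, the property indices are illustrative, and MAP_ON_DEVICE is the option defined in vector_dist_comm.hpp below.

// vd is an existing vector_dist (assumption for illustration)

// CPU redistribution, unchanged behaviour (opt defaults to 0)
vd.map();

// redistribution requesting the device path introduced by this commit
vd.map(MAP_ON_DEVICE);

// redistribute communicating only the listed properties, forwarding the same option
vd.map_list<0,2>(MAP_ON_DEVICE);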
@@ -17,6 +17,8 @@
#define BIND_DEC_TO_GHOST 1
#define MAP_ON_DEVICE 1000
/*! \brief compute the communication options from the ghost_get/put options
*
*
@@ -750,12 +752,19 @@ class vector_dist_comm
* \param v_pos vector of particle positions
* \param lbl_p labeled particles (destination processor of each particle)
* \param prc_sz For each processor the number of particles to send
* \param opt options
*
*/
template<typename obp> void labelParticleProcessor(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
openfpm::vector<aggregate<size_t,size_t,size_t>> & lbl_p,
openfpm::vector<size_t> & prc_sz)
openfpm::vector<size_t> & prc_sz,
size_t opt)
{
if (opt == MAP_ON_DEVICE)
{
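// MAP_ON_DEVICE is recognized here, but the branch is left empty in this commit
// (presumably the device-side labeling will be implemented here)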
}
// reset lbl_p
lbl_p.clear();
@@ -1135,9 +1144,10 @@ public:
* \param v_pos vector of particle positions
* \param v_prp vector of particle properties
* \param g_m ghost marker
* \param opt options
*
*/
template<unsigned int ... prp> void map_list_(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & g_m)
template<unsigned int ... prp> void map_list_(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & g_m, size_t opt)
{
typedef KillParticle obp;
@@ -1149,7 +1159,7 @@ public:
v_prp.resize(g_m);
// Contains the processor id of each particle (basically where they have to go)
labelParticleProcessor<obp>(v_pos,m_opart, prc_sz);
labelParticleProcessor<obp>(v_pos,m_opart, prc_sz,opt);
// Calculate the sending buffer size for each processor, put this information in
// a contiguous buffer
@@ -1200,7 +1210,8 @@ public:
*/
template<typename obp = KillParticle>
void map_(openfpm::vector<Point<dim, St>,Memory,typename layout_base<Point<dim,St>>::type,layout_base> & v_pos,
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp, size_t & g_m)
openfpm::vector<prop,Memory,typename layout_base<prop>::type,layout_base> & v_prp, size_t & g_m,
size_t opt)
{
// Processor communication size
openfpm::vector<size_t> prc_sz(v_cl.getProcessingUnits());
@@ -1210,7 +1221,7 @@ public:
v_prp.resize(g_m);
// Contains the processor id of each particle (basically where they have to go)
labelParticleProcessor<obp>(v_pos,m_opart, prc_sz);
labelParticleProcessor<obp>(v_pos,m_opart, prc_sz,opt);
// Calculate the sending buffer size for each processor, put this information in
// a contiguous buffer
......
@@ -376,4 +376,38 @@ BOOST_AUTO_TEST_CASE( vector_dist_gpu_test)
}
BOOST_AUTO_TEST_CASE( vector_dist_map_on_gpu_test)
{
auto & v_cl = create_vcluster();
if (v_cl.size() > 16)
{return;}
Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});
// Set the ghost based on the cut-off radius (made just a little bit smaller than the spacing)
Ghost<3,float> g(0.1);
// Boundary conditions
size_t bc[3]={NON_PERIODIC,NON_PERIODIC,NON_PERIODIC};
vector_dist_gpu<3,float,aggregate<float,float[3],float[3]>> vd(1000,domain,bc,g);
auto it = vd.getDomainIterator();
while (it.isNext())
{
auto p = it.get();
vd.getPos(p)[0] = (float)rand() / RAND_MAX;
vd.getPos(p)[1] = (float)rand() / RAND_MAX;
vd.getPos(p)[2] = (float)rand() / RAND_MAX;
++it;
}
// Redistribute the particles, requesting the device (GPU) path
vd.map(MAP_ON_DEVICE);
}
BOOST_AUTO_TEST_SUITE_END()
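A possible follow-up assertion for the test above (not part of this commit): after map(MAP_ON_DEVICE), every particle should end up on the processor that owns its position. The sketch below assumes the decomposition exposes an isLocal() query and that positions are available on the host (a device-to-host copy may be required first).

// Sketch of a possible check, under the assumptions stated above
auto & dec = vd.getDecomposition();
auto it2 = vd.getDomainIterator();
while (it2.isNext())
{
    auto p = it2.get();

    // after the map, every particle must lie inside the local sub-domains
    BOOST_REQUIRE_EQUAL(dec.isLocal(vd.getPos(p)),true);

    ++it2;
}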