Commit cb3566e1 authored by incardon's avatar incardon

GPU moving on

parent 3d995585
SUBDIRS = src images openfpm_data openfpm_io openfpm_devices openfpm_vcluster openfpm_numerics
ACLOCAL_AMFLAGS = -I m4
bin_PROGRAMS =
pdata:
......@@ -20,6 +22,8 @@ io:
# Build the numerics sub-project. $(MAKE) is used instead of a literal `make`
# so that -j, -n and the jobserver are propagated to the sub-make.
numerics:
	cd openfpm_numerics/src && $(MAKE)
# Build only the actual_test program inside src. $(MAKE) is used instead of a
# literal `make` so that -j, -n and the jobserver are propagated to the sub-make.
actual_test:
	cd src && $(MAKE) actual_test
......
......@@ -8,6 +8,7 @@ AC_INIT(OpenFPM_pdata, 1.0.0, BUG-REPORT-ADDRESS)
AC_CANONICAL_SYSTEM
AC_CONFIG_SRCDIR([src/main.cpp])
AC_CONFIG_SUBDIRS([openfpm_data openfpm_devices openfpm_vcluster openfpm_io openfpm_numerics])
AC_CONFIG_MACRO_DIRS([m4])
#### Adding --with-pdata option and openfpm_pdata to prefix folder
......@@ -22,7 +23,7 @@ ac_configure_args="$ac_configure_args --with-pdata=../../src"
########################
AM_INIT_AUTOMAKE([subdir-objects])
AM_INIT_AUTOMAKE([subdir-objects foreign])
AC_CONFIG_HEADER([src/config/config.h])
m4_ifdef([ACX_PTHREAD],,[m4_include([m4/acx_pthread.m4])])
m4_ifdef([ACX_MPI],,[m4_include([m4/acx_mpi.m4])])
......@@ -144,7 +145,7 @@ have_quad_head=no
AC_CHECK_LIB(quadmath, sinq, [have_quad_lib=yes], [])
AC_CHECK_HEADER(quadmath.h,[have_quad_head=yes],[])
if [x"have_quad_math" == x"yes" $&& x"have_quad_math" == x"yes" ]; then
# Enable quadmath only when both the library check and the header check above
# succeeded. The shell variables must be expanded with a leading dollar sign:
# comparing the bare word have_quad_math against yes is always false.
if test x"$have_quad_lib" = x"yes" && test x"$have_quad_head" = x"yes"; then
AC_DEFINE(HAVE_LIBQUADMATH,[],[Have quad math lib])
LIBQUADMATH=" -lquadmath "
fi
......
......@@ -1444,7 +1444,7 @@ int main(int argc, char* argv[])
// calculate the pressure at the sensor points
sensor_pressure(vd,NN,press_t,probes);
vd.write("Geometry",write);
vd.write_frame("Geometry",write);
write++;
if (v_cl.getProcessUnitID() == 0)
......
......@@ -2,22 +2,22 @@ LINKLIBS = $(HDF5_LDFLAGS) $(HDF5_LIBS) $(OPENMP_LDFLAGS) $(LIBHILBERT_LIB) $(P
noinst_PROGRAMS = cart_dec metis_dec dom_box vector_dist
cart_dec_SOURCES = CartDecomposition_gen_vtk.cpp ../src/lib/pdata.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
cart_dec_CXXFLAGS = $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(PETSC_INCLUDE) $(METIS_INCLUDE) $(PARMETIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs
cart_dec_CXXFLAGS = -Wno-unknown-pragmas $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(PETSC_INCLUDE) $(METIS_INCLUDE) $(PARMETIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs
cart_dec_CFLAGS = $(OPENMP_CFLAGS) $(CUDA_CFLAGS)
cart_dec_LDADD = $(LINKLIBS) -lparmetis -lmetis
metis_dec_SOURCES = Metis_gen_vtk.cpp ../src/lib/pdata.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
metis_dec_CXXFLAGS = $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(PETSC_INCLUDE) $(METIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs
metis_dec_CXXFLAGS = -Wno-unknown-pragmas $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(PETSC_INCLUDE) $(METIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs
metis_dec_CFLAGS = $(OPENMP_CFLAGS) $(CUDA_CFLAGS)
metis_dec_LDADD = $(LINKLIBS) -lmetis
dom_box_SOURCES = domain_gen_vtk.cpp ../src/lib/pdata.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
dom_box_CXXFLAGS = $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(PETSC_INCLUDE) $(METIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs
dom_box_CXXFLAGS = -Wno-unknown-pragmas $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(PETSC_INCLUDE) $(METIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs
dom_box_CFLAGS = $(OPENMP_CFLAGS) $(CUDA_CFLAGS)
dom_box_LDADD = $(LINKLIBS)
vector_dist_SOURCES = vector.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_devices/src/Memleak_check.cpp
vector_dist_CXXFLAGS = $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(LIBHILBERT_INCLUDE) $(PETSC_INCLUDE) $(PARMETIS_INCLUDE) $(METIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(HDF5_CPPFLAGS) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs
vector_dist_CXXFLAGS = -Wno-unknown-pragmas $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(LIBHILBERT_INCLUDE) $(PETSC_INCLUDE) $(PARMETIS_INCLUDE) $(METIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(HDF5_CPPFLAGS) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs
vector_dist_CFLAGS = $(OPENMP_CFLAGS) $(CUDA_CFLAGS)
vector_dist_LDADD = $(LINKLIBS) -lparmetis -lmetis
......
openfpm_data @ 66fdd0bd
Subproject commit e96170649cc5257d6be5b937372b281e6e5d74a8
Subproject commit 66fdd0bdd308bd78efe3c14a7d2bf9290e35f550
openfpm_vcluster @ ad71d154
Subproject commit 9602917fecc1e84f7a837fcd69a4bd98d572b4ba
Subproject commit ad71d154d0a8420816f49b63dfa1f43084b13022
......@@ -38,6 +38,7 @@
#include "CartDecomposition_ext.hpp"
#include "data_type/aggregate.hpp"
#include "Domain_NN_calculator_cart.hpp"
#include "cuda/CartDecomposition_gpu.cuh"
#define CARTDEC_ERROR 2000lu
......@@ -128,7 +129,7 @@ template<unsigned int dim> static void nsub_to_div(size_t (& div)[dim], size_t n
*
*/
template<unsigned int dim, typename T, typename Memory, typename Distribution>
template<unsigned int dim, typename T, typename Memory, template <typename> class layout_base, typename Distribution>
class CartDecomposition: public ie_loc_ghost<dim, T>, public nn_prcs<dim, T>, public ie_ghost<dim, T>, public domain_nn_calculator_cart<dim>
{
public:
......@@ -140,13 +141,16 @@ public:
typedef SpaceBox<dim, T> Box;
//! This class is base of itself
typedef CartDecomposition<dim,T,Memory,Distribution> base_type;
typedef CartDecomposition<dim,T,Memory,layout_base,Distribution> base_type;
//! This class admit a class defined on an extended domain
typedef CartDecomposition_ext<dim,T,Memory,Distribution> extended_type;
typedef CartDecomposition_ext<dim,T,Memory,layout_base,Distribution> extended_type;
protected:
//! bool that indicates whether the buffers have already been transferred to the device
bool host_dev_transfer = false;
//! Indicate the communication weight has been set
bool commCostSet = false;
......@@ -163,14 +167,14 @@ protected:
openfpm::vector<SpaceBox<dim, T>> sub_domains;
//! the remote set of all sub-domains as vector of 'sub_domains' vectors
mutable openfpm::vector<Box_map<dim, T>> sub_domains_global;
mutable openfpm::vector<Box_map<dim, T>,Memory,typename layout_base<Box_map<dim, T>>::type,layout_base> sub_domains_global;
//! for each sub-domain, contain the list of the neighborhood processors
openfpm::vector<openfpm::vector<long unsigned int> > box_nn_processor;
//! Structure that contain for each sub-sub-domain box the processor id
//! exist for efficient global communication
CellList<dim,T,Mem_fast<>,shift<dim,T>> fine_s;
CellList<dim,T,Mem_fast<Memory,int>,shift<dim,T>> fine_s;
//! Structure that store the cartesian grid information
grid_sm<dim, void> gr;
......@@ -268,26 +272,24 @@ protected:
return sub_d;
}
void collect_all_sub_domains(openfpm::vector<Box_map<dim,T>> & sub_domains_global)
void collect_all_sub_domains(openfpm::vector<Box_map<dim,T>,Memory,typename layout_base<Box_map<dim, T>>::type,layout_base> & sub_domains_global)
{
#ifdef SE_CLASS2
check_valid(this,8);
#endif
sub_domains_global.clear();
openfpm::vector<Box_map<dim,T>> bm;
openfpm::vector<Box_map<dim,T>,Memory,typename layout_base<Box_map<dim, T>>::type,layout_base> bm;
for (size_t i = 0 ; i < sub_domains.size() ; i++)
{
Box_map<dim,T> tmp;
tmp.box = ::SpaceBox<dim,T>(sub_domains.get(i));
tmp.prc = v_cl.rank();
bm.add(tmp);
bm.add();
bm.template get<0>(bm.size()-1) = ::SpaceBox<dim,T>(sub_domains.get(i));
bm.template get<1>(bm.size()-1) = v_cl.rank();
}
v_cl.SGather(bm,sub_domains_global,0);
v_cl.SGather<decltype(bm),decltype(sub_domains_global),layout_base>(bm,sub_domains_global,0);
size_t size = sub_domains_global.size();
......@@ -324,8 +326,8 @@ public:
{
// get the cells this box span
const grid_key_dx<dim> p1 = fine_s.getCellGrid(sub_domains_global.get(i).box.getP1());
const grid_key_dx<dim> p2 = fine_s.getCellGrid(sub_domains_global.get(i).box.getP2());
const grid_key_dx<dim> p1 = fine_s.getCellGrid(sub_domains_global.template get<0>(i).getP1());
const grid_key_dx<dim> p2 = fine_s.getCellGrid(sub_domains_global.template get<0>(i).getP2());
// Get the grid and the sub-iterator
auto & gi = fine_s.getGrid();
......@@ -336,9 +338,12 @@ public:
{
auto key = g_sub.get();
fine_s.addCell(gi.LinId(key),i);
++g_sub;
}
}
host_dev_transfer = false;
}
/*! \brief Constructor, it decompose and distribute the sub-domains across the processors
......@@ -428,28 +433,6 @@ public:
construct_fine_s();
/////////////////////////////////////////////////////////////////////////////////
/* grid_key_dx_iterator<dim> git(gr);
while (git.isNext())
{
auto key = git.get();
grid_key_dx<dim> key2;
for (size_t i = 0 ; i < dim ; i++)
{key2.set_d(i,key.get(i) / magn[i]);}
size_t lin = gr_dist.LinId(key2);
size_t lin2 = gr.LinId(key);
// Here we draw the fine_s in the cell-list
fine_s.get(lin2) = dist.getGraph().template vertex_p<nm_v::proc_id>(lin);
++git;
}*/
Initialize_geo_cell_lists();
}
......@@ -666,7 +649,7 @@ public:
ie_loc_ghost<dim,T>::create(sub_domains,domain,ghost,bc);
}
template<typename T2> inline size_t processorID_impl(T2 & p) const
/* template<typename T2> inline size_t processorID_impl(T2 & p) const
{
// Get the number of elements in the cell
......@@ -678,7 +661,7 @@ public:
{
e = fine_s.get(cl,i);
if (sub_domains_global.get(e).box.isInsideNP(p) == true)
if (sub_domains_global.template get<0>(e).isInsideNP(p) == true)
{
break;
}
......@@ -694,8 +677,8 @@ public:
#endif
return sub_domains_global.get(e).prc;
}
return sub_domains_global.template get<1>(e);
}*/
public:
......@@ -737,7 +720,7 @@ public:
* \param cart object to copy
*
*/
CartDecomposition(const CartDecomposition<dim,T,Memory,Distribution> & cart)
CartDecomposition(const CartDecomposition<dim,T,Memory,layout_base,Distribution> & cart)
:nn_prcs<dim,T>(cart.v_cl),v_cl(cart.v_cl),dist(v_cl),ref_cnt(0)
{
this->operator=(cart);
......@@ -748,7 +731,7 @@ public:
* \param cart object to copy
*
*/
CartDecomposition(CartDecomposition<dim,T,Memory,Distribution> && cart)
CartDecomposition(CartDecomposition<dim,T,Memory,layout_base,Distribution> && cart)
:nn_prcs<dim,T>(cart.v_cl),v_cl(cart.v_cl),dist(v_cl),ref_cnt(0)
{
this->operator=(cart);
......@@ -900,9 +883,9 @@ public:
* \return a duplicated decomposition with different ghost boxes
*
*/
CartDecomposition<dim,T,Memory,Distribution> duplicate(const Ghost<dim,T> & g) const
CartDecomposition<dim,T,Memory,layout_base,Distribution> duplicate(const Ghost<dim,T> & g) const
{
CartDecomposition<dim,T,Memory,Distribution> cart(v_cl);
CartDecomposition<dim,T,Memory,layout_base,Distribution> cart(v_cl);
cart.box_nn_processor = box_nn_processor;
cart.sub_domains = sub_domains;
......@@ -936,9 +919,9 @@ public:
* \return a duplicated CartDecomposition object
*
*/
CartDecomposition<dim,T,Memory,Distribution> duplicate() const
CartDecomposition<dim,T,Memory,layout_base,Distribution> duplicate() const
{
CartDecomposition<dim,T,Memory,Distribution> cart(v_cl);
CartDecomposition<dim,T,Memory,layout_base,Distribution> cart(v_cl);
(static_cast<ie_loc_ghost<dim,T>*>(&cart))->operator=(static_cast<ie_loc_ghost<dim,T>>(*this));
(static_cast<nn_prcs<dim,T>*>(&cart))->operator=(static_cast<nn_prcs<dim,T>>(*this));
......@@ -974,7 +957,7 @@ public:
* \return itself
*
*/
CartDecomposition<dim,T,Memory, Distribution> & operator=(const CartDecomposition & cart)
CartDecomposition<dim,T,Memory, layout_base, Distribution> & operator=(const CartDecomposition & cart)
{
static_cast<ie_loc_ghost<dim,T>*>(this)->operator=(static_cast<ie_loc_ghost<dim,T>>(cart));
static_cast<nn_prcs<dim,T>*>(this)->operator=(static_cast<nn_prcs<dim,T>>(cart));
......@@ -1014,7 +997,7 @@ public:
* \return itself
*
*/
CartDecomposition<dim,T,Memory,Distribution> & operator=(CartDecomposition && cart)
CartDecomposition<dim,T,Memory,layout_base, Distribution> & operator=(CartDecomposition && cart)
{
static_cast<ie_loc_ghost<dim,T>*>(this)->operator=(static_cast<ie_loc_ghost<dim,T>>(cart));
static_cast<nn_prcs<dim,T>*>(this)->operator=(static_cast<nn_prcs<dim,T>>(cart));
......@@ -1074,7 +1057,7 @@ public:
*/
template<typename Mem> size_t inline processorID(const encapc<1, Point<dim,T>, Mem> & p) const
{
return processorID_impl(p);
return processorID_impl(p,fine_s,sub_domains_global);
}
/*! \brief Given a point return in which processor the particle should go
......@@ -1086,7 +1069,7 @@ public:
*/
size_t inline processorID(const Point<dim,T> &p) const
{
return processorID_impl(p);
return processorID_impl(p,fine_s,sub_domains_global);
}
/*! \brief Given a point return in which processor the particle should go
......@@ -1098,7 +1081,7 @@ public:
*/
size_t inline processorID(const T (&p)[dim]) const
{
return processorID_impl(p);
return processorID_impl(p,fine_s,sub_domains_global);
}
/*! \brief Given a point return in which processor the point/particle should go
......@@ -1116,7 +1099,7 @@ public:
applyPointBC(pt);
return processorID_impl(pt);
return processorID_impl(pt,fine_s,sub_domains_global);
}
/*! \brief Given a point return in which processor the particle should go
......@@ -1135,7 +1118,7 @@ public:
// Get the number of elements in the cell
return processorID_impl(pt);
return processorID_impl(pt,fine_s,sub_domains_global);
}
/*! \brief Given a point return in which processor the particle should go
......@@ -1152,7 +1135,7 @@ public:
Point<dim,T> pt = p;
applyPointBC(pt);
return processorID_impl(pt);
return processorID_impl(pt,fine_s,sub_domains_global);
}
/*! \brief Get the periodicity on i dimension
......@@ -1838,6 +1821,12 @@ public:
std::cout << ::SpaceBox<dim, T>(sub_domains.get(p)).toString() << "\n";
}
std::cout << "Subdomains global\n";
for (size_t p = 0; p < sub_domains_global.size(); p++)
{
std::cout << ::SpaceBox<dim, T>(sub_domains_global.template get<0>(p)).toString() << " proc:" << sub_domains_global.template get<1>(p) << "\n";
}
std::cout << "External ghost box\n";
for (size_t p = 0; p<nn_prcs < dim, T>::getNNProcessors(); p++)
......@@ -1990,6 +1979,33 @@ public:
return cd;
}
/*! \brief Convert to a structure usable in a device kernel
 *
 * While host_dev_transfer is false, fine_s and properties 0 and 1 of
 * sub_domains_global are copied host to device and the flag is raised, so
 * repeated calls do not repeat the transfer.
 *
 * \return a data-structure that can be used directly on GPU
 *
 */
CartDecomposition_gpu<dim,T,Memory,layout_base> toKernel()
{
	// Upload the lookup structures only if they are not already on the device
	if (!host_dev_transfer)
	{
		fine_s.hostToDevice();
		sub_domains_global.template hostToDevice<0,1>();

		host_dev_transfer = true;
	}

	// Plain array copy of the per-dimension periodicity, as expected by the
	// CartDecomposition_gpu constructor
	int periodic[dim];

	for (int d = 0 ; d < dim ; d++)
	{
		periodic[d] = this->periodicity(d);
	}

	CartDecomposition_gpu<dim,T,Memory,layout_base> kernel_dec(fine_s.toKernel(),
	                                                           sub_domains_global.toKernel(),
	                                                           getDomain(),
	                                                           periodic);

	return kernel_dec;
}
//! friend classes
friend extended_type;
......
......@@ -13,7 +13,7 @@
#include "Space/Ghost.hpp"
#include "Decomposition/nn_processor.hpp"
template<unsigned int dim, typename T, typename Memory = HeapMemory, typename Distribution = ParMetisDistribution<dim, T>>
template<unsigned int dim, typename T, typename Memory = HeapMemory, template<typename> class layout_base = memory_traits_lin, typename Distribution = ParMetisDistribution<dim, T>>
class CartDecomposition;
/**
......@@ -38,8 +38,8 @@ class CartDecomposition;
*
*/
template<unsigned int dim, typename T, typename Memory = HeapMemory, typename Distribution = ParMetisDistribution<dim, T>>
class CartDecomposition_ext: public CartDecomposition<dim,T,Memory,Distribution>
template<unsigned int dim, typename T, typename Memory = HeapMemory, template<typename> class layout_base = memory_traits_lin, typename Distribution = ParMetisDistribution<dim, T>>
class CartDecomposition_ext: public CartDecomposition<dim,T,Memory,layout_base,Distribution>
{
private:
......@@ -51,7 +51,7 @@ private:
* \param ext_dom Extended domain
*
*/
void extend_subdomains(const CartDecomposition<dim,T,Memory,Distribution> & dec, const ::Box<dim,T> & ext_dom)
void extend_subdomains(const CartDecomposition<dim,T,Memory,layout_base,Distribution> & dec, const ::Box<dim,T> & ext_dom)
{
// Box
typedef ::Box<dim,T> b;
......@@ -168,12 +168,12 @@ public:
*
*/
CartDecomposition_ext(Vcluster & v_cl)
:CartDecomposition<dim,T,Memory,Distribution>(v_cl)
:CartDecomposition<dim,T,Memory,layout_base,Distribution>(v_cl)
{
}
//! The non-extended decomposition base class
typedef CartDecomposition<dim,T,Memory,Distribution> base_type;
typedef CartDecomposition<dim,T,Memory,layout_base,Distribution> base_type;
/*! \brief It create another object that contain the same decomposition information but with different ghost boxes and an extended domain
*
......@@ -216,7 +216,7 @@ public:
* \return a duplicated decomposition with different ghost boxes and an extended domain
*
*/
void setParameters(const CartDecomposition<dim,T,Memory,Distribution> & dec, const Ghost<dim,T> & g, const ::Box<dim,T> & ext_domain)
void setParameters(const CartDecomposition<dim,T,Memory,layout_base,Distribution> & dec, const Ghost<dim,T> & g, const ::Box<dim,T> & ext_domain)
{
this->box_nn_processor = dec.box_nn_processor;
......
......@@ -115,14 +115,16 @@ struct Box_sub_k
template<unsigned int dim,typename T>
struct Box_map
{
Box<dim,T> box;
typedef boost::fusion::vector<Box<dim,T>,long int> type;
long int prc;
type data;
static bool noPointers()
{
return true;
}
static const unsigned int max_prop = 2;
};
//! Case for local ghost box
......
/*
* CartDecomposition_gpu.hpp
*
* Created on: Aug 7, 2018
* Author: i-bird
*/
#ifndef CARTDECOMPOSITION_GPU_HPP_
#define CARTDECOMPOSITION_GPU_HPP_
#ifdef __NVCC__
/*! \brief For each particle compute the processor it belongs to
 *
 * Each thread handles the particle whose index equals its global thread
 * index; threads whose index is past parts.size() do nothing.
 *
 * \param cdg decomposition object queried with processorIDBC
 * \param parts particles, property 0 is read as the position
 * \param output property 0 receives the particle index, property 1 receives
 *        -1 when the owning processor equals rank, the processor id otherwise
 * \param rank id of the calling processor
 *
 */
template<typename cartdec_gpu, typename particles_type, typename vector_out>
__global__ void process_id_proc_each_part(cartdec_gpu cdg, particles_type parts, vector_out output , int rank)
{
	int idx = blockIdx.x * blockDim.x + threadIdx.x;

	if (idx < parts.size())
	{
		Point<3,float> pos = parts.template get<0>(idx);

		int owner = cdg.processorIDBC(pos);

		if (owner == rank)
		{output.template get<1>(idx) = -1;}
		else
		{output.template get<1>(idx) = owner;}

		output.template get<0>(idx) = idx;
	}
}
#endif
/*! \brief Find the processor that owns the point p
 *
 * The cell of fine_s that contains p is looked up and the sub-domain ids
 * stored in that cell are scanned in order. The scan stops at the first
 * sub-domain whose box (property 0) reports isInsideNP(p); if none does, the
 * last id stored in the cell is used.
 *
 * \param p point to locate
 * \param fine_s cell-list storing, for each cell, ids of candidate sub-domains
 * \param sub_domains_global sub-domains, property 0 is the box and property 1
 *        the processor id
 *
 * \return the processor id (property 1 of the selected sub-domain), or -1
 *         when the cell contains no candidate
 *
 */
template<typename T2, typename fine_s_type, typename vsub_domain_type>
__device__ __host__ inline int processorID_impl(T2 & p, fine_s_type & fine_s, vsub_domain_type & sub_domains_global)
{
	// Id of the selected sub-domain, -1 until a candidate has been read
	int e = -1;

	// Cell containing the point and number of candidates stored in it
	int cl = fine_s.getCell(p);
	int n_ele = fine_s.getNelements(cl);

	for (int i = 0 ; i < n_ele ; i++)
	{
		e = fine_s.get(cl,i);

		if (sub_domains_global.template get<0>(e).isInsideNP(p) == true)
		{
			break;
		}
	}

#if defined(SE_CLASS1) && !defined(__NVCC__)
	if (n_ele == 0)
	{
		std::cout << __FILE__ << ":" << __LINE__ << " I cannot detect in which processor this particle go" << std::endl;
	}
#endif

	// With an empty cell e is still -1: never use it as an index, report the
	// failure to the caller in every build configuration
	if (e < 0)
	{return -1;}

	return sub_domains_global.template get<1>(e);
}
template<unsigned int dim, typename T, typename Memory, template <typename> class layout_base>
class CartDecomposition_gpu
{
CellList_cpu_ker<dim,T,Mem_fast_ker<Memory,memory_traits_lin,int>,shift<dim,T>> clk;
Box<dim,T> domain;
int bc[dim];
openfpm::vector_gpu_ker<Box_map<dim, T>,layout_base> sub_domains_global;
/*! \brief Apply boundary condition to the point
*
* If the particle go out to the right, bring back the particle on the left
* in case of periodic, nothing in case of non periodic
*
* \param pt Point to apply the boundary conditions.(it's coordinated are changed according the
* the explanation before)
*
*/
__device__ void applyPointBC(Point<dim,T> & pt) const
{
for (int i = 0 ; i < dim ; i++)
{
if (bc[i] == PERIODIC)
{pt.get(i) = openfpm::math::periodic_l(pt.get(i),domain.getHigh(i),domain.getLow(i));}
}
}
public:
CartDecomposition_gpu(CellList_cpu_ker<dim,T,Mem_fast_ker<Memory,memory_traits_lin,int>,shift<dim,T>> clk,
openfpm::vector_gpu_ker<Box_map<dim, T>,layout_base> sub_domains_global,
const Box<dim,T> & domain,
const int (& bc)[dim])
:clk(clk),domain(domain),sub_domains_global(sub_domains_global)
{
for (int s = 0 ; s < dim ; s++)
{this->bc[s] = bc[s];}
}
CartDecomposition_gpu(const CartDecomposition_gpu<dim,T,Memory,layout_base> & dec)
:clk(dec.clk),domain(dec.domain)
{
for (int s = 0 ; s < dim ; s++)
{this->bc[s] = dec.bc[s];}
}
/*! \brief Given a point return in which processor the point/particle should go
*
* Boundary conditions are considered
*
* \param p point
*
* \return processorID
*
*/
__device__ int inline processorIDBC(const Point<dim,T> & p)
{
Point<dim,T> pt = p;
this->applyPointBC(pt);
return processorID_impl(pt,clk,sub_domains_global);
}
};
#endif /* CARTDECOMPOSITION_GPU_HPP_ */
......@@ -8,13 +8,15 @@
BOOST_AUTO_TEST_SUITE( decomposition_to_gpu_test )
BOOST_AUTO_TEST_CASE( decomposition_to_gpu_test_use )
{
auto & v_cl = create_vcluster();
// Vcluster
Vcluster & vcl = create_vcluster();
//! [Create CartDecomposition]
CartDecomposition<3, float> dec(vcl);
CartDecomposition<3, float, CudaMemory, memory_traits_inte> dec(vcl);
// Physical domain
Box<3, float> box( { 0.0, 0.0, 0.0 }, { 1.0, 1.0, 1.0 });
......@@ -39,7 +41,38 @@ BOOST_AUTO_TEST_CASE( decomposition_to_gpu_test_use )
dec.setParameters(div,box,bc,g);
dec.decompose();
dec.toKernel()
openfpm::vector_gpu<Point<3,float>> vg;
vg.resize(10000);
for (size_t i = 0 ; i < 10000 ; i++)
{
vg.template get<0>(i)[0] = (float)rand()/RAND_MAX;
vg.template get<0>(i)[1] = (float)rand()/RAND_MAX;
vg.template get<0>(i)[2] = (float)rand()/RAND_MAX;
}
vg.hostToDevice<0>();
// process on GPU the processor ID for each particles
auto ite = vg.getGPUIterator();
openfpm::vector_gpu<aggregate<int,int>> proc_id_out;
proc_id_out.resize(vg.size());
process_id_proc_each_part<decltype(dec.toKernel()),decltype(vg.toKernel()),decltype(proc_id_out.toKernel())>
<<<ite.wthr,ite.thr>>>
(dec.toKernel(),vg.toKernel(),proc_id_out.toKernel(),v_cl.rank());
proc_id_out.deviceToHost<0>();
bool match = true;
for (size_t i = 0 ; i < proc_id_out.size() ; i++)
{
Point<3,float> xp = vg.template get<0>(i);
match &= proc_id_out.template get<0>(i) == dec.processorIDBC(xp);
}
}
BOOST_AUTO_TEST_SUITE_END()
LINKLIBS = $(HDF5_LDFLAGS) $(HDF5_LIBS) $(OPENMP_LDFLAGS) $(LIBHILBERT_LIB) $(METIS_LIB) $(PTHREAD_LIBS) $(OPT_LIBS) $(BOOST_LDFLAGS) $(BOOST_IOSTREAMS_LIB) $(CUDA_LIBS) $(PETSC_LIB) $(SUITESPARSE_LIBS) $(LAPACK_LIBS) $(BLAS_LIBS) $(PARMETIS_LIB) $(BOOST_UNIT_TEST_FRAMEWORK_LIB) $(BOOST_CHRONO_LIB) $(BOOST_TIMER_LIB) $(BOOST_SYSTEM_LIB) $(LIBIFCORE)
FLAGS_NVCC = $(NVCCFLAGS) $(INCLUDES_PATH) $(HDF5_CPPFLAGS) $(BOOST_CPPFLAGS) $(MPI_INC_PATH) $(PETSC_INCLUDE) $(LIBHILBERT_INCLUDE) $(PARMETIS_INCLUDE) $(METIS_INCLUDE) -g --expt-extended-lambda
FLAGS_NVCC = -Xcudafe "--display_error_number --diag_suppress=2885 --diag_suppress=2887 --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111" $(NVCCFLAGS) $(INCLUDES_PATH) $(HDF5_CPPFLAGS) $(BOOST_CPPFLAGS) $(MPI_INC_PATH) $(PETSC_INCLUDE) $(LIBHILBERT_INCLUDE) $(PARMETIS_INCLUDE) $(METIS_INCLUDE) -g --expt-extended-lambda
noinst_PROGRAMS = pdata
pdata_SOURCES = main.cpp Decomposition/cuda/decomposition_cuda_tests.cpp Vector/vector_dist_gpu_unit_tests.cu Grid/tests/grid_dist_id_HDF5_chckpnt_restart_test.cpp Grid/tests/grid_dist_id_unit_test.cpp Grid/tests/staggered_grid_dist_unit_test.cpp Vector/tests/vector_dist_cell_list_tests.cpp Vector/tests/vector_dist_complex_prp_unit_test.cpp Vector/tests/vector_dist_HDF5_chckpnt_restart_test.cpp Vector/tests/vector_dist_MP_unit_tests.cpp Vector/tests/vector_dist_NN_tests.cpp Vector/tests/vector_dist_unit_test.cpp pdata_performance.cpp Decomposition/tests/CartDecomposition_unit_test.cpp Decomposition/tests/shift_vect_converter_tests.cpp Vector/performance/vector_dist_performance_util.cpp lib/pdata.cpp test_multiple_o.cpp ../openfpm_devices/src/memory/CudaMemory.cu ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
pdata_CXXFLAGS = $(BOOST_CPPFLAGS) $(HDF5_CPPFLAGS) $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(LIBHILBERT_INCLUDE) $(PETSC_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(PARMETIS_INCLUDE) $(METIS_INCLUDE) $(H5PART_INCLUDE) -DPARALLEL_IO -Wno-unused-local-typedefs
noinst_PROGRAMS = pdata actual_test
pdata_SOURCES = main.cpp Vector/cuda/vector_dist_cuda_func_test.cu Decomposition/cuda/decomposition_cuda_tests.cu Vector/vector_dist_gpu_unit_tests.cu Grid/tests/grid_dist_id_HDF5_chckpnt_restart_test.cpp Grid/tests/grid_dist_id_unit_test.cpp Grid/tests/staggered_grid_dist_unit_test.cpp Vector/tests/vector_dist_cell_list_tests.cpp Vector/tests/vector_dist_complex_prp_unit_test.cpp Vector/tests/vector_dist_HDF5_chckpnt_restart_test.cpp Vector/tests/vector_dist_MP_unit_tests.cpp Vector/tests/vector_dist_NN_tests.cpp Vector/tests/vector_dist_unit_test.cpp pdata_performance.cpp Decomposition/tests/CartDecomposition_unit_test.cpp Decomposition/tests/shift_vect_converter_tests.cpp Vector/performance/vector_dist_performance_util.cpp lib/pdata.cpp test_multiple_o.cpp ../openfpm_devices/src/memory/CudaMemory.cu ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
pdata_CXXFLAGS = -Wno-unknown-pragmas $(BOOST_CPPFLAGS) $(HDF5_CPPFLAGS) $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(LIBHILBERT_INCLUDE) $(PETSC_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(PARMETIS_INCLUDE) $(METIS_INCLUDE) $(H5PART_INCLUDE) -DPARALLEL_IO -Wno-unused-local-typedefs
pdata_CFLAGS = $(CUDA_CFLAGS)
pdata_LDADD = $(LINKLIBS) -lparmetis -lmetis
# Sources of the actual_test program. The stray `vector_` token (a truncated
# file name with no extension) has been dropped from the list.
actual_test_SOURCES = Vector/cuda/vector_dist_cuda_func_test.cu Vector/vector_dist_gpu_unit_tests.cu main_single.cpp lib/pdata.cpp test_multiple_o.cpp ../openfpm_devices/src/memory/CudaMemory.cu ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
actual_test_CXXFLAGS = -Wno-unknown-pragmas $(BOOST_CPPFLAGS) $(HDF5_CPPFLAGS) $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(LIBHILBERT_INCLUDE) $(PETSC_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(PARMETIS_INCLUDE) $(METIS_INCLUDE) $(H5PART_INCLUDE) -DPARALLEL_IO -Wno-unused-local-typedefs
actual_test_CFLAGS = $(CUDA_CFLAGS)
actual_test_LDADD = $(LINKLIBS) -lparmetis -lmetis
nobase_include_HEADERS = Decomposition/CartDecomposition.hpp Decomposition/shift_vect_converter.hpp Decomposition/CartDecomposition_ext.hpp Decomposition/common.hpp Decomposition/Decomposition.hpp Decomposition/ie_ghost.hpp \
Decomposition/Domain_NN_calculator_cart.hpp Decomposition/nn_processor.hpp Decomposition/ie_loc_ghost.hpp Decomposition/ORB.hpp \
Graph/CartesianGraphFactory.hpp \
......
#define BOOST_TEST_DYN_LINK
#include <boost/test/unit_test.hpp>
#include "VCluster/VCluster.hpp"
#include "Vector/map_vector.hpp"
#include "Vector/cuda/vector_dist_cuda_funcs.cuh"
#include "Vector/util/vector_dist_funcs.hpp"
BOOST_AUTO_TEST_SUITE( vector_dist_gpu_util_func_test )
BOOST_AUTO_TEST_CASE( vector_dist_gpu_find_buffer_offsets_test )
{
openfpm::vector_gpu<aggregate<int>> vgp;
openfpm::vector_gpu<aggregate<int,int>> offs;