Commit cb3566e1 authored by incardon's avatar incardon

GPU moving on

parent 3d995585
SUBDIRS = src images openfpm_data openfpm_io openfpm_devices openfpm_vcluster openfpm_numerics SUBDIRS = src images openfpm_data openfpm_io openfpm_devices openfpm_vcluster openfpm_numerics
ACLOCAL_AMFLAGS = -I m4
bin_PROGRAMS = bin_PROGRAMS =
pdata: pdata:
...@@ -20,6 +22,8 @@ io: ...@@ -20,6 +22,8 @@ io:
numerics: numerics:
cd openfpm_numerics/src && make cd openfpm_numerics/src && make
actual_test:
cd src && make actual_test
......
...@@ -8,6 +8,7 @@ AC_INIT(OpenFPM_pdata, 1.0.0, BUG-REPORT-ADDRESS) ...@@ -8,6 +8,7 @@ AC_INIT(OpenFPM_pdata, 1.0.0, BUG-REPORT-ADDRESS)
AC_CANONICAL_SYSTEM AC_CANONICAL_SYSTEM
AC_CONFIG_SRCDIR([src/main.cpp]) AC_CONFIG_SRCDIR([src/main.cpp])
AC_CONFIG_SUBDIRS([openfpm_data openfpm_devices openfpm_vcluster openfpm_io openfpm_numerics]) AC_CONFIG_SUBDIRS([openfpm_data openfpm_devices openfpm_vcluster openfpm_io openfpm_numerics])
AC_CONFIG_MACRO_DIRS([m4])
#### Adding --with-pdata option and openfpm_pdata to prefix folder #### Adding --with-pdata option and openfpm_pdata to prefix folder
...@@ -22,7 +23,7 @@ ac_configure_args="$ac_configure_args --with-pdata=../../src" ...@@ -22,7 +23,7 @@ ac_configure_args="$ac_configure_args --with-pdata=../../src"
######################## ########################
AM_INIT_AUTOMAKE([subdir-objects]) AM_INIT_AUTOMAKE([subdir-objects foreign])
AC_CONFIG_HEADER([src/config/config.h]) AC_CONFIG_HEADER([src/config/config.h])
m4_ifdef([ACX_PTHREAD],,[m4_include([m4/acx_pthread.m4])]) m4_ifdef([ACX_PTHREAD],,[m4_include([m4/acx_pthread.m4])])
m4_ifdef([ACX_MPI],,[m4_include([m4/acx_mpi.m4])]) m4_ifdef([ACX_MPI],,[m4_include([m4/acx_mpi.m4])])
...@@ -144,7 +145,7 @@ have_quad_head=no ...@@ -144,7 +145,7 @@ have_quad_head=no
AC_CHECK_LIB(quadmath, sinq, [have_quad_lib=yes], []) AC_CHECK_LIB(quadmath, sinq, [have_quad_lib=yes], [])
AC_CHECK_HEADER(quadmath.h,[have_quad_head=yes],[]) AC_CHECK_HEADER(quadmath.h,[have_quad_head=yes],[])
if [x"have_quad_math" == x"yes" $&& x"have_quad_math" == x"yes" ]; then if test x"have_quad_math" == x"yes"; then
AC_DEFINE(HAVE_LIBQUADMATH,[],[Have quad math lib]) AC_DEFINE(HAVE_LIBQUADMATH,[],[Have quad math lib])
LIBQUADMATH=" -lquadmath " LIBQUADMATH=" -lquadmath "
fi fi
......
...@@ -1444,7 +1444,7 @@ int main(int argc, char* argv[]) ...@@ -1444,7 +1444,7 @@ int main(int argc, char* argv[])
// calculate the pressure at the sensor points // calculate the pressure at the sensor points
sensor_pressure(vd,NN,press_t,probes); sensor_pressure(vd,NN,press_t,probes);
vd.write("Geometry",write); vd.write_frame("Geometry",write);
write++; write++;
if (v_cl.getProcessUnitID() == 0) if (v_cl.getProcessUnitID() == 0)
......
...@@ -2,22 +2,22 @@ LINKLIBS = $(HDF5_LDFLAGS) $(HDF5_LIBS) $(OPENMP_LDFLAGS) $(LIBHILBERT_LIB) $(P ...@@ -2,22 +2,22 @@ LINKLIBS = $(HDF5_LDFLAGS) $(HDF5_LIBS) $(OPENMP_LDFLAGS) $(LIBHILBERT_LIB) $(P
noinst_PROGRAMS = cart_dec metis_dec dom_box vector_dist noinst_PROGRAMS = cart_dec metis_dec dom_box vector_dist
cart_dec_SOURCES = CartDecomposition_gen_vtk.cpp ../src/lib/pdata.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp cart_dec_SOURCES = CartDecomposition_gen_vtk.cpp ../src/lib/pdata.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
cart_dec_CXXFLAGS = $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(PETSC_INCLUDE) $(METIS_INCLUDE) $(PARMETIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs cart_dec_CXXFLAGS = -Wno-unknown-pragmas $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(PETSC_INCLUDE) $(METIS_INCLUDE) $(PARMETIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs
cart_dec_CFLAGS = $(OPENMP_CFLAGS) $(CUDA_CFLAGS) cart_dec_CFLAGS = $(OPENMP_CFLAGS) $(CUDA_CFLAGS)
cart_dec_LDADD = $(LINKLIBS) -lparmetis -lmetis cart_dec_LDADD = $(LINKLIBS) -lparmetis -lmetis
metis_dec_SOURCES = Metis_gen_vtk.cpp ../src/lib/pdata.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp metis_dec_SOURCES = Metis_gen_vtk.cpp ../src/lib/pdata.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
metis_dec_CXXFLAGS = $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(PETSC_INCLUDE) $(METIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs metis_dec_CXXFLAGS = -Wno-unknown-pragmas $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(PETSC_INCLUDE) $(METIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs
metis_dec_CFLAGS = $(OPENMP_CFLAGS) $(CUDA_CFLAGS) metis_dec_CFLAGS = $(OPENMP_CFLAGS) $(CUDA_CFLAGS)
metis_dec_LDADD = $(LINKLIBS) -lmetis metis_dec_LDADD = $(LINKLIBS) -lmetis
dom_box_SOURCES = domain_gen_vtk.cpp ../src/lib/pdata.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp dom_box_SOURCES = domain_gen_vtk.cpp ../src/lib/pdata.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
dom_box_CXXFLAGS = $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(PETSC_INCLUDE) $(METIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs dom_box_CXXFLAGS = -Wno-unknown-pragmas $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(PETSC_INCLUDE) $(METIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs
dom_box_CFLAGS = $(OPENMP_CFLAGS) $(CUDA_CFLAGS) dom_box_CFLAGS = $(OPENMP_CFLAGS) $(CUDA_CFLAGS)
dom_box_LDADD = $(LINKLIBS) dom_box_LDADD = $(LINKLIBS)
vector_dist_SOURCES = vector.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_devices/src/Memleak_check.cpp vector_dist_SOURCES = vector.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_devices/src/Memleak_check.cpp
vector_dist_CXXFLAGS = $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(LIBHILBERT_INCLUDE) $(PETSC_INCLUDE) $(PARMETIS_INCLUDE) $(METIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(HDF5_CPPFLAGS) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs vector_dist_CXXFLAGS = -Wno-unknown-pragmas $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(LIBHILBERT_INCLUDE) $(PETSC_INCLUDE) $(PARMETIS_INCLUDE) $(METIS_INCLUDE) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(HDF5_CPPFLAGS) $(BOOST_CPPFLAGS) -I../src -Wno-unused-function -Wno-unused-local-typedefs
vector_dist_CFLAGS = $(OPENMP_CFLAGS) $(CUDA_CFLAGS) vector_dist_CFLAGS = $(OPENMP_CFLAGS) $(CUDA_CFLAGS)
vector_dist_LDADD = $(LINKLIBS) -lparmetis -lmetis vector_dist_LDADD = $(LINKLIBS) -lparmetis -lmetis
......
openfpm_data @ 66fdd0bd
Subproject commit e96170649cc5257d6be5b937372b281e6e5d74a8 Subproject commit 66fdd0bdd308bd78efe3c14a7d2bf9290e35f550
openfpm_vcluster @ ad71d154
Subproject commit 9602917fecc1e84f7a837fcd69a4bd98d572b4ba Subproject commit ad71d154d0a8420816f49b63dfa1f43084b13022
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
#include "CartDecomposition_ext.hpp" #include "CartDecomposition_ext.hpp"
#include "data_type/aggregate.hpp" #include "data_type/aggregate.hpp"
#include "Domain_NN_calculator_cart.hpp" #include "Domain_NN_calculator_cart.hpp"
#include "cuda/CartDecomposition_gpu.cuh"
#define CARTDEC_ERROR 2000lu #define CARTDEC_ERROR 2000lu
...@@ -128,7 +129,7 @@ template<unsigned int dim> static void nsub_to_div(size_t (& div)[dim], size_t n ...@@ -128,7 +129,7 @@ template<unsigned int dim> static void nsub_to_div(size_t (& div)[dim], size_t n
* *
*/ */
template<unsigned int dim, typename T, typename Memory, typename Distribution> template<unsigned int dim, typename T, typename Memory, template <typename> class layout_base, typename Distribution>
class CartDecomposition: public ie_loc_ghost<dim, T>, public nn_prcs<dim, T>, public ie_ghost<dim, T>, public domain_nn_calculator_cart<dim> class CartDecomposition: public ie_loc_ghost<dim, T>, public nn_prcs<dim, T>, public ie_ghost<dim, T>, public domain_nn_calculator_cart<dim>
{ {
public: public:
...@@ -140,13 +141,16 @@ public: ...@@ -140,13 +141,16 @@ public:
typedef SpaceBox<dim, T> Box; typedef SpaceBox<dim, T> Box;
//! This class is base of itself //! This class is base of itself
typedef CartDecomposition<dim,T,Memory,Distribution> base_type; typedef CartDecomposition<dim,T,Memory,layout_base,Distribution> base_type;
//! This class admit a class defined on an extended domain //! This class admit a class defined on an extended domain
typedef CartDecomposition_ext<dim,T,Memory,Distribution> extended_type; typedef CartDecomposition_ext<dim,T,Memory,layout_base,Distribution> extended_type;
protected: protected:
//! bool that indicates whether the buffer has already been transferred to the device
bool host_dev_transfer = false;
//! Indicate the communication weight has been set //! Indicate the communication weight has been set
bool commCostSet = false; bool commCostSet = false;
...@@ -163,14 +167,14 @@ protected: ...@@ -163,14 +167,14 @@ protected:
openfpm::vector<SpaceBox<dim, T>> sub_domains; openfpm::vector<SpaceBox<dim, T>> sub_domains;
//! the remote set of all sub-domains as vector of 'sub_domains' vectors //! the remote set of all sub-domains as vector of 'sub_domains' vectors
mutable openfpm::vector<Box_map<dim, T>> sub_domains_global; mutable openfpm::vector<Box_map<dim, T>,Memory,typename layout_base<Box_map<dim, T>>::type,layout_base> sub_domains_global;
//! for each sub-domain, contain the list of the neighborhood processors //! for each sub-domain, contain the list of the neighborhood processors
openfpm::vector<openfpm::vector<long unsigned int> > box_nn_processor; openfpm::vector<openfpm::vector<long unsigned int> > box_nn_processor;
//! Structure that contain for each sub-sub-domain box the processor id //! Structure that contain for each sub-sub-domain box the processor id
//! exist for efficient global communication //! exist for efficient global communication
CellList<dim,T,Mem_fast<>,shift<dim,T>> fine_s; CellList<dim,T,Mem_fast<Memory,int>,shift<dim,T>> fine_s;
//! Structure that store the cartesian grid information //! Structure that store the cartesian grid information
grid_sm<dim, void> gr; grid_sm<dim, void> gr;
...@@ -268,26 +272,24 @@ protected: ...@@ -268,26 +272,24 @@ protected:
return sub_d; return sub_d;
} }
void collect_all_sub_domains(openfpm::vector<Box_map<dim,T>> & sub_domains_global) void collect_all_sub_domains(openfpm::vector<Box_map<dim,T>,Memory,typename layout_base<Box_map<dim, T>>::type,layout_base> & sub_domains_global)
{ {
#ifdef SE_CLASS2 #ifdef SE_CLASS2
check_valid(this,8); check_valid(this,8);
#endif #endif
sub_domains_global.clear(); sub_domains_global.clear();
openfpm::vector<Box_map<dim,T>> bm; openfpm::vector<Box_map<dim,T>,Memory,typename layout_base<Box_map<dim, T>>::type,layout_base> bm;
for (size_t i = 0 ; i < sub_domains.size() ; i++) for (size_t i = 0 ; i < sub_domains.size() ; i++)
{ {
Box_map<dim,T> tmp; bm.add();
tmp.box = ::SpaceBox<dim,T>(sub_domains.get(i));
tmp.prc = v_cl.rank();
bm.add(tmp);
bm.template get<0>(bm.size()-1) = ::SpaceBox<dim,T>(sub_domains.get(i));
bm.template get<1>(bm.size()-1) = v_cl.rank();
} }
v_cl.SGather(bm,sub_domains_global,0); v_cl.SGather<decltype(bm),decltype(sub_domains_global),layout_base>(bm,sub_domains_global,0);
size_t size = sub_domains_global.size(); size_t size = sub_domains_global.size();
...@@ -324,8 +326,8 @@ public: ...@@ -324,8 +326,8 @@ public:
{ {
// get the cells this box span // get the cells this box span
const grid_key_dx<dim> p1 = fine_s.getCellGrid(sub_domains_global.get(i).box.getP1()); const grid_key_dx<dim> p1 = fine_s.getCellGrid(sub_domains_global.template get<0>(i).getP1());
const grid_key_dx<dim> p2 = fine_s.getCellGrid(sub_domains_global.get(i).box.getP2()); const grid_key_dx<dim> p2 = fine_s.getCellGrid(sub_domains_global.template get<0>(i).getP2());
// Get the grid and the sub-iterator // Get the grid and the sub-iterator
auto & gi = fine_s.getGrid(); auto & gi = fine_s.getGrid();
...@@ -336,9 +338,12 @@ public: ...@@ -336,9 +338,12 @@ public:
{ {
auto key = g_sub.get(); auto key = g_sub.get();
fine_s.addCell(gi.LinId(key),i); fine_s.addCell(gi.LinId(key),i);
++g_sub; ++g_sub;
} }
} }
host_dev_transfer = false;
} }
/*! \brief Constructor, it decompose and distribute the sub-domains across the processors /*! \brief Constructor, it decompose and distribute the sub-domains across the processors
...@@ -428,28 +433,6 @@ public: ...@@ -428,28 +433,6 @@ public:
construct_fine_s(); construct_fine_s();
/////////////////////////////////////////////////////////////////////////////////
/* grid_key_dx_iterator<dim> git(gr);
while (git.isNext())
{
auto key = git.get();
grid_key_dx<dim> key2;
for (size_t i = 0 ; i < dim ; i++)
{key2.set_d(i,key.get(i) / magn[i]);}
size_t lin = gr_dist.LinId(key2);
size_t lin2 = gr.LinId(key);
// Here we draw the fine_s in the cell-list
fine_s.get(lin2) = dist.getGraph().template vertex_p<nm_v::proc_id>(lin);
++git;
}*/
Initialize_geo_cell_lists(); Initialize_geo_cell_lists();
} }
...@@ -666,7 +649,7 @@ public: ...@@ -666,7 +649,7 @@ public:
ie_loc_ghost<dim,T>::create(sub_domains,domain,ghost,bc); ie_loc_ghost<dim,T>::create(sub_domains,domain,ghost,bc);
} }
template<typename T2> inline size_t processorID_impl(T2 & p) const /* template<typename T2> inline size_t processorID_impl(T2 & p) const
{ {
// Get the number of elements in the cell // Get the number of elements in the cell
...@@ -678,7 +661,7 @@ public: ...@@ -678,7 +661,7 @@ public:
{ {
e = fine_s.get(cl,i); e = fine_s.get(cl,i);
if (sub_domains_global.get(e).box.isInsideNP(p) == true) if (sub_domains_global.template get<0>(e).isInsideNP(p) == true)
{ {
break; break;
} }
...@@ -694,8 +677,8 @@ public: ...@@ -694,8 +677,8 @@ public:
#endif #endif
return sub_domains_global.get(e).prc; return sub_domains_global.template get<1>(e);
} }*/
public: public:
...@@ -737,7 +720,7 @@ public: ...@@ -737,7 +720,7 @@ public:
* \param cart object to copy * \param cart object to copy
* *
*/ */
CartDecomposition(const CartDecomposition<dim,T,Memory,Distribution> & cart) CartDecomposition(const CartDecomposition<dim,T,Memory,layout_base,Distribution> & cart)
:nn_prcs<dim,T>(cart.v_cl),v_cl(cart.v_cl),dist(v_cl),ref_cnt(0) :nn_prcs<dim,T>(cart.v_cl),v_cl(cart.v_cl),dist(v_cl),ref_cnt(0)
{ {
this->operator=(cart); this->operator=(cart);
...@@ -748,7 +731,7 @@ public: ...@@ -748,7 +731,7 @@ public:
* \param cart object to copy * \param cart object to copy
* *
*/ */
CartDecomposition(CartDecomposition<dim,T,Memory,Distribution> && cart) CartDecomposition(CartDecomposition<dim,T,Memory,layout_base,Distribution> && cart)
:nn_prcs<dim,T>(cart.v_cl),v_cl(cart.v_cl),dist(v_cl),ref_cnt(0) :nn_prcs<dim,T>(cart.v_cl),v_cl(cart.v_cl),dist(v_cl),ref_cnt(0)
{ {
this->operator=(cart); this->operator=(cart);
...@@ -900,9 +883,9 @@ public: ...@@ -900,9 +883,9 @@ public:
* \return a duplicated decomposition with different ghost boxes * \return a duplicated decomposition with different ghost boxes
* *
*/ */
CartDecomposition<dim,T,Memory,Distribution> duplicate(const Ghost<dim,T> & g) const CartDecomposition<dim,T,Memory,layout_base,Distribution> duplicate(const Ghost<dim,T> & g) const
{ {
CartDecomposition<dim,T,Memory,Distribution> cart(v_cl); CartDecomposition<dim,T,Memory,layout_base,Distribution> cart(v_cl);
cart.box_nn_processor = box_nn_processor; cart.box_nn_processor = box_nn_processor;
cart.sub_domains = sub_domains; cart.sub_domains = sub_domains;
...@@ -936,9 +919,9 @@ public: ...@@ -936,9 +919,9 @@ public:
* \return a duplicated CartDecomposition object * \return a duplicated CartDecomposition object
* *
*/ */
CartDecomposition<dim,T,Memory,Distribution> duplicate() const CartDecomposition<dim,T,Memory,layout_base,Distribution> duplicate() const
{ {
CartDecomposition<dim,T,Memory,Distribution> cart(v_cl); CartDecomposition<dim,T,Memory,layout_base,Distribution> cart(v_cl);
(static_cast<ie_loc_ghost<dim,T>*>(&cart))->operator=(static_cast<ie_loc_ghost<dim,T>>(*this)); (static_cast<ie_loc_ghost<dim,T>*>(&cart))->operator=(static_cast<ie_loc_ghost<dim,T>>(*this));
(static_cast<nn_prcs<dim,T>*>(&cart))->operator=(static_cast<nn_prcs<dim,T>>(*this)); (static_cast<nn_prcs<dim,T>*>(&cart))->operator=(static_cast<nn_prcs<dim,T>>(*this));
...@@ -974,7 +957,7 @@ public: ...@@ -974,7 +957,7 @@ public:
* \return itself * \return itself
* *
*/ */
CartDecomposition<dim,T,Memory, Distribution> & operator=(const CartDecomposition & cart) CartDecomposition<dim,T,Memory, layout_base, Distribution> & operator=(const CartDecomposition & cart)
{ {
static_cast<ie_loc_ghost<dim,T>*>(this)->operator=(static_cast<ie_loc_ghost<dim,T>>(cart)); static_cast<ie_loc_ghost<dim,T>*>(this)->operator=(static_cast<ie_loc_ghost<dim,T>>(cart));
static_cast<nn_prcs<dim,T>*>(this)->operator=(static_cast<nn_prcs<dim,T>>(cart)); static_cast<nn_prcs<dim,T>*>(this)->operator=(static_cast<nn_prcs<dim,T>>(cart));
...@@ -1014,7 +997,7 @@ public: ...@@ -1014,7 +997,7 @@ public:
* \return itself * \return itself
* *
*/ */
CartDecomposition<dim,T,Memory,Distribution> & operator=(CartDecomposition && cart) CartDecomposition<dim,T,Memory,layout_base, Distribution> & operator=(CartDecomposition && cart)
{ {
static_cast<ie_loc_ghost<dim,T>*>(this)->operator=(static_cast<ie_loc_ghost<dim,T>>(cart)); static_cast<ie_loc_ghost<dim,T>*>(this)->operator=(static_cast<ie_loc_ghost<dim,T>>(cart));
static_cast<nn_prcs<dim,T>*>(this)->operator=(static_cast<nn_prcs<dim,T>>(cart)); static_cast<nn_prcs<dim,T>*>(this)->operator=(static_cast<nn_prcs<dim,T>>(cart));
...@@ -1074,7 +1057,7 @@ public: ...@@ -1074,7 +1057,7 @@ public:
*/ */
template<typename Mem> size_t inline processorID(const encapc<1, Point<dim,T>, Mem> & p) const template<typename Mem> size_t inline processorID(const encapc<1, Point<dim,T>, Mem> & p) const
{ {
return processorID_impl(p); return processorID_impl(p,fine_s,sub_domains_global);
} }
/*! \brief Given a point return in which processor the particle should go /*! \brief Given a point return in which processor the particle should go
...@@ -1086,7 +1069,7 @@ public: ...@@ -1086,7 +1069,7 @@ public:
*/ */
size_t inline processorID(const Point<dim,T> &p) const size_t inline processorID(const Point<dim,T> &p) const
{ {
return processorID_impl(p); return processorID_impl(p,fine_s,sub_domains_global);
} }
/*! \brief Given a point return in which processor the particle should go /*! \brief Given a point return in which processor the particle should go
...@@ -1098,7 +1081,7 @@ public: ...@@ -1098,7 +1081,7 @@ public:
*/ */
size_t inline processorID(const T (&p)[dim]) const size_t inline processorID(const T (&p)[dim]) const
{ {
return processorID_impl(p); return processorID_impl(p,fine_s,sub_domains_global);
} }
/*! \brief Given a point return in which processor the point/particle should go /*! \brief Given a point return in which processor the point/particle should go
...@@ -1116,7 +1099,7 @@ public: ...@@ -1116,7 +1099,7 @@ public:
applyPointBC(pt); applyPointBC(pt);
return processorID_impl(pt); return processorID_impl(pt,fine_s,sub_domains_global);
} }
/*! \brief Given a point return in which processor the particle should go /*! \brief Given a point return in which processor the particle should go
...@@ -1135,7 +1118,7 @@ public: ...@@ -1135,7 +1118,7 @@ public:
// Get the number of elements in the cell // Get the number of elements in the cell
return processorID_impl(pt); return processorID_impl(pt,fine_s,sub_domains_global);
} }
/*! \brief Given a point return in which processor the particle should go /*! \brief Given a point return in which processor the particle should go
...@@ -1152,7 +1135,7 @@ public: ...@@ -1152,7 +1135,7 @@ public:
Point<dim,T> pt = p; Point<dim,T> pt = p;
applyPointBC(pt); applyPointBC(pt);
return processorID_impl(pt); return processorID_impl(pt,fine_s,sub_domains_global);
} }
/*! \brief Get the periodicity on i dimension /*! \brief Get the periodicity on i dimension
...@@ -1838,6 +1821,12 @@ public: ...@@ -1838,6 +1821,12 @@ public:
std::cout << ::SpaceBox<dim, T>(sub_domains.get(p)).toString() << "\n"; std::cout << ::SpaceBox<dim, T>(sub_domains.get(p)).toString() << "\n";
} }
std::cout << "Subdomains global\n";
for (size_t p = 0; p < sub_domains_global.size(); p++)
{
std::cout << ::SpaceBox<dim, T>(sub_domains_global.template get<0>(p)).toString() << " proc:" << sub_domains_global.template get<1>(p) << "\n";
}
std::cout << "External ghost box\n"; std::cout << "External ghost box\n";
for (size_t p = 0; p<nn_prcs < dim, T>::getNNProcessors(); p++) for (size_t p = 0; p<nn_prcs < dim, T>::getNNProcessors(); p++)
...@@ -1990,6 +1979,33 @@ public: ...@@ -1990,6 +1979,33 @@ public:
return cd; return cd;
} }
/*! \brief Build the device-side view of this decomposition
 *
 * When host_dev_transfer is false, fine_s and properties 0 and 1 of
 * sub_domains_global are copied to the device with hostToDevice() and the
 * flag is then set, so later calls skip the copy for as long as the flag
 * stays true.
 *
 * The periodicity of each of the dim dimensions is read with periodicity(i)
 * and handed to the returned object together with the kernel views of
 * fine_s and sub_domains_global and the domain returned by getDomain().
 *
 * \return a data-structure that can be used directly on GPU
 *
 */
CartDecomposition_gpu<dim,T,Memory,layout_base> toKernel()
{
	// Copy the lookup structures to the device only if it has not been done yet
	if (!host_dev_transfer)
	{
		fine_s.hostToDevice();
		sub_domains_global.template hostToDevice<0,1>();

		host_dev_transfer = true;
	}

	// Per-dimension periodicity, as reported by periodicity()
	int periodic[dim];

	for (int d = 0 ; d < dim ; d++)
	{
		periodic[d] = this->periodicity(d);
	}

	CartDecomposition_gpu<dim,T,Memory,layout_base> dec_gpu(fine_s.toKernel(),
															sub_domains_global.toKernel(),
															getDomain(),
															periodic);

	return dec_gpu;
}
//! friend classes //! friend classes
friend extended_type; friend extended_type;
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
#include "Space/Ghost.hpp" #include "Space/Ghost.hpp"
#include "Decomposition/nn_processor.hpp" #include "Decomposition/nn_processor.hpp"
template<unsigned int dim, typename T, typename Memory = HeapMemory, typename Distribution = ParMetisDistribution<dim, T>> template<unsigned int dim, typename T, typename Memory = HeapMemory, template<typename> class layout_base = memory_traits_lin, typename Distribution = ParMetisDistribution<dim, T>>
class CartDecomposition; class CartDecomposition;
/** /**
...@@ -38,8 +38,8 @@ class CartDecomposition; ...@@ -38,8 +38,8 @@ class CartDecomposition;
* *
*/ */
template<unsigned int dim, typename T, typename Memory = HeapMemory, typename Distribution = ParMetisDistribution<dim, T>> template<unsigned int dim, typename T, typename Memory = HeapMemory, template<typename> class layout_base = memory_traits_lin, typename Distribution = ParMetisDistribution<dim, T>>
class CartDecomposition_ext: public CartDecomposition<dim,T,Memory,Distribution> class CartDecomposition_ext: public CartDecomposition<dim,T,Memory,layout_base,Distribution>
{ {
private: private:
...@@ -51,7 +51,7 @@ private: ...@@ -51,7 +51,7 @@ private:
* \param ext_dom Extended domain * \param ext_dom Extended domain
* *
*/ */
void extend_subdomains(const CartDecomposition<dim,T,Memory,Distribution> & dec, const ::Box<dim,T> & ext_dom) void extend_subdomains(const CartDecomposition<dim,T,Memory,layout_base,Distribution> & dec, const ::Box<dim,T> & ext_dom)
{ {
// Box // Box
typedef ::Box<dim,T> b; typedef ::Box<dim,T> b;
...@@ -168,12 +168,12 @@ public: ...@@ -168,12 +168,12 @@ public:
* *
*/ */
CartDecomposition_ext(Vcluster & v_cl) CartDecomposition_ext(Vcluster & v_cl)
:CartDecomposition<dim,T,Memory,Distribution>(v_cl) :CartDecomposition<dim,T,Memory,layout_base,Distribution>(v_cl)
{ {
} }
//! The non-extended decomposition base class //! The non-extended decomposition base class
typedef CartDecomposition<dim,T,Memory,Distribution> base_type; typedef CartDecomposition<dim,T,Memory,layout_base,Distribution> base_type;
/*! \brief It create another object that contain the same decomposition information but with different ghost boxes and an extended domain /*! \brief It create another object that contain the same decomposition information but with different ghost boxes and an extended domain
* *
...@@ -216,7 +216,7 @@ public: ...@@ -216,7 +216,7 @@ public:
* \return a duplicated decomposition with different ghost boxes and an extended domain * \return a duplicated decomposition with different ghost boxes and an extended domain
* *
*/ */
void setParameters(const CartDecomposition<dim,T,Memory,Distribution> & dec, const Ghost<dim,T> & g, const ::Box<dim,T> & ext_domain) void setParameters(const CartDecomposition<dim,T,Memory,layout_base,Distribution> & dec, const Ghost<dim,T> & g, const ::Box<dim,T> & ext_domain)
{ {
this->box_nn_processor = dec.box_nn_processor; this->box_nn_processor = dec.box_nn_processor;
......
...@@ -115,14 +115,16 @@ struct Box_sub_k ...@@ -115,14 +115,16 @@ struct Box_sub_k
template<unsigned int dim,typename T> template<unsigned int dim,typename T>
struct Box_map struct Box_map
{ {
Box<dim,T> box; typedef boost::fusion::vector<Box<dim,T>,long int> type;
long int prc; type data;
static bool noPointers() static bool noPointers()
{ {
return true; return true;
} }
static const unsigned int max_prop = 2;
}; };
//! Case for local ghost box //! Case for local ghost box
......
/*
* CartDecomposition_gpu.hpp
*
* Created on: Aug 7, 2018
* Author: i-bird
*/
#ifndef CARTDECOMPOSITION_GPU_HPP_
#define CARTDECOMPOSITION_GPU_HPP_
#ifdef __NVCC__
/*! \brief For each particle compute the processor id it belongs to
 *
 * Each thread handles the element with index
 * threadIdx.x + blockIdx.x * blockDim.x (only the x components are used);
 * threads whose index is not smaller than parts.size() do nothing.
 *
 * \param cdg object queried with processorIDBC() for the processor id of a point
 * \param parts particle container; property 0 of element p is read as a Point<3,float>
 * \param output for element p, property 0 receives p and property 1 receives
 *        -1 when the computed processor id equals rank, the processor id otherwise
 * \param rank processor id compared against the computed one
 *
 */
template<typename cartdec_gpu, typename particles_type, typename vector_out>
__global__ void process_id_proc_each_part(cartdec_gpu cdg, particles_type parts, vector_out output , int rank)
{
	int p = blockIdx.x * blockDim.x + threadIdx.x;

	// Only indices inside the particle container are processed
	if (p < parts.size())
	{
		Point<3,float> pos = parts.template get<0>(p);

		int dest = cdg.processorIDBC(pos);

		// -1 marks an element whose computed processor id equals rank
		int marked = dest;
		if (dest == rank)
		{marked = -1;}

		output.template get<0>(p) = p;
		output.template get<1>(p) = marked;
	}
}