Commit 624d8c10 authored by incardon's avatar incardon

Latest GPU particle mesh interpolation

parents b4b5d866 40b251b4
# Change Log
All notable changes to this project will be documented in this file.
## [2.0.0] December 2018
## [2.0.0] December 2018 (Codename Elisa)
### Added
- Adding GPU support (see example 3_molecular_dynamic_gpu)
- Adding GPU support (see examples 1_gpu_first_step,
3_molecular_dynamic_gpu,
7_sph_dlb_gpu,
7_sph_dlb_gpu_opt)
### Fixed
- Detection of clang 10.0.0 on mac-osx mojave
- In VTK binary format all 64 bit types are cast to 32 bit. Either the long/unsigned_long types are bugged in the Paraview version we tested, or I do not understand how they work.
### Changed
- The type Vcluster now is templated and the standard Vcluster is Vcluster<>
Most probably you have to change in your code from Vcluster to Vcluster<>
## [1.1.1] December 2018 (Codename Poisson)
### Fixed
- Detection of clang 10.0.0 on mac-osx mojave
## [1.0.X] End of life (These versions are not supported anymore)
## [1.1.0] February 2018
## [1.1.0] February 2018 (Codename Ring)
### Added
......
......@@ -70,12 +70,8 @@ parallel (
stage ('run mac')
{
parallel (
"1" : {sh "cd openfpm_numerics && ./run.sh $WORKSPACE $NODE_NAME 1 0 0 numerics"},
"2" : {sh "cd openfpm_numerics && ./run.sh $WORKSPACE $NODE_NAME 2 0 0 numerics"},
"3" : {sh "cd openfpm_numerics && ./run.sh $WORKSPACE $NODE_NAME 3 0 0 numerics"},
"4" : {sh "cd openfpm_numerics && ./run.sh $WORKSPACE $NODE_NAME 4 0 0 numerics"}
)
sh "cd openfpm_numerics && ./run.sh $WORKSPACE $NODE_NAME 1 0 0 numerics"
sh "cd openfpm_numerics && ./run.sh $WORKSPACE $NODE_NAME 2 0 0 numerics"
sh "./success.sh 2 sbalzarini-mac-15 openfpm_numerics"
}
}
......
......@@ -4,8 +4,10 @@
* \subpage Vector_1_celllist
* \subpage Vector_1_ghost_get
* \subpage Vector_1_HDF5
* \subpage Vector_1_gpu_first_step
* \subpage Vector_2_expression
* \subpage Vector_3_md
* \subpage Vector_3_md_dyn_gpu
* \subpage Vector_4_reo_root
* \subpage Vector_4_cp
* \subpage Vector_4_mp_cl
......@@ -14,6 +16,7 @@
* \subpage Vector_6_complex_usage
* \subpage Vector_7_sph_dlb
* \subpage Vector_7_sph_dlb_opt
* \subpage Vector_7_sph_dlb_gpu_opt
* \subpage Vector_8_DEM
*
*/
......
/*!
* \page Vector_1_HDF5 HDF5 save and load
* \page Vector_1_HDF5 Vector 1 HDF5 save and load
*
*
* [TOC]
......
/*!
* \page Vector_7_sph_dlb_gpu Vector 7 SPH Dam break simulation with Dynamic load balancing on GPU
/*! \page Vector_7_sph_dlb_gpu Vector 7 SPH Dam break simulation with Dynamic load balancing on Multi-GPU
*
*
* [TOC]
......@@ -8,50 +7,26 @@
* # SPH with Dynamic load Balancing on GPU # {#SPH_dlb_gpu}
*
*
* This example shows the classical SPH Dam break simulation with Load Balancing and Dynamic load balancing. With
* Load balancing and Dynamic load balancing we indicate the ability of the system to re-adapt the domain
* decomposition to keep all the processors loaded and reduce idle time.
* This example shows the classical SPH Dam break simulation with load balancing and dynamic load balancing. The main difference with
* \ref{SPH_dlb} is that here we use the GPU and 1.2 million particles.
*
* \htmlonly
* <a href="#" onclick="hide_show('vector-video-3')" >Simulation video 1</a><br>
* <div style="display:none" id="vector-video-3">
* <video id="vid3" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_speed.mp4" type="video/mp4"></video>
* <video id="vid3" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_gpu1.mp4" type="video/mp4"></video>
* </div>
* <a href="#" onclick="hide_show('vector-video-4')" >Simulation video 2</a><br>
* <div style="display:none" id="vector-video-4">
* <video id="vid4" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_speed2.mp4" type="video/mp4"></video>
* <video id="vid4" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_gpu2.mp4" type="video/mp4"></video>
* </div>
* <a href="#" onclick="hide_show('vector-video-15')" >Simulation dynamic load balancing video 1</a><br>
* <a href="#" onclick="hide_show('vector-video-15')" >Simulation video 3</a><br>
* <div style="display:none" id="vector-video-15">
* <video id="vid15" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_dlb.mp4" type="video/mp4"></video>
* </div>
* <a href="#" onclick="hide_show('vector-video-16')" >Simulation dynamic load balancing video 2</a><br>
* <div style="display:none" id="vector-video-16">
* <video id="vid16" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_dlb2.mp4" type="video/mp4"></video>
* </div>
* <a href="#" onclick="hide_show('vector-video-17')" >Simulation countour prospective 1</a><br>
* <div style="display:none" id="vector-video-17">
* <video id="vid17" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_zoom.mp4" type="video/mp4"></video>
* </div>
* <a href="#" onclick="hide_show('vector-video-18')" >Simulation countour prospective 2</a><br>
* <div style="display:none" id="vector-video-18">
* <video id="vid18" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_back.mp4" type="video/mp4"></video>
* </div>
* <a href="#" onclick="hide_show('vector-video-19')" >Simulation countour prospective 3</a><br>
* <div style="display:none" id="vector-video-19">
* <video id="vid19" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_all.mp4" type="video/mp4"></video>
* <video id="vid15" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_gpu3.mp4" type="video/mp4"></video>
* </div>
* \endhtmlonly
*
* \htmlonly
* <img src="http://ppmcore.mpi-cbg.de/web/images/examples/7_SPH_dlb/dam_break_all.jpg"/>
* \endhtmlonly
*
* ## GPU ## {#e7_sph_inclusion}
*
* This example does not differ from the example in \ref{SPH_dlb}
*
* \snippet Vector/7_SPH_dlb_gpu/main.cpp inclusion
* \snippet Vector/7_SPH_dlb_gpu_opt/main.cpp inclusion
*
*/
......@@ -550,7 +525,7 @@ __global__ void verlet_int_gpu(vector_dist_type vd, real_number dt, real_number
// Check if the particle go out of range in space and in density
if (vd.getPos(a)[0] < 0.000263878 || vd.getPos(a)[1] < 0.000263878 || vd.getPos(a)[2] < 0.000263878 ||
vd.getPos(a)[0] > 0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 0.000263878+0.903944 ||
vd.getPos(a)[0] > 0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 0.50 ||
vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
{vd.template getProp<red>(a) = 1;}
else
......@@ -629,7 +604,7 @@ __global__ void euler_int_gpu(vector_type vd,real_number dt, real_number dt205)
// Check if the particle go out of range in space and in density
if (vd.getPos(a)[0] < 0.000263878 || vd.getPos(a)[1] < 0.000263878 || vd.getPos(a)[2] < 0.000263878 ||
vd.getPos(a)[0] > 0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 0.000263878+0.903944 ||
vd.getPos(a)[0] > 0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 0.50 ||
vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
{vd.template getProp<red>(a) = 1;}
else
......
/*!
* \page Vector_7_sph_dlb_gpu Vector 7 SPH Dam break simulation with Dynamic load balancing on GPU
/*! \page Vector_7_sph_dlb_gpu_opt Vector 7 SPH Dam break simulation with Dynamic load balancing on Multi-GPU (optimized version)
*
*
* [TOC]
*
*
* # SPH with Dynamic load Balancing on GPU # {#SPH_dlb_gpu}
* # SPH with Dynamic load Balancing on GPU # {#SPH_dlb_gpu_opt}
*
*
* This example shows the classical SPH Dam break simulation with Load Balancing and Dynamic load balancing. With
* Load balancing and Dynamic load balancing we indicate the ability of the system to re-adapt the domain
* decomposition to keep all the processors loaded and reduce idle time.
* This example shows the classical SPH Dam break simulation with load balancing and dynamic load balancing. The main difference with
* \ref{SPH_dlb} is that here we use the GPU and 1.2 million particles. Simulating 1.5 seconds should be doable on a 1050Ti within a couple
* of hours.
*
* \htmlonly
* <a href="#" onclick="hide_show('vector-video-3')" >Simulation video 1</a><br>
* <div style="display:none" id="vector-video-3">
* <video id="vid3" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_speed.mp4" type="video/mp4"></video>
* <video id="vid3" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_gpu1.mp4" type="video/mp4"></video>
* </div>
* <a href="#" onclick="hide_show('vector-video-4')" >Simulation video 2</a><br>
* <div style="display:none" id="vector-video-4">
* <video id="vid4" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_speed2.mp4" type="video/mp4"></video>
* <video id="vid4" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_gpu2.mp4" type="video/mp4"></video>
* </div>
* <a href="#" onclick="hide_show('vector-video-15')" >Simulation dynamic load balancing video 1</a><br>
* <a href="#" onclick="hide_show('vector-video-15')" >Simulation video 3</a><br>
* <div style="display:none" id="vector-video-15">
* <video id="vid15" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_dlb.mp4" type="video/mp4"></video>
* </div>
* <a href="#" onclick="hide_show('vector-video-16')" >Simulation dynamic load balancing video 2</a><br>
* <div style="display:none" id="vector-video-16">
* <video id="vid16" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_dlb2.mp4" type="video/mp4"></video>
* </div>
* <a href="#" onclick="hide_show('vector-video-17')" >Simulation countour prospective 1</a><br>
* <div style="display:none" id="vector-video-17">
* <video id="vid17" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_zoom.mp4" type="video/mp4"></video>
* </div>
* <a href="#" onclick="hide_show('vector-video-18')" >Simulation countour prospective 2</a><br>
* <div style="display:none" id="vector-video-18">
* <video id="vid18" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_back.mp4" type="video/mp4"></video>
* </div>
* <a href="#" onclick="hide_show('vector-video-19')" >Simulation countour prospective 3</a><br>
* <div style="display:none" id="vector-video-19">
* <video id="vid19" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_all.mp4" type="video/mp4"></video>
* <video id="vid15" width="1200" height="576" controls> <source src="http://openfpm.mpi-cbg.de/web/images/examples/7_SPH_dlb/sph_gpu3.mp4" type="video/mp4"></video>
* </div>
* \endhtmlonly
*
* \htmlonly
* <img src="http://ppmcore.mpi-cbg.de/web/images/examples/7_SPH_dlb/dam_break_all.jpg"/>
* \endhtmlonly
*
* ## GPU ## {#e7_sph_inclusion}
*
* This example does not differ from the example in \ref{SPH_dlb}
* This example is the port on GPU of the following example \ref{SPH_dlb}
*
* \snippet Vector/7_SPH_dlb_gpu/main.cpp inclusion
* \snippet Vector/7_SPH_dlb_gpu_opt/main.cpp inclusion
*
*/
......@@ -163,7 +143,7 @@ const int red = 8;
const int red2 = 9;
// Type of the vector containing particles
typedef vector_dist_gpu<3,real_number,aggregate<size_t,real_number, real_number, real_number, real_number, real_number[3], real_number[3], real_number[3], real_number, real_number>> particles;
typedef vector_dist_gpu<3,real_number,aggregate<unsigned int,real_number, real_number, real_number, real_number, real_number[3], real_number[3], real_number[3], real_number, real_number>> particles;
// | | | | | | | | | |
// | | | | | | | | | |
// type density density Pressure delta force velocity velocity reduction another
......@@ -509,8 +489,8 @@ __global__ void verlet_int_gpu(vector_dist_type vd, real_number dt, real_number
vd.template getProp<rho>(a) = vd.template getProp<rho_prev>(a) + dt2*vd.template getProp<drho>(a);
// Check if the particle go out of range in space and in density
if (vd.getPos(a)[0] < 0.000263878 || vd.getPos(a)[1] < 0.000263878 || vd.getPos(a)[2] < 0.000263878 ||
vd.getPos(a)[0] > 0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 0.000263878+0.903944 ||
if (vd.getPos(a)[0] < 0.0 || vd.getPos(a)[1] < 0.0 || vd.getPos(a)[2] < 0.0 ||
vd.getPos(a)[0] > 1.61 || vd.getPos(a)[1] > 0.68 || vd.getPos(a)[2] > 0.50 ||
vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
{vd.template getProp<red>(a) = 1;}
else
......@@ -588,8 +568,8 @@ __global__ void euler_int_gpu(vector_type vd,real_number dt, real_number dt205)
vd.template getProp<rho>(a) = vd.template getProp<rho>(a) + dt*vd.template getProp<drho>(a);
// Check if the particle go out of range in space and in density
if (vd.getPos(a)[0] < 0.000263878 || vd.getPos(a)[1] < 0.000263878 || vd.getPos(a)[2] < 0.000263878 ||
vd.getPos(a)[0] > 0.000263878+1.59947 || vd.getPos(a)[1] > 0.000263878+0.672972 || vd.getPos(a)[2] > 0.000263878+0.903944 ||
if (vd.getPos(a)[0] < 0.0 || vd.getPos(a)[1] < 0.0 || vd.getPos(a)[2] < 0.0 ||
vd.getPos(a)[0] > 1.61 || vd.getPos(a)[1] > 0.68 || vd.getPos(a)[2] > 0.50 ||
vd.template getProp<rho>(a) < RhoMin || vd.template getProp<rho>(a) > RhoMax)
{vd.template getProp<red>(a) = 1;}
else
......@@ -718,8 +698,8 @@ int main(int argc, char* argv[])
probes.add({0.754,0.31,0.02});
// Here we define our domain: a 3D box whose lower and upper corners are given below
Box<3,real_number> domain({-0.05,-0.05,-0.05},{1.7010,0.7065,0.5025});
size_t sz[3] = {413,179,131};
Box<3,real_number> domain({-0.05,-0.05,-0.05},{1.7010,0.7065,0.511});
size_t sz[3] = {413,179,133};
// Fill W_dap
W_dap = 1.0/Wab(H/1.5);
......@@ -939,7 +919,7 @@ int main(int argc, char* argv[])
vd.deviceToHostPos();
vd.deviceToHostProp<type,rho,rho_prev,Pressure,drho,force,velocity,velocity_prev,red,red2>();
vd.write_frame("Geometry",write);
vd.write_frame("Geometry",write,VTK_WRITER | FORMAT_BINARY);
write++;
if (v_cl.getProcessUnitID() == 0)
......
......@@ -411,7 +411,7 @@ fi
### Create example.mk
install_base=$(cat install_dir)
openmp_flags="$(cat openmp_flags) $(cat openfpm_flags)"
openmp_flags="$(cat openmp_flags)"
if [ -d "$i_dir/HDF5/lib" ]; then
hdf5_lib=$i_dir/HDF5/lib
......@@ -419,7 +419,7 @@ elif [ -d "$i_dir/HDF5/lib64" ]; then
hdf5_lib=$i_dir/HDF5/lib64
fi
echo "INCLUDE_PATH= $openmp_flags -I. -I$install_base/openfpm_numerics/include -I$install_base/openfpm_pdata/include/config -I$install_base/openfpm_pdata/include -I$install_base/openfpm_data/include -I$install_base/openfpm_vcluster/include -I$install_base/openfpm_io/include -I$install_base/openfpm_devices/include -I$i_dir/METIS/include -I$i_dir/PARMETIS/include -I$i_dir/BOOST/include -I$i_dir/HDF5/include -I$i_dir/LIBHILBERT/include $lin_alg_inc" > example.mk
echo "INCLUDE_PATH=-Wno-deprecated-declarations $openmp_flags -I. -I$install_base/openfpm_numerics/include -I$install_base/openfpm_pdata/include/config -I$install_base/openfpm_pdata/include -I$install_base/openfpm_data/include -I$install_base/openfpm_vcluster/include -I$install_base/openfpm_io/include -I$install_base/openfpm_devices/include -I$i_dir/METIS/include -I$i_dir/PARMETIS/include -I$i_dir/BOOST/include -I$i_dir/HDF5/include -I$i_dir/LIBHILBERT/include $lin_alg_inc" > example.mk
echo "LIBS_PATH= $openmp_flags -L$install_base/openfpm_devices/lib -L$install_base/openfpm_pdata/lib -L$install_base/openfpm_vcluster/lib -L$i_dir/METIS/lib -L$i_dir/PARMETIS/lib -L$i_dir/BOOST/lib -L$hdf5_lib -L$i_dir/LIBHILBERT/lib $lin_alg_dir" >> example.mk
if [ x"$gpu_support" == x"1" ]; then
echo "LIBS=-lvcluster -lofpm_pdata -lofpmmemory -lparmetis -lmetis -lboost_iostreams -lhdf5 -llibhilbert $(cat cuda_lib) $lin_alg_lib" >> example.mk
......@@ -428,7 +428,7 @@ else
echo "LIBS=-lvcluster -lofpm_pdata -lofpmmemory -lparmetis -lmetis -lboost_iostreams -lhdf5 -llibhilbert $lin_alg_lib" >> example.mk
echo "LIBS_SE2=-lvcluster -lofpmmemory_se2 -lparmetis -lmetis -lboost_iostreams -lhdf5 -llibhilbert $lin_alg_lib" >> example.mk
fi
echo "INCLUDE_PATH_NVCC=$(cat openmp_flags) -Xcudafe \"--display_error_number --diag_suppress=2885 --diag_suppress=2887 --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111\" --expt-extended-lambda -I. -I$install_base/openfpm_numerics/include -I$install_base/openfpm_pdata/include/config -I$install_base/openfpm_pdata/include -I$install_base/openfpm_data/include -I$install_base/openfpm_vcluster/include -I$install_base/openfpm_io/include -I$install_base/openfpm_devices/include -I$i_dir/METIS/include -I$i_dir/PARMETIS/include -I$i_dir/BOOST/include -I$i_dir/HDF5/include -I$i_dir/LIBHILBERT/include $lin_alg_inc" >> example.mk
echo "INCLUDE_PATH_NVCC=-Xcompiler="-Wno-deprecated-declarations" $(cat openmp_flags) -Xcudafe \"--display_error_number --diag_suppress=2885 --diag_suppress=2886 --diag_suppress=2887 --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111\" --expt-extended-lambda -I. -I$install_base/openfpm_numerics/include -I$install_base/openfpm_pdata/include/config -I$install_base/openfpm_pdata/include -I$install_base/openfpm_data/include -I$install_base/openfpm_vcluster/include -I$install_base/openfpm_io/include -I$install_base/openfpm_devices/include -I$i_dir/METIS/include -I$i_dir/PARMETIS/include -I$i_dir/BOOST/include -I$i_dir/HDF5/include -I$i_dir/LIBHILBERT/include $lin_alg_inc" >> example.mk
cp example.mk src/example.mk
cp example.mk example/example.mk
......
......@@ -38,7 +38,7 @@ PROJECT_NAME = "OpenFPM_pdata"
# could be handy for archiving the generated documentation or if some version
# control system is used.
PROJECT_NUMBER = 1.1.0
PROJECT_NUMBER = 2.0.0
# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a
......@@ -763,7 +763,8 @@ INPUT_ENCODING = UTF-8
# *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf,
# *.qsf, *.as and *.js.
FILE_PATTERNS = *.hpp *.cpp *.h
FILE_PATTERNS = *.cu *.cuh *.hpp *.cpp *.h
EXTENSION_MAPPING = cu=c++ cuh=c++
# The RECURSIVE tag can be used to specify whether or not subdirectories should
# be searched for input files as well.
......@@ -1927,6 +1928,7 @@ INCLUDE_PATH =
INCLUDE_FILE_PATTERNS =
# The PREDEFINED tag can be used to specify one or more macro names that are
# defined before the preprocessor is started (similar to the -D option of e.g.
# gcc). The argument of the tag is a list of macros of the form: name or
......
......@@ -12,6 +12,7 @@ fi
## Remove old download
rm -rf parmetis-4.0.3
rm parmetis-4.0.3.tar.gz
wget http://glaros.dtc.umn.edu/gkhome/fetch/sw/parmetis/parmetis-4.0.3.tar.gz
tar -xf parmetis-4.0.3.tar.gz
......
......@@ -217,8 +217,8 @@ class domain_nn_calculator_cart
anom_lin.clear();
for (size_t i = 0 ; i < anom.size() ; i++)
{
anom_lin.add();
grid_key_dx<dim> tmp = anom.get(i).subsub + shift;
anom_lin.add();
anom_lin.last().subsub = gs.LinId(tmp);
long int self_cell = -1;
......@@ -281,7 +281,7 @@ public:
dom_cells_lin.clear();
for (size_t i = 0 ; i < dom_cells.size() ; i++)
{
grid_key_dx<dim> tmp = dom_cells.get(i) + shift;
grid_key_dx<dim> tmp = dom_cells.get(i) + shift;
dom_cells_lin.add(gs.LinId(tmp));
}
......
......@@ -277,7 +277,7 @@ struct local_ghost_from_dec_impl<dim,St,prop,Memory,layout_base,true>
<<<ite.wthr,ite.thr>>>
(box_f_dev.toKernel(),box_f_sv.toKernel(),
v_pos.toKernel(),v_prp.toKernel(),
starts.toKernel(),shifts.toKernel(),o_part_loc.toKernel(),old);
starts.toKernel(),shifts.toKernel(),o_part_loc.toKernel(),old,g_m);
#else
std::cout << __FILE__ << ":" << __LINE__ << " error: to use the option RUN_ON_DEVICE you must compile with NVCC" << std::endl;
......
......@@ -166,7 +166,7 @@ BOOST_AUTO_TEST_CASE( vector_ghost_process_local_particles )
<<<ite.wthr,ite.thr>>>
(box_f_dev.toKernel(),box_f_sv.toKernel(),
v_pos.toKernel(),v_prp.toKernel(),
starts.toKernel(),shifts.toKernel(),o_part_loc2.toKernel(),old);
starts.toKernel(),shifts.toKernel(),o_part_loc2.toKernel(),old,v_pos.size());
v_pos.deviceToHost<0>();
o_part_loc2.deviceToHost<0,1>();
......
......@@ -222,12 +222,12 @@ template<unsigned int dim, typename St,
__global__ void shift_ghost_each_part(vector_of_box box_f, vector_of_shifts box_f_sv,
vector_type_pos v_pos, vector_type_prp v_prp,
start_type start, shifts_type shifts,
output_type output, unsigned int offset)
output_type output, unsigned int offset,unsigned int g_m)
{
unsigned int old_shift = (unsigned int)-1;
int p = threadIdx.x + blockIdx.x * blockDim.x;
if (p >= v_pos.size()) return;
if (p >= g_m) return;
Point<dim,St> xp = v_pos.template get<0>(p);
......@@ -251,15 +251,8 @@ __global__ void shift_ghost_each_part(vector_of_box box_f, vector_of_shifts box_
v_pos.template get<0>(base+n)[j] = xp.get(j) - shifts.template get<0>(shift_actual)[j];
}
if (base_o + n < output.size())
{
output.template get<0>(base_o+n) = p;
output.template get<1>(base_o+n) = shift_actual;
}
else
{
printf("OVERFLOW \n");
}
output.template get<0>(base_o+n) = p;
output.template get<1>(base_o+n) = shift_actual;
v_prp.set(base+n,v_prp.get(p));
......@@ -290,7 +283,7 @@ struct _max_: mgpu::maximum_t<red_type>
{};
template<unsigned int prp, template <typename> class op, typename vector_type>
auto reduce(vector_type & vd) -> typename std::remove_reference<decltype(vd.template getProp<prp>(0))>::type
auto reduce_local(vector_type & vd) -> typename std::remove_reference<decltype(vd.template getProp<prp>(0))>::type
{
typedef typename std::remove_reference<decltype(vd.template getProp<prp>(0))>::type reduce_type;
......
......@@ -736,20 +736,20 @@ BOOST_AUTO_TEST_CASE(vector_dist_reduce)
vd.template hostToDeviceProp<0,1,2,3>();
float redf = reduce<0,_add_>(vd);
double redd = reduce<1,_add_>(vd);
int redi = reduce<2,_add_>(vd);
size_t reds = reduce<3,_add_>(vd);
float redf = reduce_local<0,_add_>(vd);
double redd = reduce_local<1,_add_>(vd);
int redi = reduce_local<2,_add_>(vd);
size_t reds = reduce_local<3,_add_>(vd);
BOOST_REQUIRE_EQUAL(redf,(vd.size_local()+1.0)*(vd.size_local())/2.0);
BOOST_REQUIRE_EQUAL(redd,(vd.size_local()+1.0)*(vd.size_local())/2.0);
BOOST_REQUIRE_EQUAL(redi,(vd.size_local()+1)*(vd.size_local())/2);
BOOST_REQUIRE_EQUAL(reds,(vd.size_local()+1)*(vd.size_local())/2);
float redf2 = reduce<0,_max_>(vd);
double redd2 = reduce<1,_max_>(vd);
int redi2 = reduce<2,_max_>(vd);
size_t reds2 = reduce<3,_max_>(vd);
float redf2 = reduce_local<0,_max_>(vd);
double redd2 = reduce_local<1,_max_>(vd);
int redi2 = reduce_local<2,_max_>(vd);
size_t reds2 = reduce_local<3,_max_>(vd);
BOOST_REQUIRE_EQUAL(redf2,vd.size_local());
BOOST_REQUIRE_EQUAL(redd2,vd.size_local());
......
......@@ -1567,6 +1567,12 @@ public:
*/
template<unsigned int ... prp> void map_list_(openfpm::vector<Point<dim, St>> & v_pos, openfpm::vector<prop> & v_prp, size_t & g_m, size_t opt)
{
if (opt & RUN_ON_DEVICE)
{
std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " map_list is unsupported on device (coming soon)" << std::endl;
return;
}
typedef KillParticle obp;
// Processor communication size
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment