From e1b863f589732c8d07c9ea5cd98c6d46b54c4e6f Mon Sep 17 00:00:00 2001 From: Pietro Incardona <incardon@mpi-cbg.de> Date: Fri, 11 Dec 2020 12:47:26 +0100 Subject: [PATCH] Fixing ghost put on GPU --- src/CMakeLists.txt | 36 +++++++++++++++- src/Vector/cuda/vector_dist_cuda_funcs.cuh | 1 + src/Vector/cuda/vector_dist_gpu_unit_tests.cu | 41 +++++++++++++------ src/Vector/util/vector_dist_funcs.hpp | 1 + 4 files changed, 64 insertions(+), 15 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 038eb87bc..49811b035 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -43,6 +43,14 @@ add_executable(pdata ${OPENFPM_INIT_FILE} ${CUDA_SOURCES} main.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ) +add_executable(isolation_pdata ${OPENFPM_INIT_FILE} isolation.cu + lib/pdata.cpp + ../openfpm_devices/src/memory/HeapMemory.cpp + ../openfpm_devices/src/memory/CudaMemory.cu + ../openfpm_devices/src/memory/PtrMemory.cpp + ../openfpm_vcluster/src/VCluster/VCluster.cpp + ) + if ( CMAKE_COMPILER_IS_GNUCC ) target_compile_options(pdata PRIVATE "-Wno-deprecated-declarations") if (TEST_COVERAGE) @@ -65,6 +73,7 @@ add_test(NAME pdata_4_proc COMMAND mpirun -np 4 ./pdata) if (CUDA_FOUND) target_compile_options(pdata PUBLIC $<$<COMPILE_LANGUAGE:CUDA>: ${WARNING_SUPPRESSION_AND_OPTION_NVCC} >) + target_compile_options(isolation_pdata PUBLIC $<$<COMPILE_LANGUAGE:CUDA>: ${WARNING_SUPPRESSION_AND_OPTION_NVCC} >) target_include_directories (pdata PUBLIC ${MPI_C_INCLUDE_DIRS}) if (TEST_COVERAGE) target_compile_options(pdata PRIVATE $<$<COMPILE_LANGUAGE:CUDA>: -Xcompiler "-fprofile-arcs -ftest-coverage" >) @@ -89,10 +98,25 @@ target_include_directories (pdata PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/config) target_include_directories (pdata PUBLIC ${PETSC_INCLUDES}) target_include_directories (pdata PUBLIC ${HDF5_ROOT}/include) target_include_directories (pdata PUBLIC ${LIBHILBERT_INCLUDE_DIRS}) - -target_include_directories(pdata PUBLIC ${Vc_INCLUDE_DIR}) +target_include_directories (pdata PUBLIC ${Vc_INCLUDE_DIR}) target_include_directories (pdata PUBLIC ${Boost_INCLUDE_DIRS}) +target_include_directories (isolation_pdata PUBLIC ${PARMETIS_ROOT}/include) +target_include_directories (isolation_pdata PUBLIC ${METIS_ROOT}/include) +target_include_directories (isolation_pdata PUBLIC ${CUDA_INCLUDE_DIRS}) +target_include_directories (isolation_pdata PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_include_directories (isolation_pdata PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../openfpm_devices/src/) +target_include_directories (isolation_pdata PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../openfpm_vcluster/src/) +target_include_directories (isolation_pdata PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../openfpm_data/src/) +target_include_directories (isolation_pdata PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../openfpm_io/src/) +target_include_directories (isolation_pdata PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/config) +target_include_directories (isolation_pdata PUBLIC ${PETSC_INCLUDES}) +target_include_directories (isolation_pdata PUBLIC ${HDF5_ROOT}/include) +target_include_directories (isolation_pdata PUBLIC ${LIBHILBERT_INCLUDE_DIRS}) +target_include_directories (isolation_pdata PUBLIC ${Vc_INCLUDE_DIR}) +target_include_directories (isolation_pdata PUBLIC ${Boost_INCLUDE_DIRS}) + + target_link_libraries(pdata ${Boost_LIBRARIES}) target_link_libraries(pdata ${PARMETIS_LIBRARIES}) target_link_libraries(pdata -L${METIS_ROOT}/lib metis) @@ -101,6 +125,14 @@ target_link_libraries(pdata -L${LIBHILBERT_LIBRARY_DIRS} ${LIBHILBERT_LIBRARIES} target_link_libraries(pdata ${PETSC_LIBRARIES}) target_link_libraries(pdata ${Vc_LIBRARIES}) +target_link_libraries(isolation_pdata ${Boost_LIBRARIES}) +target_link_libraries(isolation_pdata ${PARMETIS_LIBRARIES}) +target_link_libraries(isolation_pdata -L${METIS_ROOT}/lib metis) +target_link_libraries(isolation_pdata ${HDF5_LIBRARIES}) +target_link_libraries(isolation_pdata -L${LIBHILBERT_LIBRARY_DIRS} ${LIBHILBERT_LIBRARIES}) +target_link_libraries(isolation_pdata ${PETSC_LIBRARIES}) +target_link_libraries(isolation_pdata ${Vc_LIBRARIES}) + if (TEST_PERFORMANCE) target_link_libraries(pdata ${Boost_FILESYSTEM_LIBRARY}) target_link_libraries(pdata ${Boost_SYSTEM_LIBRARY}) diff --git a/src/Vector/cuda/vector_dist_cuda_funcs.cuh b/src/Vector/cuda/vector_dist_cuda_funcs.cuh index c5a19ccc3..5ec0008e6 100644 --- a/src/Vector/cuda/vector_dist_cuda_funcs.cuh +++ b/src/Vector/cuda/vector_dist_cuda_funcs.cuh @@ -130,6 +130,7 @@ __global__ void process_ghost_particles_prp(vector_g_opart_type g_opart, vector process_ghost_device_particle_prp<vector_g_opart_type,vector_prp_type_out,vector_prp_type_in,prp...>(i,offset,g_opart,m_prp,v_prp); } + template<typename vector_prp_type_out, typename vector_prp_type_in, unsigned int ... prp> __global__ void process_ghost_particles_prp_put(vector_prp_type_out m_prp, vector_prp_type_in v_prp, unsigned int offset) diff --git a/src/Vector/cuda/vector_dist_gpu_unit_tests.cu b/src/Vector/cuda/vector_dist_gpu_unit_tests.cu index b70bd00f9..fa3e9223d 100644 --- a/src/Vector/cuda/vector_dist_gpu_unit_tests.cu +++ b/src/Vector/cuda/vector_dist_gpu_unit_tests.cu @@ -1847,6 +1847,7 @@ BOOST_AUTO_TEST_CASE(vector_dist_overflow_se_class1) } + BOOST_AUTO_TEST_CASE( vector_dist_ghost_put_gpu ) { Vcluster<> & v_cl = create_vcluster(); @@ -1878,7 +1879,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_ghost_put_gpu ) // ghost Ghost<3,float> ghost(r_g); - typedef aggregate<float> part_prop; + typedef aggregate<float,float,float> part_prop; // Distributed vector vector_dist_gpu<3,float, part_prop > vd(0,box,bc,ghost); @@ -1899,16 +1900,18 @@ BOOST_AUTO_TEST_CASE( vector_dist_ghost_put_gpu ) vd.getLastPropWrite<0>() = 0.0; + vd.getLastPropWrite<2>() = 0.0; + ++it; } vd.map(); vd.hostToDevicePos(); - vd.template hostToDeviceProp<0>(); + vd.template hostToDeviceProp<0,2>(); // sync the ghost - vd.ghost_get<0>(RUN_ON_DEVICE); - vd.template deviceToHostProp<0>(); + vd.ghost_get<0,2>(RUN_ON_DEVICE); + vd.template deviceToHostProp<0,2>(); vd.deviceToHostPos(); { @@ -1937,7 +1940,10 @@ BOOST_AUTO_TEST_CASE( vector_dist_ghost_put_gpu ) float dist = xp.distance(xq); if (dist < r_cut) + { vd.getPropWrite<0>(q) += a*(-dist*dist+r_cut*r_cut); + vd.getPropWrite<2>(q) += a*(-dist*dist+r_cut*r_cut) / 2; + } ++Np; } @@ -1946,25 +1952,27 @@ BOOST_AUTO_TEST_CASE( vector_dist_ghost_put_gpu ) } vd.hostToDevicePos(); - vd.template hostToDeviceProp<0>(); - vd.template ghost_put<add_atomic_,0>(RUN_ON_DEVICE); - vd.template deviceToHostProp<0>(); + vd.template hostToDeviceProp<0,2>(); + vd.template ghost_put<add_atomic_,0,2>(RUN_ON_DEVICE); + vd.template deviceToHostProp<0,2>(); vd.deviceToHostPos(); bool ret = true; auto it3 = vd.getDomainIterator(); float constant = vd.getProp<0>(it3.get()); + float constanta = vd.getProp<2>(it3.get()); float eps = 0.001; while (it3.isNext()) { float constant2 = vd.getProp<0>(it3.get()); - if (fabs(constant - constant2)/constant > eps) + float constant3 = vd.getProp<2>(it3.get()); + if (fabs(constant - constant2)/constant > eps || fabs(constanta - constant3)/constanta > eps) { Point<3,float> p = vd.getPosRead(it3.get()); - std::cout << p.toString() << " " << constant2 << "/" << constant << " " << v_cl.getProcessUnitID() << std::endl; + std::cout << p.toString() << " " << constant2 << "/" << constant << "/" << constant3 << " " << v_cl.getProcessUnitID() << std::endl; ret = false; break; } @@ -1980,6 +1988,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_ghost_put_gpu ) auto key = itp.get(); vd.getPropWrite<0>(key) = 0.0; + vd.getPropWrite<2>(key) = 0.0; ++itp; } @@ -2010,7 +2019,10 @@ BOOST_AUTO_TEST_CASE( vector_dist_ghost_put_gpu ) float dist = xp.distance(xq); if (dist < r_cut) + { vd.getPropWrite<0>(q) += a*(-dist*dist+r_cut*r_cut); + vd.getPropWrite<2>(q) += a*(-dist*dist+r_cut*r_cut); + } ++Np; } @@ -2019,25 +2031,28 @@ BOOST_AUTO_TEST_CASE( vector_dist_ghost_put_gpu ) } vd.hostToDevicePos(); - vd.template hostToDeviceProp<0>(); + vd.template hostToDeviceProp<0,2>(); vd.template ghost_put<add_atomic_,0>(RUN_ON_DEVICE); - vd.template deviceToHostProp<0>(); + vd.template ghost_put<add_atomic_,2>(RUN_ON_DEVICE); + vd.template deviceToHostProp<0,2>(); vd.deviceToHostPos(); bool ret = true; auto it3 = vd.getDomainIterator(); float constant = vd.getPropRead<0>(it3.get()); + float constanta = vd.getPropRead<2>(it3.get()); float eps = 0.001; while (it3.isNext()) { float constant2 = vd.getPropRead<0>(it3.get()); - if (fabs(constant - constant2)/constant > eps) + float constant3 = vd.getPropRead<0>(it3.get()); + if (fabs(constant - constant2)/constant > eps || fabs(constanta - constant3)/constanta > eps) { Point<3,float> p = vd.getPosRead(it3.get()); - std::cout << p.toString() << " " << constant2 << "/" << constant << " " << v_cl.getProcessUnitID() << std::endl; + std::cout << p.toString() << " " << constant2 << "/" << constant << "/" << constant3 << " " << v_cl.getProcessUnitID() << std::endl; ret = false; break; } diff --git a/src/Vector/util/vector_dist_funcs.hpp b/src/Vector/util/vector_dist_funcs.hpp index 35f001aeb..6d86d4477 100644 --- a/src/Vector/util/vector_dist_funcs.hpp +++ b/src/Vector/util/vector_dist_funcs.hpp @@ -116,6 +116,7 @@ __device__ inline void process_map_device_particle(unsigned int i, unsigned int proc_class::proc(i,id,v_prp,m_prp); } + //! It process one particle template<typename Top, typename T2, typename T4, unsigned int ... prp> __device__ inline void process_ghost_device_particle_prp(unsigned int i, unsigned int offset, Top & g_opart, T2 & m_prp, T4 & v_prp) -- GitLab