Commit 49dc3a14 authored by Pietro Incardona's avatar Pietro Incardona

Latest

parents 3f886c2d f9eb760c
......@@ -8,9 +8,13 @@ else
CUDA_CC=nvcc
endif
CC_SCOREP=scorep --nocompiler --cuda --mpp=mpi nvcc
CC=$(CUDA_CC) #${CC_SCOREP}
CC_MPI=mpic++
ifeq ($(PROFILE),ON)
CC=scorep --nocompiler --cuda --mpp=mpi nvcc
CC_MPI=mpic++
else
CC=$(CUDA_CC)
CC_MPI=mpic++
endif
LDIR =
OPT=
......
......@@ -13,17 +13,14 @@ else
CUDA_CC_LINK=nvcc
endif
ifeq ($(PROFILE),ON)
CUDA_CC=scorep --nocompiler --cuda --mpp=mpi nvcc
CUDA_CC_LINK=scorep --nocompiler --cuda --mpp=mpi nvcc
CUDA_CC=scorep --nocompiler --cuda --mpp=mpi nvcc
CUDA_CC_LINK=scorep --nocompiler --cuda --mpp=mpi nvcc
else
CUDA_CC=nvcc
CUDA_CC_LINK=nvcc
CUDA_CC=nvcc
CUDA_CC_LINK=nvcc
endif
CC=mpic++
LDIR =
OPT=
......@@ -34,7 +31,7 @@ sph_dlb_test: OPT += -DTEST_RUN
sph_dlb_test: sph_dlb
%.o: %.cu
$(CUDA_CC) -O3 $(OPT) -g -c -isystem=/home/i-bird/MPI/include --std=c++11 -o $@ $< $(INCLUDE_PATH_NVCC)
$(CUDA_CC) -O3 $(OPT) -use_fast_math -arch=sm_61 -lineinfo -g -c -isystem=/home/i-bird/MPI/include --std=c++11 -o $@ $< $(INCLUDE_PATH_NVCC)
%.o: %.cpp
$(CC) -O3 $(OPT) -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
......
......@@ -39,9 +39,15 @@
#ifdef __NVCC__
#define OPENMPI
#define CUDA_CHECK_LAUNCH
#include "Vector/vector_dist.hpp"
#include <math.h>
#include "Draw/DrawParticles.hpp"
#include <cuda_profiler_api.h>
typedef float real_number;
......@@ -87,9 +93,9 @@ const real_number MassBound = 0.0000767656 / 8.0;
// End simulation time
#ifdef TEST_RUN
const real_number t_end = 0.001;
const real_number t_end = 0.0005;
#else
const real_number t_end = 0.001;
const real_number t_end = 0.0005;
#endif
// Gravity acceleration
......@@ -225,11 +231,11 @@ inline __device__ __host__ void DWab(Point<3,real_number> & dx, Point<3,real_num
real_number qq2 = qq * qq;
real_number fac1 = (c1*qq + d1*qq2)/r;
real_number b1 = (qq < 1.0)?1.0f:0.0f;
real_number b1 = (qq < 1.0f)?1.0f:0.0f;
real_number wqq = (2.0 - qq);
real_number wqq = (2.0f - qq);
real_number fac2 = c2 * wqq * wqq / r;
real_number b2 = (qq >= 1.0 && qq < 2.0)?1.0f:0.0f;
real_number b2 = (qq >= 1.0f && qq < 2.0f)?1.0f:0.0f;
real_number factor = (b1*fac1 + b2*fac2);
......@@ -256,14 +262,14 @@ inline __device__ __host__ real_number Tensile(real_number r, real_number rhoa,
real_number wqq2=qq*qq;
real_number wqq3=wqq2*qq;
wab=a2*(1.0f-1.5f*wqq2+0.75f*wqq3);
wab+=a2*(1.0f-1.5f*wqq2+0.75f*wqq3);
}
//-Tensile correction.
real_number fab=wab*W_dap;
fab*=fab; fab*=fab; //fab=fab^4
const real_number tensilp1=(prs1/(rhoa*rhoa))*(prs1>0? 0.01: -0.2);
const real_number tensilp2=(prs2/(rhob*rhob))*(prs2>0? 0.01: -0.2);
const real_number tensilp1=(prs1/(rhoa*rhoa))*(prs1>0.0f? 0.01f: -0.2f);
const real_number tensilp2=(prs2/(rhob*rhob))*(prs2>0.0f? 0.01f: -0.2f);
return (fab*(tensilp1+tensilp2));
}
......@@ -284,7 +290,7 @@ inline __device__ __host__ real_number Pi(const Point<3,real_number> & dr, real_
return pi_visc;
}
else
return 0.0;
return 0.0f;
}
template<typename particles_type, typename NN_type>
......@@ -294,7 +300,7 @@ __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap
unsigned int a;
GET_PARTICLE_SORT(a,NN);
real_number max_visc = 0.0;
real_number max_visc = 0.0f;
// Get the position xp of the particle
Point<3,real_number> xa = vd.getPos(a);
......@@ -303,7 +309,7 @@ __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap
unsigned int typea = vd.getProp<type>(a);
// Take the mass of the particle dependently if it is FLUID or BOUNDARY
real_number massa = (vd.getProp<type>(a) == FLUID)?MassFluid:MassBound;
//real_number massa = (typea == FLUID)?MassFluid:MassBound;
// Get the density of the of the particle a
real_number rhoa = vd.getProp<rho>(a);
......@@ -315,10 +321,10 @@ __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap
Point<3,real_number> va = vd.getProp<velocity>(a);
Point<3,real_number> force_;
force_.get(0) = 0.0;
force_.get(1) = 0.0;
force_.get(0) = 0.0f;
force_.get(1) = 0.0f;
force_.get(2) = -gravity;
real_number drho_ = 0.0;
real_number drho_ = 0.0f;
// Get an iterator over the neighborhood particles of p
auto Np = NN.getNNIteratorBox(NN.getCell(xa));
......@@ -344,20 +350,19 @@ __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap
// Get the distance between p and q
Point<3,real_number> dr = xa - xb;
Point<3,real_number> v_rel = va - vb;
// take the norm of this vector
real_number r2 = norm2(dr);
// if they interact
if (r2 < FourH2 && r2 >= 1e-16)
{
real_number r = sqrt(r2);
Point<3,real_number> v_rel = va - vb;
real_number r = sqrtf(r2);
Point<3,real_number> DW;
DWab(dr,DW,r);
real_number factor = - massb*((vd.getProp<Pressure>(a) + vd.getProp<Pressure>(b)) / (rhoa * rhob) + Tensile(r,rhoa,rhob,Pa,Pb,W_dap) + Pi(dr,r2,v_rel,rhoa,rhob,massb,cbar,max_visc));
real_number factor = - massb*((Pa + Pb) / (rhoa * rhob) + Tensile(r,rhoa,rhob,Pa,Pb,W_dap) + Pi(dr,r2,v_rel,rhoa,rhob,massb,cbar,max_visc));
// Bound - Bound does not produce any change
factor = (typea == BOUNDARY && typeb == BOUNDARY)?0.0f:factor;
......@@ -385,7 +390,7 @@ __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap
template<typename CellList> inline void calc_forces(particles & vd, CellList & NN, real_number & max_visc, size_t cnt)
{
auto part = vd.getDomainIteratorGPU(32);
auto part = vd.getDomainIteratorGPU(64);
// Update the cell-list
vd.updateCellList(NN);
......@@ -694,6 +699,8 @@ int main(int argc, char* argv[])
// initialize the library
openfpm_init(&argc,&argv);
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
// It contain for each time-step the value detected by the probes
openfpm::vector<openfpm::vector<real_number>> press_t;
openfpm::vector<Point<3,real_number>> probes;
......@@ -843,6 +850,7 @@ int main(int argc, char* argv[])
vd.hostToDevicePos();
vd.template hostToDeviceProp<type,rho,rho_prev,Pressure,velocity>();
vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
auto NN = vd.getCellListGPU(2*H / 2.0);
......@@ -850,6 +858,8 @@ int main(int argc, char* argv[])
timer tot_sim;
tot_sim.start();
cudaProfilerStart();
size_t write = 0;
size_t it = 0;
size_t it_reb = 0;
......@@ -880,6 +890,9 @@ int main(int argc, char* argv[])
vd.map(RUN_ON_DEVICE);
// make sort
vd.make_sort(NN);
// Calculate pressure from the density
EqState(vd);
......@@ -913,7 +926,7 @@ int main(int argc, char* argv[])
{
// Sensor pressure require update ghost, so we ensure that particles are distributed correctly
// and ghost are updated
vd.map(RUN_ON_DEVICE);
/* vd.map(RUN_ON_DEVICE);
vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
vd.updateCellList(NN);
......@@ -951,7 +964,7 @@ int main(int argc, char* argv[])
++ito;
}
vd_out.write_frame("Particles_s",write,VTK_WRITER | FORMAT_BINARY);
vd_out.write_frame("Particles",write,VTK_WRITER | FORMAT_BINARY);*/
write++;
if (v_cl.getProcessUnitID() == 0)
......@@ -967,7 +980,10 @@ int main(int argc, char* argv[])
tot_sim.stop();
std::cout << "Time to complete: " << tot_sim.getwct() << " seconds" << std::endl;
openfpm_finalize();
cudaProfilerStop();
}
#else
......
openfpm_data @ 8eedc3b4
Subproject commit ea19c74eb60d0c9bee3690958e6fa83a6fe79e6f
Subproject commit 8eedc3b445e2f12429dea28c89c7cf47f89afb2f
openfpm_vcluster @ e105e6ef
Subproject commit 309c31433036534153366a27f8ad52c85c8dabb2
Subproject commit e105e6ef793fa254da78c643c06bafdd858f0bb1
......@@ -15,6 +15,7 @@ if ( CMAKE_COMPILER_IS_GNUCC )
target_compile_options(pdata PRIVATE "-Wno-deprecated-declarations")
endif()
add_library(ofpm_pdata STATIC lib/pdata.cpp)
add_test(NAME pdata_3_proc COMMAND mpirun -np 3 ./pdata)
......@@ -145,6 +146,9 @@ install(FILES DLB/DLB.hpp DLB/LB_Model.hpp
install(FILES config/config.h
DESTINATION openfpm_pdata/include/config )
install(FILES lib/pdata.hpp
DESTINATION openfpm_pdata/include/lib )
install(FILES Debug/debug.hpp
DESTINATION openfpm_pdata/include/Debug )
......
......@@ -218,6 +218,10 @@ public:
*/
void setParameters(const CartDecomposition<dim,T,Memory,layout_base,Distribution> & dec, const Ghost<dim,T> & g, const ::Box<dim,T> & ext_domain)
{
// Set the decomposition parameters
this->gr.setDimensions(dec.gr.getSize());
this->cd.setDimensions(ext_domain, dec.gr.getSize(), 0);
this->box_nn_processor = dec.box_nn_processor;
// Calculate new sub-domains for extended domain
......@@ -225,7 +229,6 @@ public:
// Calculate fine_s structure for the extended domain
// update the cell decomposer and gr
// extend_fines(dec);
reconstruct_fine_s_from_extended_domain(ext_domain);
// Get the old sub-sub-domain grid extension
......
......@@ -1823,6 +1823,7 @@ public:
return this->ig_box;
}
//! Define friend classes
//\cond
friend grid_dist_id<dim,St,T,typename Decomposition::extended_type,Memory,device_grid>;
......
......@@ -12,6 +12,7 @@
#include "util/cuda/moderngpu/kernel_reduce.hxx"
#include "util/cuda/moderngpu/kernel_scan.hxx"
#include "Decomposition/common.hpp"
#include "lib/pdata.hpp"
template<unsigned int dim, typename St, typename decomposition_type, typename vector_type, typename start_type, typename output_type>
__global__ void proc_label_id_ghost(decomposition_type dec,vector_type vd, start_type starts, output_type out)
......@@ -349,6 +350,8 @@ void remove_marked(vector_type & vd)
// first we do a scan of the property
openfpm::vector_gpu<aggregate<unsigned int>> idx;
idx.setMemory(mem_tmp);
idx.resize(vd.size_local());
auto ite = idx.getGPUIterator();
......
......@@ -239,8 +239,21 @@ template<typename vector_type> void test_dlb_vector()
}
vd.map();
////////////// DEBUG ////////////////////////
vd.write("bug_test_init_before");
/////////////////////////////////////////////
vd.template ghost_get<>();
////////////// DEBUG ////////////////////////
vd.write("bug_test_init_after");
/////////////////////////////////////////////
// Get the neighborhood of each particles
auto VV = vd.getVerlet(0.01);
......@@ -260,10 +273,24 @@ template<typename vector_type> void test_dlb_vector()
ModelSquare md;
md.factor = 10;
vd.map();
////////////// DEBUG ////////////////////////
vd.write("bug_test_before_dlb");
/////////////////////////////////////////////
vd.addComputationCosts(md);
vd.getDecomposition().decompose();
vd.map();
////////////// DEBUG ////////////////////////
vd.write("bug_test_after_dlb");
/////////////////////////////////////////////
vd.addComputationCosts(md);
openfpm::vector<size_t> loads;
......@@ -299,9 +326,24 @@ template<typename vector_type> void test_dlb_vector()
++it;
}
vd.map();
////////////// DEBUG ////////////////////////
vd.write_frame("bug_test_before",i);
/////////////////////////////////////////////
vd.template ghost_get<>();
////////////// DEBUG ////////////////////////
vd.getDecomposition().write("dec_test");
vd.write_frame("bug_test_after",i);
/////////////////////////////////////////////
auto VV2 = vd.getVerlet(0.01);
auto it2 = vd.getDomainIterator();
......
......@@ -1243,7 +1243,7 @@ public:
* \param no_se3 avoid se class 3 checking
*
*/
template<typename CellL> void updateCellList(CellL & cell_list, bool no_se3 = false)
template<typename CellL> void updateCellList(CellL & cell_list, bool no_se3 = false, cl_construct_opt opt = cl_construct_opt::Full)
{
#ifdef SE_CLASS3
if (no_se3 == false)
......@@ -1262,7 +1262,7 @@ public:
if (to_reconstruct == false)
{
populate_cell_list(v_pos,v_pos_out,v_prp,v_prp_out,cell_list,v_cl.getmgpuContext(false),g_m,CL_NON_SYMMETRIC);
populate_cell_list(v_pos,v_pos_out,v_prp,v_prp_out,cell_list,v_cl.getmgpuContext(false),g_m,CL_NON_SYMMETRIC,opt);
cell_list.set_gm(g_m);
}
......@@ -1294,7 +1294,7 @@ public:
if (to_reconstruct == false)
{
populate_cell_list(v_pos,v_pos_out,v_prp,v_prp_out,cell_list,v_cl.getmgpuContext(),g_m,CL_SYMMETRIC);
populate_cell_list(v_pos,v_pos_out,v_prp,v_prp_out,cell_list,v_cl.getmgpuContext(),g_m,CL_SYMMETRIC,cl_construct_opt::Full);
cell_list.set_gm(g_m);
}
......@@ -2694,6 +2694,26 @@ public:
this->g_m = g_m;
}
/*! \brief this function sort the vector
*
* \warning this function kill the ghost (and invalidate the Cell-list)
*
* \param NN Cell-list to use to reorder
*
*/
void make_sort(CellList_gpu<dim,St,CudaMemory,shift_only<dim, St>> & NN)
{
deleteGhost();
updateCellList(NN,false,cl_construct_opt::Only_reorder);
// construct a cell-list forcing to create a sorted version without ghost
// swap the sorted with the non-sorted
v_pos.swap(v_pos_out);
v_prp.swap(v_prp_out);
}
#endif
......
......@@ -292,6 +292,9 @@ class vector_dist_comm
};
openfpm::vector<sh_box> reord_shift;
box_f.clear();
map_cmb.clear();
box_cmb.clear();
// Add local particles coming from periodic boundary, the only boxes that count are the one
// touching the border
......
......@@ -4,7 +4,7 @@
* Created on: Feb 5, 2016
* Author: Pietro Incardona
*/
#include "pdata.hpp"
#include "SubdomainGraphNodes.hpp"
const std::string nm_v::attributes::name[] = {"x","migration","computation","global_id","id","sub_id","proc_id","id","fake_v"};
......@@ -13,4 +13,3 @@ const std::string nm_part_v::attributes::name[] = {"id","sub_id"};
const std::string nm_part_e::attributes::name[] = {"id"};
#include "config.h"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment