Commit 6c6ae5b8 authored by incardon

Fixing fine_s for CPU construction and GPU usage

parent 4672f6d5
......@@ -31,7 +31,7 @@ sph_dlb_test: OPT += -DTEST_RUN
sph_dlb_test: sph_dlb
%.o: %.cu
$(CUDA_CC) -O3 $(OPT) -use_fast_math -arch=sm_61 -lineinfo -g -c -isystem=/home/i-bird/MPI/include --std=c++11 -o $@ $< $(INCLUDE_PATH_NVCC)
$(CUDA_CC) -O0 -g $(OPT) -use_fast_math -arch=sm_61 -lineinfo -g -c -isystem=/home/i-bird/MPI/include --std=c++11 -o $@ $< $(INCLUDE_PATH_NVCC)
%.o: %.cpp
$(CC) -O3 $(OPT) -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
......
......@@ -39,6 +39,8 @@
#ifdef __NVCC__
#define PRINT_STACKTRACE
#define STOP_ON_ERROR
#define OPENMPI
#include "Vector/vector_dist.hpp"
#include <math.h>
......@@ -89,9 +91,9 @@ const real_number MassBound = 0.0000767656 / 8;
// End simulation time
#ifdef TEST_RUN
const real_number t_end = 0.0005;
const real_number t_end = 0.001;
#else
const real_number t_end = 0.0005;
const real_number t_end = 0.001;
#endif
// Gravity acceleration
......@@ -195,7 +197,7 @@ inline void EqState(particles & vd)
{
auto it = vd.getDomainIteratorGPU();
EqState_gpu<<<it.wthr,it.thr>>>(vd.toKernel(),B);
CUDA_LAUNCH(EqState_gpu,it.wthr,it.thr,vd.toKernel(),B);
}
......@@ -294,6 +296,7 @@ __global__ void calc_forces_gpu(particles_type vd, NN_type NN, real_number W_dap
{
// ... a
unsigned int a;
GET_PARTICLE_SORT(a,NN);
real_number max_visc = 0.0f;
......@@ -391,7 +394,7 @@ template<typename CellList> inline void calc_forces(particles & vd, CellList & N
// Update the cell-list
vd.updateCellList(NN);
calc_forces_gpu<<<part.wthr,part.thr>>>(vd.toKernel_sorted(),NN.toKernel(),W_dap,cbar);
CUDA_LAUNCH(calc_forces_gpu,part.wthr,part.thr,vd.toKernel_sorted(),NN.toKernel(),W_dap,cbar);
vd.merge_sort<force,drho,red>(NN);
......@@ -865,6 +868,7 @@ int main(int argc, char* argv[])
Vcluster<> & v_cl = create_vcluster();
timer it_time;
////// Do rebalancing every 300 timesteps
it_reb++;
if (it_reb == 300)
......@@ -886,6 +890,7 @@ int main(int argc, char* argv[])
vd.map(RUN_ON_DEVICE);
// make sort
vd.make_sort(NN);
......@@ -896,6 +901,7 @@ int main(int argc, char* argv[])
vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
// Calc forces
calc_forces(vd,NN,max_visc,cnt);
......@@ -922,7 +928,7 @@ int main(int argc, char* argv[])
{
// The sensor pressure requires up-to-date ghosts, so we ensure that particles are distributed correctly
// and that the ghosts are updated
/* vd.map(RUN_ON_DEVICE);
vd.map(RUN_ON_DEVICE);
vd.ghost_get<type,rho,Pressure,velocity>(RUN_ON_DEVICE);
vd.updateCellList(NN);
......@@ -960,7 +966,7 @@ int main(int argc, char* argv[])
++ito;
}
vd_out.write_frame("Particles",write,VTK_WRITER | FORMAT_BINARY);*/
vd_out.write_frame("Particles",write,VTK_WRITER | FORMAT_BINARY);
write++;
if (v_cl.getProcessUnitID() == 0)
......
openfpm_data @ a9632484
Subproject commit 9f134a74e1ad2dd797c0a9fa2e40754c24da4dd7
Subproject commit a9632484c3123f103cacd2479d02c86c21b835e0
......@@ -326,8 +326,8 @@ public:
{
// get the cells this box span
const grid_key_dx<dim> p1 = fine_s.getCellGrid(sub_domains_global.template get<0>(i).getP1());
const grid_key_dx<dim> p2 = fine_s.getCellGrid(sub_domains_global.template get<0>(i).getP2());
const grid_key_dx<dim> p1 = fine_s.getCellGrid_me(sub_domains_global.template get<0>(i).getP1());
const grid_key_dx<dim> p2 = fine_s.getCellGrid_pe(sub_domains_global.template get<0>(i).getP2());
// Get the grid and the sub-iterator
auto & gi = fine_s.getGrid();
......
......@@ -19,7 +19,8 @@ __device__ __host__ inline int processorID_impl(T2 & p, fine_s_type & fine_s, vs
int cl = fine_s.getCell(p);
int n_ele = fine_s.getNelements(cl);
for (int i = 0 ; i < n_ele ; i++)
int i = 0;
for ( ; i < n_ele ; i++)
{
e = fine_s.get(cl,i);
......@@ -29,11 +30,17 @@ __device__ __host__ inline int processorID_impl(T2 & p, fine_s_type & fine_s, vs
}
}
#if defined(SE_CLASS1) && !defined(__NVCC__)
#if defined(SE_CLASS1)
if (n_ele == 0)
{
std::cout << __FILE__ << ":" << __LINE__ << " I cannot detect in which processor this particle go" << std::endl;
printf("CartDecomposition_gpu.cuh:processorID_impl, error I cannot detect in which processor this particle go");
return -1;
}
if (i == n_ele)
{
printf("CartDecomposition_gpu.cuh:processorID_impl, error I cannot detect in which processor this particle go because of round-off inconsistencies");
return -1;
}
......@@ -82,7 +89,7 @@ class CartDecomposition_gpu : public ie_ghost_gpu<dim,T,Memory,layout_base>
* the explanation before)
*
*/
__device__ void applyPointBC(Point<dim,T> & pt) const
__device__ __host__ void applyPointBC(Point<dim,T> & pt) const
{
for (int i = 0 ; i < dim ; i++)
{
......@@ -120,7 +127,7 @@ public:
* \return processorID
*
*/
__device__ int inline processorIDBC(const Point<dim,T> & p)
__device__ __host__ int inline processorIDBC(const Point<dim,T> & p)
{
Point<dim,T> pt = p;
this->applyPointBC(pt);
......@@ -137,7 +144,7 @@ public:
* the explanation before)
*
*/
template<typename Mem> __device__ void applyPointBC(encapc<1,Point<dim,T>,Mem> && pt) const
template<typename Mem> __device__ __host__ void applyPointBC(encapc<1,Point<dim,T>,Mem> && pt) const
{
for (size_t i = 0 ; i < dim ; i++)
{
......@@ -154,7 +161,7 @@ public:
* \return processorID
*
*/
__device__ int inline processorID(const Point<dim,T> &pt)
__device__ __host__ int inline processorID(const Point<dim,T> &pt)
{
return processorID_impl(pt,clk,sub_domains_global);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment