Commit ee81919a authored by incardon

merging with master

parent 8325000f
......@@ -101,7 +101,7 @@ int main(int argc, char* argv[])
openfpm_init(&argc,&argv);
// domain
Box<3,double> domain({0.0,0.0},{2.5,2.5,2.5});
Box<3,double> domain({0.0,0.0,0.0},{2.5,2.5,2.5});
// grid size
size_t sz[3] = {128,128,128};
......
......@@ -7,7 +7,7 @@ LDIR =
OBJ = main.o
%.o: %.cpp
$(CC) -O3 -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
$(CC) -I/usr/local/cuda/include -O3 -g -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
ps_cma_es: $(OBJ)
$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS)
......
......@@ -7,7 +7,7 @@ LDIR =
OBJ = main.o
%.o: %.cpp
$(CC) -O3 -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
$(CC) -I/usr/local/cuda/include -O3 -c --std=c++11 -o $@ $< $(INCLUDE_PATH)
vect: $(OBJ)
$(CC) -o $@ $^ $(CFLAGS) $(LIBS_PATH) $(LIBS)
......
openfpm_data @ 804ad631
Subproject commit 35fe8163bc97801aefb126a5fe59b7113b5ce634
Subproject commit 804ad6310e36bb777937613168f98d4c29f2f367
openfpm_devices @ 1a3dfc0a
Subproject commit 96029620d939dc71967ffdd67f7e504a2a1f2c91
Subproject commit 1a3dfc0a96e4ad8ab61f7f1f7cdc9127ea99041c
openfpm_io @ 73be827c
Subproject commit a63a55e5c66839e93bd30af99475b2c4c3fe1355
Subproject commit 73be827ca337384fb90aca24b111b53d604c3cd0
openfpm_vcluster @ 15b8a504
Subproject commit e105e6ef793fa254da78c643c06bafdd858f0bb1
Subproject commit 15b8a504e379fb0fe44f85735ca3e9a2bd6a2f00
......@@ -1068,7 +1068,7 @@ public:
*/
template<typename Mem> size_t inline processorID(const encapc<1, Point<dim,T>, Mem> & p) const
{
// Forwards the encapsulated point together with fine_s and sub_domains_global
// to processorID_impl and returns its result.
return processorID_impl(p,fine_s,sub_domains_global);
// NOTE(review): as written this statement is unreachable, because the return
// above always exits first. The two returns look like the before/after pair of
// a one-line change in this hunk; this form additionally passes getDomain()
// and bc to processorID_impl -- confirm which revision is intended.
return processorID_impl(p,fine_s,sub_domains_global,getDomain(),bc);
}
/*! \brief Given a point return in which processor the particle should go
......@@ -1080,7 +1080,7 @@ public:
*/
size_t inline processorID(const Point<dim,T> &p) const
{
// Forwards the point together with fine_s and sub_domains_global to
// processorID_impl and returns its result.
return processorID_impl(p,fine_s,sub_domains_global);
// NOTE(review): as written this statement is unreachable, because the return
// above always exits first. The two returns look like the before/after pair of
// a one-line change in this hunk; this form additionally passes getDomain()
// and bc to processorID_impl -- confirm which revision is intended.
return processorID_impl(p,fine_s,sub_domains_global,getDomain(),bc);
}
/*! \brief Given a point return in which processor the particle should go
......@@ -1092,7 +1092,7 @@ public:
*/
size_t inline processorID(const T (&p)[dim]) const
{
// Forwards the coordinate array together with fine_s and sub_domains_global
// to processorID_impl and returns its result.
return processorID_impl(p,fine_s,sub_domains_global);
// NOTE(review): as written this statement is unreachable, because the return
// above always exits first. The two returns look like the before/after pair of
// a one-line change in this hunk; this form additionally passes getDomain()
// and bc to processorID_impl -- confirm which revision is intended.
return processorID_impl(p,fine_s,sub_domains_global,getDomain(),bc);
}
/*! \brief Given a point return in which processor the point/particle should go
......@@ -1110,7 +1110,7 @@ public:
applyPointBC(pt);
return processorID_impl(pt,fine_s,sub_domains_global);
return processorID_impl(pt,fine_s,sub_domains_global,getDomain(),bc);
}
/*! \brief Given a point return in which processor the particle should go
......@@ -1129,7 +1129,7 @@ public:
// Get the number of elements in the cell
return processorID_impl(pt,fine_s,sub_domains_global);
return processorID_impl(pt,fine_s,sub_domains_global,getDomain(),bc);
}
/*! \brief Given a point return in which processor the particle should go
......@@ -1146,7 +1146,7 @@ public:
Point<dim,T> pt = p;
applyPointBC(pt);
return processorID_impl(pt,fine_s,sub_domains_global);
return processorID_impl(pt,fine_s,sub_domains_global,getDomain(),bc);
}
/*! \brief Get the periodicity on i dimension
......
......@@ -10,8 +10,12 @@
#include "ie_ghost_gpu.cuh"
template<typename T2, typename fine_s_type, typename vsub_domain_type>
__device__ __host__ inline int processorID_impl(T2 & p, fine_s_type & fine_s, vsub_domain_type & sub_domains_global)
template<unsigned int dim, typename bc_type, typename T2, typename fine_s_type, typename vsub_domain_type, typename box_type>
__device__ __host__ inline int processorID_impl(T2 & p,
fine_s_type & fine_s,
vsub_domain_type & sub_domains_global,
const box_type & domain,
const bc_type (& bc)[dim])
{
// Get the number of elements in the cell
......@@ -24,7 +28,7 @@ __device__ __host__ inline int processorID_impl(T2 & p, fine_s_type & fine_s, vs
{
e = fine_s.get(cl,i);
if (sub_domains_global.template get<0>(e).isInsideNP(p) == true)
if (sub_domains_global.template get<0>(e).isInsideNP_with_border(p,domain,bc) == true)
{
break;
}
......@@ -34,13 +38,13 @@ __device__ __host__ inline int processorID_impl(T2 & p, fine_s_type & fine_s, vs
if (n_ele == 0)
{
printf("CartDecomposition_gpu.cuh:processorID_impl, error I cannot detect in which processor this particle go");
printf("CartDecomposition_gpu.cuh:processorID_impl, error I cannot detect in which processor this particle go \n");
return -1;
}
if (i == n_ele)
{
printf("CartDecomposition_gpu.cuh:processorID_impl, error I cannot detect in which processor this particle go because of round-off inconsistencies");
printf("CartDecomposition_gpu.cuh:processorID_impl, error I cannot detect in which processor this particle go because of round-off inconsistencies \n");
return -1;
}
......@@ -132,7 +136,7 @@ public:
Point<dim,T> pt = p;
this->applyPointBC(pt);
return processorID_impl(pt,clk,sub_domains_global);
return processorID_impl(pt,clk,sub_domains_global,domain,bc);
}
/*! \brief Apply boundary condition to the point
......@@ -163,7 +167,7 @@ public:
*/
__device__ __host__ int inline processorID(const Point<dim,T> &pt)
{
// Callable from both host and device code. Forwards the point together with
// clk and sub_domains_global to processorID_impl and returns its result.
return processorID_impl(pt,clk,sub_domains_global);
// NOTE(review): as written this statement is unreachable, because the return
// above always exits first. The two returns look like the before/after pair of
// a one-line change in this hunk; this form additionally passes domain and bc
// to processorID_impl -- confirm which revision is intended.
return processorID_impl(pt,clk,sub_domains_global,domain,bc);
}
};
......
This diff is collapsed.
......@@ -100,6 +100,7 @@ void grid_interpolation_benchmark(openfpm::vector<size_t> & nk_grid,
std::cout << "Time particles to mesh " << time_interpolation_p2m_mean.last() << std::endl;
measures.clear();
for (size_t j = 0 ; j < GRID_INTERPOLATION_TESTS ; j++)
{
......
......@@ -13,6 +13,7 @@
#include "util/cuda/moderngpu/kernel_scan.hxx"
#include "Decomposition/common.hpp"
#include "lib/pdata.hpp"
#include "util/cuda/kernels.cuh"
template<unsigned int dim, typename St, typename decomposition_type, typename vector_type, typename start_type, typename output_type>
__global__ void proc_label_id_ghost(decomposition_type dec,vector_type vd, start_type starts, output_type out)
......@@ -89,34 +90,6 @@ __global__ void process_id_proc_each_part(cartdec_gpu cdg, particles_type parts,
#endif
}
/*! \brief Record every position where property prp_off changes between two consecutive elements
 *
 * Each thread handles one index, built from the x components of the thread and
 * block indices. For an index idx with idx + 1 still inside vd, the thread compares
 * property prp_off of elements idx and idx + 1. When they differ it reserves an
 * output slot by atomically incrementing *cnt and stores
 *
 * - idx + 1 in property 0 of offs at that slot
 * - property prp_off of element idx in property 1 of offs at that slot
 *
 * Slots are handed out in whatever order the threads reach the atomic, so the
 * entries of offs are not guaranteed to be sorted by index.
 *
 * \tparam prp_off property of vd that is compared
 *
 * \param vd vector to scan
 * \param cnt counter incremented once per detected change
 * \param offs output vector; NOTE(review): no capacity check is done here, presumably
 *        the caller sizes it for the worst case -- confirm
 *
 */
template<unsigned int prp_off, typename vector_type,typename vector_type_offs>
__global__ void find_buffer_offsets(vector_type vd, int * cnt, vector_type_offs offs)
{
    const int idx = threadIdx.x + blockIdx.x * blockDim.x;

    // the last element has no successor to be compared with
    const int last = (int)vd.size() - 1;
    if (idx >= last)
    {
        return;
    }

    // nothing to record when the two neighbours carry the same value
    if (!(vd.template get<prp_off>(idx) != vd.template get<prp_off>(idx+1)))
    {
        return;
    }

    // reserve one output slot
    const int slot = atomicAdd(cnt, 1);

    offs.template get<0>(slot) = idx+1;
    offs.template get<1>(slot) = vd.template get<prp_off>(idx);
}
/*! \brief Record every position, within the first g_m elements, where property prp_off changes
 *
 * Each thread handles one index, built from the x components of the thread and
 * block indices. For an index idx with idx + 1 < g_m, the thread compares property
 * prp_off of elements idx and idx + 1. When they differ it reserves an output slot
 * by atomically incrementing *cnt and stores idx + 1 in property 0 of offs at that
 * slot. Unlike find_buffer_offsets, the property value itself is not stored.
 *
 * Slots are handed out in whatever order the threads reach the atomic, so the
 * entries of offs are not guaranteed to be sorted by index.
 *
 * \tparam prp_off property of vd that is compared
 *
 * \param vd vector to scan
 * \param cnt counter incremented once per detected change
 * \param offs output vector; NOTE(review): no capacity check is done here, presumably
 *        the caller sizes it for the worst case -- confirm
 * \param g_m number of leading elements of vd taken into account
 *
 */
template<unsigned int prp_off, typename vector_type,typename vector_type_offs>
__global__ void find_buffer_offsets_no_prc(vector_type vd, int * cnt, vector_type_offs offs, int g_m)
{
    const int idx = threadIdx.x + blockIdx.x * blockDim.x;

    // the last considered element has no successor to be compared with
    const int last = (int)g_m - 1;
    if (idx >= last)
    {
        return;
    }

    // nothing to record when the two neighbours carry the same value
    if (!(vd.template get<prp_off>(idx) != vd.template get<prp_off>(idx+1)))
    {
        return;
    }

    // reserve one output slot
    const int slot = atomicAdd(cnt, 1);

    offs.template get<0>(slot) = idx+1;
}
template<typename vector_m_opart_type, typename vector_pos_type_out, typename vector_prp_type_out,
typename vector_pos_type_in, typename vector_prp_type_in>
......
......@@ -82,6 +82,7 @@ void addUpdtateTime(GoogleChart & cg);
*/
static inline void standard_deviation(openfpm::vector<double> measures, double & mean, double & dev)
{
mean = 0;
for (size_t i = 0 ; i < measures.size() ; i++)
mean += measures.get(i);
mean /= measures.size();
......
......@@ -14,6 +14,7 @@
#include "util/cuda/moderngpu/kernel_mergesort.hxx"
#include "Vector/cuda/vector_dist_cuda_funcs.cuh"
#include "util/cuda/moderngpu/kernel_scan.hxx"
#include "util/cuda/kernels.cuh"
#endif
#include "Vector/util/vector_dist_funcs.hpp"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment