Commit 0d706b96 authored by incardon's avatar incardon

General fixing for SE_CLASS1

parent 3ea45c93
......@@ -349,14 +349,14 @@ struct copy_grid_fast<false,3,grid,ginfo>
grid_key_dx<3> one = zero;
one.set_d(1,1);
unsigned char * ptr_final_src = (unsigned char *)&(gd_src.template get<0>(one));
unsigned char * ptr_start_src = (unsigned char *)&(gd_src.template get<0>(zero));
unsigned char * ptr_final_src = (unsigned char *)(gd_src.template get_address<0>(one));
unsigned char * ptr_start_src = (unsigned char *)(gd_src.template get_address<0>(zero));
unsigned char * ptr_final_dst = (unsigned char *)&(gd_dst.template get<0>(one));
unsigned char * ptr_start_dst = (unsigned char *)&(gd_dst.template get<0>(zero));
unsigned char * ptr_final_dst = (unsigned char *)(gd_dst.template get_address<0>(one));
unsigned char * ptr_start_dst = (unsigned char *)(gd_dst.template get_address<0>(zero));
unsigned char * ptr_src = (unsigned char *)&(gd_src.template get<0>(bx_src.getKP1()));
unsigned char * ptr_dst = (unsigned char *)&(gd_dst.template get<0>(bx_dst.getKP1()));
unsigned char * ptr_src = (unsigned char *)(gd_src.template get_address<0>(bx_src.getKP1()));
unsigned char * ptr_dst = (unsigned char *)(gd_dst.template get_address<0>(bx_dst.getKP1()));
size_t n_cpy = bx_src.getHigh(0) - bx_src.getLow(0) + 1;
......@@ -368,11 +368,11 @@ struct copy_grid_fast<false,3,grid,ginfo>
grid_key_dx<3> one2 = zero;
one2.set_d(2,1);
ptr_final_src = (unsigned char *)&(gd_src.template get<0>(one2));
ptr_start_src = (unsigned char *)&(gd_src.template get<0>(zero));
ptr_final_src = (unsigned char *)(gd_src.template get_address<0>(one2));
ptr_start_src = (unsigned char *)(gd_src.template get_address<0>(zero));
ptr_final_dst = (unsigned char *)&(gd_dst.template get<0>(one2));
ptr_start_dst = (unsigned char *)&(gd_dst.template get<0>(zero));
ptr_final_dst = (unsigned char *)(gd_dst.template get_address<0>(one2));
ptr_start_dst = (unsigned char *)(gd_dst.template get_address<0>(zero));
size_t stride_src_y = ptr_final_src - ptr_start_src;
size_t stride_dst_y = ptr_final_dst - ptr_start_dst;
......@@ -462,11 +462,11 @@ struct copy_grid_fast<false,2,grid,ginfo>
grid_key_dx<2> one = zero;
one.set_d(1,1);
unsigned char * ptr_final_src = (unsigned char *)&(gd_src.template get<0>(one));
unsigned char * ptr_start_src = (unsigned char *)&(gd_src.template get<0>(zero));
unsigned char * ptr_final_src = (unsigned char *)(gd_src.template get_address<0>(one));
unsigned char * ptr_start_src = (unsigned char *)(gd_src.template get_address<0>(zero));
unsigned char * ptr_final_dst = (unsigned char *)&(gd_dst.template get<0>(one));
unsigned char * ptr_start_dst = (unsigned char *)&(gd_dst.template get<0>(zero));
unsigned char * ptr_final_dst = (unsigned char *)(gd_dst.template get_address<0>(one));
unsigned char * ptr_start_dst = (unsigned char *)(gd_dst.template get_address<0>(zero));
unsigned char * ptr_src = (unsigned char *)&(gd_src.template get<0>(bx_src.getKP1()));
unsigned char * ptr_dst = (unsigned char *)&(gd_dst.template get<0>(bx_dst.getKP1()));
......@@ -812,8 +812,8 @@ struct pack_with_iterator_shortx<3,n_cpy,obj_byte,git,grid>
grid_key_dx<3> one = zero;
one.set_d(2,1);
unsigned char * ptr_final = (unsigned char *)&(gr.template get<0>(one));
unsigned char * ptr_start = (unsigned char *)&(gr.template get<0>(zero));
unsigned char * ptr_final = (unsigned char *)(gr.template get_address<0>(one));
unsigned char * ptr_start = (unsigned char *)(gr.template get_address<0>(zero));
size_t stride_y = ptr_final - ptr_start;
......@@ -881,8 +881,8 @@ struct pack_with_iterator<false,dim,grid,encap_src,encap_dst,boost_vct,it,dtype,
grid_key_dx<dim> one = zero;
one.set_d(1,1);
unsigned char * ptr_final = (unsigned char *)&(gr.template get<0>(one));
unsigned char * ptr_start = (unsigned char *)&(gr.template get<0>(zero));
unsigned char * ptr_final = (unsigned char *)(gr.template get_address<0>(one));
unsigned char * ptr_start = (unsigned char *)(gr.template get_address<0>(zero));
size_t stride = ptr_final - ptr_start;
......
......@@ -752,6 +752,32 @@ public:
return mem_getpointer<decltype(data_),layout_base_>::template getPointer<p>(data_);
}
/*! \brief Get the address of the selected element
*
* Takes the address (operator &) of what mem_get<...>::get(data_,g1,v1)
* yields for property p and returns it. No check on v1 is done by this
* function itself.
*
* \tparam p property to select
* \tparam r_type return type; by default it is deduced as the type of
*         the address of the value yielded by mem_get<...>::get
*
* \param v1 grid_key that identify the element in the grid
*
* \return the address of the element
*
*/
template <unsigned int p, typename r_type=decltype(&mem_get<p,layout_base<T>,layout,grid_sm<dim,T>,grid_key_dx<dim>>::get(data_,g1,grid_key_dx<dim>()))>
__device__ __host__ inline r_type get_address(const grid_key_dx<dim> & v1)
{
return &mem_get<p,layout_base<T>,decltype(this->data_),decltype(this->g1),decltype(v1)>::get(data_,g1,v1);
}
/*! \brief Get the address of the selected element (const version)
*
* Takes the address (operator &) of what mem_get<...>::get_c(data_,g1,v1)
* yields for property p and returns it. No check on v1 is done by this
* function itself.
*
* \tparam p property to select
* \tparam r_type return type; by default it is deduced as the type of
*         the address of the value yielded by mem_get<...>::get_c
*
* \param v1 grid_key that identify the element in the grid
*
* \return the address of the element
*
*/
template <unsigned int p, typename r_type=decltype(&mem_get<p,layout_base<T>,layout,grid_sm<dim,T>,grid_key_dx<dim>>::get_c(data_,g1,grid_key_dx<dim>()))>
__device__ __host__ inline r_type get_address(const grid_key_dx<dim> & v1) const
{
return &mem_get<p,layout_base<T>,decltype(this->data_),decltype(this->g1),decltype(v1)>::get_c(data_,g1,v1);
}
/*! \brief Get the reference of the selected element
*
* \param v1 grid_key that identify the element in the grid
......
......@@ -59,6 +59,25 @@ namespace openfpm
}
};
/*! \brief Generic (fallback) implementation of the device merge
 *
 * This primary template is used whenever no specialization matches the
 * first boolean argument. Its run() does not touch either vector: it only
 * reports on std::cout that merge_prp_device needs a cuda enabled vector.
 *
 * \tparam is_ok_cuda selector used to choose between this fallback and a specialization
 * \tparam T Base object of the destination vector
 * \tparam Memory memory type of the destination vector
 * \tparam layout layout type of the destination vector
 * \tparam layout_base layout template of the destination vector
 * \tparam grow_p Grow policy of the destination vector
 *
 */
template<bool is_ok_cuda, typename T, typename Memory, typename layout, template<typename> class layout_base, typename grow_p>
struct merge_prp_device_impl
{
	/*! \brief Report that the device merge is not available
	 *
	 * \param this_ destination vector (unused)
	 * \param v source vector (unused)
	 * \param offset starting index in the destination (unused)
	 *
	 */
	template <typename S, typename M, typename gp, unsigned int impl, template <typename> class layout_base2, unsigned int ...args>
	static void run(openfpm::vector<T,Memory,layout,layout_base,grow_p,impl> & this_ ,
					const openfpm::vector<S,M,typename layout_base2<S>::type,layout_base2,gp,impl> & v,
					unsigned int offset)
	{
		// Location first, then the message; flushed by std::endl
		std::cout << __FILE__ << ":" << __LINE__;
		std::cout << " Error the function merge_prp_device only work with cuda enabled vector" << std::endl;
	}
};
template<typename T, typename Memory,
typename layout, template<typename> class layout_base,
typename grow_p>
......@@ -92,6 +111,38 @@ namespace openfpm
}
};
/*! \brief Implementation of the device merge selected when the first
 *         boolean template argument is true
 *
 * When both CUDA_GPU and __NVCC__ are defined, run() launches the kernel
 * merge_add_prp_device_impl; otherwise it only prints an error on std::cout.
 *
 * \tparam T Base object of the destination vector
 * \tparam Memory memory type of the destination vector
 * \tparam layout layout type of the destination vector
 * \tparam layout_base layout template of the destination vector
 * \tparam grow_p Grow policy of the destination vector
 *
 */
template<typename T, typename Memory,
typename layout, template<typename> class layout_base,
typename grow_p>
struct merge_prp_device_impl<true,T,Memory,layout,layout_base,grow_p>
{
/*! \brief Launch the merge kernel from v into this_
 *
 * \tparam S Base object of the source vector
 * \tparam M memory type of the source vector
 * \tparam gp Grow policy of the source vector
 * \tparam impl implementation id shared by source and destination vector
 * \tparam layout_base2 layout template of the source vector
 * \tparam args properties forwarded to the kernel as template arguments
 *
 * \param this_ destination vector
 * \param v source vector
 * \param offset value passed to the kernel as third argument
 *        (NOTE(review): presumably the first destination index written - confirm against the kernel)
 *
 */
template <typename S,
typename M,
typename gp,
unsigned int impl,
template <typename> class layout_base2,
unsigned int ...args>
static void run(vector<T,Memory,layout,layout_base,grow_p,impl> & this_ ,
const vector<S,M,typename layout_base2<S>::type,layout_base2,gp,impl> & v,
unsigned int offset)
{
#ifdef SE_CLASS2
check_valid(&this_,8);
#endif
// merge the data on device: the launch configuration (ite.wthr, ite.thr)
// comes from the GPU iterator of the source vector v
#if defined(CUDA_GPU) && defined(__NVCC__)
auto ite = v.getGPUIterator();
merge_add_prp_device_impl<decltype(v.toKernel()),decltype(this_.toKernel()),args...><<<ite.wthr,ite.thr>>>(v.toKernel(),this_.toKernel(),(unsigned int)offset);
#else
// Without nvcc the kernel launch syntax is not available: only report the problem
std::cout << __FILE__ << ":" << __LINE__ << " Error the function merge_prp_device only work when map_vector is compiled with nvcc" << std::endl;
#endif
}
};
/*! \brief Implementation of 1-D std::vector like structure
*
* Stub object look at the various implementations
......@@ -510,6 +561,59 @@ namespace openfpm
}
}
/*! \brief It merges the elements of a source vector into this vector (on device)
 *
 * Given two vectors v1 and v2 of size 7 and 3, and the function add as
 * merging operation, merging the second vector v2 into the first one v1
 * starting from element 2 means
 *
 * \verbatim
 *
 * 6   8   3   2   1   0   3    v1 elements
 *         |   |   |
 *        op  op  op
 *         |   |   |
 *         5   1   9            v2 elements
 *
 *-------------------------------------
 * 6   8   8   3  10   0   3    updated v1 elements
 *
 * \endverbatim
 *
 * This operation is done for each selected property in args.
 *
 * The number of properties in the source vector must not be bigger than
 * the number in the destination. All the properties of S must be mapped,
 * so if S has 3 properties, 3 numbers for args are required.
 *
 * The work is delegated to merge_prp_device_impl, selected by whether
 * Memory is CudaMemory.
 *
 * \tparam op merging operation (note: it is not forwarded to
 *         merge_prp_device_impl::run by this function)
 * \tparam S Base object of the source vector
 * \tparam M memory type of the source vector
 * \tparam gp Grow policy of the source vector
 * \tparam args one or more number that define which property to set-up
 *
 * \param v source vector
 * \param start index from where to start the merging
 *
 */
template <template<typename,typename> class op, typename S, typename M, typename gp, unsigned int ...args>
void merge_prp_device(const vector<S,M,typename layout_base<S>::type,layout_base,gp,OPENFPM_NATIVE> & v,
					  unsigned int start)
{
#ifdef SE_CLASS2
	check_valid(this,8);
#endif
#ifdef SE_CLASS1
	// The previous check compared against `opart`, which does not exist in
	// this function and broke SE_CLASS1 builds. What can be verified here is
	// that the destination range [start, start + v.size()) fits in this vector.
	if ((size_t)start + v.size() > this->size())
	{
		std::cerr << __FILE__ << ":" << __LINE__ << " error merge_prp_device: start=" << start << " + v.size()=" << v.size() << " must be smaller or equal to size()=" << this->size() << std::endl;
	}
#endif
	merge_prp_device_impl<std::is_same<Memory,CudaMemory>::value,T,Memory,layout,layout_base,grow_p>
	::template run<S,M,gp,OPENFPM_NATIVE,layout_base,args...>(*this,v,start);
}
/*! \brief It merge the elements of a source vector to this vector
*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment