Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Sbalzarini Lab
S
Software
P
Parallel Computing
OpenFPM
openfpm_data
Commits
df7c8617
Commit
df7c8617
authored
Dec 12, 2021
by
incardon
Browse files
Fixing performance
parent
f84f0a37
Pipeline
#4008
passed with stages
in 23 minutes and 36 seconds
Changes
8
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
src/Grid/cuda/cuda_grid_gpu_funcs.cuh
View file @
df7c8617
...
...
@@ -98,9 +98,9 @@ __global__ void copy_ndim_grid_device(grid_type src, grid_type dst)
template
<
bool
inte_or_lin
,
unsigned
int
dim
,
typename
T
>
struct
grid_toKernelImpl
{
template
<
typename
grid_type
>
static
grid_gpu_ker
<
dim
,
T
,
memory_traits_lin
>
toKernel
(
grid_type
&
gc
)
template
<
typename
grid_type
>
static
grid_gpu_ker
<
dim
,
T
,
memory_traits_lin
,
typename
grid_type
::
linearizer_type
>
toKernel
(
grid_type
&
gc
)
{
grid_gpu_ker
<
dim
,
T
,
memory_traits_lin
>
g
(
gc
.
getGrid
());
grid_gpu_ker
<
dim
,
T
,
memory_traits_lin
,
typename
grid_type
::
linearizer_type
>
g
(
gc
.
getGrid
());
g
.
get_data_
().
disable_manage_memory
();
g
.
get_data_
().
mem
=
gc
.
get_internal_data_
().
mem
;
...
...
@@ -116,9 +116,9 @@ struct grid_toKernelImpl
template
<
unsigned
int
dim
,
typename
T
>
struct
grid_toKernelImpl
<
true
,
dim
,
T
>
{
template
<
typename
grid_type
>
static
grid_gpu_ker
<
dim
,
T
,
memory_traits_inte
>
toKernel
(
grid_type
&
gc
)
template
<
typename
grid_type
>
static
grid_gpu_ker
<
dim
,
T
,
memory_traits_inte
,
typename
grid_type
::
linearizer_type
>
toKernel
(
grid_type
&
gc
)
{
grid_gpu_ker
<
dim
,
T
,
memory_traits_inte
>
g
(
gc
.
getGrid
());
grid_gpu_ker
<
dim
,
T
,
memory_traits_inte
,
typename
grid_type
::
linearizer_type
>
g
(
gc
.
getGrid
());
copy_switch_memory_c_no_cpy
<
typename
std
::
remove_reference
<
decltype
(
gc
.
get_internal_data_
())
>::
type
,
typename
std
::
remove_reference
<
decltype
(
g
.
get_data_
())
>::
type
>
cp_mc
(
gc
.
get_internal_data_
(),
g
.
get_data_
());
...
...
src/Grid/cuda/cuda_grid_unit_tests_func.cu
View file @
df7c8617
...
...
@@ -4,7 +4,8 @@
#include <stdio.h>
#include "cuda_grid_unit_tests_func.cuh"
__global__
void
grid_gradient_vector
(
grid_gpu_ker
<
3
,
Point_aggr_test
,
memory_traits_inte
>
g1
,
grid_gpu_ker
<
3
,
Point_aggr_test
,
memory_traits_inte
>
g2
,
ite_gpu
<
3
>
ite_gpu
)
template
<
typename
grid_type1
,
typename
grid_type2
>
__global__
void
grid_gradient_vector
(
grid_type1
g1
,
grid_type2
g2
,
ite_gpu
<
3
>
ite_gpu
)
{
GRID_ID_3
(
ite_gpu
);
...
...
@@ -13,7 +14,8 @@ __global__ void grid_gradient_vector(grid_gpu_ker<3,Point_aggr_test,memory_trait
g2
.
template
get
<
4
>(
key
)[
2
]
=
(
g1
.
template
get
<
0
>(
key
.
move
(
2
,
1
))
-
g1
.
template
get
<
0
>(
key
.
move
(
2
,
-
1
)))
/
2.0
;
}
__global__
void
grid_fill_vector
(
grid_gpu_ker
<
3
,
Point_aggr_test
,
memory_traits_inte
>
g1
,
ite_gpu
<
3
>
ite_gpu
)
template
<
typename
grid_type
>
__global__
void
grid_fill_vector
(
grid_type
g1
,
ite_gpu
<
3
>
ite_gpu
)
{
GRID_ID_3
(
ite_gpu
);
...
...
@@ -22,7 +24,8 @@ __global__ void grid_fill_vector(grid_gpu_ker<3,Point_aggr_test,memory_traits_in
g1
.
template
get
<
4
>(
key
)[
2
]
=
3.0
;
}
__global__
void
grid_fill_vector2
(
grid_gpu_ker
<
3
,
Point_aggr_test
,
memory_traits_inte
>
g1
,
ite_gpu
<
3
>
ite_gpu
)
template
<
typename
grid_type
>
__global__
void
grid_fill_vector2
(
grid_type
g1
,
ite_gpu
<
3
>
ite_gpu
)
{
GRID_ID_3
(
ite_gpu
);
...
...
@@ -31,8 +34,8 @@ __global__ void grid_fill_vector2(grid_gpu_ker<3,Point_aggr_test,memory_traits_i
g1
.
template
get
<
4
>(
key
)[
2
]
=
1003.0
;
}
__global__
void
compute_stencil_grid
(
grid_
gpu_ker
<
3
,
Point_aggr_test
,
memory_traits_inte
>
g1
,
grid_gpu_ker
<
3
,
Point_aggr_test
,
memory_traits_inte
>
g2
,
ite_gpu
<
3
>
ite_gpu
)
template
<
typename
grid_type
>
__global__
void
compute_stencil_grid
(
grid_
type
g1
,
grid_type
g2
,
ite_gpu
<
3
>
ite_gpu
)
{
GRID_ID_3
(
ite_gpu
);
...
...
src/Grid/cuda/map_grid_cuda_ker.cuh
View file @
df7c8617
...
...
@@ -142,14 +142,14 @@ __device__ void fill_grid_error_array(size_t lin_id)
* \tparam n_buf number of template buffers
*
*/
template
<
unsigned
int
dim
,
typename
T
,
template
<
typename
>
class
layout_base
>
template
<
unsigned
int
dim
,
typename
T
,
template
<
typename
>
class
layout_base
,
typename
linearizer
>
class
grid_gpu_ker
{
//! Type T
typedef
typename
apply_transform
<
layout_base
,
T
>::
type
T_
;
//! grid information
grid_sm
<
dim
,
void
>
g1
;
linearizer
g1
;
//! type of layout of the structure
typedef
typename
layout_base
<
T_
>::
type
layout
;
...
...
@@ -201,7 +201,7 @@ public:
__device__
__host__
grid_gpu_ker
()
{}
__device__
__host__
grid_gpu_ker
(
const
grid_sm
<
dim
,
void
>
&
g1
)
__device__
__host__
grid_gpu_ker
(
const
linearizer
&
g1
)
:
g1
(
g1
)
{
}
...
...
@@ -209,7 +209,12 @@ public:
__device__
__host__
grid_gpu_ker
(
const
grid_gpu_ker
&
cpy
)
:
g1
(
cpy
.
g1
)
{
// std::cout << "Constructing " << &cpy << std::endl;
grid_gpu_ker_constructor_impl
<
is_layout_inte
<
layout_base
<
T_
>>::
value
,
T_
>::
construct
(
cpy
,
*
this
);
}
__device__
__host__
void
constructor_impl
(
const
grid_gpu_ker
&
cpy
)
{
g1
=
cpy
.
g1
;
grid_gpu_ker_constructor_impl
<
is_layout_inte
<
layout_base
<
T_
>>::
value
,
T_
>::
construct
(
cpy
,
*
this
);
}
...
...
@@ -337,7 +342,7 @@ public:
}
__device__
inline
void
set
(
const
grid_key_dx
<
dim
>
&
key1
,
const
grid_gpu_ker
<
dim
,
T_
,
layout_base
>
&
g
,
const
grid_key_dx
<
dim
>
&
key2
)
__device__
inline
void
set
(
const
grid_key_dx
<
dim
>
&
key1
,
const
grid_gpu_ker
<
dim
,
T_
,
layout_base
,
linearizer
>
&
g
,
const
grid_key_dx
<
dim
>
&
key2
)
{
#ifdef SE_CLASS1
if
(
check_bound
(
key1
)
==
false
)
...
...
@@ -351,7 +356,7 @@ public:
this
->
get_o
(
key1
)
=
g
.
get_o
(
key2
);
}
template
<
unsigned
int
...
prp
>
__device__
inline
void
set
(
const
grid_key_dx
<
dim
>
&
key1
,
const
grid_gpu_ker
<
dim
,
T_
,
layout_base
>
&
g
,
const
grid_key_dx
<
dim
>
&
key2
)
template
<
unsigned
int
...
prp
>
__device__
inline
void
set
(
const
grid_key_dx
<
dim
>
&
key1
,
const
grid_gpu_ker
<
dim
,
T_
,
layout_base
,
linearizer
>
&
g
,
const
grid_key_dx
<
dim
>
&
key2
)
{
#ifdef SE_CLASS1
if
(
check_bound
(
key1
)
==
false
)
...
...
@@ -413,7 +418,7 @@ public:
* \param object to copy
*
*/
grid_gpu_ker
<
dim
,
T
,
layout_base
>
&
operator
=
(
const
grid_gpu_ker
<
dim
,
T
,
layout_base
>
&
g
)
grid_gpu_ker
<
dim
,
T
,
layout_base
,
linearizer
>
&
operator
=
(
const
grid_gpu_ker
<
dim
,
T
,
layout_base
,
linearizer
>
&
g
)
{
g1
=
g
.
g1
;
...
...
src/Grid/grid_base_implementation.hpp
View file @
df7c8617
...
...
@@ -570,6 +570,12 @@ private:
}
else
grid_new
.
setMemory
();
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu
=
grid_toKernelImpl
<
is_layout_inte
<
layout_base
<
T
>>::
value
,
dim
,
T
>::
toKernel
(
*
this
);
#endif
}
public:
...
...
@@ -650,6 +656,12 @@ public:
{
swap
(
g
.
duplicate
());
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu
=
grid_toKernelImpl
<
is_layout_inte
<
layout_base
<
T
>>::
value
,
dim
,
T
>::
toKernel
(
*
this
);
#endif
return
*
this
;
}
...
...
@@ -664,6 +676,12 @@ public:
{
swap
(
g
);
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu
=
grid_toKernelImpl
<
is_layout_inte
<
layout_base
<
T
>>::
value
,
dim
,
T
>::
toKernel
(
*
this
);
#endif
return
*
this
;
}
...
...
@@ -773,6 +791,13 @@ public:
void
setMemory
()
{
mem_setm
<
S
,
layout_base
<
T
>
,
decltype
(
this
->
data_
),
decltype
(
this
->
g1
)
>::
setMemory
(
data_
,
g1
,
is_mem_init
);
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu
=
grid_toKernelImpl
<
is_layout_inte
<
layout_base
<
T
>>::
value
,
dim
,
T
>::
toKernel
(
*
this
);
#endif
}
/*! \brief Set the object that provide memory from outside
...
...
@@ -791,17 +816,17 @@ public:
//! Is external
isExternal
=
true
;
//! Create and set the memory allocator
// data_.setMemory(m);
//! Allocate the memory and create the reppresentation
// if (g1.size() != 0) data_.allocate(g1.size());
bool
skip_ini
=
skip_init
<
has_noPointers
<
T
>::
value
,
T
>::
skip_
();
mem_setmemory
<
decltype
(
data_
),
S
,
layout_base
<
T
>>::
template
setMemory
<
p
>(
data_
,
m
,
g1
.
size
(),
skip_ini
);
is_mem_init
=
true
;
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu
=
grid_toKernelImpl
<
is_layout_inte
<
layout_base
<
T
>>::
value
,
dim
,
T
>::
toKernel
(
*
this
);
#endif
}
/*! \brief Set the object that provide memory from outside
...
...
@@ -825,6 +850,12 @@ public:
mem_setmemory
<
decltype
(
data_
),
S
,
layout_base
<
T
>>::
template
setMemoryArray
(
*
this
,
m
,
g1
.
size
(),
skip_ini
);
is_mem_init
=
true
;
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu
=
grid_toKernelImpl
<
is_layout_inte
<
layout_base
<
T
>>::
value
,
dim
,
T
>::
toKernel
(
*
this
);
#endif
}
/*! \brief Return a plain pointer to the internal data
...
...
@@ -1315,18 +1346,7 @@ public:
resize_impl_memset
(
grid_new
);
// We know that, if it is 1D we can safely copy the memory
// if (dim == 1)
// {
// //! 1-D copy (This case is simple we use raw memory copy because is the fastest option)
// grid_new.data_.mem->copy(*data_.mem);
// }
// else
// {
// It should be better to separate between fast and slow cases
//! N-D copy
//! N-D copy
if
(
opt
&
DATA_ON_HOST
)
{
resize_impl_host
(
sz
,
grid_new
);}
...
...
@@ -1388,30 +1408,27 @@ public:
*
* This is a different from the standard swap and require long explanation.
*
* This object by default when it construct after we call setMemory() it create an internal memory object
* and use it allocate memory internally.
* This object
(grid)
by default when it construct
s and
after we call setMemory() it create an internal memory object
* and use it allocate memory internally.
(Mode 1)
*
* If instead we use setMemory(external_mem) this object does not create an internal memory object but use
* the passed object to allocate memory. Because the external memory can already have a pool of memory preallocated
* we can re-use the memory.
*
* Despite this setMemory can be used to do memory retaining/re-use and/or garbage collection.
* It can be seen from a different prospective of making the data structures act like a representation of external
* memory. De facto we are giving meaning to the external memory so we are shaping or re-shaping pre-existing
* external memory.
* the passed object to allocate memory. (Mode 2)
*
* In the following I will call these two mode Mode1 and Mode2
* External memory can be used to do memory retaining/re-use and/or garbage collection and making the data structures
* act like a representation of external memory
*
* Using the structure in this way has consequences, because
now
in Mode2 the memory (and so its life-span) is disentangled
* Using the structure in this way has consequences, because in Mode2 the memory (and so its life-span) is disentangled
* by its structure.
*
*
* The
main difference
comes when we swap object in which one
of both are
in Mode2
* The
problem
comes when we swap object in which one
the structure is
in Mode2
*
* Let's suppose object A is in Mode1 and object B is is Mode2. The normal swap, fully swap the objects
*
* A.swap(B) A become B (in mode 2) and B become A (in mode 1)
*
* swap nomode require that A and B have the same size.
*
* A.swap_nomode(B) In this case the mode is not swapped A become B (in mode 1) and B become A (in mode 2).
* So the mode is not swapped and remain the original
*
...
...
@@ -1669,7 +1686,32 @@ public:
return
grid_key_dx_iterator
<
dim
>
(
gvoid
);
}
#ifdef CUDA_GPU
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
mutable
grid_gpu_ker
<
dim
,
T
,
layout_base
,
linearizer_type
>
base_gpu
;
/*! \brief Convert the grid into a data-structure compatible for computing into GPU
*
* The object created can be considered like a reference of the original
*
*/
grid_gpu_ker
<
dim
,
T
,
layout_base
,
linearizer_type
>
&
toKernel
()
{
return
base_gpu
;
}
/*! \brief Convert the grid into a data-structure compatible for computing into GPU
*
* The object created can be considered like a reference of the original
*
*/
const
grid_gpu_ker
<
dim
,
T
,
layout_base
,
linearizer_type
>
&
toKernel
()
const
{
return
base_gpu
;
}
#else
/*! \brief Convert the grid into a data-structure compatible for computing into GPU
*
...
...
src/Grid/map_grid.hpp
View file @
df7c8617
...
...
@@ -315,7 +315,7 @@ public:
* The object created can be considered like a reference of the original
*
*/
grid_gpu_ker
<
dim
,
T_
,
memory_traits_lin
>
toKernel
()
grid_gpu_ker
<
dim
,
T_
,
memory_traits_lin
,
linearizer_type
>
toKernel
()
{
return
grid_toKernelImpl
<
is_layout_inte
<
memory_traits_lin
<
T_
>>::
value
,
dim
,
T_
>::
toKernel
(
*
this
);
}
...
...
@@ -325,7 +325,7 @@ public:
* The object created can be considered like a reference of the original
*
*/
const
grid_gpu_ker
<
dim
,
T_
,
memory_traits_lin
>
toKernel
()
const
const
grid_gpu_ker
<
dim
,
T_
,
memory_traits_lin
,
linearizer_type
>
toKernel
()
const
{
return
grid_toKernelImpl
<
is_layout_inte
<
memory_traits_lin
<
T_
>>::
value
,
dim
,
T_
>::
toKernel
(
*
this
);
}
...
...
@@ -874,7 +874,7 @@ public:
* The object created can be considered like a reference of the original
*
*/
grid_gpu_ker
<
dim
,
T_
,
memory_traits_inte
>
toKernel
()
grid_gpu_ker
<
dim
,
T_
,
memory_traits_inte
,
linearizer_type
>
toKernel
()
{
return
grid_toKernelImpl
<
is_layout_inte
<
memory_traits_inte
<
T_
>>::
value
,
dim
,
T_
>::
toKernel
(
*
this
);
}
...
...
@@ -884,7 +884,7 @@ public:
* The object created can be considered like a reference of the original
*
*/
const
grid_gpu_ker
<
dim
,
T_
,
memory_traits_inte
>
toKernel
()
const
const
grid_gpu_ker
<
dim
,
T_
,
memory_traits_inte
,
linearizer_type
>
toKernel
()
const
{
return
grid_toKernelImpl
<
is_layout_inte
<
memory_traits_inte
<
T
>>::
value
,
dim
,
T_
>::
toKernel
(
*
this
);
}
...
...
src/Vector/cuda/map_vector_cuda_ker.cuh
View file @
df7c8617
...
...
@@ -127,7 +127,7 @@ namespace openfpm
unsigned
int
v_size
;
//! 1-D static grid
grid_gpu_ker
<
1
,
T_
,
layout_base
>
base
;
grid_gpu_ker
<
1
,
T_
,
layout_base
,
grid_sm
<
1
,
void
>
>
base
;
/*! \brief Check that the key is inside the grid
*
...
...
@@ -180,7 +180,6 @@ namespace openfpm
return
base
.
size
();
}
/*! \brief Get an element of the vector
*
* Get an element of the vector
...
...
@@ -344,10 +343,20 @@ namespace openfpm
:
v_size
(
0
)
{}
vector_gpu_ker
(
int
v_size
,
const
grid_gpu_ker
<
1
,
T_
,
layout_base
>
&
cpy
)
vector_gpu_ker
(
int
v_size
,
const
grid_gpu_ker
<
1
,
T_
,
layout_base
,
grid_sm
<
1
,
void
>
>
&
cpy
)
:
v_size
(
v_size
),
base
(
cpy
)
{}
/*! \brief implementation of the constructor
*
* \param v_size number of elements
*
*/
inline
void
constructor_impl
(
int
v_size
,
const
grid_gpu_ker
<
1
,
T_
,
layout_base
,
grid_sm
<
1
,
void
>>
&
cpy
)
{
this
->
v_size
=
v_size
;
base
.
constructor_impl
(
cpy
);
}
/*! \brief Set the object id to obj
*
...
...
@@ -491,7 +500,7 @@ namespace openfpm
* \return the base
*
*/
__device__
grid_gpu_ker
<
1
,
T_
,
layout_base
>
&
getBase
()
__device__
grid_gpu_ker
<
1
,
T_
,
layout_base
,
grid_sm
<
1
,
void
>
>
&
getBase
()
{
return
base
;
}
...
...
src/Vector/map_vector.hpp
View file @
df7c8617
...
...
@@ -369,6 +369,12 @@ namespace openfpm
size_t
sz
[
1
]
=
{
sp
};
base
.
resize
(
sz
);
}
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu
=
this
->
toKernel
();
#endif
}
/*! \brief Clear the vector
...
...
@@ -390,6 +396,12 @@ namespace openfpm
{
size_t
sz
[
1
]
=
{
size
()};
base
.
resize
(
sz
);
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu
=
this
->
toKernel
();
#endif
}
/*! \brief Resize the vector
...
...
@@ -426,6 +438,12 @@ namespace openfpm
// update the vector size
v_size
=
slot
;
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu
=
this
->
toKernel
();
#endif
}
...
...
@@ -452,6 +470,12 @@ namespace openfpm
// update the vector size
v_size
=
slot
;
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu
=
this
->
toKernel
();
#endif
}
//! Access key for the vector
...
...
@@ -477,6 +501,12 @@ namespace openfpm
//! increase the vector size
v_size
++
;
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu
=
this
->
toKernel
();
#endif
}
/*! \brief It insert a new emtpy object on the vector, eventually it reallocate the grid
...
...
@@ -499,6 +529,12 @@ namespace openfpm
//! increase the vector size
v_size
++
;
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu
=
this
->
toKernel
();
#endif
}
/*! \brief It insert a new object on the vector, eventually it reallocate the grid
...
...
@@ -526,6 +562,12 @@ namespace openfpm
//! increase the vector size
v_size
++
;
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu
=
this
->
toKernel
();
#endif
}
/*! \brief It insert a new object on the vector, eventually it reallocate the vector
...
...
@@ -554,6 +596,12 @@ namespace openfpm
//! increase the vector size
v_size
++
;
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu
=
this
->
toKernel
();
#endif
}
/*! \brief It add the element of another vector to this vector
...
...
@@ -1434,6 +1482,12 @@ namespace openfpm
:
v_size
(
0
)
{
swap
(
v
);
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu
=
this
->
toKernel
();
#endif
}
/*! \brief Constructor from another constant vector
...
...
@@ -1445,6 +1499,12 @@ namespace openfpm
:
v_size
(
0
)
{
swap
(
v
.
duplicate
());
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu
=
this
->
toKernel
();
#endif
}
//! Constructor, vector of size 0
...
...
@@ -1452,6 +1512,12 @@ namespace openfpm
:
v_size
(
0
),
base
(
0
)
{
base
.
setMemory
();
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu
=
this
->
toKernel
();
#endif
}
//! Constructor, vector of size sz
...
...
@@ -1459,6 +1525,12 @@ namespace openfpm
:
v_size
(
sz
),
base
(
sz
)
{
base
.
setMemory
();
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu
=
this
->
toKernel
();
#endif
}
/*! \brief Set the object id to obj
...
...
@@ -1548,6 +1620,12 @@ namespace openfpm
v_size
=
mv
.
v_size
;
base
.
swap
(
mv
.
base
);
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu
=
this
->
toKernel
();
#endif
return
*
this
;
}
...
...
@@ -1575,6 +1653,12 @@ namespace openfpm
copy_two_vectors_activate_impl
<
Memory
::
isDeviceHostSame
()
==
false
>::
copy2
(
*
this
,
mv
);
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu
=
this
->
toKernel
();
#endif
return
*
this
;
}
...
...
@@ -1592,6 +1676,12 @@ namespace openfpm
v_size
=
mv
.
v_size
;
base
.
swap
(
mv
.
base
);
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu
=
this
->
toKernel
();
#endif
return
*
this
;
}
...
...
@@ -1619,6 +1709,12 @@ namespace openfpm
copy_two_vectors_activate_impl
<
Memory
::
isDeviceHostSame
()
==
false
&&
Mem
::
isDeviceHostSame
()
==
false
>::
copy2
(
*
this
,
mv
);
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu
=
this
->
toKernel
();
#endif
return
*
this
;
}
...
...
@@ -1637,6 +1733,12 @@ namespace openfpm
v_size
=
mv
.
v_size
;
base
.
swap
(
mv
.
base
);
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu
=
this
->
toKernel
();
#endif
return
*
this
;
}
...
...
@@ -1668,6 +1770,13 @@ namespace openfpm
copy_two_vectors_activate_impl
<
Memory
::
isDeviceHostSame
()
==
false
&&
Mem
::
isDeviceHostSame
()
==
false
>::
copy2
(
*
this
,
mv
);
#if defined(CUDIFY_USE_SEQUENTIAL) || defined(CUDIFY_USE_OPENMP)
base_gpu
=
this
->
toKernel
();
#endif
return
*
this
;
}
...
...
@@ -1719,6 +1828,13 @@ namespace openfpm
v_size
=
v
.
v_size
;
base
.
swap_nomode
(
v
.
base
);