......@@ -62,7 +62,7 @@ compile
depcomp
missing
src/mem_map
**/performance/
#**/performance/
previous_measure
previous_measureg
previous_measurev
......
......@@ -10,6 +10,7 @@ set(SE_CLASS1 CACHE BOOL "Activate compilation with SE_CLASS1")
set(SE_CLASS2 CACHE BOOL "Activate compilation with SE_CLASS2")
set(SE_CLASS3 CACHE BOOL "Activate compilation with SE_CLASS3")
set(ENABLE_GPU CACHE BOOL "Disable the GPU code independently of whether a CUDA compiler is found")
set(TEST_PERFORMANCE CACHE BOOL "Enable test performance")
set (CMAKE_CXX_STANDARD 11)
set (CMAKE_CUDA_STANDARD 11)
......@@ -23,14 +24,13 @@ if (ENABLE_GPU)
set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=611 --diag_suppress=2885 --diag_suppress=2886 --diag_suppress=2887 --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111" --expt-extended-lambda)
elseif ( CUDA_VERSION_MAJOR EQUAL 10 AND CUDA_VERSION_MINOR EQUAL 1 )
message("CUDA is compatible")
set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=2915 --diag_suppress=2914 --diag_suppress=2912 --diag_suppress=2913 --diag_suppress=111 --diag_suppress=186 --diag_suppress=611 " --expt-extended-lambda)
set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=2915 --diag_suppress=2914 --diag_suppress=2912 --diag_suppress=2913 --diag_suppress=111 --diag_suppress=186 --diag_suppress=611 --diag_suppress=1301 --diag_suppress=177 --diag_suppress=2928 --diag_suppress=2929 --diag_suppress=2930 --diag_suppress=2931" --expt-extended-lambda)
else()
message(FATAL_ERROR "CUDA is incompatible; only versions 9.2 and 10.1 are supported")
endif()
endif()
find_package(Boost 1.66.0 REQUIRED COMPONENTS unit_test_framework iostreams program_options)
find_package(Boost 1.66.0 REQUIRED COMPONENTS unit_test_framework iostreams program_options system filesystem)
find_package(LibHilbert REQUIRED)
......@@ -52,6 +52,10 @@ if(CUDA_FOUND)
set(DEFINE_CUDA_GPU "#define CUDA_GPU")
endif()
if(TEST_PERFORMANCE)
set(DEFINE_PERFORMANCE_TEST "#define PERFORMANCE_TEST")
endif()
if (Boost_FOUND)
set(DEFINE_HAVE_BOOST "#define HAVE_BOOST")
set(DEFINE_HAVE_BOOST_IOSTREAMS "#define HAVE_BOOST_IOSTREAMS")
......
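The new DEFINE_PERFORMANCE_TEST string is presumably substituted into the generated configuration header in the same way as the existing DEFINE_CUDA_GPU and DEFINE_HAVE_BOOST strings. A minimal sketch of how a translation unit would then gate the benchmark code; the header name config.h and the conditional include are assumptions, not taken from this diff:

// hypothetical consumer of the generated configuration header (assumed name: config.h)
#include "config.h"                            // contains "#define PERFORMANCE_TEST" when configured with -DTEST_PERFORMANCE=ON

#ifdef PERFORMANCE_TEST
#include "Grid/grid_performance_tests.hpp"     // compile the benchmark suite only on request
#endif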
SUBDIRS = src
bin_PROGRAMS =
test:
cd src && make test
#! /bin/bash
# Make a directory in /tmp/openfpm_data
echo "Build on: $2 with $3"
mkdir /tmp/openfpm_data_$3
mv * .[^.]* /tmp/openfpm_data_$3
mv /tmp/openfpm_data_$3 openfpm_data
mkdir openfpm_data/src/config
git clone git@ppmcore.mpi-cbg.de:incardon/openfpm_devices.git openfpm_devices
cd "$1/openfpm_data"
pre_command=""
sh ./autogen.sh
if [ "$2" == "master" ]; then
options="$options --disable-gpu"
fi
if [ x"$3" == x"SE" ]; then
options="$options --enable-se-class1 --enable-se-class2 --enable-se-class3 --with-action-on-error=throw --enable-test-coverage"
opt_comp="for security enhancement"
fi
if [ x"$3" == x"VALGRIND" ]; then
pre_command="valgrind --leak-check=full"
options="$options --disable-gpu --enable-test-coverage"
opt_comp="for valgrind test"
fi
sh ./configure $options
if [ $? -ne 0 ]; then
curl -X POST --data "payload={\"icon_emoji\": \":jenkins:\", \"username\": \"jenkins\" , \"attachments\":[{ \"title\":\"Error:\", \"color\": \"#FF0000\", \"text\":\"$2 failed to configure the openfpm_data test $opt_comp \" }] }" https://hooks.slack.com/services/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
exit 1
fi
make
if [ $? -ne 0 ]; then
curl -X POST --data "payload={\"icon_emoji\": \":jenkins:\", \"username\": \"jenkins\" , \"attachments\":[{ \"title\":\"Error:\", \"color\": \"#FF0000\", \"text\":\"$2 failed to compile the openfpm_data test $opt_comp \" }] }" https://hooks.slack.com/services/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
exit 1
fi
$pre_command ./src/mem_map
if [ $? -ne 0 ]; then
curl -X POST --data "payload={\"icon_emoji\": \":jenkins:\", \"username\": \"jenkins\" , \"attachments\":[{ \"title\":\"Error:\", \"color\": \"#FF0000\", \"text\":\"$2 failed the openfpm_data test $opt_comp \" }] }" https://hooks.slack.com/services/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
exit 1
fi
curl -X POST --data "payload={\"icon_emoji\": \":jenkins:\", \"username\": \"jenkins\" , \"attachments\":[{ \"title\":\"Info:\", \"color\": \"#00FF00\", \"text\":\"$2 successfully completed the openfpm_data test $opt_comp \" }] }" https://hooks.slack.com/services/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
......@@ -229,6 +229,12 @@ do
test_coverage)
conf_options="$conf_options -DTEST_COVERAGE=ON"
;;
scan_coverty)
conf_options="$conf_options -DSCAN_COVERTY=ON"
;;
test_performance)
conf_options="$conf_options -DTEST_PERFORMANCE=ON"
;;
gpu)
if [ x"$CXX" == x"" ]; then
conf_options="$conf_options"
......@@ -469,6 +475,9 @@ do
boost)
conf_options="$conf_options -DBOOST_ROOT=$ac_optarg"
;;
action_on_error)
conf_options="$conf_options -DACTION_ON_ERROR=$ac_optarg"
;;
mpivendor)
conf_options="$conf_options -DMPI_VENDOR=$ac_optarg"
;;
......
......@@ -32,7 +32,7 @@ case $host_os in
;;
esac
CXXFLAGS+=" -march=native -mtune=native -Wno-unused-local-typedefs -Wextra -Wno-unused-parameter "
CXXFLAGS+=" -Wno-unused-local-typedefs -Wextra -Wno-unused-parameter "
NVCCFLAGS=" "
INCLUDES_PATH=" "
......
......@@ -12,12 +12,6 @@
#include "map_graph.hpp"
#include "Point_test.hpp"
#ifdef TEST_COVERAGE_MODE
#define GS_SIZE 8
#else
#define GS_SIZE 128
#endif
BOOST_AUTO_TEST_SUITE( graph_test )
BOOST_AUTO_TEST_CASE( graph_use)
......
......@@ -678,6 +678,26 @@ public:
e_invalid.clear();
}
/*! \brief Destroy the graph
*
* Clear every vertex and edge container and release its memory
*
*/
void destroy()
{
v.clear();
v.shrink_to_fit();
e.clear();
e.shrink_to_fit();
v_l.clear();
v_l.shrink_to_fit();
e_l.clear();
e_l.shrink_to_fit();
e_invalid.clear();
e_invalid.shrink_to_fit();
}
/*! \brief Access the edge
*
* \tparam i property to access
......
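The new destroy() differs from clear() in that it also calls shrink_to_fit() on every container, so the backing memory is actually returned instead of being kept as spare capacity. A minimal standalone illustration of that idiom on a plain std::vector (the graph members above are openfpm vectors, but the semantics are the same):

#include <iostream>
#include <vector>

int main()
{
    std::vector<int> v(1000000, 42);

    v.clear();                 // size drops to 0, but the capacity is typically retained
    std::cout << "after clear:  capacity = " << v.capacity() << "\n";

    v.shrink_to_fit();         // non-binding request to release the unused storage
    std::cout << "after shrink: capacity = " << v.capacity() << "\n";

    return 0;
}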
......@@ -691,7 +691,7 @@ public:
static const int max_prop = T::max_prop;
//! constructor requires a memory data reference and a key
encapc(typename memory_traits_inte<T>::type & data, size_t k)
__device__ __host__ encapc(typename memory_traits_inte<T>::type & data, size_t k)
:data(data),k(k)
{}
......@@ -702,7 +702,8 @@ public:
* \return The reference of the data
*
*/
template <unsigned int p> __device__ __host__ auto get() -> decltype(boost::fusion::at_c<p>(data).mem_r.operator[](k))
template <unsigned int p>
__device__ __host__ auto get() -> decltype(boost::fusion::at_c<p>(data).mem_r.operator[](k))
{
return boost::fusion::at_c<p>(data).mem_r.operator[](k);
}
......@@ -719,6 +720,13 @@ public:
return boost::fusion::at_c<p>(data).mem_r.operator[](k);
}
// __device__ __host__ encapc(const encapc<dim,T,Mem> & ec) = delete;
__device__ __host__ encapc(const encapc<dim,T,Mem> & ec) : data(ec.data), k(ec.k)
{
// printf("ciao\n");
}
// __device__ __host__ inline encapc<dim,T,Mem> & operator=(const encapc<dim,T,Mem> & ec) = delete; //DEBUG
/*! \brief Assignment
*
* \param ec encapsulator
......@@ -766,6 +774,27 @@ public:
return *this;
}
__device__ __host__ inline void private_set_data_k(Mem & data_c, size_t k)
{
this->data = data_c;
this->k = k;
}
__device__ __host__ inline Mem & private_get_data()
{
return data;
}
__device__ __host__ inline size_t private_get_k()
{
return k;
}
__device__ __host__ inline size_t private_set_k(unsigned int k)
{
return this->k = k;
}
};
#include "util/common.hpp"
......
......@@ -19,7 +19,7 @@ struct copy_ndim_grid_impl
{
unsigned int i = threadIdx.x + blockIdx.x * blockDim.x;
if (i >= src.getGrid().size())
if (i >= src.getGrid().size() || i >= dst.getGrid().size())
{return;}
auto key_src = src.getGrid().InvLinId(i);
......@@ -37,6 +37,12 @@ struct copy_ndim_grid_impl<2,grid_type>
key_src.set_d(0,threadIdx.x + blockIdx.x * blockDim.x);
key_src.set_d(1,threadIdx.y + blockIdx.y * blockDim.y);
if (key_src.get(0) >= src.getGrid().size(0)) {return;}
if (key_src.get(1) >= src.getGrid().size(1)) {return;}
if (key_src.get(0) >= dst.getGrid().size(0)) {return;}
if (key_src.get(1) >= dst.getGrid().size(1)) {return;}
dst.get_o(key_src) = src.get_o(key_src);
}
};
......@@ -51,6 +57,14 @@ struct copy_ndim_grid_impl<3,grid_type>
key_src.set_d(1,threadIdx.y + blockIdx.y * blockDim.y);
key_src.set_d(2,threadIdx.z + blockIdx.z * blockDim.z);
if (key_src.get(0) >= src.getGrid().size(0)) {return;}
if (key_src.get(1) >= src.getGrid().size(1)) {return;}
if (key_src.get(2) >= src.getGrid().size(2)) {return;}
if (key_src.get(0) >= dst.getGrid().size(0)) {return;}
if (key_src.get(1) >= dst.getGrid().size(1)) {return;}
if (key_src.get(2) >= dst.getGrid().size(2)) {return;}
dst.get_o(key_src) = src.get_o(key_src);
}
};
......
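The added guards follow the usual CUDA pattern: because the launch grid is rounded up to a whole number of blocks, every thread must check its coordinates against both the source and the destination extents before touching memory. A standalone sketch of the same pattern on raw row-major 2D arrays (the array layout and names are illustrative, not the project's grid layout):

// Copy the overlapping region of two row-major 2D arrays.
__global__ void copy_2d_guarded(const float * src, int src_nx, int src_ny,
                                float * dst, int dst_nx, int dst_ny)
{
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;

    if (x >= src_nx || y >= src_ny) {return;}   // out of the source grid
    if (x >= dst_nx || y >= dst_ny) {return;}   // out of the destination grid

    dst[y * dst_nx + x] = src[y * src_nx + x];
}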
......@@ -27,8 +27,6 @@ __global__ void test_launch(vector_pos_type set_points, vector_prop_type prop, B
v[0] = prop.template get<1>(p)[0];
v[1] = prop.template get<1>(p)[1];
v[2] = prop.template get<1>(p)[2];
printf("Point p %f %f %f scalar: %f vector: %f %f %f \n",pos[0],pos[1],pos[2],scalar,v[0],v[1],v[2]);
}
template<typename grid_type>
......
......@@ -13,6 +13,7 @@
#include "Grid/grid_util_test.hpp"
#include "cuda_grid_unit_tests_func.cuh"
#include "util/cuda/cuda_launch.hpp"
#include "Grid/grid_test_utils.hpp"
BOOST_AUTO_TEST_SUITE( grid_gpu_func_test )
......@@ -682,4 +683,31 @@ BOOST_AUTO_TEST_CASE (gpu_grid_test_se_class1)
#endif
}
BOOST_AUTO_TEST_CASE(grid_test_copy_to_gpu_2d)
{
size_t sz_dst[] = {5,5};
size_t sz_src[] = {3,2};
grid_gpu<2,aggregate<float,float[3],float[3][3]>> g_dst(sz_dst);
grid_gpu<2,aggregate<float,float[3],float[3][3]>> g_src(sz_src);
Box<2,size_t> box_dst({1,2},{2,3});
Box<2,size_t> box_src({1,0},{2,1});
copy_test(g_src,g_dst,box_src,box_dst);
}
BOOST_AUTO_TEST_CASE(grid_test_copy_to_gpu_3d)
{
size_t sz_dst[] = {5,5,5};
size_t sz_src[] = {3,2,2};
grid_gpu<3,aggregate<float,float[3],float[3][3]>> g_dst(sz_dst);
grid_gpu<3,aggregate<float,float[3],float[3][3]>> g_src(sz_src);
Box<3,size_t> box_dst({1,2,2},{2,3,3});
Box<3,size_t> box_src({1,0,0},{2,1,1});
copy_test(g_src,g_dst,box_src,box_dst);
}
BOOST_AUTO_TEST_SUITE_END()
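copy_test itself is presumably provided by the newly included Grid/grid_test_utils.hpp; whatever it checks internally, the source and destination boxes passed to it must span the same number of grid points for a box-to-box copy to be well defined. A quick standalone verification of that invariant for the 3D case above, using plain arithmetic rather than the Box API:

#include <cassert>
#include <cstddef>

int main()
{
    // box_src {1,0,0}-{2,1,1} and box_dst {1,2,2}-{2,3,3}, bounds inclusive
    size_t src_lo[] = {1,0,0}, src_hi[] = {2,1,1};
    size_t dst_lo[] = {1,2,2}, dst_hi[] = {2,3,3};

    size_t n_src = 1, n_dst = 1;
    for (int i = 0 ; i < 3 ; i++)
    {
        n_src *= src_hi[i] - src_lo[i] + 1;   // inclusive extent per dimension
        n_dst *= dst_hi[i] - dst_lo[i] + 1;
    }

    assert(n_src == n_dst);   // both boxes cover 2x2x2 = 8 points
    return 0;
}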
......@@ -419,12 +419,23 @@ public:
return *this;
}
/*! \brief Get an iterator for the GPU
*
* \param key1 start point
* \param key2 stop point
* \param n_thr maximum number of threads per block
*
*/
struct ite_gpu<dim> getGPUIterator(grid_key_dx<dim> & key1, grid_key_dx<dim> & key2, size_t n_thr = 1024) const
{
return getGPUIterator_impl<dim>(g1,key1,key2,n_thr);
}
/*! \brief Get the internal data_ structure
*
* \return the data_ structure
*
*/
inline layout & get_data_()
__device__ __host__ inline layout & get_data_()
{
return data_;
}
......@@ -434,7 +445,7 @@ public:
* \return the data_ structure
*
*/
inline const layout & get_data_() const
__device__ __host__ inline const layout & get_data_() const
{
return data_;
}
......
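getGPUIterator_impl is not part of this hunk, but an ite_gpu<dim> essentially packages the start/stop keys together with a dim3 block/thread decomposition (see the ite_gpu struct near the end of this diff). A rough host-side sketch of how such a decomposition could be derived for a 2D inclusive range with the usual ceiling division; this illustrates the idea only and is not the project's implementation:

#include <cuda_runtime.h>

// Cover the inclusive index range [start, stop] with a 2D launch configuration,
// rounding the block count up so every point gets a thread (hence the in-kernel
// bounds checks elsewhere in this diff).
inline void make_launch_config_2d(const int start[2], const int stop[2],
                                  dim3 & wthr, dim3 & thr, size_t n_thr = 1024)
{
    // split the thread budget into a square-ish block (assumption: 32 x n_thr/32)
    unsigned int tx = 32;
    unsigned int ty = (unsigned int)(n_thr / 32);

    unsigned int nx = (unsigned int)(stop[0] - start[0] + 1);
    unsigned int ny = (unsigned int)(stop[1] - start[1] + 1);

    thr  = dim3(tx, ty, 1);
    wthr = dim3((nx + tx - 1) / tx, (ny + ty - 1) / ty, 1);   // ceiling division
}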
......@@ -69,25 +69,90 @@ struct frswap
};
/*! \brief this class is a functor for "for_each" algorithm
*
* This class is a functor for "for_each" algorithm. For each
* element of the boost::vector the operator() is called
*
* \tparam s_m type of the data structure to swap
* \tparam Mem_type memory type
*
*/
template<typename s_m, typename Mem_type>
struct frswap_nomode
{
s_m & swap_src;
s_m & swap_dst;
//! constructor
frswap_nomode(s_m & swap_dst, s_m & swap_src)
:swap_src(swap_src),swap_dst(swap_dst)
{};
//! It calls swap_nomode for each member
template<typename T>
void operator()(T& t) const
{
boost::fusion::at_c<T::value>(swap_dst).template swap_nomode<Mem_type>(boost::fusion::at_c<T::value>(swap_src));
}
};
//! Case memory_traits_lin
template<unsigned int p, typename layout, typename data_type, typename g1_type, typename key_type, unsigned int sel = 2*is_layout_mlin<layout>::value + is_layout_inte<layout>::value >
struct mem_get
{
/*! \brief Return a reference to the selected element
*
* \param data object from where to take the element
* \param g1 grid information
* \param v1 element id
*
* \return a reference to the object selected
*
*/
__host__ __device__ static inline auto get(data_type & data_, const g1_type & g1, const key_type & v1) -> decltype(boost::fusion::at_c<p>(data_.mem_r.operator[](g1.LinId(v1)))) &
{
return boost::fusion::at_c<p>(data_.mem_r.operator[](g1.LinId(v1)));
}
/*! \brief Return a reference to the selected element
*
* \param data object from where to take the element
* \param g1 grid information
* \param v1 element id
*
* \return a reference to the object selected
*
*/
__host__ __device__ static inline auto get_lin(data_type & data_, const g1_type & g1, const size_t lin_id) -> decltype(boost::fusion::at_c<p>(data_.mem_r.operator[](lin_id))) &
{
return boost::fusion::at_c<p>(data_.mem_r.operator[](lin_id));
}
/*! \brief Return a reference to the selected element
*
* \param data object from where to take the element
* \param g1 grid information
* \param v1 element id
*
* \return a reference to the object selected
*
*/
__host__ __device__ static inline auto get_c(const data_type & data_, const g1_type & g1, const key_type & v1) -> decltype(boost::fusion::at_c<p>(data_.mem_r.operator[](g1.LinId(v1)))) &
{
return boost::fusion::at_c<p>(data_.mem_r.operator[](g1.LinId(v1)));
}
/*! \brief Return a reference to the selected element
*
* \param data object from where to take the element
* \param g1 grid information
* \param v1 element id
*
* \return a reference to the object selected
*
*/
__host__ __device__ static inline auto get_lin_c(const data_type & data_, const g1_type & g1, const size_t lin_id) -> decltype(boost::fusion::at_c<p>(data_.mem_r.operator[](lin_id))) &
{
return boost::fusion::at_c<p>(data_.mem_r.operator[](lin_id));
......@@ -98,21 +163,57 @@ struct mem_get
template<unsigned int p, typename layout, typename data_type, typename g1_type, typename key_type>
struct mem_get<p,layout,data_type,g1_type,key_type,1>
{
/*! \brief Return a reference to the selected element
*
* \param data object from where to take the element
* \param g1 grid information
* \param v1 element id
*
* \return a reference to the object selected
*
*/
__host__ __device__ static inline auto get(data_type & data_, const g1_type & g1, const key_type & v1) -> decltype(boost::fusion::at_c<p>(data_).mem_r.operator[](g1.LinId(v1)))
{
return boost::fusion::at_c<p>(data_).mem_r.operator[](g1.LinId(v1));
}
/*! \brief Return a reference to the selected element
*
* \param data object from where to take the element
* \param g1 grid information
* \param v1 element id
*
* \return a reference to the object selected
*
*/
__host__ __device__ static inline auto get_lin(data_type & data_, const g1_type & g1, size_t lin_id) -> decltype(boost::fusion::at_c<p>(data_).mem_r.operator[](lin_id))
{
return boost::fusion::at_c<p>(data_).mem_r.operator[](lin_id);
}
/*! \brief Return a reference to the selected element
*
* \param data object from where to take the element
* \param g1 grid information
* \param v1 element id
*
* \return a const reference to the object selected
*
*/
__host__ __device__ static inline auto get_c(const data_type & data_, const g1_type & g1, const key_type & v1) -> decltype(boost::fusion::at_c<p>(data_).mem_r.operator[](g1.LinId(v1)))
{
return boost::fusion::at_c<p>(data_).mem_r.operator[](g1.LinId(v1));
}
/*! \brief Return a reference to the selected element
*
* \param data object from where to take the element
* \param g1 grid information
* \param v1 element id
*
* \return a const reference to the object selected
*
*/
__host__ __device__ static inline auto get_lin_c(const data_type & data_, const g1_type & g1, size_t lin_id) -> decltype(boost::fusion::at_c<p>(data_).mem_r.operator[](lin_id))
{
return boost::fusion::at_c<p>(data_).mem_r.operator[](lin_id);
......@@ -236,6 +337,46 @@ struct mem_setext_prp
}
};
/*! \brief this class is a functor for "for_each" algorithm
*
* This class is a functor for "for_each" algorithm. For each
* element of the boost::vector the operator() is called.
* It is mainly used to set an external memory allocator for each property
*
*
*/
template<typename grid_type, typename Memory>
struct mem_setarray
{
grid_type & grid_new;
Memory * mem;
size_t sz;
bool np;
/*! \brief constructor
*
*
*
*/
inline mem_setarray(grid_type & g_new, Memory * mem, size_t sz, bool np)
:grid_new(g_new),mem(mem),sz(sz),np(np)
{};
//! It calls setMemory (and optionally allocate) for each property
template<typename T>
inline void operator()(T& t)
{
grid_new.template setMemory<T::value>(mem[T::value]);
//! Allocate the memory and create the representation
if (sz != 0) boost::fusion::at_c<T::value>(grid_new.get_internal_data_()).allocate(sz,np);
}
};
//! Case memory_traits_inte
template<typename grid_type, typename S , typename layout, typename data_type>
struct mem_setext<grid_type,S,layout,data_type,1>
......@@ -262,6 +403,13 @@ struct mem_swap
// move the data
data_dst.swap(data_src);
}
template<typename Mem_type>
static inline void swap_nomode(data_type & data_dst, data_type & data_src)
{
// move the data
data_dst.swap_nomode(data_src);
}
};
//! Case memory_traits_inte
......@@ -275,6 +423,15 @@ struct mem_swap<T,layout,data_type,grid_type,1>
boost::mpl::for_each_ref< boost::mpl::range_c<int,0,T::max_prop> >(sw);
}
template<typename Mem_type>
static inline void swap_nomode(data_type & data_dst, data_type & data_src)
{
// swap the data for each property
frswap_nomode<decltype(data_dst),Mem_type> sw(data_dst,data_src);
boost::mpl::for_each_ref< boost::mpl::range_c<int,0,T::max_prop> >(sw);
}
};
template<typename data_type, typename layout, unsigned int sel = 2*is_layout_mlin<layout>::value + is_layout_inte<layout>::value>
......@@ -316,6 +473,15 @@ struct mem_setmemory
//! Allocate the memory and create the representation
if (sz != 0) data_.allocate(sz,np);
}
static void setMemoryArray(data_type & data_, Mem_type * m, size_t sz, bool np)
{
//! Create and set the memory allocator
data_.setMemory(m[0]);
//! Allocate the memory and create the representation
if (sz != 0) data_.allocate(sz,np);
}
};
template<typename data_type, typename Mem_type, typename layout>
......@@ -329,6 +495,14 @@ struct mem_setmemory<data_type,Mem_type,layout,1>
//! Allocate the memory and create the representation
if (sz != 0) boost::fusion::at_c<p>(data_).allocate(sz,np);
}
template<typename grid_type> static void setMemoryArray(grid_type & grid, Mem_type * m, size_t sz,bool np)
{
mem_setarray<grid_type,Mem_type> ma(grid,m,sz,np);
// Create an empty memory allocator for the actual structure
boost::mpl::for_each_ref<boost::mpl::range_c<int,0,grid_type::value_type::max_prop>>(ma);
}
};
......
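frswap_nomode, mem_setarray and the other functors in this file all rely on the same compile-time iteration pattern: boost::mpl::for_each(_ref) walks a range_c of property indices and hands each index to operator() as a type whose ::value is the index, which is then used with boost::fusion::at_c. A minimal self-contained version of that pattern, using plain boost::mpl::for_each rather than the project's for_each_ref helper:

#include <iostream>
#include <boost/mpl/for_each.hpp>
#include <boost/mpl/range_c.hpp>
#include <boost/fusion/include/vector.hpp>
#include <boost/fusion/include/at_c.hpp>

// A toy "aggregate": three properties stored as a fusion vector
typedef boost::fusion::vector<int, double, char> props;

struct print_prop
{
    const props & p;
    print_prop(const props & p) : p(p) {}

    // T is an integral constant; T::value is the compile-time property index
    template<typename T>
    void operator()(T) const
    {
        std::cout << "property " << T::value << " = "
                  << boost::fusion::at_c<T::value>(p) << "\n";
    }
};

int main()
{
    props p(7, 3.14, 'x');
    boost::mpl::for_each< boost::mpl::range_c<int, 0, 3> >(print_prop(p));
    return 0;
}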
......@@ -11,6 +11,33 @@
#include <type_traits>
#include "util/tokernel_transformation.hpp"
/*! \brief this class is a functor for "for_each" algorithm
*
* This class is a functor for "for_each" algorithm. For each
* element of the boost::vector the operator() is called.
* It is mainly used to set the background value for each property
*
*/
template<typename aggrT_src, typename local_grids_type>
struct setBackground_impl
{
aggrT_src & bck;
local_grids_type & loc_grid;
inline setBackground_impl(aggrT_src & bck, local_grids_type & loc_grid)
:bck(bck),loc_grid(loc_grid)
{};
//! It calls setBackgroundValue for each property on every local grid
template<typename T>
inline void operator()(T& t)
{
for (size_t i = 0 ; i < loc_grid.size() ; i++)
{loc_grid.get(i).template setBackgroundValue<T::value>(bck.template get<T::value>());}
}
};
/*! \brief this class is a functor for "for_each" algorithm
*
* This class is a functor for "for_each" algorithm. For each
......
......@@ -24,7 +24,8 @@ public:
* \param exp grid_key_dx expression
*
*/
template<typename exp1> inline grid_key_dx(const grid_key_dx_expression<dim,exp1> & exp)
template<typename exp1>
__device__ __host__ inline grid_key_dx(const grid_key_dx_expression<dim,exp1> & exp)
{
for (size_t i = 0 ; i < dim ; i++)
this->k[i] = exp.value(i);
......@@ -116,6 +117,17 @@ public:
{this->k[i] = k[i];}
}
/*! \brief Constructor from buffer reference
*
* \param k reference buffer
*
*/
__device__ __host__ inline grid_key_dx(const unsigned int (&k)[dim])
{
for (size_t i = 0 ; i < dim ; i++)
{this->k[i] = k[i];}
}
/*! \brief Construct a grid key from a list of numbers
*
* \param cmb combination
......@@ -202,7 +214,8 @@ public:
* \return a grid_key_dx_expression that encapsulate the expression
*
*/
inline grid_key_dx<dim> & operator+=(const grid_key_dx<dim> & p)
__device__ __host__
inline grid_key_dx<dim,index_type> & operator+=(const grid_key_dx<dim,index_type> & p)
{
for (size_t i = 0 ; i < dim ; i++)
k[i] += p.k[i];
......@@ -217,7 +230,8 @@ public:
* \return a grid_key_dx_expression that encapsulate the expression
*
*/
inline grid_key_dx<dim> & operator-=(const grid_key_dx<dim> & p)
__device__ __host__
inline grid_key_dx<dim,index_type> & operator-=(const grid_key_dx<dim,index_type> & p)
{
for (size_t i = 0 ; i < dim ; i++)
k[i] -= p.k[i];
......@@ -232,9 +246,11 @@ public:
* \return a grid_key_dx_expression that encapsulate the expression
*
*/
inline grid_key_dx_sum<dim,grid_key_dx<dim>,grid_key_dx<dim>> operator+(const grid_key_dx<dim> & p) const
__device__ __host__
inline grid_key_dx_sum<dim,grid_key_dx<dim,index_type>,grid_key_dx<dim,index_type>>
operator+(const grid_key_dx<dim,index_type> & p) const
{
grid_key_dx_sum<dim,grid_key_dx<dim>,grid_key_dx<dim>> exp_sum(*this,p);
grid_key_dx_sum<dim,grid_key_dx<dim,index_type>,grid_key_dx<dim,index_type>> exp_sum(*this,p);
return exp_sum;
}
......@@ -246,7 +262,9 @@ public:
* \return a grid_key_dx_expression that encapsulate the expression
*
*/
inline grid_key_dx_sum<dim,grid_key_dx<dim>,Point<dim,long int>> operator+(const Point<dim,long int> & p) const
__device__ __host__
inline grid_key_dx_sum<dim,grid_key_dx<dim>,Point<dim,long int>>
operator+(const Point<dim,long int> & p) const
{
grid_key_dx_sum<dim,grid_key_dx<dim>,Point<dim,long int>> exp_sum(*this,p);
......@@ -260,6 +278,7 @@ public:
* \return a grid_key_dx_expression that encapsulate the expression
*
*/
__device__ __host__
inline grid_key_dx_sum<dim,grid_key_dx<dim>,comb<dim>> operator+(const comb<dim> & cmb) const
{
grid_key_dx_sum<dim,grid_key_dx<dim>,comb<dim>> exp_sum(*this,cmb);
......@@ -274,9 +293,11 @@ public:
* \return a grid_key_dx_expression that encapsulate the expression
*
*/
inline grid_key_dx_sub<dim,grid_key_dx<dim>,grid_key_dx<dim>> operator-(const grid_key_dx<dim> & cmb) const
__device__ __host__
inline grid_key_dx_sub<dim,grid_key_dx<dim,index_type>,grid_key_dx<dim,index_type>>
operator-(const grid_key_dx<dim,index_type> & cmb) const
{
grid_key_dx_sub<dim,grid_key_dx<dim>,grid_key_dx<dim>> exp_sum(*this,cmb);
grid_key_dx_sub<dim,grid_key_dx<dim,index_type>,grid_key_dx<dim,index_type>> exp_sum(*this,cmb);
return exp_sum;
}
......@@ -288,9 +309,10 @@ public:
* \return a grid_key_dx_expression that encapsulate the expression
*
*/
template <typename T> inline grid_key_dx_sub<dim,grid_key_dx<dim>,grid_key_dx_expression<dim,T>> operator-(const grid_key_dx_expression<dim,T> & cmb) const
template <typename T>
__device__ __host__ inline grid_key_dx_sub<dim,grid_key_dx<dim,index_type>,grid_key_dx_expression<dim,T>> operator-(const grid_key_dx_expression<dim,T> & cmb) const
{
grid_key_dx_sub<dim,grid_key_dx<dim>,grid_key_dx_expression<dim,T>> exp_sum(*this,cmb);
grid_key_dx_sub<dim,grid_key_dx<dim,index_type>,grid_key_dx_expression<dim,T>> exp_sum(*this,cmb);
return exp_sum;
}
......@@ -302,7 +324,7 @@ public:
* \return true if the two key are equal
*
*/
template<unsigned int dim_t> bool operator==(const grid_key_dx<dim_t> & key_t) const
template<unsigned int dim_t> bool operator==(const grid_key_dx<dim_t,index_type> & key_t) const
{
if (dim != dim_t)
{
......@@ -331,7 +353,7 @@ public:
* \return true if the two key are equal
*
*/
template<unsigned int dim_t> bool operator!=(const grid_key_dx<dim_t> & key_t)
template<unsigned int dim_t> bool operator!=(const grid_key_dx<dim_t,index_type> & key_t)
{
return !this->operator==(key_t);
}
......@@ -343,7 +365,7 @@ public:
* \return true if this is lexicographically less than other key
*
*/
bool operator<(const grid_key_dx<dim> & key_t) const
bool operator<(const grid_key_dx<dim,index_type> & key_t) const
{
// Check the two key index by index
......@@ -363,6 +385,10 @@ public:
return false;
}
static bool noPointers()
{
return true;
}
/*! \brief set the Key from a list of numbers
*
......@@ -427,9 +453,10 @@ public:
* \return a point unsigned long int
*
*/
Point<dim,size_t> toPoint() const
template<typename typeT = size_t>
__host__ __device__ inline Point<dim,typeT> toPoint() const
{
Point<dim,size_t> p;
Point<dim,typeT> p;
for (size_t i = 0; i < dim ; i++)
{
......@@ -460,7 +487,7 @@ public:
* \return the index value
*
*/
__device__ __host__ mem_id get(size_t i) const
__device__ __host__ index_type get(index_type i) const
{
return k[i];
}
......@@ -473,7 +500,7 @@ public:
* \param id value to set
*
*/
__device__ __host__ void set_d(size_t i, mem_id id)
__device__ __host__ void set_d(index_type i, index_type id)
{
#if defined(SE_CLASS1) && !defined(__NVCC__)
......
......@@ -23,7 +23,7 @@ class grid_key_dx_expression
{
public:
mem_id value(int i) const
__device__ __host__ mem_id value(int i) const
{
return static_cast<const exp &>(*this).value(i);
}
......@@ -35,9 +35,10 @@ public:
* \return a grid_key_dx_expression that encapsulate the expression
*
*/
inline grid_key_dx_sub<dim,grid_key_dx_expression<dim,exp>,grid_key_dx<dim>> operator-(const grid_key_dx<dim> & key) const
template<typename index_type>
__device__ __host__ inline grid_key_dx_sub<dim,grid_key_dx_expression<dim,exp>,grid_key_dx<dim,index_type>> operator-(const grid_key_dx<dim,index_type> & key) const
{
grid_key_dx_sub<dim,grid_key_dx_expression<dim,exp>,grid_key_dx<dim>> exp_sum(*this,key);
grid_key_dx_sub<dim,grid_key_dx_expression<dim,exp>,grid_key_dx<dim,index_type>> exp_sum(*this,key);
return exp_sum;
}
......@@ -49,12 +50,43 @@ public:
* \return a grid_key_dx_expression that encapsulate the expression
*
*/
template <typename T> inline grid_key_dx_sub<dim,grid_key_dx_expression<dim,exp>,grid_key_dx_expression<dim,T> > operator-(const grid_key_dx_expression<dim,T> & key) const
template <typename T>
__device__ __host__ inline grid_key_dx_sub<dim,grid_key_dx_expression<dim,exp>,grid_key_dx_expression<dim,T> > operator-(const grid_key_dx_expression<dim,T> & key) const
{
grid_key_dx_sub< dim,grid_key_dx_expression<dim,exp>,grid_key_dx_expression<dim,T> > exp_sum(*this,key);
return exp_sum;
}
/*! \brief sum this expression with a grid key
*
* \param key to add
*
* \return a grid_key_dx_expression that encapsulate the expression
*
*/
template<typename index_type>
__device__ __host__ inline grid_key_dx_sum<dim,grid_key_dx_expression<dim,exp>,grid_key_dx<dim,index_type>> operator+(const grid_key_dx<dim,index_type> & key) const
{
grid_key_dx_sum<dim,grid_key_dx_expression<dim,exp>,grid_key_dx<dim,index_type>> exp_sum(*this,key);
return exp_sum;
}
/*! \brief sum this expression with another expression
*
* \param key expression to add
*
* \return a grid_key_dx_expression that encapsulate the expression
*
*/
template <typename T>
__device__ __host__ inline grid_key_dx_sum<dim,grid_key_dx_expression<dim,exp>,grid_key_dx_expression<dim,T> > operator+(const grid_key_dx_expression<dim,T> & key) const
{
grid_key_dx_sum< dim,grid_key_dx_expression<dim,exp>,grid_key_dx_expression<dim,T> > exp_sum(*this,key);
return exp_sum;
}
};
......@@ -73,11 +105,11 @@ class grid_key_dx_sum : public grid_key_dx_expression<dim,grid_key_dx_sum<dim,ex
public:
grid_key_dx_sum(const exp1 & ex1, const exp2 & ex2)
__device__ __host__ grid_key_dx_sum(const exp1 & ex1, const exp2 & ex2)
:e1(ex1),e2(ex2)
{}
mem_id value(int i) const
__device__ __host__ mem_id value(int i) const
{
return e1.value(i) + e2.value(i);
}
......@@ -98,11 +130,11 @@ class grid_key_dx_sub : public grid_key_dx_expression<dim,grid_key_dx_sub<dim,ex
public:
grid_key_dx_sub(const exp1 & ex1, const exp2 & ex2)
__device__ __host__ grid_key_dx_sub(const exp1 & ex1, const exp2 & ex2)
:e1(ex1),e2(ex2)
{}
mem_id value(int i) const
__device__ __host__ mem_id value(int i) const
{
return e1.value(i) - e2.value(i);
}
......
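Most of the changes above just add __device__ __host__ to the expression-template machinery: operator+ and operator- build lazy grid_key_dx_sum / grid_key_dx_sub objects whose value(i) is evaluated per dimension when a grid_key_dx is constructed from the expression. A small host-side sketch of that flow; the include path and exact header name are assumptions about the openfpm_data layout:

#include "Grid/grid_key.hpp"   // assumed location of grid_key_dx within openfpm_data

int main()
{
    // uses the "constructor from buffer reference" added in this diff
    unsigned int a_[3] = {1,2,3};
    unsigned int b_[3] = {10,20,30};

    grid_key_dx<3> a(a_);
    grid_key_dx<3> b(b_);

    // a + b yields a grid_key_dx_sum expression; constructing a grid_key_dx
    // from it evaluates value(i) for each dimension
    grid_key_dx<3> sum = a + b;   // {11,22,33}
    grid_key_dx<3> dif = b - a;   // {9,18,27}

    return !(sum.get(0) == 11 && dif.get(2) == 27);
}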
......@@ -11,6 +11,23 @@ static bool pack()
return false;
}
/*! \brief Reset the pack calculation
*
* \note in this case it does nothing
*
*/
void packReset()
{}
/*! \brief Pack calculate
*
* \note in this case it does nothing
*
*/
template<unsigned int ... prp, typename context_type>
void packCalculate(size_t & req, const context_type & ctx)
{}
static bool packRequest()
{
return false;
......@@ -338,6 +355,17 @@ struct unpack_simple_cond<true, prp ...>
}
}
/*! \brief Finalize the pack of this object; in this case it does nothing
*
* \tparam prp properties to pack
*
* \param mem preallocated memory where to pack the objects
* \param sts pack statistic
*
*/
template<int ... prp> void packFinalize(ExtPreAlloc<S> & mem, Pack_stat & sts)
{}
/*! \brief Pack the object into the memory given an iterator
*
* \tparam prp properties to pack
......@@ -413,7 +441,7 @@ struct unpack_simple_cond<true, prp ...>
* \param obj object where to unpack
*
*/
template<unsigned int ... prp> void unpack(ExtPreAlloc<S> & mem, grid_key_dx_iterator_sub<dims> & sub_it, Unpack_stat & ps)
template<unsigned int ... prp,typename S2, typename context_type> void unpack(ExtPreAlloc<S2> & mem, grid_key_dx_iterator_sub<dims> & sub_it, Unpack_stat & ps,context_type & context)
{
// object that store the information in mem
typedef object<typename object_creator<typename grid_base_impl<dim,T,S,layout,layout_base>::value_type::type,prp...>::type> prp_object;
......@@ -446,6 +474,66 @@ struct unpack_simple_cond<true, prp ...>
ps.addOffset(size);
}
/*! \brief unpack the sub-grid object applying an operation
*
* \tparam op operation
* \tparam prp properties to unpack
*
* \param mem preallocated memory from where to unpack the object
* \param sub2 sub-grid iterator over the region to merge
* \param ps unpack statistic
*
*/
template<template<typename,typename> class op, typename S2, unsigned int ... prp>
void unpack_with_op(ExtPreAlloc<S2> & mem, grid_key_dx_iterator_sub<dim> & sub2, Unpack_stat & ps)
{
PtrMemory * ptr1;
size_t sz[dim];
for (size_t i = 0 ; i < dim ; i++)
sz[i] = sub2.getStop().get(i) - sub2.getStart().get(i) + 1;
size_t tot = 1;
for (size_t i = 0 ; i < dim ; i++)
{tot *= sz[i];}
tot *= sizeof(T);
#ifdef SE_CLASS1
if (ps.getOffset() + tot > mem.size())
std::cerr << __FILE__ << ":" << __LINE__ << " Error: overflow in the receiving buffer for ghost_put" << std::endl;
#endif
// wrap the received data in a PtrMemory object
ptr1 = new PtrMemory(((char *)mem.getPointerBase()+ps.getOffset()),tot);
// create a grid representation on top of the already allocated memory
grid_base_impl<dim,T,PtrMemory,typename memory_traits_lin<T>::type,memory_traits_lin> gs;
gs.setMemory(*ptr1);
// resize with the number of elements
gs.resize(sz);
// Merge the information
auto it_src = gs.getIterator();
while (sub2.isNext())
{
object_s_di_op<op,decltype(gs.get_o(it_src.get())),decltype(this->get_o(sub2.get())),OBJ_ENCAP,prp...>(gs.get_o(it_src.get()),this->get_o(sub2.get()));
++sub2;
++it_src;
}
ps.addOffset(tot);
}
/*! \brief Calculate the memory size required to pack n elements
*
* Calculate the total size required to store n elements of the grid
......
/*
* grid_performance_tests.hpp
*
* Created on: Nov 1, 2015
* Author: i-bird
*/
#ifndef OPENFPM_DATA_SRC_GRID_GRID_PERFORMANCE_TESTS_HPP_
#define OPENFPM_DATA_SRC_GRID_GRID_PERFORMANCE_TESTS_HPP_
#include "grid_util_test.hpp"
openfpm::vector<std::string> testsg;
openfpm::vector<float> per_timesg;
BOOST_AUTO_TEST_CASE(grid_performance_set_obj)
{
size_t sz[] = {128,128,128};
grid_cpu<3, Point_test<float> > c3(sz);
c3.setMemory();
fill_grid<3>(c3);
Point_test<float> f __attribute__((aligned(16)));
f.fill();
std::vector<double> times(N_STAT + 1);
times[0] = 1000;
for (size_t j = 0 ; j < 8 ; j++)
{
for (size_t i = 1 ; i < N_STAT+1 ; i++)
{
timer t;
t.start();
auto it = c3.getIterator();
while (it.isNext())
{
c3.set(it.get(),f);
++it;
}
t.stop();
times[i] = t.getwct();
}
std::sort(times.begin(),times.end());
sleep(5);
}
testsg.add("Grid so");
per_timesg.add(times[0]);
}
BOOST_AUTO_TEST_CASE(grid_performance_set_other_grid)
{
size_t sz[] = {128,128,128};
grid_cpu<3, Point_test<float> > c3(sz);
c3.setMemory();
fill_grid<3>(c3);
grid_cpu<3, Point_test<float> > c1(sz);
c1.setMemory();
std::vector<double> times(N_STAT + 1);
times[0] = 1000;
for (size_t j = 0 ; j < 8 ; j++)
{
for (size_t i = 1 ; i < N_STAT+1 ; i++)
{
timer t;
t.start();
auto it = c3.getIterator();
while (it.isNext())
{
c3.set(it.get(),c1,it.get());
++it;
}
t.stop();
times[i] = t.getwct();
}
std::sort(times.begin(),times.end());
sleep(5);
}
testsg.add("Grid sog");
per_timesg.add(times[0]);
}
BOOST_AUTO_TEST_CASE(grid_performance_set_other_grid_encap)
{
size_t sz[] = {128,128,128};
grid_cpu<3, Point_test<float> > c3(sz);
c3.setMemory();
fill_grid<3>(c3);
grid_cpu<3, Point_test<float> > c1(sz);
c1.setMemory();
std::vector<double> times(N_STAT + 1);
times[0] = 1000;
for (size_t j = 0 ; j < 8 ; j++)
{
for (size_t i = 1 ; i < N_STAT+1 ; i++)
{
timer t;
t.start();
auto it = c3.getIterator();
while (it.isNext())
{
c3.set(it.get(),c1.get_o(it.get()));
++it;
}
t.stop();
times[i] = t.getwct();
}
std::sort(times.begin(),times.end());
sleep(5);
}
testsg.add("Grid soge");
per_timesg.add(times[0]);
}
BOOST_AUTO_TEST_CASE(grid_performance_duplicate)
{
size_t sz[] = {128,128,128};
grid_cpu<3, Point_test<float> > c3(sz);
c3.setMemory();
fill_grid<3>(c3);
grid_cpu<3, Point_test<float> > c1;
std::vector<double> times(N_STAT_SMALL + 1);
times[0] = 1000;
for (size_t j = 0 ; j < 8 ; j++)
{
for (size_t i = 1 ; i < N_STAT_SMALL+1 ; i++)
{
timer t;
t.start();
c1 = c3.duplicate();
t.stop();
times[i] = t.getwct();
}
std::sort(times.begin(),times.end());
sleep(5);
}
testsg.add("Grid dup");
per_timesg.add(times[0]);
}
/////// THIS IS NOT A TEST, IT WRITES THE PERFORMANCE RESULTS ///////
BOOST_AUTO_TEST_CASE(grid_performance_write_report)
{
openfpm::vector<std::string> yn;
openfpm::vector<openfpm::vector<float>> y;
// Get the directory of the performance test files
std::string per_dir(test_dir);
// Reference time
openfpm::vector<openfpm::vector<float>> y_ref;
y_ref.load(per_dir + std::string("/openfpm_data/ref_timesg"));
load_and_combine(per_dir + std::string("/openfpm_data/previous_measureg"),y,per_timesg);
// Adding the dataset names
if (y.size() != 0)
{
for (size_t j = 0; j < y.get(0).size(); j++)
yn.add("config " + std::to_string(j));
}
// Google charts options
GCoptions options;
options.title = std::string("Grid Performances");
options.yAxis = std::string("Time (seconds)");
options.xAxis = std::string("Benchmark");
options.stype = std::string("bars");
std::stringstream g_test_desc;
g_test_desc << "<h2>Grid performance test</h2>\n";
g_test_desc << "<strong>128x128x128 Grid containing a Point_test<float></strong><br>";
g_test_desc << "<strong>Grid so:</strong> Initialize each element of the grid<br>";
g_test_desc << "<strong>Grid sog:</strong> Manual copy of two grids<br>";
g_test_desc << "<strong>Grid soge:</strong> Manual copy of two grids in a different way<br>";
g_test_desc << "<strong>Grid dup:</strong> Duplication of the grid (Duplication include grid creation time)<br>";
cg.addHTML(g_test_desc.str());
cg.AddHistGraph(testsg,y,yn,options);
// compare the reference times with the actual times
// calculate speed-up
openfpm::vector<openfpm::vector<float>> y_ref_sup;
speedup_calculate(y_ref_sup,y,y_ref,yn);
std::stringstream g_test_spdesc;
g_test_spdesc << "<h2>Grid speedup</h2>\n";
g_test_spdesc << "The previous tests are compared with the best performances ever registered, ";
g_test_spdesc << "the banded area indicate the upper and lower bounds of the best registrered performances.<br>";
g_test_spdesc << "The lines are the latest 5 tests<br>";
g_test_spdesc << "<strong>Line inside the area</strong>: The tested configuration has no improvement or degradation in performance<br>";
g_test_spdesc << "<strong>Line break the upper bound</strong>: The tested configuration has improvement in performance<br>";
g_test_spdesc << "<strong>Line break the lower bound</strong>: The tested configuration has degradation in performance<br>";
g_test_spdesc << "<strong>Y axis:</strong> Performance change in percentage from the average of the best registered performances<br>";
cg.addHTML(g_test_spdesc.str());
cg.AddLinesGraph(testsg,y_ref_sup,yn,options);
}
#endif /* OPENFPM_DATA_SRC_GRID_GRID_PERFORMANCE_TESTS_HPP_ */
......@@ -13,9 +13,6 @@
#include "util/mathutil.hpp"
#include "iterators/stencil_type.hpp"
#define PERIODIC 1
#define NON_PERIODIC 0
// Box need the definition of grid_key_dx_r
#define HARDWARE 1
......@@ -67,9 +64,36 @@ public:
}
};
template<unsigned int dim>
struct ite_gpu
{
#ifdef CUDA_GPU
dim3 thr;
dim3 wthr;
grid_key_dx<dim,int> start;
grid_key_dx<dim,int> stop;
size_t nblocks()
{
return wthr.x * wthr.y * wthr.z;
}