......@@ -62,7 +62,7 @@ compile
depcomp
missing
src/mem_map
**/performance/
#**/performance/
previous_measure
previous_measureg
previous_measurev
......
......@@ -10,6 +10,7 @@ set(SE_CLASS1 CACHE BOOL "Activate compilation with SE_CLASS1")
set(SE_CLASS2 CACHE BOOL "Activate compilation with SE_CLASS2")
set(SE_CLASS3 CACHE BOOL "Activate compilation with SE_CLASS3")
set(ENABLE_GPU CACHE BOOL "Enable or disable the GPU code independently of whether a CUDA compiler is found")
set(TEST_PERFORMANCE CACHE BOOL "Enable test performance")
set (CMAKE_CXX_STANDARD 11)
set (CMAKE_CUDA_STANDARD 11)
......@@ -23,14 +24,13 @@ if (ENABLE_GPU)
set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=611 --diag_suppress=2885 --diag_suppress=2886 --diag_suppress=2887 --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111" --expt-extended-lambda)
elseif ( CUDA_VERSION_MAJOR EQUAL 10 AND CUDA_VERSION_MINOR EQUAL 1 )
message("CUDA is compatible")
set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=2915 --diag_suppress=2914 --diag_suppress=2912 --diag_suppress=2913 --diag_suppress=111 --diag_suppress=186 --diag_suppress=611 " --expt-extended-lambda)
set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=2915 --diag_suppress=2914 --diag_suppress=2912 --diag_suppress=2913 --diag_suppress=111 --diag_suppress=186 --diag_suppress=611 " --expt-extended-lambda)
else()
message(FATAL_ERROR "CUDA is incompatible, only versions 9.2 and 10.1 are supported")
endif()
endif()
find_package(Boost 1.66.0 REQUIRED COMPONENTS unit_test_framework iostreams program_options)
find_package(Boost 1.66.0 REQUIRED COMPONENTS unit_test_framework iostreams program_options system filesystem)
find_package(LibHilbert REQUIRED)
......@@ -52,6 +52,10 @@ if(CUDA_FOUND)
set(DEFINE_CUDA_GPU "#define CUDA_GPU")
endif()
if(TEST_PERFORMANCE)
set(DEFINE_PERFORMANCE_TEST "#define PERFORMANCE_TEST")
endif()
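Note: the TEST_PERFORMANCE switch above only injects a preprocessor define into the generated configuration header. A minimal sketch of how such a define would typically be consumed on the C++ side; the guarded include is an assumption, not taken from this diff:
#ifdef PERFORMANCE_TEST
// pull in the benchmark suite only when the build explicitly asks for it
#include "Grid/grid_performance_tests.hpp"
#endif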
if (Boost_FOUND)
set(DEFINE_HAVE_BOOST "#define HAVE_BOOST")
set(DEFINE_HAVE_BOOST_IOSTREAMS "#define HAVE_BOOST_IOSTREAMS")
......
SUBDIRS = src
bin_PROGRAMS =
test:
cd src && make test
#! /bin/bash
# Make a directory in /tmp/openfpm_data
echo "Build on: $2 with $3"
mkdir /tmp/openfpm_data_$3
mv * .[^.]* /tmp/openfpm_data_$3
mv /tmp/openfpm_data_$3 openfpm_data
mkdir openfpm_data/src/config
git clone git@ppmcore.mpi-cbg.de:incardon/openfpm_devices.git openfpm_devices
cd "$1/openfpm_data"
pre_command=""
sh ./autogen.sh
if [ "$2" == "master" ]; then
options="$options --disable-gpu"
fi
if [ x"$3" == x"SE" ]; then
options="$options --enable-se-class1 --enable-se-class2 --enable-se-class3 --with-action-on-error=throw --enable-test-coverage"
opt_comp="for security enhancement"
fi
if [ x"$3" == x"VALGRIND" ]; then
pre_command="valgrind --leak-check=full"
options="$options --disable-gpu --enable-test-coverage"
opt_comp="for valgrind test"
fi
sh ./configure $options
if [ $? -ne 0 ]; then
curl -X POST --data "payload={\"icon_emoji\": \":jenkins:\", \"username\": \"jenkins\" , \"attachments\":[{ \"title\":\"Error:\", \"color\": \"#FF0000\", \"text\":\"$2 failed to configure the openfpm_data test $opt_comp \" }] }" https://hooks.slack.com/services/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
exit 1
fi
make
if [ $? -ne 0 ]; then
curl -X POST --data "payload={\"icon_emoji\": \":jenkins:\", \"username\": \"jenkins\" , \"attachments\":[{ \"title\":\"Error:\", \"color\": \"#FF0000\", \"text\":\"$2 failed to compile the openfpm_data test $opt_comp \" }] }" https://hooks.slack.com/services/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
exit 1
fi
$pre_command ./src/mem_map
if [ $? -ne 0 ]; then
curl -X POST --data "payload={\"icon_emoji\": \":jenkins:\", \"username\": \"jenkins\" , \"attachments\":[{ \"title\":\"Error:\", \"color\": \"#FF0000\", \"text\":\"$2 failed the openfpm_data test $opt_comp \" }] }" https://hooks.slack.com/services/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
exit 1
fi
curl -X POST --data "payload={\"icon_emoji\": \":jenkins:\", \"username\": \"jenkins\" , \"attachments\":[{ \"title\":\"Info:\", \"color\": \"#00FF00\", \"text\":\"$2 successfully completed the openfpm_data test $opt_comp \" }] }" https://hooks.slack.com/services/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
......@@ -229,6 +229,12 @@ do
test_coverage)
conf_options="$conf_options -DTEST_COVERAGE=ON"
;;
scan_coverty)
conf_options="$conf_options -DSCAN_COVERTY=ON"
;;
test_performance)
conf_options="$conf_options -DTEST_PERFORMANCE=ON"
;;
gpu)
if [ x"$CXX" == x"" ]; then
conf_options="$conf_options"
......@@ -469,6 +475,9 @@ do
boost)
conf_options="$conf_options -DBOOST_ROOT=$ac_optarg"
;;
action_on_error)
conf_options="$conf_options -DACTION_ON_ERROR=$ac_optarg"
;;
mpivendor)
conf_options="$conf_options -DMPI_VENDOR=$ac_optarg"
;;
......
......@@ -32,7 +32,7 @@ case $host_os in
;;
esac
CXXFLAGS+=" -march=native -mtune=native -Wno-unused-local-typedefs -Wextra -Wno-unused-parameter "
CXXFLAGS+=" -Wno-unused-local-typedefs -Wextra -Wno-unused-parameter "
NVCCFLAGS=" "
INCLUDES_PATH=" "
......
......@@ -12,12 +12,6 @@
#include "map_graph.hpp"
#include "Point_test.hpp"
#ifdef TEST_COVERAGE_MODE
#define GS_SIZE 8
#else
#define GS_SIZE 128
#endif
BOOST_AUTO_TEST_SUITE( graph_test )
BOOST_AUTO_TEST_CASE( graph_use)
......
......@@ -678,6 +678,26 @@ public:
e_invalid.clear();
}
/*! \brief Clear the whole graph and release its memory
*
* Clears every container and shrinks it to fit, so the allocated memory is returned
*
*/
void destroy()
{
v.clear();
v.shrink_to_fit();
e.clear();
e.shrink_to_fit();
v_l.clear();
v_l.shrink_to_fit();
e_l.clear();
e_l.shrink_to_fit();
e_invalid.clear();
e_invalid.shrink_to_fit();
}
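Unlike clear(), destroy() also shrinks every container to fit, so the capacity is handed back to the allocator instead of being kept for reuse. A minimal usage sketch, assuming a Graph_CSR-like graph type (the concrete type is not shown in this hunk):
Graph_CSR<Point_test<float>, Point_test<float>> g;   // assumed graph type
// ... build and use the graph ...
g.clear();    // empties the containers, capacity may be retained
g.destroy();  // clear() plus shrink_to_fit(), memory is actually released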
/*! \brief Access the edge
*
* \tparam i property to access
......
......@@ -691,7 +691,7 @@ public:
static const int max_prop = T::max_prop;
//! constructor require a key and a memory data
encapc(typename memory_traits_inte<T>::type & data, size_t k)
__device__ __host__ encapc(typename memory_traits_inte<T>::type & data, size_t k)
:data(data),k(k)
{}
......@@ -702,7 +702,8 @@ public:
* \return The reference of the data
*
*/
template <unsigned int p> __device__ __host__ auto get() -> decltype(boost::fusion::at_c<p>(data).mem_r.operator[](k))
template <unsigned int p>
__device__ __host__ auto get() -> decltype(boost::fusion::at_c<p>(data).mem_r.operator[](k))
{
return boost::fusion::at_c<p>(data).mem_r.operator[](k);
}
......@@ -719,6 +720,13 @@ public:
return boost::fusion::at_c<p>(data).mem_r.operator[](k);
}
// __device__ __host__ encapc(const encapc<dim,T,Mem> & ec) = delete;
__device__ __host__ encapc(const encapc<dim,T,Mem> & ec) : data(ec.data), k(ec.k)
{
// printf("ciao\n");
}
// __device__ __host__ inline encapc<dim,T,Mem> & operator=(const encapc<dim,T,Mem> & ec) = delete; //DEBUG
/*! \brief Assignment
*
* \param ec encapsulator
......@@ -766,6 +774,23 @@ public:
return *this;
}
__device__ __host__ inline void private_set_data_k(Mem & data_c, size_t k)
{
this->data = data_c;
this->k = k;
}
__device__ __host__ inline Mem & private_get_data()
{
return data;
}
__device__ __host__ inline size_t private_get_k()
{
return k;
}
};
#include "util/common.hpp"
......
......@@ -19,7 +19,7 @@ struct copy_ndim_grid_impl
{
unsigned int i = threadIdx.x + blockIdx.x * blockDim.x;
if (i >= src.getGrid().size())
if (i >= src.getGrid().size() || i >= dst.getGrid().size())
{return;}
auto key_src = src.getGrid().InvLinId(i);
......@@ -37,6 +37,12 @@ struct copy_ndim_grid_impl<2,grid_type>
key_src.set_d(0,threadIdx.x + blockIdx.x * blockDim.x);
key_src.set_d(1,threadIdx.y + blockIdx.y * blockDim.y);
if (key_src.get(0) >= src.getGrid().size(0)) {return;}
if (key_src.get(1) >= src.getGrid().size(1)) {return;}
if (key_src.get(0) >= dst.getGrid().size(0)) {return;}
if (key_src.get(1) >= dst.getGrid().size(1)) {return;}
dst.get_o(key_src) = src.get_o(key_src);
}
};
......@@ -51,6 +57,14 @@ struct copy_ndim_grid_impl<3,grid_type>
key_src.set_d(1,threadIdx.y + blockIdx.y * blockDim.y);
key_src.set_d(2,threadIdx.z + blockIdx.z * blockDim.z);
if (key_src.get(0) >= src.getGrid().size(0)) {return;}
if (key_src.get(1) >= src.getGrid().size(1)) {return;}
if (key_src.get(2) >= src.getGrid().size(2)) {return;}
if (key_src.get(0) >= dst.getGrid().size(0)) {return;}
if (key_src.get(1) >= dst.getGrid().size(1)) {return;}
if (key_src.get(2) >= dst.getGrid().size(2)) {return;}
dst.get_o(key_src) = src.get_o(key_src);
}
};
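The checks added above make every thread test its index against both the source and the destination extents before touching memory; because the launch grid is rounded up to whole blocks, some threads always land past the last element. A stand-alone sketch of the same guard pattern, with illustrative names rather than the library's own types:
__global__ void copy_2d(float * dst, const float * src,
                        int sx_dst, int sy_dst, int sx_src, int sy_src)
{
	int x = threadIdx.x + blockIdx.x * blockDim.x;
	int y = threadIdx.y + blockIdx.y * blockDim.y;
	// out-of-range threads (for either grid) return without touching memory
	if (x >= sx_src || y >= sy_src) {return;}
	if (x >= sx_dst || y >= sy_dst) {return;}
	dst[y * sx_dst + x] = src[y * sx_src + x];
}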
......
......@@ -27,8 +27,6 @@ __global__ void test_launch(vector_pos_type set_points, vector_prop_type prop, B
v[0] = prop.template get<1>(p)[0];
v[1] = prop.template get<1>(p)[1];
v[2] = prop.template get<1>(p)[2];
printf("Point p %f %f %f scalar: %f vector: %f %f %f \n",pos[0],pos[1],pos[2],scalar,v[0],v[1],v[2]);
}
template<typename grid_type>
......
......@@ -13,6 +13,7 @@
#include "Grid/grid_util_test.hpp"
#include "cuda_grid_unit_tests_func.cuh"
#include "util/cuda/cuda_launch.hpp"
#include "Grid/grid_test_utils.hpp"
BOOST_AUTO_TEST_SUITE( grid_gpu_func_test )
......@@ -682,4 +683,31 @@ BOOST_AUTO_TEST_CASE (gpu_grid_test_se_class1)
#endif
}
BOOST_AUTO_TEST_CASE(grid_test_copy_to_gpu_2d)
{
size_t sz_dst[] = {5,5};
size_t sz_src[] = {3,2};
grid_gpu<2,aggregate<float,float[3],float[3][3]>> g_dst(sz_dst);
grid_gpu<2,aggregate<float,float[3],float[3][3]>> g_src(sz_src);
Box<2,size_t> box_dst({1,2},{2,3});
Box<2,size_t> box_src({1,0},{2,1});
copy_test(g_src,g_dst,box_src,box_dst);
}
BOOST_AUTO_TEST_CASE(grid_test_copy_to_gpu_3d)
{
size_t sz_dst[] = {5,5,5};
size_t sz_src[] = {3,2,2};
grid_gpu<3,aggregate<float,float[3],float[3][3]>> g_dst(sz_dst);
grid_gpu<3,aggregate<float,float[3],float[3][3]>> g_src(sz_src);
Box<3,size_t> box_dst({1,2,2},{2,3,3});
Box<3,size_t> box_src({1,0,0},{2,1,1});
copy_test(g_src,g_dst,box_src,box_dst);
}
BOOST_AUTO_TEST_SUITE_END()
......@@ -419,12 +419,23 @@ public:
return *this;
}
/*! \brief Get an iterator for the GPU
*
* \param start starting point
* \param stop end point
*
*/
struct ite_gpu<dim> getGPUIterator(grid_key_dx<dim> & key1, grid_key_dx<dim> & key2, size_t n_thr = 1024) const
{
return getGPUIterator_impl<dim>(g1,key1,key2,n_thr);
}
/*! \brief Get the internal data_ structure
*
* \return the data_ structure
*
*/
inline layout & get_data_()
__device__ __host__ inline layout & get_data_()
{
return data_;
}
......@@ -434,7 +445,7 @@ public:
* \return the data_ structure
*
*/
inline const layout & get_data_() const
__device__ __host__ inline const layout & get_data_() const
{
return data_;
}
......
......@@ -73,21 +73,57 @@ struct frswap
template<unsigned int p, typename layout, typename data_type, typename g1_type, typename key_type, unsigned int sel = 2*is_layout_mlin<layout>::value + is_layout_inte<layout>::value >
struct mem_get
{
/*! \brief Return a reference to the selected element
*
* \param data object from where to take the element
* \param g1 grid information
* \param v1 element id
*
* \return a reference to the object selected
*
*/
__host__ __device__ static inline auto get(data_type & data_, const g1_type & g1, const key_type & v1) -> decltype(boost::fusion::at_c<p>(data_.mem_r.operator[](g1.LinId(v1)))) &
{
return boost::fusion::at_c<p>(data_.mem_r.operator[](g1.LinId(v1)));
}
/*! \brief Return a reference to the selected element
*
* \param data object from where to take the element
* \param g1 grid information
* \param v1 element id
*
* \return a reference to the object selected
*
*/
__host__ __device__ static inline auto get_lin(data_type & data_, const g1_type & g1, const size_t lin_id) -> decltype(boost::fusion::at_c<p>(data_.mem_r.operator[](lin_id))) &
{
return boost::fusion::at_c<p>(data_.mem_r.operator[](lin_id));
}
/*! \brief Return a reference to the selected element
*
* \param data object from where to take the element
* \param g1 grid information
* \param v1 element id
*
* \return a const reference to the object selected
*
*/
__host__ __device__ static inline auto get_c(const data_type & data_, const g1_type & g1, const key_type & v1) -> decltype(boost::fusion::at_c<p>(data_.mem_r.operator[](g1.LinId(v1)))) &
{
return boost::fusion::at_c<p>(data_.mem_r.operator[](g1.LinId(v1)));
}
/*! \brief Return a reference to the selected element
*
* \param data object from where to take the element
* \param g1 grid information
* \param v1 element id
*
* \return a const reference to the object selected
*
*/
__host__ __device__ static inline auto get_lin_c(const data_type & data_, const g1_type & g1, const size_t lin_id) -> decltype(boost::fusion::at_c<p>(data_.mem_r.operator[](lin_id))) &
{
return boost::fusion::at_c<p>(data_.mem_r.operator[](lin_id));
......@@ -98,21 +134,57 @@ struct mem_get
template<unsigned int p, typename layout, typename data_type, typename g1_type, typename key_type>
struct mem_get<p,layout,data_type,g1_type,key_type,1>
{
/*! \brief Return a reference to the selected element
*
* \param data object from where to take the element
* \param g1 grid information
* \param v1 element id
*
* \return a reference to the object selected
*
*/
__host__ __device__ static inline auto get(data_type & data_, const g1_type & g1, const key_type & v1) -> decltype(boost::fusion::at_c<p>(data_).mem_r.operator[](g1.LinId(v1)))
{
return boost::fusion::at_c<p>(data_).mem_r.operator[](g1.LinId(v1));
}
/*! \brief Return a reference to the selected element
*
* \param data object from where to take the element
* \param g1 grid information
* \param v1 element id
*
* \return a reference to the object selected
*
*/
__host__ __device__ static inline auto get_lin(data_type & data_, const g1_type & g1, size_t lin_id) -> decltype(boost::fusion::at_c<p>(data_).mem_r.operator[](lin_id))
{
return boost::fusion::at_c<p>(data_).mem_r.operator[](lin_id);
}
/*! \brief Return a reference to the selected element
*
* \param data object from where to take the element
* \param g1 grid information
* \param v1 element id
*
* \return a const reference to the object selected
*
*/
__host__ __device__ static inline auto get_c(const data_type & data_, const g1_type & g1, const key_type & v1) -> decltype(boost::fusion::at_c<p>(data_).mem_r.operator[](g1.LinId(v1)))
{
return boost::fusion::at_c<p>(data_).mem_r.operator[](g1.LinId(v1));
}
/*! \brief Return a reference to the selected element
*
* \param data object from where to take the element
* \param g1 grid information
* \param v1 element id
*
* \return a const reference to the object selected
*
*/
__host__ __device__ static inline auto get_lin_c(const data_type & data_, const g1_type & g1, size_t lin_id) -> decltype(boost::fusion::at_c<p>(data_).mem_r.operator[](lin_id))
{
return boost::fusion::at_c<p>(data_).mem_r.operator[](lin_id);
......
......@@ -24,7 +24,8 @@ public:
* \param exp grid_key_dx expression
*
*/
template<typename exp1> inline grid_key_dx(const grid_key_dx_expression<dim,exp1> & exp)
template<typename exp1>
__device__ __host__ inline grid_key_dx(const grid_key_dx_expression<dim,exp1> & exp)
{
for (size_t i = 0 ; i < dim ; i++)
this->k[i] = exp.value(i);
......@@ -116,6 +117,17 @@ public:
{this->k[i] = k[i];}
}
/*! \brief Constructor from buffer reference
*
* \param k reference buffer
*
*/
__device__ __host__ inline grid_key_dx(const unsigned int (&k)[dim])
{
for (size_t i = 0 ; i < dim ; i++)
{this->k[i] = k[i];}
}
/*! \brief Construct a grid key from a list of numbers
*
* \param cmb combination
......@@ -232,9 +244,10 @@ public:
* \return a grid_key_dx_expression that encapsulate the expression
*
*/
inline grid_key_dx_sum<dim,grid_key_dx<dim>,grid_key_dx<dim>> operator+(const grid_key_dx<dim> & p) const
__device__ __host__ inline grid_key_dx_sum<dim,grid_key_dx<dim,index_type>,grid_key_dx<dim,index_type>>
operator+(const grid_key_dx<dim,index_type> & p) const
{
grid_key_dx_sum<dim,grid_key_dx<dim>,grid_key_dx<dim>> exp_sum(*this,p);
grid_key_dx_sum<dim,grid_key_dx<dim,index_type>,grid_key_dx<dim,index_type>> exp_sum(*this,p);
return exp_sum;
}
......@@ -246,7 +259,8 @@ public:
* \return a grid_key_dx_expression that encapsulate the expression
*
*/
inline grid_key_dx_sum<dim,grid_key_dx<dim>,Point<dim,long int>> operator+(const Point<dim,long int> & p) const
inline grid_key_dx_sum<dim,grid_key_dx<dim>,Point<dim,long int>>
operator+(const Point<dim,long int> & p) const
{
grid_key_dx_sum<dim,grid_key_dx<dim>,Point<dim,long int>> exp_sum(*this,p);
......@@ -363,6 +377,10 @@ public:
return false;
}
static bool noPointers()
{
return true;
}
/*! \brief set the Key from a list of numbers
*
......
......@@ -413,7 +413,7 @@ struct unpack_simple_cond<true, prp ...>
* \param obj object where to unpack
*
*/
template<unsigned int ... prp> void unpack(ExtPreAlloc<S> & mem, grid_key_dx_iterator_sub<dims> & sub_it, Unpack_stat & ps)
template<unsigned int ... prp,typename S2> void unpack(ExtPreAlloc<S2> & mem, grid_key_dx_iterator_sub<dims> & sub_it, Unpack_stat & ps)
{
// object that store the information in mem
typedef object<typename object_creator<typename grid_base_impl<dim,T,S,layout,layout_base>::value_type::type,prp...>::type> prp_object;
......@@ -446,6 +446,66 @@ struct unpack_simple_cond<true, prp ...>
ps.addOffset(size);
}
/*! \brief unpack the sub-grid object applying an operation
*
* \tparam op operation
* \tparam prp properties to unpack
*
* \param mem preallocated memory from where to unpack the object
* \param sub sub-grid iterator
* \param obj object where to unpack
*
*/
template<template<typename,typename> class op, typename S2, unsigned int ... prp>
void unpack_with_op(ExtPreAlloc<S2> & mem, grid_key_dx_iterator_sub<dim> & sub2, Unpack_stat & ps)
{
PtrMemory * ptr1;
size_t sz[dim];
for (size_t i = 0 ; i < dim ; i++)
sz[i] = sub2.getStop().get(i) - sub2.getStart().get(i) + 1;
size_t tot = 1;
for (size_t i = 0 ; i < dim ; i++)
{tot *= sz[i];}
tot *= sizeof(T);
#ifdef SE_CLASS1
if (ps.getOffset() + tot > mem.size())
std::cerr << __FILE__ << ":" << __LINE__ << " Error: overflow in the receiving buffer for ghost_put" << std::endl;
#endif
// add the received particles to the vector
ptr1 = new PtrMemory(((char *)mem.getPointerBase()+ps.getOffset()),tot);
// create vector representation to a piece of memory already allocated
grid_base_impl<dim,T,PtrMemory,typename memory_traits_lin<T>::type,memory_traits_lin> gs;
gs.setMemory(*ptr1);
// resize with the number of elements
gs.resize(sz);
// Merge the information
auto it_src = gs.getIterator();
while (sub2.isNext())
{
object_s_di_op<op,decltype(gs.get_o(it_src.get())),decltype(this->get_o(sub2.get())),OBJ_ENCAP,prp...>(gs.get_o(it_src.get()),this->get_o(sub2.get()));
++sub2;
++it_src;
}
ps.addOffset(tot);
}
/*! \brief Calculate the memory size required to pack n elements
*
* Calculate the total size required to store n-elements in a vector
......
/*
* grid_performance_tests.hpp
*
* Created on: Nov 1, 2015
* Author: i-bird
*/
#ifndef OPENFPM_DATA_SRC_GRID_GRID_PERFORMANCE_TESTS_HPP_
#define OPENFPM_DATA_SRC_GRID_GRID_PERFORMANCE_TESTS_HPP_
#include "grid_util_test.hpp"
openfpm::vector<std::string> testsg;
openfpm::vector<float> per_timesg;
BOOST_AUTO_TEST_CASE(grid_performance_set_obj)
{
size_t sz[] = {128,128,128};
grid_cpu<3, Point_test<float> > c3(sz);
c3.setMemory();
fill_grid<3>(c3);
Point_test<float> f __attribute__((aligned(16)));
f.fill();
std::vector<double> times(N_STAT + 1);
times[0] = 1000;
for (size_t j = 0 ; j < 8 ; j++)
{
for (size_t i = 1 ; i < N_STAT+1 ; i++)
{
timer t;
t.start();
auto it = c3.getIterator();
while (it.isNext())
{
c3.set(it.get(),f);
++it;
}
t.stop();
times[i] = t.getwct();
}
std::sort(times.begin(),times.end());
sleep(5);
}
testsg.add("Grid so");
per_timesg.add(times[0]);
}
BOOST_AUTO_TEST_CASE(grid_performance_set_other_grid)
{
size_t sz[] = {128,128,128};
grid_cpu<3, Point_test<float> > c3(sz);
c3.setMemory();
fill_grid<3>(c3);
grid_cpu<3, Point_test<float> > c1(sz);
c1.setMemory();
std::vector<double> times(N_STAT + 1);
times[0] = 1000;
for (size_t j = 0 ; j < 8 ; j++)
{
for (size_t i = 1 ; i < N_STAT+1 ; i++)
{
timer t;
t.start();
auto it = c3.getIterator();
while (it.isNext())
{
c3.set(it.get(),c1,it.get());
++it;
}
t.stop();
times[i] = t.getwct();
}
std::sort(times.begin(),times.end());
sleep(5);
}
testsg.add("Grid sog");
per_timesg.add(times[0]);
}
BOOST_AUTO_TEST_CASE(grid_performance_set_other_grid_encap)
{
size_t sz[] = {128,128,128};
grid_cpu<3, Point_test<float> > c3(sz);
c3.setMemory();
fill_grid<3>(c3);
grid_cpu<3, Point_test<float> > c1(sz);
c1.setMemory();
std::vector<double> times(N_STAT + 1);
times[0] = 1000;
for (size_t j = 0 ; j < 8 ; j++)
{
for (size_t i = 1 ; i < N_STAT+1 ; i++)
{
timer t;
t.start();
auto it = c3.getIterator();
while (it.isNext())
{
c3.set(it.get(),c1.get_o(it.get()));
++it;
}
t.stop();
times[i] = t.getwct();
}
std::sort(times.begin(),times.end());
sleep(5);
}
testsg.add("Grid soge");
per_timesg.add(times[0]);
}
BOOST_AUTO_TEST_CASE(grid_performance_duplicate)
{
size_t sz[] = {128,128,128};
grid_cpu<3, Point_test<float> > c3(sz);
c3.setMemory();
fill_grid<3>(c3);
grid_cpu<3, Point_test<float> > c1;
std::vector<double> times(N_STAT_SMALL + 1);
times[0] = 1000;
for (size_t j = 0 ; j < 8 ; j++)
{
for (size_t i = 1 ; i < N_STAT_SMALL+1 ; i++)
{
timer t;
t.start();
c1 = c3.duplicate();
t.stop();
times[i] = t.getwct();
}
std::sort(times.begin(),times.end());
sleep(5);
}
testsg.add("Grid dup");
per_timesg.add(times[0]);
}
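All the benchmarks above follow the same measurement pattern: time the loop N_STAT times, keep the smallest wall-clock time as the representative value (times[0] starts at a large sentinel so that, after sorting, the front of the vector holds the minimum), and repeat the whole block with a pause in between. A minimal stand-alone sketch of that pattern; the workload call is a placeholder:
std::vector<double> times(N_STAT + 1);
times[0] = 1000.0;                       // sentinel, larger than any real measurement
for (size_t i = 1 ; i < N_STAT + 1 ; i++)
{
	timer t;
	t.start();
	run_workload();                      // hypothetical benchmark body
	t.stop();
	times[i] = t.getwct();
}
std::sort(times.begin(),times.end());
double best = times[0];                  // minimum wall-clock time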
/////// THIS IS NOT A TEST, IT WRITES THE PERFORMANCE RESULTS ///////
BOOST_AUTO_TEST_CASE(grid_performance_write_report)
{
openfpm::vector<std::string> yn;
openfpm::vector<openfpm::vector<float>> y;
// Get the directory of the performance test files
std::string per_dir(test_dir);
// Reference time
openfpm::vector<openfpm::vector<float>> y_ref;
y_ref.load(per_dir + std::string("/openfpm_data/ref_timesg"));
load_and_combine(per_dir + std::string("/openfpm_data/previous_measureg"),y,per_timesg);
// Adding the dataset names
if (y.size() != 0)
{
for (size_t j = 0; j < y.get(0).size(); j++)
yn.add("config " + std::to_string(j));
}
// Google charts options
GCoptions options;
options.title = std::string("Grid Performances");
options.yAxis = std::string("Time (seconds)");
options.xAxis = std::string("Benchmark");
options.stype = std::string("bars");
std::stringstream g_test_desc;
g_test_desc << "<h2>Grid performance test</h2>\n";
g_test_desc << "<strong>128x128x128 Grid containing a Point_test<float></strong><br>";
g_test_desc << "<strong>Grid so:</strong> Initialize each element of the grid<br>";
g_test_desc << "<strong>Grid sog:</strong> Manual copy of two grids<br>";
g_test_desc << "<strong>Grid soge:</strong> Manual copy of two grids in a different way<br>";
g_test_desc << "<strong>Grid dup:</strong> Duplication of the grid (Duplication include grid creation time)<br>";
cg.addHTML(g_test_desc.str());
cg.AddHistGraph(testsg,y,yn,options);
// compare the reference times with the actual times
// calculate speed-up
openfpm::vector<openfpm::vector<float>> y_ref_sup;
speedup_calculate(y_ref_sup,y,y_ref,yn);
std::stringstream g_test_spdesc;
g_test_spdesc << "<h2>Grid speedup</h2>\n";
g_test_spdesc << "The previous tests are compared with the best performances ever registered, ";
g_test_spdesc << "the banded area indicate the upper and lower bounds of the best registrered performances.<br>";
g_test_spdesc << "The lines are the latest 5 tests<br>";
g_test_spdesc << "<strong>Line inside the area</strong>: The tested configuration has no improvement or degradation in performance<br>";
g_test_spdesc << "<strong>Line break the upper bound</strong>: The tested configuration has improvement in performance<br>";
g_test_spdesc << "<strong>Line break the lower bound</strong>: The tested configuration has degradation in performance<br>";
g_test_spdesc << "<strong>Y axis:</strong> Performance change in percentage from the average of the best registered performances<br>";
cg.addHTML(g_test_spdesc.str());
cg.AddLinesGraph(testsg,y_ref_sup,yn,options);
}
#endif /* OPENFPM_DATA_SRC_GRID_GRID_PERFORMANCE_TESTS_HPP_ */
......@@ -13,9 +13,6 @@
#include "util/mathutil.hpp"
#include "iterators/stencil_type.hpp"
#define PERIODIC 1
#define NON_PERIODIC 0
// Box need the definition of grid_key_dx_r
#define HARDWARE 1
......@@ -67,9 +64,31 @@ public:
}
};
template<unsigned int dim>
struct ite_gpu
{
#ifdef CUDA_GPU
dim3 thr;
dim3 wthr;
grid_key_dx<dim,int> start;
grid_key_dx<dim,int> stop;
size_t nblocks()
{
return wthr.x * wthr.y * wthr.z;
}
#endif
};
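ite_gpu packages a complete CUDA launch configuration: wthr is the grid of blocks, thr the threads per block, and start/stop the key range the kernel is expected to cover. A hedged sketch of how it could be used; the kernel name, its arguments and the toKernel() accessor are assumptions, not shown in this hunk:
auto ite = g.getGPUIterator(k1,k2);      // g is a grid, k1/k2 the range corners
if (ite.nblocks() != 0)                  // skip empty ranges
{
	my_copy_kernel<<<ite.wthr,ite.thr>>>(g.toKernel(),ite.start,ite.stop);
}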
//! Declaration grid_sm
template<unsigned int N, typename T> class grid_sm;
template<unsigned int dim, typename T2, typename T>
ite_gpu<dim> getGPUIterator_impl(const grid_sm<dim,T2> & g1, const grid_key_dx<dim,T> & key1, const grid_key_dx<dim,T> & key2, size_t n_thr = 1024);
//! Declaration print_warning_on_adjustment
template <unsigned int dim> class print_warning_on_adjustment;
......@@ -686,11 +705,47 @@ public:
* \param stop stop point
*
*/
inline grid_key_dx_iterator_sub<N> getSubIterator(grid_key_dx<N> & start, grid_key_dx<N> & stop) const
inline grid_key_dx_iterator_sub<N> getSubIterator(const grid_key_dx<N> & start, const grid_key_dx<N> & stop) const
{
return grid_key_dx_iterator_sub<N>(*this,start,stop);
}
#ifdef CUDA_GPU
/*! \brief Get an iterator for the GPU
*
* \param start starting point
* \param stop end point
*
*/
template<typename T2>
struct ite_gpu<N> getGPUIterator(const grid_key_dx<N,T2> & key1, const grid_key_dx<N,T2> & key2, size_t n_thr = 1024) const
{
return getGPUIterator_impl<N>(*this,key1,key2,n_thr);
}
/*! \brief Get an iterator for the GPU
*
* \param start starting point
* \param stop end point
*
*/
struct ite_gpu<N> getGPUIterator(size_t n_thr = 1024) const
{
grid_key_dx<N> k1;
grid_key_dx<N> k2;
for (size_t i = 0 ; i < N ; i++)
{
k1.set_d(i,0);
k2.set_d(i,size(i));
}
return getGPUIterator_impl<N>(*this,k1,k2,n_thr);
}
#endif
/*! \brief swap the grid_sm informations
*
* \param g grid to swap
......@@ -734,6 +789,89 @@ public:
};
template<unsigned int dim, typename T2, typename T>
ite_gpu<dim> getGPUIterator_impl(const grid_sm<dim,T2> & g1, const grid_key_dx<dim,T> & key1, const grid_key_dx<dim,T> & key2, const size_t n_thr)
{
size_t tot_work = 1;
for (size_t i = 0 ; i < dim ; i++)
{tot_work *= key2.get(i) - key1.get(i) + 1;}
size_t n = (tot_work <= n_thr)?openfpm::math::round_big_2(tot_work):n_thr;
// Work to do
ite_gpu<dim> ig;
if (tot_work == 0)
{
ig.thr.x = 0;
ig.thr.y = 0;
ig.thr.z = 0;
ig.wthr.x = 0;
ig.wthr.y = 0;
ig.wthr.z = 0;
return ig;
}
ig.thr.x = 1;
ig.thr.y = 1;
ig.thr.z = 1;
int dir = 0;
while (n != 1)
{
if (dir % 3 == 0)
{ig.thr.x = ig.thr.x << 1;}
else if (dir % 3 == 1)
{ig.thr.y = ig.thr.y << 1;}
else if (dir % 3 == 2)
{ig.thr.z = ig.thr.z << 1;}
n = n >> 1;
dir++;
dir %= dim;
}
if (dim >= 1)
{ig.wthr.x = (key2.get(0) - key1.get(0) + 1) / ig.thr.x + (((key2.get(0) - key1.get(0) + 1)%ig.thr.x != 0)?1:0);}
if (dim >= 2)
{ig.wthr.y = (key2.get(1) - key1.get(1) + 1) / ig.thr.y + (((key2.get(1) - key1.get(1) + 1)%ig.thr.y != 0)?1:0);}
else
{ig.wthr.y = 1;}
if (dim >= 3)
{
// Roll the other dimensions on z
ig.wthr.z = 1;
for (size_t i = 2 ; i < dim ; i++)
{ig.wthr.z *= (key2.get(i) - key1.get(i) + 1) / ig.thr.z + (((key2.get(i) - key1.get(i) + 1)%ig.thr.z != 0)?1:0);}
}
else
{ig.wthr.z = 1;}
// crop if wthr == 1
if (dim >= 1 && ig.wthr.x == 1)
{ig.thr.x = (key2.get(0) - key1.get(0) + 1);}
if (dim >= 2 && ig.wthr.y == 1)
{ig.thr.y = key2.get(1) - key1.get(1) + 1;}
if (dim == 3 && ig.wthr.z == 1)
{ig.thr.z = key2.get(2) - key1.get(2) + 1;}
for (size_t i = 0 ; i < dim ; i++)
{
ig.start.set_d(i,key1.get(i));
ig.stop.set_d(i,key2.get(i));
}
return ig;
}
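getGPUIterator_impl caps the thread budget at the (power-of-two rounded) amount of work, distributes it round-robin over the dimensions by doubling thr.x, then thr.y, then thr.z, and finally sets wthr to the range divided by the block size, rounded up. A worked sketch under those rules, assuming dim = 2 and the default n_thr = 1024 (the grid_sm array constructor used below is an assumption; set_d is taken from the surrounding code):
size_t sz[2] = {64,64};
grid_sm<2,void> info(sz);
grid_key_dx<2> k1;
grid_key_dx<2> k2;
k1.set_d(0,0);  k1.set_d(1,0);
k2.set_d(0,63); k2.set_d(1,63);                  // 64x64 elements -> tot_work = 4096
auto ite = getGPUIterator_impl<2>(info,k1,k2);   // n_thr defaults to 1024
// n = 1024 is halved while thr.x and thr.y double alternately,
// giving thr = (32,32,1); wthr.x = ceil(64/32) = 2 and wthr.y = 2,
// so the launch is a 2x2 grid of blocks with 32x32 threads each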
/*! \brief Emulate grid_key_dx with runtime dimensionality
......
/*
* grid_test_utils.hpp
*
* Created on: Jun 16, 2019
* Author: i-bird
*/
#ifndef GRID_TEST_UTILS_HPP_
#define GRID_TEST_UTILS_HPP_
template<typename grid_type>
void copy_test(grid_type & g_src, grid_type & g_dst,
Box<grid_type::dims,size_t> & box_src, Box<grid_type::dims,size_t> & box_dst)
{
g_dst.setMemory();
g_src.setMemory();
auto itd = g_dst.getIterator();
while (itd.isNext())
{
auto k = itd.get();
g_dst.template get<0>(k) = 0;
g_dst.template get<1>(k)[0] = 0;
g_dst.template get<1>(k)[1] = 0;
g_dst.template get<1>(k)[2] = 0;
g_dst.template get<2>(k)[0][0] = 0;
g_dst.template get<2>(k)[0][1] = 0;
g_dst.template get<2>(k)[0][2] = 0;
g_dst.template get<2>(k)[1][0] = 0;
g_dst.template get<2>(k)[1][1] = 0;
g_dst.template get<2>(k)[1][2] = 0;
g_dst.template get<2>(k)[2][0] = 0;
g_dst.template get<2>(k)[2][1] = 0;
g_dst.template get<2>(k)[2][2] = 0;
++itd;
}
auto & gs = g_src.getGrid();
auto its = g_src.getIterator();
while (its.isNext())
{
auto k = its.get();
g_src.template get<0>(k) = gs.LinId(k);
g_src.template get<1>(k)[0] = gs.LinId(k) + 100;
g_src.template get<1>(k)[1] = gs.LinId(k) + 200;
g_src.template get<1>(k)[2] = gs.LinId(k) + 300;
g_src.template get<2>(k)[0][0] = gs.LinId(k) + 1000;
g_src.template get<2>(k)[0][1] = gs.LinId(k) + 2000;
g_src.template get<2>(k)[0][2] = gs.LinId(k) + 3000;
g_src.template get<2>(k)[1][0] = gs.LinId(k) + 4000;
g_src.template get<2>(k)[1][1] = gs.LinId(k) + 5000;
g_src.template get<2>(k)[1][2] = gs.LinId(k) + 6000;
g_src.template get<2>(k)[2][0] = gs.LinId(k) + 7000;
g_src.template get<2>(k)[2][1] = gs.LinId(k) + 8000;
g_src.template get<2>(k)[2][2] = gs.LinId(k) + 9000;
++its;
}
// copy
g_dst.copy_to(g_src,box_src,box_dst);
// Check
itd = g_dst.getIterator();
while (itd.isNext())
{
auto k = itd.get();
Point<grid_type::dims,size_t> p;
for (size_t i = 0 ; i < grid_type::dims ; i++)
{p.get(i) = k.get(i);}
if (box_dst.isInside(p) == true)
{
grid_key_dx<grid_type::dims> ks = k + box_src.getKP1() - box_dst.getKP1();
BOOST_REQUIRE_EQUAL(g_dst.template get<0>(k),gs.LinId(ks));
BOOST_REQUIRE_EQUAL(g_dst.template get<1>(k)[0],gs.LinId(ks) + 100);
BOOST_REQUIRE_EQUAL(g_dst.template get<1>(k)[1],gs.LinId(ks) + 200);
BOOST_REQUIRE_EQUAL(g_dst.template get<1>(k)[2],gs.LinId(ks) + 300);
BOOST_REQUIRE_EQUAL(g_dst.template get<2>(k)[0][0],gs.LinId(ks) + 1000);
BOOST_REQUIRE_EQUAL(g_dst.template get<2>(k)[0][1],gs.LinId(ks) + 2000);
BOOST_REQUIRE_EQUAL(g_dst.template get<2>(k)[0][2],gs.LinId(ks) + 3000);
BOOST_REQUIRE_EQUAL(g_dst.template get<2>(k)[1][0],gs.LinId(ks) + 4000);
BOOST_REQUIRE_EQUAL(g_dst.template get<2>(k)[1][1],gs.LinId(ks) + 5000);
BOOST_REQUIRE_EQUAL(g_dst.template get<2>(k)[1][2],gs.LinId(ks) + 6000);
BOOST_REQUIRE_EQUAL(g_dst.template get<2>(k)[2][0],gs.LinId(ks) + 7000);
BOOST_REQUIRE_EQUAL(g_dst.template get<2>(k)[2][1],gs.LinId(ks) + 8000);
BOOST_REQUIRE_EQUAL(g_dst.template get<2>(k)[2][2],gs.LinId(ks) + 9000);
}
else
{
BOOST_REQUIRE_EQUAL(g_dst.template get<0>(k),0);
BOOST_REQUIRE_EQUAL(g_dst.template get<1>(k)[0],0);
BOOST_REQUIRE_EQUAL(g_dst.template get<1>(k)[1],0);
BOOST_REQUIRE_EQUAL(g_dst.template get<1>(k)[2],0);
BOOST_REQUIRE_EQUAL(g_dst.template get<2>(k)[0][0],0);
BOOST_REQUIRE_EQUAL(g_dst.template get<2>(k)[0][1],0);
BOOST_REQUIRE_EQUAL(g_dst.template get<2>(k)[0][2],0);
BOOST_REQUIRE_EQUAL(g_dst.template get<2>(k)[1][0],0);
BOOST_REQUIRE_EQUAL(g_dst.template get<2>(k)[1][1],0);
BOOST_REQUIRE_EQUAL(g_dst.template get<2>(k)[1][2],0);
BOOST_REQUIRE_EQUAL(g_dst.template get<2>(k)[2][0],0);
BOOST_REQUIRE_EQUAL(g_dst.template get<2>(k)[2][1],0);
BOOST_REQUIRE_EQUAL(g_dst.template get<2>(k)[2][2],0);
}
++itd;
}
}
#endif /* GRID_TEST_UTILS_HPP_ */
......@@ -7,11 +7,12 @@
#include "Space/Shape/HyperCube.hpp"
#include "timer.hpp"
#include "grid_util_test.hpp"
#include "grid_test_utils.hpp"
#ifdef TEST_COVERAGE_MODE
#define GS_SIZE 8
constexpr int GS_SIZE = 8;
#else
#define GS_SIZE 128
constexpr int GS_SIZE = 128;
#endif
template<unsigned int dim, typename g> void test_layout_gridNd(g & c3, size_t sz);
......@@ -775,6 +776,21 @@ BOOST_AUTO_TEST_CASE(copy_encap_vector_fusion_test)
BOOST_REQUIRE_EQUAL(g.template get<2>(key)[2][2],g.template get<2>(key1)[2][2]);
}
BOOST_AUTO_TEST_CASE(grid_test_copy_to)
{
size_t sz_dst[] = {5,5};
size_t sz_src[] = {3,2};
grid_cpu<2,aggregate<float,float[3],float[3][3]>> g_dst(sz_dst);
grid_cpu<2,aggregate<float,float[3],float[3][3]>> g_src(sz_src);
Box<2,size_t> box_dst({1,2},{2,3});
Box<2,size_t> box_src({1,0},{2,1});
copy_test(g_src,g_dst,box_src,box_dst);
}
BOOST_AUTO_TEST_SUITE_END()
#endif
......@@ -468,6 +468,8 @@ public:
this->stl_code.increment();
//! check for overflow of all the indexes except the last dimension
post_increment();
return *this;
......@@ -541,7 +543,6 @@ public:
* \param g_s_it grid_key_dx_iterator_sub
*
*/
inline void reinitialize(const grid_key_dx_iterator_sub<dim> & g_s_it)
{
// Reinitialize the iterator
......
......@@ -101,7 +101,11 @@ public:
* \param bc boundary conditions
*
*/
template<typename T> grid_key_dx_iterator_sub_bc(const grid_sm<dim,T> & g, const grid_key_dx<dim> & start , const grid_key_dx<dim> & stop, const size_t (& bc)[dim])
template<typename T>
grid_key_dx_iterator_sub_bc(const grid_sm<dim,T> & g,
const grid_key_dx<dim> & start,
const grid_key_dx<dim> & stop,
const size_t (& bc)[dim])
:act(0)
{
Initialize(g,start,stop,bc);
......@@ -115,7 +119,10 @@ public:
* \param bc boundary conditions
*
*/
template<typename T> void Initialize(const grid_sm<dim,T> & g, const grid_key_dx<dim> & start , const grid_key_dx<dim> & stop, const size_t (& bc)[dim])
template<typename T> void Initialize(const grid_sm<dim,T> & g,
const grid_key_dx<dim> & start ,
const grid_key_dx<dim> & stop,
const size_t (& bc)[dim])
{
// copy the boundary conditions
......@@ -186,14 +193,14 @@ public:
// if intersect add in the box list
if (intersect == true)
boxes.push_back(b_out);
{boxes.push_back(b_out);}
++it;
}
// initialize the first iterator
if (boxes.size() > 0)
grid_key_dx_iterator_sub<dim,stencil,warn>::reinitialize(grid_key_dx_iterator_sub<dim>(g,boxes[0].getKP1(),boxes[0].getKP2()));
{grid_key_dx_iterator_sub<dim,stencil,warn>::reinitialize(grid_key_dx_iterator_sub<dim>(g,boxes[0].getKP1(),boxes[0].getKP2()));}
}
/*! \brief Get the next element
......
......@@ -100,7 +100,9 @@ class grid_cpu
template<unsigned int dim, typename T, typename S>
class grid_cpu<dim,T,S,typename memory_traits_lin<T>::type> : public grid_base_impl<dim,T,S,typename memory_traits_lin<T>::type, memory_traits_lin>
{
typedef typename apply_transform<memory_traits_inte,T>::type T_;
typedef typename apply_transform<memory_traits_lin,T>::type T_;
T background;
public:
......@@ -114,6 +116,11 @@ public:
//! Grid_cpu has no grow policy
typedef void grow_policy;
//! type that identify one point in the grid
typedef grid_key_dx<dim> base_key;
//! sub-grid iterator type
typedef grid_key_dx_iterator_sub<dim> sub_grid_iterator_type;
//! Default constructor
inline grid_cpu() THROW
......@@ -158,10 +165,12 @@ public:
* \param g grid to copy
*
*/
grid_cpu<dim,T,S,typename memory_traits_lin<T>::type> & operator=(const grid_base_impl<dim,T,S,layout,memory_traits_lin> & g)
grid_cpu<dim,T,S,typename memory_traits_lin<T>::type> & operator=(const grid_cpu<dim,T,S,typename memory_traits_lin<T>::type> & g)
{
(static_cast<grid_base_impl<dim,T,S,typename memory_traits_lin<T>::type, memory_traits_lin> *>(this))->swap(g.duplicate());
meta_copy<T>::meta_copy_(g.background,background);
return *this;
}
......@@ -170,10 +179,12 @@ public:
* \param g grid to copy
*
*/
grid_cpu<dim,T,S,typename memory_traits_lin<T>::type> & operator=(grid_base_impl<dim,T,S,layout,memory_traits_lin> && g)
grid_cpu<dim,T,S,typename memory_traits_lin<T>::type> & operator=(grid_cpu<dim,T,S,typename memory_traits_lin<T>::type> && g)
{
(static_cast<grid_base_impl<dim,T,S,typename memory_traits_lin<T>::type, memory_traits_lin> *>(this))->swap(g);
meta_copy<T>::meta_copy_(g.background,background);
return *this;
}