Commit 8a24928d authored by incardon's avatar incardon

Merge branch 'master' into 'develop'

Master

See merge request !4
parents b6a8524b 53098d9f
Pipeline #2425 passed with stages
in 8 minutes and 46 seconds
......@@ -33,6 +33,9 @@ if (ENABLE_GPU)
elseif ( CUDA_VERSION_MAJOR EQUAL 11 AND CUDA_VERSION_MINOR EQUAL 0 )
message("CUDA is compatible 11.0")
set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=3059 --diag_suppress=3058 --diag_suppress=3057 --diag_suppress=3056 --diag_suppress=611 --diag_suppress=186" --expt-extended-lambda)
elseif ( CUDA_VERSION_MAJOR EQUAL 11 AND CUDA_VERSION_MINOR EQUAL 1 )
message("CUDA is compatible 11.1")
set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=3124 --diag_suppress=3126 --diag_suppress=3125 --diag_suppress=3123 --diag_suppress=611 --diag_suppress=186 --diag_suppress=128" --expt-extended-lambda)
else()
# Keep this list in sync with the CUDA_VERSION_MAJOR/CUDA_VERSION_MINOR branches of this if/elseif chain
message(FATAL_ERROR "CUDA is incompatible, only versions 9.2 10.1 10.2 11.0 and 11.1 are supported")
endif()
......
......@@ -9,6 +9,11 @@ branch=$4
echo "Build on: $hostname with $type_compile branch: $branch"
# On the CentOS node (matched by hostname) source the devtoolset-7 enable script
if [[ "$hostname" == "cifarm-centos-node.mpi-cbg.de" ]]; then
	echo "CentOS node"
	source /opt/rh/devtoolset-7/enable
fi
# Check if libHilbert is installed
if [ ! -d $HOME/openfpm_dependencies/openfpm_data/LIBHILBERT ]; then
......
......@@ -269,6 +269,7 @@ install(FILES util/check_no_pointers.hpp
util/for_each_ref.hpp
util/for_each_ref_host.hpp
util/mathutil.hpp util/object_creator.hpp
util/object_si_di.hpp
util/object_s_di.hpp
util/zmorton.hpp
util/object_si_d.hpp
......
......@@ -13,6 +13,7 @@
#include "cuda/cuda_grid_gpu_funcs.cuh"
#include "util/create_vmpl_sequence.hpp"
#include "util/cuda/cuda_launch.hpp"
#include "util/object_si_di.hpp"
constexpr int DATA_ON_HOST = 32;
constexpr int DATA_ON_DEVICE = 64;
......@@ -305,6 +306,8 @@ public:
typedef ord_type linearizer_type;
typedef T background_type;
protected:
//! Memory layout specification + memory chunk pointer
......@@ -1246,7 +1249,7 @@ public:
while (sub_src.isNext())
{
// write the object in the last element
object_s_di_op<op,decltype(gs.get_o(sub_src.get())),decltype(this->get_o(sub_dst.get())),OBJ_ENCAP,prp...>(gs.get_o(sub_src.get()),this->get_o(sub_dst.get()));
object_si_di_op<op,decltype(gs.get_o(sub_src.get())),decltype(this->get_o(sub_dst.get())),OBJ_ENCAP,prp...>(gs.get_o(sub_src.get()),this->get_o(sub_dst.get()));
++sub_src;
++sub_dst;
......@@ -1667,11 +1670,12 @@ public:
*
* \param start point
* \param stop point
* \param to_init unused bool
*
* \return a sub-grid iterator
*
*/
inline grid_key_dx_iterator_sub<dim> getIterator(const grid_key_dx<dim> & start, const grid_key_dx<dim> & stop) const
inline grid_key_dx_iterator_sub<dim> getIterator(const grid_key_dx<dim> & start, const grid_key_dx<dim> & stop, bool to_init = false) const
{
// get the starting point and the end point of the real domain
......
......@@ -34,7 +34,9 @@ struct setBackground_impl
inline void operator()(T& t)
{
for (size_t i = 0 ; i < loc_grid.size() ; i++)
{loc_grid.get(i).template setBackgroundValue<T::value>(bck.template get<T::value>());}
{
loc_grid.get(i).template setBackgroundValue<T::value>(bck.template get<T::value>());
}
}
};
......
......@@ -500,7 +500,9 @@ struct unpack_simple_cond<true, prp ...>
for (size_t i = 0 ; i < dim ; i++)
{tot *= sz[i];}
tot *= sizeof(T);
typedef object<typename object_creator<typename T::type,prp...>::type> prp_object;
tot *= sizeof(prp_object);
#ifdef SE_CLASS1
......@@ -513,7 +515,7 @@ struct unpack_simple_cond<true, prp ...>
ptr1 = new PtrMemory(((char *)mem.getPointerBase()+ps.getOffset()),tot);
// create vector representation to a piece of memory already allocated
grid_base_impl<dim,T,PtrMemory,typename memory_traits_lin<T>::type,memory_traits_lin,ord_type> gs;
grid_base_impl<dim,prp_object,PtrMemory,typename memory_traits_lin<prp_object>::type,memory_traits_lin,ord_type> gs;
gs.setMemory(*ptr1);
......
......@@ -339,6 +339,18 @@ public:
return background;
}
/*! \brief Get the whole background object
*
* Returns the `background` member by non-const reference, so the caller
* can read it or modify it in place.
*
* For dense grid this function is useless
*
* \return reference to the background value
*
*/
T & getBackgroundValueAggr()
{
return background;
}
/*! \brief Set the background value
*
* \tparam p property to set
......@@ -879,6 +891,18 @@ public:
return background;
}
/*! \brief Get the whole background object
*
* Returns the `background` member by non-const reference, so the caller
* can read it or modify it in place.
*
* For dense grid this function is useless
*
* \return reference to the background value
*
*/
T & getBackgroundValueAggr()
{
return background;
}
/*! \brief assign operator
*
* \return itself
......
......@@ -191,7 +191,7 @@ class CellList_gpu : public CellDecomposer_sm<dim,T,transform>
int mid = gs.LinId(middle);
grid_key_dx_iterator_sub<3> it(gs,start,stop);
grid_key_dx_iterator_sub<dim> it(gs,start,stop);
size_t i = 0;
while (it.isNext())
......
......@@ -269,48 +269,53 @@ public:
};
//! It copies one element of the chunk for each property
/*template<typename e_src, typename e_dst, int ... prp>
struct copy_unpacker_chunk
template<unsigned int prop, typename T>
struct std_array_copy_chunks
{
//! encapsulated object source
const e_src & src;
//! encapsulated object destination
e_dst & dst;
template<typename Tsrc, typename Tdst>
static void copy(Tsrc & src, Tdst & dst, size_t pos)
{
typedef typename std::remove_reference<decltype(dst.template get<prop>()[0])>::type copy_rtype;
//! element to copy
size_t sub_id;*/
meta_copy<copy_rtype>::meta_copy_(src.template get<prop>()[0],dst.template get<prop>()[pos]);
}
/*! \brief constructor
*
*
* \param src source encapsulated object
* \param dst destination encapsulated object
*
*/
/* inline copy_unpacker_chunk(const e_src & src, e_dst & dst, size_t sub_id)
:src(src),dst(dst),sub_id(sub_id)
template<typename Tsrc, typename Tdst>
static void copy_unpack(Tsrc & src, Tdst & dst, size_t pos)
{
};
// Remove the reference from the type to copy
typedef typename boost::remove_reference<decltype(src.template get< prop >())>::type copy_rtype;
meta_copy<copy_rtype>::meta_copy_(src.template get< prop >(),dst.template get< prop >()[pos]);
}
};
//! It call the copy function for each property
template<typename T>
inline void operator()(T& t) const
template<unsigned int prop, typename T, unsigned int N1>
struct std_array_copy_chunks<prop,T[N1]>
{
template<typename Tsrc, typename Tdst>
static void copy(Tsrc & src, Tdst & dst, size_t pos)
{
// Convert variadic to boost::vector
typedef typename boost::mpl::vector_c<unsigned int,prp...> prpv;
typedef typename std::remove_reference<decltype(dst.template get<prop>()[0][pos])>::type copy_rtype;
// element id to copy applying an operation
typedef typename boost::mpl::at<prpv,T>::type ele_cop;
for (int i = 0 ; i < N1 ; i++)
{
meta_copy<copy_rtype>::meta_copy_(dst.template get<prop>()[i][pos],src.template get<prop>()[i][0]);
}
}
template<typename Tsrc, typename Tdst>
static void copy_unpack(Tsrc & src, Tdst & dst, size_t pos)
{
// Remove the reference from the type to copy
typedef typename boost::remove_reference<decltype(src.template get< ele_cop::value >())>::type copy_rtype;
typedef typename boost::remove_reference<decltype(src.template get< prop >()[0])>::type copy_rtype;
meta_copy<copy_rtype>::meta_copy_(src.template get< ele_cop::value >(),dst.template get< ele_cop::value >()[sub_id]);
for (int i = 0 ; i < N1 ; i++)
{
meta_copy<copy_rtype>::meta_copy_(src.template get< prop >()[i],dst.template get< prop >()[i][pos]);
}
}
};*/
};
//! It copies one element of the chunk for each property
template<typename e_src, typename e_dst>
......@@ -342,10 +347,9 @@ struct copy_unpacker_chunk
template<typename T>
inline void operator()(T& t) const
{
// Remove the reference from the type to copy
typedef typename boost::remove_reference<decltype(src.template get< T::value >())>::type copy_rtype;
typedef typename std::remove_reference<decltype(src.template get<T::value>())>::type copy_rtype;
meta_copy<copy_rtype>::meta_copy_(src.template get< T::value >(),dst.template get< T::value >()[sub_id]);
std_array_copy_chunks<T::value,copy_rtype>::copy_unpack(src,dst,sub_id);
}
};
......
......@@ -127,7 +127,7 @@ template<unsigned int dim ,typename T> class Sphere
*/
__device__ __host__ bool isInside(Point<dim,T> p) const
{
T dist;
T dist = 0.0;
// calculate the distance of the center from the point
......@@ -156,7 +156,7 @@ template<unsigned int dim ,typename T> class Sphere
template<typename Distance>
__device__ __host__ bool isInside(Point<dim,T> p) const
{
T dist;
T dist = 0.0;
// Object to calculate distances
Distance d;
......@@ -187,7 +187,7 @@ template<unsigned int dim ,typename T> class Sphere
template<typename Distance> bool
__device__ __host__ isInside(float * pnt) const
{
T dist;
T dist = 0.0;
// Object to calculate distances
Distance d;
......
......@@ -27,7 +27,7 @@ BOOST_AUTO_TEST_CASE( Sphere_test_use)
BOOST_REQUIRE_EQUAL(s.isInside(p3),false);
double dist = s.distance(p3);
BOOST_REQUIRE_EQUAL(dist,0.0866025403784);
BOOST_REQUIRE_EQUAL(dist,0.15980762113533162);
}
BOOST_AUTO_TEST_SUITE_END()
This diff is collapsed.
......@@ -18,11 +18,25 @@ const static int cnk_mask = 2;
//! When we have more than 1024 to remove, remove them
#define FLUSH_REMOVE 1024
//! transform T=aggregate<float,double,int> into aggregate<float[n_ele],double[n_ele],int[n_ele]>
/*! \brief Identity wrapper around a type
 *
 * Exposes the wrapped type T unchanged as the nested `type`.
 *
 * \tparam T wrapped type
 *
 */
template<typename T>
struct encapsulated_type
{
	//! the wrapped type
	using type = T;
};
//! transform T=aggregate<float,double,int> into aggregate<std::array<float,n_ele>,std::array<double,n_ele>,std::array<int,n_ele>>
template <typename n_ele, typename T>
struct Ft_chunk
{
typedef std::array<typename std::remove_const<typename std::remove_reference<T>::type>::type,n_ele::value> type;
typedef encapsulated_type<std::array<typename std::remove_const<typename std::remove_reference<T>::type>::type,n_ele::value>> type;
};
/*! \brief Specialization of Ft_chunk for array properties
 *
 * Selected when the property type is given as `const T(&)[N1]`. Each of the N1
 * array components becomes a std::array of n_ele::value elements, and the
 * resulting array type is wrapped in encapsulated_type.
 *
 * \tparam n_ele type whose n_ele::value gives the number of elements
 * \tparam T element type of the array property
 * \tparam N1 extent of the array property
 *
 */
template <typename n_ele, typename T, int N1>
struct Ft_chunk<n_ele,const T(&)[N1]>
{
	//! wrapped chunk type: N1 arrays of n_ele::value elements of T
	using type = encapsulated_type<std::array<T,n_ele::value>[N1]>;
};
template<unsigned int dim>
......
......@@ -39,7 +39,7 @@ BOOST_AUTO_TEST_CASE( sparse_grid_chunk_test )
{
typedef aggregate<double> T;
typedef typename v_transform_two<Ft_chunk,boost::mpl::int_<test_chunking3::size::value>,typename T::type>::type chunk_def;
typedef typename v_transform_two_v2<Ft_chunk,boost::mpl::int_<test_chunking3::size::value>,typename T::type>::type chunk_def;
double chunk_with_border[66*10*6];
unsigned char mask[66*10*6];
......@@ -163,7 +163,7 @@ BOOST_AUTO_TEST_CASE( sparse_grid_chunk_test_2 )
{
typedef aggregate<double> T;
typedef typename v_transform_two<Ft_chunk,boost::mpl::int_<test_chunking3::size::value>,typename T::type>::type chunk_def;
typedef typename v_transform_two_v2<Ft_chunk,boost::mpl::int_<test_chunking3::size::value>,typename T::type>::type chunk_def;
double chunk_with_border[68*12*8];
unsigned char mask[68*12*8];
......
This diff is collapsed.
......@@ -205,7 +205,7 @@ struct loadBlock_impl
*
*
*/
template<unsigned int N1, typename T, typename SparseGridType,typename backgroundType>
template<unsigned int N1, typename T, typename SparseGridType>
static void loadBorder(T arr[N1],
SparseGridType & sgt,
size_t chunk_id,
......@@ -214,7 +214,6 @@ struct loadBlock_impl
openfpm::vector<unsigned int> & chunk_ids ,
openfpm::vector<short int> & offsets,
unsigned char mask[N1],
backgroundType & background,
openfpm::vector<unsigned int> & maps_blk)
{
typedef typename generate_array_vector<size_t,typename vector_blocks_exts::type>::result size;
......@@ -247,7 +246,7 @@ struct loadBlock_impl
auto & h = header_mask.get(ac);
arr[b] = (ac == data.size()-1)?background.template get<prop>():data.template get<prop>(ac)[off];
arr[b] = (ac == data.size()-1)?data.template get<prop>(0)[off]:data.template get<prop>(ac)[off];
mask[b] = (ac == data.size()-1)?0:exist_sub(h,off);
}
}
......@@ -297,7 +296,7 @@ struct loadBlock_impl<prop,stencil_size,3,vector_blocks_exts,vector_ext>
*
*
*/
template<bool findNN, typename NNType, unsigned int N1, typename T, typename SparseGridType,typename backgroundType>
template<bool findNN, typename NNType, unsigned int N1, typename T, typename SparseGridType>
inline static void loadBorder(T arr[N1],
SparseGridType & sgt,
size_t chunk_id,
......@@ -306,7 +305,6 @@ struct loadBlock_impl<prop,stencil_size,3,vector_blocks_exts,vector_ext>
openfpm::vector<unsigned int> & chunk_ids ,
openfpm::vector<short int> & offsets,
unsigned char mask[N1],
backgroundType & background,
openfpm::vector<unsigned int> & maps_blk)
{
typedef typename generate_array_vector<size_t,typename vector_blocks_exts::type>::result size;
......@@ -499,9 +497,6 @@ class grid_key_sparse_dx_iterator_block_sub
// temporary buffer for Load border
openfpm::vector<unsigned int> maps_blk;
//! background value
typename SparseGridType::background_type & background;
//!iteration block
Box<dim,size_t> block_it;
......@@ -559,10 +554,9 @@ public:
grid_key_sparse_dx_iterator_block_sub(SparseGridType & spg,
const grid_key_dx<dim> & start,
const grid_key_dx<dim> & stop,
typename SparseGridType::background_type & background)
const grid_key_dx<dim> & stop)
:spg(spg),chunk_id(1),
start_(start),stop_(stop),background(background)
start_(start),stop_(stop)
{
// Create border coeficents
get_block_sizes<dim,stencil_size,vector_blocks_exts,vector_ext> gbs;
......@@ -754,7 +748,7 @@ public:
auto & header_inf = spg.private_get_header_inf();
loadBlock_impl<prop,stencil_size,dim,vector_blocks_exts,vector_ext>::template loadBlock<sizeBlockBord>(arr,spg,chunk_id,mask);
loadBlock_impl<prop,stencil_size,dim,vector_blocks_exts,vector_ext>::template loadBorder<findNN,NNtype,sizeBlockBord>(arr,spg,chunk_id,bord,block_skin,chunk_shifts,offsets,mask,background,maps_blk);
loadBlock_impl<prop,stencil_size,dim,vector_blocks_exts,vector_ext>::template loadBorder<findNN,NNtype,sizeBlockBord>(arr,spg,chunk_id,bord,block_skin,chunk_shifts,offsets,mask,maps_blk);
hm = &header_mask.get(chunk_id);
hc = &header_inf.get(chunk_id);
......
This diff is collapsed.
......@@ -278,6 +278,14 @@ public:
unsetBit(bitMask, EXIST_BIT);
}
/*! \brief Eliminate internal temporary buffers
*
* Forwards the request to blockMap.removeUnusedBuffers().
* You can call this between flushes if you run out of memory.
*
*/
void removeUnusedBuffers()
{
blockMap.removeUnusedBuffers();
}
/*! \brief Return internal structure block map
*
......
......@@ -2796,7 +2796,7 @@ public:
n_pnt = tmp.template get<0>((i+1)*(indexBuffer.size() + 1)-1);
n_cnk = tmp.template get<1>((i+1)*(indexBuffer.size() + 1)-1);
req += sizeof(indexT) + // byte required to pack the number of chunk packed
req += sizeof(size_t) + // byte required to pack the number of chunk packed
2*dim*sizeof(int) + // starting point + size of the indexing packing
sizeof(indexT)*n_cnk + // byte required to pack the chunk indexes
align_number(sizeof(indexT),(n_cnk+1)*sizeof(unsigned int)) + // byte required to pack the scan of the chunk point
......@@ -3223,6 +3223,15 @@ public:
}
}
/*! \brief Eliminate internal temporary buffers
*
* Forwards the request to the BlockMapGpu implementation of removeUnusedBuffers().
* You can call this between flushes if you run out of memory.
*
*/
void removeUnusedBuffers()
{
BlockMapGpu<AggregateInternalT, threadBlockSize, indexT, layout_base>::removeUnusedBuffers();
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////
/*! \brief Return a SparseGrid iterator
......
......@@ -1976,6 +1976,7 @@ BOOST_AUTO_TEST_CASE(testSparseGridGpuOutput3DHeatStencil)
sparseGrid.flush < smax_< 0 >> (ctx, flush_type::FLUSH_ON_DEVICE);
sparseGrid.removeUnusedBuffers();
// Sphere 2
grid_key_dx<3,int> start2({192,192,192});
......
......@@ -2013,6 +2013,28 @@ namespace openfpm
return blf.get_outputMap();
}
/*! \brief Eliminate internal temporary buffers
 *
 * Every temporary insertion buffer is resized to zero elements and then
 * shrunk to fit. You can call this between flushes if you run out of memory.
 *
 */
void removeUnusedBuffers()
{
	// NOTE(review): vct_add_data used to be cleared twice in a row; the second
	// pass was a no-op. Confirm that no other buffer was meant to be listed here.
	vct_add_data.resize(0);
	vct_add_data.shrink_to_fit();

	vct_add_data_reord.resize(0);
	vct_add_data_reord.shrink_to_fit();

	vct_add_data_cont.resize(0);
	vct_add_data_cont.shrink_to_fit();

	vct_add_data_unique.resize(0);
	vct_add_data_unique.shrink_to_fit();
}
/* \brief Return the offsets of the segments for the merge indexes
*
*
......
......@@ -127,23 +127,23 @@ struct aggregate
return boost::fusion::at_c<i>(data);
}
/*! \brief it return false if this aggregate has no pointers
/*! \brief get the properties i
*
* \return the property i
*
*/
static bool noPointers()
template<unsigned int i> const typename boost::mpl::at<type,boost::mpl::int_<i>>::type & get() const
{
return !has_pack_gen<aggregate<list ...>>::value;
return boost::fusion::at_c<i>(data);
}
/*! \brief get the properties i
/*! \brief it return false if this aggregate has no pointers
*
* \return the property i
*
*/
template<unsigned int i> const typename boost::mpl::at<type,boost::mpl::int_<i>>::type & get() const
static bool noPointers()
{
return boost::fusion::at_c<i>(data);
return !has_pack_gen<aggregate<list ...>>::value;
}
aggregate<list...> & operator=(const aggregate<list...> & ag)
......@@ -180,6 +180,26 @@ struct aggregate_bfv
//! data to store
type data;
/*! \brief Get a reference to the property i
*
* The property is looked up in the `data` member with boost::fusion::at_c.
*
* \tparam i index of the property inside `data`
*
* \return the property i
*
*/
template<unsigned int i> typename boost::mpl::at<type,boost::mpl::int_<i>>::type & get()
{
return boost::fusion::at_c<i>(data);
}
/*! \brief Get a const reference to the property i
*
* Const overload: the property is looked up in the `data` member with
* boost::fusion::at_c and cannot be modified through the returned reference.
*
* \tparam i index of the property inside `data`
*
* \return the property i
*
*/
template<unsigned int i> const typename boost::mpl::at<type,boost::mpl::int_<i>>::type & get() const
{
return boost::fusion::at_c<i>(data);
}
static const unsigned int max_prop = boost::mpl::size<type>::type::value;
};
......
......@@ -13,6 +13,10 @@
#include "Vector/vect_isel.hpp"
#include "Vector/util.hpp"
//! Layout identifier published by memory_traits_lin through its type_value typedef
constexpr int AOS_layout = 0;
//! SOA layout identifier
constexpr int SOA_layout = 1;
//! Layout identifier published by memory_traits_inte through its type_value typedef
constexpr int SOA_layout_IA = 2;
/*! \brief This class converts a boost::mpl::fusion/vector to a boost::mpl::fusion/vector with memory_c interleaved
*
* This class converts a boost::mpl::fusion/vector to a boost::mpl::fusion/vector with memory_c interleaved
......@@ -80,6 +84,8 @@ struct memory_traits_inte
//! indicate that it change the memory layout from the original
typedef int yes_is_inte;
typedef boost::mpl::int_<SOA_layout_IA> type_value;
};
/*! \brief Transform the boost::fusion::vector into memory specification (memory_traits)
......@@ -144,6 +150,8 @@ struct memory_traits_lin
typedef typename memory_traits_lin_type<T,openfpm::vect_isel<T>::value == OPENFPM_NATIVE>::type type;
typedef int yes_is_tlin;
typedef boost::mpl::int_<AOS_layout> type_value;
};
......
......@@ -10,6 +10,43 @@
#include <boost/type_traits.hpp>
#include <boost/mpl/vector_c.hpp>
#include <iostream>
template<typename T> struct meta_copy;
template<template<typename,typename> class op, typename T> struct meta_copy_op;
template<typename T> struct meta_compare;
/*! \brief Structure to copy aggregates
*
* \tparam aggregate to copy
*
*/
template<typename S, typename S2>
struct copy_aggregate_dual
{
//! src
const S src;
//! Destination grid
S2 & dst;
//! copy_aggregate
inline copy_aggregate_dual(S src, S2 & dst)
:src(src),dst(dst){};
//! It call the copy function for each member
template<typename T>
inline void operator()(T& t) const
{
// This is the type of the object we have to copy
typedef typename boost::fusion::result_of::at_c<typename S2::type,T::value>::type copy_type;
// Remove the reference from the type to copy
typedef typename boost::remove_reference<copy_type>::type copy_rtype;
meta_copy<copy_rtype>::meta_copy_(src.template get<T::value>(),dst.template get<T::value>());
}
};
template<typename T> struct meta_copy;
template<template<typename,typename> class op, typename T> struct meta_copy_op;
......
......@@ -803,7 +803,7 @@ struct meta_copy_op_d
* \param dst destination object
*