Skip to content
Snippets Groups Projects
Commit f5174f2b authored by yaskovet's avatar yaskovet
Browse files

Merge DCPSE on GPU

parents c12d45a1 3caed06b
No related branches found
No related tags found
No related merge requests found
...@@ -18,6 +18,7 @@ endif() ...@@ -18,6 +18,7 @@ endif()
if (NOT CUDA_ON_BACKEND STREQUAL "None") if (NOT CUDA_ON_BACKEND STREQUAL "None")
set(CUDA_SOURCES Operators/Vector/vector_dist_operators_unit_tests.cu set(CUDA_SOURCES Operators/Vector/vector_dist_operators_unit_tests.cu
DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cu DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cu
DCPSE/DCPSE_op/tests/DCPSE_op_subset_test.cu
Operators/Vector/vector_dist_operators_apply_kernel_unit_tests.cu) Operators/Vector/vector_dist_operators_apply_kernel_unit_tests.cu)
endif() endif()
...@@ -286,7 +287,6 @@ install(FILES DCPSE/Dcpse.hpp ...@@ -286,7 +287,6 @@ install(FILES DCPSE/Dcpse.hpp
DCPSE/DCPSE_op/DCPSE_Solver.cuh DCPSE/DCPSE_op/DCPSE_Solver.cuh
DCPSE/DcpseDiagonalScalingMatrix.hpp DCPSE/DcpseDiagonalScalingMatrix.hpp
DCPSE/DcpseRhs.hpp DCPSE/DcpseRhs.hpp
DCPSE/DcpseRhs.cuh
DCPSE/Monomial.hpp DCPSE/Monomial.hpp
DCPSE/Monomial.cuh DCPSE/Monomial.cuh
DCPSE/MonomialBasis.hpp DCPSE/MonomialBasis.hpp
...@@ -294,7 +294,6 @@ install(FILES DCPSE/Dcpse.hpp ...@@ -294,7 +294,6 @@ install(FILES DCPSE/Dcpse.hpp
DCPSE/SupportBuilder.cuh DCPSE/SupportBuilder.cuh
DCPSE/SupportBuilder.hpp DCPSE/SupportBuilder.hpp
DCPSE/Vandermonde.hpp DCPSE/Vandermonde.hpp
DCPSE/Vandermonde.cuh
DCPSE/VandermondeRowBuilder.hpp DCPSE/VandermondeRowBuilder.hpp
DCPSE/DcpseInterpolation.hpp DCPSE/DcpseInterpolation.hpp
DESTINATION openfpm_numerics/include/DCPSE DESTINATION openfpm_numerics/include/DCPSE
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -17,7 +17,7 @@ private: ...@@ -17,7 +17,7 @@ private:
const monomialBasis_type& monomialBasis; const monomialBasis_type& monomialBasis;
public: public:
__host__ __device__ DcpseDiagonalScalingMatrix(const monomialBasis_type &monomialBasis) : monomialBasis(monomialBasis) {} DcpseDiagonalScalingMatrix(const monomialBasis_type &monomialBasis) : monomialBasis(monomialBasis) {}
template <typename T, typename MatrixType, typename vector_type, typename vector_type2> template <typename T, typename MatrixType, typename vector_type, typename vector_type2>
void buildMatrix(MatrixType &M, Support support, T eps, vector_type & particlesFrom , vector_type2 & particlesTo) void buildMatrix(MatrixType &M, Support support, T eps, vector_type & particlesFrom , vector_type2 & particlesTo)
......
//
// Created by Serhii
//
#ifndef OPENFPM_PDATA_DCPSERHS_CUH
#define OPENFPM_PDATA_DCPSERHS_CUH
#include "MonomialBasis.hpp"
template<unsigned int dim, typename MonomialBasis_type>
class DcpseRhs_gpu
{
private:
const Point<dim, unsigned int>& differentialSignature;
const MonomialBasis_type& monomialBasis;
int sign;
public:
__host__ __device__ DcpseRhs_gpu(const MonomialBasis_type &monomialBasis, const Point<dim, unsigned int> &differentialSignature);
template<typename T>
__host__ __device__ void getVector(T* b);
};
template<unsigned int dim, typename MonomialBasis_type>
__host__ __device__ DcpseRhs_gpu<dim, MonomialBasis_type>::DcpseRhs_gpu(const MonomialBasis_type &monomialBasis,
const Point<dim, unsigned int> &differentialSignature)
: monomialBasis(monomialBasis), differentialSignature(differentialSignature) {
unsigned int order = (Monomial_gpu<dim>(differentialSignature)).order();
if (order % 2 == 0)
sign = 1;
else
sign = -1;
}
template<unsigned int dim, typename MonomialBasis_type>
template<typename T>
__host__ __device__ void DcpseRhs_gpu<dim, MonomialBasis_type>::getVector(T* b)
{
const auto& basisElements = monomialBasis.getElements();
for (size_t i = 0; i < basisElements.size(); ++i) {
const Monomial_gpu<dim>& dm = basisElements.get(i).getDerivative(differentialSignature);
b[i] = sign * dm.evaluate(Point<dim, T>(0));
}
}
#endif //OPENFPM_PDATA_DCPSERHS_CUH
...@@ -53,7 +53,7 @@ __global__ void gatherSupportSize_gpu( ...@@ -53,7 +53,7 @@ __global__ void gatherSupportSize_gpu(
size_t el = cl.get(cellLinId, k); size_t el = cl.get(cellLinId, k);
if (p_key == el) continue; if (p_key == el) continue;
if (pos.distance(particles.getPos(el)) < rCut) ++N; if (pos.distance(particles.getPosOrig(el)) < rCut) ++N;
} }
} while (nextCell<dim>(offset, 2+1)); } while (nextCell<dim>(offset, 2+1));
...@@ -90,7 +90,7 @@ __global__ void assembleSupport_gpu(particles_type particles, CellList_type cl, ...@@ -90,7 +90,7 @@ __global__ void assembleSupport_gpu(particles_type particles, CellList_type cl,
size_t el = cl.get(cellLinId, k); size_t el = cl.get(cellLinId, k);
if (p_key == el) continue; if (p_key == el) continue;
if (pos.distance(particles.getPos(el)) < rCut) supportKeys[N++] = el; if (pos.distance(particles.getPosOrig(el)) < rCut) supportKeys[N++] = el;
} }
} while (nextCell<dim>(offset, 2+1)); } while (nextCell<dim>(offset, 2+1));
...@@ -103,22 +103,22 @@ class SupportBuilderGPU ...@@ -103,22 +103,22 @@ class SupportBuilderGPU
{ {
private: private:
vector_type &domain; vector_type &domain;
const Point<vector_type::dims, unsigned int> differentialSignature;
typename vector_type::stype rCut; typename vector_type::stype rCut;
public: public:
SupportBuilderGPU(vector_type &domain, Point<vector_type::dims, unsigned int> differentialSignature, typename vector_type::stype rCut) SupportBuilderGPU(vector_type &domain, typename vector_type::stype rCut)
: domain(domain), differentialSignature(differentialSignature), rCut(rCut) {} : domain(domain), rCut(rCut) {}
SupportBuilderGPU(vector_type &domain, unsigned int differentialSignature[vector_type::dims], typename vector_type::stype rCut)
: SupportBuilderGPU(domain, Point<vector_type::dims, unsigned int>(differentialSignature), rCut) {}
void getSupport(size_t N, openfpm::vector_custd<size_t>& kerOffsets, openfpm::vector_custd<size_t>& supportKeys1D, void getSupport(size_t N, openfpm::vector_custd<size_t>& kerOffsets, openfpm::vector_custd<size_t>& supportKeys1D,
size_t& maxSupport, size_t& supportKeysTotalN) size_t& maxSupport, size_t& supportKeysTotalN)
{ {
domain.hostToDevicePos(); domain.hostToDevicePos();
auto it = domain.getDomainIteratorGPU(512); auto it = domain.getDomainIteratorGPU(512);
auto NN = domain.getCellListGPU(rCut); typedef CellList_gen<vector_type::dims, typename vector_type::stype, Process_keys_lin, Mem_fast<CudaMemory>, shift<vector_type::dims, typename vector_type::stype>> params;
// auto NN = domain.getCellListGPU(rCut);
auto NN = domain.template getCellList<params>(rCut);
NN.hostToDevice();
// +1 to allow getting size from cumulative sum: "size[i+1] - size[i]" // +1 to allow getting size from cumulative sum: "size[i+1] - size[i]"
kerOffsets.resize(N+1); kerOffsets.resize(N+1);
......
//
// Created by Serhii on 15/05/21
//
#ifndef OPENFPM_PDATA_VANDERMONDE_CUH
#define OPENFPM_PDATA_VANDERMONDE_CUH
#include "MonomialBasis.hpp"
#include "VandermondeRowBuilder.hpp"
#include "Support.hpp"
template<unsigned int dim, typename T, typename MonomialBasis_type = MonomialBasis<dim>>
class Vandermonde_gpu
{
private:
size_t N;
T* offsets;
const Point<dim, T> point;
const MonomialBasis_type& monomialBasis;
T eps;
public:
template<typename vector_type>
__host__ __device__ Vandermonde_gpu( T* mem, size_t supportRefKey, size_t supportKeysSize,
const size_t* supportKeys, const MonomialBasis_type &monomialBasis, const vector_type & particles)
: offsets(mem), monomialBasis(monomialBasis), point(particles.getPos(supportRefKey))
{
initialize(supportRefKey, supportKeysSize, supportKeys, particles);
}
__host__ __device__ void getMatrix(T *M)
{
// Build the Vandermonde_gpu matrix, row-by-row
VandermondeRowBuilder<dim, T, MonomialBasis_type> vrb(monomialBasis);
unsigned int row = 0;
for (size_t i = 0; i < N; ++i)
{
const T(*offset) [dim] = reinterpret_cast<const T(*) [dim]>(&offsets[i*dim]);
vrb.buildRow_gpu(&M[i*monomialBasis.size()], row, *offset, eps);
++row;
}
}
__host__ __device__ T getEps()
{
return eps;
}
__host__ __device__ ~Vandermonde_gpu()
{
}
private:
__host__ __device__ void computeEps(T factor)
{
T avgNeighbourSpacing = 0;
for (size_t i = 0; i < N; ++i)
{
const T(*offset) [dim] = reinterpret_cast<const T(*) [dim]>(&offsets[i*dim]);
avgNeighbourSpacing += computeAbsSum(*offset);
}
avgNeighbourSpacing /= N;
eps = factor * avgNeighbourSpacing;
assert(eps != 0);
}
__host__ __device__ static T computeAbsSum(const Point<dim, T> &x)
{
T absSum = 0;
for (unsigned int i = 0; i < dim; ++i)
{
absSum += fabs(x.value(i));
}
return absSum;
}
__host__ __device__ static T computeAbsSum(const T (& x) [dim])
{
T absSum = 0;
for (unsigned int i = 0; i < dim; ++i)
{
absSum += fabs(x[i]);
}
return absSum;
}
template<typename vector_type>
__host__ __device__ void initialize(size_t supportRefKey, size_t supportKeysSize, const size_t* supportKeys, const vector_type & particles)
{
N = supportKeysSize;
for (int i = 0 ; i < N ; i++)
{
size_t otherKey = supportKeys[i];
Point<dim,T> p = particles.getPos(supportRefKey);
const auto& p2 = particles.getPos(otherKey);
p -= p2;
for (size_t j = 0; j < dim; j++) {
offsets[i*dim+j] = p[j];
auto diff = p[j];
}
}
computeEps(2);
}
};
#endif //OPENFPM_PDATA_VANDERMONDE_CUH
...@@ -15,12 +15,10 @@ private: ...@@ -15,12 +15,10 @@ private:
const MonomialBasis_type& monomialBasis; const MonomialBasis_type& monomialBasis;
public: public:
__host__ __device__ VandermondeRowBuilder(const MonomialBasis_type &monomialBasis) : monomialBasis(monomialBasis) {} VandermondeRowBuilder(const MonomialBasis_type &monomialBasis) : monomialBasis(monomialBasis) {}
template <typename MatrixType> template <typename MatrixType>
void buildRow(MatrixType &M, unsigned int row, Point<dim, T> x, T eps); void buildRow(MatrixType &M, unsigned int row, Point<dim, T> x, T eps);
__host__ __device__ void buildRow_gpu(T *M, unsigned int row, const T (& x) [dim], T eps);
}; };
template<unsigned int dim, typename T,typename MonomialBasis_type> template<unsigned int dim, typename T,typename MonomialBasis_type>
...@@ -37,18 +35,5 @@ void VandermondeRowBuilder<dim, T, MonomialBasis_type>::buildRow(MatrixType &M, ...@@ -37,18 +35,5 @@ void VandermondeRowBuilder<dim, T, MonomialBasis_type>::buildRow(MatrixType &M,
} }
} }
template<unsigned int dim, typename T, typename MonomialBasis_type>
__host__ __device__ void VandermondeRowBuilder<dim, T, MonomialBasis_type>::buildRow_gpu(T* M, unsigned int row, const T (& x) [dim], T eps)
{
// M is a pointer to the row being filled
const auto& basisElements = monomialBasis.getElements();
for (size_t col = 0; col < basisElements.size(); ++col)
{
const Monomial_gpu<dim>& m = basisElements.get(col);
M[col] = m.evaluate(x) / pow(eps, m.order());
}
}
#endif //OPENFPM_PDATA_VANDERMONDEROW_HPP #endif //OPENFPM_PDATA_VANDERMONDEROW_HPP
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#define OPENFPM_NUMERICS_SRC_OPERATORS_VECTOR_VECTOR_DIST_OPERATORS_HPP_ #define OPENFPM_NUMERICS_SRC_OPERATORS_VECTOR_VECTOR_DIST_OPERATORS_HPP_
#include "Vector/vector_dist.hpp" #include "Vector/vector_dist.hpp"
#include "Vector/vector_dist_subset.hpp"
#include "lib/pdata.hpp" #include "lib/pdata.hpp"
#include "cuda/vector_dist_operators_cuda.cuh" #include "cuda/vector_dist_operators_cuda.cuh"
...@@ -980,7 +981,7 @@ public: ...@@ -980,7 +981,7 @@ public:
* \return the result of the expression * \return the result of the expression
* *
*/ */
__device__ __host__ inline auto value(const unsigned int & k) const -> decltype(pos_or_propR<vector,prp>::value(v.v,k)) __host__ __device__ inline auto value(const unsigned int & k) const -> decltype(pos_or_propR<vector,prp>::value(v.v,k))
{ {
return pos_or_propR<vector,prp>::value(v.v,k); return pos_or_propR<vector,prp>::value(v.v,k);
} }
...@@ -992,7 +993,7 @@ public: ...@@ -992,7 +993,7 @@ public:
* \return the result of the expression * \return the result of the expression
* *
*/ */
__device__ __host__ inline auto value(const unsigned int & k) -> decltype(pos_or_propR<vector,prp>::value(v.v,k)) __host__ __device__ inline auto value(const unsigned int & k) -> decltype(pos_or_propR<vector,prp>::value(v.v,k))
{ {
return pos_or_propR<vector,prp>::value(v.v,k); return pos_or_propR<vector,prp>::value(v.v,k);
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment