Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • mosaic/software/parallel-computing/openfpm/openfpm_numerics
  • argupta/openfpm_numerics
2 results
Show changes
Commits on Source (2)
......@@ -17,7 +17,7 @@ endif()
if (NOT CUDA_ON_BACKEND STREQUAL "None")
set(CUDA_SOURCES Operators/Vector/vector_dist_operators_unit_tests.cu
#DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cu
DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cu
OdeIntegrators/tests/Odeintegrators_test_gpu.cu
#DCPSE/DCPSE_op/tests/DCPSE_op_subset_test.cu
Operators/Vector/vector_dist_operators_apply_kernel_unit_tests.cu)
......
......@@ -17,7 +17,6 @@
#include <iostream>
#include "../DCPSE_op.hpp"
#include "../DCPSE_Solver.hpp"
#include "../DCPSE_Solver.cuh"
#include "Operators/Vector/vector_dist_operators.hpp"
#include "Vector/vector_dist_subset.hpp"
#include "../EqnsStruct.hpp"
......@@ -154,7 +153,7 @@ BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
size_t bc[2] = {NON_PERIODIC, NON_PERIODIC};
double spacing = box.getHigh(0) / (sz[0] - 1);
Ghost<2, double> ghost(spacing * 3);
double rCut = 3.1 * spacing;
double rCut = 2.0 * spacing;
BOOST_TEST_MESSAGE("Init vector_dist...");
vector_dist_gpu<2, double, aggregate<double,double,double,double>> domain(0, box, bc, ghost);
......@@ -180,7 +179,7 @@ BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
domain.map();
domain.ghost_get<0>();
Laplacian_gpu Lap(domain, 2, rCut);
Laplacian_gpu Lap(domain, 2, rCut, 2,support_options::N_PARTICLES);
DCPSE_scheme_gpu<equations2d1_gpu,decltype(domain)> Solver( domain);
......@@ -415,10 +414,9 @@ BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
domain.map();
domain.ghost_get<0>();
Derivative_x_gpu Dx(domain, 2, rCut / 3.0 ,1.9/*,support_options::RADIUS*/);
Derivative_y_gpu Dy(domain, 2, rCut / 3.0 ,1.9/*,support_options::RADIUS*/);
Laplacian_gpu Lap(domain, 2, rCut / 3.0 ,1.9/*,support_options::RADIUS*/);
Derivative_x_gpu Dx(domain, 2, rCut/3.0 ,1.9,support_options::N_PARTICLES);
Derivative_y_gpu Dy(domain, 2, rCut/3.0,1.9,support_options::N_PARTICLES);
Laplacian_gpu Lap(domain, 2, rCut/3.0 ,1.9,support_options::N_PARTICLES);
openfpm::vector<aggregate<int>> bulk;
openfpm::vector<aggregate<int>> up_p;
......@@ -552,7 +550,7 @@ BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
BOOST_AUTO_TEST_CASE(dcpse_poisson_Dirichlet_anal) {
// int rank;
// MPI_Comm_rank(MPI_COMM_WORLD, &rank);
const size_t sz[2] = {200,200};
const size_t sz[2] = {81,81};
Box<2, double> box({0, 0}, {1, 1});
size_t bc[2] = {NON_PERIODIC, NON_PERIODIC};
double spacing = box.getHigh(0) / (sz[0] - 1);
......@@ -860,7 +858,7 @@ BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
size_t bc[2] = {NON_PERIODIC, NON_PERIODIC};
double spacing = box.getHigh(0) / (sz[0] - 1);
Ghost<2, double> ghost(spacing * 3);
double rCut = 3.1 * spacing;
double rCut = 2.0 * spacing;
BOOST_TEST_MESSAGE("Init vector_dist...");
vector_dist_gpu<2, double, aggregate<double,double,double,double,double,VectorS<2, double>>> domain(0, box, bc, ghost);
......@@ -886,8 +884,8 @@ BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
domain.map();
domain.ghost_get<0>();
Derivative_y_gpu Dy(domain, 2, rCut);
Laplacian_gpu Lap(domain, 2, rCut);
Derivative_y_gpu Dy(domain, 2, rCut,2,support_options::N_PARTICLES);
Laplacian_gpu Lap(domain, 2, rCut, 3,support_options::N_PARTICLES);
DCPSE_scheme_gpu<equations2d1_gpu,decltype(domain)> Solver(domain);
......@@ -1033,9 +1031,10 @@ BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
domain.map();
domain.ghost_get<0>();
Derivative_x_gpu Dx(domain, 2, rCut);
Derivative_y_gpu Dy(domain, 2, rCut);
Laplacian_gpu Lap(domain, 2, rCut);
Derivative_x_gpu Dx(domain, 2, rCut,1.9,support_options::N_PARTICLES);
Derivative_y_gpu Dy(domain, 2, rCut,1.9,support_options::N_PARTICLES);
Laplacian_gpu Lap(domain, 2, rCut, 1.9,support_options::N_PARTICLES);
petsc_solver<double> solver;
solver.setRestart(500);
solver.setSolver(KSPGMRES);
......@@ -1175,10 +1174,9 @@ BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
domain.map();
domain.ghost_get<0>();
Derivative_x_gpu Dx(domain, 2, rCut,1.9,support_options::RADIUS);
Derivative_y_gpu Dy(domain, 2, rCut,1.9,support_options::RADIUS);
Laplacian_gpu Lap(domain, 2, rCut,1.9,support_options::RADIUS);
Derivative_x Dx(domain, 2, rCut,1.9,support_options::RADIUS);
Derivative_y Dy(domain, 2, rCut,1.9,support_options::RADIUS);
Laplacian Lap(domain, 2, rCut,1.9,support_options::RADIUS);
openfpm::vector<aggregate<int>> bulk;
openfpm::vector<aggregate<int>> up_p;
......
......@@ -484,79 +484,10 @@ public:
private:
template <typename U>
void initializeAdaptive(vector_type &particles,
unsigned int convergenceOrder,
double rCut) {
// Still need to be tested
#ifdef SE_CLASS1
this->update_ctr=particles.getMapCtr();
#endif
if (!isSharedSupport) {
subsetKeyPid.resize(particles.size_local_orig());
supportRefs.resize(particles.size_local());
}
localEps.resize(particles.size_local());
localEpsInvPow.resize(particles.size_local());
kerOffsets.resize(particles.size_local()+1);
const T condVTOL = 1e2;
if (!isSharedSupport) {
SupportBuilder<vector_type,vector_type>
supportBuilder(particles, particles, differentialSignature, rCut, differentialOrder == 0);
unsigned int requiredSupportSize = monomialBasis.size();
// need to resize supportKeys1D to yet unknown supportKeysTotalN
// add() takes too long
openfpm::vector<openfpm::vector<size_t>> tempSupportKeys(supportRefs.size());
auto it = particles.getDomainIterator();
while (it.isNext()) {
auto key_o = particles.getOriginKey(it.get());
subsetKeyPid.get(key_o.getKey()) = it.get().getKey();
Support support = supportBuilder.getSupport(it, requiredSupportSize, opt);
supportRefs.get(key_o.getKey()) = key_o.getKey();
tempSupportKeys.get(key_o.getKey()) = support.getKeys();
kerOffsets.get(key_o.getKey()) = supportKeysTotalN;
if (maxSupportSize < support.size())
maxSupportSize = support.size();
supportKeysTotalN += support.size();
EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> V(support.size(), monomialBasis.size());
// Vandermonde matrix computation
Vandermonde<dim, T, EMatrix<T, Eigen::Dynamic, Eigen::Dynamic>>
vandermonde(support, monomialBasis, particles, particles,HOverEpsilon);
vandermonde.getMatrix(V);
T condV = conditionNumber(V, condVTOL);
T eps = vandermonde.getEps();
if (condV > condVTOL) {
requiredSupportSize *= 2;
std::cout << "INFO: Increasing, requiredSupportSize = " << requiredSupportSize << std::endl; // debug
continue;
} else requiredSupportSize = monomialBasis.size();
++it;
}
kerOffsets.get(supportRefs.size()) = supportKeysTotalN;
supportKeys1D.resize(supportKeysTotalN);
size_t offset = 0;
for (size_t i = 0; i < tempSupportKeys.size(); ++i)
for (size_t j = 0; j < tempSupportKeys.get(i).size(); ++j, ++offset)
supportKeys1D.get(offset) = tempSupportKeys.get(i).get(j);
}
kerOffsets.hostToDevice(); supportKeys1D.hostToDevice();
assembleLocalMatrices(cublasDgetrfBatched, cublasDtrsmBatched);
}
void initializeAdaptive(vector_type &particles,
unsigned int convergenceOrder,
float rCut) {
U rCut) {
// Still need to be tested
#ifdef SE_CLASS1
this->update_ctr=particles.getMapCtr();
......@@ -598,13 +529,11 @@ private:
EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> V(support.size(), monomialBasis.size());
// Vandermonde matrix computation
Vandermonde<dim, T, EMatrix<T, Eigen::Dynamic, Eigen::Dynamic>>
vandermonde(support, monomialBasis, particles, particles, HOverEpsilon);
vandermonde(support, monomialBasis, particles, particles,HOverEpsilon);
vandermonde.getMatrix(V);
T condV = conditionNumber(V, condVTOL);
T eps = vandermonde.getEps();
if (condV > condVTOL) {
requiredSupportSize *= 2;
std::cout << "INFO: Increasing, requiredSupportSize = " << requiredSupportSize << std::endl; // debug
......@@ -624,13 +553,14 @@ private:
}
kerOffsets.hostToDevice(); supportKeys1D.hostToDevice();
assembleLocalMatrices(cublasSgetrfBatched, cublasStrsmBatched);
assembleLocalMatrices_t(rCut);
}
template <typename U>
void initializeStaticSize(vector_type &particles,
unsigned int convergenceOrder,
double rCut,
double supportSizeFactor) {
U rCut,
U supportSizeFactor) {
#ifdef SE_CLASS1
this->update_ctr=particles.getMapCtr();
#endif
......@@ -697,82 +627,19 @@ std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution
std::chrono::duration<double> time_span2 = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1);
std::cout << "Support building took " << time_span2.count() * 1000. << " milliseconds." << std::endl;
assembleLocalMatrices(cublasDgetrfBatched, cublasDtrsmBatched);
assembleLocalMatrices_t(rCut);
}
// ad hoc solution to template specialization for float/double
void initializeStaticSize(vector_type &particles,
unsigned int convergenceOrder,
float rCut,
float supportSizeFactor) {
#ifdef SE_CLASS1
this->update_ctr=particles.getMapCtr();
#endif
this->rCut=rCut;
this->supportSizeFactor=supportSizeFactor;
this->convergenceOrder=convergenceOrder;
// Cublas subroutine selector: float or double
// rCut is needed to overcome limitation of nested template class specialization
template <typename U> void assembleLocalMatrices_t(U rCut) {
throw std::invalid_argument("DCPSE operator error: CUBLAS supports only float or double"); }
if (!isSharedSupport) {
subsetKeyPid.resize(particles.size_local_orig());
supportRefs.resize(particles.size_local());
}
localEps.resize(particles.size_local());
localEpsInvPow.resize(particles.size_local());
void assembleLocalMatrices_t(float rCut) {
assembleLocalMatrices(cublasSgetrfBatched, cublasStrsmBatched); }
std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now();
auto it = particles.getDomainIterator();
if (opt==support_options::RADIUS) {
if (!isSharedSupport) {
while (it.isNext()) {
auto key_o = it.get(); subsetKeyPid.get(particles.getOriginKey(key_o).getKey()) = key_o.getKey();
supportRefs.get(key_o.getKey()) = key_o.getKey();
++it;
}
SupportBuilderGPU<vector_type> supportBuilder(particles, rCut);
supportBuilder.getSupport(supportRefs.size(), kerOffsets, supportKeys1D, maxSupportSize, supportKeysTotalN);
}
} else {
if (!isSharedSupport){
openfpm::vector<openfpm::vector<size_t>> tempSupportKeys(supportRefs.size());
size_t requiredSupportSize = monomialBasis.size() * supportSizeFactor;
// need to resize supportKeys1D to yet unknown supportKeysTotalN
// add() takes too long
SupportBuilder<vector_type,vector_type> supportBuilder(particles, particles, differentialSignature, rCut, differentialOrder == 0);
kerOffsets.resize(supportRefs.size()+1);
while (it.isNext()) {
auto key_o = it.get(); subsetKeyPid.get(particles.getOriginKey(key_o).getKey()) = key_o.getKey();
Support support = supportBuilder.getSupport(it, requiredSupportSize, opt);
supportRefs.get(key_o.getKey()) = key_o.getKey();
tempSupportKeys.get(key_o.getKey()) = support.getKeys();
kerOffsets.get(key_o.getKey()) = supportKeysTotalN;
if (maxSupportSize < support.size()) maxSupportSize = support.size();
supportKeysTotalN += support.size();
++it;
}
kerOffsets.get(supportRefs.size()) = supportKeysTotalN;
supportKeys1D.resize(supportKeysTotalN);
size_t offset = 0;
for (size_t i = 0; i < tempSupportKeys.size(); ++i)
for (size_t j = 0; j < tempSupportKeys.get(i).size(); ++j, ++offset)
supportKeys1D.get(offset) = tempSupportKeys.get(i).get(j);
}
kerOffsets.hostToDevice(); supportKeys1D.hostToDevice();
}
std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> time_span2 = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1);
std::cout << "Support building took " << time_span2.count() * 1000. << " milliseconds." << std::endl;
assembleLocalMatrices(cublasSgetrfBatched, cublasStrsmBatched);
}
void assembleLocalMatrices_t(double rCut) {
assembleLocalMatrices(cublasDgetrfBatched, cublasDtrsmBatched); }
template<typename cublasLUDec_type, typename cublasTriangSolve_type>
void assembleLocalMatrices(cublasLUDec_type cublasLUDecFunc, cublasTriangSolve_type cublasTriangSolveFunc) {
......