Compare revisions

yaskovet · yaskovet · 86873545 · 86873545 · 86873545
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -17,7 +17,7 @@ endif()

 if (NOT CUDA_ON_BACKEND STREQUAL "None")
 	set(CUDA_SOURCES Operators/Vector/vector_dist_operators_unit_tests.cu
-					 #DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cu
+					 DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cu
 			 	 	 OdeIntegrators/tests/Odeintegrators_test_gpu.cu
 					 #DCPSE/DCPSE_op/tests/DCPSE_op_subset_test.cu
 					 Operators/Vector/vector_dist_operators_apply_kernel_unit_tests.cu)

--- a/src/DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cu
+++ b/src/DCPSE/DCPSE_op/tests/DCPSE_op_Solver_test.cu
@@ -17,7 +17,6 @@
 #include <iostream>
 #include "../DCPSE_op.hpp"
 #include "../DCPSE_Solver.hpp"
-#include "../DCPSE_Solver.cuh"
 #include "Operators/Vector/vector_dist_operators.hpp"
 #include "Vector/vector_dist_subset.hpp"
 #include "../EqnsStruct.hpp"
@@ -154,7 +153,7 @@ BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
        size_t bc[2] = {NON_PERIODIC, NON_PERIODIC};
        double spacing = box.getHigh(0) / (sz[0] - 1);
        Ghost<2, double> ghost(spacing * 3);
-        double rCut = 3.1 * spacing;
+        double rCut = 2.0 * spacing;
        BOOST_TEST_MESSAGE("Init vector_dist...");

        vector_dist_gpu<2, double, aggregate<double,double,double,double>> domain(0, box, bc, ghost);
@@ -180,7 +179,7 @@ BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
        domain.map();
        domain.ghost_get<0>();

-        Laplacian_gpu Lap(domain, 2, rCut);
+        Laplacian_gpu Lap(domain, 2, rCut, 2,support_options::N_PARTICLES);

        DCPSE_scheme_gpu<equations2d1_gpu,decltype(domain)> Solver( domain);

@@ -415,10 +414,9 @@ BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
        domain.map();
        domain.ghost_get<0>();

-        Derivative_x_gpu Dx(domain, 2, rCut / 3.0 ,1.9/*,support_options::RADIUS*/);
-        Derivative_y_gpu Dy(domain, 2, rCut / 3.0 ,1.9/*,support_options::RADIUS*/);
-        Laplacian_gpu Lap(domain, 2, rCut / 3.0 ,1.9/*,support_options::RADIUS*/);
-
+        Derivative_x_gpu Dx(domain, 2, rCut/3.0 ,1.9,support_options::N_PARTICLES);
+        Derivative_y_gpu Dy(domain, 2, rCut/3.0,1.9,support_options::N_PARTICLES);
+        Laplacian_gpu Lap(domain, 2, rCut/3.0 ,1.9,support_options::N_PARTICLES);

        openfpm::vector<aggregate<int>> bulk;
        openfpm::vector<aggregate<int>> up_p;
@@ -552,7 +550,7 @@ BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
    BOOST_AUTO_TEST_CASE(dcpse_poisson_Dirichlet_anal) {
 //  int rank;
 //  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-        const size_t sz[2] = {200,200};
+        const size_t sz[2] = {81,81};
        Box<2, double> box({0, 0}, {1, 1});
        size_t bc[2] = {NON_PERIODIC, NON_PERIODIC};
        double spacing = box.getHigh(0) / (sz[0] - 1);
@@ -860,7 +858,7 @@ BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
        size_t bc[2] = {NON_PERIODIC, NON_PERIODIC};
        double spacing = box.getHigh(0) / (sz[0] - 1);
        Ghost<2, double> ghost(spacing * 3);
-        double rCut = 3.1 * spacing;
+        double rCut = 2.0 * spacing;
        BOOST_TEST_MESSAGE("Init vector_dist...");

        vector_dist_gpu<2, double, aggregate<double,double,double,double,double,VectorS<2, double>>> domain(0, box, bc, ghost);
@@ -886,8 +884,8 @@ BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
        domain.map();
        domain.ghost_get<0>();

-        Derivative_y_gpu Dy(domain, 2, rCut);
-        Laplacian_gpu Lap(domain, 2, rCut);
+        Derivative_y_gpu Dy(domain, 2, rCut,2,support_options::N_PARTICLES);
+        Laplacian_gpu Lap(domain, 2, rCut, 3,support_options::N_PARTICLES);

        DCPSE_scheme_gpu<equations2d1_gpu,decltype(domain)> Solver(domain);

@@ -1033,9 +1031,10 @@ BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
        domain.map();
        domain.ghost_get<0>();

-        Derivative_x_gpu Dx(domain, 2, rCut);
-        Derivative_y_gpu Dy(domain, 2, rCut);
-        Laplacian_gpu Lap(domain, 2, rCut);
+        Derivative_x_gpu Dx(domain, 2, rCut,1.9,support_options::N_PARTICLES);
+        Derivative_y_gpu Dy(domain, 2, rCut,1.9,support_options::N_PARTICLES);
+        Laplacian_gpu Lap(domain, 2, rCut, 1.9,support_options::N_PARTICLES);
+
        petsc_solver<double> solver;
        solver.setRestart(500);
        solver.setSolver(KSPGMRES);
@@ -1175,10 +1174,9 @@ BOOST_AUTO_TEST_CASE(dcpse_op_vec3d_gpu) {
        domain.map();
        domain.ghost_get<0>();

-        Derivative_x_gpu Dx(domain, 2, rCut,1.9,support_options::RADIUS);
-        Derivative_y_gpu Dy(domain, 2, rCut,1.9,support_options::RADIUS);
-        Laplacian_gpu Lap(domain, 2, rCut,1.9,support_options::RADIUS);
-
+        Derivative_x Dx(domain, 2, rCut,1.9,support_options::RADIUS);
+        Derivative_y Dy(domain, 2, rCut,1.9,support_options::RADIUS);
+        Laplacian Lap(domain, 2, rCut,1.9,support_options::RADIUS);

        openfpm::vector<aggregate<int>> bulk;
        openfpm::vector<aggregate<int>> up_p;

--- a/src/DCPSE/Dcpse.cuh
+++ b/src/DCPSE/Dcpse.cuh
@@ -484,79 +484,10 @@ public:

 private:

+    template <typename U>
    void initializeAdaptive(vector_type &particles,
                            unsigned int convergenceOrder,
-                            double rCut) {
-        // Still need to be tested
-#ifdef SE_CLASS1
-        this->update_ctr=particles.getMapCtr();
-#endif
-
-        if (!isSharedSupport) {
-            subsetKeyPid.resize(particles.size_local_orig());
-            supportRefs.resize(particles.size_local());
-        }
-        localEps.resize(particles.size_local());
-        localEpsInvPow.resize(particles.size_local());
-        kerOffsets.resize(particles.size_local()+1);
-
-        const T condVTOL = 1e2;
-
-        if (!isSharedSupport) {
-            SupportBuilder<vector_type,vector_type>
-                supportBuilder(particles, particles, differentialSignature, rCut, differentialOrder == 0);
-            unsigned int requiredSupportSize = monomialBasis.size();
-            // need to resize supportKeys1D to yet unknown supportKeysTotalN
-            // add() takes too long
-            openfpm::vector<openfpm::vector<size_t>> tempSupportKeys(supportRefs.size());
-
-            auto it = particles.getDomainIterator();
-            while (it.isNext()) {
-                auto key_o = particles.getOriginKey(it.get());
-                subsetKeyPid.get(key_o.getKey()) = it.get().getKey();
-
-                Support support = supportBuilder.getSupport(it, requiredSupportSize, opt);
-                supportRefs.get(key_o.getKey()) = key_o.getKey();
-                tempSupportKeys.get(key_o.getKey()) = support.getKeys();
-                kerOffsets.get(key_o.getKey()) = supportKeysTotalN;
-
-                if (maxSupportSize < support.size())
-                    maxSupportSize = support.size();
-                supportKeysTotalN += support.size();
-
-                EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> V(support.size(), monomialBasis.size());
-                // Vandermonde matrix computation
-                Vandermonde<dim, T, EMatrix<T, Eigen::Dynamic, Eigen::Dynamic>>
-                        vandermonde(support, monomialBasis, particles, particles,HOverEpsilon);
-                vandermonde.getMatrix(V);
-
-                T condV = conditionNumber(V, condVTOL);
-                T eps = vandermonde.getEps();
-                if (condV > condVTOL) {
-                    requiredSupportSize *= 2;
-                    std::cout << "INFO: Increasing, requiredSupportSize = " << requiredSupportSize << std::endl; // debug
-                    continue;
-                } else requiredSupportSize = monomialBasis.size();
-
-                ++it;
-            }
-
-            kerOffsets.get(supportRefs.size()) = supportKeysTotalN;
-            supportKeys1D.resize(supportKeysTotalN);
-
-            size_t offset = 0;
-            for (size_t i = 0; i < tempSupportKeys.size(); ++i)
-                for (size_t j = 0; j < tempSupportKeys.get(i).size(); ++j, ++offset)
-                    supportKeys1D.get(offset) = tempSupportKeys.get(i).get(j);
-        }
-
-        kerOffsets.hostToDevice(); supportKeys1D.hostToDevice();
-        assembleLocalMatrices(cublasDgetrfBatched, cublasDtrsmBatched);
-    }
-
-    void initializeAdaptive(vector_type &particles,
-                            unsigned int convergenceOrder,
-                            float rCut) {
+                            U rCut) {
        // Still need to be tested
 #ifdef SE_CLASS1
        this->update_ctr=particles.getMapCtr();
@@ -598,13 +529,11 @@ private:
                EMatrix<T, Eigen::Dynamic, Eigen::Dynamic> V(support.size(), monomialBasis.size());
                // Vandermonde matrix computation
                Vandermonde<dim, T, EMatrix<T, Eigen::Dynamic, Eigen::Dynamic>>
-                        vandermonde(support, monomialBasis, particles, particles, HOverEpsilon);
-
+                        vandermonde(support, monomialBasis, particles, particles,HOverEpsilon);
                vandermonde.getMatrix(V);

                T condV = conditionNumber(V, condVTOL);
                T eps = vandermonde.getEps();
-
                if (condV > condVTOL) {
                    requiredSupportSize *= 2;
                    std::cout << "INFO: Increasing, requiredSupportSize = " << requiredSupportSize << std::endl; // debug
@@ -624,13 +553,14 @@ private:
        }

        kerOffsets.hostToDevice(); supportKeys1D.hostToDevice();
-        assembleLocalMatrices(cublasSgetrfBatched, cublasStrsmBatched);
+        assembleLocalMatrices_t(rCut);
    }

+    template <typename U>
    void initializeStaticSize(vector_type &particles,
                              unsigned int convergenceOrder,
-                              double rCut,
-                              double supportSizeFactor) {
+                              U rCut,
+                              U supportSizeFactor) {
 #ifdef SE_CLASS1
        this->update_ctr=particles.getMapCtr();
 #endif
@@ -697,82 +627,19 @@ std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution
 std::chrono::duration<double> time_span2 = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1);
 std::cout << "Support building took " << time_span2.count() * 1000. << " milliseconds." << std::endl;

-        assembleLocalMatrices(cublasDgetrfBatched, cublasDtrsmBatched);
+        assembleLocalMatrices_t(rCut);
    }

-    // ad hoc solution to template specialization for float/double
-    void initializeStaticSize(vector_type &particles,
-                              unsigned int convergenceOrder,
-                              float rCut,
-                              float supportSizeFactor) {
-#ifdef SE_CLASS1
-        this->update_ctr=particles.getMapCtr();
-#endif
-        this->rCut=rCut;
-        this->supportSizeFactor=supportSizeFactor;
-        this->convergenceOrder=convergenceOrder;
+    // Cublas subroutine selector: float or double
+    // rCut is needed to overcome limitation of nested template class specialization
+    template <typename U> void assembleLocalMatrices_t(U rCut) {
+        throw std::invalid_argument("DCPSE operator error: CUBLAS supports only float or double"); }

-        if (!isSharedSupport) {
-            subsetKeyPid.resize(particles.size_local_orig());
-            supportRefs.resize(particles.size_local());
-        }
-        localEps.resize(particles.size_local());
-        localEpsInvPow.resize(particles.size_local());
+    void assembleLocalMatrices_t(float rCut) {
+        assembleLocalMatrices(cublasSgetrfBatched, cublasStrsmBatched); }

-std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now();
-        auto it = particles.getDomainIterator();
-
-        if (opt==support_options::RADIUS) {
-            if (!isSharedSupport) {
-                while (it.isNext()) {
-                    auto key_o = it.get(); subsetKeyPid.get(particles.getOriginKey(key_o).getKey()) = key_o.getKey();
-                    supportRefs.get(key_o.getKey()) = key_o.getKey();
-                    ++it;
-                }
-
-                SupportBuilderGPU<vector_type> supportBuilder(particles, rCut);
-                supportBuilder.getSupport(supportRefs.size(), kerOffsets, supportKeys1D, maxSupportSize, supportKeysTotalN);
-            }
-        } else {
-            if (!isSharedSupport){
-                openfpm::vector<openfpm::vector<size_t>> tempSupportKeys(supportRefs.size());
-                size_t requiredSupportSize = monomialBasis.size() * supportSizeFactor;
-                // need to resize supportKeys1D to yet unknown supportKeysTotalN
-                // add() takes too long
-                SupportBuilder<vector_type,vector_type> supportBuilder(particles, particles, differentialSignature, rCut, differentialOrder == 0);
-                kerOffsets.resize(supportRefs.size()+1);
-
-                while (it.isNext()) {
-                    auto key_o = it.get(); subsetKeyPid.get(particles.getOriginKey(key_o).getKey()) = key_o.getKey();
-
-                    Support support = supportBuilder.getSupport(it, requiredSupportSize, opt);
-                    supportRefs.get(key_o.getKey()) = key_o.getKey();
-                    tempSupportKeys.get(key_o.getKey()) = support.getKeys();
-                    kerOffsets.get(key_o.getKey()) = supportKeysTotalN;
-
-                    if (maxSupportSize < support.size()) maxSupportSize = support.size();
-                    supportKeysTotalN += support.size();
-                    ++it;
-                }
-
-                kerOffsets.get(supportRefs.size()) = supportKeysTotalN;
-                supportKeys1D.resize(supportKeysTotalN);
-
-                size_t offset = 0;
-                for (size_t i = 0; i < tempSupportKeys.size(); ++i)
-                    for (size_t j = 0; j < tempSupportKeys.get(i).size(); ++j, ++offset)
-                        supportKeys1D.get(offset) = tempSupportKeys.get(i).get(j);
-            }
-
-            kerOffsets.hostToDevice(); supportKeys1D.hostToDevice();
-        }
-
-std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now();
-std::chrono::duration<double> time_span2 = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1);
-std::cout << "Support building took " << time_span2.count() * 1000. << " milliseconds." << std::endl;
-
-        assembleLocalMatrices(cublasSgetrfBatched, cublasStrsmBatched);
-    }
+    void assembleLocalMatrices_t(double rCut) {
+        assembleLocalMatrices(cublasDgetrfBatched, cublasDtrsmBatched); }

    template<typename cublasLUDec_type, typename cublasTriangSolve_type>
    void assembleLocalMatrices(cublasLUDec_type cublasLUDecFunc, cublasTriangSolve_type cublasTriangSolveFunc) {
No results found