Commit 759fb600 authored by Pietro Incardona's avatar Pietro Incardona

Initial parallelization of the matrix construction

parent 1d68c3aa
......@@ -87,10 +87,11 @@ esac
# First, check SUITESPARSE_LIBS environment variable
if test "x$SUITESPARSE_LIBS" != x; then
save_LIBS="$LIBS"; LIBS="$SUITESPARSE_LIBS -lumfpack -lsuitesparseconfig -lm $RT_LIB"
save_LIBS="$LIBS"; LIBS="$SUITESPARSE_LIBS -lumfpack -lamd -lbtf -lcamd -lccolamd -lcholmod -lcolamd -lcxsparse -lklu -ldl -lrbio -lspqr -lsuitesparseconfig -lm $RT_LIB"
AC_MSG_CHECKING([for umf_l_malloc])
AC_TRY_LINK_FUNC(umf_l_malloc, [ax_suitesparse_ok=yes
SUITESPARSE_LIB="$SUITESPARSE_LIBS -lamd -lbtf -lcamd -lccolamd -lcholmod -lcolamd -lcxsparse -lklu -ldl -lrbio -lspqr -lsuitesparseconfig -lumfpack"], [SUITRSPARSE_LIBS=""])
SUITESPARSE_LIBS="$SUITESPARSE_LIBS -lumfpack -lamd -lbtf -lcamd -lccolamd -lcholmod -lcolamd -lcxsparse -lklu -ldl -lrbio -lspqr -lsuitesparseconfig"
], [SUITRSPARSE_LIBS=""])
AC_MSG_RESULT($ax_suitesparse_ok)
LIBS="$save_LIBS"
if test $ax_suitesparse_ok = no; then
......@@ -100,12 +101,18 @@ if test "x$SUITESPARSE_LIBS" != x; then
CFLAGS=$SUITESPARSE_INCLUDE
AC_CHECK_HEADER(umfpack.h,[],[SUITESPARSE_INCLUDE=""
ax_suitesparse_ok=no])
CFLAGS="$old_CFLAGS"
else
AC_CHECK_LIB(umfpack,umf_l_alloc,[SUITESPARSE_LIB="$SUITESPARSE_LIB -lamd -lbtf -lcamd -lccolamd -lcholmod -lcolamd -lcxsparse -lklu -ldl -lrbio -lspqr -lsuitesparseconfig -lumfpack"],[SUITESPARSE_LIB=""])
AC_CHECK_LIB(umfpack,umf_l_alloc,[SUITESPARSE_LIBS="$SUITESPARSE_LIBS -lumfpack -lamd -lbtf -lcamd -lccolamd -lcholmod -lcolamd -lcxsparse -lklu -ldl -lrbio -lspqr -lsuitesparseconfig"],[
SUITESPARSE_LIBS=""
ax_suitesparse_ok=no
])
old_CFLAGS="$CFLAGS"
AC_CHECK_HEADER(umfpack.h,[],[SUITESPARSE_INCLUDE=""
ax_suitesparse_ok=no])
CFLAGS="$old_CFLAGS"
fi
......
......@@ -444,7 +444,6 @@ public:
// get the position
auto key = it.get();
grid_key_dx<Sys_eqs::dims> gkey = g_map.getGKey(key);
// Calculate the non-zero colums
T::value(g_map,key,gs,spacing,cols,1.0);
......
......@@ -219,6 +219,11 @@ BOOST_AUTO_TEST_CASE(lid_driven_cavity)
//! [lid-driven cavity 2D]
g_dist.write("lid_driven_cavity");
// Check that match
bool test = compare("lid_driven_cavity_grid_0_test.vtk","lid_driven_cavity_grid_0.vtk");
BOOST_REQUIRE_EQUAL(test,true);
}
BOOST_AUTO_TEST_SUITE_END()
......
......@@ -229,6 +229,10 @@ BOOST_AUTO_TEST_CASE(lid_driven_cavity)
x.copy<FDScheme<lid_nn_3d>,decltype(g_dist),velocity,pressure>(fd,{0,0},{sz[0]-1,sz[1]-1,sz[2]-1},g_dist);
g_dist.write("lid_driven_cavity_3d");
// Check that match
bool test = compare("lid_driven_cavity_3d_grid_0.vtk","lid_driven_cavity_3d_grid_0_test.vtk");
BOOST_REQUIRE_EQUAL(test,true);
}
BOOST_AUTO_TEST_SUITE_END()
......
......@@ -3,7 +3,7 @@ LINKLIBS = $(SUITESPARSE_LIBS) $(LAPACK_LIBS) $(BLAS_LIBS) $(METIS_LIB) $(DEFA
noinst_PROGRAMS = numerics
numerics_SOURCES = main.cpp ../../openfpm_vcluster/src/VCluster.cpp ../../openfpm_devices/src/memory/HeapMemory.cpp ../../openfpm_devices/src/memory/PtrMemory.cpp ../../openfpm_devices/src/Memleak_check.cpp
numerics_CXXFLAGS = $(INCLUDES_PATH) $(BOOST_CPPFLAGS) $(SUITESPARSE_INCLUDE) $(METIS_INCLUDE) $(EIGEN_INCLUDE) -Wno-deprecated-declarations -Wunused-local-typedefs
numerics_CXXFLAGS = $(INCLUDES_PATH) $(BOOST_CPPFLAGS) $(SUITESPARSE_INCLUDE) $(METIS_INCLUDE) $(EIGEN_INCLUDE) -Wno-deprecated-declarations -Wno-unused-local-typedefs
numerics_CFLAGS = $(CUDA_CFLAGS)
numerics_LDADD = $(LINKLIBS) -lmetis
nobase_include_HEADERS = PSE/Kernels.hpp
......
......@@ -10,6 +10,7 @@
#include "Vector/map_vector.hpp"
#include <boost/mpl/int.hpp>
#include "VCluster.hpp"
#define EIGEN_TRIPLET 1
......@@ -56,16 +57,140 @@ struct triplet<T,EIGEN_TRIPLET>
template<typename T, typename id_t>
class SparseMatrix<T,id_t,Eigen::SparseMatrix<T,0,id_t>>
{
Eigen::SparseMatrix<T,0,id_t> mat;
public:
// Triplet implementation id
//! Triplet implementation id
typedef boost::mpl::int_<EIGEN_TRIPLET> triplet_impl;
// Triplet type
//! Triplet type
typedef triplet<T,EIGEN_TRIPLET> triplet_type;
private:
Eigen::SparseMatrix<T,0,id_t> mat;
openfpm::vector<triplet_type> trpl;
/*! \brief Call-back to allocate buffer to receive incoming elements (particles)
 *
 * \param msg_i size in bytes required to receive the message from processor i
 * \param total_msg total size to receive from all the processors
 * \param total_p the total number of processors that want to communicate with you
 * \param i processor id
 * \param ri request id (it is an id that goes from 0 to total_p, and is unique
 *           every time message_alloc is called)
 * \param ptr opaque pointer, actually an openfpm::vector<openfpm::vector<triplet> *> *
 *
 * \return the pointer where to store the message for the processor i,
 *         or NULL when ptr is invalid (internal error)
 *
 */
template<typename triplet> static void * msg_alloc(size_t msg_i ,size_t total_msg, size_t total_p, size_t i, size_t ri, void * ptr)
{
	openfpm::vector<openfpm::vector<triplet> *> * trpl = (openfpm::vector<openfpm::vector<triplet> *> *)ptr;

	if (trpl == NULL)
	{
		std::cerr << __FILE__ << ":" << __LINE__ << " Internal error this processor is not suppose to receive\n";

		// Bug fix: the original fell through and dereferenced the NULL pointer below
		return NULL;
	}

	// We need one more slot
	trpl->add();

	// size the new slot to hold msg_i bytes worth of triplets
	// NOTE(review): the container holds vector POINTERS, so add() default-constructs
	// a (likely null) pointer and last() is then dereferenced -- confirm against
	// openfpm::vector semantics; the container probably should hold vectors by value
	trpl->last()->resize(msg_i/sizeof(triplet));

	// return the pointer where the incoming message must be written
	return trpl->last()->getPointer();
}
/*! \brief Assemble the matrix
 *
 * Merge the per-processor triplet lists (collected on master) into one flat
 * list and hand it to the matrix implementation.
 *
 * \param trpl Matrix in form of triplets, one list per sending processor
 *             (elements are stored as pointers, see the single-processor branch)
 * \param mat Matrix to assemble
 *
 */
template<typename triplet, typename mat_impl> void assembleMatrix(openfpm::vector<openfpm::vector<triplet> *> & trpl, SparseMatrix<double,int,mat_impl> & mat)
{
	Vcluster & vcl = *global_v_cluster;

	////// On Master and only here
	// we assemble the Matrix from the collected data

	if (vcl.getProcessingUnits() != 1)
	{
		// count the total triplet we have
		// Bug fix: the container elements are pointers (the single-processor branch
		// below dereferences with *trpl.get(0)), so use -> instead of .
		size_t tot_trpl = 0;

		for (size_t i = 0 ; i < trpl.size() ; i++)
			tot_trpl += trpl.get(i)->size();

		openfpm::vector<triplet> mat_t;
		mat_t.resize(tot_trpl);

		// id zero
		size_t id = 0;

		// Add all the received triplet in one array
		for (size_t i = 0 ; i < trpl.size() ; i++)
		{
			for (size_t j = 0 ; j < trpl.get(i)->size() ; j++)
			{
				mat_t.get(id) = trpl.get(i)->get(j);
				id++;
			}
		}

		// NOTE(review): setFromTriplets appears to have been renamed to
		// getMatrixTriplets on this class in the same commit -- confirm that
		// mat_impl still exposes setFromTriplets
		mat.setFromTriplets(mat_t);
	}
	else
	{
		// single processor: there must be exactly one triplet set
		if (trpl.size() != 1)
			std::cerr << "Internal error: " << __FILE__ << ":" << __LINE__ << " in case of single processor we must have a single triplet set\n";

		mat.setFromTriplets(*trpl.get(0));
	}
}
/*! \brief Here we collect the full matrix on master
*
*/
void collect()
{
Vcluster & vcl = global_v_cluster;
// If we are on master collect the information
if (vcl.getProcessUnitID() == 0)
{
// send buffer (master does not send anything) so send req and send_buf
// remain buffer with size 0
openfpm::vector<size_t> send_req;
openfpm::vector<openfpm::vector<triplet_type>> send_buf;
// for each processor we are going to receive a set of triplet
openfpm::vector<openfpm::vector<triplet_type>> trpl;
// Send and recv multiple messages
vcl.sendrecvMultipleMessagesNBX(send_req, send_buf,msg_alloc<triplet>,&trpl);
assembleMatrix<triplet,Eigen::SparseMatrix<T,0,id_t>>(trpl);
}
else
{
// send buffer (master does not send anything) so send req and send_buf
// remain buffer with size 0
openfpm::vector<size_t> send_req;
send_req.add(0);
openfpm::vector<openfpm::vector<triplet_type> *> send_buf;
send_buf.add(&A);
// for each processor we are going to receive a set of triplet
openfpm::vector<openfpm::vector<triplet_type>> trpl;
// Send and recv multiple messages
vcl.sendrecvMultipleMessagesNBX(send_req, send_buf,msg_alloc<triplet_type>,NULL);
}
}
public:
/*! \brief Create an empty Matrix
*
* \param N1 number of row
......@@ -84,10 +209,10 @@ public:
* \param trpl triplet set
*
*/
SparseMatrix(size_t N1, size_t N2, openfpm::vector<Eigen::Triplet<T,id_t>> & trpl)
SparseMatrix(size_t N1, size_t N2, const openfpm::vector<Eigen::Triplet<T,id_t>> & trpl)
:mat(N1,N2)
{
mat.setFromTriplets(trpl.begin(),trpl.end());
this->trpl = trpl;
}
/*! \brief Create an empty Matrix
......@@ -95,14 +220,16 @@ public:
*/
SparseMatrix() {}
/*! \brief Set the Matrix from triplets
/*! \brief Get the Matrix triplets buffer
*
* It return a buffer that can be filled with triplets
*
* \param trpl Triplet array from where to set
* \return triplet buffer
*
*/
void setFromTriplets(openfpm::vector<triplet_type> & trpl)
openfpm::vector<triplet_type> & getMatrixTriplets()
{
mat.setFromTriplets(trpl.begin(),trpl.end());
return this->trpl;
}
/*! \brief Get the Eigen Matrix object
......@@ -112,6 +239,10 @@ public:
*/
const Eigen::SparseMatrix<T,0,id_t> & getMat() const
{
// Here we collect the information on master
collect();
assemble();
return mat;
}
......@@ -122,6 +253,9 @@ public:
*/
Eigen::SparseMatrix<T,0,id_t> & getMat()
{
collect();
assemble();
return mat;
}
......
......@@ -12,6 +12,7 @@
#include "Eigen/UmfPackSupport"
#include <Eigen/SparseLU>
#define UMFPACK_NONE 0
template<typename T>
class umfpack_solver
......@@ -31,38 +32,56 @@ public:
template<>
class umfpack_solver<double>
{
public:
template<typename impl> static Vector<double> solve(const SparseMatrix<double,int,impl> & A, const Vector<double> & b, size_t opt = NONE)
/*! \brief Here we invert the matrix and solve the system
*
* \warning umfpack is not a parallel solver, this function work only with one processor
*
* \note if you want to use umfpack in a NON parallel, but on a distributed data, use solve with triplet
*
* \tparam impl Implementation of the SparseMatrix
*
*/
template<typename impl> static Vector<double> solve(const SparseMatrix<double,int,impl> & A, const Vector<double> & b, size_t opt = UMFPACK_NONE)
{
Vcluster & v_cl = *global_v_cluster;
Vector<double> x;
// only master processor solve
Eigen::UmfPackLU<Eigen::SparseMatrix<double,0,int> > solver;
if (v_cl.getProcessUnitID() == 0)
{
Eigen::UmfPackLU<Eigen::SparseMatrix<double,0,int> > solver;
solver.compute(A.getMat());
solver.compute(A.getMat());
if(solver.info()!=Eigen::Success)
{
// decomposition failed
std::cout << __FILE__ << ":" << __LINE__ << " solver failed" << "\n";
return x;
}
if(solver.info()!=Eigen::Success)
{
// decomposition failed
std::cout << __FILE__ << ":" << __LINE__ << " solver failed" << "\n";
return x;
}
x.getVec() = solver.solve(b.getVec());
x.getVec() = solver.solve(b.getVec());
if (opt & SOLVER_PRINT_RESIDUAL_NORM_INFINITY)
{
Vector<double> res;
res.getVec() = A.getMat() * x.getVec() - b.getVec();
if (opt & SOLVER_PRINT_RESIDUAL_NORM_INFINITY)
{
Vector<double> res;
res.getVec() = A.getMat() * x.getVec() - b.getVec();
std::cout << "Infinity norm: " << res.getVec().lpNorm<Eigen::Infinity>() << "\n";
}
std::cout << "Infinity norm: " << res.getVec().lpNorm<Eigen::Infinity>() << "\n";
}
if (opt & SOLVER_PRINT_DETERMINANT)
{
std::cout << " Determinant: " << solver.determinant() << "\n";
}
if (opt & SOLVER_PRINT_DETERMINANT)
{
std::cout << " Determinant: " << solver.determinant() << "\n";
}
// Vector is only on master, scatter back the information
x.sync();
}
return x;
}
};
......
......@@ -228,6 +228,32 @@ public:
}
}
/*! \brief Collect the vector into one processor
 *
 * Eigen does not have a real parallel vector, so in order to work we have to collect
 * the vector in one processor
 *
 * This function collect the information from all the other processor into one
 *
 */
void collect()
{
	// TODO: not implemented yet -- currently a no-op placeholder
}
/*! \brief Scatter the vector information to the other processors
 *
 * Eigen does not have a real parallel vector, so in order to work we have to scatter
 * the vector from one processor to the other
 *
 * This function scatter the information to all the other processors
 *
 */
void scatter()
{
	// TODO: not implemented yet -- currently a no-op placeholder
}
/*! \brief Load from file
*
*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment