regression.hpp 13.54 KiB
/*
* Regression module
* Obtains polynomial models for data from vector_dist
* author : sachin (sthekke@mpi-cbg.de)
* date : 28.04.2022
*
*/
#ifndef OPENFPM_NUMERICS_REGRESSION_HPP
#define OPENFPM_NUMERICS_REGRESSION_HPP
#include "Vector/map_vector.hpp"
#include "Space/Shape/Point.hpp"
#include "DMatrix/EMatrix.hpp"
#include "DCPSE/SupportBuilder.hpp"
#include "minter/include/minter.h"
template<typename vector_type_support, typename NN_type>
class RegressionSupport
{
openfpm::vector<size_t> keys;
public:
template<typename iterator_type>
RegressionSupport(vector_type_support &vd, iterator_type itPoint, unsigned int requiredSize, support_options opt, NN_type &cellList_in) : domain(vd),
cellList(cellList_in)
{
rCut = cellList_in.getCellBox().getHigh(0);
// Get spatial position from point iterator
vect_dist_key_dx p = itPoint.get();
vect_dist_key_dx pOrig = itPoint.getOrig();
Point<vector_type_support::dims, typename vector_type_support::stype> pos = domain.getPos(p.getKey());
// Get cell containing current point and add it to the set of cell keys
grid_key_dx<vector_type_support::dims> curCellKey = cellList.getCellGrid(pos); // Here get the key of the cell where the current point is
std::set<grid_key_dx<vector_type_support::dims>> supportCells;
supportCells.insert(curCellKey);
// Make sure to consider a set of cells providing enough points for the support
enlargeSetOfCellsUntilSize(supportCells, requiredSize + 1,
opt); // NOTE: this +1 is because we then remove the point itself
// Now return all the points from the support into a vector
keys = getPointsInSetOfCells(supportCells, p, pOrig, requiredSize, opt);
}
auto getKeys()
{
return keys;
}
auto getNumParticles()
{
return keys.size();
}
private:
vector_type_support &domain;
NN_type &cellList;
typename vector_type_support::stype rCut;
void enlargeSetOfCellsUntilSize(std::set<grid_key_dx<vector_type_support::dims>> &set, unsigned int requiredSize,
support_options opt) {
if (opt == support_options::RADIUS) {
auto cell = *set.begin();
grid_key_dx<vector_type_support::dims> middle;
int n = std::ceil(rCut / cellList.getCellBox().getHigh(0));
size_t sz[vector_type_support::dims];
for (int i = 0; i < vector_type_support::dims; i++) {
sz[i] = 2 * n + 1;
middle.set_d(i, n);
}
grid_sm<vector_type_support::dims, void> g(sz);
grid_key_dx_iterator<vector_type_support::dims> g_k(g);
while (g_k.isNext()) {
auto key = g_k.get();
key = cell + key - middle;
if (isCellKeyInBounds(key)) {
set.insert(key);
}
++g_k;
}
}
else if (opt == support_options::AT_LEAST_N_PARTICLES) {
int done = 0;
int n = 1;
auto cell = *set.begin();
grid_key_dx<vector_type_support::dims> middle;
size_t sz[vector_type_support::dims];
while(true) // loop for the number of cells enlarged per dimension
{
std::set<grid_key_dx<vector_type_support::dims>> temp_set;
for (int i = 0; i < vector_type_support::dims; i++) {
sz[i] = 2 * n + 1;
middle.set_d(i, n);
}
grid_sm<vector_type_support::dims, void> g(sz);
grid_key_dx_iterator<vector_type_support::dims> g_k(g);
while (g_k.isNext()) {
auto key = g_k.get();
key = cell + key - middle;
if (isCellKeyInBounds(key)) {
temp_set.insert(key);
}
++g_k;
}
if (getNumElementsInSetOfCells(temp_set) < requiredSize) n++;
else
{
set = temp_set;
break;
}
}
}
else {
while (getNumElementsInSetOfCells(set) <
5.0 * requiredSize) //Why 5*requiredSize? Becasue it can help with adaptive resolutions.
{
auto tmpSet = set;
for (const auto el: tmpSet) {
for (unsigned int i = 0; i < vector_type_support::dims; ++i) {
const auto pOneEl = el.move(i, +1);
const auto mOneEl = el.move(i, -1);
if (isCellKeyInBounds(pOneEl)) {
set.insert(pOneEl);
}
if (isCellKeyInBounds(mOneEl)) {
set.insert(mOneEl);
}
}
}
}
}
}
openfpm::vector<size_t> getPointsInSetOfCells(std::set<grid_key_dx<vector_type_support::dims>> set,
vect_dist_key_dx &p,
vect_dist_key_dx &pOrig,
size_t requiredSupportSize,
support_options opt) {
struct reord {
typename vector_type_support::stype dist;
size_t offset;
bool operator<(const reord &p) const { return this->dist < p.dist; }
};
openfpm::vector<reord> rp;
openfpm::vector<size_t> points;
Point<vector_type_support::dims, typename vector_type_support::stype> xp = domain.getPos(p);
for (const auto cellKey: set) {
const size_t cellLinId = getCellLinId(cellKey);
const size_t elemsInCell = getNumElementsInCell(cellKey);
for (size_t k = 0; k < elemsInCell; ++k) {
size_t el = cellList.get(cellLinId, k);
Point<vector_type_support::dims, typename vector_type_support::stype> xq = domain.getPosOrig(el);
//points.push_back(el);
reord pr;
pr.dist = xp.distance(xq);
pr.offset = el;
rp.add(pr);
}
}
if (opt == support_options::RADIUS) {
for (int i = 0; i < rp.size(); i++) {
if (rp.get(i).dist < rCut) {
points.add(rp.get(i).offset);
}
}
/* #ifdef SE_CLASS1
if (points.size()<requiredSupportSize)
{
std::cerr<<__FILE__<<":"<<__LINE__<<"Note that the DCPSE neighbourhood doesn't have asked no. particles (Increase the rCut or reduce the over_sampling factor)";
std::cout<<"Particels asked (minimum*oversampling_factor): "<<requiredSupportSize<<". Particles Possible with given options:"<<points.size()<<"."<<std::endl;
}
#endif*/
}
else if (opt == support_options::AT_LEAST_N_PARTICLES) {
for (int i = 0; i < rp.size(); i++) points.add(rp.get(i).offset);
}
else {
rp.sort();
for (int i = 0; i < requiredSupportSize; i++) {
points.add(rp.get(i).offset);
}
}
//MinSpacing=MinSpacing/requiredSupportSize
return points;
}
size_t getCellLinId(const grid_key_dx<vector_type_support::dims> &cellKey) {
mem_id id = cellList.getGrid().LinId(cellKey);
return static_cast<size_t>(id);
}
size_t getNumElementsInCell(const grid_key_dx<vector_type_support::dims> &cellKey) {
const size_t curCellId = getCellLinId(cellKey);
size_t numElements = cellList.getNelements(curCellId);
return numElements;
}
size_t getNumElementsInSetOfCells(const std::set<grid_key_dx<vector_type_support::dims>> &set)
{
size_t tot = 0;
for (const auto cell : set)
{
tot += getNumElementsInCell(cell);
}
return tot;
}
bool isCellKeyInBounds(grid_key_dx<vector_type_support::dims> key)
{
const size_t *cellGridSize = cellList.getGrid().getSize();
for (size_t i = 0; i < vector_type_support::dims; ++i)
{
if (key.value(i) < 0 || key.value(i) >= cellGridSize[i])
{
return false;
}
}
return true;
}
};
template<int spatial_dim, unsigned int prp_id, typename MatType = EMatrixXd, typename VecType = EVectorXd>
class RegressionModel
{
public:
minter::PolyModel<spatial_dim, MatType, VecType> *model = nullptr;
minter::PolyModel<spatial_dim, MatType, VecType> *deriv_model[spatial_dim];
RegressionModel(unsigned int poly_degree, float lp_degree) {
// construct polynomial model
model = new minter::PolyModel<spatial_dim, MatType, VecType>();
model->setDegree(poly_degree, lp_degree);
// placeholder for derivatives
for(int i = 0;i < spatial_dim;++i)
deriv_model[i] = nullptr;
}
template<typename vector_type, typename reg_support_type>
RegressionModel(vector_type &vd, reg_support_type &support, unsigned int poly_degree, float lp_degree = 2.0)
{
int num_particles = support->getNumParticles();
int dim = vector_type::dims;
MatType points(num_particles, dim);
VecType values(num_particles);
auto keys = support->getKeys();
for(int i = 0;i < num_particles;++i)
{
for(int j = 0;j < dim;++j)
points(i,j) = vd.getPos(keys.get(i))[j];
values(i) = vd.template getProp<prp_id>(keys.get(i));
}
// construct polynomial model
model = new minter::PolyModel<spatial_dim, MatType, VecType>(points, values, poly_degree, lp_degree);
// placeholder for derivatives
for(int i = 0;i < dim;++i)
deriv_model[i] = nullptr;
}
// Constructor for all points in a proc (domain + ghost) and a specified poly_degree
template<typename vector_type>
RegressionModel(vector_type &vd, unsigned int poly_degree, float lp_degree = 2.0)
{
int num_particles = vd.size_local_with_ghost();
int dim = vector_type::dims;
MatType points(num_particles, dim);
VecType values(num_particles);
auto it = vd.getDomainAndGhostIterator();
int i = 0;
while (it.isNext())
{
auto key = it.get();
for(int j = 0;j < dim;++j)
points(i,j) = vd.getPos(key)[j];
values(i) = vd.template getProp<prp_id>(key);
++it;
++i;
}
// construct polynomial model
model = new minter::PolyModel<spatial_dim, MatType, VecType>(points, values, poly_degree, lp_degree);
for(i = 0;i < dim;++i)
deriv_model[i] = nullptr;
}
// Constructor for all points in a proc (domain + ghost) within a tolerance
template<typename vector_type>
RegressionModel(vector_type &vd, double tolerance)
{
int num_particles = vd.size_local_with_ghost();
int dim = vector_type::dims;
MatType points(num_particles, dim);
VecType values(num_particles);
auto it = vd.getDomainAndGhostIterator();
int i = 0;
while (it.isNext())
{
auto key = it.get();
for(int j = 0;j < dim;++j)
points(i,j) = vd.getPos(key)[j];
values(i) = vd.template getProp<prp_id>(key);
++it;
++i;
}
int poly_degree = 1;
double error = -1.0;
minter::PolyModel<spatial_dim, MatType, VecType> *mdl = nullptr;
do
{
++poly_degree;
if(mdl)
delete mdl;
// construct polynomial model
mdl = new minter::PolyModel<spatial_dim, MatType, VecType>(points, values, poly_degree, 2.0);
// check if linf_error is within the tolerance
error = -1.0;
for(i = 0;i < num_particles;++i)
{
double pred = mdl->eval(points.block(i,0,1,dim))(0); // evaluated for one point
double err = std::abs(pred - values(i));
if (err > error)
error = err;
}
// std::cout<<"Fit of degree "<<poly_degree<<" with error = "<<error<<std::endl;
}while(error > tolerance);
model = mdl;
for(i = 0;i < dim;++i)
deriv_model[i] = nullptr;
}
template<typename vector_type, typename reg_support_type>
void computeCoeffs(vector_type& vd, reg_support_type& support)
{
unsigned int dim = vector_type::dims;
auto num_particles = support.getNumParticles();
Eigen::MatrixXd points(num_particles, dim);
Eigen::VectorXd values(num_particles);
auto keys = support.getKeys();
for(int i = 0;i < num_particles;++i)
{
for(int j = 0;j < dim;++j)
points(i,j) = vd.getPos(keys.get(i))[j];
values(i) = vd.template getProp<prp_id>(keys.get(i));
}
model->computeCoeffs(points, values);
}
~RegressionModel()
{
if(model)
delete model;
for(int i = 0;i < spatial_dim;++i)
{
if(deriv_model[i])
delete deriv_model[i];
}
}
template<typename T> // Typical: Point<vector_type::dims, typename vector_type::stype>
double eval(T pos)
{
int dim = pos.dims;
MatType point(1,dim);
for(int j = 0;j < dim;++j)
point(0,j) = pos.get(j);
return model->eval(point)(0);
}
// T1 : Point<vector_type::dims, typename vector_type::stype>
// T2 : Point<vector_type::dims, int>
template<typename T1, typename T2>
double deriv(T1 pos, T2 deriv_order)
{
int dim = pos.dims;
MatType point(1,dim);
for(int j = 0;j < dim;++j)
point(0,j) = pos.get(j);
std::vector<int> order;
for(int j = 0;j < dim;++j)
order.push_back(deriv_order.get(j));
return model->deriv_eval(point, order)(0);
}
void compute_grad()
{
for(int i = 0;i < spatial_dim;++i)
{
std::vector<int> ord(spatial_dim, 0);
ord[i] = 1;
deriv_model[i] = model->derivative(ord);
}
}
// T: Point<vector_type::dims, typename vector_type::stype>
template<typename T>
T eval_grad(T pos)
{
T res;
if(!deriv_model[0])
compute_grad();
for(int i = 0;i < spatial_dim;++i)
res.get(i) = deriv_model[i]->eval(pos);
return res;
}
};
#endif /* REGRESSION_HPP_ */