diff --git a/src/Makefile.am b/src/Makefile.am
index c3985d537eeadfd8a37b5e1f9fe6763bbe005cab..731844471740fda8f9ae36eb8b333a867540c671 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1,7 +1,7 @@
 LINKLIBS = $(OPENMP_LDFLAGS) $(LIBHILBERT_LIB)  $(METIS_LIB) $(PTHREAD_LIBS) $(OPT_LIBS) $(BOOST_LDFLAGS) $(BOOST_IOSTREAMS_LIB) $(CUDA_LIBS) $(PETSC_LIB) $(HDF5_LDFLAGS)  $(HDF5_LIBS) $(PARMETIS_LIB) $(BOOST_UNIT_TEST_FRAMEWORK_LIB) $(BOOST_CHRONO_LIB) $(BOOST_TIMER_LIB) $(BOOST_SYSTEM_LIB) $(LIBIFCORE)
 
 noinst_PROGRAMS = pdata
-pdata_SOURCES = main.cpp Grid/grid_dist_id_unit_test.cpp  lib/pdata.cpp test_multiple_o.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
+pdata_SOURCES = main.cpp pdata_performance.cpp Grid/grid_dist_id_unit_test.cpp  lib/pdata.cpp test_multiple_o.cpp ../openfpm_devices/src/memory/HeapMemory.cpp ../openfpm_devices/src/memory/PtrMemory.cpp ../openfpm_vcluster/src/VCluster/VCluster.cpp ../openfpm_devices/src/Memleak_check.cpp
 pdata_CXXFLAGS = $(OPENMP_CFLAGS) $(AM_CXXFLAGS) $(LIBHILBERT_INCLUDE) $(PETSC_INCLUDE) $(HDF5_CPPFLAGS) $(CUDA_CFLAGS) $(INCLUDES_PATH) $(PARMETIS_INCLUDE) $(METIS_INCLUDE) $(BOOST_CPPFLAGS) $(H5PART_INCLUDE) -DPARALLEL_IO  -Wno-unused-local-typedefs
 pdata_CFLAGS = $(CUDA_CFLAGS)
 pdata_LDADD = $(LINKLIBS) -lparmetis -lmetis
diff --git a/src/Vector/performance/cell_list_comp_reorder.hpp b/src/Vector/performance/cell_list_comp_reorder.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..7e389b52accc1e7a0880ce0e1a53ea0b48aaeae1
--- /dev/null
+++ b/src/Vector/performance/cell_list_comp_reorder.hpp
@@ -0,0 +1,100 @@
+/*
+ * vector_dist_cl_hilb_performance_tests.hpp
+ *
+ *  Created on: May 24, 2016
+ *      Author: Yaroslav Zaluzhnyi
+ */
+
+#ifndef SRC_VECTOR_VECTOR_DIST_CL_HILB_PERFORMANCE_TESTS_HPP_
+#define SRC_VECTOR_VECTOR_DIST_CL_HILB_PERFORMANCE_TESTS_HPP_
+
+#include "Vector/vector_dist.hpp"
+#include "data_type/aggregate.hpp"
+#include "Plot/GoogleChart.hpp"
+#include "vector_dist_performance_util.hpp"
+
+BOOST_AUTO_TEST_SUITE( celllist_comp_reorder_performance_test )
+
+///////////////////// INPUT DATA //////////////////////
+
+// Cut-off radiuses. Can be put different number of values
+openfpm::vector<float> r_cutoff {0.004, 0.007, 0.01};
+// The starting amount of particles (remember that this number is multiplied by number of processors you use for testing)
+size_t k_start = 100000;
+// The lower threshold for number of particles
+size_t k_min = 15000;
+
+///////////////////////////////////////////////////////
+
+// Numbers of particles vector
+openfpm::vector<size_t> n_particles;
+// Vectors to store the data for 2D
+openfpm::vector<openfpm::vector<double>> time_rand_mean;
+openfpm::vector<openfpm::vector<double>> time_hilb_mean;
+openfpm::vector<openfpm::vector<double>> time_rand_dev;
+openfpm::vector<openfpm::vector<double>> time_hilb_dev;
+openfpm::vector<openfpm::vector<double>> time_create_rand_mean;
+openfpm::vector<openfpm::vector<double>> time_create_hilb_mean;
+openfpm::vector<openfpm::vector<double>> time_create_rand_dev;
+openfpm::vector<openfpm::vector<double>> time_create_hilb_dev;
+// Vectors to store the data for 3D
+openfpm::vector<openfpm::vector<double>> time_rand_2_mean;
+openfpm::vector<openfpm::vector<double>> time_hilb_2_mean;
+openfpm::vector<openfpm::vector<double>> time_rand_2_dev;
+openfpm::vector<openfpm::vector<double>> time_hilb_2_dev;
+openfpm::vector<openfpm::vector<double>> time_create_rand_2_mean;
+openfpm::vector<openfpm::vector<double>> time_create_hilb_2_mean;
+openfpm::vector<openfpm::vector<double>> time_create_rand_2_dev;
+openfpm::vector<openfpm::vector<double>> time_create_hilb_2_dev;
+
+
+BOOST_AUTO_TEST_CASE( vector_dist_celllist_random_test )
+{
+	//Benchmark test for 2D and 3D
+	cell_list_comp_reorder_random_benchmark<3>(k_start,k_min,r_cutoff,n_particles,time_rand_mean,time_create_rand_mean,time_rand_dev,time_create_rand_dev);
+	cell_list_comp_reorder_random_benchmark<2>(k_start,k_min,r_cutoff,n_particles,time_rand_2_mean,time_create_rand_2_mean,time_rand_2_dev,time_create_rand_2_dev);
+}
+
+BOOST_AUTO_TEST_CASE( vector_dist_celllist_hilbert_test )
+{
+	//Benchmark test for 2D and 3D
+	cell_list_comp_reorder_hilbert_benchmark<3>(k_start,k_min,r_cutoff,n_particles,time_hilb_mean,time_hilb_dev,time_create_hilb_mean,time_create_hilb_dev);
+	cell_list_comp_reorder_hilbert_benchmark<2>(k_start,k_min,r_cutoff,n_particles,time_hilb_2_mean,time_hilb_2_dev,time_create_hilb_2_mean,time_create_hilb_2_dev);
+}
+
+BOOST_AUTO_TEST_CASE(vector_dist_cl_performance_write_report)
+{
+	GoogleChart cg;
+
+	//Write report for 2D and 3D
+	cell_list_comp_reorder_report<3>(cg,
+			                         r_cutoff,
+									 n_particles,
+									 time_hilb_mean,
+									 time_rand_mean,
+									 time_hilb_dev,
+									 time_rand_dev,
+									 time_create_hilb_mean,
+									 time_create_rand_mean,
+									 time_create_hilb_dev,
+									 time_create_rand_dev);
+
+	cell_list_comp_reorder_report<2>(cg,
+			                         r_cutoff,
+									 n_particles,
+									 time_hilb_2_mean,
+									 time_rand_2_mean,
+									 time_create_hilb_2_mean,
+									 time_create_rand_2_mean,
+									 time_rand_2_dev,
+									 time_hilb_2_dev,
+									 time_create_hilb_2_dev,
+									 time_create_rand_2_dev);
+
+	if (create_vcluster().getProcessUnitID() == 0)
+		cg.write(std::string(test_dir) + "/openfpm_pdata/Celllist_comp_ord.html");
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
+#endif /* SRC_VECTOR_VECTOR_DIST_CL_HILB_PERFORMANCE_TESTS_HPP_ */
diff --git a/src/Vector/performance/cell_list_part_reorder.hpp b/src/Vector/performance/cell_list_part_reorder.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..f53f3f15f295772d10daeeb0d702a086c35bfea6
--- /dev/null
+++ b/src/Vector/performance/cell_list_part_reorder.hpp
@@ -0,0 +1,158 @@
+/*
+ * vector_dist_cl_performance_tests.hpp
+ *
+ *
+ *  Created on: Mar 22, 2016
+ *      Author: Yaroslav Zaluzhnyi
+ */
+
+#ifndef SRC_VECTOR_VECTOR_DIST_CL_PERFORMANCE_TESTS_HPP_
+#define SRC_VECTOR_VECTOR_DIST_CL_PERFORMANCE_TESTS_HPP_
+
+#include "Vector/vector_dist.hpp"
+#include "data_type/aggregate.hpp"
+#include "Plot/GoogleChart.hpp"
+#include <functional>
+
+BOOST_AUTO_TEST_SUITE( celllist_part_reorder_performance_test )
+
+///////////////////// INPUT DATA //////////////////////
+
+// Cut-off radiuses. Can be put different number of values
+openfpm::vector<float> r_cutoff {0.004, 0.007, 0.01};
+// Orders of a curve. Can be put different number of values
+openfpm::vector<size_t> orders = {1,2,3};
+// Number of steps of moving the particles
+size_t n_moving = 8;
+// Moving distance (step size)
+double dist = 0.03;
+// The starting amount of particles (remember that this number is multiplied by number of processors you use for testing)
+size_t k_start = 100000;
+// The minimal amount of particles
+size_t k_min = 15000;
+
+///////////////////////////////////////////////////////
+
+// Numbers of particles vector
+openfpm::vector<size_t> n_particles;
+// Vectors to store the data for 2D
+openfpm::vector<openfpm::vector<double>> time_rand_mean;
+openfpm::vector<openfpm::vector<openfpm::vector<double>>> time_hilb_mean;
+openfpm::vector<openfpm::vector<double>> time_rand_dev;
+openfpm::vector<openfpm::vector<openfpm::vector<double>>> time_hilb_dev;
+openfpm::vector<openfpm::vector<openfpm::vector<double>>> time_reorder_mean;
+openfpm::vector<openfpm::vector<openfpm::vector<double>>> time_reorder_dev;
+// Vectors to store the data for 3D
+openfpm::vector<openfpm::vector<double>> time_rand_2_mean;
+openfpm::vector<openfpm::vector<openfpm::vector<double>>> time_hilb_2_mean;
+openfpm::vector<openfpm::vector<double>> time_rand_2_dev;
+openfpm::vector<openfpm::vector<openfpm::vector<double>>> time_hilb_2_dev;
+openfpm::vector<openfpm::vector<openfpm::vector<double>>> time_reorder_2_mean;
+openfpm::vector<openfpm::vector<openfpm::vector<double>>> time_reorder_2_dev;
+
+
+BOOST_AUTO_TEST_CASE( vector_dist_cl_random_test )
+{
+	//Benchmark test for 2D and 3D
+	cell_list_part_reorder_random_benchmark<3>(k_start,k_min,r_cutoff,n_particles,time_rand_mean,time_rand_dev);
+	cell_list_part_reorder_random_benchmark<2>(k_start,k_min,r_cutoff,n_particles,time_rand_2_mean,time_rand_2_dev);
+}
+
+BOOST_AUTO_TEST_CASE( vector_dist_cl_hilbert_test )
+{
+	//Benchmark test for 2D and 3D
+	cell_list_part_reorder_hilbert_benchmark<3>(k_start,
+			                                    k_min,
+												n_moving,
+												dist,
+												r_cutoff,
+												n_particles,
+												orders,
+												time_hilb_mean,
+												time_reorder_mean,
+												time_hilb_dev,
+												time_reorder_dev);
+
+	cell_list_part_reorder_hilbert_benchmark<2>(k_start,
+			                                    k_min,
+												n_moving,
+												dist,
+												r_cutoff,
+												n_particles,
+												orders,
+												time_hilb_2_mean,
+												time_reorder_2_mean,
+												time_hilb_2_dev,
+												time_reorder_2_dev);
+}
+
+BOOST_AUTO_TEST_CASE(vector_dist_cl_performance_write_report)
+{
+	GoogleChart cg;
+
+/*	n_particles.save("debug1");
+	time_rand_mean.save("debug2");
+	time_hilb_mean.save("debug3");
+	time_rand_dev.save("debug4");
+	time_hilb_dev.save("debug5");
+	time_reorder_mean.save("debug6");
+	time_reorder_dev.save("debug7");
+	time_hilb_moved.save("debug8");
+
+	time_rand_2_mean.save("debug9");
+	time_hilb_2_mean.save("debug10");
+	time_rand_2_dev.save("debug11");
+	time_hilb_2_dev.save("debug12");
+	time_reorder_2_mean.save("debug13");
+	time_reorder_2_dev.save("debug14");
+	time_hilb_moved_2.save("debug15");*/
+
+/*	n_particles.load("debug1");
+	time_rand_mean.load("debug2");
+	time_hilb_mean.load("debug3");
+	time_rand_dev.load("debug4");
+	time_hilb_dev.load("debug5");
+	time_reorder_mean.load("debug6");
+	time_reorder_dev.load("debug7");
+	time_hilb_moved.load("debug8");
+
+	time_rand_2_mean.load("debug9");
+	time_hilb_2_mean.load("debug10");
+	time_rand_2_dev.load("debug11");
+	time_hilb_2_dev.load("debug12");
+	time_reorder_2_mean.load("debug13");
+	time_reorder_2_dev.load("debug14");
+	time_hilb_moved_2.load("debug15");*/
+
+	//Write report for 2D and 3D
+	cell_list_part_reorder_report<3>(cg,
+			                         n_moving,
+			                         r_cutoff,
+									 n_particles,
+									 orders,
+									 time_hilb_mean,
+									 time_rand_mean,
+									 time_reorder_mean,
+									 time_hilb_dev,
+									 time_rand_dev,
+									 time_reorder_dev);
+
+	cell_list_part_reorder_report<2>(cg,
+			                         n_moving,
+			                         r_cutoff,
+									 n_particles,
+									 orders,
+									 time_hilb_2_mean,
+									 time_rand_2_mean,
+									 time_reorder_2_mean,
+									 time_hilb_2_dev,
+									 time_rand_2_dev,
+									 time_reorder_2_dev);
+
+	if (create_vcluster().getProcessUnitID() == 0)
+		cg.write(std::string(test_dir) + "/openfpm_pdata/Celllist_part_ord.html");
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
+#endif /* SRC_VECTOR_VECTOR_DIST_CL_PERFORMANCE_TESTS_HPP_ */
diff --git a/src/Vector/performance/vector_dist_cl_hilb_performance_tests.hpp b/src/Vector/performance/vector_dist_cl_hilb_performance_tests.hpp
deleted file mode 100644
index 7b0c77bde064932719ba6a80f287d9cc90a93d11..0000000000000000000000000000000000000000
--- a/src/Vector/performance/vector_dist_cl_hilb_performance_tests.hpp
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * vector_dist_cl_hilb_performance_tests.hpp
- *
- *  Created on: May 24, 2016
- *      Author: Yaroslav Zaluzhnyi
- */
-
-#ifndef SRC_VECTOR_VECTOR_DIST_CL_HILB_PERFORMANCE_TESTS_HPP_
-#define SRC_VECTOR_VECTOR_DIST_CL_HILB_PERFORMANCE_TESTS_HPP_
-
-#include "Vector/vector_dist.hpp"
-#include "data_type/aggregate.hpp"
-#include "Plot/GoogleChart.hpp"
-#include "vector_dist_performance_util.hpp"
-
-BOOST_AUTO_TEST_SUITE( vector_dist_celllist_hilb_performance_test )
-
-///////////////////// INPUT DATA //////////////////////
-
-// Cut-off radiuses. Can be put different number of values
-openfpm::vector<float> r_cutoff {0.01, 0.02, 0.03};
-// The starting amount of particles (remember that this number is multiplied by number of processors you use for testing)
-size_t k_start = 100000;
-// The lower threshold for number of particles
-size_t k_min = 15000;
-// Ghost part of distributed vector
-double ghost_part = 0.03;
-
-///////////////////////////////////////////////////////
-
-// Numbers of particles vector
-openfpm::vector<size_t> n_particles;
-// Vectors to store the data for 2D
-openfpm::vector<openfpm::vector<double>> time_rand;
-openfpm::vector<openfpm::vector<double>> time_hilb;
-openfpm::vector<openfpm::vector<double>> time_total_rand;
-openfpm::vector<openfpm::vector<double>> time_total_hilb;
-// Vectors to store the data for 3D
-openfpm::vector<openfpm::vector<double>> time_rand_2;
-openfpm::vector<openfpm::vector<double>> time_hilb_2;
-openfpm::vector<openfpm::vector<double>> time_total_rand_2;
-openfpm::vector<openfpm::vector<double>> time_total_hilb_2;
-
-
-BOOST_AUTO_TEST_CASE( vector_dist_celllist_random_test )
-{
-	//Benchmark test for 2D and 3D
-	vd_celllist_random_benchmark<2>(k_start,k_min,ghost_part,r_cutoff,n_particles,time_rand,time_total_rand);
-	vd_celllist_random_benchmark<3>(k_start,k_min,ghost_part,r_cutoff,n_particles,time_rand_2,time_total_rand_2);
-}
-
-BOOST_AUTO_TEST_CASE( vector_dist_celllist_hilbert_test )
-{
-	//Benchmark test for 2D and 3D
-	vd_celllist_hilbert_benchmark<2>(k_start,k_min,ghost_part,r_cutoff,n_particles,time_hilb,time_total_hilb);
-	vd_celllist_hilbert_benchmark<3>(k_start,k_min,ghost_part,r_cutoff,n_particles,time_hilb_2,time_total_hilb_2);
-}
-
-BOOST_AUTO_TEST_CASE(vector_dist_cl_performance_write_report)
-{
-	//Write report for 2D and 3D
-	vd_celllist_performance_write_report<2>(r_cutoff,n_particles,time_hilb,time_rand,time_total_hilb,time_total_rand);
-	vd_celllist_performance_write_report<3>(r_cutoff,n_particles,time_hilb_2,time_rand_2,time_total_hilb_2,time_total_rand_2);
-}
-
-BOOST_AUTO_TEST_SUITE_END()
-
-#endif /* SRC_VECTOR_VECTOR_DIST_CL_HILB_PERFORMANCE_TESTS_HPP_ */
diff --git a/src/Vector/performance/vector_dist_cl_performance_tests.hpp b/src/Vector/performance/vector_dist_cl_performance_tests.hpp
deleted file mode 100644
index a9f590d730a6f122777a9b134ec6a4af8502f51a..0000000000000000000000000000000000000000
--- a/src/Vector/performance/vector_dist_cl_performance_tests.hpp
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * vector_dist_cl_performance_tests.hpp
- *
- *
- *  Created on: Mar 22, 2016
- *      Author: Yaroslav Zaluzhnyi
- */
-
-#ifndef SRC_VECTOR_VECTOR_DIST_CL_PERFORMANCE_TESTS_HPP_
-#define SRC_VECTOR_VECTOR_DIST_CL_PERFORMANCE_TESTS_HPP_
-
-#include "Vector/vector_dist.hpp"
-#include "data_type/aggregate.hpp"
-#include "Vector/vector_dist_unit_test.hpp"
-#include "Plot/GoogleChart.hpp"
-#include <functional>
-
-BOOST_AUTO_TEST_SUITE( vector_dist_cl_perf_test )
-
-///////////////////// INPUT DATA //////////////////////
-
-// Cut-off radiuses. Can be put different number of values
-openfpm::vector<float> r_cutoff {0.007,0.02};
-// Orders of a curve. Can be put different number of values
-openfpm::vector<size_t> orders = {1,2,3,5,7};
-// Number of steps of moving the particles
-size_t n_moving = 8;
-// Moving distance (step size)
-double dist = 0.1;
-// The starting amount of particles (remember that this number is multiplied by number of processors you use for testing)
-size_t k_start = 100000;
-// The minimal amount of particles
-size_t k_min = 15000;
-// Ghost part of distributed vector
-double ghost_part = 0.01;
-
-///////////////////////////////////////////////////////
-
-// Numbers of particles vector
-openfpm::vector<size_t> n_particles;
-// Vectors to store the data for 2D
-openfpm::vector<openfpm::vector<double>> time_rand;
-openfpm::vector<openfpm::vector<openfpm::vector<double>>> time_hilb;
-openfpm::vector<openfpm::vector<double>> time_total_rand;
-openfpm::vector<openfpm::vector<openfpm::vector<double>>> time_total_hilb;
-openfpm::vector<openfpm::vector<openfpm::vector<openfpm::vector<double>>>> time_hilb_moved;
-// Vectors to store the data for 3D
-openfpm::vector<openfpm::vector<double>> time_rand_2;
-openfpm::vector<openfpm::vector<openfpm::vector<double>>> time_hilb_2;
-openfpm::vector<openfpm::vector<double>> time_total_rand_2;
-openfpm::vector<openfpm::vector<openfpm::vector<double>>> time_total_hilb_2;
-openfpm::vector<openfpm::vector<openfpm::vector<openfpm::vector<double>>>> time_hilb_moved_2;
-
-
-BOOST_AUTO_TEST_CASE( vector_dist_cl_random_test )
-{
-	//Benchmark test for 2D and 3D
-	vd_cl_random_benchmark<2>(k_start,k_min,ghost_part,r_cutoff,n_particles,time_rand,time_total_rand);
-	vd_cl_random_benchmark<3>(k_start,k_min,ghost_part,r_cutoff,n_particles,time_rand_2,time_total_rand_2);
-}
-
-BOOST_AUTO_TEST_CASE( vector_dist_cl_hilbert_test )
-{
-	//Benchmark test for 2D and 3D
-	vd_cl_hilbert_benchmark<2>(k_start,k_min,ghost_part,n_moving,dist,r_cutoff,n_particles,orders,time_hilb,time_total_hilb,time_hilb_moved);
-	vd_cl_hilbert_benchmark<3>(k_start,k_min,ghost_part,n_moving,dist,r_cutoff,n_particles,orders,time_hilb_2,time_total_hilb_2,time_hilb_moved_2);
-}
-
-BOOST_AUTO_TEST_CASE(vector_dist_cl_performance_write_report)
-{
-	//Write report for 2D and 3D
-	vd_cl_performance_write_report<2>(n_moving,r_cutoff,n_particles,orders,time_hilb,time_rand,time_total_hilb,time_total_rand,time_hilb_moved);
-	vd_cl_performance_write_report<3>(n_moving,r_cutoff,n_particles,orders,time_hilb_2,time_rand_2,time_total_hilb_2,time_total_rand_2,time_hilb_moved_2);
-}
-
-BOOST_AUTO_TEST_SUITE_END()
-
-#endif /* SRC_VECTOR_VECTOR_DIST_CL_PERFORMANCE_TESTS_HPP_ */
diff --git a/src/Vector/performance/vector_dist_performance_util.hpp b/src/Vector/performance/vector_dist_performance_util.hpp
index bbc45d880d4de7405344fd5d51554b0eef3b4141..a773f9379d079ea213f8eb5857460760dc85a265 100644
--- a/src/Vector/performance/vector_dist_performance_util.hpp
+++ b/src/Vector/performance/vector_dist_performance_util.hpp
@@ -8,202 +8,46 @@
 #ifndef SRC_VECTOR_VECTOR_DIST_PERFORMANCE_UTIL_HPP_
 #define SRC_VECTOR_VECTOR_DIST_PERFORMANCE_UTIL_HPP_
 
+#include "cl_comp_performance_graph.hpp"
 #include "Plot/GoogleChart.hpp"
+#include "cl_part_performance_graph.hpp"
+#include "vector_dist_performance_common.hpp"
 
 // Number of tests
 #define N_VERLET_TEST 3
+#define N_STAT_TEST 30
 
-/*! \brief Print out only ones (no matter how many processors involved)
+/*! \brief Standard deviation
  *
- * \param test, sz Data to print out
- */
-void print_test_v(std::string test)
-{
-	if (create_vcluster().getProcessUnitID() == 0)
-		std::cout << test  << "\n";
-}
-
-/*! \brief Initialize a distributed vector
+ * \param measures set of measures
+ * \param mean the mean of the measures
  *
- * \param vd Distributed vector
- * \param v_cl Global vcluster
- * \param k_int Number of particles
- */
-template<unsigned int dim, typename v_dist> void vd_initialize(v_dist & vd, Vcluster & v_cl, size_t k_int)
-{
-	// The random generator engine
-	std::default_random_engine eg(v_cl.getProcessUnitID()*4313);
-	std::uniform_real_distribution<float> ud(0.0f, 1.0f);
-
-	//! [Create a vector of random elements on each processor 2D]
-
-	auto it = vd.getIterator();
-
-	while (it.isNext())
-	{
-		auto key = it.get();
-
-		for (size_t i = 0; i < dim; i++)
-			vd.getPos(key)[i] = ud(eg);
-
-		++it;
-	}
-
-	vd.map();
-}
-
-/*! \brief Initialize 2 distributed vectors with equally positioned particles
+ * \return the standard deviation
  *
- * \param vd, vd2 Distributed vectors
- * \param v_cl Global vcluster
- * \param k_int Number of particles
  */
-template<unsigned int dim, typename v_dist> void vd_initialize_double(v_dist & vd,v_dist & vd2, Vcluster & v_cl, size_t k_int)
+static inline void standard_deviation(openfpm::vector<double> measures, double & mean, double & dev)
 {
-	// The random generator engine
-	std::default_random_engine eg(v_cl.getProcessUnitID()*4313);
-	std::uniform_real_distribution<float> ud(0.0f, 1.0f);
-
-	//! [Create a vector of random elements on each processor 2D]
-
-	auto it = vd.getIterator();
+	for (size_t i = 0 ; i < measures.size() ; i++)
+		mean += measures.get(i);
+	mean /= measures.size();
 
-	while (it.isNext())
-	{
-		auto key = it.get();
-
-		for (size_t i = 0; i < dim; i++)
-		{
-			vd.getPos(key)[i] = ud(eg);
-			vd2.getPos(key)[i] = vd.getPos(key)[i];
-		}
-
-		++it;
-	}
+	dev = 0;
+	for (size_t i = 0 ; i < measures.size() ; i++)
+		dev += (measures.get(i) - mean)*(measures.get(i) - mean);
 
-	vd.map();
-	vd2.map();
+	dev = sqrt(dev / (measures.size() - 1));
 }
 
-/*! \brief Calculate and put particles' forces
+/*! \brief Print out only ones (no matter how many processors involved)
  *
- * \param NN Cell list
- * \param vd Distributed vector
- * \param r_cut Cut-off radius
+ * \param test, sz Data to print out
  */
-template<unsigned int dim, size_t prp = 0, typename T, typename V> void calc_forces(T & NN, V & vd, float r_cut)
+void print_test_v(std::string test)
 {
-	auto it_v = vd.getDomainIterator();
-
-	float sum[dim];
-
-	for (size_t i = 0; i < dim; i++)
-		sum[i] = 0;
-
-	while (it_v.isNext())
-	{
-		//key
-		vect_dist_key_dx key = it_v.get();
-
-    	// Get the position of the particles
-		Point<dim,float> p = vd.getPos(key);
-
-		for (size_t i = 0; i < dim; i++)
-			sum[i] = 0;
-
-    	// Get the neighborhood of the particle
-    	auto cell_it = NN.template getNNIterator<NO_CHECK>(NN.getCell(p));
-
-    	while(cell_it.isNext())
-    	{
-    		auto nnp = cell_it.get();
-
-    		// p != q
-    		if (nnp == key.getKey())
-    		{
-    			++cell_it;
-    			continue;
-    		}
-
-    		Point<dim,float> q = vd.getPos(nnp);
-
-    		if (p.distance2(q) < r_cut*r_cut)
-    		{
-				//Calculate the forces
-    			float num[dim];
-    			for (size_t i = 0; i < dim; i++)
-    				num[i] = vd.getPos(key)[i] - vd.getPos(nnp)[i];
-
-    			float denom = 0;
-    			for (size_t i = 0; i < dim; i++)
-					denom += num[i] * num[i];
-
-    			float res[dim];
-    			for (size_t i = 0; i < dim; i++)
-					res[i] = num[i] / denom;
-
-    			for (size_t i = 0; i < dim; i++)
-					sum[i] += res[i];
-    		}
-			//Next particle in a cell
-			++cell_it;
-		}
-
-		//Put the forces
-		for (size_t i = 0; i < dim; i++)
-			vd.template getProp<prp>(key)[i] += sum[i];
-
-		//Next particle in cell list
-		++it_v;
-	}
+	if (create_vcluster().getProcessUnitID() == 0)
+		std::cout << test  << "\n";
 }
 
-/*! \brief For each particle of vd calculate the accumulation of the distances of the neighborhood
- *          particles inside vd2
- *
- *
- * \param NN Cell list vd
- * \param NN2 Cell list vd2
- * \param vd Distributed vector
- * \param vd2 Distributed vector 2
- * \param r_cut Cut-off radius
- *
- */
-template<unsigned int dim, unsigned int prp, typename T, typename V> void cross_calc(T & NN, T & NN2, V & vd, V & vd2)
-{
-	auto it_v = vd.getDomainIterator();
-
-	while (it_v.isNext())
-	{
-		//key
-		vect_dist_key_dx key = it_v.get();
-
-    	// Get the position of the particles
-		Point<dim,float> p = vd.getPos(key);
-
-    	// Get the neighborhood of the particle
-    	auto cell_it = NN2.template getNNIterator<NO_CHECK>(NN2.getCell(p));
-
-    	double sum = 0.0;
-
-    	while(cell_it.isNext())
-    	{
-    		auto nnp = cell_it.get();
-
-    		Point<dim,float> q = vd2.getPos(nnp);
-
-    		sum += norm(p - q);
-
-			//Next particle in a cell
-			++cell_it;
-		}
-
-		vd.template getProp<prp>(key) = sum;
-
-		//Next particle in cell list
-		++it_v;
-	}
-}
 
 /*! \brief Benchmark particles' forces time
  *
@@ -291,78 +135,7 @@ template<typename V> double benchmark_get_verlet(V & vd, float r_cut)
 	return t.getwct();
 }
 
-/*! \brief Calculate and put particles' forces
- *
- * \param NN Cell list hilbert
- * \param vd Distributed vector
- * \param r_cut Cut-off radius
- */
-template<unsigned int dim, size_t prp = 0, typename T, typename V> void calc_forces_hilb(T & NN, V & vd, float r_cut)
-{
-	auto it_cl = NN.getIterator();
-
-	float sum[dim];
-
-	for (size_t i = 0; i < dim; i++)
-		sum[i] = 0;
-
-	while (it_cl.isNext())
-	{
-		//key
-		auto key = it_cl.get();
-
-    	// Get the position of the particles
-		Point<dim,float> p = vd.getPos(key);
-
-		for (size_t i = 0; i < dim; i++)
-			sum[i] = 0;
-
-    	// Get the neighborhood of the particle
-    	auto cell_it = NN.template getNNIterator<NO_CHECK>(NN.getCell(p));
-
-    	while(cell_it.isNext())
-    	{
-    		auto nnp = cell_it.get();
-
-    		// p != q
-    		if (nnp == key)
-    		{
-    			++cell_it;
-    			continue;
-    		}
-
-    		Point<dim,float> q = vd.getPos(nnp);
-
-    		if (p.distance2(q) < r_cut*r_cut)
-    		{
-				//Calculate the forces
-    			float num[dim];
-    			for (size_t i = 0; i < dim; i++)
-    				num[i] = vd.getPos(key)[i] - vd.getPos(nnp)[i];
-
-    			float denom = 0;
-    			for (size_t i = 0; i < dim; i++)
-					denom += num[i] * num[i];
-
-    			float res[dim];
-    			for (size_t i = 0; i < dim; i++)
-					res[i] = num[i] / denom;
-
-    			for (size_t i = 0; i < dim; i++)
-					sum[i] += res[i];
-    		}
-			//Next particle in a cell
-			++cell_it;
-		}
 
-		//Put the forces
-		for (size_t i = 0; i < dim; i++)
-			vd.template getProp<prp>(key)[i] += sum[i];
-
-		//Next particle in cell list
-		++it_cl;
-	}
-}
 
 /*! \brief Benchmark particles' forces time
  *
@@ -453,10 +226,19 @@ template<unsigned int dim, typename v_dist> void move_particles(v_dist & vd, dou
 /*! \brief Function for verlet test without an Hilbert curve reordering (unordered positioning)
  *
  */
-template<unsigned int dim> void vd_verlet_random_benchmark(size_t k_start, size_t k_min, double ghost_part, openfpm::vector<float> & r_cutoff, openfpm::vector<size_t> & n_particles, openfpm::vector<openfpm::vector<double>> & time_rand, openfpm::vector<openfpm::vector<double>> & time_total_rand)
+template<unsigned int dim> void vd_verlet_random_benchmark(size_t k_start,
+		                                                   size_t k_min,
+														   openfpm::vector<float> & r_cutoff,
+														   openfpm::vector<size_t> & n_particles,
+														   openfpm::vector<openfpm::vector<double>> & time_force_mean,
+														   openfpm::vector<openfpm::vector<double>> & time_create_mean,
+														   openfpm::vector<openfpm::vector<double>> & time_force_dev,
+														   openfpm::vector<openfpm::vector<double>> & time_create_dev)
 {
-	time_rand.resize(r_cutoff.size());
-	time_total_rand.resize(r_cutoff.size());
+	time_force_mean.resize(r_cutoff.size());
+	time_create_mean.resize(r_cutoff.size());
+	time_force_dev.resize(r_cutoff.size());
+	time_create_dev.resize(r_cutoff.size());
 
 	std::string str("Testing " + std::to_string(dim) + "D vector no-order, Verlet-list");
 	print_test_v(str);
@@ -496,7 +278,7 @@ template<unsigned int dim> void vd_verlet_random_benchmark(size_t k_start, size_
 				for (size_t i = 0; i < dim; i++)
 					bc[i] = PERIODIC;
 
-				vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd(k_int,box,bc,Ghost<dim,float>(ghost_part));
+				vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd(k_int,box,bc,Ghost<dim,float>(r_cut));
 
 				// Initialize a dist vector
 				vd_initialize<dim>(vd, v_cl, k_int);
@@ -505,27 +287,32 @@ template<unsigned int dim> void vd_verlet_random_benchmark(size_t k_start, size_
 
 				//Get verlet list
 
-				double sum_verlet = 0;
-				for (size_t n = 0 ; n < N_VERLET_TEST; n++)
-					sum_verlet += benchmark_get_verlet(vd,r_cut);
-				sum_verlet /= N_VERLET_TEST;
+				openfpm::vector<double> measures;
+				double sum_verlet_mean = 0;
+				double sum_verlet_dev = 0;
+				for (size_t n = 0 ; n < N_STAT_TEST; n++)
+					measures.add(benchmark_get_verlet(vd,r_cut));
+				standard_deviation(measures,sum_verlet_mean,sum_verlet_dev);
+
+				//Average total time
+				time_create_mean.get(r).add(sum_verlet_mean);
+				time_create_dev.get(r).add(sum_verlet_dev);
 
 				//Calculate forces
 
 				auto NN = vd.getCellList(r_cut);
-				double sum_forces = 0;
-
-				for (size_t l = 0 ; l < N_VERLET_TEST ; l++)
-					sum_forces += benchmark_calc_forces<dim>(NN,vd,r_cut);
-				sum_forces /= N_VERLET_TEST;
+				double sum_fr_mean = 0;
+				double sum_fr_dev = 0;
 
-				time_rand.get(r).add(sum_forces);
-
-				//Average total time
-				time_total_rand.get(r).add(sum_forces + sum_verlet);
+				measures.clear();
+				for (size_t l = 0 ; l < N_STAT_TEST ; l++)
+					measures.add(benchmark_calc_forces<dim>(NN,vd,r_cut));
+				standard_deviation(measures,sum_fr_mean,sum_fr_dev);
+				time_force_mean.get(r).add(sum_fr_mean);
+				time_force_dev.get(r).add(sum_fr_dev);
 
 				if (v_cl.getProcessUnitID() == 0)
-					std::cout << "Particles: " << k_int << "," << "cut-off: " << r_cut << " time to construct a Verlet list = " << sum_verlet << "    calculate force = " << sum_forces << std::endl;
+					std::cout << "Particles: " << k_int << "," << "cut-off: " << r_cut << " time to construct a Verlet list = " << sum_verlet_mean << " dev: " << sum_verlet_dev << "    calculate force = " << sum_fr_mean << " dev: " << sum_fr_dev << std::endl;
 			}
 		}
 	}
@@ -615,6 +402,8 @@ template<unsigned int dim> void vd_verlet_hilbert_benchmark(size_t k_start, size
 				for (size_t n = 0 ; n < N_VERLET_TEST; n++)
 					sum_verlet += benchmark_get_verlet(vd,r_cut);
 				sum_verlet /= N_VERLET_TEST;
+				//Average total time
+				time_total_hilb.get(r).get(part).get(i) = sum_verlet;
 
 				//Calculate forces
 
@@ -624,12 +413,8 @@ template<unsigned int dim> void vd_verlet_hilbert_benchmark(size_t k_start, size
 				for (size_t l = 0 ; l < N_VERLET_TEST; l++)
 					sum_forces += benchmark_calc_forces<dim>(NN,vd,r_cut);
 				sum_forces /= N_VERLET_TEST;
-
 				time_hilb.get(r).get(part).get(i) = sum_forces;
 
-				//Average total time
-				time_total_hilb.get(r).get(part).get(i) = sum_forces + sum_verlet + sum_reorder;
-
 				if (v_cl.getProcessUnitID() == 0)
 					std::cout << "Order = " << m << ", Cut-off = " << r_cut << ", Particles = " << k_int << ". Time to reorder: " << sum_reorder << " time to get the verlet-list: " << sum_verlet << " time to calculate forces: " << sum_forces << std::endl;
 			}
@@ -640,10 +425,15 @@ template<unsigned int dim> void vd_verlet_hilbert_benchmark(size_t k_start, size
 /*! \brief Function for cell list test without an Hilbert curve reordering (unordered positioning)
  *
  */
-template<unsigned int dim> void vd_cl_random_benchmark(size_t cl_k_start, size_t cl_k_min, double ghost_part, openfpm::vector<float> & cl_r_cutoff, openfpm::vector<size_t> & cl_n_particles, openfpm::vector<openfpm::vector<double>> & cl_time_rand, openfpm::vector<openfpm::vector<double>> & cl_time_total_rand)
+template<unsigned int dim> void cell_list_part_reorder_random_benchmark(size_t cl_k_start,
+		                                                                size_t cl_k_min,
+																		openfpm::vector<float> & cl_r_cutoff,
+																		openfpm::vector<size_t> & cl_n_particles,
+																		openfpm::vector<openfpm::vector<double>> & cl_time_rand_mean,
+																		openfpm::vector<openfpm::vector<double>> & cl_time_rand_dev)
 {
-	cl_time_rand.resize(cl_r_cutoff.size());
-	cl_time_total_rand.resize(cl_r_cutoff.size());
+	cl_time_rand_mean.resize(cl_r_cutoff.size());
+	cl_time_rand_dev.resize(cl_r_cutoff.size());
 
 	std::string str("Testing " + std::to_string(dim) + "D vector, no-order, Cell-list");
 	print_test_v(str);
@@ -685,7 +475,7 @@ template<unsigned int dim> void vd_cl_random_benchmark(size_t cl_k_start, size_t
 				for (size_t i = 0; i < dim; i++)
 					bc[i] = PERIODIC;
 
-				vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd(k_int,box,bc,Ghost<dim,float>(ghost_part));
+				vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd(k_int,box,bc,Ghost<dim,float>(r_cut));
 
 				// Initialize a dist vector
 				vd_initialize<dim>(vd, v_cl, k_int);
@@ -695,29 +485,24 @@ template<unsigned int dim> void vd_cl_random_benchmark(size_t cl_k_start, size_t
 				//Get a cell list
 
 				auto NN = vd.getCellList(r_cut);
-				size_t n = 0;
-				double sum_cl = 0;
+				double sum_fr_mean = 0;
+				double sum_fr_dev = 0;
 
-				for ( ; n < N_VERLET_TEST ; n++)
-					sum_cl += benchmark_get_celllist(NN,vd,r_cut);
-				sum_cl /= N_VERLET_TEST;
+				benchmark_get_celllist(NN,vd,r_cut);
 
 				//Calculate forces
-
-				double sum_forces = 0;
 				size_t l = 0;
 
-				for ( ; l < N_VERLET_TEST; l++)
-					sum_forces += benchmark_calc_forces<dim>(NN,vd,r_cut);
-				sum_forces /= N_VERLET_TEST;
-
-				cl_time_rand.get(r).add(sum_forces / l);
+				openfpm::vector<double> measures;
+				for ( ; l < N_STAT_TEST; l++)
+					measures.add(benchmark_calc_forces<dim>(NN,vd,r_cut));
+				standard_deviation(measures,sum_fr_mean,sum_fr_dev);
 
-				//Average total time
-				cl_time_total_rand.get(r).add(sum_forces + sum_cl);
+				cl_time_rand_mean.get(r).add(sum_fr_mean);
+				cl_time_rand_dev.get(r).add(sum_fr_dev);
 
 				if (v_cl.getProcessUnitID() == 0)
-					std::cout << "Cut-off = " << r_cut << ", Particles = " << k_int << " time to get the verlet-list: " << sum_cl << " time to calculate forces: " << sum_forces << std::endl;
+					std::cout << "Cut-off = " << r_cut << ", Particles = " << k_int << " time to calculate forces: " << sum_fr_mean << " dev: " << sum_fr_dev << std::endl;
 			}
 		}
 	}
@@ -726,40 +511,43 @@ template<unsigned int dim> void vd_cl_random_benchmark(size_t cl_k_start, size_t
 /*! \brief Function for cell list test with an Hilbert curve reordering
  *
  */
-template<unsigned int dim> void vd_cl_hilbert_benchmark(size_t cl_k_start, size_t cl_k_min, double ghost_part, size_t n_moving, double dist, openfpm::vector<float> & cl_r_cutoff, openfpm::vector<size_t> & cl_n_particles, openfpm::vector<size_t> &cl_orders, openfpm::vector<openfpm::vector<openfpm::vector<double>>> &cl_time_hilb, openfpm::vector<openfpm::vector<openfpm::vector<double>>> &cl_time_total_hilb, openfpm::vector<openfpm::vector<openfpm::vector<openfpm::vector<double>>>> &cl_time_hilb_moved)
+template<unsigned int dim> void cell_list_part_reorder_hilbert_benchmark(size_t cl_k_start,
+		                                                                 size_t cl_k_min,
+																		 size_t n_moving,
+																		 double dist,
+																		 openfpm::vector<float> & cl_r_cutoff,
+																		 openfpm::vector<size_t> & cl_n_particles,
+																		 openfpm::vector<size_t> &cl_orders,
+																		 openfpm::vector<openfpm::vector<openfpm::vector<double>>> &cl_time_hilb_mean,
+																		 openfpm::vector<openfpm::vector<openfpm::vector<double>>> &cl_time_reorder_hilb_mean,
+																		 openfpm::vector<openfpm::vector<openfpm::vector<double>>> &cl_time_hilb_dev,
+																		 openfpm::vector<openfpm::vector<openfpm::vector<double>>> &cl_time_reorder_hilb_dev)
 {
 	{
-		cl_time_hilb.resize(cl_r_cutoff.size());
-		for (size_t r = 0; r < cl_time_hilb.size(); r++)
+		cl_time_hilb_mean.resize(cl_r_cutoff.size());
+		cl_time_hilb_dev.resize(cl_r_cutoff.size());
+		for (size_t r = 0; r < cl_time_hilb_mean.size(); r++)
 		{
-			cl_time_hilb.get(r).resize(cl_n_particles.size());
-			for (size_t k = 0; k < cl_time_hilb.get(r).size(); k++)
+			cl_time_hilb_mean.get(r).resize(cl_n_particles.size());
+			cl_time_hilb_dev.get(r).resize(cl_n_particles.size());
+			for (size_t k = 0; k < cl_time_hilb_mean.get(r).size(); k++)
 			{
-				cl_time_hilb.get(r).get(k).resize(cl_orders.size());
+				cl_time_hilb_mean.get(r).get(k).resize(cl_orders.size());
+				cl_time_hilb_dev.get(r).get(k).resize(cl_orders.size());
 			}
 		}
 
-		cl_time_hilb_moved.resize(n_moving);
-		for (size_t d = 0; d < cl_time_hilb_moved.size(); d++)
-		{
-			cl_time_hilb_moved.get(d).resize(cl_r_cutoff.size());
-			for (size_t r = 0; r < cl_time_hilb_moved.get(d).size(); r++)
-			{
-				cl_time_hilb_moved.get(d).get(r).resize(cl_n_particles.size());
-				for (size_t k = 0; k < cl_time_hilb_moved.get(d).get(r).size(); k++)
-				{
-					cl_time_hilb_moved.get(d).get(r).get(k).resize(cl_orders.size());
-				}
-			}
-		}
 
-		cl_time_total_hilb.resize(cl_r_cutoff.size());
-		for (size_t r = 0; r < cl_time_total_hilb.size(); r++)
+		cl_time_reorder_hilb_mean.resize(cl_r_cutoff.size());
+		cl_time_reorder_hilb_dev.resize(cl_r_cutoff.size());
+		for (size_t r = 0; r < cl_time_reorder_hilb_mean.size(); r++)
 		{
-			cl_time_total_hilb.get(r).resize(cl_n_particles.size());
-			for (size_t k = 0; k < cl_time_total_hilb.get(r).size(); k++)
+			cl_time_reorder_hilb_mean.get(r).resize(cl_n_particles.size());
+			cl_time_reorder_hilb_dev.get(r).resize(cl_n_particles.size());
+			for (size_t k = 0; k < cl_time_reorder_hilb_mean.get(r).size(); k++)
 			{
-				cl_time_total_hilb.get(r).get(k).resize(cl_orders.size());
+				cl_time_reorder_hilb_mean.get(r).get(k).resize(cl_orders.size());
+				cl_time_reorder_hilb_dev.get(r).get(k).resize(cl_orders.size());
 			}
 		}
 
@@ -802,66 +590,49 @@ template<unsigned int dim> void vd_cl_hilbert_benchmark(size_t cl_k_start, size_
 					for (size_t i = 0; i < dim; i++)
 						bc[i] = PERIODIC;
 
-					vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd(k_int,box,bc,Ghost<dim,float>(ghost_part));
+					vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd(k_int,box,bc,Ghost<dim,float>(r_cut));
 
 					// Initialize a dist vector
 					vd_initialize<dim>(vd, v_cl, k_int);
 
 					//Reorder a vector
 
-					double sum_reorder = 0;
-					for (size_t h = 0 ; h < 3; h++)
-						sum_reorder += benchmark_reorder(vd,m);
-					sum_reorder /= N_VERLET_TEST;
+					double sum_reorder_mean = 0;
+					double sum_reorder_dev = 0;
+
+					openfpm::vector<double> measures;
+
+					for (size_t h = 0 ; h < N_STAT_TEST; h++)
+						measures.add(benchmark_reorder(vd,m));
+					standard_deviation(measures,sum_reorder_mean,sum_reorder_dev);
+
+					//Average reorder time
+					cl_time_reorder_hilb_mean.get(r).get(part).get(i) = sum_reorder_mean;
+					cl_time_reorder_hilb_dev.get(r).get(part).get(i) = sum_reorder_dev;
 
 					vd.template ghost_get<0>();
 
 					//Get cell list
 
 					auto NN = vd.getCellList(r_cut);
-					double sum_cl = 0;
-					for (size_t n = 0 ; n < N_VERLET_TEST; n++)
-						sum_cl += benchmark_get_celllist(NN,vd,r_cut);
-					sum_cl /= N_VERLET_TEST;
+					benchmark_get_celllist(NN,vd,r_cut);
 
 					//Calculate forces
 
-					double sum_forces = 0;
-					for (size_t l = 0 ; l < N_VERLET_TEST ; l++)
-						sum_forces += benchmark_calc_forces<dim>(NN,vd,r_cut);
-					sum_forces /= N_VERLET_TEST;
+					double sum_fr_mean = 0;
+					double sum_fr_dev = 0;
+					measures.clear();
 
-					if (v_cl.getProcessUnitID() == 0)
-						std::cout << "Cut-off = " << r_cut << ", Particles = " << k_int << ". Time to reorder: " << sum_reorder << " time to get the cell-list: " << sum_cl << std::endl;
+					for (size_t l = 0 ; l < N_STAT_TEST ; l++)
+						measures.add(benchmark_calc_forces<dim>(NN,vd,r_cut));
+					standard_deviation(measures,sum_fr_mean,sum_fr_dev);
 
-					cl_time_hilb.get(r).get(part).get(i) = sum_forces;
+					cl_time_hilb_mean.get(r).get(part).get(i) = sum_fr_mean;
+					cl_time_hilb_dev.get(r).get(part).get(i) = sum_fr_dev;
 
-					//Average total time
-					cl_time_total_hilb.get(r).get(part).get(i) = sum_forces + sum_cl + sum_reorder;
 
-					//Move particles
-					for ( size_t d = 0; d < n_moving; d++)
-					{
-						move_particles<dim>(vd,dist);
-
-						vd.map();
-
-						vd.template ghost_get<0>();
-
-						auto NN = vd.getCellList(r_cut);
-
-						//Calculate forces
-
-						double sum_forces_moved = 0;
-						for (size_t j = 0 ; j < N_VERLET_TEST; j++)
-							sum_forces_moved += benchmark_calc_forces<dim>(NN,vd,r_cut);
-						sum_forces_moved /= N_VERLET_TEST;
-
-						cl_time_hilb_moved.get(d).get(r).get(part).get(i) = sum_forces_moved;
-
-						if (v_cl.getProcessUnitID() == 0)
-							std::cout << "Order = " << m << " Cut-off = " << r_cut << ", Particles = " << k_int << ". Time to reorder: " << sum_reorder << " time to calculate forces: " << sum_forces_moved << std::endl;
-					}
+					if (v_cl.getProcessUnitID() == 0)
+						std::cout << "Cut-off = " << r_cut << ", Particles = " << k_int << ". Time to reorder: " << sum_reorder_mean << " dev: " << sum_reorder_dev << "      time calculate forces: " << sum_fr_mean << " dev: " << sum_fr_dev << std::endl;
 				}
 			}
 		}
@@ -871,10 +642,19 @@ template<unsigned int dim> void vd_cl_hilbert_benchmark(size_t cl_k_start, size_
 /*! \brief Function for random cell list test
  *
  */
-template<unsigned int dim> void vd_celllist_random_benchmark(size_t cl_k_start, size_t cl_k_min, double ghost_part, openfpm::vector<float> & cl_r_cutoff, openfpm::vector<size_t> & cl_n_particles, openfpm::vector<openfpm::vector<double>> & cl_time_rand, openfpm::vector<openfpm::vector<double>> & cl_time_total_rand)
+template<unsigned int dim> void cell_list_comp_reorder_random_benchmark(size_t cl_k_start,
+		                                                                size_t cl_k_min,
+																		openfpm::vector<float> & cl_r_cutoff,
+																		openfpm::vector<size_t> & cl_n_particles,
+																		openfpm::vector<openfpm::vector<double>> & cl_time_rand_mean,
+																		openfpm::vector<openfpm::vector<double>> & cl_time_rand_dev,
+																		openfpm::vector<openfpm::vector<double>> & cl_time_create_rand_mean,
+																		openfpm::vector<openfpm::vector<double>> & cl_time_create_rand_dev)
 {
-	cl_time_rand.resize(cl_r_cutoff.size());
-	cl_time_total_rand.resize(cl_r_cutoff.size());
+	cl_time_rand_mean.resize(cl_r_cutoff.size());
+	cl_time_create_rand_mean.resize(cl_r_cutoff.size());
+	cl_time_rand_dev.resize(cl_r_cutoff.size());
+	cl_time_create_rand_dev.resize(cl_r_cutoff.size());
 
 	std::string str("Testing " + std::to_string(dim) + "D vector, no order, cell-list");
 	print_test_v(str);
@@ -916,7 +696,7 @@ template<unsigned int dim> void vd_celllist_random_benchmark(size_t cl_k_start,
 				for (size_t i = 0; i < dim; i++)
 					bc[i] = PERIODIC;
 
-				vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd(k_int,box,bc,Ghost<dim,float>(ghost_part));
+				vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd(k_int,box,bc,Ghost<dim,float>(r_cut));
 
 				// Initialize a dist vector
 				vd_initialize<dim>(vd, v_cl, k_int);
@@ -926,27 +706,33 @@ template<unsigned int dim> void vd_celllist_random_benchmark(size_t cl_k_start,
 				//Get a cell list
 
 				auto NN = vd.getCellList(r_cut);
-				double sum_cl = 0;
+				double sum_cl_mean = 0;
+				double sum_cl_dev = 0;
 
-				for (size_t n = 0 ; n < 3; n++)
-					sum_cl += benchmark_get_celllist(NN,vd,r_cut);
-				sum_cl /= N_VERLET_TEST;
+				openfpm::vector<double> measures;
+				for (size_t n = 0 ; n < N_STAT_TEST; n++)
+					measures.add(benchmark_get_celllist(NN,vd,r_cut));
+				standard_deviation(measures,sum_cl_mean,sum_cl_dev);
+				//Average total time
 
-				//Calculate forces
+				cl_time_create_rand_mean.get(r).add(sum_cl_mean);
+				cl_time_create_rand_dev.get(r).add(sum_cl_dev);
 
-				double sum_forces = 0;
+				//Calculate forces
 
-				for (size_t l = 0 ; l < 3; l++)
-					sum_forces += benchmark_calc_forces<dim>(NN,vd,r_cut);
-				sum_forces /= N_VERLET_TEST;
+				double sum_fr_mean = 0;
+				double sum_fr_dev = 0;
 
-				cl_time_rand.get(r).add(sum_forces);
+				measures.clear();
+				for (size_t l = 0 ; l < N_STAT_TEST; l++)
+					measures.add(benchmark_calc_forces<dim>(NN,vd,r_cut));
+				standard_deviation(measures,sum_fr_mean,sum_fr_dev);
 
-				//Average total time
-				cl_time_total_rand.get(r).add(sum_forces + sum_cl);
+				cl_time_rand_mean.get(r).add(sum_fr_mean);
+				cl_time_rand_dev.get(r).add(sum_fr_dev);
 
 				if (v_cl.getProcessUnitID() == 0)
-					std::cout << "Cut-off = " << r_cut << ", Particles = " << k_int << ". Time to create a cell-list: " << sum_cl << " time to calculate forces: " << sum_forces << std::endl;
+					std::cout << "Cut-off = " << r_cut << ", Particles = " << k_int << ". Time to create a cell-list: " << sum_cl_mean << " dev: " << sum_cl_dev << "    time to calculate forces: " << sum_fr_mean << " dev: " << sum_fr_dev << std::endl;
 			}
 		}
 	}
@@ -955,12 +741,21 @@ template<unsigned int dim> void vd_celllist_random_benchmark(size_t cl_k_start,
 /*! \brief Function for hilb cell list test
  *
  */
-template<unsigned int dim> void vd_celllist_hilbert_benchmark(size_t cl_k_start, size_t cl_k_min, double ghost_part, openfpm::vector<float> & cl_r_cutoff, openfpm::vector<size_t> & cl_n_particles, openfpm::vector<openfpm::vector<double>> & cl_time_hilb, openfpm::vector<openfpm::vector<double>> & cl_time_total_hilb)
+template<unsigned int dim> void cell_list_comp_reorder_hilbert_benchmark(size_t cl_k_start,
+		                                                                 size_t cl_k_min,
+																		 openfpm::vector<float> & cl_r_cutoff,
+																		 openfpm::vector<size_t> & cl_n_particles,
+																		 openfpm::vector<openfpm::vector<double>> & cl_time_force_mean,
+																		 openfpm::vector<openfpm::vector<double>> & cl_time_force_dev,
+																		 openfpm::vector<openfpm::vector<double>> & cl_time_create_mean,
+																		 openfpm::vector<openfpm::vector<double>> & cl_time_create_dev)
 {
-	cl_time_hilb.resize(cl_r_cutoff.size());
-	cl_time_total_hilb.resize(cl_r_cutoff.size());
+	cl_time_force_mean.resize(cl_r_cutoff.size());
+	cl_time_create_mean.resize(cl_r_cutoff.size());
+	cl_time_force_dev.resize(cl_r_cutoff.size());
+	cl_time_create_dev.resize(cl_r_cutoff.size());
 
-	std::string str("Testing " + std::to_string(dim) + "D vector, Hilbert reorder, cell list");
+	std::string str("Testing " + std::to_string(dim) + "D vector, Hilbert comp reorder, cell list");
 	print_test_v(str);
 
 	{
@@ -1000,7 +795,7 @@ template<unsigned int dim> void vd_celllist_hilbert_benchmark(size_t cl_k_start,
 				for (size_t i = 0; i < dim; i++)
 					bc[i] = PERIODIC;
 
-				vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd(k_int,box,bc,Ghost<dim,float>(ghost_part));
+				vector_dist<dim,float, aggregate<float[dim]>, CartDecomposition<dim,float> > vd(k_int,box,bc,Ghost<dim,float>(r_cut));
 
 				// Initialize a dist vector
 				vd_initialize<dim>(vd, v_cl, k_int);
@@ -1011,25 +806,32 @@ template<unsigned int dim> void vd_celllist_hilbert_benchmark(size_t cl_k_start,
 
 				auto NN = vd.getCellList_hilb(r_cut);
 
-				double sum_cl = 0;
-				for (size_t n = 0 ; n < 3; n++)
-					sum_cl += benchmark_get_celllist_hilb(NN,vd,r_cut);
-				sum_cl /= N_VERLET_TEST;
+				openfpm::vector<double> measures;
+
+				double sum_cl_mean = 0;
+				double sum_cl_dev = 0;
+				for (size_t n = 0 ; n < N_VERLET_TEST; n++)
+					measures.add(benchmark_get_celllist_hilb(NN,vd,r_cut));
+				standard_deviation(measures,sum_cl_mean,sum_cl_dev);
+				//Average total time
+				cl_time_create_mean.get(r).add(sum_cl_mean);
+				cl_time_create_dev.get(r).add(sum_cl_dev);
 
 				//Calculate forces
 
-				double sum_forces = 0;
+				double sum_fr_mean = 0;
+				double sum_fr_dev = 0;
 
-				for (size_t l = 0 ; l < 3; l++)
-					sum_forces += benchmark_calc_forces_hilb<dim>(NN,vd,r_cut);
-				sum_forces /= N_VERLET_TEST;
-				cl_time_hilb.get(r).add(sum_forces);
+				measures.clear();
+				for (size_t l = 0 ; l < N_VERLET_TEST; l++)
+					measures.add(benchmark_calc_forces_hilb<dim>(NN,vd,r_cut));
+				standard_deviation(measures,sum_fr_mean,sum_fr_dev);
 
-				//Average total time
-				cl_time_total_hilb.get(r).add(sum_forces + sum_cl);
+				cl_time_force_mean.get(r).add(sum_fr_mean);
+				cl_time_force_dev.get(r).add(sum_fr_dev);
 
 				if (v_cl.getProcessUnitID() == 0)
-					std::cout << "Cut-off = " << r_cut << ", Particles = " << k_int << ". Time to reorder: " << sum_cl << " time to calculate forces: " << sum_forces << std::endl;
+					std::cout << "Cut-off = " << r_cut << ", Particles = " << k_int << ". Time to create: " << sum_cl_mean << " dev: " << sum_cl_dev << " time to create a Cell-list: " << sum_fr_mean << " dev: " << sum_fr_dev << std::endl;
 			}
 		}
 	}
@@ -1040,197 +842,134 @@ template<unsigned int dim> void vd_celllist_hilbert_benchmark(size_t cl_k_start,
 /*! \brief Function for verlet performance report
  *
  */
-template<unsigned int dim> void vd_verlet_performance_write_report(openfpm::vector<float> & r_cutoff,openfpm::vector<size_t> & n_particles,openfpm::vector<size_t> orders,openfpm::vector<openfpm::vector<openfpm::vector<double>>> time_hilb,openfpm::vector<openfpm::vector<double>> time_rand,openfpm::vector<openfpm::vector<openfpm::vector<double>>> time_total_hilb,openfpm::vector<openfpm::vector<double>> time_total_rand)
+template<unsigned int dim> void vd_verlet_performance_write_report(GoogleChart & cg,
+																   openfpm::vector<float> & r_cutoff,
+		                                                           openfpm::vector<size_t> & n_particles,
+																   openfpm::vector<openfpm::vector<double>> time_force_mean,
+																   openfpm::vector<openfpm::vector<double>> time_force_dev,
+																   openfpm::vector<openfpm::vector<double>> time_create_mean,
+																   openfpm::vector<openfpm::vector<double>> time_create_dev)
 {
+	// Get the test dir
+	std::string file_mean(test_dir);
+	std::string file_var(test_dir);
+	file_mean += std::string("/openfpm_pdata/verlet_comp_create_mean_" + std::to_string(dim) + std::string("_ref"));
+	file_var += std::string("/openfpm_pdata/verlet_comp_create_dev_" + std::to_string(dim) + std::string("_ref"));
+
+	openfpm::vector<openfpm::vector<openfpm::vector<double>>> y_ref_create_mean;
+	openfpm::vector<openfpm::vector<openfpm::vector<double>>> y_ref_create_dev;
+	y_ref_create_mean.load(file_mean);
+	y_ref_create_dev.load(file_var);
+
+	// Get the test dir
+	std::string file_mean2(test_dir);
+	std::string file_var2(test_dir);
+	file_mean2 += std::string("/openfpm_pdata/verlet_comp_force_mean_" + std::to_string(dim) + std::string("_ref"));
+	file_var2 += std::string("/openfpm_pdata/verlet_comp_force_dev_" + std::to_string(dim) + std::string("_ref"));
+
+	openfpm::vector<openfpm::vector<openfpm::vector<double>>> y_ref_force_mean;
+	openfpm::vector<openfpm::vector<openfpm::vector<double>>> y_ref_force_dev;
+	y_ref_force_mean.load(file_mean2);
+	y_ref_force_dev.load(file_var2);
+
 	// Speedup graphs data
 	openfpm::vector<size_t> x;
 	openfpm::vector<openfpm::vector<openfpm::vector<double>>> y;
+	openfpm::vector<openfpm::vector<openfpm::vector<double>>> y_dev;
 	openfpm::vector<std::string> yn;
 
+	yn.add("Force verlet");
 	for (size_t i = 0; i < n_particles.size() ; i++)
 		x.add(n_particles.get(i));
 
-	for (size_t i = 0; i < orders.size(); i++)
-		yn.add("Order of: " + std::to_string(orders.get(i)));
-
-	y.resize(time_hilb.size());
-	for (size_t r = 0; r < time_hilb.size(); r++)
+	y.resize(time_force_mean.size());
+	y_dev.resize(time_force_mean.size());
+	for (size_t r = 0; r < time_force_mean.size(); r++)
 	{
-		y.get(r).resize(time_hilb.get(r).size());
-		for (size_t k = 0; k < time_hilb.get(r).size(); k++)
+		y.get(r).resize(time_force_mean.get(r).size());
+		y_dev.get(r).resize(time_force_mean.get(r).size());
+		for (size_t k = 0; k < time_force_mean.get(r).size(); k++)
 		{
-			for (size_t m = 0; m < time_hilb.get(r).get(k).size(); m++)
-			{
-				// Put the speedup
-				y.get(r).get(k).add(time_rand.get(r).get(k)/time_hilb.get(r).get(k).get(m));
-			}
-		}
-	}
-
+			// Put the speedup
+			y.get(r).get(k).add(time_force_mean.get(r).get(k));
 
-	// Calculation time graphs data
-
-	openfpm::vector<openfpm::vector<openfpm::vector<double>>> y2;
-	openfpm::vector<std::string> yn2;
-
-	yn2.add("Random");
-	for (size_t i = 0; i < orders.size(); i++)
-		yn2.add("Order of: " + std::to_string(orders.get(i)));
-
-	y2.resize(time_total_hilb.size());
-	for (size_t r = 0; r < time_total_hilb.size(); r++)
-	{
-		y2.get(r).resize(time_total_hilb.get(r).size());
-		for (size_t k = 0; k < time_total_hilb.get(r).size(); k++)
-		{
-			// Put a random case total time
-			y2.get(r).get(k).add(time_total_rand.get(r).get(k));
-			for (size_t m = 0; m < time_total_hilb.get(r).get(k).size(); m++)
-			{
-				// Put an Hilbert case total time
-				y2.get(r).get(k).add(time_total_hilb.get(r).get(k).get(m));
-			}
+			y_dev.get(r).get(k).add(time_force_dev.get(r).get(k));
 		}
 	}
 
-	// Speedup graphs report
-
-	// Google charts options
-	GCoptions options;
-
-	options.yAxis = std::string("Speedup (times)");
-	options.xAxis = std::string("Number of particles");
-	options.lineWidth = 2;
-
-	GoogleChart cg;
-
-	std::string str("<h1>Distributed " + std::to_string(dim) + "-D vector performance tests: </h1>");
-	str += "<h2> 1) Speedup between an unordered positioning and an Hilbert curve positioning of particles</h2>";
-	str += "We create a distributed vector (VD) of randomly positioned in a " + std::to_string(dim) + "D-box particles. Then we get a verlet list of VD, with a certain cut-off radius. After that we calculate the forces of each particle. Later "
-			"a VD is reordered according to an Hilbert curve, then we calculate forces again and compare the forces calculation time for an unordered and an Hilbert curve cases. The speedup is calculated and shown on graphs below, depending on different numbers of particles.";
-	cg.addHTML(str);
-
-	for (size_t i = 0; i < r_cutoff.size(); i++)
-	{
-		options.title = std::string("Distributed vector performance speedup, cut-off radius: " + std::to_string(r_cutoff.get(i)));
-		cg.AddLinesGraph(x,y.get(i),yn,options);
-	}
-
-	// Calculation time graphs report
-
-	// Google charts options
-	GCoptions options2;
-
-	options2.yAxis = std::string("Total calculation time (s)");
-	options2.xAxis = std::string("Number of particles");
-	options2.lineWidth = 2;
+	y.save("verlet_comp_force_mean_" + std::to_string(dim) + std::to_string("_ref"));
+	y_dev.save("verlet_comp_force_dev_" + std::to_string(dim) + std::to_string("_ref"));
 
-	std::string str2("<h2>2) Total calculation time</h2>");
-	str2 += "We count a total calculation time. In the case of unordered positioning it is: verlet list creation time + forces calculation time; in the case of an Hilbert curve positioning: reordering time + verlet list creation time + forces calculation time."
-			"The total calculation time is shown on graphs below, depending on different numbers of particles.";
-	cg.addHTML(str2);
-
-	for (size_t i = 0; i < r_cutoff.size(); i++)
+	if (y_ref_force_mean.size() != 0)
 	{
-		options2.title = std::string("Distributed vector performance, cut-off radius: " + std::to_string(r_cutoff.get(i)));
-		cg.AddLinesGraph(x,y2.get(i),yn2,options2);
-	}
-
-	// Get the directory of the performance test files
-	cg.write(std::string(test_dir) + "/openfpm_pdata/Vect_dist_verlet_perf_" + std::to_string(dim) + "D.html");
-}
-
-
-/*! \brief Function for cell list performance report
- *
- */
-template<unsigned int dim> void vd_cl_performance_write_report(size_t n_moving,openfpm::vector<float> & cl_r_cutoff,openfpm::vector<size_t> & cl_n_particles,openfpm::vector<size_t> cl_orders,openfpm::vector<openfpm::vector<openfpm::vector<double>>> cl_time_hilb,openfpm::vector<openfpm::vector<double>> cl_time_rand,openfpm::vector<openfpm::vector<openfpm::vector<double>>> cl_time_total_hilb,openfpm::vector<openfpm::vector<double>> cl_time_total_rand,openfpm::vector<openfpm::vector<openfpm::vector<openfpm::vector<double>>>> cl_time_hilb_moved)
-{
-	// Speedup graphs data
-
-	openfpm::vector<size_t> x;
-	openfpm::vector<openfpm::vector<openfpm::vector<double>>> y;
-	openfpm::vector<std::string> yn;
+		yn.clear();
 
-	for (size_t i = 0; i < cl_n_particles.size() ; i++)
-		x.add(cl_n_particles.get(i));
-
-	for (size_t i = 0; i < cl_orders.size(); i++)
-		yn.add("Order of: " + std::to_string(cl_orders.get(i)));
+		yn.add("Force verlet");
+		yn.add("interval");
+		yn.add("interval");
 
-	y.resize(cl_time_hilb.size());
-	for (size_t r = 0; r < cl_time_hilb.size(); r++)
-	{
-		y.get(r).resize(cl_time_hilb.get(r).size());
-		for (size_t k = 0; k < cl_time_hilb.get(r).size(); k++)
+		y.clear();
+		y.resize(time_force_mean.size());
+		for (size_t r = 0; r < time_force_mean.size(); r++)
 		{
-			for (size_t m = 0; m < cl_time_hilb.get(r).get(k).size(); m++)
+			y.get(r).resize(time_force_mean.get(r).size());
+			y_dev.get(r).resize(time_force_mean.get(r).size());
+			for (size_t k = 0; k < time_force_mean.get(r).size(); k++)
 			{
-				// Put a speedup
-				y.get(r).get(k).add(cl_time_rand.get(r).get(k) / cl_time_hilb.get(r).get(k).get(m));
+				// Put the speedup
+				y.get(r).get(k).add(time_force_mean.get(r).get(k));
+				y.get(r).get(k).add(y_ref_force_mean.get(r).get(k).get(0) - 3.0*y_ref_force_dev.get(r).get(k).get(0) );
+				y.get(r).get(k).add(y_ref_force_mean.get(r).get(k).get(0) + 3.0*y_ref_force_dev.get(r).get(k).get(0) );
 			}
 		}
 	}
 
-
 	// Calculation time graphs data
 
 	openfpm::vector<openfpm::vector<openfpm::vector<double>>> y2;
+	openfpm::vector<openfpm::vector<openfpm::vector<double>>> y2_dev;
 	openfpm::vector<std::string> yn2;
 
-	yn2.add("Random");
-	for (size_t i = 0; i < cl_orders.size(); i++)
-		yn2.add("Order of: " + std::to_string(cl_orders.get(i)));
+	yn2.add("Create a Verlet");
 
-	y2.resize(cl_time_total_hilb.size());
-	for (size_t r = 0; r < cl_time_total_hilb.size(); r++)
+	y2.resize(time_create_mean.size());
+	y2_dev.resize(time_create_mean.size());
+	for (size_t r = 0; r < time_create_mean.size(); r++)
 	{
-		y2.get(r).resize(cl_time_total_hilb.get(r).size());
-		for (size_t k = 0; k < cl_time_total_hilb.get(r).size(); k++)
+		y2.get(r).resize(time_create_mean.get(r).size());
+		y2_dev.get(r).resize(time_create_mean.get(r).size());
+		for (size_t k = 0; k < time_create_mean.get(r).size(); k++)
 		{
 			// Put a random case total time
-			y2.get(r).get(k).add(cl_time_total_rand.get(r).get(k));
-			for (size_t m = 0; m < cl_time_total_hilb.get(r).get(k).size(); m++)
-			{
-				// Put an Hilbert case total time
-				y2.get(r).get(k).add(cl_time_total_hilb.get(r).get(k).get(m));
-			}
+			y2.get(r).get(k).add(time_create_mean.get(r).get(k));
+
+			y2_dev.get(r).get(k).add(time_create_dev.get(r).get(k));
 		}
 	}
 
-	// Speedup difference graphs data
-
-	openfpm::vector<size_t> x3;
-	openfpm::vector<openfpm::vector<openfpm::vector<openfpm::vector<double>>>> y3;
-	openfpm::vector<std::string> yn3;
+	y2.save("verlet_comp_create_mean_" + std::to_string(dim) + std::to_string("_ref"));
+	y2_dev.save("verlet_comp_create_dev_" + std::to_string(dim) + std::to_string("_ref"));
 
-	//'+1' for '0 moving step' case (before moving)
-	for (size_t i = 0; i < n_moving + 1 ; i++)
-		x3.add(i);
+	if (y_ref_create_mean.size() != 0)
+	{
+		yn2.clear();
+		yn2.add("Create a Verlet");
+		yn2.add("interval");
+		yn2.add("interval");
 
-	yn3.add("No speedup line");
-	for (size_t i = 0; i < cl_orders.size(); i++)
-		yn3.add("Order of: " + std::to_string(cl_orders.get(i)));
+		y2.clear();
 
-	y3.resize(cl_n_particles.size());
-	for (size_t k = 0; k < cl_n_particles.size(); k++)
-	{
-		y3.get(k).resize(cl_r_cutoff.size());
-		for (size_t r = 0; r < cl_r_cutoff.size(); r++)
+		y2.resize(time_create_mean.size());
+		for (size_t r = 0; r < time_create_mean.size(); r++)
 		{
-			y3.get(k).get(r).resize(n_moving+1);
-			for (size_t d = 0; d < n_moving+1; d++)
+			y2.get(r).resize(time_create_mean.get(r).size());
+			for (size_t k = 0; k < time_create_mean.get(r).size(); k++)
 			{
-				y3.get(k).get(r).get(d).add(1.0);
-				for (size_t m = 0; m < cl_orders.size(); m++)
-				{
-					if (d == 0)
-						// Put a "speedup before moving the particles"
-						y3.get(k).get(r).get(0).add(cl_time_rand.get(r).get(k) / cl_time_hilb.get(r).get(k).get(m));
-					else
-						// Put a "speedup for each moving step"
-						y3.get(k).get(r).get(d).add(cl_time_rand.get(r).get(k) / cl_time_hilb_moved.get(d-1).get(r).get(k).get(m));
-				}
+				// Put a random case total time
+				y2.get(r).get(k).add(time_create_mean.get(r).get(k));
+
+				y2.get(r).get(k).add(y_ref_create_mean.get(r).get(k).get(0) - 3.0*y_ref_create_dev.get(r).get(k).get(0) );
+				y2.get(r).get(k).add(y_ref_create_mean.get(r).get(k).get(0) + 3.0*y_ref_create_dev.get(r).get(k).get(0) );
 			}
 		}
 	}
@@ -1240,22 +979,17 @@ template<unsigned int dim> void vd_cl_performance_write_report(size_t n_moving,o
 	// Google charts options
 	GCoptions options;
 
-	options.yAxis = std::string("Speedup (times)");
+	options.yAxis = std::string("Time (s)");
 	options.xAxis = std::string("Number of particles");
 	options.lineWidth = 2;
+	options.more = GC_Y_LOG + "," + GC_ZOOM;
 
-	//options.more = "hAxis: {logScale: true}";
-
-	GoogleChart cg;
-
-	std::string str("<h1>Distributed " + std::to_string(dim) + "-D vector performance tests: </h1>");
-	str += "<h2> 1) Speedup in force calculation between an unordered vector and a vector with particles ordered along an Hilbert curve </h2>";
-
+	std::string str("<h1>Verlet-list " + std::to_string(dim) + "-D performance test force calculation: </h1>");
 	cg.addHTML(str);
 
-	for (size_t i = 0; i < cl_r_cutoff.size(); i++)
+	for (size_t i = 0; i < r_cutoff.size(); i++)
 	{
-		options.title = std::string("Distributed vector performance speedup, cut-off radius: " + std::to_string(cl_r_cutoff.get(i)));
+		options.title = std::string("Verlet-list cut-off radius: " + std::to_string(r_cutoff.get(i)));
 		cg.AddLinesGraph(x,y.get(i),yn,options);
 	}
 
@@ -1264,137 +998,65 @@ template<unsigned int dim> void vd_cl_performance_write_report(size_t n_moving,o
 	// Google charts options
 	GCoptions options2;
 
-	options2.yAxis = std::string("Total calculation time (s)");
+	options2.yAxis = std::string("Time to construct a verlet-list (s)");
 	options2.xAxis = std::string("Number of particles");
 	options2.lineWidth = 2;
-
-	//options2.more = "hAxis: {logScale: true}";
+	options2.more = GC_ZOOM;
 
 	std::string str2("<h2>2) Total calculation time</h2>");
-	str2 += "The sum in time of force calculation and reordering";
 	cg.addHTML(str2);
 
-	for (size_t i = 0; i < cl_r_cutoff.size(); i++)
+	for (size_t i = 0; i < r_cutoff.size(); i++)
 	{
-		options2.title = std::string("Distributed vector performance, cut-off radius: " + std::to_string(cl_r_cutoff.get(i)));
+		options2.title = std::string("Cell-list performance, cut-off radius: " + std::to_string(r_cutoff.get(i)));
 		cg.AddLinesGraph(x,y2.get(i),yn2,options2);
 	}
-
-	// Speedup difference graphs report
-
-	// Google charts options
-	GCoptions options3;
-
-	options3.yAxis = std::string("Speedup (times)");
-	options3.xAxis = std::string("A moving step number");
-	options3.lineWidth = 2;
-
-	std::string str3("<h2>3) Speedup degradation after moving the particles</h2>");
-	str3 += "Now we move all of the particles several times in a random direction but the same distance. Then we compare the speedup we were gaining before moving (step 0) and after moving the particles. The speedup degradation is shown below as a function of a moving step number.";
-
-	cg.addHTML(str3);
-
-	for (size_t k = 0; k < cl_n_particles.size(); k++)
-	{
-		options3.title = std::string("Distributed vector performance speedup degradation, cut-off radius: " + std::to_string(cl_r_cutoff.get(cl_r_cutoff.size()-1)) + ", number of particles: " + std::to_string(cl_n_particles.get(k)));
-		cg.AddLinesGraph(x3,y3.get(k).get(cl_r_cutoff.size()-1),yn3,options3);
-	}
-
-	cg.write(std::string(test_dir) + "/openfpm_pdata/Vect_dist_cl_perf_" + std::to_string(dim) + "D.html");
 }
 
-/*! \brief Function for cell list hilb performance report
+
+/*! \brief Function for cell list performance report
  *
  */
-template<unsigned int dim> void vd_celllist_performance_write_report(openfpm::vector<float> & cl_r_cutoff,openfpm::vector<size_t> & cl_n_particles,openfpm::vector<openfpm::vector<double>> & cl_time_hilb,openfpm::vector<openfpm::vector<double>> & cl_time_rand,openfpm::vector<openfpm::vector<double>> & cl_time_total_hilb,openfpm::vector<openfpm::vector<double>> & cl_time_total_rand)
+template<unsigned int dim> void cell_list_part_reorder_report(GoogleChart & cg,
+		                                                      size_t n_moving,
+		                                                      openfpm::vector<float> & cl_r_cutoff,
+															  openfpm::vector<size_t> & cl_n_particles,
+															  openfpm::vector<size_t> cl_orders,
+															  openfpm::vector<openfpm::vector<openfpm::vector<double>>> cl_time_hilb_mean,
+															  openfpm::vector<openfpm::vector<double>> cl_time_rand_mean,
+															  openfpm::vector<openfpm::vector<openfpm::vector<double>>> cl_time_reorder_mean,
+															  openfpm::vector<openfpm::vector<openfpm::vector<double>>> cl_time_hilb_dev,
+															  openfpm::vector<openfpm::vector<double>> cl_time_rand_dev,
+															  openfpm::vector<openfpm::vector<openfpm::vector<double>>> cl_time_reorder_dev)
 {
-	// Speedup graphs data
-
 	openfpm::vector<size_t> x;
-	openfpm::vector<openfpm::vector<openfpm::vector<double>>> y;
-	openfpm::vector<std::string> yn;
 
 	for (size_t i = 0; i < cl_n_particles.size() ; i++)
 		x.add(cl_n_particles.get(i));
 
-	yn.add("Random cell list vs Hilbert cell list");
-
-	y.resize(cl_time_hilb.size());
-	for (size_t r = 0; r < cl_time_hilb.size(); r++)
-	{
-		y.get(r).resize(cl_time_hilb.get(r).size());
-		for (size_t k = 0; k < cl_time_hilb.get(r).size(); k++)
-		{
-			// Put a speedup
-			y.get(r).get(k).add(cl_time_rand.get(r).get(k) / cl_time_hilb.get(r).get(k));
-		}
-	}
-
-	// Calculation time graphs data
-
-	openfpm::vector<openfpm::vector<openfpm::vector<double>>> y2;
-	openfpm::vector<std::string> yn2;
-
-	yn2.add("Random cell list");
-	yn2.add("Hilbert cell list");
-
-	y2.resize(cl_time_total_hilb.size());
-	for (size_t r = 0; r < cl_time_total_hilb.size(); r++)
-	{
-		y2.get(r).resize(cl_time_total_hilb.get(r).size());
-		for (size_t k = 0; k < cl_time_total_hilb.get(r).size(); k++)
-		{
-			// Put a total time
-			y2.get(r).get(k).add(cl_time_total_rand.get(r).get(k));
-			y2.get(r).get(k).add(cl_time_total_hilb.get(r).get(k));
-		}
-	}
-
-	// Speedup graphs report
-
-	// Google charts options
-	GCoptions options;
-
-	options.yAxis = std::string("Speedup (times)");
-	options.xAxis = std::string("Number of particles");
-	options.lineWidth = 2;
-
-	GoogleChart cg;
-
-	std::string str("<h1>Distributed " + std::to_string(dim) + "-D vector performance tests: </h1>");
-	str += "<h2> 1) Speedup between a random and an hilberts curve walking</h2>";
-	str += "We create a distributed vector (VD) of randomly positioned in a " + std::to_string(dim) + "D-box particles. Then we get a cell list of VD, with a certain cut-off radius. After that we calculate the forces of each particle. Later "
-			"we do the same for another VD, but getting an 'hilbert cell list'. Then we compare the forces calculation time for both cases. The speedup is calculated and shown on graphs below, depending on different numbers of particles.";
-	cg.addHTML(str);
-
-	for (size_t i = 0; i < cl_r_cutoff.size(); i++)
-	{
-		options.title = std::string("Distributed vector performance, cut-off radius: " + std::to_string(cl_r_cutoff.get(i)));
-		cg.AddLinesGraph(x,y.get(i),yn,options);
-	}
-
-
-	// Calculation time graphs report
-
-	// Google charts options
-	GCoptions options2;
-
-	options2.yAxis = std::string("Total calculation time (s)");
-	options2.xAxis = std::string("Number of particles");
-	options2.lineWidth = 2;
-
-	std::string str2("<h2>2) Total calculation time</h2>");
-	str2 += "We count a total calculation time. In both cases - for a 'random' and 'hilbert' cell lists - it consists of: cell list creation time + forces calculation time. "
-			"The total calculation time is shown on graphs below, depending on different numbers of particles.";
-	cg.addHTML(str2);
+	// Speedup graphs data
 
-	for (size_t i = 0; i < cl_r_cutoff.size(); i++)
-	{
-		options2.title = std::string("Distributed vector performance, cut-off radius: " + std::to_string(cl_r_cutoff.get(i)));
-		cg.AddLinesGraph(x,y2.get(i),yn2,options2);
-	}
+	cl_part_time<dim>(cg,cl_n_particles,cl_r_cutoff,cl_orders,cl_time_hilb_mean,cl_time_rand_mean,cl_time_hilb_dev,cl_time_rand_dev);
+	cl_part_reorder_time<dim>(cg,cl_n_particles,cl_r_cutoff,cl_orders,cl_time_reorder_mean,cl_time_reorder_dev);
+}
 
-	cg.write(std::string(test_dir) + "/openfpm_pdata/Vect_dist_cl_hilb_perf_" + std::to_string(dim) + "D.html");
+/*! \brief Function for cell list hilb performance report
+ *
+ */
+template<unsigned int dim> void cell_list_comp_reorder_report(GoogleChart & cg,
+		                                                      openfpm::vector<float> & cl_r_cutoff,
+															  openfpm::vector<size_t> & cl_n_particles,
+															  openfpm::vector<openfpm::vector<double>> & cl_time_hilb_mean,
+															  openfpm::vector<openfpm::vector<double>> & cl_time_rand_mean,
+															  openfpm::vector<openfpm::vector<double>> & cl_time_hilb_dev,
+															  openfpm::vector<openfpm::vector<double>> & cl_time_rand_dev,
+															  openfpm::vector<openfpm::vector<double>> & cl_time_create_hilb_mean,
+															  openfpm::vector<openfpm::vector<double>> & cl_time_create_rand_mean,
+															  openfpm::vector<openfpm::vector<double>> & cl_time_create_hilb_dev,
+															  openfpm::vector<openfpm::vector<double>> & cl_time_create_rand_dev)
+{
+	cl_comp_normal_vs_hilbert_force_time<dim>(cg,cl_n_particles,cl_r_cutoff,cl_time_hilb_mean,cl_time_rand_mean,cl_time_hilb_dev,cl_time_rand_dev);
+	cl_comp_normal_vs_hilbert_create_time<dim>(cg,cl_n_particles,cl_r_cutoff,cl_time_create_hilb_mean,cl_time_create_rand_mean,cl_time_create_hilb_dev,cl_time_create_rand_dev);
 }
 
 #endif /* SRC_VECTOR_VECTOR_DIST_PERFORMANCE_UTIL_HPP_ */
diff --git a/src/Vector/performance/vector_dist_verlet_performance_tests.hpp b/src/Vector/performance/vector_dist_verlet_performance_tests.hpp
deleted file mode 100644
index ab23cf9b827c6171e0d4afd51eae9749f03496b5..0000000000000000000000000000000000000000
--- a/src/Vector/performance/vector_dist_verlet_performance_tests.hpp
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * vector_dist_verlet_performance_tests.hpp
- *
- *  Created on: Mar 9, 2016
- *      Author: Yaroslav Zaluzhnyi
- */
-
-#ifndef SRC_VECTOR_VECTOR_DIST_VERLET_PERFORMANCE_TESTS_HPP_
-#define SRC_VECTOR_VECTOR_DIST_VERLET_PERFORMANCE_TESTS_HPP_
-
-#include "Vector/vector_dist.hpp"
-#include "data_type/aggregate.hpp"
-#include "Plot/GoogleChart.hpp"
-#include "vector_dist_performance_util.hpp"
-
-
-BOOST_AUTO_TEST_SUITE( vector_dist_verlet_perf_test )
-
-///////////////////// INPUT DATA //////////////////////
-
-// Cut-off radiuses. Can be put different number of values
-openfpm::vector<float> r_cutoff {0.007,0.02};
-// Orders of a curve. Can be put different number of values
-openfpm::vector<size_t> orders = {1,2,3,5,7};
-// The starting amount of particles (remember that this number is multiplied by number of processors you use for testing)
-size_t k_start = 100000;
-// The minimal amount of particles
-size_t k_min = 15000;
-// Ghost part of distributed vector
-double ghost_part = 0.01;
-
-///////////////////////////////////////////////////////
-
-// Numbers of particles vector
-openfpm::vector<size_t> n_particles;
-// Vectors to store the data for 2D
-openfpm::vector<openfpm::vector<double>> time_rand;
-openfpm::vector<openfpm::vector<openfpm::vector<double>>> time_hilb;
-openfpm::vector<openfpm::vector<double>> time_total_rand;
-openfpm::vector<openfpm::vector<openfpm::vector<double>>>  time_total_hilb;
-// Vectors to store the data for 3D
-openfpm::vector<openfpm::vector<double>> time_rand_2;
-openfpm::vector<openfpm::vector<openfpm::vector<double>>> time_hilb_2;
-openfpm::vector<openfpm::vector<double>> time_total_rand_2;
-openfpm::vector<openfpm::vector<openfpm::vector<double>>>  time_total_hilb_2;
-
-
-BOOST_AUTO_TEST_CASE( vector_dist_verlet_random_test )
-{
-	//Benchmark test for 2D and 3D
-	vd_verlet_random_benchmark<2>(k_start,k_min,ghost_part,r_cutoff,n_particles,time_rand,time_total_rand);
-	vd_verlet_random_benchmark<3>(k_start,k_min,ghost_part,r_cutoff,n_particles,time_rand_2,time_total_rand_2);
-}
-
-BOOST_AUTO_TEST_CASE( vector_dist_verlet_hilbert_test )
-{
-	//Benchmark test for 2D and 3D
-	vd_verlet_hilbert_benchmark<2>(k_start,k_min,ghost_part,r_cutoff,n_particles,orders,time_hilb,time_total_hilb);
-	vd_verlet_hilbert_benchmark<3>(k_start,k_min,ghost_part,r_cutoff,n_particles,orders,time_hilb_2,time_total_hilb_2);
-}
-
-BOOST_AUTO_TEST_CASE(vector_dist_verlet_performance_write_report)
-{
-	//Write report for 2D and 3D
-	vd_verlet_performance_write_report<2>(r_cutoff,n_particles,orders,time_hilb,time_rand,time_total_hilb,time_total_rand);
-	vd_verlet_performance_write_report<3>(r_cutoff,n_particles,orders,time_hilb_2,time_rand_2,time_total_hilb_2,time_total_rand_2);
-}
-
-BOOST_AUTO_TEST_SUITE_END()
-
-#endif /* SRC_VECTOR_VECTOR_DIST_VERLET_PERFORMANCE_TESTS_HPP_ */
diff --git a/src/Vector/performance/verlet_performance_tests.hpp b/src/Vector/performance/verlet_performance_tests.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..b963f3ea69e1f0035396dd52102b1a0a8dcbcfca
--- /dev/null
+++ b/src/Vector/performance/verlet_performance_tests.hpp
@@ -0,0 +1,62 @@
+/*
+ * vector_dist_verlet_performance_tests.hpp
+ *
+ *  Created on: Mar 9, 2016
+ *      Author: Yaroslav Zaluzhnyi
+ */
+
+#ifndef SRC_VECTOR_VECTOR_DIST_VERLET_PERFORMANCE_TESTS_HPP_
+#define SRC_VECTOR_VECTOR_DIST_VERLET_PERFORMANCE_TESTS_HPP_
+
+BOOST_AUTO_TEST_SUITE( verletlist_part_reorder_performance_test )
+
+///////////////////// INPUT DATA //////////////////////
+
+// Cut-off radiuses. Can be put different number of values
+openfpm::vector<float> r_cutoff {0.004, 0.007, 0.01};
+// Orders of a curve. Can be put different number of values
+// The starting amount of particles (remember that this number is multiplied by number of processors you use for testing)
+size_t k_start = 100000;
+// The minimal amount of particles
+size_t k_min = 15000;
+// Ghost part of distributed vector
+
+///////////////////////////////////////////////////////
+
+// Numbers of particles vector
+openfpm::vector<size_t> n_particles;
+// Vectors to store the data for 2D
+openfpm::vector<openfpm::vector<double>> time_force_mean;
+openfpm::vector<openfpm::vector<double>> time_force_dev;
+openfpm::vector<openfpm::vector<double>> time_create_mean;
+openfpm::vector<openfpm::vector<double>> time_create_dev;
+
+// Vectors to store the data for 3D
+openfpm::vector<openfpm::vector<double>> time_force_mean_2;
+openfpm::vector<openfpm::vector<double>> time_force_dev_2;
+openfpm::vector<openfpm::vector<double>> time_create_mean_2;
+openfpm::vector<openfpm::vector<double>> time_create_dev_2;
+
+
+BOOST_AUTO_TEST_CASE( vector_dist_verlet_test )
+{
+	//Benchmark test for 2D and 3D
+	vd_verlet_random_benchmark<3>(k_start,k_min,r_cutoff,n_particles,time_force_mean,time_create_mean,time_force_dev,time_create_dev);
+	vd_verlet_random_benchmark<2>(k_start,k_min,r_cutoff,n_particles,time_force_mean_2,time_create_mean_2,time_force_dev_2,time_create_dev_2);
+}
+
+BOOST_AUTO_TEST_CASE(vector_dist_verlet_performance_write_report)
+{
+	GoogleChart cg;
+
+	//Write report for 2D and 3D
+	vd_verlet_performance_write_report<3>(cg,r_cutoff,n_particles,time_force_mean,time_force_dev,time_create_mean,time_create_dev);
+	vd_verlet_performance_write_report<2>(cg,r_cutoff,n_particles,time_force_mean_2,time_force_dev_2,time_create_mean_2,time_create_dev_2);
+
+	if (create_vcluster().getProcessUnitID() == 0)
+		cg.write(std::string(test_dir) + "/openfpm_pdata/Verletlist_comp.html");
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
+#endif /* SRC_VECTOR_VECTOR_DIST_VERLET_PERFORMANCE_TESTS_HPP_ */
diff --git a/src/Vector/vector_dist_unit_test.hpp b/src/Vector/vector_dist_unit_test.hpp
index 95c9e7425d09a5066f89f0526257dc70147ecda3..aefd55abab1cc04b7f070ead0800aa2563d7c5ac 100644
--- a/src/Vector/vector_dist_unit_test.hpp
+++ b/src/Vector/vector_dist_unit_test.hpp
@@ -11,7 +11,7 @@
 #include <random>
 #include "Vector/vector_dist.hpp"
 #include "data_type/aggregate.hpp"
-#include "Vector/performance/vector_dist_performance_util.hpp"
+#include "Vector/performance/vector_dist_performance_common.hpp"
 
 /*! \brief Count the total number of particles
  *
diff --git a/src/main.cpp b/src/main.cpp
index c60b455f760aa5c64e8c240c5e273acc34046d6e..c8c4a38803231bcdef10ef08b01456f5e5c9cf52 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -40,9 +40,6 @@ int main(int argc, char* argv[])
 #include "Decomposition/Distribution/metis_util_unit_test.hpp"
 #include "dec_optimizer_unit_test.hpp"
 #include "Vector/vector_dist_unit_test.hpp"
-#ifdef PERFORMANCE_TEST
-#include "pdata_performance.hpp"
-#endif
 #include "Decomposition/Distribution/Distribution_unit_tests.hpp"
 //#include "DLB/DLB_unit_test.hpp"
 #include "Graph/dist_map_graph_unit_test.hpp"
diff --git a/src/pdata_performance.cpp b/src/pdata_performance.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2d9c401e8e11367608daa04ac551ce8aa4cb77a3
--- /dev/null
+++ b/src/pdata_performance.cpp
@@ -0,0 +1,39 @@
+/*
+ * performance.hpp
+ *
+ *  Created on: Mar 9, 2016
+ *      Author: yaroslav
+ */
+
+#ifndef SRC_PDATA_PERFORMANCE_CPP_
+#define SRC_PDATA_PERFORMANCE_CPP_
+
+#include <iostream>
+#include <mpi.h>
+#include "config.h"
+
+#include "Vector/vector_dist.hpp"
+#include "data_type/aggregate.hpp"
+#include "Plot/GoogleChart.hpp"
+#include "Point_test.hpp"
+
+#define BOOST_TEST_DYN_LINK
+#include <boost/test/unit_test.hpp>
+
+extern const char * test_dir;
+
+#ifdef PERFORMANCE_TEST
+
+BOOST_AUTO_TEST_SUITE( performance )
+
+#include "Vector/performance/vector_dist_performance_util.hpp"
+#include "Vector/performance/verlet_performance_tests.hpp"
+#include "Vector/performance/cell_list_part_reorder.hpp"
+#include "Vector/performance/cell_list_comp_reorder.hpp"
+
+
+BOOST_AUTO_TEST_SUITE_END()
+
+#endif
+
+#endif /* SRC_PDATA_PERFORMANCE_CPP_ */
diff --git a/src/pdata_performance.hpp b/src/pdata_performance.hpp
deleted file mode 100644
index 28f8be3ece84d1c974238e83a08af4ceebba4e34..0000000000000000000000000000000000000000
--- a/src/pdata_performance.hpp
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * performance.hpp
- *
- *  Created on: Mar 9, 2016
- *      Author: yaroslav
- */
-
-#ifndef SRC_PDATA_PERFORMANCE_HPP_
-#define SRC_PDATA_PERFORMANCE_HPP_
-
-BOOST_AUTO_TEST_SUITE( performance )
-
-#include "Vector/performance/vector_dist_verlet_performance_tests.hpp"
-#include "Vector/performance/vector_dist_cl_performance_tests.hpp"
-#include "Vector/performance/vector_dist_cl_hilb_performance_tests.hpp"
-
-
-BOOST_AUTO_TEST_SUITE_END()
-
-#endif /* SRC_PDATA_PERFORMANCE_HPP_ */