main.cpp 12.4 KB
Newer Older
incardon's avatar
incardon committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#include "Grid/grid_dist_id.hpp"
#include "data_type/aggregate.hpp"
#include "timer.hpp"
#include "Vc/Vc"

/*!
 *
 * \page Grid_3_gs_3D_vector Gray Scott in 3D fast implementation with vectorization
 *
 * # Solving a gray scott-system in 3D # {#e3_gs_gray_scott_vector}
 *
 * This example is just an improved version of the previous 3D Gray scott example.
 * In particular we do the following improvements we separate U and V in two grids
 * in order to vectorize. Every loop now handle 4 double in case of AVX-256 and 2 double
 * in case of SSE. We also avoid to use the function copy and we alternate the use of the
 * fields New and Old. If at the first iteration we read from Old and we write on New in
 * the second iteration we read from New and we write on Old. The last improvement is write
 * on hdf5 rather that VTK. VTK writers are convenient but are slow for performances. HDF5
 * files can be saved with **save()** reload with **load()** and after loading can be written
 * on VTK with **write** this mean that HDF5 files can be easily converted into VTK in a second moment.
 * Not only but because HDF5 files can be saved on multiple processors and reloaded on a different
 * number of processors, you can use this method to stitch VTK files together.
 *
 *
 * In figure is the final solution of the problem
 *
 * \htmlonly
 * <img src="http://ppmcore.mpi-cbg.de/web/images/examples/gray_scott_3d/gs_alpha.png"/>
 * \endhtmlonly
 *
 * \see \ref Grid_2_solve_eq
 *
 * \snippet Grid/3_gray_scott_3d_vectorization/main.cpp constants
 * 
 */

//! \cond [constants] \endcond

39
//#define FORTRAN_UPDATE
incardon's avatar
incardon committed
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114

constexpr int x = 0;
constexpr int y = 1;
constexpr int z = 2;

extern "C" void update_new(const int* lo, const int* hi,
                  double* u, const int* ulo, const int* uhi,
                  double* v, const int* vlo, const int* vhi,
                  double* flu, const int* fulo, const int* fuhi,
                  double* flv, const int* fvlo, const int* fvhi,
                  const double * dt, const double * uFactor, const double * vFactor, const double * F,
                  const double * K);


//! \cond [constants] \endcond

void init(grid_dist_id<3,double,aggregate<double> > & OldU,
		  grid_dist_id<3,double,aggregate<double> > & OldV,
		  grid_dist_id<3,double,aggregate<double> > & NewU,
		  grid_dist_id<3,double,aggregate<double> > & NewV,
		  Box<3,double> & domain)
{
	auto it = OldU.getDomainIterator();

	while (it.isNext())
	{
		// Get the local grid key
		auto key = it.get();

		// Old values U and V
		OldU.get(key) = 1.0;
		OldV.get(key) = 0.0;

		// Old values U and V
		NewU.get(key) = 0.0;
		NewV.get(key) = 0.0;

		++it;
	}

	long int x_start = OldU.size(0)*1.55f/domain.getHigh(0);
	long int y_start = OldU.size(1)*1.55f/domain.getHigh(1);
	long int z_start = OldU.size(1)*1.55f/domain.getHigh(2);

	long int x_stop = OldU.size(0)*1.85f/domain.getHigh(0);
	long int y_stop = OldU.size(1)*1.85f/domain.getHigh(1);
	long int z_stop = OldU.size(1)*1.85f/domain.getHigh(2);

	grid_key_dx<3> start({x_start,y_start,z_start});
	grid_key_dx<3> stop ({x_stop,y_stop,z_stop});
	auto it_init = OldU.getSubDomainIterator(start,stop);

	while (it_init.isNext())
	{
		auto key = it_init.get();

        OldU.get(key) = 0.5 + (((double)std::rand())/RAND_MAX -0.5)/10.0;
        OldV.get(key) = 0.25 + (((double)std::rand())/RAND_MAX -0.5)/20.0;

		++it_init;
	}
}


//! \cond [vectorization] \endcond

void step(grid_dist_id<3, double, aggregate<double>> & OldU,
		  grid_dist_id<3, double, aggregate<double>> & OldV,
		  grid_dist_id<3, double, aggregate<double>> & NewU,
		  grid_dist_id<3, double, aggregate<double>> & NewV,
		  grid_key_dx<3> (& star_stencil_3D)[7],
		  Vc::double_v uFactor, Vc::double_v vFactor, double deltaT, double F, double K)
{
#ifndef FORTRAN_UPDATE

115
	//! \cond [cpp_update] \endcond
incardon's avatar
incardon committed
116

117
118
119
120
121
122
	WHILE_M(OldU,star_stencil_3D)
			auto & U_old = GET_GRID_M(OldU);
			auto & V_old = GET_GRID_M(OldV);

			auto & U_new = GET_GRID_M(NewU);
			auto & V_new = GET_GRID_M(NewV);
123
	ITERATE_3D_M(Vc::double_v::Size)
incardon's avatar
incardon committed
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169

			// center point
			auto Cp = it.getStencil<0>();

			// plus,minus X,Y,Z
			auto mx = it.getStencil<1>();
			auto px = it.getStencil<2>();
			auto my = it.getStencil<3>();
			auto py = it.getStencil<4>();
			auto mz = it.getStencil<5>();
			auto pz = it.getStencil<6>();

			//
			Vc::double_v u_c(&U_old.get<0>(Cp),Vc::Unaligned);
			Vc::double_v u_mz(&U_old.get<0>(mz),Vc::Unaligned);
			Vc::double_v u_pz(&U_old.get<0>(pz),Vc::Unaligned);
			Vc::double_v u_my(&U_old.get<0>(my),Vc::Unaligned);
			Vc::double_v u_py(&U_old.get<0>(py),Vc::Unaligned);
			Vc::double_v u_mx(&U_old.get<0>(mx),Vc::Unaligned);
			Vc::double_v u_px(&U_old.get<0>(px),Vc::Unaligned);


			Vc::double_v v_c(&V_old.get<0>(Cp),Vc::Unaligned);
			Vc::double_v v_mz(&V_old.get<0>(mz),Vc::Unaligned);
			Vc::double_v v_pz(&V_old.get<0>(pz),Vc::Unaligned);
			Vc::double_v v_my(&V_old.get<0>(my),Vc::Unaligned);
			Vc::double_v v_py(&V_old.get<0>(py),Vc::Unaligned);
			Vc::double_v v_mx(&V_old.get<0>(mx),Vc::Unaligned);
			Vc::double_v v_px(&V_old.get<0>(px),Vc::Unaligned);

			Vc::double_v out1 = u_c + uFactor * (u_mz + u_pz +
					                            u_my + u_py +
												u_mx + u_px +
											    - 6.0 * u_c) +
												- deltaT * u_c * v_c * v_c
												- deltaT * F * (u_c - 1.0);

			Vc::double_v out2 = v_c + vFactor * (v_mz + v_pz +
					               v_my + v_py +
								   v_mx + v_px +
								   - 6.0 * v_c ) +
								   deltaT * u_c * v_c * v_c +
								   - deltaT * (F+K) * v_c;

			out1.store(&U_new.get<0>(Cp),Vc::Unaligned);
			out2.store(&V_new.get<0>(Cp),Vc::Unaligned);
170
171
172
	END_LOOP_M

	//! \cond [cpp_update] \endcond
incardon's avatar
incardon committed
173
174
175

#else

176
177
	//! \cond [fort_update] \endcond

incardon's avatar
incardon committed
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
	double uFactor_s = uFactor[0];
	double vFactor_s = vFactor[0];

	auto & ginfo = OldU.getLocalGridsInfo();

	for (size_t i = 0 ; i < OldU.getN_loc_grid() ; i++)
	{
		auto & U_old = OldU.get_loc_grid(i);
		auto & V_old = OldV.get_loc_grid(i);

		auto & U_new = NewU.get_loc_grid(i);
		auto & V_new = NewV.get_loc_grid(i);

		int lo[3] = {(int)ginfo.get(i).Dbox.getLow(0),(int)ginfo.get(i).Dbox.getLow(1),(int)ginfo.get(i).Dbox.getLow(2)};
		int hi[3] = {(int)ginfo.get(i).Dbox.getHigh(0),(int)ginfo.get(i).Dbox.getHigh(1),(int)ginfo.get(i).Dbox.getHigh(2)};

		int ulo[3] = {0,0,0};
		int uhi[3] = {(int)ginfo.get(i).GDbox.getHigh(0),(int)ginfo.get(i).GDbox.getHigh(1),(int)ginfo.get(i).GDbox.getHigh(2)};
		int nulo[3] = {0,0,0};
		int nuhi[3] = {(int)ginfo.get(i).GDbox.getHigh(0),(int)ginfo.get(i).GDbox.getHigh(1),(int)ginfo.get(i).GDbox.getHigh(2)};

		int vlo[3] = {0,0,0};
		int vhi[3] = {(int)ginfo.get(i).GDbox.getHigh(0),(int)ginfo.get(i).GDbox.getHigh(1),(int)ginfo.get(i).GDbox.getHigh(2)};
		int nvlo[3] = {0,0,0};
		int nvhi[3] = {(int)ginfo.get(i).GDbox.getHigh(0),(int)ginfo.get(i).GDbox.getHigh(1),(int)ginfo.get(i).GDbox.getHigh(2)};

		update_new(lo,hi,
				   (double *)U_old.getPointer(),ulo,uhi,
				   (double *)V_old.getPointer(),vlo,vhi,
				   (double *)U_new.getPointer(),nulo,nuhi,
				   (double *)V_new.getPointer(),nulo,nvhi,
				   &deltaT, &uFactor_s, &vFactor_s,&F,&K);
	}

212
213
	//! \cond [fort_update] \endcond

incardon's avatar
incardon committed
214
215
216
217
218
219
220
221
222
223
224
225
226
#endif
}

//! \cond [vectorization] \endcond

int main(int argc, char* argv[])
{
	openfpm_init(&argc,&argv);

	// domain
	Box<3,double> domain({0.0,0.0},{2.5,2.5,2.5});
	
	// grid size
227
        size_t sz[3] = {256,256,256};
incardon's avatar
incardon committed
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271

	// Define periodicity of the grid
	periodicity<3> bc = {PERIODIC,PERIODIC,PERIODIC};
	
	// Ghost in grid unit
	Ghost<3,long int> g(1);
	
	// deltaT
	double deltaT = 1;

	// Diffusion constant for specie U
	double du = 2*1e-5;

	// Diffusion constant for specie V
	double dv = 1*1e-5;

	// Number of timesteps
    size_t timeSteps = 5000;

	// K and F (Physical constant in the equation)
    double K = 0.053;
    double F = 0.014;

	//! \cond [init lib] \endcond

	/*!
	 * \page Grid_3_gs_3D_vector Gray Scott in 3D fast implementation with vectorization
	 *
	 * Here we create 2 distributed grid in 3D Old and New splitting U and V in two different fields.
	 *  In particular because we want that all the grids are distributed across processors in the same
	 *   way we pass the decomposition of the first grid.
	 *
	 * \snippet Grid/3_gray_scott_3d_vectorization/main.cpp init grid
	 *
	 */

	//! \cond [init grid] \endcond

	grid_dist_id<3, double, aggregate<double>> OldU(sz,domain,g,bc);
	grid_dist_id<3, double, aggregate<double>> OldV(OldU.getDecomposition(),sz,g);

	// New grid with the decomposition of the old grid
    grid_dist_id<3, double, aggregate<double>> NewU(OldU.getDecomposition(),sz,g);
    grid_dist_id<3, double, aggregate<double>> NewV(OldV.getDecomposition(),sz,g);
incardon's avatar
incardon committed
272

incardon's avatar
incardon committed
273
	// spacing of the grid on x and y
incardon's avatar
incardon committed
274

incardon's avatar
incardon committed
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
	double spacing[3] = {OldU.spacing(0),OldU.spacing(1),OldU.spacing(2)};

	init(OldU,OldV,NewU,NewV,domain);

	//! \cond [init grid] \endcond

	// sync the ghost
	size_t count = 0;
	OldU.template ghost_get<0>();
	OldV.template ghost_get<0>();

	// because we assume that spacing[x] == spacing[y] we use formula 2
	// and we calculate the prefactor of Eq 2
	Vc::double_v uFactor = deltaT * du/(spacing[x]*spacing[x]);
	Vc::double_v vFactor = deltaT * dv/(spacing[x]*spacing[x]);

	timer tot_sim;
	tot_sim.start();

	static grid_key_dx<3> star_stencil_3D[7] = {{0,0,0},
                                         	    {0,0,-1},
						    {0,0,1},
						    {0,-1,0},
						    {0,1,0},
						    {-1,0,0},
						    {1,0,0}};

incardon's avatar
incardon committed
302
	for (size_t i = 0; i < timeSteps; ++i)
incardon's avatar
incardon committed
303
304
305
306
307
308
309
310
	{
		if (i % 300 == 0)
			std::cout << "STEP: " << i << std::endl;

		/*!
		 * \page Grid_3_gs_3D_vector Gray Scott in 3D fast implementation with vectorization
		 *
		 * Alternate New and Old field to run one step, switch between old and new if the iteration
311
312
		 * is even or odd. The function step is nothing else than the implementation of Gray-Scott
		 * 3D in the previous example but in a more optimized way.
incardon's avatar
incardon committed
313
314
315
		 *
		 * \snippet Grid/3_gray_scott_3d_vectorization/main.cpp alternate
		 *
316
		 * In this function we show two methods to optimize this function.
incardon's avatar
incardon committed
317
		 *
318
319
320
321
322
		 * * We can use the macro **WHILE_M** passing the stencil definition, **ITERATE_3D** to define the loop,
		 *  **END_LOOP** to close the loop, and use the function
		 * function **getStencil<0>()** to retrieve the stencil points. Additionaly we can use Vc::double_v instead
		 *  of double to vectorize the code. This method give the advantage to keep all the
		 * code in C++.
incardon's avatar
incardon committed
323
		 *
324
325
326
327
328
329
330
331
332
333
334
		 * \snippet Grid/3_gray_scott_3d_vectorization/main.cpp cpp_update
		 *
		 * * Another possibility is to use FORTRAN. Because FORTRAN has better
		 *  support for multi dimensional array another possibility is to process each local grid using
		 *  FORTRAN, this also give us the opportunity to show hybrid code. We can switch between
		 *   one and the other method commenting
		 *  and uncommeting the line #define FORTRAN_UPDATE in the code.
		 *
		 * \snippet Grid/3_gray_scott_3d_vectorization/main.cpp fort_update
		 *
		 * \include Grid/3_gray_scott_3d_vectorization/update_new.f90
incardon's avatar
incardon committed
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
		 *
		 */

		//! \cond [alternate] \endcond

		if (i % 2 == 0)
		{
			step(OldU,OldV,NewU,NewV,star_stencil_3D,uFactor,vFactor,deltaT,F,K);

			NewU.ghost_get<0>();
			NewV.ghost_get<0>();
		}
		else
		{
			step(NewU,NewV,OldU,OldV,star_stencil_3D,uFactor,vFactor,deltaT,F,K);

			OldU.ghost_get<0>();
			OldV.ghost_get<0>();
		}

		//! \cond [alternate] \endcond

		/*!
		 * \page Grid_3_gs_3D_vector Gray Scott in 3D fast implementation with vectorization
		 *
		 * Instead of using the function **write** we use the function **save** to save on HDF5
		 *
		 * \snippet Grid/3_gray_scott_3d_vectorization/main.cpp save hdf5
		 *
		 */

		//! \cond [save hdf5] \endcond

368
		// Every 2000 time step we output the configuration on hdf5
incardon's avatar
incardon committed
369
		if (i % 2000 == 0)
incardon's avatar
incardon committed
370
		{
371
			OldU.save("output_u_" + std::to_string(count));
incardon's avatar
incardon committed
372
373
374
375
376
377
378
379
			OldV.save("output_v_" + std::to_string(count));
			count++;
		}

		//! \cond [save hdf5] \endcond
	}
	
	tot_sim.stop();
380
381
382

	if (create_vcluster().rank() == 0)
	{std::cout << "Total simulation: " << tot_sim.getwct() << std::endl;}
incardon's avatar
incardon committed
383

incardon's avatar
incardon committed
384
385
386
	// We frite the final configuration
	OldV.write("final");

incardon's avatar
incardon committed
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
	//! \cond [time stepping] \endcond

	/*!
	 * \page Grid_3_gs_3D_vector Gray Scott in 3D fast implementation with vectorization
	 *
	 * ## Finalize ##
	 *
	 * Deinitialize the library
	 *
	 * \snippet Grid/3_gray_scott_3d_vectorization/main.cpp finalize
	 *
	 */

	//! \cond [finalize] \endcond

	openfpm_finalize();

	//! \cond [finalize] \endcond

	/*!
	 * \page Grid_3_gs_3D_vector Gray Scott in 3D fast implementation with vectorization
	 *
	 * # Full code # {#code}
	 *
	 * \include Grid/3_gray_scott_3d_vectorization/main.cpp
	 *
	 */
}