Skip to content
Snippets Groups Projects
Commit ef8e58f2 authored by Pietro Incardona
Browse files

Fixing CUDIFY

parent f7b843e7
No related branches found
No related tags found
No related merge requests found
Pipeline #4053 failed
......@@ -42,7 +42,7 @@ OBJ = main.o
memBW:
%.o: %.cu
$(CUDA_CC) -g -O3 $(CUDA_OPTIONS) $(OPT) -c --std=c++14 -o $@ $< $(INCLUDE_PATH_NVCC)
$(CUDA_CC) -O3 $(CUDA_OPTIONS) $(OPT) -c --std=c++14 -o $@ $< $(INCLUDE_PATH_NVCC)
%.o: %.cpp
$(CC) -g -O3 $(OPT) -g -c --std=c++14 -o $@ $< $(INCLUDE_PATH)
......
......@@ -53,18 +53,18 @@ __global__ void translate_fill_prop_write_array(float * vd_out_scal,
{
auto p = blockIdx.x * blockDim.x + threadIdx.x;
float a = vd_in_vec[p* + 0*stride];
float b = vd_in_vec[p* + 1*stride];
float a = vd_in_vec[p + 0*stride];
vd_out_scal[p] = a + b;
vd_out_scal[p] = a;
vd_out_vec[p + 0*stride] = a;
vd_out_vec[p + 1*stride] = b;
vd_out_vec[p + 1*stride] = a;
vd_out_mat[p + 0*2*stride + 0*stride ] = a;
vd_out_mat[p + 0*2*stride + 1*stride ] = b;
vd_out_mat[p + 1*2*stride + 0*stride ] = a + b;
vd_out_mat[p + 1*2*stride + 1*stride ] = b - a;
vd_out_mat[p + 0*2*stride + 1*stride ] = a;
vd_out_mat[p + 1*2*stride + 0*stride ] = a;
vd_out_mat[p + 1*2*stride + 1*stride ] = a;
vd_in_vec[p + 1*stride] = a;
}
......@@ -184,6 +184,18 @@ int main(int argc, char *argv[])
initialize_buf(in,out);
for (int j = 0 ; j < 100 ; j++)
{
for (int i = 0 ; i < 16777216; i++)
{
out.get<2>(i)[1][0] = in.get<0>(i)[1];
}
}
return 0;
// Read write test with TLS
auto ite = out.getGPUIterator(256);
......@@ -344,6 +356,38 @@ int main(int argc, char *argv[])
double dev_read_lamb = 0.0;
standard_deviation(res,mean_read_lamb,dev_read_lamb);
// Array benchmark
for (int i = 0 ; i < 110 ; i++)
{
cudaDeviceSynchronize();
timer t;
t.start();
float * out_s = (float *)out.getDeviceBuffer<0>();
float * out_v = (float *)out.getDeviceBuffer<1>();
float * out_m = (float *)out.getDeviceBuffer<2>();
float * in_v = (float *)in.getDeviceBuffer<0>();
CUDA_LAUNCH(translate_fill_prop_write_array,ite,out_s,out_v,out_m,in_v,out.capacity());
cudaDeviceSynchronize();
t.stop();
if (i >=10)
{res.get(i-10) = nele*4*9 / t.getwct() * 1e-9;}
std::cout << "Time ARR: " << t.getwct() << std::endl;
std::cout << "BW ARR: " << nele*4*9 / t.getwct() * 1e-9 << " GB/s" << std::endl;
}
double mean_write_arr = 0.0;
double dev_write_arr = 0.0;
standard_deviation(res,mean_write_arr,dev_write_arr);
///////////////////
#ifdef CUDIFY_USE_CUDA
for (int i = 0 ; i < 110 ; i++)
......@@ -382,6 +426,7 @@ int main(int argc, char *argv[])
std::cout << "Average READ with lamb: " << mean_read_lamb << " deviation: " << dev_read_lamb << std::endl;
std::cout << "Average WRITE with lamb: " << mean_write_lamb << " deviation: " << dev_write_lamb << std::endl;
std::cout << "Average WRITE with array: " << mean_write_arr << " deviation: " << dev_write_arr << std::endl;
}
#else
......
openfpm_devices @ ccdd64f9
Subproject commit f95b31b6d0af0a3c60a2643840854feba90abb1e
Subproject commit ccdd64f9dee09116426e685ec44debfab255aa0a
......@@ -5,6 +5,7 @@
#include <boost/test/unit_test.hpp>
#include "Grid/grid_dist_id.hpp"
BOOST_AUTO_TEST_SUITE( sgrid_gpu_test_suite )
template<unsigned int p>
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment