Skip to content
Snippets Groups Projects
Commit be6e7732 authored by Pietro Incardona
Browse files

Optimizing TLS version

parent 74309603
No related branches found
No related tags found
No related merge requests found
...@@ -227,18 +227,16 @@ int main(int argc, char *argv[]) ...@@ -227,18 +227,16 @@ int main(int argc, char *argv[])
{ {
auto p = blockIdx.x * blockDim.x + threadIdx.x; auto p = blockIdx.x * blockDim.x + threadIdx.x;
float a = vd_in.template get<0>(p)[0]; vd_out.template get<0>(p) = a;
float b = vd_in.template get<0>(p)[1];
vd_out.template get<0>(p) = a + b;
vd_out.template get<1>(p)[0] = a; vd_out.template get<1>(p)[0] = a;
vd_out.template get<1>(p)[1] = b; vd_out.template get<1>(p)[1] = a;
vd_out.template get<2>(p)[0][0] = a; vd_out.template get<2>(p)[0][0] = a;
vd_out.template get<2>(p)[0][1] = b; vd_out.template get<2>(p)[0][1] = a;
vd_out.template get<2>(p)[1][0] = a + b; vd_out.template get<2>(p)[1][0] = a;
vd_out.template get<2>(p)[1][1] = b - a; vd_out.template get<2>(p)[1][1] = a;
vd_in.template get<0>(p)[1] = a;
}; };
CUDA_LAUNCH_LAMBDA(ite, lamb); CUDA_LAUNCH_LAMBDA(ite, lamb);
...@@ -275,7 +273,7 @@ int main(int argc, char *argv[]) ...@@ -275,7 +273,7 @@ int main(int argc, char *argv[])
auto p = blockIdx.x * blockDim.x + threadIdx.x; auto p = blockIdx.x * blockDim.x + threadIdx.x;
float a = vd_out.template get<0>(p); float a = vd_out.template get<0>(p);
float b = vd_out.template get<1>(p)[0]; float b = vd_out.template get<1>(p)[0];
float c = vd_out.template get<1>(p)[1]; float c = vd_out.template get<1>(p)[1];
...@@ -284,8 +282,8 @@ int main(int argc, char *argv[]) ...@@ -284,8 +282,8 @@ int main(int argc, char *argv[])
float f = vd_out.template get<2>(p)[1][0]; float f = vd_out.template get<2>(p)[1][0];
float g = vd_out.template get<2>(p)[1][1]; float g = vd_out.template get<2>(p)[1][1];
vd_in.template get<0>(p)[0] = a+b+c+d; float h = vd_in.template get<0>(p)[0];
vd_in.template get<0>(p)[1] = e+f+g; vd_in.template get<0>(p)[1] = a+b+c+d+e+f+g+h;
}; };
CUDA_LAUNCH_LAMBDA(ite, lamb); CUDA_LAUNCH_LAMBDA(ite, lamb);
......
openfpm_devices @ c4dd3cff
Subproject commit 6d8f28cc101c2350975ff1a3e82127319c03e392 Subproject commit c4dd3cffacb260372766b760bcbcab6815167b73
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment