Commit 7cb35bd4 authored by incardon's avatar incardon

Hip compilable version

parent 2b46e92d
Pipeline #1820 failed with stages
in 0 seconds
openfpm_data @ 24987fd6
Subproject commit b507c258ec90ad13593720241a3ef405dccbc551
Subproject commit 24987fd653fe6fddd822a7c985b292c334eb8b93
openfpm_devices @ 85e556bc
Subproject commit a75dc7129e44da0903d6d23d2d2fd6bae5d8248e
Subproject commit 85e556bc66bb678416dcfb2084e9da84728c4e39
openfpm_io @ 881c5b3c
Subproject commit 4f11871c70f080f1a3a09b4e564fe1c59c565894
Subproject commit 881c5b3c1e1c51314c95209b25f7e70fbf33dcad
openfpm_numerics @ 76bd9448
Subproject commit 64f7a76bc841ca31f4f96d6afbb375c0a8127f5c
Subproject commit 76bd944865b9713eb657e454b4201b6131da824d
openfpm_vcluster @ 5e50d904
Subproject commit e42c122d001cd1a9891150585f37dff377fd76c8
Subproject commit 5e50d904b0f659ef8f1e2aa40aa0380035e181f8
......@@ -11,8 +11,6 @@
* Created on: Oct 5, 2017
* Author: i-bird
*/
#include <hip/hip_runtime.h>
#include "config.h"
#define BOOST_TEST_DYN_LINK
......
#define BOOST_TEST_DYN_LINK
#include <hip/hip_runtime.h>
#include "config.h"
#include <boost/test/unit_test.hpp>
#include "Space/Shape/Box.hpp"
......
#define BOOST_TEST_DYN_LINK
#include <hip/hip_runtime.h>
#include "config.h"
#include <boost/test/unit_test.hpp>
......@@ -79,7 +77,7 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_check_cross_consistency_between_proc_idb
CudaMemory mem;
mem.allocate(2*sizeof(unsigned int));
hipLaunchKernelGGL(HIP_KERNEL_NAME(test_proc_idbc<decltype(gpudec)>), dim3(1), dim3(1), 0, 0, p1,p2,gpudec,(unsigned int *)mem.getDevicePointer());
test_proc_idbc<decltype(gpudec)><<<1,1>>>(p1,p2,gpudec,(unsigned int *)mem.getDevicePointer());
mem.deviceToHost();
......@@ -88,7 +86,7 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_check_cross_consistency_between_proc_idb
CudaMemory mem2;
mem2.allocate(2*sizeof(unsigned int));
hipLaunchKernelGGL(HIP_KERNEL_NAME(test_ghost_n<decltype(gpudec)>), dim3(1), dim3(1), 0, 0, p1,p2,gpudec,(unsigned int *)mem2.getDevicePointer());
test_ghost_n<decltype(gpudec)><<<1,1>>>(p1,p2,gpudec,(unsigned int *)mem2.getDevicePointer());
mem2.deviceToHost();
......@@ -96,7 +94,7 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_check_cross_consistency_between_proc_idb
openfpm::vector_gpu<aggregate<int,int>> vd;
vd.resize(tot);
hipLaunchKernelGGL(HIP_KERNEL_NAME(test_ghost<decltype(gpudec),decltype(vd.toKernel())>), dim3(1), dim3(1), 0, 0, p1,p2,gpudec,(unsigned int *)mem2.getDevicePointer(),vd.toKernel());
test_ghost<decltype(gpudec),decltype(vd.toKernel())><<<1,1>>>(p1,p2,gpudec,(unsigned int *)mem2.getDevicePointer(),vd.toKernel());
if (((unsigned int *)mem.getPointer())[0] != ((unsigned int *)mem.getPointer())[1])
{
......@@ -122,7 +120,7 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_check_cross_consistency_between_proc_idb
p2.get(j) = std::nextafter(SpaceBox<3,double>(dec.getSubDomains().get(i)).getHigh(j),1.0);
hipLaunchKernelGGL(HIP_KERNEL_NAME(test_proc_idbc<decltype(gpudec)>), dim3(1), dim3(1), 0, 0, p1,p2,gpudec,(unsigned int *)mem.getDevicePointer());
test_proc_idbc<decltype(gpudec)><<<1,1>>>(p1,p2,gpudec,(unsigned int *)mem.getDevicePointer());
mem.deviceToHost();
......@@ -130,14 +128,14 @@ BOOST_AUTO_TEST_CASE( CartDecomposition_check_cross_consistency_between_proc_idb
BOOST_REQUIRE(((unsigned int *)mem.getPointer())[1] < vcl.size());
mem2.allocate(2*sizeof(unsigned int));
hipLaunchKernelGGL(HIP_KERNEL_NAME(test_ghost_n<decltype(gpudec)>), dim3(1), dim3(1), 0, 0, p1,p2,gpudec,(unsigned int *)mem2.getDevicePointer());
test_ghost_n<decltype(gpudec)><<<1,1>>>(p1,p2,gpudec,(unsigned int *)mem2.getDevicePointer());
mem2.deviceToHost();
tot = ((unsigned int *)mem2.getPointer())[0] + ((unsigned int *)mem2.getPointer())[1];
vd.resize(tot);
hipLaunchKernelGGL(HIP_KERNEL_NAME(test_ghost<decltype(gpudec),decltype(vd.toKernel())>), dim3(1), dim3(1), 0, 0, p1,p2,gpudec,(unsigned int *)mem2.getDevicePointer(),vd.toKernel());
test_ghost<decltype(gpudec),decltype(vd.toKernel())><<<1,1>>>(p1,p2,gpudec,(unsigned int *)mem2.getDevicePointer(),vd.toKernel());
if (((unsigned int *)mem.getPointer())[0] != ((unsigned int *)mem.getPointer())[1])
{
......
#define BOOST_TEST_DYN_LINK
#include <hip/hip_runtime.h>
#include "config.h"
#include <boost/test/unit_test.hpp>
......
#define BOOST_TEST_DYN_LINK
#include <hip/hip_runtime.h>
#include "config.h"
#define TEST1
......@@ -102,7 +100,9 @@ BOOST_AUTO_TEST_CASE( vector_ghost_process_local_particles )
v_prp.hostToDevice<0,1,2>();
// label particle processor
hipLaunchKernelGGL(HIP_KERNEL_NAME(num_shift_ghost_each_part<3,float,decltype(box_f_dev.toKernel()),decltype(box_f_sv.toKernel()),decltype(v_pos.toKernel()),decltype(o_part_loc.toKernel())>), dim3(ite.wthr), dim3(ite.thr), 0, 0, box_f_dev.toKernel(),box_f_sv.toKernel(),v_pos.toKernel(),o_part_loc.toKernel(),v_pos.size());
num_shift_ghost_each_part<3,float,decltype(box_f_dev.toKernel()),decltype(box_f_sv.toKernel()),decltype(v_pos.toKernel()),decltype(o_part_loc.toKernel())>
<<<ite.wthr,ite.thr>>>
(box_f_dev.toKernel(),box_f_sv.toKernel(),v_pos.toKernel(),o_part_loc.toKernel(),v_pos.size());
o_part_loc.deviceToHost<0>();
......@@ -161,10 +161,12 @@ BOOST_AUTO_TEST_CASE( vector_ghost_process_local_particles )
openfpm::vector_gpu<aggregate<unsigned int,unsigned int>> o_part_loc2;
o_part_loc2.resize(tot);
hipLaunchKernelGGL(HIP_KERNEL_NAME(shift_ghost_each_part<3,float,decltype(box_f_dev.toKernel()),decltype(box_f_sv.toKernel()),
shift_ghost_each_part<3,float,decltype(box_f_dev.toKernel()),decltype(box_f_sv.toKernel()),
decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),
decltype(starts.toKernel()),decltype(shifts.toKernel()),
decltype(o_part_loc2.toKernel())>), dim3(ite.wthr), dim3(ite.thr), 0, 0, box_f_dev.toKernel(),box_f_sv.toKernel(),
decltype(o_part_loc2.toKernel())>
<<<ite.wthr,ite.thr>>>
(box_f_dev.toKernel(),box_f_sv.toKernel(),
v_pos.toKernel(),v_prp.toKernel(),
starts.toKernel(),shifts.toKernel(),o_part_loc2.toKernel(),old,old);
......@@ -349,7 +351,9 @@ BOOST_AUTO_TEST_CASE( vector_ghost_process_local_particles )
ite = o_part_loc2.getGPUIterator();
hipLaunchKernelGGL(HIP_KERNEL_NAME(process_ghost_particles_local<true,3,decltype(o_part_loc2.toKernel()),decltype(v_pos2.toKernel()),decltype(v_prp2.toKernel()),decltype(shifts.toKernel())>), dim3(ite.wthr), dim3(ite.thr), 0, 0, o_part_loc2.toKernel(),v_pos2.toKernel(),v_prp2.toKernel(),shifts.toKernel(),old);
process_ghost_particles_local<true,3,decltype(o_part_loc2.toKernel()),decltype(v_pos2.toKernel()),decltype(v_prp2.toKernel()),decltype(shifts.toKernel())>
<<<ite.wthr,ite.thr>>>
(o_part_loc2.toKernel(),v_pos2.toKernel(),v_prp2.toKernel(),shifts.toKernel(),old);
v_pos2.template deviceToHost<0>();
v_prp2.template deviceToHost<0,1,2>();
......@@ -453,7 +457,9 @@ BOOST_AUTO_TEST_CASE( vector_ghost_fill_send_buffer_test )
auto ite = g_send_prp.get(i).getGPUIterator();
hipLaunchKernelGGL(HIP_KERNEL_NAME(process_ghost_particles_prp<decltype(g_opart_device.toKernel()),decltype(g_send_prp.get(i).toKernel()),decltype(v_prp.toKernel()),0,1,2>), dim3(ite.wthr), dim3(ite.thr), 0, 0, g_opart_device.toKernel(), g_send_prp.get(i).toKernel(),
process_ghost_particles_prp<decltype(g_opart_device.toKernel()),decltype(g_send_prp.get(i).toKernel()),decltype(v_prp.toKernel()),0,1,2>
<<<ite.wthr,ite.thr>>>
(g_opart_device.toKernel(), g_send_prp.get(i).toKernel(),
v_prp.toKernel(),offset);
offset += g_send_prp.get(i).size();
......@@ -567,7 +573,9 @@ BOOST_AUTO_TEST_CASE( decomposition_ie_ghost_gpu_test_use )
proc_id_out.template get<0>(proc_id_out.size()-1) = 0;
proc_id_out.template hostToDevice(proc_id_out.size()-1,proc_id_out.size()-1);
hipLaunchKernelGGL(HIP_KERNEL_NAME(num_proc_ghost_each_part<3,float,decltype(dec.toKernel()),decltype(vg.toKernel()),decltype(proc_id_out.toKernel())>), dim3(ite.wthr), dim3(ite.thr), 0, 0, dec.toKernel(),vg.toKernel(),proc_id_out.toKernel());
num_proc_ghost_each_part<3,float,decltype(dec.toKernel()),decltype(vg.toKernel()),decltype(proc_id_out.toKernel())>
<<<ite.wthr,ite.thr>>>
(dec.toKernel(),vg.toKernel(),proc_id_out.toKernel());
proc_id_out.deviceToHost<0>();
......@@ -615,7 +623,9 @@ BOOST_AUTO_TEST_CASE( decomposition_ie_ghost_gpu_test_use )
ite = vg.getGPUIterator();
// we compute processor id for each particle
hipLaunchKernelGGL(HIP_KERNEL_NAME(proc_label_id_ghost<3,float,decltype(dec.toKernel()),decltype(vg.toKernel()),decltype(starts.toKernel()),decltype(output.toKernel())>), dim3(ite.wthr), dim3(ite.thr), 0, 0, dec.toKernel(),vg.toKernel(),starts.toKernel(),output.toKernel());
proc_label_id_ghost<3,float,decltype(dec.toKernel()),decltype(vg.toKernel()),decltype(starts.toKernel()),decltype(output.toKernel())>
<<<ite.wthr,ite.thr>>>
(dec.toKernel(),vg.toKernel(),starts.toKernel(),output.toKernel());
output.template deviceToHost<0,1>();
......@@ -731,7 +741,9 @@ BOOST_AUTO_TEST_CASE( decomposition_to_gpu_test_use )
dev_counter.fill<1>(0);
dev_counter.fill<2>(0);
hipLaunchKernelGGL(HIP_KERNEL_NAME(process_id_proc_each_part<3,float,decltype(dec.toKernel()),decltype(vg.toKernel()),decltype(proc_id_out.toKernel()),decltype(dev_counter.toKernel())>), dim3(ite.wthr), dim3(ite.thr), 0, 0, dec.toKernel(),vg.toKernel(),proc_id_out.toKernel(),dev_counter.toKernel(),v_cl.rank());
process_id_proc_each_part<3,float,decltype(dec.toKernel()),decltype(vg.toKernel()),decltype(proc_id_out.toKernel()),decltype(dev_counter.toKernel())>
<<<ite.wthr,ite.thr>>>
(dec.toKernel(),vg.toKernel(),proc_id_out.toKernel(),dev_counter.toKernel(),v_cl.rank());
proc_id_out.deviceToHost<0>();
......@@ -768,7 +780,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_gpu_find_buffer_offsets_test )
auto ite = vgp.getGPUIterator();
vgp.hostToDevice<0,1>();
hipLaunchKernelGGL(HIP_KERNEL_NAME((find_buffer_offsets<1,decltype(vgp.toKernel()),decltype(offs.toKernel())>)), dim3(), dim3(), 0, 0, vgp.toKernel(),(int *)mem.getDevicePointer(),offs.toKernel());
CUDA_LAUNCH((find_buffer_offsets<1,decltype(vgp.toKernel()),decltype(offs.toKernel())>),ite,vgp.toKernel(),(int *)mem.getDevicePointer(),offs.toKernel());
offs.template deviceToHost<0,1>();
......@@ -819,7 +831,7 @@ BOOST_AUTO_TEST_CASE(vector_dist_reorder_lbl)
auto ite = lbl_p.getGPUIterator();
hipLaunchKernelGGL(HIP_KERNEL_NAME(reorder_lbl<decltype(lbl_p.toKernel()),decltype(starts.toKernel())>), dim3(ite.wthr), dim3(ite.thr), 0, 0, lbl_p.toKernel(),starts.toKernel());
reorder_lbl<decltype(lbl_p.toKernel()),decltype(starts.toKernel())><<<ite.wthr,ite.thr>>>(lbl_p.toKernel(),starts.toKernel());
starts.template deviceToHost<0>();
lbl_p.template deviceToHost<0,1,2>();
......@@ -894,7 +906,7 @@ BOOST_AUTO_TEST_CASE(vector_dist_merge_sort)
auto ite = v_pos.getGPUIterator();
hipLaunchKernelGGL(HIP_KERNEL_NAME(merge_sort_part<false,decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(ns_to_s.toKernel()),0>), dim3(ite.wthr), dim3(ite.thr), 0, 0, v_pos.toKernel(),v_prp.toKernel(),
merge_sort_part<false,decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(ns_to_s.toKernel()),0><<<ite.wthr,ite.thr>>>(v_pos.toKernel(),v_prp.toKernel(),
v_pos_out.toKernel(),v_prp_out.toKernel(),
ns_to_s.toKernel());
......@@ -918,7 +930,7 @@ BOOST_AUTO_TEST_CASE(vector_dist_merge_sort)
BOOST_REQUIRE_EQUAL(match,true);
hipLaunchKernelGGL(HIP_KERNEL_NAME(merge_sort_part<false,decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(ns_to_s.toKernel()),1,2>), dim3(ite.wthr), dim3(ite.thr), 0, 0, v_pos.toKernel(),v_prp.toKernel(),
merge_sort_part<false,decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(ns_to_s.toKernel()),1,2><<<ite.wthr,ite.thr>>>(v_pos.toKernel(),v_prp.toKernel(),
v_pos_out.toKernel(),v_prp_out.toKernel(),
ns_to_s.toKernel());
......@@ -946,7 +958,7 @@ BOOST_AUTO_TEST_CASE(vector_dist_merge_sort)
BOOST_REQUIRE_EQUAL(match,true);
hipLaunchKernelGGL(HIP_KERNEL_NAME(merge_sort_part<true,decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(ns_to_s.toKernel())>), dim3(ite.wthr), dim3(ite.thr), 0, 0, v_pos.toKernel(),v_prp.toKernel(),
merge_sort_part<true,decltype(v_pos.toKernel()),decltype(v_prp.toKernel()),decltype(ns_to_s.toKernel())><<<ite.wthr,ite.thr>>>(v_pos.toKernel(),v_prp.toKernel(),
v_pos_out.toKernel(),v_prp_out.toKernel(),
ns_to_s.toKernel());
......@@ -1036,8 +1048,10 @@ BOOST_AUTO_TEST_CASE(vector_dist_gpu_map_fill_send_buffer_test)
{
auto ite = m_pos.get(i).getGPUIterator();
hipLaunchKernelGGL(HIP_KERNEL_NAME(process_map_particles<decltype(m_opart.toKernel()),decltype(m_pos.get(i).toKernel()),decltype(m_prp.get(i).toKernel()),
decltype(v_pos.toKernel()),decltype(v_prp.toKernel())>), dim3(ite.wthr), dim3(ite.thr), 0, 0, m_opart.toKernel(),m_pos.get(i).toKernel(), m_prp.get(i).toKernel(),
process_map_particles<decltype(m_opart.toKernel()),decltype(m_pos.get(i).toKernel()),decltype(m_prp.get(i).toKernel()),
decltype(v_pos.toKernel()),decltype(v_prp.toKernel())>
<<<ite.wthr,ite.thr>>>
(m_opart.toKernel(),m_pos.get(i).toKernel(), m_prp.get(i).toKernel(),
v_pos.toKernel(),v_prp.toKernel(),offset);
m_pos.get(i).deviceToHost<0>();
......
#include <hip/hip_runtime.h>
#include "config.h"
#define BOOST_TEST_DYN_LINK
......@@ -563,7 +561,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_multiphase_kernel_test )
openfpm::vector_gpu<aggregate<float>> output;
output.resize(100 * phases.size());
hipLaunchKernelGGL(HIP_KERNEL_NAME(vdmkt), dim3(1), dim3(1), 0, 0, phases.toKernel(),output.toKernel());
vdmkt<<<1,1>>>(phases.toKernel(),output.toKernel());
output.template deviceToHost<0>();
......@@ -655,7 +653,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_multiphase_kernel_test_simplified )
openfpm::vector_gpu<aggregate<float>> output;
output.resize(100 * phases.size());
hipLaunchKernelGGL(HIP_KERNEL_NAME(vdmkt_simple), dim3(1), dim3(1), 0, 0, phases.toKernel(),output.toKernel());
vdmkt_simple<<<1,1>>>(phases.toKernel(),output.toKernel());
output.template deviceToHost<0>();
......@@ -764,7 +762,7 @@ BOOST_AUTO_TEST_CASE( vector_dist_multiphase_kernel_cl_test )
output.resize(tot);
output2.resize(tot_g);
hipLaunchKernelGGL(HIP_KERNEL_NAME(vdmkt_simple_cl), dim3(1), dim3(1), 0, 0, phases.toKernel(),output.toKernel(),cl_ph.toKernel(),output2.toKernel());
vdmkt_simple_cl<<<1,1>>>(phases.toKernel(),output.toKernel(),cl_ph.toKernel(),output2.toKernel());
output.template deviceToHost<0>();
......
#define BOOST_TEST_DYN_LINK
#include <hip/hip_runtime.h>
#include "config.h"
#include <boost/test/unit_test.hpp>
#include "VCluster/VCluster.hpp"
......@@ -275,7 +273,7 @@ void check_cell_list_cpu_and_gpu(vector_type & vd, CellList_type & NN, CellList_
{
auto it5 = vd.getDomainIteratorGPU(32);
hipLaunchKernelGGL(HIP_KERNEL_NAME(calculate_force<typename vector_type::stype,decltype(NN.toKernel())>), dim3(it5.wthr), dim3(it5.thr), 0, 0, vd.toKernel(),vd.toKernel_sorted(),NN.toKernel(),create_vcluster().rank());
calculate_force<typename vector_type::stype,decltype(NN.toKernel())><<<it5.wthr,it5.thr>>>(vd.toKernel(),vd.toKernel_sorted(),NN.toKernel(),create_vcluster().rank());
vd.template deviceToHostProp<1,2>();
......@@ -301,7 +299,7 @@ void check_cell_list_cpu_and_gpu(vector_type & vd, CellList_type & NN, CellList_
// We do exactly the same test as before, but now we completely use the sorted version
hipLaunchKernelGGL(HIP_KERNEL_NAME(calculate_force_full_sort<typename vector_type::stype,decltype(NN.toKernel())>), dim3(it5.wthr), dim3(it5.thr), 0, 0, vd.toKernel_sorted(),NN.toKernel(),create_vcluster().rank());
calculate_force_full_sort<typename vector_type::stype,decltype(NN.toKernel())><<<it5.wthr,it5.thr>>>(vd.toKernel_sorted(),NN.toKernel(),create_vcluster().rank());
vd.template merge_sort<1>(NN);
vd.template deviceToHostProp<1>();
......@@ -387,7 +385,7 @@ void vector_dist_gpu_test_impl()
// offload to device
vd.hostToDevicePos();
hipLaunchKernelGGL(initialize_props, dim3(it3.wthr), dim3(it3.thr), 0, 0, vd.toKernel());
initialize_props<<<it3.wthr,it3.thr>>>(vd.toKernel());
// now we check what we initialized
......@@ -473,7 +471,7 @@ void vector_dist_gpu_make_sort_test_impl()
auto it3 = vd.getDomainIteratorGPU();
hipLaunchKernelGGL(initialize_props, dim3(it3.wthr), dim3(it3.thr), 0, 0, vd.toKernel());
initialize_props<<<it3.wthr,it3.thr>>>(vd.toKernel());
// Here we check make sort does not mess-up particles we use a Cell-List to check that
// the two cell-list constructed are identical
......@@ -699,7 +697,7 @@ void vdist_calc_gpu_test()
{
vd.map(RUN_ON_DEVICE);
CUDA_SAFE(hipGetLastError());
CUDA_SAFE(cudaGetLastError());
vd.deviceToHostPos();
vd.template deviceToHostProp<0,1,2>();
......@@ -845,7 +843,7 @@ void vdist_calc_gpu_test()
// move particles on gpu
auto ite = vd.getDomainIteratorGPU();
hipLaunchKernelGGL(HIP_KERNEL_NAME(move_parts_gpu_test<3,decltype(vd.toKernel())>), dim3(ite.wthr), dim3(ite.thr), 0, 0, vd.toKernel());
move_parts_gpu_test<3,decltype(vd.toKernel())><<<ite.wthr,ite.thr>>>(vd.toKernel());
}
}
......@@ -1728,7 +1726,7 @@ BOOST_AUTO_TEST_CASE(vector_dist_overflow_se_class1)
ite.thr.y = 1;
ite.thr.z = 1;
hipLaunchKernelGGL(HIP_KERNEL_NAME(launch_overflow), dim3(), dim3(), 0, 0, vdg.toKernel(),vdg2.toKernel());
CUDA_LAUNCH(launch_overflow,ite,vdg.toKernel(),vdg2.toKernel());
std::cout << "****** TEST ERROR MESSAGE END ********" << std::endl;
}
......
#define PRINT_RANK_TO_GPU
#include <hip/hip_runtime.h>
#include "initialize_wrapper.hpp"
#include "VCluster/VCluster.hpp"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment