Commit 3dabdf44 authored by incardon's avatar incardon

Changes for GPU

parent a9991812
......@@ -41,31 +41,43 @@ AC_DEFUN([AX_CUDA],
AC_CHECK_PROG([NVCC_EXIST],[nvcc],["yes"],["no"])
AS_IF([test "x$NVCC_EXIST" = "xno"],[],[
NVCC=`which nvcc`
# Set CUDA_CFLAGS to $NVCC, where substring "bin/nvcc"
# is substituted by "include".
CUDA_CFLAGS=" -I${NVCC%bin//nvcc}"
CUDA_CFLAGS=" -I${CUDA_CFLAGS%bin/nvcc}"
CUDA_CFLAGS=" ${NVCC%bin//nvcc}"
CUDA_CFLAGS=" ${CUDA_CFLAGS%bin/nvcc}"
CUDA_CFLAGS=" -I${CUDA_CFLAGS}include"
#Set CUDA_LIBS to $NVCC, where substring "bin/nvcc"
#is substituted by "lib".
CUDA_LIBS=" -L${NVCC%bin//nvcc}"
CUDA_LIBS=" -L${CUDA_LIBS%bin/nvcc}"
CUDA_LIBS="${NVCC%bin//nvcc}"
CUDA_LIBS="${CUDA_LIBS%bin/nvcc}"
CUDA_PATH=$CUDA_LIBS
CUDA_LIBS=" -L${CUDA_LIBS}lib"
# If $build_cpu contains "_64", append "64" to CUDA_LIBS
AS_IF([echo $build_cpu | grep -q "_64"],
[CUDA_LIBS+="64"])
[
AS_IF([ test -d $CUDA_PATH/lib64 ], [ CUDA_LIBS+="64" ], [])
# Be careful: return code 0 means true and return code 1 means false
AS_IF([ command -v bumblebeed >/dev/null ], [ CUDA_LIBS+=" -L/usr/lib64/nvidia-bumblebee/ " ],
[
echo "bumblebee, NVIDIA optimus, not found"
])
AS_IF([ test -d /usr/local/cuda/lib64 ], [ CUDA_LIBS+=" -L/usr/local/cuda/lib64 " ],
[
AS_IF([ test -d /usr/local/cuda/lib ],[ CUDA_LIBS+=" -L/usr/local/cuda/lib " ])
])
])
# Append " -lcuda -lcudart" to CUDA_LIBS
CUDA_LIBS+=" -lcuda -lcudart"
# Make variables available in Makefile.am
AC_SUBST(CUDA_CFLAGS)
AC_SUBST(CUDA_LIBS)
AC_SUBST([CUDA_CFLAGS])
AC_SUBST([CUDA_LIBS])
echo $NVCC
AC_SUBST(NVCC)
AC_SUBST([NVCC])
AC_DEFINE([NVCC],[],[NVCC compiling])
])dnl
......
LINKLIBS = $(DEFAULT_LIB) $(PTHREAD_LIBS) $(OPT_LIBS) $(HDF5_LDFLAGS) $(HDF5_LIBS) $(BOOST_LDFLAGS)
LINKLIBS = $(DEFAULT_LIB) $(PTHREAD_LIBS) $(OPT_LIBS) $(HDF5_LDFLAGS) $(HDF5_LIBS) $(BOOST_LDFLAGS) $(CUDA_LIBS)
# Select the CUDA translation units to build: when the BUILDCUDA
# conditional is true CUDA_SOURCES holds CudaMemory.cu, otherwise it is
# left empty.
if BUILDCUDA
CUDA_SOURCES=../../openfpm_devices/src/memory/CudaMemory.cu
else
CUDA_SOURCES=
endif
noinst_PROGRAMS = vcluster_test
vcluster_test_SOURCES = main.cpp VCluster/VCluster.cpp ../../openfpm_devices/src/memory/HeapMemory.cpp ../../openfpm_devices/src/memory/PtrMemory.cpp ../../openfpm_devices/src/Memleak_check.cpp
vcluster_test_CXXFLAGS = $(AM_CXXFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS)
vcluster_test_SOURCES = main.cpp VCluster/VCluster.cpp ../../openfpm_devices/src/memory/HeapMemory.cpp ../../openfpm_devices/src/memory/PtrMemory.cpp ../../openfpm_devices/src/Memleak_check.cpp $(CUDA_SOURCES)
vcluster_test_CXXFLAGS = $(AM_CXXFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) $(CUDA_CFLAGS)
vcluster_test_CFLAGS = $(CUDA_CFLAGS)
vcluster_test_LDADD = $(LINKLIBS)
lib_LIBRARIES = libvcluster.a
libvcluster_a_SOURCES = VCluster/VCluster.cpp
libvcluster_a_CXXFLAGS = $(AM_CXXFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS)
libvcluster_a_CXXFLAGS = $(AM_CXXFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) $(CUDA_CFLAGS)
libvcluster_a_CFLAGS =
nobase_include_HEADERS = MPI_wrapper/MPI_IallreduceW.hpp MPI_wrapper/MPI_IrecvW.hpp MPI_wrapper/MPI_IBcastW.hpp MPI_wrapper/MPI_IsendW.hpp MPI_wrapper/MPI_util.hpp MPI_wrapper/MPI_IAllGather.hpp \
......@@ -17,7 +23,7 @@ VCluster/VCluster_base.hpp VCluster/VCluster.hpp VCluster/VCluster_meta_function
util/Vcluster_log.hpp
.cu.o :
$(NVCC) $(NVCCFLAGS) -o $@ -c $<
$(NVCC) $(NVCCFLAGS) $(INCLUDES_PATH) -o $@ -c $<
# Run the vcluster_test binary: source $(HOME)/openfpm_vars, move to the
# parent directory, then launch the binary through mpirun with 3 processes
# and, only if that run succeeds, again with 4 processes.
test: vcluster_test
source $(HOME)/openfpm_vars && cd .. && mpirun -np 3 ./src/vcluster_test && mpirun -np 4 ./src/vcluster_test
......
This diff is collapsed.
This diff is collapsed.
......@@ -1480,6 +1480,37 @@ BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_6)
}
}
// Issues an SSendRecv call whose payload is a vector_gpu of
// aggregate<float,float[3]> and whose layout argument is memory_traits_inte.
// The test only fills the send buffers and performs the call; it makes no
// checks on the received data.
BOOST_AUTO_TEST_CASE( Vcluster_semantic_ssend_recv_layout_switch )
{
	typedef openfpm::vector_gpu<aggregate<float,float[3]>> payload_type;

	auto & v_cl = create_vcluster();

	// Nothing is executed when v_cl.size() is larger than 10
	if (v_cl.size() > 10)
	{return;}

	openfpm::vector<payload_type> vd;
	payload_type collect;
	openfpm::vector<size_t> prc_send;
	openfpm::vector<size_t> prc_recv;
	openfpm::vector<size_t> sz_recv;

	// One send buffer per v_cl.size() entry
	vd.resize(v_cl.size());

	for (size_t p = 0 ; p < vd.size() ; p++)
	{
		auto && chunk = vd.get(p);

		chunk.resize(100);

		for (size_t e = 0 ; e < chunk.size() ; e++)
		{
			// Scalar property: value built from buffer index, rank and element index
			chunk.template get<0>(e) = 10000*p + v_cl.rank()*100 + e;

			// Vector property: the same value shifted by 400000 in all three components
			const auto shifted = 400000 + 10000*p + v_cl.rank()*100 + e;

			for (int c = 0 ; c < 3 ; c++)
			{chunk.template get<1>(e)[c] = shifted;}
		}
	}

	v_cl.SSendRecv<payload_type,decltype(collect),memory_traits_inte>(vd,collect,prc_send, prc_recv,sz_recv);
}
/*BOOST_AUTO_TEST_CASE (Vcluster_semantic_bench_all_all)
{
Vcluster & vcl = create_vcluster();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment