Commit 3dabdf44 authored by incardon's avatar incardon

Changes for GPU

parent a9991812
...@@ -41,31 +41,43 @@ AC_DEFUN([AX_CUDA], ...@@ -41,31 +41,43 @@ AC_DEFUN([AX_CUDA],
AC_CHECK_PROG([NVCC_EXIST],[nvcc],["yes"],["no"]) AC_CHECK_PROG([NVCC_EXIST],[nvcc],["yes"],["no"])
AS_IF([test "x$NVCC_EXIST" = "xno"],[],[ AS_IF([test "x$NVCC_EXIST" = "xno"],[],[
NVCC=`which nvcc` NVCC=`which nvcc`
# Set CUDA_CFLAGS to $NVCC, where substring "bin/nvcc" # Set CUDA_CFLAGS to $NVCC, where substring "bin/nvcc"
# is substituted by "include". # is substituted by "include".
CUDA_CFLAGS=" -I${NVCC%bin//nvcc}" CUDA_CFLAGS=" ${NVCC%bin//nvcc}"
CUDA_CFLAGS=" -I${CUDA_CFLAGS%bin/nvcc}" CUDA_CFLAGS=" ${CUDA_CFLAGS%bin/nvcc}"
CUDA_CFLAGS=" -I${CUDA_CFLAGS}include" CUDA_CFLAGS=" -I${CUDA_CFLAGS}include"
#Set CUDA_CFLAGS to $NVCC, where substring "bin/nvcc" #Set CUDA_CFLAGS to $NVCC, where substring "bin/nvcc"
#is substituted by "lib". #is substituted by "lib".
CUDA_LIBS=" -L${NVCC%bin//nvcc}" CUDA_LIBS="${NVCC%bin//nvcc}"
CUDA_LIBS=" -L${CUDA_LIBS%bin/nvcc}" CUDA_LIBS="${CUDA_LIBS%bin/nvcc}"
CUDA_PATH=$CUDA_LIBS
CUDA_LIBS=" -L${CUDA_LIBS}lib" CUDA_LIBS=" -L${CUDA_LIBS}lib"
# If $build_cpu contains "_64", append "64" to CUDA_LIBS # If $build_cpu contains "_64", append "64" to CUDA_LIBS
AS_IF([echo $build_cpu | grep -q "_64"], AS_IF([echo $build_cpu | grep -q "_64"],
[CUDA_LIBS+="64"]) [
AS_IF([ test -d $CUDA_PATH/lib64 ], [ CUDA_LIBS+="64" ], [])
# Be careful: a return code of 0 means true and a return code of 1 means false
AS_IF([ command -v bumblebeed >/dev/null ], [ CUDA_LIBS+=" -L/usr/lib64/nvidia-bumblebee/ " ],
[
echo "bumblebee, NVIDIA optimus, not found"
])
AS_IF([ test -d /usr/local/cuda/lib64 ], [ CUDA_LIBS+=" -L/usr/local/cuda/lib64 " ],
[
AS_IF([ test -d /usr/local/cuda/lib ],[ CUDA_LIBS+=" -L/usr/local/cuda/lib " ])
])
])
# Append " -lcuda -lcudart" to CUDA_LIBS # Append " -lcuda -lcudart" to CUDA_LIBS
CUDA_LIBS+=" -lcuda -lcudart" CUDA_LIBS+=" -lcuda -lcudart"
# Make variables available in Makefile.am # Make variables available in Makefile.am
AC_SUBST(CUDA_CFLAGS) AC_SUBST([CUDA_CFLAGS])
AC_SUBST(CUDA_LIBS) AC_SUBST([CUDA_LIBS])
echo $NVCC echo $NVCC
AC_SUBST(NVCC) AC_SUBST([NVCC])
AC_DEFINE([NVCC],[],[NVCC compiling]) AC_DEFINE([NVCC],[],[NVCC compiling])
])dnl ])dnl
......
LINKLIBS = $(DEFAULT_LIB) $(PTHREAD_LIBS) $(OPT_LIBS) $(HDF5_LDFLAGS) $(HDF5_LIBS) $(BOOST_LDFLAGS) LINKLIBS = $(DEFAULT_LIB) $(PTHREAD_LIBS) $(OPT_LIBS) $(HDF5_LDFLAGS) $(HDF5_LIBS) $(BOOST_LDFLAGS) $(CUDA_LIBS)
if BUILDCUDA
CUDA_SOURCES=../../openfpm_devices/src/memory/CudaMemory.cu
else
CUDA_SOURCES=
endif
noinst_PROGRAMS = vcluster_test noinst_PROGRAMS = vcluster_test
vcluster_test_SOURCES = main.cpp VCluster/VCluster.cpp ../../openfpm_devices/src/memory/HeapMemory.cpp ../../openfpm_devices/src/memory/PtrMemory.cpp ../../openfpm_devices/src/Memleak_check.cpp vcluster_test_SOURCES = main.cpp VCluster/VCluster.cpp ../../openfpm_devices/src/memory/HeapMemory.cpp ../../openfpm_devices/src/memory/PtrMemory.cpp ../../openfpm_devices/src/Memleak_check.cpp $(CUDA_SOURCES)
vcluster_test_CXXFLAGS = $(AM_CXXFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) vcluster_test_CXXFLAGS = $(AM_CXXFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) $(CUDA_CFLAGS)
vcluster_test_CFLAGS = $(CUDA_CFLAGS) vcluster_test_CFLAGS = $(CUDA_CFLAGS)
vcluster_test_LDADD = $(LINKLIBS) vcluster_test_LDADD = $(LINKLIBS)
lib_LIBRARIES = libvcluster.a lib_LIBRARIES = libvcluster.a
libvcluster_a_SOURCES = VCluster/VCluster.cpp libvcluster_a_SOURCES = VCluster/VCluster.cpp
libvcluster_a_CXXFLAGS = $(AM_CXXFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) libvcluster_a_CXXFLAGS = $(AM_CXXFLAGS) $(INCLUDES_PATH) $(BOOST_CPPFLAGS) $(CUDA_CFLAGS)
libvcluster_a_CFLAGS = libvcluster_a_CFLAGS =
nobase_include_HEADERS = MPI_wrapper/MPI_IallreduceW.hpp MPI_wrapper/MPI_IrecvW.hpp MPI_wrapper/MPI_IBcastW.hpp MPI_wrapper/MPI_IsendW.hpp MPI_wrapper/MPI_util.hpp MPI_wrapper/MPI_IAllGather.hpp \ nobase_include_HEADERS = MPI_wrapper/MPI_IallreduceW.hpp MPI_wrapper/MPI_IrecvW.hpp MPI_wrapper/MPI_IBcastW.hpp MPI_wrapper/MPI_IsendW.hpp MPI_wrapper/MPI_util.hpp MPI_wrapper/MPI_IAllGather.hpp \
...@@ -17,7 +23,7 @@ VCluster/VCluster_base.hpp VCluster/VCluster.hpp VCluster/VCluster_meta_function ...@@ -17,7 +23,7 @@ VCluster/VCluster_base.hpp VCluster/VCluster.hpp VCluster/VCluster_meta_function
util/Vcluster_log.hpp util/Vcluster_log.hpp
.cu.o : .cu.o :
$(NVCC) $(NVCCFLAGS) -o $@ -c $< $(NVCC) $(NVCCFLAGS) $(INCLUDES_PATH) -o $@ -c $<
test: vcluster_test test: vcluster_test
source $(HOME)/openfpm_vars && cd .. && mpirun -np 3 ./src/vcluster_test && mpirun -np 4 ./src/vcluster_test source $(HOME)/openfpm_vars && cd .. && mpirun -np 3 ./src/vcluster_test && mpirun -np 4 ./src/vcluster_test
......
This diff is collapsed.
This diff is collapsed.
...@@ -1480,6 +1480,37 @@ BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_6) ...@@ -1480,6 +1480,37 @@ BOOST_AUTO_TEST_CASE (Vcluster_semantic_sendrecv_6)
} }
} }
/*! \brief Call SSendRecv on GPU vectors requesting the memory_traits_inte layout
 *
 * One 100-element vector is created per entry of v_cl.size(). Every element
 * gets a value derived from the vector index, the local rank and the element
 * index; the three components of property 1 hold the same value shifted by
 * 400000. The test only performs the SSendRecv call: nothing is checked on
 * the received data.
 *
 * NOTE(review): prc_send is handed to SSendRecv without any entry added to
 * it -- confirm this is intended.
 */
BOOST_AUTO_TEST_CASE( Vcluster_semantic_ssend_recv_layout_switch )
{
	using gpu_vec_t = openfpm::vector_gpu<aggregate<float,float[3]>>;

	auto & v_cl = create_vcluster();

	// The test is skipped when more than 10 processors are in use
	if (v_cl.size() > 10)
		return;

	openfpm::vector<gpu_vec_t> vd;
	gpu_vec_t collect;

	openfpm::vector<size_t> prc_send;
	openfpm::vector<size_t> prc_recv;
	openfpm::vector<size_t> sz_recv;

	vd.resize(v_cl.size());

	for (size_t p = 0 ; p < vd.size() ; ++p)
	{
		vd.get(p).resize(100);

		for (size_t e = 0 ; e < vd.get(p).size() ; ++e)
		{
			// Value encoding (vector index, rank, element index)
			const auto tag = 10000*p + v_cl.rank()*100 + e;

			vd.get(p).template get<0>(e) = tag;

			// All three components of property 1 carry the same shifted value
			for (int c = 0 ; c < 3 ; ++c)
				vd.get(p).template get<1>(e)[c] = 400000 + tag;
		}
	}

	v_cl.SSendRecv<gpu_vec_t,decltype(collect),memory_traits_inte>(vd,collect,prc_send,prc_recv,sz_recv);
}
/*BOOST_AUTO_TEST_CASE (Vcluster_semantic_bench_all_all) /*BOOST_AUTO_TEST_CASE (Vcluster_semantic_bench_all_all)
{ {
Vcluster & vcl = create_vcluster(); Vcluster & vcl = create_vcluster();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment