diff --git a/CMakeLists.txt b/CMakeLists.txt index 08d025b9195c8a798fe52140fabdf99eaba8d380..261b250b53c04946fdac3a03d32bc78af17c5ea6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,13 +10,12 @@ list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/cmake_modules/) set(BOOST_INCLUDE ${Boost_INCLUDE_DIR} CACHE PATH "Include directory for BOOST") set(LIBHILBERT_ROOT CACHE PATH "LibHilbert root path") set(SE_CLASS1 CACHE BOOL "Activate compilation with SE_CLASS1") -set(SE_CLASS2 CACHE BOOL "Activate compilation with SE_CLASS2") set(SE_CLASS3 CACHE BOOL "Activate compilation with SE_CLASS3") set(ENABLE_GPU CACHE BOOL "Disable the GPU code independently that a cuda compiler is found") set(TEST_PERFORMANCE CACHE BOOL "Enable test performance") -set (CMAKE_CXX_STANDARD 11) -set (CMAKE_CUDA_STANDARD 11) +set (CMAKE_CXX_STANDARD 14) +set (CMAKE_CUDA_STANDARD 14) if (ENABLE_GPU) enable_language(CUDA) @@ -31,8 +30,11 @@ if (ENABLE_GPU) elseif ( CUDA_VERSION_MAJOR EQUAL 10 AND CUDA_VERSION_MINOR EQUAL 2 ) message("CUDA is compatible 10.2") set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=2976 --diag_suppress=2977 --diag_suppress=2979 --diag_suppress=186" --expt-extended-lambda) +elseif ( CUDA_VERSION_MAJOR EQUAL 11 AND CUDA_VERSION_MINOR EQUAL 0 ) + message("CUDA is compatible 11.0") + set(WARNING_SUPPRESSION_AND_OPTION_NVCC -Xcudafe "--display_error_number --diag_suppress=3059 --diag_suppress=3058 --diag_suppress=3057 --diag_suppress=3056 --diag_suppress=611 --diag_suppress=186" --expt-extended-lambda) else() - message(FATAL_ERROR "CUDA is incompatible, version 9.2 10.1 and 10.2 is only supported") + message(FATAL_ERROR "CUDA is incompatible, version 9.2 10.1 10.2 and 11.0 is only supported") endif() endif() @@ -59,10 +61,6 @@ if(SE_CLASS1) set(DEFINE_SE_CLASS1 "#define SE_CLASS1") endif() -if(SE_CLASS2) - set(DEFINE_SE_CLASS2 "#define SE_CLASS2") -endif() - if(SE_CLASS3) set(DEFINE_SE_CLASS3 "#define SE_CLASS3") endif() diff 
--git a/configure b/configure index 090cab97b48a5f3167389387bfeaa98c689e864d..c49ce09b2d0f693ceeb15c375efb1501a7072434 100755 --- a/configure +++ b/configure @@ -104,7 +104,6 @@ enable_test_performance enable_test_coverage with_parmetis enable_se_class1 -enable_se_class2 enable_se_class3 with_action_on_error with_boost @@ -221,9 +220,6 @@ do se_class1) conf_options="$conf_options -DSE_CLASS1=ON" ;; - se_class2) - conf_options="$conf_options -DSE_CLASS2=ON" - ;; se_class3) conf_options="$conf_options -DSE_CLASS3=ON" ;; diff --git a/m4/acx_mpi.m4 b/m4/acx_mpi.m4 deleted file mode 100755 index 2f7e487e55497f42c547dc295f18d845322be32c..0000000000000000000000000000000000000000 --- a/m4/acx_mpi.m4 +++ /dev/null @@ -1,108 +0,0 @@ -dnl @synopsis ACX_MPI([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) -dnl -dnl This macro tries to find out how to compile programs that use MPI -dnl (Message Passing Interface), a standard API for parallel process -dnl communication (see http://www-unix.mcs.anl.gov/mpi/) -dnl -dnl On success, it sets the MPICC, MPICXX, or MPIF77 output variable to -dnl the name of the MPI compiler, depending upon the current language. -dnl (This may just be $CC/$CXX/$F77, but is more often something like -dnl mpicc/mpiCC/mpif77.) It also sets MPILIBS to any libraries that are -dnl needed for linking MPI (e.g. -lmpi, if a special -dnl MPICC/MPICXX/MPIF77 was not found). -dnl -dnl If you want to compile everything with MPI, you should set: -dnl -dnl CC="$MPICC" #OR# CXX="$MPICXX" #OR# F77="$MPIF77" -dnl LIBS="$MPILIBS $LIBS" -dnl -dnl The user can force a particular library/compiler by setting the -dnl MPICC/MPICXX/MPIF77 and/or MPILIBS environment variables. -dnl -dnl ACTION-IF-FOUND is a list of shell commands to run if an MPI -dnl library is found, and ACTION-IF-NOT-FOUND is a list of commands to -dnl run it if it is not found. If ACTION-IF-FOUND is not specified, the -dnl default action will define HAVE_MPI. 
-dnl -dnl @category InstalledPackages -dnl @author Steven G. Johnson <stevenj@alum.mit.edu> -dnl @version 2004-11-05 -dnl @license GPLWithACException - -AC_DEFUN([ACX_MPI], [ -AC_PREREQ(2.50) dnl for AC_LANG_CASE - -AC_LANG_CASE([C], [ - AC_REQUIRE([AC_PROG_CC]) - AC_ARG_VAR(MPICC,[MPI C compiler command]) - AC_CHECK_PROGS(MPICC, mpicc hcc mpcc mpcc_r mpxlc cmpicc, $CC) - acx_mpi_save_CC="$CC" - LAMMPICC="$CC" - CC="$MPICC" - AC_SUBST(MPICC) -], -[C++], [ - AC_REQUIRE([AC_PROG_CXX]) - AC_ARG_VAR(MPICXX,[MPI C++ compiler command]) - AC_CHECK_PROGS(MPICXX, mpic++ mpicxx mpiCC mpCC hcp mpxlC mpxlC_r cmpic++, $CXX) - acx_mpi_save_CXX="$CXX" - LAMMPICXX="$CXX" - CXX="$MPICXX" - AC_SUBST(MPICXX) -], -[Fortran 77], [ - AC_REQUIRE([AC_PROG_F77]) - AC_ARG_VAR(MPIF77,[MPI Fortran compiler command]) - AC_CHECK_PROGS(MPIF77, mpif77 hf77 mpxlf mpf77 mpif90 mpf90 mpxlf90 mpxlf95 mpxlf_r cmpifc cmpif90c, $F77) - acx_mpi_save_F77="$F77" - LAMMPIF77="$F77" - F77="$MPIF77" - AC_SUBST(MPIF77) -]) - -if test x = x"$MPILIBS"; then - AC_LANG_CASE([C], [AC_CHECK_FUNC(MPI_Init, [MPILIBS=" "])], - [C++], [AC_CHECK_FUNC(MPI_Init, [MPILIBS=" "])], - [Fortran 77], [AC_MSG_CHECKING([for MPI_Init]) - AC_TRY_LINK([],[ call MPI_Init], [MPILIBS=" " - AC_MSG_RESULT(yes)], [AC_MSG_RESULT(no)])]) -fi -if test x = x"$MPILIBS"; then - AC_CHECK_LIB(mpi, MPI_Init, [MPILIBS="-lmpi"]) -fi -if test x = x"$MPILIBS"; then - AC_CHECK_LIB(mpich, MPI_Init, [MPILIBS="-lmpich"]) -fi - -dnl We have to use AC_TRY_COMPILE and not AC_CHECK_HEADER because the -dnl latter uses $CPP, not $CC (which may be mpicc). 
-AC_LANG_CASE([C], [if test x != x"$MPILIBS"; then - AC_MSG_CHECKING([for mpi.h]) - export LAMMPICC="$acx_mpi_save_CC" - AC_TRY_COMPILE([#include <mpi.h>],[],[AC_MSG_RESULT(yes)], [MPILIBS="" - AC_MSG_RESULT(no)]) - unset LAMMPICC -fi], -[C++], [if test x != x"$MPILIBS"; then - AC_MSG_CHECKING([for mpi.h]) - export LAMMPICXX="$acx_mpi_save_CXX" - AC_TRY_COMPILE([#include <mpi.h>],[],[AC_MSG_RESULT(yes)], [MPILIBS="" - AC_MSG_RESULT(no)]) - unset LAMMPICXX -fi]) - -AC_LANG_CASE([C], [CC="$acx_mpi_save_CC"], - [C++], [CXX="$acx_mpi_save_CXX"], - [Fortran 77], [F77="$acx_mpi_save_F77"]) - -AC_SUBST(MPILIBS) - -# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: -if test x = x"$MPILIBS"; then - $2 - : -else - ifelse([$1],,[AC_DEFINE(HAVE_MPI,1,[Define if you have the MPI library.])],[$1]) - : -fi -])dnl ACX_MPI diff --git a/m4/acx_pthread.m4 b/m4/acx_pthread.m4 deleted file mode 100755 index 508df866b5c619f7a09632919f6502bcdb722134..0000000000000000000000000000000000000000 --- a/m4/acx_pthread.m4 +++ /dev/null @@ -1,242 +0,0 @@ -dnl -dnl ACX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) -dnl -dnl Description -dnl -dnl This macro figures out how to build C programs using POSIX threads. It -dnl sets the PTHREAD_LIBS output variable to the threads library and linker -dnl flags, and the PTHREAD_CFLAGS output variable to any special C compiler -dnl flags that are needed. (The user can also force certain compiler -dnl flags/libs to be tested by setting these environment variables.) -dnl -dnl Also sets PTHREAD_CC to any special C compiler that is needed for -dnl multi-threaded programs (defaults to the value of CC otherwise). (This is -dnl necessary on AIX to use the special cc_r compiler alias.) -dnl -dnl NOTE: You are assumed to not only compile your program with these flags, -dnl but also link it with them as well. e.g. you should link with $PTHREAD_CC -dnl $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... 
$PTHREAD_LIBS $LIBS -dnl -dnl If you are only building threads programs, you may wish to use these -dnl variables in your default LIBS, CFLAGS, and CC: -dnl -dnl LIBS="$PTHREAD_LIBS $LIBS" -dnl CFLAGS="$CFLAGS $PTHREAD_CFLAGS" -dnl CC="$PTHREAD_CC" -dnl -dnl In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute constant -dnl has a nonstandard name, defines PTHREAD_CREATE_JOINABLE to that name -dnl (e.g. PTHREAD_CREATE_UNDETACHED on AIX). -dnl -dnl ACTION-IF-FOUND is a list of shell commands to run if a threads library -dnl is found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it -dnl is not found. If ACTION-IF-FOUND is not specified, the default action -dnl will define HAVE_PTHREAD. -dnl -dnl Please let the authors know if this macro fails on any platform, or if -dnl you have any other suggestions or comments. This macro was based on work -dnl by SGJ on autoconf scripts for FFTW (www.fftw.org) (with help from M. -dnl Frigo), as well as ac_pthread and hb_pthread macros posted by AFC to the -dnl autoconf macro repository. We are also grateful for the helpful feedback -dnl of numerous users. -dnl -dnl Version: 1.8 (last modified: 2003-05-21) -dnl Author: Steven G. Johnson <stevenj@alum.mit.edu> and -dnl Alejandro Forero Cuervo <bachue@bachue.com> -dnl -dnl from http://www.gnu.org/software/ac-archive/htmldoc/index.html -dnl -dnl License: -dnl GNU General Public License -dnl [http://www.gnu.org/software/ac-archive/htmldoc/COPYING.html] -dnl with this special exception -dnl [http://www.gnu.org/software/ac-archive/htmldoc/COPYING-Exception.html]. -dnl - -AC_DEFUN([ACX_PTHREAD], [ -AC_REQUIRE([AC_CANONICAL_HOST]) -AC_LANG_SAVE -AC_LANG_C -acx_pthread_ok=no - -# We used to check for pthread.h first, but this fails if pthread.h -# requires special compiler flags (e.g. on True64 or Sequent). -# It gets checked for in the link test anyway. 
- -# First of all, check if the user has set any of the PTHREAD_LIBS, -# etcetera environment variables, and if threads linking works using -# them: -if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then - save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS $PTHREAD_CFLAGS" - save_LIBS="$LIBS" - LIBS="$PTHREAD_LIBS $LIBS" - AC_MSG_CHECKING([for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS]) - AC_TRY_LINK_FUNC(pthread_join, acx_pthread_ok=yes) - AC_MSG_RESULT($acx_pthread_ok) - if test x"$acx_pthread_ok" = xno; then - PTHREAD_LIBS="" - PTHREAD_CFLAGS="" - fi - LIBS="$save_LIBS" - CFLAGS="$save_CFLAGS" -fi - -# We must check for the threads library under a number of different -# names; the ordering is very important because some systems -# (e.g. DEC) have both -lpthread and -lpthreads, where one of the -# libraries is broken (non-POSIX). - -# Create a list of thread flags to try. Items starting with a "-" are -# C compiler flags, and other items are library names, except for "none" -# which indicates that we try without any flags at all. - -acx_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt" - -# The ordering *is* (sometimes) important. 
Some notes on the -# individual items follow: - -# pthreads: AIX (must check this before -lpthread) -# none: in case threads are in libc; should be tried before -Kthread and -# other compiler flags to prevent continual compiler warnings -# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) -# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) -# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) -# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads) -# -pthreads: Solaris/gcc -# -mthreads: Mingw32/gcc, Lynx/gcc -# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it -# doesn't hurt to check since this sometimes defines pthreads too; -# also defines -D_REENTRANT) -# pthread: Linux, etcetera -# --thread-safe: KAI C++ - -case "${host_cpu}-${host_os}" in - *solaris*) - - # On Solaris (at least, for some versions), libc contains stubbed - # (non-functional) versions of the pthreads routines, so link-based - # tests will erroneously succeed. (We need to link with -pthread or - # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather - # a function called by this macro, so we could check for that, but - # who knows whether they'll stub that too in a future libc.) So, - # we'll just look for -pthreads and -lpthread first: - - acx_pthread_flags="-pthread -pthreads pthread -mt $acx_pthread_flags" - ;; -esac - -if test x"$acx_pthread_ok" = xno; then -for flag in $acx_pthread_flags; do - - case $flag in - none) - AC_MSG_CHECKING([whether pthreads work without any flags]) - ;; - - -*) - AC_MSG_CHECKING([whether pthreads work with $flag]) - PTHREAD_CFLAGS="$flag" - ;; - - *) - AC_MSG_CHECKING([for the pthreads library -l$flag]) - PTHREAD_LIBS="-l$flag" - ;; - esac - - save_LIBS="$LIBS" - save_CFLAGS="$CFLAGS" - LIBS="$PTHREAD_LIBS $LIBS" - CFLAGS="$CFLAGS $PTHREAD_CFLAGS" - - # Check for various functions. We must include pthread.h, - # since some functions may be macros. 
(On the Sequent, we - # need a special flag -Kthread to make this header compile.) - # We check for pthread_join because it is in -lpthread on IRIX - # while pthread_create is in libc. We check for pthread_attr_init - # due to DEC craziness with -lpthreads. We check for - # pthread_cleanup_push because it is one of the few pthread - # functions on Solaris that doesn't have a non-functional libc stub. - # We try pthread_create on general principles. - AC_TRY_LINK([#include <pthread.h>], - [pthread_t th; pthread_join(th, 0); - pthread_attr_init(0); pthread_cleanup_push(0, 0); - pthread_create(0,0,0,0); pthread_cleanup_pop(0); ], - [acx_pthread_ok=yes]) - - LIBS="$save_LIBS" - CFLAGS="$save_CFLAGS" - - AC_MSG_RESULT($acx_pthread_ok) - if test "x$acx_pthread_ok" = xyes; then - break; - fi - - PTHREAD_LIBS="" - PTHREAD_CFLAGS="" -done -fi - -# Various other checks: -if test "x$acx_pthread_ok" = xyes; then - save_LIBS="$LIBS" - LIBS="$PTHREAD_LIBS $LIBS" - save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS $PTHREAD_CFLAGS" - - # Detect AIX lossage: threads are created detached by default - # and the JOINABLE attribute has a nonstandard name (UNDETACHED). 
- AC_MSG_CHECKING([for joinable pthread attribute]) - AC_TRY_LINK([#include <pthread.h>], - [int attr=PTHREAD_CREATE_JOINABLE;], - ok=PTHREAD_CREATE_JOINABLE, ok=unknown) - if test x"$ok" = xunknown; then - AC_TRY_LINK([#include <pthread.h>], - [int attr=PTHREAD_CREATE_UNDETACHED;], - ok=PTHREAD_CREATE_UNDETACHED, ok=unknown) - fi - if test x"$ok" != xPTHREAD_CREATE_JOINABLE; then - AC_DEFINE(PTHREAD_CREATE_JOINABLE, $ok, - [Define to the necessary symbol if this constant - uses a non-standard name on your system.]) - fi - AC_MSG_RESULT(${ok}) - if test x"$ok" = xunknown; then - AC_MSG_WARN([we do not know how to create joinable pthreads]) - fi - - AC_MSG_CHECKING([if more special flags are required for pthreads]) - flag=no - case "${host_cpu}-${host_os}" in - *-aix* | *-freebsd*) flag="-D_THREAD_SAFE";; - *solaris* | *-osf* | *-hpux*) flag="-D_REENTRANT";; - esac - AC_MSG_RESULT(${flag}) - if test "x$flag" != xno; then - PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS" - fi - - LIBS="$save_LIBS" - CFLAGS="$save_CFLAGS" - - # More AIX lossage: must compile with cc_r - AC_CHECK_PROG(PTHREAD_CC, cc_r, cc_r, ${CC}) -else - PTHREAD_CC="$CC" -fi - -AC_SUBST(PTHREAD_LIBS) -AC_SUBST(PTHREAD_CFLAGS) -AC_SUBST(PTHREAD_CC) - -# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: -if test x"$acx_pthread_ok" = xyes; then - ifelse([$1],,AC_DEFINE(HAVE_PTHREAD,1,[Define if you have POSIX threads libraries and header files.]),[$1]) - : -else - acx_pthread_ok=no - $2 -fi -AC_LANG_RESTORE -])dnl ACX_PTHREAD \ No newline at end of file diff --git a/m4/ax_boost_base.m4 b/m4/ax_boost_base.m4 deleted file mode 100644 index b8ffb03f616cb96c2762bcc24afe9133d690fc4b..0000000000000000000000000000000000000000 --- a/m4/ax_boost_base.m4 +++ /dev/null @@ -1,286 +0,0 @@ -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_boost_base.html -# =========================================================================== -# -# 
SYNOPSIS -# -# AX_BOOST_BASE([MINIMUM-VERSION], [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) -# -# DESCRIPTION -# -# Test for the Boost C++ libraries of a particular version (or newer) -# -# If no path to the installed boost library is given the macro searchs -# under /usr, /usr/local, /opt and /opt/local and evaluates the -# $BOOST_ROOT environment variable. Further documentation is available at -# <http://randspringer.de/boost/index.html>. -# -# This macro calls: -# -# AC_SUBST(BOOST_CPPFLAGS) / AC_SUBST(BOOST_LDFLAGS) -# -# And sets: -# -# HAVE_BOOST -# -# LICENSE -# -# Copyright (c) 2008 Thomas Porschberg <thomas@randspringer.de> -# Copyright (c) 2009 Peter Adolphs -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 26 - -AC_DEFUN([AX_BOOST_BASE], -[ -AC_ARG_WITH([boost], - [AS_HELP_STRING([--with-boost@<:@=ARG@:>@], - [use Boost library from a standard location (ARG=yes), - from the specified location (ARG=<path>), - or disable it (ARG=no) - @<:@ARG=yes@:>@ ])], - [ - if test "$withval" = "no"; then - want_boost="no" - elif test "$withval" = "yes"; then - want_boost="yes" - ac_boost_path="" - else - want_boost="yes" - ac_boost_path="$withval" - fi - ], - [want_boost="yes"]) - - -AC_ARG_WITH([boost-libdir], - AS_HELP_STRING([--with-boost-libdir=LIB_DIR], - [Force given directory for boost libraries. 
Note that this will override library path detection, so use this parameter only if default library detection fails and you know exactly where your boost libraries are located.]), - [ - if test -d "$withval" - then - ac_boost_lib_path="$withval" - else - AC_MSG_ERROR(--with-boost-libdir expected directory name) - fi - ], - [ac_boost_lib_path=""] -) - -if test "x$want_boost" = "xyes"; then - boost_lib_version_req=ifelse([$1], ,1.20.0,$1) - boost_lib_version_req_shorten=`expr $boost_lib_version_req : '\([[0-9]]*\.[[0-9]]*\)'` - boost_lib_version_req_major=`expr $boost_lib_version_req : '\([[0-9]]*\)'` - boost_lib_version_req_minor=`expr $boost_lib_version_req : '[[0-9]]*\.\([[0-9]]*\)'` - boost_lib_version_req_sub_minor=`expr $boost_lib_version_req : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'` - if test "x$boost_lib_version_req_sub_minor" = "x" ; then - boost_lib_version_req_sub_minor="0" - fi - WANT_BOOST_VERSION=`expr $boost_lib_version_req_major \* 100000 \+ $boost_lib_version_req_minor \* 100 \+ $boost_lib_version_req_sub_minor` - AC_MSG_CHECKING(for boostlib >= $boost_lib_version_req) - succeeded=no - - dnl On 64-bit systems check for system libraries in both lib64 and lib. - dnl The former is specified by FHS, but e.g. Debian does not adhere to - dnl this (as it rises problems for generic multi-arch support). - dnl The last entry in the list is chosen by default when no libraries - dnl are found, e.g. when only header-only libraries are installed! - libsubdirs="lib" - ax_arch=`uname -m` - case $ax_arch in - x86_64) - libsubdirs="lib64 libx32 lib lib64" - ;; - ppc64|s390x|sparc64|aarch64|ppc64le) - libsubdirs="lib64 lib lib64 ppc64le" - ;; - esac - - dnl allow for real multi-arch paths e.g. /usr/lib/x86_64-linux-gnu. Give - dnl them priority over the other paths since, if libs are found there, they - dnl are almost assuredly the ones desired. 
- AC_REQUIRE([AC_CANONICAL_HOST]) - libsubdirs="lib/${host_cpu}-${host_os} $libsubdirs" - - case ${host_cpu} in - i?86) - libsubdirs="lib/i386-${host_os} $libsubdirs" - ;; - esac - - dnl first we check the system location for boost libraries - dnl this location ist chosen if boost libraries are installed with the --layout=system option - dnl or if you install boost with RPM - if test "$ac_boost_path" != ""; then - BOOST_CPPFLAGS="-I$ac_boost_path/include" - for ac_boost_path_tmp in $libsubdirs; do - if test -d "$ac_boost_path"/"$ac_boost_path_tmp" ; then - BOOST_LDFLAGS="-L$ac_boost_path/$ac_boost_path_tmp" - break - fi - done - elif test "$cross_compiling" != yes; then - for ac_boost_path_tmp in /usr /usr/local /opt /opt/local ; do - if test -d "$ac_boost_path_tmp/include/boost" && test -r "$ac_boost_path_tmp/include/boost"; then - for libsubdir in $libsubdirs ; do - if ls "$ac_boost_path_tmp/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi - done - BOOST_LDFLAGS="-L$ac_boost_path_tmp/$libsubdir" - BOOST_CPPFLAGS="-I$ac_boost_path_tmp/include" - break; - fi - done - fi - - dnl overwrite ld flags if we have required special directory with - dnl --with-boost-libdir parameter - if test "$ac_boost_lib_path" != ""; then - BOOST_LDFLAGS="-L$ac_boost_lib_path" - fi - - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - AC_REQUIRE([AC_PROG_CXX]) - AC_LANG_PUSH(C++) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ - @%:@include <boost/version.hpp> - ]], [[ - #if BOOST_VERSION >= $WANT_BOOST_VERSION - // Everything is okay - #else - # error Boost version is too old - #endif - ]])],[ - AC_MSG_RESULT(yes) - succeeded=yes - found_system=yes - ],[ - ]) - AC_LANG_POP([C++]) - - - - dnl if we found no boost with system layout we search for boost libraries - dnl built and installed without the --layout=system option or for a staged(not installed) version - if test 
"x$succeeded" != "xyes"; then - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - BOOST_CPPFLAGS= - BOOST_LDFLAGS= - _version=0 - if test "$ac_boost_path" != ""; then - if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then - for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do - _version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'` - V_CHECK=`expr $_version_tmp \> $_version` - if test "$V_CHECK" = "1" ; then - _version=$_version_tmp - fi - VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` - BOOST_CPPFLAGS="-I$ac_boost_path/include/boost-$VERSION_UNDERSCORE" - done - dnl if nothing found search for layout used in Windows distributions - if test -z "$BOOST_CPPFLAGS"; then - if test -d "$ac_boost_path/boost" && test -r "$ac_boost_path/boost"; then - BOOST_CPPFLAGS="-I$ac_boost_path" - fi - fi - fi - else - if test "$cross_compiling" != yes; then - for ac_boost_path in /usr /usr/local /opt /opt/local ; do - if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then - for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do - _version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'` - V_CHECK=`expr $_version_tmp \> $_version` - if test "$V_CHECK" = "1" ; then - _version=$_version_tmp - best_path=$ac_boost_path - fi - done - fi - done - - VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` - BOOST_CPPFLAGS="-I$best_path/include/boost-$VERSION_UNDERSCORE" - if test "$ac_boost_lib_path" = ""; then - for libsubdir in $libsubdirs ; do - if ls "$best_path/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi - done - BOOST_LDFLAGS="-L$best_path/$libsubdir" - fi - fi - - if test "x$BOOST_ROOT" != "x"; then - for libsubdir in $libsubdirs ; do - if ls "$BOOST_ROOT/stage/$libsubdir/libboost_"* >/dev/null 2>&1 ; then break; fi - done - if test -d "$BOOST_ROOT" && test -r "$BOOST_ROOT" && test -d "$BOOST_ROOT/stage/$libsubdir" && test -r 
"$BOOST_ROOT/stage/$libsubdir"; then - version_dir=`expr //$BOOST_ROOT : '.*/\(.*\)'` - stage_version=`echo $version_dir | sed 's/boost_//' | sed 's/_/./g'` - stage_version_shorten=`expr $stage_version : '\([[0-9]]*\.[[0-9]]*\)'` - V_CHECK=`expr $stage_version_shorten \>\= $_version` - if test "$V_CHECK" = "1" -a "$ac_boost_lib_path" = "" ; then - AC_MSG_NOTICE(We will use a staged boost library from $BOOST_ROOT) - BOOST_CPPFLAGS="-I$BOOST_ROOT" - BOOST_LDFLAGS="-L$BOOST_ROOT/stage/$libsubdir" - fi - fi - fi - fi - - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - AC_LANG_PUSH(C++) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ - @%:@include <boost/version.hpp> - ]], [[ - #if BOOST_VERSION >= $WANT_BOOST_VERSION - // Everything is okay - #else - # error Boost version is too old - #endif - ]])],[ - AC_MSG_RESULT(yes) - succeeded=yes - found_system=yes - ],[ - ]) - AC_LANG_POP([C++]) - fi - - if test "$succeeded" != "yes" ; then - if test "$_version" = "0" ; then - AC_MSG_NOTICE([[We could not detect the boost libraries (version $boost_lib_version_req_shorten or higher). If you have a staged boost library (still not installed) please specify \$BOOST_ROOT in your environment and do not give a PATH to --with-boost option. If you are sure you have boost installed, then check your version number looking in <boost/version.hpp>. 
See http://randspringer.de/boost for more documentation.]]) - else - AC_MSG_NOTICE([Your boost libraries seems to old (version $_version).]) - fi - # execute ACTION-IF-NOT-FOUND (if present): - ifelse([$3], , :, [$3]) - else - AC_SUBST(BOOST_CPPFLAGS) - AC_SUBST(BOOST_LDFLAGS) - AC_DEFINE(HAVE_BOOST,,[define if the Boost library is available]) - # execute ACTION-IF-FOUND (if present): - ifelse([$2], , :, [$2]) - fi - - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" -fi - -]) - diff --git a/m4/ax_boost_chrono.m4 b/m4/ax_boost_chrono.m4 deleted file mode 100644 index c4eef7f5109e141e8889d86dfa8dd4d2f898e897..0000000000000000000000000000000000000000 --- a/m4/ax_boost_chrono.m4 +++ /dev/null @@ -1,119 +0,0 @@ -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_boost_chrono.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_BOOST_CHRONO -# -# DESCRIPTION -# -# Test for System library from the Boost C++ libraries. The macro requires -# a preceding call to AX_BOOST_BASE. Further documentation is available at -# <http://randspringer.de/boost/index.html>. -# -# This macro calls: -# -# AC_SUBST(BOOST_CHRONO_LIB) -# -# And sets: -# -# HAVE_BOOST_CHRONO -# -# LICENSE -# -# Copyright (c) 2012 Xiyue Deng <manphiz@gmail.com> -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 1 - -AC_DEFUN([AX_BOOST_CHRONO], -[ - AC_ARG_WITH([boost-chrono], - AS_HELP_STRING([--with-boost-chrono@<:@=special-lib@:>@], - [use the Chrono library from boost - it is possible to specify a certain library for the linker - e.g. 
--with-boost-chrono=boost_chrono-gcc-mt ]), - [ - if test "$withval" = "no"; then - want_boost="no" - elif test "$withval" = "yes"; then - want_boost="yes" - ax_boost_user_chrono_lib="" - else - want_boost="yes" - ax_boost_user_chrono_lib="$withval" - fi - ], - [want_boost="yes"] - ) - - if test "x$want_boost" = "xyes"; then - AC_REQUIRE([AC_PROG_CC]) - AC_REQUIRE([AC_CANONICAL_BUILD]) - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS -lboost_system" - export LDFLAGS - - AC_CACHE_CHECK(whether the Boost::Chrono library is available, - ax_cv_boost_chrono, - [AC_LANG_PUSH([C++]) - CXXFLAGS_SAVE=$CXXFLAGS - - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include <boost/chrono.hpp>]], - [[boost::chrono::system_clock::time_point time;]])], - ax_cv_boost_chrono=yes, ax_cv_boost_chrono=no) - CXXFLAGS=$CXXFLAGS_SAVE - AC_LANG_POP([C++]) - ]) - if test "x$ax_cv_boost_chrono" = "xyes"; then - AC_SUBST(BOOST_CPPFLAGS) - - AC_DEFINE(HAVE_BOOST_CHRONO,,[define if the Boost::Chrono library is available]) - BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` - - LDFLAGS_SAVE=$LDFLAGS - if test "x$ax_boost_user_chrono_lib" = "x"; then - for libextension in `ls $BOOSTLIBDIR/libboost_chrono*.so* $BOOSTLIBDIR/libboost_chrono*.dylib* $BOOSTLIBDIR/libboost_chrono*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_chrono.*\)\.so.*$;\1;' -e 's;^lib\(boost_chrono.*\)\.dylib.*$;\1;' -e 's;^lib\(boost_chrono.*\)\.a.*$;\1;'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_CHRONO_LIB="-l$ax_lib"; AC_SUBST(BOOST_CHRONO_LIB) link_chrono="yes"; break], - [link_chrono="no"]) - done - if test "x$link_chrono" != "xyes"; then - for libextension in `ls $BOOSTLIBDIR/boost_chrono*.dll* $BOOSTLIBDIR/boost_chrono*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_chrono.*\)\.dll.*$;\1;' -e 's;^\(boost_chrono.*\)\.a.*$;\1;'` ; do - ax_lib=${libextension} - 
AC_CHECK_LIB($ax_lib, exit, - [BOOST_CHRONO_LIB="-l$ax_lib"; AC_SUBST(BOOST_CHRONO_LIB) link_chrono="yes"; break], - [link_chrono="no"]) - done - fi - - else - for ax_lib in $ax_boost_user_chrono_lib boost_chrono-$ax_boost_user_chrono_lib; do - AC_CHECK_LIB($ax_lib, exit, - [BOOST_CHRONO_LIB="-l$ax_lib"; AC_SUBST(BOOST_CHRONO_LIB) link_chrono="yes"; break], - [link_chrono="no"]) - done - - fi - if test "x$ax_lib" = "x"; then - AC_MSG_ERROR(Could not find a version of the library!) - fi - if test "x$link_chrono" = "xno"; then - AC_MSG_ERROR(Could not link against $ax_lib !) - fi - fi - - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - fi -]) - diff --git a/m4/ax_boost_iostreams.m4 b/m4/ax_boost_iostreams.m4 deleted file mode 100644 index b4e970be04bbba9e5c889ae21dcc6865ede0547b..0000000000000000000000000000000000000000 --- a/m4/ax_boost_iostreams.m4 +++ /dev/null @@ -1,119 +0,0 @@ -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_boost_iostreams.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_BOOST_IOSTREAMS -# -# DESCRIPTION -# -# Test for IOStreams library from the Boost C++ libraries. The macro -# requires a preceding call to AX_BOOST_BASE. Further documentation is -# available at <http://randspringer.de/boost/index.html>. -# -# This macro calls: -# -# AC_SUBST(BOOST_IOSTREAMS_LIB) -# -# And sets: -# -# HAVE_BOOST_IOSTREAMS -# -# LICENSE -# -# Copyright (c) 2008 Thomas Porschberg <thomas@randspringer.de> -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. 
- -#serial 20 - -AC_DEFUN([AX_BOOST_IOSTREAMS], -[ - AC_ARG_WITH([boost-iostreams], - AS_HELP_STRING([--with-boost-iostreams@<:@=special-lib@:>@], - [use the IOStreams library from boost - it is possible to specify a certain library for the linker - e.g. --with-boost-iostreams=boost_iostreams-gcc-mt-d-1_33_1 ]), - [ - if test "$withval" = "no"; then - want_boost="no" - elif test "$withval" = "yes"; then - want_boost="yes" - ax_boost_user_iostreams_lib="" - else - want_boost="yes" - ax_boost_user_iostreams_lib="$withval" - fi - ], - [want_boost="yes"] - ) - - if test "x$want_boost" = "xyes"; then - AC_REQUIRE([AC_PROG_CC]) - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - AC_CACHE_CHECK(whether the Boost::IOStreams library is available, - ax_cv_boost_iostreams, - [AC_LANG_PUSH([C++]) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include <boost/iostreams/filtering_stream.hpp> - @%:@include <boost/range/iterator_range.hpp> - ]], - [[std::string input = "Hello World!"; - namespace io = boost::iostreams; - io::filtering_istream in(boost::make_iterator_range(input)); - return 0; - ]])], - ax_cv_boost_iostreams=yes, ax_cv_boost_iostreams=no) - AC_LANG_POP([C++]) - ]) - if test "x$ax_cv_boost_iostreams" = "xyes"; then - AC_DEFINE(HAVE_BOOST_IOSTREAMS,,[define if the Boost::IOStreams library is available]) - BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` - if test "x$ax_boost_user_iostreams_lib" = "x"; then - for libextension in `ls $BOOSTLIBDIR/libboost_iostreams*.so* $BOOSTLIBDIR/libboost_iostream*.dylib* $BOOSTLIBDIR/libboost_iostreams*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_iostreams.*\)\.so.*$;\1;' -e 's;^lib\(boost_iostream.*\)\.dylib.*$;\1;' -e 's;^lib\(boost_iostreams.*\)\.a.*$;\1;'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_IOSTREAMS_LIB="-l$ax_lib"; AC_SUBST(BOOST_IOSTREAMS_LIB) 
link_iostreams="yes"; break], - [link_iostreams="no"]) - done - if test "x$link_iostreams" != "xyes"; then - for libextension in `ls $BOOSTLIBDIR/boost_iostreams*.dll* $BOOSTLIBDIR/boost_iostreams*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_iostreams.*\)\.dll.*$;\1;' -e 's;^\(boost_iostreams.*\)\.a.*$;\1;'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_IOSTREAMS_LIB="-l$ax_lib"; AC_SUBST(BOOST_IOSTREAMS_LIB) link_iostreams="yes"; break], - [link_iostreams="no"]) - done - fi - - else - for ax_lib in $ax_boost_user_iostreams_lib boost_iostreams-$ax_boost_user_iostreams_lib; do - AC_CHECK_LIB($ax_lib, main, - [BOOST_IOSTREAMS_LIB="-l$ax_lib"; AC_SUBST(BOOST_IOSTREAMS_LIB) link_iostreams="yes"; break], - [link_iostreams="no"]) - done - - fi - if test "x$ax_lib" = "x"; then - echo "Could not find a version of the library!" - exit 202 - fi - if test "x$link_iostreams" != "xyes"; then - echo "Could not link against $ax_lib !" - exit 202 - fi - fi - - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - fi -]) - diff --git a/m4/ax_boost_program_options.m4 b/m4/ax_boost_program_options.m4 deleted file mode 100644 index 71668d9b1430dbd1aa70556168128dd4550949cb..0000000000000000000000000000000000000000 --- a/m4/ax_boost_program_options.m4 +++ /dev/null @@ -1,110 +0,0 @@ - # ============================================================================ -# http://www.gnu.org/software/autoconf-archive/ax_boost_program_options.html -# ============================================================================ -# -# SYNOPSIS -# -# AX_BOOST_PROGRAM_OPTIONS -# -# DESCRIPTION -# -# Test for program options library from the Boost C++ libraries. The macro -# requires a preceding call to AX_BOOST_BASE. Further documentation is -# available at <http://randspringer.de/boost/index.html>. 
-# -# This macro calls: -# -# AC_SUBST(BOOST_PROGRAM_OPTIONS_LIB) -# -# And sets: -# -# HAVE_BOOST_PROGRAM_OPTIONS -# -# LICENSE -# -# Copyright (c) 2009 Thomas Porschberg <thomas@randspringer.de> -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 24 - -AC_DEFUN([AX_BOOST_PROGRAM_OPTIONS], -[ - AC_ARG_WITH([boost-program-options], - AS_HELP_STRING([--with-boost-program-options@<:@=special-lib@:>@], - [use the program options library from boost - it is possible to specify a certain library for the linker - e.g. --with-boost-program-options=boost_program_options-gcc-mt-1_33_1 ]), - [ - if test "$withval" = "no"; then - want_boost="no" - elif test "$withval" = "yes"; then - want_boost="yes" - ax_boost_user_program_options_lib="" - else - want_boost="yes" - ax_boost_user_program_options_lib="$withval" - fi - ], - [want_boost="yes"] - ) - - if test "x$want_boost" = "xyes"; then - AC_REQUIRE([AC_PROG_CC]) - export want_boost - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - AC_CACHE_CHECK([whether the Boost::Program_Options library is available], - ax_cv_boost_program_options, - [AC_LANG_PUSH(C++) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include <boost/program_options/errors.hpp> - ]], - [[boost::program_options::error err("Error message"); - return 0;]])], - ax_cv_boost_program_options=yes, ax_cv_boost_program_options=no) - AC_LANG_POP([C++]) - ]) - if test "$ax_cv_boost_program_options" = yes; then - AC_DEFINE(HAVE_BOOST_PROGRAM_OPTIONS,,[define if the Boost::PROGRAM_OPTIONS library is available]) - BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` - if test "x$ax_boost_user_program_options_lib" = "x"; then - for libextension in `ls 
$BOOSTLIBDIR/libboost_program_options*.so* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_program_options.*\)\.so.*$;\1;'` `ls $BOOSTLIBDIR/libboost_program_options*.dylib* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_program_options.*\)\.dylib.*$;\1;'` `ls $BOOSTLIBDIR/libboost_program_options*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_program_options.*\)\.a.*$;\1;'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_PROGRAM_OPTIONS_LIB="-l$ax_lib"; AC_SUBST(BOOST_PROGRAM_OPTIONS_LIB) link_program_options="yes"; break], - [link_program_options="no"]) - done - if test "x$link_program_options" != "xyes"; then - for libextension in `ls $BOOSTLIBDIR/boost_program_options*.dll* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_program_options.*\)\.dll.*$;\1;'` `ls $BOOSTLIBDIR/boost_program_options*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_program_options.*\)\.a.*$;\1;'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_PROGRAM_OPTIONS_LIB="-l$ax_lib"; AC_SUBST(BOOST_PROGRAM_OPTIONS_LIB) link_program_options="yes"; break], - [link_program_options="no"]) - done - fi - else - for ax_lib in $ax_boost_user_program_options_lib boost_program_options-$ax_boost_user_program_options_lib; do - AC_CHECK_LIB($ax_lib, main, - [BOOST_PROGRAM_OPTIONS_LIB="-l$ax_lib"; AC_SUBST(BOOST_PROGRAM_OPTIONS_LIB) link_program_options="yes"; break], - [link_program_options="no"]) - done - fi - if test "x$ax_lib" = "x"; then - echo "Could not find a version of the library!" - exit 202 - fi - if test "x$link_program_options" != "xyes"; then - echo "Could not link against $ax_lib !" 
- exit 202 - fi - fi - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - fi -]) diff --git a/m4/ax_boost_system.m4 b/m4/ax_boost_system.m4 deleted file mode 100644 index 96f1e4aee34b08ce99d3ea90e1523730d9ad47cc..0000000000000000000000000000000000000000 --- a/m4/ax_boost_system.m4 +++ /dev/null @@ -1,122 +0,0 @@ -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_boost_system.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_BOOST_SYSTEM -# -# DESCRIPTION -# -# Test for System library from the Boost C++ libraries. The macro requires -# a preceding call to AX_BOOST_BASE. Further documentation is available at -# <http://randspringer.de/boost/index.html>. -# -# This macro calls: -# -# AC_SUBST(BOOST_SYSTEM_LIB) -# -# And sets: -# -# HAVE_BOOST_SYSTEM -# -# LICENSE -# -# Copyright (c) 2008 Thomas Porschberg <thomas@randspringer.de> -# Copyright (c) 2008 Michael Tindal -# Copyright (c) 2008 Daniel Casimiro <dan.casimiro@gmail.com> -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 18 - -AC_DEFUN([AX_BOOST_SYSTEM], -[ - AC_ARG_WITH([boost-system], - AS_HELP_STRING([--with-boost-system@<:@=special-lib@:>@], - [use the System library from boost - it is possible to specify a certain library for the linker - e.g. 
--with-boost-system=boost_system-gcc-mt ]), - [ - if test "$withval" = "no"; then - want_boost="no" - elif test "$withval" = "yes"; then - want_boost="yes" - ax_boost_user_system_lib="" - else - want_boost="yes" - ax_boost_user_system_lib="$withval" - fi - ], - [want_boost="yes"] - ) - - if test "x$want_boost" = "xyes"; then - AC_REQUIRE([AC_PROG_CC]) - AC_REQUIRE([AC_CANONICAL_BUILD]) - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - AC_CACHE_CHECK(whether the Boost::System library is available, - ax_cv_boost_system, - [AC_LANG_PUSH([C++]) - CXXFLAGS_SAVE=$CXXFLAGS - CXXFLAGS= - - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include <boost/system/error_code.hpp>]], - [[boost::system::error_category *a = 0;]])], - ax_cv_boost_system=yes, ax_cv_boost_system=no) - CXXFLAGS=$CXXFLAGS_SAVE - AC_LANG_POP([C++]) - ]) - if test "x$ax_cv_boost_system" = "xyes"; then - AC_SUBST(BOOST_CPPFLAGS) - - AC_DEFINE(HAVE_BOOST_SYSTEM,,[define if the Boost::System library is available]) - BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` - - LDFLAGS_SAVE=$LDFLAGS - if test "x$ax_boost_user_system_lib" = "x"; then - for libextension in `ls -r $BOOSTLIBDIR/libboost_system* 2>/dev/null | sed 's,.*/lib,,' | sed 's,\..*,,'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_SYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_SYSTEM_LIB) link_system="yes"; break], - [link_system="no"]) - done - if test "x$link_system" != "xyes"; then - for libextension in `ls -r $BOOSTLIBDIR/boost_system* 2>/dev/null | sed 's,.*/,,' | sed -e 's,\..*,,'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_SYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_SYSTEM_LIB) link_system="yes"; break], - [link_system="no"]) - done - fi - - else - for ax_lib in $ax_boost_user_system_lib boost_system-$ax_boost_user_system_lib; do - AC_CHECK_LIB($ax_lib, exit, - 
[BOOST_SYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_SYSTEM_LIB) link_system="yes"; break], - [link_system="no"]) - done - - fi - if test "x$ax_lib" = "x"; then - AC_MSG_ERROR(Could not find a version of the library!) - fi - if test "x$link_system" = "xno"; then - AC_MSG_ERROR(Could not link against $ax_lib !) - fi - fi - - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - fi -]) - diff --git a/m4/ax_boost_timer.m4 b/m4/ax_boost_timer.m4 deleted file mode 100644 index 3940592030b8cbb12658eb045ef68aac04fcfa70..0000000000000000000000000000000000000000 --- a/m4/ax_boost_timer.m4 +++ /dev/null @@ -1,120 +0,0 @@ -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_boost_timer.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_BOOST_TIMER -# -# DESCRIPTION -# -# Test for System library from the Boost C++ libraries. The macro requires -# a preceding call to AX_BOOST_BASE. Further documentation is available at -# <http://randspringer.de/boost/index.html>. -# -# This macro calls: -# -# AC_SUBST(BOOST_TIMER_LIB) -# -# And sets: -# -# HAVE_BOOST_TIMER -# -# LICENSE -# -# Copyright (c) 2012 Xiyue Deng <manphiz@gmail.com> -# Copyright (c) 2012 Murray Cumming <murrayc@openismus.com> -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 2 (based on serial 1 of ax_boost_locale.m4 with some simple find/replace by Murray Cumming) - -AC_DEFUN([AX_BOOST_TIMER], -[ - AC_ARG_WITH([boost-timer], - AS_HELP_STRING([--with-boost-timer@<:@=special-lib@:>@], - [use the Timer library from boost - it is possible to specify a certain library for the linker - e.g. 
--with-boost-timer=boost_timer-gcc-mt ]), - [ - if test "$withval" = "no"; then - want_boost="no" - elif test "$withval" = "yes"; then - want_boost="yes" - ax_boost_user_timer_lib="" - else - want_boost="yes" - ax_boost_user_timer_lib="$withval" - fi - ], - [want_boost="yes"] - ) - - if test "x$want_boost" = "xyes"; then - AC_REQUIRE([AC_PROG_CC]) - AC_REQUIRE([AC_CANONICAL_BUILD]) - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS -lboost_chrono -lboost_system" - export LDFLAGS - - AC_CACHE_CHECK(whether the Boost::Timer library is available, - ax_cv_boost_timer, - [AC_LANG_PUSH([C++]) - CXXFLAGS_SAVE=$CXXFLAGS - - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include <boost/timer/timer.hpp>]], - [[boost::timer::cpu_timer().stop();]])], - ax_cv_boost_timer=yes, ax_cv_boost_timer=no) - CXXFLAGS=$CXXFLAGS_SAVE - AC_LANG_POP([C++]) - ]) - if test "x$ax_cv_boost_timer" = "xyes"; then - AC_SUBST(BOOST_CPPFLAGS) - - AC_DEFINE(HAVE_BOOST_TIMER,,[define if the Boost::Timer library is available]) - BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` - - LDFLAGS_SAVE=$LDFLAGS - if test "x$ax_boost_user_timer_lib" = "x"; then - for libextension in `ls $BOOSTLIBDIR/libboost_timer*.so* $BOOSTLIBDIR/libboost_timer*.dylib* $BOOSTLIBDIR/libboost_timer*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_timer.*\)\.so.*$;\1;' -e 's;^lib\(boost_timer.*\)\.dylib.*$;\1;' -e 's;^lib\(boost_timer.*\)\.a.*$;\1;'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_TIMER_LIB="-l$ax_lib"; AC_SUBST(BOOST_TIMER_LIB) link_timer="yes"; break], - [link_timer="no"]) - done - if test "x$link_timer" != "xyes"; then - for libextension in `ls $BOOSTLIBDIR/boost_timer*.dll* $BOOSTLIBDIR/boost_timer*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_timer.*\)\.dll.*$;\1;' -e 's;^\(boost_timer.*\)\.a.*$;\1;'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - 
[BOOST_TIMER_LIB="-l$ax_lib"; AC_SUBST(BOOST_TIMER_LIB) link_timer="yes"; break], - [link_timer="no"]) - done - fi - - else - for ax_lib in $ax_boost_user_timer_lib boost_timer-$ax_boost_user_timer_lib; do - AC_CHECK_LIB($ax_lib, exit, - [BOOST_TIMER_LIB="-l$ax_lib"; AC_SUBST(BOOST_TIMER_LIB) link_timer="yes"; break], - [link_timer="no"]) - done - - fi - if test "x$ax_lib" = "x"; then - AC_MSG_ERROR(Could not find a version of the library!) - fi - if test "x$link_timer" = "xno"; then - AC_MSG_ERROR(Could not link against $ax_lib !) - fi - fi - - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - fi -]) - diff --git a/m4/ax_boost_unit_test_framework.m4 b/m4/ax_boost_unit_test_framework.m4 deleted file mode 100644 index ff3e8b0f9ac309ab6c156e7bc5d48bb51a4ff886..0000000000000000000000000000000000000000 --- a/m4/ax_boost_unit_test_framework.m4 +++ /dev/null @@ -1,139 +0,0 @@ -# ================================================================================ -# http://www.gnu.org/software/autoconf-archive/ax_boost_unit_test_framework.html -# ================================================================================ -# -# SYNOPSIS -# -# AX_BOOST_UNIT_TEST_FRAMEWORK -# -# DESCRIPTION -# -# Test for Unit_Test_Framework library from the Boost C++ libraries. The -# macro requires a preceding call to AX_BOOST_BASE. Further documentation -# is available at <http://randspringer.de/boost/index.html>. -# -# This macro calls: -# -# AC_SUBST(BOOST_UNIT_TEST_FRAMEWORK_LIB) -# -# And sets: -# -# HAVE_BOOST_UNIT_TEST_FRAMEWORK -# -# LICENSE -# -# Copyright (c) 2008 Thomas Porschberg <thomas@randspringer.de> -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. 
- -#serial 19 - -AC_DEFUN([AX_BOOST_UNIT_TEST_FRAMEWORK], -[ - AC_ARG_WITH([boost-unit-test-framework], - AS_HELP_STRING([--with-boost-unit-test-framework@<:@=special-lib@:>@], - [use the Unit_Test_Framework library from boost - it is possible to specify a certain library for the linker - e.g. --with-boost-unit-test-framework=boost_unit_test_framework-gcc ]), - [ - if test "$withval" = "no"; then - want_boost="no" - elif test "$withval" = "yes"; then - want_boost="yes" - ax_boost_user_unit_test_framework_lib="" - else - want_boost="yes" - ax_boost_user_unit_test_framework_lib="$withval" - fi - ], - [want_boost="yes"] - ) - - if test "x$want_boost" = "xyes"; then - AC_REQUIRE([AC_PROG_CC]) - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - AC_CACHE_CHECK(whether the Boost::Unit_Test_Framework library is available, - ax_cv_boost_unit_test_framework, - [AC_LANG_PUSH([C++]) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[@%:@include <boost/test/unit_test.hpp>]], - [[using boost::unit_test::test_suite; - test_suite* test= BOOST_TEST_SUITE( "Unit test example 1" ); return 0;]])], - ax_cv_boost_unit_test_framework=yes, ax_cv_boost_unit_test_framework=no) - AC_LANG_POP([C++]) - ]) - if test "x$ax_cv_boost_unit_test_framework" = "xyes"; then - AC_DEFINE(HAVE_BOOST_UNIT_TEST_FRAMEWORK,,[define if the Boost::Unit_Test_Framework library is available]) - BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` - - if test "x$ax_boost_user_unit_test_framework_lib" = "x"; then - saved_ldflags="${LDFLAGS}" - for monitor_library in `ls $BOOSTLIBDIR/libboost_unit_test_framework*.so* $BOOSTLIBDIR/libboost_unit_test_framework*.dylib* $BOOSTLIBDIR/libboost_unit_test_framework*.a* 2>/dev/null` ; do - if test -r $monitor_library ; then - libextension=`echo $monitor_library | sed 's,.*/,,' | sed -e 's;^lib\(boost_unit_test_framework.*\)\.so.*$;\1;' -e 
's;^lib\(boost_unit_test_framework.*\)\.dylib.*$;\1;' -e 's;^lib\(boost_unit_test_framework.*\)\.a.*$;\1;'` - ax_lib=${libextension} - link_unit_test_framework="yes" - else - link_unit_test_framework="no" - fi - - if test "x$link_unit_test_framework" = "xyes"; then - BOOST_UNIT_TEST_FRAMEWORK_LIB="-l$ax_lib" - AC_SUBST(BOOST_UNIT_TEST_FRAMEWORK_LIB) - break - fi - done - if test "x$link_unit_test_framework" != "xyes"; then - for libextension in `ls $BOOSTLIBDIR/boost_unit_test_framework*.dll* $BOOSTLIBDIR/boost_unit_test_framework*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_unit_test_framework.*\)\.dll.*$;\1;' -e 's;^\(boost_unit_test_framework.*\)\.a.*$;\1;'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_UNIT_TEST_FRAMEWORK_LIB="-l$ax_lib"; AC_SUBST(BOOST_UNIT_TEST_FRAMEWORK_LIB) link_unit_test_framework="yes"; break], - [link_unit_test_framework="no"]) - done - fi - else - link_unit_test_framework="no" - saved_ldflags="${LDFLAGS}" - for ax_lib in boost_unit_test_framework-$ax_boost_user_unit_test_framework_lib $ax_boost_user_unit_test_framework_lib ; do - if test "x$link_unit_test_framework" = "xyes"; then - break; - fi - for unittest_library in `ls $BOOSTLIBDIR/lib${ax_lib}.so* $BOOSTLIBDIR/lib${ax_lib}.a* 2>/dev/null` ; do - if test -r $unittest_library ; then - libextension=`echo $unittest_library | sed 's,.*/,,' | sed -e 's;^lib\(boost_unit_test_framework.*\)\.so.*$;\1;' -e 's;^lib\(boost_unit_test_framework.*\)\.a*$;\1;'` - ax_lib=${libextension} - link_unit_test_framework="yes" - else - link_unit_test_framework="no" - fi - - if test "x$link_unit_test_framework" = "xyes"; then - BOOST_UNIT_TEST_FRAMEWORK_LIB="-l$ax_lib" - AC_SUBST(BOOST_UNIT_TEST_FRAMEWORK_LIB) - break - fi - done - done - fi - if test "x$ax_lib" = "x"; then - echo "Could not find a version of the library!" - exit 202 - fi - if test "x$link_unit_test_framework" != "xyes"; then - echo "Could not link against $ax_lib !" 
- exit 202 - fi - fi - - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - fi -]) diff --git a/m4/ax_check_compiler_flags.m4 b/m4/ax_check_compiler_flags.m4 deleted file mode 100755 index 35bfd2a5a27405d140712bc060a6a5e6d57469c7..0000000000000000000000000000000000000000 --- a/m4/ax_check_compiler_flags.m4 +++ /dev/null @@ -1,76 +0,0 @@ -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_check_compiler_flags.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_CHECK_COMPILER_FLAGS(FLAGS, [ACTION-SUCCESS], [ACTION-FAILURE]) -# -# DESCRIPTION -# -# Check whether the given compiler FLAGS work with the current language's -# compiler, or whether they give an error. (Warnings, however, are -# ignored.) -# -# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on -# success/failure. -# -# LICENSE -# -# Copyright (c) 2009 Steven G. Johnson <stevenj@alum.mit.edu> -# Copyright (c) 2009 Matteo Frigo -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see <http://www.gnu.org/licenses/>. -# -# As a special exception, the respective Autoconf Macro's copyright owner -# gives unlimited permission to copy, distribute and modify the configure -# scripts that are the output of Autoconf when processing the Macro. 
You -# need not follow the terms of the GNU General Public License when using -# or distributing such scripts, even though portions of the text of the -# Macro appear in them. The GNU General Public License (GPL) does govern -# all other use of the material that constitutes the Autoconf Macro. -# -# This special exception to the GPL applies to versions of the Autoconf -# Macro released by the Autoconf Archive. When you make and distribute a -# modified version of the Autoconf Macro, you may extend this special -# exception to the GPL to apply to your modified version as well. - -#serial 9 - -AC_DEFUN([AX_CHECK_COMPILER_FLAGS], -[AC_PREREQ(2.59) dnl for _AC_LANG_PREFIX -AC_MSG_CHECKING([whether _AC_LANG compiler accepts $1]) -dnl Some hackery here since AC_CACHE_VAL can't handle a non-literal varname: -AS_LITERAL_IF([$1], - [AC_CACHE_VAL(AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_[$1]), [ - ax_save_FLAGS=$[]_AC_LANG_PREFIX[]FLAGS - _AC_LANG_PREFIX[]FLAGS="$1" - AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], - AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_[$1])=yes, - AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_[$1])=no) - _AC_LANG_PREFIX[]FLAGS=$ax_save_FLAGS])], - [ax_save_FLAGS=$[]_AC_LANG_PREFIX[]FLAGS - _AC_LANG_PREFIX[]FLAGS="$1" - AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], - eval AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_[$1])=yes, - eval AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_[$1])=no) - _AC_LANG_PREFIX[]FLAGS=$ax_save_FLAGS]) -eval ax_check_compiler_flags=$AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_[$1]) -AC_MSG_RESULT($ax_check_compiler_flags) -if test "x$ax_check_compiler_flags" = xyes; then - m4_default([$2], :) -else - m4_default([$3], :) -fi -])dnl AX_CHECK_COMPILER_FLAGS diff --git a/m4/ax_cuda.m4 b/m4/ax_cuda.m4 deleted file mode 100644 index 17e2f068318b57ece41990a40b85b91afb05c4c9..0000000000000000000000000000000000000000 --- a/m4/ax_cuda.m4 +++ /dev/null @@ -1,84 +0,0 @@ -# -*- mode: autoconf -*- -# -# AX_OPENCL -# -# Check for an OpenCL implementation. 
If CL is found, _OPENCL is defined and -# the required compiler and linker flags are included in the output variables -# "CL_CFLAGS" and "CL_LIBS", respectively. If no usable CL implementation is -# found, "no_cl" is set to "yes". -# -# If the header "CL/OpenCL.h" is found, "HAVE_CL_OPENCL_H" is defined. If the -# header "OpenCL/OpenCL.h" is found, HAVE_OPENCL_OPENCL_H is defined. These -# preprocessor definitions may not be mutually exclusive. -# -# Based on AX_CHECK_GL, version: 2.4 author: Braden McDaniel -# <braden@endoframe.com> -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA -# 02110-1301, USA. -# -# As a special exception, the you may copy, distribute and modify the -# configure scripts that are the output of Autoconf when processing -# the Macro. You need not follow the terms of the GNU General Public -# License when using or distributing such scripts. -# -AC_DEFUN([AX_CUDA], -[AC_REQUIRE([AC_CANONICAL_HOST])dnl - -# Search nvcc compiler -AC_CHECK_PROG([NVCC_EXIST],[nvcc],["yes"],["no"]) -AS_IF([test "x$NVCC_EXIST" = "xno"],[],[ - NVCC=`which nvcc` - - # Set CUDA_CFLAGS to $NVCC, where substring "bin/nvcc" - # is substituted by "include". 
- CUDA_CFLAGS=" ${NVCC%bin//nvcc}" - CUDA_CFLAGS=" ${CUDA_CFLAGS%bin/nvcc}" - CUDA_CFLAGS=" -I${CUDA_CFLAGS}include" - - #Set CUDA_CFLAGS to $NVCC, where substring "bin/nvcc" - #is substituted by "lib". - CUDA_LIBS="${NVCC%bin//nvcc}" - CUDA_LIBS="${CUDA_LIBS%bin/nvcc}" - CUDA_PATH=$CUDA_LIBS - CUDA_LIBS=" -L${CUDA_LIBS}lib" - - # If $build_cpu contains "_64", append "64" to CUDA_LIBS - AS_IF([echo $build_cpu | grep -q "_64"], - [ - AS_IF([ test -d $CUDA_PATH/lib64 ], [ CUDA_LIBS+="64" ], []) - # Be carefull the return code 0 mean true return code 1 mean false - AS_IF([ command -v bumblebeed >/dev/null ], [ CUDA_LIBS+=" -L/usr/lib64/nvidia-bumblebee/ " ], - [ - echo "bumblebee, NVIDIA optimus, not found" - ]) - AS_IF([ test -d /usr/local/cuda/lib64 ], [ CUDA_LIBS+=" -L/usr/local/cuda/lib64 " ], - [ - AS_IF([ test -d /usr/local/cuda/lib ],[ CUDA_LIBS+=" -L/usr/local/cuda/lib " ]) - ]) - ]) - - # Append " -lcuda -lcudart" to CUDA_LIBS - CUDA_LIBS+=" -lcuda -lcudart" - - # Make variables available in Makefile.am - AC_SUBST([CUDA_CFLAGS]) - AC_SUBST([CUDA_LIBS]) - echo $NVCC - AC_SUBST([NVCC]) - AC_DEFINE([NVCC],[],[NVCC compiling]) -])dnl - -])dnl diff --git a/m4/ax_gcc_archflag.m4 b/m4/ax_gcc_archflag.m4 deleted file mode 100755 index e918ece1759f977d4703711bde58fdc548b0833b..0000000000000000000000000000000000000000 --- a/m4/ax_gcc_archflag.m4 +++ /dev/null @@ -1,220 +0,0 @@ -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_gcc_archflag.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_GCC_ARCHFLAG([PORTABLE?], [ACTION-SUCCESS], [ACTION-FAILURE]) -# -# DESCRIPTION -# -# This macro tries to guess the "native" arch corresponding to the target -# architecture for use with gcc's -march=arch or -mtune=arch flags. 
If -# found, the cache variable $ax_cv_gcc_archflag is set to this flag and -# ACTION-SUCCESS is executed; otherwise $ax_cv_gcc_archflag is is set to -# "unknown" and ACTION-FAILURE is executed. The default ACTION-SUCCESS is -# to add $ax_cv_gcc_archflag to the end of $CFLAGS. -# -# PORTABLE? should be either [yes] (default) or [no]. In the former case, -# the flag is set to -mtune (or equivalent) so that the architecture is -# only used for tuning, but the instruction set used is still portable. In -# the latter case, the flag is set to -march (or equivalent) so that -# architecture-specific instructions are enabled. -# -# The user can specify --with-gcc-arch=<arch> in order to override the -# macro's choice of architecture, or --without-gcc-arch to disable this. -# -# When cross-compiling, or if $CC is not gcc, then ACTION-FAILURE is -# called unless the user specified --with-gcc-arch manually. -# -# Requires macros: AX_CHECK_COMPILER_FLAGS, AX_GCC_X86_CPUID -# -# (The main emphasis here is on recent CPUs, on the principle that doing -# high-performance computing on old hardware is uncommon.) -# -# LICENSE -# -# Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu> -# Copyright (c) 2008 Matteo Frigo -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see <http://www.gnu.org/licenses/>. 
-# -# As a special exception, the respective Autoconf Macro's copyright owner -# gives unlimited permission to copy, distribute and modify the configure -# scripts that are the output of Autoconf when processing the Macro. You -# need not follow the terms of the GNU General Public License when using -# or distributing such scripts, even though portions of the text of the -# Macro appear in them. The GNU General Public License (GPL) does govern -# all other use of the material that constitutes the Autoconf Macro. -# -# This special exception to the GPL applies to versions of the Autoconf -# Macro released by the Autoconf Archive. When you make and distribute a -# modified version of the Autoconf Macro, you may extend this special -# exception to the GPL to apply to your modified version as well. - -#serial 8 - -AC_DEFUN([AX_GCC_ARCHFLAG], -[AC_REQUIRE([AC_PROG_CC]) -AC_REQUIRE([AC_CANONICAL_HOST]) - -AC_ARG_WITH(gcc-arch, [AS_HELP_STRING([--with-gcc-arch=<arch>], [use architecture <arch> for gcc -march/-mtune, instead of guessing])], - ax_gcc_arch=$withval, ax_gcc_arch=yes) - -AC_MSG_CHECKING([for gcc architecture flag]) -AC_MSG_RESULT([]) -AC_CACHE_VAL(ax_cv_gcc_archflag, -[ -ax_cv_gcc_archflag="unknown" - -if test "$GCC" = yes; then - -if test "x$ax_gcc_arch" = xyes; then -ax_gcc_arch="" -if test "$cross_compiling" = no; then -case $host_cpu in - i[[3456]]86*|x86_64*) # use cpuid codes, in part from x86info-1.7 by D. 
Jones - AX_GCC_X86_CPUID(0) - AX_GCC_X86_CPUID(1) - case $ax_cv_gcc_x86_cpuid_0 in - *:756e6547:*:*) # Intel - case $ax_cv_gcc_x86_cpuid_1 in - *5[[48]]?:*:*:*) ax_gcc_arch="pentium-mmx pentium" ;; - *5??:*:*:*) ax_gcc_arch=pentium ;; - 206??:*:*:*) ax_gcc_arch="corei7 native nocona core2 prescott pentium4 pentiumpro";break;; - 106a?:*:*:*) ax_gcc_arch="corei7 native nocona core2 prescott pentium4 pentiumpro";break;; - 106e?:*:*:*) ax_gcc_arch="corei7 native nocona core2 prescott pentium4 pentiumpro";break;; - *6[[520]]?:*:*:*) ax_gcc_arch="i7core nocona core2 prescott pentium4 pentiumpro" ;; - *6[[3456]]?:*:*:*) ax_gcc_arch="pentium2 pentiumpro" ;; - *6a?:*[[01]]:*:*) ax_gcc_arch="pentium2 pentiumpro" ;; - *6a?:*[[234]]:*:*) ax_gcc_arch="pentium3 pentiumpro" ;; - *6[[9d]]?:*:*:*) ax_gcc_arch="pentium-m pentium3 pentiumpro" ;; - *6[[78b]]?:*:*:*) ax_gcc_arch="pentium3 pentiumpro" ;; - *6f?:*:*:*) ax_gcc_arch="core2 native pentium-m pentium3 pentiumpro" ;; - *6??:*:*:*) ax_gcc_arch=pentiumpro ;; - *f3[[347]]:*:*:*|*f4[1347]:*:*:*) - case $host_cpu in - x86_64*) ax_gcc_arch="nocona pentium4 pentiumpro" ;; - *) ax_gcc_arch="prescott pentium4 pentiumpro" ;; - esac ;; - *f??:*:*:*) ax_gcc_arch="pentium4 pentiumpro";; - esac ;; - *:68747541:*:*) # AMD - case $ax_cv_gcc_x86_cpuid_1 in - *5[[67]]?:*:*:*) ax_gcc_arch=k6 ;; - *5[[8d]]?:*:*:*) ax_gcc_arch="k6-2 k6" ;; - *5[[9]]?:*:*:*) ax_gcc_arch="k6-3 k6" ;; - *60?:*:*:*) ax_gcc_arch=k7 ;; - *6[[12]]?:*:*:*) ax_gcc_arch="athlon k7" ;; - *6[[34]]?:*:*:*) ax_gcc_arch="athlon-tbird k7" ;; - *67?:*:*:*) ax_gcc_arch="athlon-4 athlon k7" ;; - *6[[68a]]?:*:*:*) - AX_GCC_X86_CPUID(0x80000006) # L2 cache size - case $ax_cv_gcc_x86_cpuid_0x80000006 in - *:*:*[[1-9a-f]]??????:*) # (L2 = ecx >> 16) >= 256 - ax_gcc_arch="athlon-xp athlon-4 athlon k7" ;; - *) ax_gcc_arch="athlon-4 athlon k7" ;; - esac ;; - *f[[4cef8b]]?:*:*:*) ax_gcc_arch="athlon64 k8" ;; - *f5?:*:*:*) ax_gcc_arch="opteron k8" ;; - *f7?:*:*:*) ax_gcc_arch="athlon-fx 
opteron k8" ;; - *f??:*:*:*) ax_gcc_arch="k8" ;; - esac ;; - *:746e6543:*:*) # IDT - case $ax_cv_gcc_x86_cpuid_1 in - *54?:*:*:*) ax_gcc_arch=winchip-c6 ;; - *58?:*:*:*) ax_gcc_arch=winchip2 ;; - *6[[78]]?:*:*:*) ax_gcc_arch=c3 ;; - *69?:*:*:*) ax_gcc_arch="c3-2 c3" ;; - esac ;; - esac - if test x"$ax_gcc_arch" = x; then # fallback - case $host_cpu in - i586*) ax_gcc_arch=pentium ;; - i686*) ax_gcc_arch=pentiumpro ;; - esac - fi - ;; - - sparc*) - AC_PATH_PROG([PRTDIAG], [prtdiag], [prtdiag], [$PATH:/usr/platform/`uname -i`/sbin/:/usr/platform/`uname -m`/sbin/]) - cputype=`(((grep cpu /proc/cpuinfo | cut -d: -f2) ; ($PRTDIAG -v |grep -i sparc) ; grep -i cpu /var/run/dmesg.boot ) | head -n 1) 2> /dev/null` - cputype=`echo "$cputype" | tr -d ' -' |tr $as_cr_LETTERS $as_cr_letters` - case $cputype in - *ultrasparciv*) ax_gcc_arch="ultrasparc4 ultrasparc3 ultrasparc v9" ;; - *ultrasparciii*) ax_gcc_arch="ultrasparc3 ultrasparc v9" ;; - *ultrasparc*) ax_gcc_arch="ultrasparc v9" ;; - *supersparc*|*tms390z5[[05]]*) ax_gcc_arch="supersparc v8" ;; - *hypersparc*|*rt62[[056]]*) ax_gcc_arch="hypersparc v8" ;; - *cypress*) ax_gcc_arch=cypress ;; - esac ;; - - alphaev5) ax_gcc_arch=ev5 ;; - alphaev56) ax_gcc_arch=ev56 ;; - alphapca56) ax_gcc_arch="pca56 ev56" ;; - alphapca57) ax_gcc_arch="pca57 pca56 ev56" ;; - alphaev6) ax_gcc_arch=ev6 ;; - alphaev67) ax_gcc_arch=ev67 ;; - alphaev68) ax_gcc_arch="ev68 ev67" ;; - alphaev69) ax_gcc_arch="ev69 ev68 ev67" ;; - alphaev7) ax_gcc_arch="ev7 ev69 ev68 ev67" ;; - alphaev79) ax_gcc_arch="ev79 ev7 ev69 ev68 ev67" ;; - - powerpc*) - cputype=`((grep cpu /proc/cpuinfo | head -n 1 | cut -d: -f2 | cut -d, -f1 | sed 's/ //g') ; /usr/bin/machine ; /bin/machine; grep CPU /var/run/dmesg.boot | head -n 1 | cut -d" " -f2) 2> /dev/null` - cputype=`echo $cputype | sed -e 's/ppc//g;s/ *//g'` - case $cputype in - *750*) ax_gcc_arch="750 G3" ;; - *740[[0-9]]*) ax_gcc_arch="$cputype 7400 G4" ;; - *74[[4-5]][[0-9]]*) ax_gcc_arch="$cputype 7450 G4" ;; - 
*74[[0-9]][[0-9]]*) ax_gcc_arch="$cputype G4" ;; - *970*) ax_gcc_arch="970 G5 power4";; - *POWER4*|*power4*|*gq*) ax_gcc_arch="power4 970";; - *POWER5*|*power5*|*gr*|*gs*) ax_gcc_arch="power5 power4 970";; - 603ev|8240) ax_gcc_arch="$cputype 603e 603";; - *) ax_gcc_arch=$cputype ;; - esac - ax_gcc_arch="$ax_gcc_arch powerpc" - ;; -esac -fi # not cross-compiling -fi # guess arch - -if test "x$ax_gcc_arch" != x -a "x$ax_gcc_arch" != xno; then -for arch in $ax_gcc_arch; do - if test "x[]m4_default([$1],yes)" = xyes; then # if we require portable code - flags="-mtune=$arch" - # -mcpu=$arch and m$arch generate nonportable code on every arch except - # x86. And some other arches (e.g. Alpha) don't accept -mtune. Grrr. - case $host_cpu in i*86|x86_64*) flags="$flags -mcpu=$arch -m$arch";; esac - else - flags="-march=$arch -mcpu=$arch -m$arch" - fi - for flag in $flags; do - AX_CHECK_COMPILER_FLAGS($flag, [ax_cv_gcc_archflag=$flag; break]) - done - test "x$ax_cv_gcc_archflag" = xunknown || break -done -fi - -fi # $GCC=yes -]) -AC_MSG_CHECKING([for gcc architecture flag]) -AC_MSG_RESULT($ax_cv_gcc_archflag) -if test "x$ax_cv_gcc_archflag" = xunknown; then - m4_default([$3],:) -else - m4_default([$2], [CFLAGS="$CFLAGS $ax_cv_gcc_archflag"]) -fi -]) diff --git a/m4/ax_gcc_version.m4 b/m4/ax_gcc_version.m4 deleted file mode 100755 index 0d924741f4acf74613d80eb98c32de37727a0278..0000000000000000000000000000000000000000 --- a/m4/ax_gcc_version.m4 +++ /dev/null @@ -1,65 +0,0 @@ -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_gcc_version.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_GCC_VERSION -# -# DESCRIPTION -# -# This macro retrieves the gcc version and returns it in the GCC_VERSION -# variable if available, an empty string otherwise. 
-# -# LICENSE -# -# Copyright (c) 2009 Francesco Salvestrini <salvestrini@users.sourceforge.net> -# -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation; either version 2 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see <http://www.gnu.org/licenses/>. -# -# As a special exception, the respective Autoconf Macro's copyright owner -# gives unlimited permission to copy, distribute and modify the configure -# scripts that are the output of Autoconf when processing the Macro. You -# need not follow the terms of the GNU General Public License when using -# or distributing such scripts, even though portions of the text of the -# Macro appear in them. The GNU General Public License (GPL) does govern -# all other use of the material that constitutes the Autoconf Macro. -# -# This special exception to the GPL applies to versions of the Autoconf -# Macro released by the Autoconf Archive. When you make and distribute a -# modified version of the Autoconf Macro, you may extend this special -# exception to the GPL to apply to your modified version as well. 
- -#serial 8 - -AC_DEFUN([AX_GCC_VERSION], [ - GCC_VERSION="" - AX_GCC_OPTION([-dumpversion],[],[],[ - ax_gcc_version_option=yes - ],[ - ax_gcc_version_option=no - ]) - AS_IF([test "x$GCC" = "xyes"],[ - AS_IF([test "x$ax_gcc_version_option" != "xno"],[ - AC_CACHE_CHECK([gcc version],[ax_cv_gcc_version],[ - ax_cv_gcc_version="`$CC -dumpversion`" - AS_IF([test "x$ax_cv_gcc_version" = "x"],[ - ax_cv_gcc_version="" - ]) - ]) - GCC_VERSION=$ax_cv_gcc_version - ]) - ]) - AC_SUBST([GCC_VERSION]) -]) diff --git a/m4/ax_gcc_x86_cpuid.m4 b/m4/ax_gcc_x86_cpuid.m4 deleted file mode 100755 index 7d46fee0219e970f98e960a5b0717a9ec061ee95..0000000000000000000000000000000000000000 --- a/m4/ax_gcc_x86_cpuid.m4 +++ /dev/null @@ -1,79 +0,0 @@ -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_cpuid.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_GCC_X86_CPUID(OP) -# -# DESCRIPTION -# -# On Pentium and later x86 processors, with gcc or a compiler that has a -# compatible syntax for inline assembly instructions, run a small program -# that executes the cpuid instruction with input OP. This can be used to -# detect the CPU type. -# -# On output, the values of the eax, ebx, ecx, and edx registers are stored -# as hexadecimal strings as "eax:ebx:ecx:edx" in the cache variable -# ax_cv_gcc_x86_cpuid_OP. -# -# If the cpuid instruction fails (because you are running a -# cross-compiler, or because you are not using gcc, or because you are on -# a processor that doesn't have this instruction), ax_cv_gcc_x86_cpuid_OP -# is set to the string "unknown". -# -# This macro mainly exists to be used in AX_GCC_ARCHFLAG. -# -# LICENSE -# -# Copyright (c) 2008 Steven G. 
Johnson <stevenj@alum.mit.edu> -# Copyright (c) 2008 Matteo Frigo -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see <http://www.gnu.org/licenses/>. -# -# As a special exception, the respective Autoconf Macro's copyright owner -# gives unlimited permission to copy, distribute and modify the configure -# scripts that are the output of Autoconf when processing the Macro. You -# need not follow the terms of the GNU General Public License when using -# or distributing such scripts, even though portions of the text of the -# Macro appear in them. The GNU General Public License (GPL) does govern -# all other use of the material that constitutes the Autoconf Macro. -# -# This special exception to the GPL applies to versions of the Autoconf -# Macro released by the Autoconf Archive. When you make and distribute a -# modified version of the Autoconf Macro, you may extend this special -# exception to the GPL to apply to your modified version as well. 
- -#serial 7 - -AC_DEFUN([AX_GCC_X86_CPUID], -[AC_REQUIRE([AC_PROG_CC]) -AC_LANG_PUSH([C]) -AC_CACHE_CHECK(for x86 cpuid $1 output, ax_cv_gcc_x86_cpuid_$1, - [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [ - int op = $1, eax, ebx, ecx, edx; - FILE *f; - __asm__("cpuid" - : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) - : "a" (op)); - f = fopen("conftest_cpuid", "w"); if (!f) return 1; - fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); - fclose(f); - return 0; -])], - [ax_cv_gcc_x86_cpuid_$1=`cat conftest_cpuid`; rm -f conftest_cpuid], - [ax_cv_gcc_x86_cpuid_$1=unknown; rm -f conftest_cpuid], - [ax_cv_gcc_x86_cpuid_$1=unknown])]) -AC_LANG_POP([C]) -]) diff --git a/m4/ax_lib_mysql.m4 b/m4/ax_lib_mysql.m4 deleted file mode 100755 index b4d19120e8f8c4e0cd32746decc961d4bf19e7d9..0000000000000000000000000000000000000000 --- a/m4/ax_lib_mysql.m4 +++ /dev/null @@ -1,145 +0,0 @@ -# =========================================================================== -# http://www.nongnu.org/autoconf-archive/ax_lib_mysql.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_LIB_MYSQL([MINIMUM-VERSION]) -# -# DESCRIPTION -# -# This macro provides tests of availability of MySQL client library of -# particular version or newer. -# -# AX_LIB_MYSQL macro takes only one argument which is optional. If there -# is no required version passed, then macro does not run version test. 
-# -# The --with-mysql option takes one of three possible values: -# -# no - do not check for MySQL client library -# -# yes - do check for MySQL library in standard locations (mysql_config -# should be in the PATH) -# -# path - complete path to mysql_config utility, use this option if -# mysql_config can't be found in the PATH -# -# This macro calls: -# -# AC_SUBST(MYSQL_CFLAGS) -# AC_SUBST(MYSQL_LDFLAGS) -# AC_SUBST(MYSQL_VERSION) -# -# And sets: -# -# HAVE_MYSQL -# -# LICENSE -# -# Copyright (c) 2008 Mateusz Loskot <mateusz@loskot.net> -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. - -AC_DEFUN([AX_LIB_MYSQL], -[ - AC_ARG_WITH([mysql], - AC_HELP_STRING([--with-mysql=@<:@ARG@:>@], - [use MySQL client library @<:@default=yes@:>@, optionally specify path to mysql_config] - ), - [ - if test "$withval" = "no"; then - want_mysql="no" - elif test "$withval" = "yes"; then - want_mysql="yes" - else - want_mysql="yes" - MYSQL_CONFIG="$withval" - fi - ], - [want_mysql="yes"] - ) - - MYSQL_CFLAGS="" - MYSQL_LDFLAGS="" - MYSQL_VERSION="" - - dnl - dnl Check MySQL libraries (libpq) - dnl - - if test "$want_mysql" = "yes"; then - - if test -z "$MYSQL_CONFIG" -o test; then - AC_PATH_PROG([MYSQL_CONFIG], [mysql_config], [no]) - fi - - if test "$MYSQL_CONFIG" != "no"; then - AC_MSG_CHECKING([for MySQL libraries]) - - MYSQL_CFLAGS="`$MYSQL_CONFIG --cflags`" - MYSQL_LDFLAGS="`$MYSQL_CONFIG --libs`" - - MYSQL_VERSION=`$MYSQL_CONFIG --version` - - AC_DEFINE([HAVE_MYSQL], [1], - [Define to 1 if MySQL libraries are available]) - - found_mysql="yes" - AC_MSG_RESULT([yes]) - else - found_mysql="no" -# AC_MSG_RESULT([no]) - fi - fi - - dnl - dnl Check if required version of MySQL is available - dnl - - - mysql_version_req=ifelse([$1], [], [], [$1]) - - if test "$found_mysql" = "yes" -a -n "$mysql_version_req"; then - - AC_MSG_CHECKING([if MySQL version 
is >= $mysql_version_req]) - - dnl Decompose required version string of MySQL - dnl and calculate its number representation - mysql_version_req_major=`expr $mysql_version_req : '\([[0-9]]*\)'` - mysql_version_req_minor=`expr $mysql_version_req : '[[0-9]]*\.\([[0-9]]*\)'` - mysql_version_req_micro=`expr $mysql_version_req : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'` - if test "x$mysql_version_req_micro" = "x"; then - mysql_version_req_micro="0" - fi - - mysql_version_req_number=`expr $mysql_version_req_major \* 1000000 \ - \+ $mysql_version_req_minor \* 1000 \ - \+ $mysql_version_req_micro` - - dnl Decompose version string of installed MySQL - dnl and calculate its number representation - mysql_version_major=`expr $MYSQL_VERSION : '\([[0-9]]*\)'` - mysql_version_minor=`expr $MYSQL_VERSION : '[[0-9]]*\.\([[0-9]]*\)'` - mysql_version_micro=`expr $MYSQL_VERSION : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'` - if test "x$mysql_version_micro" = "x"; then - mysql_version_micro="0" - fi - - mysql_version_number=`expr $mysql_version_major \* 1000000 \ - \+ $mysql_version_minor \* 1000 \ - \+ $mysql_version_micro` - - mysql_version_check=`expr $mysql_version_number \>\= $mysql_version_req_number` - if test "$mysql_version_check" = "1"; then - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - fi - fi - - AC_SUBST([MYSQL_VERSION]) - AC_SUBST([MYSQL_CFLAGS]) - AC_SUBST([MYSQL_LDFLAGS]) -]) diff --git a/m4/ax_libhilbert.m4 b/m4/ax_libhilbert.m4 deleted file mode 100644 index d83f7181d238d4bbb0cb6413c4e8e05958106f6d..0000000000000000000000000000000000000000 --- a/m4/ax_libhilbert.m4 +++ /dev/null @@ -1,143 +0,0 @@ -# =========================================================================== -# -# =========================================================================== -# -# SYNOPSIS -# -# AX_LIB_HILBERT() -# -# DESCRIPTION -# -# This macro provides tests of the availability of libHilbert library. 
-# -# -# The macro adds a --with-libhilbert option accepting one of three values: -# -# no - do not check for the libhilbert library. -# yes - do check for libhilbert library in standard locations. -# path - complete path to the libhilbert library. -# -# If libhilbert is successfully found, this macro calls -# -# AC_SUBST(LIBHILBERT_INCLUDE) -# AC_SUBST(LIBHILBERT_LIB) -# AC_DEFINE(HAVE_LIBHILBERT) -# -# and sets with_libhilbert="yes" -# -# If libhilbert is disabled or not found, this macros sets with_libhilbert="no" -# -# Your configuration script can test $with_libhilbert to take any further -# actions. LIBHILBERT_{INCLUDE,LIB} may be used when building with C or C++. -# -# To use the macro, one would code one of the following in "configure.ac" -# before AC_OUTPUT: -# -# 1) dnl Check for libhilbert support -# AX_LIB_HILBERT() -# -# One could test $with_libhilbert for the outcome or display it as follows -# -# echo "libhilbert support: $with_libhilbert" -# -# You could also for example, override the default CC in "configure.ac" -# -# LICENSE -# -# Copyright (c) 2009 Timothy Brown <tbrown@freeshell.org> -# Copyright (c) 2010 Rhys Ulerich <rhys.ulerich@gmail.com> -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 12 - -AC_DEFUN([AX_LIB_HILBERT], [ - AC_MSG_CHECKING(for libhilbert library) - AC_REQUIRE([AC_PROG_CC]) - # - # User hints... 
- # - AC_ARG_VAR([LIBHILBERT], [Libhilbert library location]) - AC_ARG_WITH([libhilbert], - [AC_HELP_STRING([--with-libhilbert], - [user defined path to LIBHILBERT library])], - [ - if test -n "$LIBHILBERT" ; then - AC_MSG_RESULT(yes) - with_libhilbert=$LIBHILBERT - elif test "$withval" != no ; then - AC_MSG_RESULT(yes) - with_libhilbert=$withval - else - AC_MSG_RESULT(no) - fi - ], - [ - if test -n "$PETSC" ; then - with_libhilbert=$PETSC - AC_MSG_RESULT(yes) - else - with_petsc=/usr - if test ! -f "$with_libhilbert/include/hilbertKey.h" ; then - with_libhilbert=/usr/local - if test ! -f "$with_libhilbert/include/hilbertKey.h" ; then - with_libhilbert="" - AC_MSG_RESULT(failed) - else - AC_MSG_RESULT(yes) - fi - else - AC_MSG_RESULT(yes) - fi - fi - ]) - # - # locate LIBHILBERT library - # - - if test -n "$with_libhilbert" ; then - old_CC=$CC - old_CFLAGS=$CFLAGS - old_LDFLAGS=$LDFLAGS - CFLAGS="-I$with_libhilbert/include " - LDFLAGS="-L$with_libhilbert/lib " - CC=$CXX - - AC_LANG_SAVE - AC_LANG_C - - AC_CHECK_HEADER([hilbertKey.h],libhilbert_h=yes,## Copy LIB and include in the target directory -AC_MSG_WARN([could not find header file hilbertKey.h])) - AC_CHECK_LIB([libhilbert],[getIntCoordFromHKey],libhilbert_lib=yes,AC_MSG_WARN([could not find libhilbert])) - - AC_LANG_RESTORE - - CFLAGS=$old_CFLAGS - LDFLAGS=$old_LDFLAGS - CC=$old_CC - - AC_MSG_CHECKING(LIBHILBERT in $with_libhilbert) - if test x"$libhilbert_lib" = x"yes" -a x"$libhilbert_h" = x"yes" ; then - AC_SUBST(LIBHILBERT_INCLUDE, [-I$with_libhilbert/include]) - AC_SUBST(LIBHILBERT_LIB, ["-L$with_libhilbert/lib -llibhilbert"]) - AC_MSG_RESULT(ok) - AC_DEFINE(HAVE_LIBHILBERT,1,[Define if you have LIBHILBERT library]) - else - AC_MSG_RESULT(failed) - fi - fi - # - # - # - if test x = x"$LIBHILBERT_LIB" ; then - ifelse([$2],,[],[$2]) - : - else - ifelse([$1],,[],[$1]) - : - fi - ])dnl AX_LIB_HILBERT - diff --git a/m4/ax_opencl.m4 b/m4/ax_opencl.m4 deleted file mode 100755 index 
9ed1d963bcd1e486283fb5b81401f272de855b63..0000000000000000000000000000000000000000 --- a/m4/ax_opencl.m4 +++ /dev/null @@ -1,119 +0,0 @@ -# -*- mode: autoconf -*- -# -# AX_OPENCL -# -# Check for an OpenCL implementation. If CL is found, _OPENCL is defined and -# the required compiler and linker flags are included in the output variables -# "CL_CFLAGS" and "CL_LIBS", respectively. If no usable CL implementation is -# found, "no_cl" is set to "yes". -# -# If the header "CL/OpenCL.h" is found, "HAVE_CL_OPENCL_H" is defined. If the -# header "OpenCL/OpenCL.h" is found, HAVE_OPENCL_OPENCL_H is defined. These -# preprocessor definitions may not be mutually exclusive. -# -# Based on AX_CHECK_GL, version: 2.4 author: Braden McDaniel -# <braden@endoframe.com> -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA -# 02110-1301, USA. -# -# As a special exception, the you may copy, distribute and modify the -# configure scripts that are the output of Autoconf when processing -# the Macro. You need not follow the terms of the GNU General Public -# License when using or distributing such scripts. 
-# -AC_DEFUN([AX_OPENCL], -[AC_REQUIRE([AC_CANONICAL_HOST])dnl -AC_REQUIRE([AC_PROG_SED])dnl -AC_REQUIRE([ACX_PTHREAD])dnl - -AC_ARG_ENABLE([opencl], -[AC_HELP_STRING([--disable-opencl], -[do not use OpenCL])], -[disable_opencl=$enableval], -[disable_opencl='yes']) - -if test "$disable_opencl" = 'yes'; then -AC_LANG_PUSH([$1]) -AX_LANG_COMPILER_MS -AS_IF([test X$ax_compiler_ms = Xno], -[CL_CFLAGS="${PTHREAD_CFLAGS}"; CL_LIBS="${PTHREAD_LIBS} -lm"]) - -ax_save_CPPFLAGS=$CPPFLAGS -CPPFLAGS="$CL_CFLAGS $CPPFLAGS" -AC_CHECK_HEADERS([CL/cl.h OpenCL/cl.h]) -CPPFLAGS=$ax_save_CPPFLAGS - -AC_CHECK_HEADERS([windows.h]) - -m4_define([AX_OPENCL_PROGRAM], -[AC_LANG_PROGRAM([[ -# if defined(HAVE_WINDOWS_H) && defined(_WIN32) -# include <windows.h> -# endif -# ifdef HAVE_CL_CL_H -# include <CL/cl.h> -# elif defined(HAVE_OPENCL_CL_H) -# include <OpenCL/cl.h> -# else -# error no CL.h -# endif]], -[[clCreateContextFromType(0,0,0,0,0)]])]) - -AC_CACHE_CHECK([for OpenCL library], [ax_cv_check_cl_libcl], -[ax_cv_check_cl_libcl=no -case $host_cpu in -x86_64) ax_check_cl_libdir=lib64 ;; -*) ax_check_cl_libdir=lib ;; -esac -ax_save_CPPFLAGS=$CPPFLAGS -CPPFLAGS="$CL_CFLAGS $CPPFLAGS" -ax_save_LIBS=$LIBS -LIBS="" -ax_check_libs="-lOpenCL -lCL -lclparser" -for ax_lib in $ax_check_libs; do -AS_IF([test X$ax_compiler_ms = Xyes], -[ax_try_lib=`echo $ax_lib | $SED -e 's/^-l//' -e 's/$/.lib/'`], -[ax_try_lib=$ax_lib]) -LIBS="$ax_try_lib $CL_LIBS $ax_save_LIBS" -AC_LINK_IFELSE([AX_OPENCL_PROGRAM], -[ax_cv_check_cl_libcl=$ax_try_lib; break], -[ax_check_cl_nvidia_flags="-L/usr/$ax_check_cl_libdir/nvidia" LIBS="$ax_try_lib $ax_check_cl_nvidia_flags $CL_LIBS $ax_save_LIBS" -AC_LINK_IFELSE([AX_OPENCL_PROGRAM], -[ax_cv_check_cl_libcl="$ax_try_lib $ax_check_cl_nvidia_flags"; break], -[ax_check_cl_dylib_flag='-framework OpenCL -L/System/Library/Frameworks/OpenCL.framework/Versions/A/Libraries' LIBS="$ax_try_lib $ax_check_cl_dylib_flag $CL_LIBS $ax_save_LIBS" -AC_LINK_IFELSE([AX_OPENCL_PROGRAM], 
-[ax_cv_check_cl_libcl="$ax_try_lib $ax_check_cl_dylib_flag"; break])])]) -done - -AS_IF([test "X$ax_cv_check_cl_libcl" = Xno -a X$no_x = Xyes], -[LIBS='-framework OpenCL' -AC_LINK_IFELSE([AX_OPENCL_PROGRAM], -[ax_cv_check_cl_libcl=$LIBS])]) - -LIBS=$ax_save_LIBS -CPPFLAGS=$ax_save_CPPFLAGS]) - -AS_IF([test "X$ax_cv_check_cl_libcl" = Xno], -[no_cl=yes; CL_CFLAGS=""; CL_LIBS=""], -[CL_LIBS="$ax_cv_check_cl_libcl $CL_LIBS"; AC_DEFINE([_OPENCL], [1], -[Define this for the OpenCL Accelerator])]) -AC_LANG_POP([$1]) -fi - -AC_SUBST([CL_CFLAGS]) -AC_SUBST([CL_LIBS]) -])dnl - diff --git a/m4/ax_openmp.m4 b/m4/ax_openmp.m4 deleted file mode 100755 index 7ea794be03aa265f8fcaa3a0abf4df27095a8ab5..0000000000000000000000000000000000000000 --- a/m4/ax_openmp.m4 +++ /dev/null @@ -1,99 +0,0 @@ -# =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_openmp.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_OPENMP([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) -# -# DESCRIPTION -# -# This macro tries to find out how to compile programs that use OpenMP a -# standard API and set of compiler directives for parallel programming -# (see http://www-unix.mcs/) -# -# On success, it sets the OPENMP_CFLAGS/OPENMP_CXXFLAGS/OPENMP_F77FLAGS -# output variable to the flag (e.g. -omp) used both to compile *and* link -# OpenMP programs in the current language. -# -# NOTE: You are assumed to not only compile your program with these flags, -# but also link it with them as well. -# -# If you want to compile everything with OpenMP, you should set: -# -# CFLAGS="$CFLAGS $OPENMP_CFLAGS" -# #OR# CXXFLAGS="$CXXFLAGS $OPENMP_CXXFLAGS" -# #OR# FFLAGS="$FFLAGS $OPENMP_FFLAGS" -# -# (depending on the selected language). -# -# The user can override the default choice by setting the corresponding -# environment variable (e.g. OPENMP_CFLAGS). 
-# -# ACTION-IF-FOUND is a list of shell commands to run if an OpenMP flag is -# found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it is -# not found. If ACTION-IF-FOUND is not specified, the default action will -# define HAVE_OPENMP. -# -# LICENSE -# -# Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu> -# -# This program is free software: you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see <http://www.gnu.org/licenses/>. -# -# As a special exception, the respective Autoconf Macro's copyright owner -# gives unlimited permission to copy, distribute and modify the configure -# scripts that are the output of Autoconf when processing the Macro. You -# need not follow the terms of the GNU General Public License when using -# or distributing such scripts, even though portions of the text of the -# Macro appear in them. The GNU General Public License (GPL) does govern -# all other use of the material that constitutes the Autoconf Macro. -# -# This special exception to the GPL applies to versions of the Autoconf -# Macro released by the Autoconf Archive. When you make and distribute a -# modified version of the Autoconf Macro, you may extend this special -# exception to the GPL to apply to your modified version as well. 
- -#serial 8 - -AC_DEFUN([AX_OPENMP], [ -AC_PREREQ(2.59) dnl for _AC_LANG_PREFIX - -AC_CACHE_CHECK([for OpenMP flag of _AC_LANG compiler], ax_cv_[]_AC_LANG_ABBREV[]_openmp, [save[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS -ax_cv_[]_AC_LANG_ABBREV[]_openmp=unknown -# Flags to try: -fopenmp (gcc), -openmp (icc), -mp (SGI & PGI), -# -xopenmp (Sun), -omp (Tru64), -qsmp=omp (AIX), none -ax_openmp_flags="-fopenmp -openmp -mp -xopenmp -omp -qsmp=omp none" -if test "x$OPENMP_[]_AC_LANG_PREFIX[]FLAGS" != x; then - ax_openmp_flags="$OPENMP_[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flags" -fi -for ax_openmp_flag in $ax_openmp_flags; do - case $ax_openmp_flag in - none) []_AC_LANG_PREFIX[]FLAGS=$save[]_AC_LANG_PREFIX[] ;; - *) []_AC_LANG_PREFIX[]FLAGS="$save[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flag" ;; - esac - AC_TRY_LINK_FUNC(omp_set_num_threads, - [ax_cv_[]_AC_LANG_ABBREV[]_openmp=$ax_openmp_flag; break]) -done -[]_AC_LANG_PREFIX[]FLAGS=$save[]_AC_LANG_PREFIX[]FLAGS -]) -if test "x$ax_cv_[]_AC_LANG_ABBREV[]_openmp" = "xunknown"; then - m4_default([$2],:) -else - if test "x$ax_cv_[]_AC_LANG_ABBREV[]_openmp" != "xnone"; then - OPENMP_[]_AC_LANG_PREFIX[]FLAGS=$ax_cv_[]_AC_LANG_ABBREV[]_openmp - fi - m4_default([$1], [AC_DEFINE(HAVE_OPENMP,1,[Define if OpenMP is enabled])]) -fi -])dnl AX_OPENMP diff --git a/run.sh b/run.sh index ec7c605e053c87e56c60072c52c115ff5a528601..185d155748e08ffba78e8b7478e8fa1d37b1501c 100755 --- a/run.sh +++ b/run.sh @@ -1,3 +1,4 @@ +#! \bin\bash $pre_command ./build/src/mem_map if [ $? 
-ne 0 ]; then diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3ef044fbbf31afbfe0cc1a4c73c5e5b868ebadcd..902b05519beedbae3e028e77d70568eb17d1ad80 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -41,9 +41,10 @@ else () set(CUDA_SOURCES) endif () -add_executable(mem_map ../../openfpm_devices/src/Memleak_check.cpp - ${CUDA_SOURCES} + +add_executable(mem_map ${CUDA_SOURCES} main.cpp + data_type/aggregate_unit_tests.cpp Vector/map_vector_sparse_unit_tests.cpp util/multi_array_openfpm/multi_array_ref_openfpm_unit_test.cpp memory_ly/memory_conf_unit_tests.cpp @@ -189,6 +190,7 @@ install(FILES SparseGrid/SparseGrid_iterator.hpp SparseGrid/SparseGridUtil.hpp SparseGrid/SparseGrid_iterator_block.hpp SparseGrid/SparseGrid_chunk_copy.hpp + SparseGrid/SparseGrid_conv_opt.hpp SparseGrid/cp_block.hpp DESTINATION openfpm_data/include/SparseGrid) @@ -301,13 +303,7 @@ install(FILES Packer_Unpacker/Pack_selector.hpp Packer_Unpacker/has_max_prop.hpp DESTINATION openfpm_data/include/Packer_Unpacker) -install(FILES util/boost/boost_multi_array_base_openfpm.hpp - util/boost/boost_multi_array_openfpm.hpp - util/boost/boost_multi_array_subarray_openfpm.hpp - util/boost/boost_multi_array_ref_openfpm.hpp - util/boost/boost_multi_array_view_openfpm.hpp - util/boost/boost_array_openfpm.hpp - util/boost/boost_multi_array_iterator_openfpm.hpp +install(FILES util/boost/boost_array_openfpm.hpp DESTINATION openfpm_data/include/util/boost/) install(FILES Grid/iterators/grid_key_dx_iterator_sp.hpp diff --git a/src/Grid/Encap.hpp b/src/Grid/Encap.hpp index e413dd98dd4c35be05b7f35ce2abd9e2666cd148..9d3d85c69c3a6249a0c12ccf51bf132cd765566f 100644 --- a/src/Grid/Encap.hpp +++ b/src/Grid/Encap.hpp @@ -12,9 +12,6 @@ #include "util/copy_compare/meta_copy.hpp" #include "boost/mpl/range_c.hpp" #include <boost/fusion/container/vector.hpp> -#ifdef SE_CLASS2 -#include "Memleak_check.hpp" -#endif #include "util/se_util.hpp" #include "util/copy_compare/copy_fusion_vector.hpp" #include 
"util/copy_compare/compare_fusion_vector.hpp" @@ -489,9 +486,6 @@ public: template <unsigned int p, typename r_type=decltype(boost::fusion::at_c<p>(data_c))> __device__ __host__ inline r_type get() { -#ifdef SE_CLASS2 - check_valid(&boost::fusion::at_c<p>(data_c),sizeof(typename boost::mpl::at<type,boost::mpl::int_<p>>::type)); -#endif return boost::fusion::at_c<p>(data_c); } @@ -503,9 +497,6 @@ public: template <unsigned int p, typename r_type=decltype(boost::fusion::at_c<p>(data_c))> __device__ __host__ inline const r_type get() const { -#ifdef SE_CLASS2 - check_valid(&boost::fusion::at_c<p>(data_c),sizeof(typename boost::mpl::at<type,boost::mpl::int_<p>>::type)); -#endif return boost::fusion::at_c<p>(data_c); } @@ -519,9 +510,6 @@ public: template <unsigned int p> inline __device__ __host__ void set(decltype(boost::fusion::at_c<p>(data_c)) & ele) { -#ifdef SE_CLASS2 - check_valid(&boost::fusion::at_c<p>(data_c),sizeof(typename boost::mpl::at<type,boost::mpl::int_<p>>::type)); -#endif return boost::fusion::at_c<p>(data_c) = ele; } diff --git a/src/Grid/copy_grid_unit_test.cpp b/src/Grid/copy_grid_unit_test.cpp index 176d62357a05b8155395d7fdb84303790e408e12..2368e41a8dfb8e58ef2e46c5666863879c07db0a 100644 --- a/src/Grid/copy_grid_unit_test.cpp +++ b/src/Grid/copy_grid_unit_test.cpp @@ -10,6 +10,7 @@ #include "Grid/map_grid.hpp" #include "data_type/aggregate.hpp" #include "Vector/map_vector.hpp" +#include "Point_test.hpp" BOOST_AUTO_TEST_SUITE( copy_grid_test ) @@ -210,8 +211,6 @@ BOOST_AUTO_TEST_CASE( copy_grid_test_use) Test_copy_grid_cmp(g2_src,g2_dst,bsrc_2,bdst_2); -#ifndef SE_CLASS2 - Box<3,size_t> bsrc_3({4,7,1},{11,20,6}); Box<3,size_t> bdst_3({20,5,10},{27,18,15}); @@ -228,7 +227,58 @@ BOOST_AUTO_TEST_CASE( copy_grid_test_use) Test_copy_grid_cmp(g4_src,g4_dst,bsrc_4,bdst_4); #endif -#endif + } +} + +BOOST_AUTO_TEST_CASE( copy_grid_test_invalid) +{ + { + size_t sz2_dst[2] = {3,7}; + size_t sz2_src[2] = {3,4}; + + grid_cpu<2,Point_test<double>> 
g2_src(sz2_src); + grid_cpu<2,Point_test<double>> g2_dst(sz2_dst); + g2_src.setMemory(); + g2_dst.setMemory(); + + Box<2,long int> bsrc_2({2,1},{1,1}); + Box<2,long int> bdst_2({0,1},{-1,1}); + + auto it = g2_src.getIterator(); + + while (it.isNext()) + { + auto key = it.get(); + + g2_src.template get<0>(key) = 0.0; + + ++it; + } + + auto it3 = g2_dst.getIterator(); + + while (it3.isNext()) + { + auto key = it3.get(); + + g2_dst.template get<0>(key) = 1.0; + + ++it3; + } + + g2_dst.copy_to(g2_src,bsrc_2,bdst_2); + + auto it2 = g2_dst.getIterator(); + + while (it2.isNext()) + { + auto key = it2.get(); + + BOOST_REQUIRE_EQUAL(g2_dst.template get<0>(key),1.0); + + ++it2; + } + } } diff --git a/src/Grid/grid_base_implementation.hpp b/src/Grid/grid_base_implementation.hpp index 565f59b16f96c24e65854e5fd4edcb08fefc6d44..5c6f24b87bcd1f4d594d098920814004c7508ce8 100644 --- a/src/Grid/grid_base_implementation.hpp +++ b/src/Grid/grid_base_implementation.hpp @@ -581,9 +581,6 @@ public: :g1(0),isExternal(false) { // Add this pointer -#ifdef SE_CLASS2 - check_new(this,8,GRID_EVENT,1); -#endif } /*! \brief create a grid from another grid @@ -607,9 +604,6 @@ public: :g1(sz),isExternal(false) { // Add this pointer -#ifdef SE_CLASS2 - check_new(this,8,GRID_EVENT,1); -#endif } /*! \brief Constructor @@ -623,18 +617,12 @@ public: :g1(sz),isExternal(false) { // Add this pointer -#ifdef SE_CLASS2 - check_new(this,8,GRID_EVENT,1); -#endif } //! Destructor ~grid_base_impl() THROW { // delete this pointer -#ifdef SE_CLASS2 - check_delete(this); -#endif } /*! 
\brief It copy a grid @@ -646,10 +634,6 @@ public: */ grid_base_impl<dim,T,S,layout,layout_base> & operator=(const grid_base_impl<dim,T,S,layout,layout_base> & g) { - // Add this pointer -#ifdef SE_CLASS2 - check_new(this,8,GRID_EVENT,1); -#endif swap(g.duplicate()); return *this; @@ -664,11 +648,6 @@ public: */ grid_base_impl<dim,T,S,layout,layout_base> & operator=(grid_base_impl<dim,T,S,layout,layout_base> && g) { - // Add this pointer -#ifdef SE_CLASS2 - check_new(this,8,GRID_EVENT,1); -#endif - swap(g); return *this; @@ -709,9 +688,6 @@ public: */ grid_base_impl<dim,T,S,layout,layout_base> duplicate() const THROW { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif //! Create a completely new grid with sz grid_base_impl<dim,T,S,layout,layout_base> grid_new(g1.getSize()); @@ -769,9 +745,6 @@ public: const grid_sm<dim,void> & getGrid() const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return g1; } @@ -785,10 +758,6 @@ public: void setMemory() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif - mem_setm<S,layout_base<T>,decltype(this->data_),decltype(this->g1)>::setMemory(data_,g1,is_mem_init); } @@ -805,9 +774,6 @@ public: */ template<unsigned int p = 0> void setMemory(S & m) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif //! Is external isExternal = true; @@ -837,9 +803,6 @@ public: */ void setMemoryArray(S * m) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif //! 
Is external isExternal = true; @@ -860,10 +823,6 @@ public: template<unsigned int p = 0> void * getPointer() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif - return mem_getpointer<decltype(data_),layout_base_>::template getPointer<p>(data_); } @@ -877,10 +836,6 @@ public: template<unsigned int p = 0> const void * getPointer() const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif - return mem_getpointer<decltype(data_),layout_base_>::template getPointer<p>(data_); } @@ -895,9 +850,6 @@ public: template <unsigned int p, typename r_type=decltype(mem_get<p,layout_base<T>,layout,grid_sm<dim,T>,grid_key_dx<dim>>::get(data_,g1,grid_key_dx<dim>()))> inline r_type insert(const grid_key_dx<dim> & v1) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 check_init(); check_bound(v1); @@ -916,9 +868,6 @@ public: template <unsigned int p, typename r_type=decltype(mem_get<p,layout_base<T>,layout,grid_sm<dim,T>,grid_key_dx<dim>>::get(data_,g1,grid_key_dx<dim>()))> __device__ __host__ inline r_type get_usafe(const grid_key_dx<dim> & v1) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 check_init(); #endif @@ -935,9 +884,6 @@ public: template <unsigned int p, typename r_type=decltype(mem_get<p,layout_base<T>,layout,grid_sm<dim,T>,grid_key_dx<dim>>::get_c(data_,g1,grid_key_dx<dim>()))> __device__ __host__ inline r_type get_unsafe(const grid_key_dx<dim> & v1) const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 check_init(); #endif @@ -954,9 +900,6 @@ public: template <unsigned int p, typename r_type=decltype(mem_get<p,layout_base<T>,layout,grid_sm<dim,T>,grid_key_dx<dim>>::get(data_,g1,grid_key_dx<dim>()))> __device__ __host__ inline r_type get(const grid_key_dx<dim> & v1) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 check_init(); check_bound(v1); @@ -986,9 +929,6 @@ public: template <unsigned int p, typename 
r_type=decltype(mem_get<p,layout_base<T>,layout,grid_sm<dim,T>,grid_key_dx<dim>>::get_c(data_,g1,grid_key_dx<dim>()))> __device__ __host__ inline r_type get(const grid_key_dx<dim> & v1) const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 check_init(); check_bound(v1); @@ -1006,9 +946,6 @@ public: template <unsigned int p, typename r_type=decltype(mem_get<p,layout_base<T>,layout,grid_sm<dim,T>,grid_key_dx<dim>>::get_lin(data_,g1,0))> __device__ __host__ inline r_type get(const size_t lin_id) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 check_init(); check_bound(lin_id); @@ -1026,9 +963,6 @@ public: template <unsigned int p, typename r_type=decltype(mem_get<p,layout_base<T>,layout,grid_sm<dim,T>,grid_key_dx<dim>>::get_lin(data_,g1,0))> __device__ __host__ inline const r_type get(size_t lin_id) const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 check_init(); check_bound(lin_id); @@ -1050,9 +984,6 @@ public: */ inline encapc<dim,T,layout> get_o(const grid_key_dx<dim> & v1) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 check_init(); check_bound(v1); @@ -1073,9 +1004,6 @@ public: */ inline const encapc<dim,T,layout> get_o(const grid_key_dx<dim> & v1) const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 check_init(); check_bound(v1); @@ -1099,9 +1027,6 @@ public: */ inline encapc<dim,T,layout> insert_o(const grid_key_dx<dim> & v1) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 check_init(); check_bound(v1); @@ -1122,9 +1047,6 @@ public: */ inline encapc<dim,T,layout> get_o(size_t v1) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 check_init(); check_bound(v1); @@ -1145,9 +1067,6 @@ public: */ inline const encapc<dim,T,layout> get_o(size_t v1) const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 check_init(); check_bound(v1); @@ -1166,9 +1085,6 @@ public: template<int prp> void fill(unsigned char fl) { 
-#ifdef SE_CLASS2 - check_valid(this,8); -#endif if (prp != 0 || is_layout_mlin<layout_base<T>>::type::value == false) { std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " unsupported fill operation " << std::endl; @@ -1184,7 +1100,7 @@ public: * \param box_src box to kill the points * */ - void remove(Box<dim,size_t> & section_to_delete) + void remove(Box<dim,long int> & section_to_delete) {} /*! \brief Reset the queue to remove and copy section of grids @@ -1229,7 +1145,7 @@ public: for (size_t i = 0 ; i < dim ; i++) { - if (box_dst.getHigh(i) >= g1.size(i)) + if (box_dst.getHigh(i) >= (long int)g1.size(i)) { long int shift = box_dst.getHigh(i) - g1.size(i) + 1; box_dst_.setHigh(i,box_dst.getHigh(i) - shift); @@ -1353,9 +1269,6 @@ public: */ void resize(const size_t (& sz)[dim], size_t opt = DATA_ON_HOST | DATA_ON_DEVICE, unsigned int blockSize = 1) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif //! Create a completely new grid with sz grid_base_impl<dim,T,S,layout,layout_base> grid_new(sz); @@ -1401,9 +1314,6 @@ public: */ void resize_no_device(const size_t (& sz)[dim]) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif //! 
Create a completely new grid with sz grid_base_impl<dim,T,S,layout,layout_base> grid_new(sz); @@ -1421,9 +1331,6 @@ public: */ void remove(size_t key) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif if (dim != 1) { #ifdef SE_CLASS1 @@ -1474,10 +1381,6 @@ public: void swap_nomode(grid_base_impl<dim,T,S,layout,layout_base> & grid) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif - mem_swap<T,layout_base<T>,decltype(data_),decltype(grid)>::template swap_nomode<S>(data_,grid.data_); // exchange the grid info @@ -1497,10 +1400,6 @@ public: void swap(grid_base_impl<dim,T,S,layout,layout_base> & grid) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif - mem_swap<T,layout_base<T>,decltype(data_),decltype(grid)>::swap(data_,grid.data_); // exchange the grid info @@ -1528,9 +1427,6 @@ public: void swap(grid_base_impl<dim,T,S,layout,layout_base> && grid) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif swap(grid); } @@ -1561,9 +1457,6 @@ public: */ template<typename Memory> inline void set(grid_key_dx<dim> dx, const encapc<1,T,Memory> & obj) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 check_init(); check_bound(dx); @@ -1587,9 +1480,6 @@ public: inline void set(grid_key_dx<dim> dx, const T & obj) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 check_init(); check_bound(dx); @@ -1611,9 +1501,6 @@ public: const grid_base_impl<dim,T,S,layout,layout_base> & g, const grid_key_dx<dim> & key2) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 check_init(); check_bound(key1); @@ -1635,9 +1522,6 @@ public: const grid_base_impl<dim,T,S,layout,layout_base> & g, const size_t key2) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 check_init(); check_bound(key1); @@ -1657,9 +1541,6 @@ public: template<typename Mem> inline void set(const grid_key_dx<dim> & key1,const grid_base_impl<dim,T,Mem,layout,layout_base> & g, const grid_key_dx<dim> & key2) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef 
SE_CLASS1 check_init(); check_bound(key1); @@ -1681,9 +1562,6 @@ public: const grid_base_impl<dim,T,Mem,layout2,layout_base2> & g, const grid_key_dx<dim> & key2) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 check_init(); check_bound(key1); @@ -1700,9 +1578,6 @@ public: */ inline size_t size() const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return g1.size(); } @@ -1718,9 +1593,6 @@ public: */ inline grid_key_dx_iterator_sub<dim> getSubIterator(const grid_key_dx<dim> & start, const grid_key_dx<dim> & stop) const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return g1.getSubIterator(start,stop); } @@ -1735,9 +1607,6 @@ public: */ inline grid_key_dx_iterator_sub<dim> getSubIterator(size_t m) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return grid_key_dx_iterator_sub<dim>(g1,m); } @@ -1750,9 +1619,6 @@ public: */ inline grid_key_dx_iterator<dim> getIterator() const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return grid_key_dx_iterator<dim>(g1); } @@ -1791,9 +1657,6 @@ public: inline grid_key_dx_iterator<dim,stencil_offset_compute<dim,Np>> getIteratorStencil(const grid_key_dx<dim> (& stencil_pnt)[Np]) const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return grid_key_dx_iterator<dim,stencil_offset_compute<dim,Np>>(g1,stencil_pnt); } @@ -1810,9 +1673,6 @@ public: */ inline grid_key_dx_iterator_sub<dim> getIterator(const grid_key_dx<dim> & start, const grid_key_dx<dim> & stop) const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif // get the starting point and the end point of the real domain return grid_key_dx_iterator_sub<dim>(g1,start,stop); @@ -1838,22 +1698,6 @@ public: return data_; } - /*! \brief It return the id of structure in the allocation list - * - * \see print_alloc and SE_CLASS2 - * - * \return the id - * - */ - long int who() - { -#ifdef SE_CLASS2 - return check_whoami(this,8); -#else - return -1; -#endif - } - /*! 
\brief In this case it does nothing * * \note this function exist to respect the interface to work as distributed diff --git a/src/Grid/grid_common.hpp b/src/Grid/grid_common.hpp index 28539e9231fc144e5caade202241e721c236ef13..398401887277cbfba1381c89919096e11b5dfa83 100644 --- a/src/Grid/grid_common.hpp +++ b/src/Grid/grid_common.hpp @@ -84,6 +84,7 @@ struct call_recursive_host_device_if_vector for(size_t i = start ; i < stop ; i++) { + new (&ptr_tt[i]) T_ker(); ptr_tt[i] = ptr[i].toKernel(); } @@ -167,6 +168,56 @@ struct call_recursive_host_device_if_vector<T,T_ker,type_prp,layout_base,4> static void call(obj_type & obj, size_t start, size_t stop) {} }; +/////////// destructor + + +template<typename T, typename T_ker, typename type_prp, template<typename> class layout_base , int is_vector> +struct call_recursive_destructor_if_vector +{ + template<typename mem_type, typename obj_type> static void destruct(mem_type * mem, obj_type & obj) + { + size_t sz = mem->size() / sizeof(type_prp); + // The type of device and the type on host does not match (in general) + // So we have to convert before transfer + + mem_type tmp; + + tmp.allocate(mem->size()); + + mem->deviceToHost(tmp); + T_ker * ptr = static_cast<T_ker *>(tmp.getPointer()); + + for(size_t i = 0 ; i < sz ; i++) + { + ptr->~T_ker(); + ++ptr; + } + } +}; + +template<typename T, typename T_ker, typename type_prp ,template<typename> class layout_base> +struct call_recursive_destructor_if_vector<T,T_ker,type_prp,layout_base,0> +{ + template<typename mem_type,typename obj_type> static void destruct(mem_type * mem, obj_type & obj) + {} +}; + +template<typename T, typename T_ker, typename type_prp ,template<typename> class layout_base> +struct call_recursive_destructor_if_vector<T,T_ker,type_prp,layout_base,3> +{ + template<typename mem_type,typename obj_type> static void destruct(mem_type * mem, obj_type & obj) + {} +}; + +template<typename T, typename T_ker, typename type_prp ,template<typename> class layout_base> 
+struct call_recursive_destructor_if_vector<T,T_ker,type_prp,layout_base,4> +{ + template<typename mem_type,typename obj_type> static void destruct(mem_type * mem, obj_type & obj) + {} +}; + +/////////////////////// + /*! \brief this class is a functor for "for_each" algorithm * * This class is a functor for "for_each" algorithm. For each diff --git a/src/Grid/grid_sm.hpp b/src/Grid/grid_sm.hpp index c0c62ba88e5e64d59d08a63dda65f7970bad7462..730b06e273f1b33034fc259e02d6bb7ef7b38814 100755 --- a/src/Grid/grid_sm.hpp +++ b/src/Grid/grid_sm.hpp @@ -32,7 +32,7 @@ struct Box_check Box<dim,T> box; template<typename T2> - Box_check(Box<dim,T2> & box) + explicit Box_check(Box<dim,T2> & box) :box(box) {} diff --git a/src/Grid/grid_unit_tests.hpp b/src/Grid/grid_unit_tests.hpp index acc7d636a989c6fc0ea4c026cfa7f10e7c4b53b1..9611d6d53d3f556af6fe3a91f4d79cbb01ace5b9 100644 --- a/src/Grid/grid_unit_tests.hpp +++ b/src/Grid/grid_unit_tests.hpp @@ -301,23 +301,6 @@ template<unsigned int dim, typename g> void test_layout_gridObjNd(g & c3, size_t BOOST_AUTO_TEST_SUITE( grid_test ) -////////// Test function /////////// - -#ifdef SE_CLASS2 - -grid_cpu<3,aggregate<float>> & test_error() -{ - size_t sz[] = {16,16,16}; - - grid_cpu<3,aggregate<float>> g(sz); - - return g; -} - -#endif - -///////////////////////////////////// - BOOST_AUTO_TEST_CASE( grid_safety_check ) { #if defined(SE_CLASS1) && defined (THROW_ON_ERROR) @@ -415,38 +398,6 @@ BOOST_AUTO_TEST_CASE( grid_safety_check ) } BOOST_REQUIRE_EQUAL(error,true); - #if defined(SE_CLASS2) && defined (THROW_ON_ERROR) - - error = false; - - // Create a grid - - grid_cpu<3,aggregate<float>> * gp = new grid_cpu<3,aggregate<float>>(sz); - delete gp; - - // Try to access the class - - try - {gp->size();} - catch (std::exception & e) - { - error = true; - BOOST_REQUIRE_EQUAL(e.what(),"Runtime memory error"); - } - BOOST_REQUIRE_EQUAL(error,true); - - error = false; - try - {grid_cpu<3,aggregate<float>> gr = test_error();} - catch 
(std::exception & e) - { - error = true; - BOOST_REQUIRE_EQUAL(e.what(),"Runtime memory error"); - } - BOOST_REQUIRE_EQUAL(error,true); - - #endif - #endif } diff --git a/src/Grid/map_grid.hpp b/src/Grid/map_grid.hpp index b86a3e15db80c5f628f6997b72540bce9f077a09..f62e37f202c293c990e4d5e261bb371832e68b95 100755 --- a/src/Grid/map_grid.hpp +++ b/src/Grid/map_grid.hpp @@ -25,9 +25,6 @@ #include <boost/mpl/for_each.hpp> #include "memory_ly/memory_conf.hpp" #include "util/copy_compare/meta_copy.hpp" -#ifdef SE_CLASS2 -#include "Memleak_check.hpp" -#endif #include "util/for_each_ref.hpp" #include "util.hpp" #include <utility> @@ -492,6 +489,57 @@ struct host_to_device_impl } }; +/*! \brief this class is a functor for "for_each" algorithm + * + * This class is a functor for "for_each" algorithm. For each + * element of the boost::vector the operator() is called. + * Is mainly used to copy one encap into another encap object + * + * \tparam encap source + * \tparam encap dst + * + */ +template<typename T_type, template<typename> class layout_base , typename Memory> +struct deconstruct_impl +{ + //! object to destruct + typename memory_traits_inte<T_type>::type & dst; + + /*! \brief constructor + * + * \param src source encapsulated object + * \param dst source encapsulated object + * + */ + inline deconstruct_impl(typename memory_traits_inte<T_type>::type & dst) + :dst(dst) + {}; + + + //! 
It call the copy function for each property + template<typename T> + inline void operator()(T& t) const + { + typedef decltype(boost::fusion::at_c<T::value>(dst).mem_r) mem_r_type; + + typedef typename boost::mpl::at<typename T_type::type,T>::type type_prp; + + typedef typename toKernel_transform<layout_base,typename mem_r_type::value_type>::type kernel_type; + + typedef boost::mpl::int_<(is_vector<typename mem_r_type::value_type>::value || + is_vector_dist<typename mem_r_type::value_type>::value || + is_gpu_celllist<typename mem_r_type::value_type>::value) + 2*std::is_array<type_prp>::value + std::rank<type_prp>::value> crh_cond; + + call_recursive_destructor_if_vector<typename mem_r_type::value_type, + kernel_type, + type_prp, + layout_base, + crh_cond::value> + ::template destruct<Memory,mem_r_type>(static_cast<Memory *>(boost::fusion::at_c<T::value>(dst).mem), + boost::fusion::at_c<T::value>(dst).mem_r); + } +}; + /*! \brief this class is a functor for "for_each" algorithm * * This class is a functor for "for_each" algorithm. For each @@ -854,6 +902,13 @@ public: return *this; } + + ~grid_cpu() + { + deconstruct_impl<T,memory_traits_inte,S> dth(this->data_); + + boost::mpl::for_each_ref< boost::mpl::range_c<int,0,T::max_prop> >(dth); + } }; //! short formula for a grid on gpu diff --git a/src/NN/CellList/CellDecomposer.hpp b/src/NN/CellList/CellDecomposer.hpp index 835337b2d50dbf31e33a38c66dbdbee6f3bcf96e..dc9b29c170a3b3d660d91cbdd0d52ea405bdbfe1 100644 --- a/src/NN/CellList/CellDecomposer.hpp +++ b/src/NN/CellList/CellDecomposer.hpp @@ -153,6 +153,14 @@ class shift_only public: + /*! \brief Default constructor + * + */ + shift_only() + { + sh.zero(); + } + /*! 
\brief Constructor * * \param t Matrix transformation diff --git a/src/NN/CellList/CellListM.hpp b/src/NN/CellList/CellListM.hpp index 9709e25426a5cb55a28a7e97437f16908538999c..a1cb8066736aebe010118601c64f15072dcafc0f 100644 --- a/src/NN/CellList/CellListM.hpp +++ b/src/NN/CellList/CellListM.hpp @@ -13,8 +13,6 @@ struct PV_cl { - //! particle id - size_t ele; //! phase id size_t v; }; diff --git a/src/NN/CellList/NNc_array.hpp b/src/NN/CellList/NNc_array.hpp index 65fb59d4d220c7c1b7e96a8c1234089b6d1f2651..5b154b2523c0eb032ea32d394ac138610a174d30 100644 --- a/src/NN/CellList/NNc_array.hpp +++ b/src/NN/CellList/NNc_array.hpp @@ -272,6 +272,8 @@ public: sym_mid = nnc.sym_mid; full_or_sym = nnc.full_or_sym; + + return *this; } /*! \brief swap the NNc_array diff --git a/src/NN/CellList/cuda/CellDecomposer_gpu_ker.cuh b/src/NN/CellList/cuda/CellDecomposer_gpu_ker.cuh index db59bf7b80f2f5faf2c0a9ac258e3dc9bf671042..a81b2f2c7a42f096971eb4bb45c5fd735584798b 100644 --- a/src/NN/CellList/cuda/CellDecomposer_gpu_ker.cuh +++ b/src/NN/CellList/cuda/CellDecomposer_gpu_ker.cuh @@ -30,6 +30,9 @@ class CellDecomposer_gpu_ker public: + __device__ __host__ CellDecomposer_gpu_ker() + {} + __device__ __host__ CellDecomposer_gpu_ker(openfpm::array<T,dim,cnt_type> & spacing_c, openfpm::array<ids_type,dim,cnt_type> & div_c, openfpm::array<ids_type,dim,cnt_type> & off, diff --git a/src/NN/CellList/cuda/CellList_gpu_ker.cuh b/src/NN/CellList/cuda/CellList_gpu_ker.cuh index f712ca699039d2f2801b066129e10fca24ae2821..004716a7dd1872a5fef8c8adb0876ed5ae1f1808 100644 --- a/src/NN/CellList/cuda/CellList_gpu_ker.cuh +++ b/src/NN/CellList/cuda/CellList_gpu_ker.cuh @@ -440,6 +440,10 @@ public: //! 
Indicate this structure has a function to check the device pointer typedef int yes_has_check_device_pointer; + __host__ __device__ inline CellList_gpu_ker() + :g_m(0) + {} + __host__ __device__ inline CellList_gpu_ker(openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> starts, openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> srt, openfpm::vector_gpu_ker<aggregate<cnt_type>,memory_traits_inte> dprt, diff --git a/src/NN/VerletList/VerletListFast.hpp b/src/NN/VerletList/VerletListFast.hpp index c1771a966afe0818eb1d705a0f8b6d46bb3bb0d8..c6869025a85bdafb2eb58e42319c911f4a9e3248 100644 --- a/src/NN/VerletList/VerletListFast.hpp +++ b/src/NN/VerletList/VerletListFast.hpp @@ -507,6 +507,7 @@ public: // Initialize a cell-list cli.Initialize(bt,div); + initCl(cli,pos,g_m,opt); // Unuseful empty vector diff --git a/src/Space/Shape/Box.hpp b/src/Space/Shape/Box.hpp index 321d22876d8e5406ff1ddddeef4579e1d9b9fea8..e8a5e95da5b5286405f2a9909a5d34a2237044fa 100644 --- a/src/Space/Shape/Box.hpp +++ b/src/Space/Shape/Box.hpp @@ -364,7 +364,7 @@ public: * \param box_data from which to construct * */ - inline Box(type box_data) + explicit inline Box(type box_data) { // we copy the data diff --git a/src/Space/Shape/Point_operators_functions.hpp b/src/Space/Shape/Point_operators_functions.hpp index 7c4cedff0b0f550d4e91c8888e8a7bb64651d1d7..4c74fca212663b4007cb7acf80ca4091e74dab72 100644 --- a/src/Space/Shape/Point_operators_functions.hpp +++ b/src/Space/Shape/Point_operators_functions.hpp @@ -32,7 +32,7 @@ public:\ typedef typename first_or_second_pt<has_coordtype<exp1>::value,exp1,exp2>::coord_type coord_type;\ \ \ - inline point_expression_op(const exp1 & o1)\ + inline explicit point_expression_op(const exp1 & o1)\ :o1(o1),scal(0)\ {}\ \ @@ -117,7 +117,7 @@ public: typedef typename exp1::coord_type coord_type; //! 
Constructor from expression - __device__ __host__ inline point_expression_op(const exp1 & o1) + __device__ __host__ inline explicit point_expression_op(const exp1 & o1) :o1(o1),scal(0.0) {} @@ -191,7 +191,7 @@ public: typedef typename exp1::coord_type coord_type; //! constructor from an expression - __device__ __host__ inline point_expression_op(const exp1 & o1) + __device__ __host__ inline explicit point_expression_op(const exp1 & o1) :o1(o1),scal(0.0) {} diff --git a/src/Space/Shape/Sphere.hpp b/src/Space/Shape/Sphere.hpp index 55141bc741f3f1c7adfd49f9e6e023a259dfdfd2..7d5f81ff9402945ecb2256b666d7f23bb82933ba 100644 --- a/src/Space/Shape/Sphere.hpp +++ b/src/Space/Shape/Sphere.hpp @@ -112,7 +112,7 @@ template<unsigned int dim ,typename T> class Sphere * \return the radius of the sphere * */ - __device__ __host__ T radius() + __device__ __host__ T radius() const { return boost::fusion::at_c<r>(data); } @@ -206,6 +206,26 @@ template<unsigned int dim ,typename T> class Sphere return false; } + + /*! \brief Return the distance from the surface + * + * \param p point + * \return the distance from the surface. 
The sign indicate if is outside or inside the shape + * + */ + __device__ __host__ T distance(Point<dim,T> & p) const + { + T dist = 0.0; + + // calculate the distance of the center from the point + + for (int i = 0; i < dim ; i++) + { + dist += (boost::fusion::at_c<x>(data)[i] - p.get(i))*(boost::fusion::at_c<x>(data)[i] - p.get(i)); + } + + return sqrt(dist) - radius(); + } }; #endif diff --git a/src/Space/Shape/Sphere_unit_test.cpp b/src/Space/Shape/Sphere_unit_test.cpp index 5b3355245ec27603d0400ae89bcf29d4d90768c3..1e5aa0a90daa6d4856b43f4df1a20467d397bb07 100644 --- a/src/Space/Shape/Sphere_unit_test.cpp +++ b/src/Space/Shape/Sphere_unit_test.cpp @@ -25,6 +25,9 @@ BOOST_AUTO_TEST_CASE( Sphere_test_use) BOOST_REQUIRE_EQUAL(s.isInside(p1),true); BOOST_REQUIRE_EQUAL(s.isInside(p3),false); + + double dist = s.distance(p3); + BOOST_REQUIRE_EQUAL(dist,0.0866025403784); } BOOST_AUTO_TEST_SUITE_END() diff --git a/src/Space/SpaceBox.hpp b/src/Space/SpaceBox.hpp index a17b71162c4f803648308fc6fcd4b3c367361c92..aec873404d391a0c9bb0252d312487629b197b11 100644 --- a/src/Space/SpaceBox.hpp +++ b/src/Space/SpaceBox.hpp @@ -54,7 +54,7 @@ class SpaceBox : public Box<dim,T> * \param b box * */ - template <typename S> inline SpaceBox(const Box<dim,S> & b) + template <typename S>inline SpaceBox(const Box<dim,S> & b) { for (size_t d = 0 ; d < dim ; d++) {this->setLow(d,b.getLow(d));} @@ -83,7 +83,7 @@ class SpaceBox : public Box<dim,T> * */ - SpaceBox(const Box<dim,T> & b) + explicit SpaceBox(const Box<dim,T> & b) :Box<dim,T>(b) { } @@ -94,7 +94,7 @@ class SpaceBox : public Box<dim,T> * */ - template<unsigned int dim_s,typename Mem>SpaceBox(const encapc<dim_s,Box<dim,T>,Mem> & box) + template<unsigned int dim_s,typename Mem> SpaceBox(const encapc<dim_s,Box<dim,T>,Mem> & box) { // for each dimension set high and low @@ -110,7 +110,7 @@ class SpaceBox : public Box<dim,T> * \param box box (Encapsulated) * */ - template<unsigned int dim_s,typename Mem>SpaceBox(const 
encapc<dim_s,SpaceBox<dim,T>,Mem> & box) + template<unsigned int dim_s,typename Mem> SpaceBox(const encapc<dim_s,SpaceBox<dim,T>,Mem> & box) { // for each dimension set high and low diff --git a/src/SparseGrid/SparseGrid.hpp b/src/SparseGrid/SparseGrid.hpp index 0c8e97e12ad0ebf90b2da0df14e429d80d5aa064..f0e24b6cc5abf1873dd93b4c2e42a16e75ebe910 100644 --- a/src/SparseGrid/SparseGrid.hpp +++ b/src/SparseGrid/SparseGrid.hpp @@ -19,6 +19,7 @@ #include "SparseGridUtil.hpp" #include "SparseGrid_iterator.hpp" #include "SparseGrid_iterator_block.hpp" +#include "SparseGrid_conv_opt.hpp" //#include "util/debug.hpp" // We do not want parallel writer @@ -187,46 +188,7 @@ public: }; -template<unsigned int l> -union data_il -{ -}; - -template<> -union data_il<8> -{ - typedef long int type; - - unsigned char uc[8]; - long int i; -}; - -template<> -union data_il<4> -{ - typedef int type; - - unsigned char uc[4]; - int i; -}; - -template<> -union data_il<2> -{ - typedef short int type; - unsigned char uc[2]; - short int i; -}; - -template<> -union data_il<1> -{ - typedef char type; - - unsigned char uc[4]; - char i; -}; /*! \brief this class is a functor for "for_each" algorithm * @@ -261,694 +223,6 @@ struct copy_sz } }; -template<unsigned int dim> -struct conv_impl -{ - template<unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size , unsigned int N, typename SparseGridType, typename lambda_f, typename ... ArgsT > - void conv(int (& stencil)[N][3], grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... 
args) - { -#ifndef __NVCC__ - std::cout << __FILE__ << ":" << __LINE__ << " error conv operation not implemented for this dimension " << std::endl; -#else - std::cout << __FILE__ << ":" << __LINE__ << " error conv is unsupported when compiled on NVCC " << std::endl; -#endif - } - - template<bool findNN, unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size, typename SparseGridType, typename lambda_f, typename ... ArgsT > - static void conv_cross(grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args) - { -#ifndef __NVCC__ - std::cout << __FILE__ << ":" << __LINE__ << " error conv_cross operation not implemented for this dimension " << std::endl; -#else - std::cout << __FILE__ << ":" << __LINE__ << " error conv_cross is unsupported when compiled on NVCC " << std::endl; -#endif - } - - template<bool findNN, typename NNType, unsigned int prop_src1, unsigned int prop_src2, - unsigned int prop_dst1, unsigned int prop_dst2, - unsigned int stencil_size , unsigned int N, - typename SparseGridType, typename lambda_f, typename ... ArgsT > - static void conv2(int (& stencil)[N][3], grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args) - { -#ifndef __NVCC__ - std::cout << __FILE__ << ":" << __LINE__ << " error conv2 operation not implemented for this dimension " << std::endl; -#else - std::cout << __FILE__ << ":" << __LINE__ << " error conv2 is unsupported when compiled on NVCC " << std::endl; -#endif - } - - template<bool findNN, unsigned int prop_src1, unsigned int prop_src2, unsigned int prop_dst1, unsigned int prop_dst2, unsigned int stencil_size, typename SparseGridType, typename lambda_f, typename ... ArgsT > - static void conv_cross2(grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... 
args) - { -#ifndef __NVCC__ - std::cout << __FILE__ << ":" << __LINE__ << " error conv_cross2 operation not implemented for this dimension " << std::endl; -#else - std::cout << __FILE__ << ":" << __LINE__ << " error conv_cross2 is unsupported when compiled on NVCC " << std::endl; -#endif - } -}; - -#ifndef __NVCC__ - -struct cross_stencil_v -{ - Vc::double_v xm; - Vc::double_v xp; - Vc::double_v ym; - Vc::double_v yp; - Vc::double_v zm; - Vc::double_v zp; -}; - -template<> -struct conv_impl<3> -{ - template<bool findNN, typename NNtype, unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size , unsigned int N, typename SparseGridType, typename lambda_f, typename ... ArgsT > - static void conv(int (& stencil)[N][3], grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args) - { - auto it = grid.template getBlockIterator<stencil_size>(start,stop); - - typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src>>::type prop_type; - - unsigned char mask[decltype(it)::sizeBlockBord]; - unsigned char mask_sum[decltype(it)::sizeBlockBord]; - unsigned char mask_unused[decltype(it)::sizeBlock]; - __attribute__ ((aligned (32))) prop_type block_bord_src[decltype(it)::sizeBlockBord]; - __attribute__ ((aligned (32))) prop_type block_bord_dst[decltype(it)::sizeBlock]; - - typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0; - typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1; - typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2; - - while (it.isNext()) - { - it.template loadBlockBorder<prop_src,NNtype,findNN>(block_bord_src,mask); - - if (it.start_b(2) != stencil_size || it.start_b(1) != stencil_size || it.start_b(0) != stencil_size || - it.stop_b(2) != sz2::value+stencil_size || it.stop_b(1) != sz1::value+stencil_size || 
it.stop_b(0) != sz0::value+stencil_size) - { - auto & header_mask = grid.private_get_header_mask(); - auto & header_inf = grid.private_get_header_inf(); - - loadBlock_impl<prop_dst,0,3,typename decltype(it)::vector_blocks_exts_type, typename decltype(it)::vector_ext_type>::template loadBlock<decltype(it)::sizeBlock>(block_bord_dst,grid,it.getChunkId(),mask_unused); - } - - // Sum the mask - for (int k = it.start_b(2) ; k < it.stop_b(2) ; k++) - { - for (int j = it.start_b(1) ; j < it.stop_b(1) ; j++) - { - int cc = it.LinB(it.start_b(0),j,k); - int c[N]; - - for (int s = 0 ; s < N ; s++) - { - c[s] = it.LinB(it.start_b(0)+stencil[s][0],j+stencil[s][1],k+stencil[s][2]); - } - - for (int i = it.start_b(0) ; i < it.stop_b(0) ; i += sizeof(size_t)) - { - size_t cmd = *(size_t *)&mask[cc]; - - if (cmd != 0) - { - size_t xm[N]; - - for (int s = 0 ; s < N ; s++) - { - xm[s] = *(size_t *)&mask[c[s]]; - } - - size_t sum = 0; - for (int s = 0 ; s < N ; s++) - { - sum += xm[s]; - } - - *(size_t *)&mask_sum[cc] = sum; - } - - cc += sizeof(size_t); - for (int s = 0 ; s < N ; s++) - { - c[s] += sizeof(size_t); - } - } - } - } - - for (int k = it.start_b(2) ; k < it.stop_b(2) ; k++) - { - for (int j = it.start_b(1) ; j < it.stop_b(1) ; j++) - { - int cc = it.LinB(it.start_b(0),j,k); - int c[N]; - - int cd = it.LinB_off(it.start_b(0),j,k); - - for (int s = 0 ; s < N ; s++) - { - c[s] = it.LinB(it.start_b(0)+stencil[s][0],j+stencil[s][1],k+stencil[s][2]); - } - - for (int i = it.start_b(0) ; i < it.stop_b(0) ; i += Vc::Vector<prop_type>::Size) - { - Vc::Mask<prop_type> cmp; - - for (int s = 0 ; s < Vc::Vector<prop_type>::Size ; s++) - { - cmp[s] = (mask[cc+s] == true && i+s < it.stop_b(0)); - } - - // we do only if exist the point - if (Vc::none_of(cmp) == false) - { - Vc::Mask<prop_type> surround; - - Vc::Vector<prop_type> xs[N+1]; - - xs[0] = Vc::Vector<prop_type>(&block_bord_src[cc],Vc::Unaligned); - - for (int s = 1 ; s < N+1 ; s++) - { - xs[s] = 
Vc::Vector<prop_type>(&block_bord_src[c[s-1]],Vc::Unaligned); - } - - auto res = func(xs, &mask_sum[cc], args ...); - - res.store(&block_bord_dst[cd],cmp,Vc::Aligned); - } - - cc += Vc::Vector<prop_type>::Size; - for (int s = 0 ; s < N ; s++) - { - c[s] += Vc::Vector<prop_type>::Size; - } - cd += Vc::Vector<prop_type>::Size; - } - } - } - - it.template storeBlock<prop_dst>(block_bord_dst); - - ++it; - } - } - - template<bool findNN, unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size, typename SparseGridType, typename lambda_f, typename ... ArgsT > - static void conv_cross(grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args) - { - auto it = grid.template getBlockIterator<1>(start,stop); - - auto & datas = grid.private_get_data(); - auto & headers = grid.private_get_header_mask(); - - typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0; - typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1; - typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2; - - typedef typename SparseGridType::chunking_type chunking; - - typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src>>::type prop_type; - - while (it.isNext()) - { - // Load - long int offset_jump[6]; - - size_t cid = it.getChunkId(); - - auto chunk = datas.get(cid); - auto & mask = headers.get(cid); - - bool exist; - grid_key_dx<3> p = grid.getChunkPos(cid) + grid_key_dx<3>({-1,0,0}); - long int r = grid.getChunk(p,exist); - offset_jump[0] = (r-cid)*decltype(it)::sizeBlock; - - p = grid.getChunkPos(cid) + grid_key_dx<3>({1,0,0}); - r = grid.getChunk(p,exist); - offset_jump[1] = (r-cid)*decltype(it)::sizeBlock; - - p = grid.getChunkPos(cid) + grid_key_dx<3>({0,-1,0}); - r = grid.getChunk(p,exist); - offset_jump[2] = (r-cid)*decltype(it)::sizeBlock; - - p = 
grid.getChunkPos(cid) + grid_key_dx<3>({0,1,0}); - r = grid.getChunk(p,exist); - offset_jump[3] = (r-cid)*decltype(it)::sizeBlock; - - p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,-1}); - r = grid.getChunk(p,exist); - offset_jump[4] = (r-cid)*decltype(it)::sizeBlock; - - p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,1}); - r = grid.getChunk(p,exist); - offset_jump[5] = (r-cid)*decltype(it)::sizeBlock; - - // Load offset jumps - - // construct a row mask - - long int s2 = 0; - - typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type sz; - typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type sy; - typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type sx; - - - bool mask_row[sx::value]; - - for (int k = 0 ; k < sx::value ; k++) - { - mask_row[k] = (k >= it.start(0) && k < it.stop(0))?true:false; - } - - for (int v = it.start(2) ; v < it.stop(2) ; v++) - { - for (int j = it.start(1) ; j < it.stop(1) ; j++) - { - s2 = it.Lin(0,j,v); - for (int k = 0 ; k < sx::value ; k += Vc::Vector<prop_type>::Size) - { - // we do only id exist the point - if (*(int *)&mask.mask[s2] == 0) {s2 += Vc::Vector<prop_type>::Size; continue;} - - data_il<Vc::Vector<prop_type>::Size> mxm; - data_il<Vc::Vector<prop_type>::Size> mxp; - data_il<Vc::Vector<prop_type>::Size> mym; - data_il<Vc::Vector<prop_type>::Size> myp; - data_il<Vc::Vector<prop_type>::Size> mzm; - data_il<Vc::Vector<prop_type>::Size> mzp; - - cross_stencil_v cs; - - Vc::Vector<prop_type> cmd(&chunk.template get<prop_src>()[s2]); - - // Load x-1 - long int sumxm = s2-1; - sumxm += (k==0)?offset_jump[0] + sx::value:0; - - // Load x+1 - long int sumxp = s2+Vc::Vector<prop_type>::Size; - sumxp += (k+Vc::Vector<prop_type>::Size == sx::value)?offset_jump[1] - sx::value:0; - - long int sumym = (j == 0)?offset_jump[2] + (sy::value-1)*sx::value:-sx::value; - sumym += s2; - long int sumyp = (j == sy::value-1)?offset_jump[3] - (sy::value - 
1)*sx::value:sx::value; - sumyp += s2; - long int sumzm = (v == 0)?offset_jump[4] + (sz::value-1)*sx::value*sy::value:-sx::value*sy::value; - sumzm += s2; - long int sumzp = (v == sz::value-1)?offset_jump[5] - (sz::value - 1)*sx::value*sy::value:sx::value*sy::value; - sumzp += s2; - - if (Vc::Vector<prop_type>::Size == 2) - { - mxm.i = *(short int *)&mask.mask[s2]; - mxm.i = mxm.i << 8; - mxm.i |= (short int)mask.mask[sumxm]; - - mxp.i = *(short int *)&mask.mask[s2]; - mxp.i = mxp.i >> 8; - mxp.i |= ((short int)mask.mask[sumxp]) << (Vc::Vector<prop_type>::Size - 1)*8; - - mym.i = *(short int *)&mask.mask[sumym]; - myp.i = *(short int *)&mask.mask[sumyp]; - - mzm.i = *(short int *)&mask.mask[sumzm]; - mzp.i = *(short int *)&mask.mask[sumzp]; - } - else if (Vc::Vector<prop_type>::Size == 4) - { - mxm.i = *(int *)&mask.mask[s2]; - mxm.i = mxm.i << 8; - mxm.i |= (int)mask.mask[sumxm]; - - mxp.i = *(int *)&mask.mask[s2]; - mxp.i = mxp.i >> 8; - mxp.i |= ((int)mask.mask[sumxp]) << (Vc::Vector<prop_type>::Size - 1)*8; - - mym.i = *(int *)&mask.mask[sumym]; - myp.i = *(int *)&mask.mask[sumyp]; - - mzm.i = *(int *)&mask.mask[sumzm]; - mzp.i = *(int *)&mask.mask[sumzp]; - } - else - { - std::cout << __FILE__ << ":" << __LINE__ << " UNSUPPORTED" << std::endl; - } - - cs.xm = cmd; - cs.xm = cs.xm.shifted(-1); - cs.xm[0] = chunk.template get<prop_src>()[sumxm]; - - - cs.xp = cmd; - cs.xp = cs.xp.shifted(1); - cs.xp[Vc::Vector<prop_type>::Size - 1] = chunk.template get<prop_src>()[sumxp]; - - // Load y and z direction - - cs.ym.load(&chunk.template get<prop_src>()[sumym],Vc::Aligned); - cs.yp.load(&chunk.template get<prop_src>()[sumyp],Vc::Aligned); - cs.zm.load(&chunk.template get<prop_src>()[sumzm],Vc::Aligned); - cs.zp.load(&chunk.template get<prop_src>()[sumzp],Vc::Aligned); - - // Calculate - - data_il<Vc::Vector<prop_type>::Size> tot_m; - tot_m.i = mxm.i + mxp.i + mym.i + myp.i + mzm.i + mzp.i; - - Vc::Vector<prop_type> res = func(cmd,cs,tot_m.uc,args ... 
); - - Vc::Mask<prop_type> m(&mask_row[k]); - - res.store(&chunk.template get<prop_dst>()[s2],m,Vc::Aligned); - - s2 += Vc::Vector<prop_type>::Size; - } - } - } - - ++it; - } - } - - - template<bool findNN, typename NNType, unsigned int prop_src1, unsigned int prop_src2, - unsigned int prop_dst1, unsigned int prop_dst2, - unsigned int stencil_size , unsigned int N, - typename SparseGridType, typename lambda_f, typename ... ArgsT > - static void conv2(int (& stencil)[N][3], grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args) - { - auto it = grid.template getBlockIterator<stencil_size>(start,stop); - - typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src1>>::type prop_type; - - unsigned char mask[decltype(it)::sizeBlockBord]; - unsigned char mask_sum[decltype(it)::sizeBlockBord]; - __attribute__ ((aligned (32))) prop_type block_bord_src1[decltype(it)::sizeBlockBord]; - __attribute__ ((aligned (32))) prop_type block_bord_dst1[decltype(it)::sizeBlock]; - __attribute__ ((aligned (32))) prop_type block_bord_src2[decltype(it)::sizeBlockBord]; - __attribute__ ((aligned (32))) prop_type block_bord_dst2[decltype(it)::sizeBlock]; - - typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0; - typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1; - typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2; - - while (it.isNext()) - { - it.template loadBlockBorder<prop_src1,NNType,findNN>(block_bord_src1,mask); - it.template loadBlockBorder<prop_src2,NNType,findNN>(block_bord_src2,mask); - - // Sum the mask - for (int k = it.start_b(2) ; k < it.stop_b(2) ; k++) - { - for (int j = it.start_b(1) ; j < it.stop_b(1) ; j++) - { - int cc = it.LinB(it.start_b(0),j,k); - int c[N]; - - for (int s = 0 ; s < N ; s++) - { - c[s] = 
it.LinB(it.start_b(0)+stencil[s][0],j+stencil[s][1],k+stencil[s][2]); - } - - for (int i = it.start_b(0) ; i < it.stop_b(0) ; i += sizeof(size_t)) - { - size_t cmd = *(size_t *)&mask[cc]; - - if (cmd == 0) {continue;} - - - size_t xm[N]; - - for (int s = 0 ; s < N ; s++) - { - xm[s] = *(size_t *)&mask[c[s]]; - } - - size_t sum = 0; - for (int s = 0 ; s < N ; s++) - { - sum += xm[s]; - } - - *(size_t *)&mask_sum[cc] = sum; - - cc += sizeof(size_t); - for (int s = 0 ; s < N ; s++) - { - c[s] += sizeof(size_t); - } - } - } - } - - for (int k = it.start_b(2) ; k < it.stop_b(2) ; k++) - { - for (int j = it.start_b(1) ; j < it.stop_b(1) ; j++) - { - int cc = it.LinB(it.start_b(0),j,k); - int c[N]; - - int cd = it.LinB_off(it.start_b(0),j,k); - - for (int s = 0 ; s < N ; s++) - { - c[s] = it.LinB(it.start_b(0)+stencil[s][0],j+stencil[s][1],k+stencil[s][2]); - } - - for (int i = it.start_b(0) ; i < it.stop_b(0) ; i += Vc::Vector<prop_type>::Size) - { - Vc::Mask<prop_type> cmp; - - for (int s = 0 ; s < Vc::Vector<prop_type>::Size ; s++) - { - cmp[s] = (mask[cc+s] == true); - } - - // we do only id exist the point - if (Vc::none_of(cmp) == true) {continue;} - - Vc::Mask<prop_type> surround; - - Vc::Vector<prop_type> xs1[N+1]; - Vc::Vector<prop_type> xs2[N+1]; - - xs1[0] = Vc::Vector<prop_type>(&block_bord_src1[cc],Vc::Unaligned); - xs2[0] = Vc::Vector<prop_type>(&block_bord_src2[cc],Vc::Unaligned); - - for (int s = 1 ; s < N+1 ; s++) - { - xs1[s] = Vc::Vector<prop_type>(&block_bord_src1[c[s-1]],Vc::Unaligned); - xs2[s] = Vc::Vector<prop_type>(&block_bord_src2[c[s-1]],Vc::Unaligned); - } - - Vc::Vector<prop_type> vo1; - Vc::Vector<prop_type> vo2; - - func(vo1, vo2, xs1, xs2, &mask_sum[cc], args ...); - - vo1.store(&block_bord_dst1[cd],cmp,Vc::Aligned); - vo2.store(&block_bord_dst2[cd],cmp,Vc::Aligned); - - cc += Vc::Vector<prop_type>::Size; - for (int s = 0 ; s < N ; s++) - { - c[s] += Vc::Vector<prop_type>::Size; - } - cd += Vc::Vector<prop_type>::Size; - } - } - } - - 
it.template storeBlock<prop_dst1>(block_bord_dst1); - it.template storeBlock<prop_dst2>(block_bord_dst2); - - ++it; - } - } - - template<bool findNN, unsigned int prop_src1, unsigned int prop_src2, unsigned int prop_dst1, unsigned int prop_dst2, unsigned int stencil_size, typename SparseGridType, typename lambda_f, typename ... ArgsT > - static void conv_cross2(grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args) - { - auto it = grid.template getBlockIterator<stencil_size>(start,stop); - - auto & datas = grid.private_get_data(); - auto & headers = grid.private_get_header_mask(); - - typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0; - typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1; - typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2; - - typedef typename SparseGridType::chunking_type chunking; - - typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src1>>::type prop_type; - - while (it.isNext()) - { - // Load - long int offset_jump[6]; - - size_t cid = it.getChunkId(); - - auto chunk = datas.get(cid); - auto & mask = headers.get(cid); - - bool exist; - grid_key_dx<3> p = grid.getChunkPos(cid) + grid_key_dx<3>({-1,0,0}); - long int r = grid.getChunk(p,exist); - offset_jump[0] = (r-cid)*decltype(it)::sizeBlock; - - p = grid.getChunkPos(cid) + grid_key_dx<3>({1,0,0}); - r = grid.getChunk(p,exist); - offset_jump[1] = (r-cid)*decltype(it)::sizeBlock; - - p = grid.getChunkPos(cid) + grid_key_dx<3>({0,-1,0}); - r = grid.getChunk(p,exist); - offset_jump[2] = (r-cid)*decltype(it)::sizeBlock; - - p = grid.getChunkPos(cid) + grid_key_dx<3>({0,1,0}); - r = grid.getChunk(p,exist); - offset_jump[3] = (r-cid)*decltype(it)::sizeBlock; - - p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,-1}); - r = grid.getChunk(p,exist); - 
offset_jump[4] = (r-cid)*decltype(it)::sizeBlock; - - p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,1}); - r = grid.getChunk(p,exist); - offset_jump[5] = (r-cid)*decltype(it)::sizeBlock; - - // Load offset jumps - - // construct a row mask - - long int s2 = 0; - - typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type sz; - typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type sy; - typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type sx; - - - bool mask_row[sx::value]; - - for (int k = 0 ; k < sx::value ; k++) - { - mask_row[k] = (k >= it.start(0) && k < it.stop(0))?true:false; - } - - for (int v = it.start(2) ; v < it.stop(2) ; v++) - { - for (int j = it.start(1) ; j < it.stop(1) ; j++) - { - s2 = it.Lin(0,j,v); - for (int k = 0 ; k < sx::value ; k += Vc::Vector<prop_type>::Size) - { - // we do only id exist the point - if (*(int *)&mask.mask[s2] == 0) {s2 += Vc::Vector<prop_type>::Size; continue;} - - data_il<4> mxm; - data_il<4> mxp; - data_il<4> mym; - data_il<4> myp; - data_il<4> mzm; - data_il<4> mzp; - - cross_stencil_v cs1; - cross_stencil_v cs2; - - Vc::Vector<prop_type> cmd1(&chunk.template get<prop_src1>()[s2]); - Vc::Vector<prop_type> cmd2(&chunk.template get<prop_src2>()[s2]); - - // Load x-1 - long int sumxm = s2-1; - sumxm += (k==0)?offset_jump[0] + sx::value:0; - - // Load x+1 - long int sumxp = s2+Vc::Vector<prop_type>::Size; - sumxp += (k+Vc::Vector<prop_type>::Size == sx::value)?offset_jump[1] - sx::value:0; - - long int sumym = (j == 0)?offset_jump[2] + (sy::value-1)*sx::value:-sx::value; - sumym += s2; - long int sumyp = (j == sy::value-1)?offset_jump[3] - (sy::value - 1)*sx::value:sx::value; - sumyp += s2; - long int sumzm = (v == 0)?offset_jump[4] + (sz::value-1)*sx::value*sy::value:-sx::value*sy::value; - sumzm += s2; - long int sumzp = (v == sz::value-1)?offset_jump[5] - (sz::value - 1)*sx::value*sy::value:sx::value*sy::value; - sumzp += s2; - - mxm.i 
= *(int *)&mask.mask[s2]; - mxm.i = mxm.i << 8; - mxm.i |= (int)mask.mask[sumxm]; - - mxp.i = *(int *)&mask.mask[s2]; - mxp.i = mxp.i >> 8; - mxp.i |= ((int)mask.mask[sumxp]) << (Vc::Vector<prop_type>::Size - 1)*8; - - mym.i = *(int *)&mask.mask[sumym]; - myp.i = *(int *)&mask.mask[sumyp]; - - mzm.i = *(int *)&mask.mask[sumzm]; - mzp.i = *(int *)&mask.mask[sumzp]; - - cs1.xm = cmd1; - cs1.xm = cs1.xm.shifted(-1); - cs1.xm[0] = chunk.template get<prop_src1>()[sumxm]; - - cs2.xm = cmd2; - cs2.xm = cs2.xm.shifted(-1); - cs2.xm[0] = chunk.template get<prop_src2>()[sumxm]; - - cs1.xp = cmd1; - cs1.xp = cs1.xp.shifted(1); - cs1.xp[Vc::Vector<prop_type>::Size - 1] = chunk.template get<prop_src1>()[sumxp]; - - cs2.xp = cmd2; - cs2.xp = cs2.xp.shifted(1); - cs2.xp[Vc::Vector<prop_type>::Size - 1] = chunk.template get<prop_src2>()[sumxp]; - - // Load y and z direction - - cs1.ym.load(&chunk.template get<prop_src1>()[sumym],Vc::Aligned); - cs1.yp.load(&chunk.template get<prop_src1>()[sumyp],Vc::Aligned); - cs1.zm.load(&chunk.template get<prop_src1>()[sumzm],Vc::Aligned); - cs1.zp.load(&chunk.template get<prop_src1>()[sumzp],Vc::Aligned); - - cs2.ym.load(&chunk.template get<prop_src2>()[sumym],Vc::Aligned); - cs2.yp.load(&chunk.template get<prop_src2>()[sumyp],Vc::Aligned); - cs2.zm.load(&chunk.template get<prop_src2>()[sumzm],Vc::Aligned); - cs2.zp.load(&chunk.template get<prop_src2>()[sumzp],Vc::Aligned); - - // Calculate - - data_il<4> tot_m; - tot_m.i = mxm.i + mxp.i + mym.i + myp.i + mzm.i + mzp.i; - - Vc::Vector<prop_type> res1; - Vc::Vector<prop_type> res2; - - func(res1,res2,cmd1,cmd2,cs1,cs2,tot_m.uc,args ... ); - - Vc::Mask<prop_type> m(&mask_row[k]); - - res1.store(&chunk.template get<prop_dst1>()[s2],m,Vc::Aligned); - res2.store(&chunk.template get<prop_dst2>()[s2],m,Vc::Aligned); - - s2 += Vc::Vector<prop_type>::Size; - } - } - } - - ++it; - } - } - -}; - -#endif template<unsigned int N> struct load_mask_impl @@ -1089,65 +363,19 @@ class sgrid_cpu //! 
grid size information with shift grid_lin g_sm_shift; - //! conversion position in the chunks - grid_key_dx<dim> pos_chunk[chunking::size::value]; - - //! size of the chunk - size_t sz_cnk[dim]; - - openfpm::vector<size_t> empty_v; - - //! bool that indicate if the NNlist is filled - bool findNN; - - //! for each chunk store the neighborhood chunks - openfpm::vector<int> NNlist; - - /*! \brief Given a key return the chunk than contain that key, in case that chunk does not exist return the key of the - * background chunk - * - * \param v1 point to search - * \param return active_chunk - * \param return index inside the chunk - * - */ - inline void find_active_chunk(const grid_key_dx<dim> & kh,size_t & active_cnk,bool & exist) - { - long int lin_id = g_sm_shift.LinId(kh); - - size_t id = 0; - for (size_t k = 0 ; k < SGRID_CACHE; k++) - {id += (cache[k] == lin_id)?k+1:0;} - - if (id == 0) - { - // we do not have it in cache we check if we have it in the map - - auto fnd = map.find(lin_id); - if (fnd == map.end()) - { - exist = false; - active_cnk = 0; - return; - } - else - {active_cnk = fnd->second;} - - // Add on cache the chunk - cache[cache_pnt] = lin_id; - cached_id[cache_pnt] = active_cnk; - cache_pnt++; - cache_pnt = (cache_pnt >= SGRID_CACHE)?0:cache_pnt; - } - else - { - active_cnk = cached_id[id-1]; - cache_pnt = id; - cache_pnt = (cache_pnt == SGRID_CACHE)?0:cache_pnt; - } + //! conversion position in the chunks + grid_key_dx<dim> pos_chunk[chunking::size::value]; - exist = true; - } + //! size of the chunk + size_t sz_cnk[dim]; + + openfpm::vector<size_t> empty_v; + + //! bool that indicate if the NNlist is filled + bool findNN; + + //! for each chunk store the neighborhood chunks + openfpm::vector<int> NNlist; /*! \brief Given a key return the chunk than contain that key, in case that chunk does not exist return the key of the * background chunk @@ -1358,6 +586,72 @@ class sgrid_cpu } } + /*! 
\brief Given a key return the chunk than contain that key, in case that chunk does not exist return the key of the + * background chunk + * + * \param v1 point to search + * \param return active_chunk + * \param return index inside the chunk + * + */ + inline void find_active_chunk(const grid_key_dx<dim> & kh,size_t & active_cnk,bool & exist) const + { + long int lin_id = g_sm_shift.LinId(kh); + + size_t id = 0; + for (size_t k = 0 ; k < SGRID_CACHE; k++) + {id += (cache[k] == lin_id)?k+1:0;} + + if (id == 0) + { + // we do not have it in cache we check if we have it in the map + + auto fnd = map.find(lin_id); + if (fnd == map.end()) + { + exist = false; + active_cnk = 0; + return; + } + else + {active_cnk = fnd->second;} + + // Add on cache the chunk + cache[cache_pnt] = lin_id; + cached_id[cache_pnt] = active_cnk; + cache_pnt++; + cache_pnt = (cache_pnt >= SGRID_CACHE)?0:cache_pnt; + } + else + { + active_cnk = cached_id[id-1]; + cache_pnt = id; + cache_pnt = (cache_pnt == SGRID_CACHE)?0:cache_pnt; + } + + exist = true; + } + + /*! Given a key v1 in coordinates it calculate the chunk position and the position in the chunk + * + * \param v1 coordinates + * \param chunk position + * \param sub_id element id + * + */ + inline void pre_get(const grid_key_dx<dim> & v1, size_t & active_cnk, size_t & sub_id, bool & exist) const + { + grid_key_dx<dim> kh = v1; + grid_key_dx<dim> kl; + + // shift the key + key_shift<dim,chunking>::shift(kh,kl); + + find_active_chunk(kh,active_cnk,exist); + + sub_id = sublin<dim,typename chunking::shift_c>::lin(kl); + } + /*! 
\brief Before insert data you have to do this * * \param v1 grid key where you want to insert data @@ -1445,44 +739,14 @@ class sgrid_cpu inline void remove_point(const grid_key_dx<dim> & v1) { + bool exist; size_t active_cnk = 0; + size_t sub_id; - grid_key_dx<dim> kh = v1; - grid_key_dx<dim> kl; - - // shift the key - key_shift<dim,chunking>::shift(kh,kl); - - long int lin_id = g_sm_shift.LinId(kh); - - size_t id = 0; - for (size_t k = 0 ; k < SGRID_CACHE; k++) - {id += (cache[k] == lin_id)?k+1:0;} - - if (id == 0) - { - // we do not have it in cache we check if we have it in the map - - auto fnd = map.find(lin_id); - if (fnd == map.end()) - {return;} - else - {active_cnk = fnd->second;} - - // Add on cache the chunk - cache[cache_pnt] = lin_id; - cached_id[cache_pnt] = active_cnk; - cache_pnt++; - cache_pnt = (cache_pnt >= SGRID_CACHE)?0:cache_pnt; - } - else - { - active_cnk = cached_id[id-1]; - cache_pnt = id; - cache_pnt = (cache_pnt == SGRID_CACHE)?0:cache_pnt; - } + pre_get(v1,active_cnk,sub_id,exist); - size_t sub_id = sublin<dim,typename chunking::shift_c>::lin(kl); + if (exist == false) + {return;} // eliminate the element @@ -1631,79 +895,11 @@ public: inline r_type insert(const grid_key_dx<dim> & v1) { size_t active_cnk = 0; + size_t ele_id = 0; - grid_key_dx<dim> kh = v1; - grid_key_dx<dim> kl; - - // shift the key - key_shift<dim,chunking>::shift(kh,kl); - - long int lin_id = g_sm_shift.LinId(kh); - - size_t id = 0; - for (size_t k = 0 ; k < SGRID_CACHE; k++) - {id += (cache[k] == lin_id)?k+1:0;} - - if (id == 0) - { - // we do not have it in cache we check if we have it in the map - - auto fnd = map.find(lin_id); - if (fnd == map.end()) - { - // we do not have it in the map create a chunk - - map[lin_id] = chunks.size(); - chunks.add(); - header_inf.add(); - header_inf.last().pos = kh; - header_inf.last().nele = 0; - header_mask.add(); - - // set the mask to null - auto & h = header_mask.last().mask; - - for (size_t i = 0 ; i < chunking::size::value 
; i++) - {h[i] = 0;} - - key_shift<dim,chunking>::cpos(header_inf.last().pos); - - active_cnk = chunks.size() - 1; - } - else - { - // we have it in the map - - active_cnk = fnd->second; - } - - // Add on cache the chunk - cache[cache_pnt] = lin_id; - cached_id[cache_pnt] = active_cnk; - cache_pnt++; - cache_pnt = (cache_pnt >= SGRID_CACHE)?0:cache_pnt; - } - else - { - active_cnk = cached_id[id-1]; - cache_pnt = id; - cache_pnt = (cache_pnt == SGRID_CACHE)?0:cache_pnt; - } - - size_t sub_id = sublin<dim,typename chunking::shift_c>::lin(kl); - - // the chunk is in cache, solve - - // we notify that we added one element - auto & hc = header_inf.get(active_cnk); - auto & hm = header_mask.get(active_cnk); - - // we set the mask - - hc.nele = (hm.mask[sub_id] & 1)?hc.nele:hc.nele + 1; - hm.mask[sub_id] |= 1; + pre_insert(v1,active_cnk,ele_id); - return chunks.template get<p>(active_cnk)[sub_id]; + return chunks.template get<p>(active_cnk)[ele_id]; } /*! \brief Get the reference of the selected element @@ -1742,36 +938,14 @@ public: template <unsigned int p> inline auto get(const grid_key_dx<dim> & v1) const -> decltype(openfpm::as_const(chunks.template get<p>(0))[0]) { + bool exist; size_t active_cnk; - grid_key_dx<dim> kh = v1; - grid_key_dx<dim> kl; - - // shift the key - key_shift<dim,chunking>::shift(kh,kl); - - long int lin_id = g_sm_shift.LinId(kh); - - size_t id = 0; - for (size_t k = 0 ; k < SGRID_CACHE; k++) - {id += (cache[k] == lin_id)?k+1:0;} + size_t sub_id; - if (id == 0) - { - auto it = map.find(lin_id); - - if (it == map.end()) - {return background.template get<p>();} - - add_on_cache(lin_id,it->second); - - active_cnk = it->second; - } - else - { - active_cnk = cached_id[id-1]; - } + pre_get(v1,active_cnk,sub_id,exist); - size_t sub_id = sublin<dim,typename chunking::shift_c>::lin(kl); + if (exist == false) + {return background.template get<p>();} // we check the mask auto & hm = header_mask.get(active_cnk); @@ -1792,36 +966,14 @@ public: template 
<unsigned int p> inline auto get(const grid_key_dx<dim> & v1) -> decltype(openfpm::as_const(chunks.template get<p>(0))[0]) { + bool exist; size_t active_cnk; - grid_key_dx<dim> kh = v1; - grid_key_dx<dim> kl; - - // shift the key - key_shift<dim,chunking>::shift(kh,kl); - - long int lin_id = g_sm_shift.LinId(kh); - - size_t id = 0; - for (size_t k = 0 ; k < SGRID_CACHE; k++) - {id += (cache[k] == lin_id)?k+1:0;} - - if (id == 0) - { - auto it = map.find(lin_id); - - if (it == map.end()) - {return background.template get<p>();} + size_t sub_id; - add_on_cache(lin_id,it->second); - - active_cnk = it->second; - } - else - { - active_cnk = cached_id[id-1]; - } + pre_get(v1,active_cnk,sub_id,exist); - size_t sub_id = sublin<dim,typename chunking::shift_c>::lin(kl); + if (exist == false) + {return background.template get<p>();} // we check the mask auto & hc = header_inf.get(active_cnk); @@ -1842,36 +994,14 @@ public: */ inline bool existPoint(const grid_key_dx<dim> & v1) const { + bool exist; size_t active_cnk; - grid_key_dx<dim> kh = v1; - grid_key_dx<dim> kl; - - // shift the key - key_shift<dim,chunking>::shift(kh,kl); - - long int lin_id = g_sm_shift.LinId(kh); + size_t sub_id; - size_t id = 0; - for (size_t k = 0 ; k < SGRID_CACHE; k++) - {id += (cache[k] == lin_id)?k+1:0;} - - if (id == 0) - { - auto it = map.find(lin_id); - - if (it == map.end()) - {return false;} - - add_on_cache(lin_id,it->second); + pre_get(v1,active_cnk,sub_id,exist); - active_cnk = it->second; - } - else - { - active_cnk = cached_id[id-1]; - } - - size_t sub_id = sublin<dim,typename chunking::shift_c>::lin(kl); + if (exist == false) + {return false;} // we check the mask auto & hm = header_mask.get(active_cnk); @@ -1966,9 +1096,6 @@ public: */ const grid_lin & getGrid() const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return g_sm; } @@ -2635,7 +1762,7 @@ public: * \param box_src box to kill the points * */ - void remove(Box<dim,size_t> & section_to_delete) + void remove(Box<dim,long 
int> & section_to_delete) { grid_sm<dim,void> gs_cnk(sz_cnk); @@ -3301,8 +2428,8 @@ public: tmp_pos.add(p); tmp_prp.add(); - copy_prop_to_vector<decltype(chunks. get_o(key.getChunk())),decltype(tmp_prp.last())> - cp(chunks. get_o(key.getChunk()),tmp_prp.last(),key.getPos()); + copy_prop_to_vector<decltype(chunks.get_o(key.getChunk())),decltype(tmp_prp.last())> + cp(chunks.get_o(key.getChunk()),tmp_prp.last(),key.getPos()); boost::mpl::for_each_ref< boost::mpl::range_c<int,0,T::max_prop> >(cp); diff --git a/src/SparseGrid/SparseGrid_conv_opt.hpp b/src/SparseGrid/SparseGrid_conv_opt.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6472e2deb400b347cb12a4ab2795ca24ab229a25 --- /dev/null +++ b/src/SparseGrid/SparseGrid_conv_opt.hpp @@ -0,0 +1,742 @@ +/* + * SparseGrid_conv_opt.hpp + * + * Created on: Jul 19, 2020 + * Author: i-bird + */ + +#ifndef SPARSEGRID_CONV_OPT_HPP_ +#define SPARSEGRID_CONV_OPT_HPP_ + +template<unsigned int l> +union data_il +{ +}; + +template<> +union data_il<8> +{ + typedef long int type; + + unsigned char uc[8]; + long int i; +}; + +template<> +union data_il<4> +{ + typedef int type; + + unsigned char uc[4]; + int i; +}; + +template<> +union data_il<2> +{ + typedef short int type; + + unsigned char uc[2]; + short int i; +}; + +template<> +union data_il<1> +{ + typedef char type; + + unsigned char uc[4]; + char i; +}; + +template<unsigned int dim> +struct conv_impl +{ + template<unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size , unsigned int N, typename SparseGridType, typename lambda_f, typename ... ArgsT > + void conv(int (& stencil)[N][3], grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... 
args) + { +#ifndef __NVCC__ + std::cout << __FILE__ << ":" << __LINE__ << " error conv operation not implemented for this dimension " << std::endl; +#else + std::cout << __FILE__ << ":" << __LINE__ << " error conv is unsupported when compiled on NVCC " << std::endl; +#endif + } + + template<bool findNN, unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size, typename SparseGridType, typename lambda_f, typename ... ArgsT > + static void conv_cross(grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args) + { +#ifndef __NVCC__ + std::cout << __FILE__ << ":" << __LINE__ << " error conv_cross operation not implemented for this dimension " << std::endl; +#else + std::cout << __FILE__ << ":" << __LINE__ << " error conv_cross is unsupported when compiled on NVCC " << std::endl; +#endif + } + + template<bool findNN, typename NNType, unsigned int prop_src1, unsigned int prop_src2, + unsigned int prop_dst1, unsigned int prop_dst2, + unsigned int stencil_size , unsigned int N, + typename SparseGridType, typename lambda_f, typename ... ArgsT > + static void conv2(int (& stencil)[N][3], grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args) + { +#ifndef __NVCC__ + std::cout << __FILE__ << ":" << __LINE__ << " error conv2 operation not implemented for this dimension " << std::endl; +#else + std::cout << __FILE__ << ":" << __LINE__ << " error conv2 is unsupported when compiled on NVCC " << std::endl; +#endif + } + + template<bool findNN, unsigned int prop_src1, unsigned int prop_src2, unsigned int prop_dst1, unsigned int prop_dst2, unsigned int stencil_size, typename SparseGridType, typename lambda_f, typename ... ArgsT > + static void conv_cross2(grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... 
args) + { +#ifndef __NVCC__ + std::cout << __FILE__ << ":" << __LINE__ << " error conv_cross2 operation not implemented for this dimension " << std::endl; +#else + std::cout << __FILE__ << ":" << __LINE__ << " error conv_cross2 is unsupported when compiled on NVCC " << std::endl; +#endif + } +}; + +#ifndef __NVCC__ + +struct cross_stencil_v +{ + Vc::double_v xm; + Vc::double_v xp; + Vc::double_v ym; + Vc::double_v yp; + Vc::double_v zm; + Vc::double_v zp; +}; + +template<> +struct conv_impl<3> +{ + template<bool findNN, typename NNtype, unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size , unsigned int N, typename SparseGridType, typename lambda_f, typename ... ArgsT > + static void conv(int (& stencil)[N][3], grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args) + { + auto it = grid.template getBlockIterator<stencil_size>(start,stop); + + typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src>>::type prop_type; + + unsigned char mask[decltype(it)::sizeBlockBord]; + unsigned char mask_sum[decltype(it)::sizeBlockBord]; + unsigned char mask_unused[decltype(it)::sizeBlock]; + __attribute__ ((aligned (32))) prop_type block_bord_src[decltype(it)::sizeBlockBord]; + __attribute__ ((aligned (32))) prop_type block_bord_dst[decltype(it)::sizeBlock]; + + typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0; + typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1; + typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2; + + while (it.isNext()) + { + it.template loadBlockBorder<prop_src,NNtype,findNN>(block_bord_src,mask); + + if (it.start_b(2) != stencil_size || it.start_b(1) != stencil_size || it.start_b(0) != stencil_size || + it.stop_b(2) != sz2::value+stencil_size || it.stop_b(1) != sz1::value+stencil_size || 
it.stop_b(0) != sz0::value+stencil_size) + { + auto & header_mask = grid.private_get_header_mask(); + auto & header_inf = grid.private_get_header_inf(); + + loadBlock_impl<prop_dst,0,3,typename decltype(it)::vector_blocks_exts_type, typename decltype(it)::vector_ext_type>::template loadBlock<decltype(it)::sizeBlock>(block_bord_dst,grid,it.getChunkId(),mask_unused); + } + + // Sum the mask + for (int k = it.start_b(2) ; k < it.stop_b(2) ; k++) + { + for (int j = it.start_b(1) ; j < it.stop_b(1) ; j++) + { + int cc = it.LinB(it.start_b(0),j,k); + int c[N]; + + for (int s = 0 ; s < N ; s++) + { + c[s] = it.LinB(it.start_b(0)+stencil[s][0],j+stencil[s][1],k+stencil[s][2]); + } + + for (int i = it.start_b(0) ; i < it.stop_b(0) ; i += sizeof(size_t)) + { + size_t cmd = *(size_t *)&mask[cc]; + + if (cmd != 0) + { + size_t xm[N]; + + for (int s = 0 ; s < N ; s++) + { + xm[s] = *(size_t *)&mask[c[s]]; + } + + size_t sum = 0; + for (int s = 0 ; s < N ; s++) + { + sum += xm[s]; + } + + *(size_t *)&mask_sum[cc] = sum; + } + + cc += sizeof(size_t); + for (int s = 0 ; s < N ; s++) + { + c[s] += sizeof(size_t); + } + } + } + } + + for (int k = it.start_b(2) ; k < it.stop_b(2) ; k++) + { + for (int j = it.start_b(1) ; j < it.stop_b(1) ; j++) + { + int cc = it.LinB(it.start_b(0),j,k); + int c[N]; + + int cd = it.LinB_off(it.start_b(0),j,k); + + for (int s = 0 ; s < N ; s++) + { + c[s] = it.LinB(it.start_b(0)+stencil[s][0],j+stencil[s][1],k+stencil[s][2]); + } + + for (int i = it.start_b(0) ; i < it.stop_b(0) ; i += Vc::Vector<prop_type>::Size) + { + Vc::Mask<prop_type> cmp; + + for (int s = 0 ; s < Vc::Vector<prop_type>::Size ; s++) + { + cmp[s] = (mask[cc+s] == true && i+s < it.stop_b(0)); + } + + // we do only if exist the point + if (Vc::none_of(cmp) == false) + { + Vc::Mask<prop_type> surround; + + Vc::Vector<prop_type> xs[N+1]; + + xs[0] = Vc::Vector<prop_type>(&block_bord_src[cc],Vc::Unaligned); + + for (int s = 1 ; s < N+1 ; s++) + { + xs[s] = 
Vc::Vector<prop_type>(&block_bord_src[c[s-1]],Vc::Unaligned); + } + + auto res = func(xs, &mask_sum[cc], args ...); + + res.store(&block_bord_dst[cd],cmp,Vc::Aligned); + } + + cc += Vc::Vector<prop_type>::Size; + for (int s = 0 ; s < N ; s++) + { + c[s] += Vc::Vector<prop_type>::Size; + } + cd += Vc::Vector<prop_type>::Size; + } + } + } + + it.template storeBlock<prop_dst>(block_bord_dst); + + ++it; + } + } + + template<bool findNN, unsigned int prop_src, unsigned int prop_dst, unsigned int stencil_size, typename SparseGridType, typename lambda_f, typename ... ArgsT > + static void conv_cross(grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args) + { + auto it = grid.template getBlockIterator<1>(start,stop); + + auto & datas = grid.private_get_data(); + auto & headers = grid.private_get_header_mask(); + + typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0; + typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1; + typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2; + + typedef typename SparseGridType::chunking_type chunking; + + typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src>>::type prop_type; + + while (it.isNext()) + { + // Load + long int offset_jump[6]; + + size_t cid = it.getChunkId(); + + auto chunk = datas.get(cid); + auto & mask = headers.get(cid); + + bool exist; + grid_key_dx<3> p = grid.getChunkPos(cid) + grid_key_dx<3>({-1,0,0}); + long int r = grid.getChunk(p,exist); + offset_jump[0] = (r-cid)*decltype(it)::sizeBlock; + + p = grid.getChunkPos(cid) + grid_key_dx<3>({1,0,0}); + r = grid.getChunk(p,exist); + offset_jump[1] = (r-cid)*decltype(it)::sizeBlock; + + p = grid.getChunkPos(cid) + grid_key_dx<3>({0,-1,0}); + r = grid.getChunk(p,exist); + offset_jump[2] = (r-cid)*decltype(it)::sizeBlock; + + p = 
grid.getChunkPos(cid) + grid_key_dx<3>({0,1,0}); + r = grid.getChunk(p,exist); + offset_jump[3] = (r-cid)*decltype(it)::sizeBlock; + + p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,-1}); + r = grid.getChunk(p,exist); + offset_jump[4] = (r-cid)*decltype(it)::sizeBlock; + + p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,1}); + r = grid.getChunk(p,exist); + offset_jump[5] = (r-cid)*decltype(it)::sizeBlock; + + // Load offset jumps + + // construct a row mask + + long int s2 = 0; + + typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type sz; + typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type sy; + typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type sx; + + + bool mask_row[sx::value]; + + for (int k = 0 ; k < sx::value ; k++) + { + mask_row[k] = (k >= it.start(0) && k < it.stop(0))?true:false; + } + + for (int v = it.start(2) ; v < it.stop(2) ; v++) + { + for (int j = it.start(1) ; j < it.stop(1) ; j++) + { + s2 = it.Lin(0,j,v); + for (int k = 0 ; k < sx::value ; k += Vc::Vector<prop_type>::Size) + { + // we do only id exist the point + if (*(int *)&mask.mask[s2] == 0) {s2 += Vc::Vector<prop_type>::Size; continue;} + + data_il<Vc::Vector<prop_type>::Size> mxm; + data_il<Vc::Vector<prop_type>::Size> mxp; + data_il<Vc::Vector<prop_type>::Size> mym; + data_il<Vc::Vector<prop_type>::Size> myp; + data_il<Vc::Vector<prop_type>::Size> mzm; + data_il<Vc::Vector<prop_type>::Size> mzp; + + cross_stencil_v cs; + + Vc::Vector<prop_type> cmd(&chunk.template get<prop_src>()[s2]); + + // Load x-1 + long int sumxm = s2-1; + sumxm += (k==0)?offset_jump[0] + sx::value:0; + + // Load x+1 + long int sumxp = s2+Vc::Vector<prop_type>::Size; + sumxp += (k+Vc::Vector<prop_type>::Size == sx::value)?offset_jump[1] - sx::value:0; + + long int sumym = (j == 0)?offset_jump[2] + (sy::value-1)*sx::value:-sx::value; + sumym += s2; + long int sumyp = (j == sy::value-1)?offset_jump[3] - (sy::value - 
1)*sx::value:sx::value; + sumyp += s2; + long int sumzm = (v == 0)?offset_jump[4] + (sz::value-1)*sx::value*sy::value:-sx::value*sy::value; + sumzm += s2; + long int sumzp = (v == sz::value-1)?offset_jump[5] - (sz::value - 1)*sx::value*sy::value:sx::value*sy::value; + sumzp += s2; + + if (Vc::Vector<prop_type>::Size == 2) + { + mxm.i = *(short int *)&mask.mask[s2]; + mxm.i = mxm.i << 8; + mxm.i |= (short int)mask.mask[sumxm]; + + mxp.i = *(short int *)&mask.mask[s2]; + mxp.i = mxp.i >> 8; + mxp.i |= ((short int)mask.mask[sumxp]) << (Vc::Vector<prop_type>::Size - 1)*8; + + mym.i = *(short int *)&mask.mask[sumym]; + myp.i = *(short int *)&mask.mask[sumyp]; + + mzm.i = *(short int *)&mask.mask[sumzm]; + mzp.i = *(short int *)&mask.mask[sumzp]; + } + else if (Vc::Vector<prop_type>::Size == 4) + { + mxm.i = *(int *)&mask.mask[s2]; + mxm.i = mxm.i << 8; + mxm.i |= (int)mask.mask[sumxm]; + + mxp.i = *(int *)&mask.mask[s2]; + mxp.i = mxp.i >> 8; + mxp.i |= ((int)mask.mask[sumxp]) << (Vc::Vector<prop_type>::Size - 1)*8; + + mym.i = *(int *)&mask.mask[sumym]; + myp.i = *(int *)&mask.mask[sumyp]; + + mzm.i = *(int *)&mask.mask[sumzm]; + mzp.i = *(int *)&mask.mask[sumzp]; + } + else + { + std::cout << __FILE__ << ":" << __LINE__ << " UNSUPPORTED" << std::endl; + } + + cs.xm = cmd; + cs.xm = cs.xm.shifted(-1); + cs.xm[0] = chunk.template get<prop_src>()[sumxm]; + + + cs.xp = cmd; + cs.xp = cs.xp.shifted(1); + cs.xp[Vc::Vector<prop_type>::Size - 1] = chunk.template get<prop_src>()[sumxp]; + + // Load y and z direction + + cs.ym.load(&chunk.template get<prop_src>()[sumym],Vc::Aligned); + cs.yp.load(&chunk.template get<prop_src>()[sumyp],Vc::Aligned); + cs.zm.load(&chunk.template get<prop_src>()[sumzm],Vc::Aligned); + cs.zp.load(&chunk.template get<prop_src>()[sumzp],Vc::Aligned); + + // Calculate + + data_il<Vc::Vector<prop_type>::Size> tot_m; + tot_m.i = mxm.i + mxp.i + mym.i + myp.i + mzm.i + mzp.i; + + Vc::Vector<prop_type> res = func(cmd,cs,tot_m.uc,args ... 
); + + Vc::Mask<prop_type> m(&mask_row[k]); + + res.store(&chunk.template get<prop_dst>()[s2],m,Vc::Aligned); + + s2 += Vc::Vector<prop_type>::Size; + } + } + } + + ++it; + } + } + + + template<bool findNN, typename NNType, unsigned int prop_src1, unsigned int prop_src2, + unsigned int prop_dst1, unsigned int prop_dst2, + unsigned int stencil_size , unsigned int N, + typename SparseGridType, typename lambda_f, typename ... ArgsT > + static void conv2(int (& stencil)[N][3], grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args) + { + auto it = grid.template getBlockIterator<stencil_size>(start,stop); + + typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src1>>::type prop_type; + + unsigned char mask[decltype(it)::sizeBlockBord]; + unsigned char mask_sum[decltype(it)::sizeBlockBord]; + __attribute__ ((aligned (32))) prop_type block_bord_src1[decltype(it)::sizeBlockBord]; + __attribute__ ((aligned (32))) prop_type block_bord_dst1[decltype(it)::sizeBlock]; + __attribute__ ((aligned (32))) prop_type block_bord_src2[decltype(it)::sizeBlockBord]; + __attribute__ ((aligned (32))) prop_type block_bord_dst2[decltype(it)::sizeBlock]; + + typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0; + typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1; + typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2; + + while (it.isNext()) + { + it.template loadBlockBorder<prop_src1,NNType,findNN>(block_bord_src1,mask); + it.template loadBlockBorder<prop_src2,NNType,findNN>(block_bord_src2,mask); + + // Sum the mask + for (int k = it.start_b(2) ; k < it.stop_b(2) ; k++) + { + for (int j = it.start_b(1) ; j < it.stop_b(1) ; j++) + { + int cc = it.LinB(it.start_b(0),j,k); + int c[N]; + + for (int s = 0 ; s < N ; s++) + { + c[s] = 
it.LinB(it.start_b(0)+stencil[s][0],j+stencil[s][1],k+stencil[s][2]); + } + + for (int i = it.start_b(0) ; i < it.stop_b(0) ; i += sizeof(size_t)) + { + size_t cmd = *(size_t *)&mask[cc]; + + if (cmd == 0) {continue;} + + + size_t xm[N]; + + for (int s = 0 ; s < N ; s++) + { + xm[s] = *(size_t *)&mask[c[s]]; + } + + size_t sum = 0; + for (int s = 0 ; s < N ; s++) + { + sum += xm[s]; + } + + *(size_t *)&mask_sum[cc] = sum; + + cc += sizeof(size_t); + for (int s = 0 ; s < N ; s++) + { + c[s] += sizeof(size_t); + } + } + } + } + + for (int k = it.start_b(2) ; k < it.stop_b(2) ; k++) + { + for (int j = it.start_b(1) ; j < it.stop_b(1) ; j++) + { + int cc = it.LinB(it.start_b(0),j,k); + int c[N]; + + int cd = it.LinB_off(it.start_b(0),j,k); + + for (int s = 0 ; s < N ; s++) + { + c[s] = it.LinB(it.start_b(0)+stencil[s][0],j+stencil[s][1],k+stencil[s][2]); + } + + for (int i = it.start_b(0) ; i < it.stop_b(0) ; i += Vc::Vector<prop_type>::Size) + { + Vc::Mask<prop_type> cmp; + + for (int s = 0 ; s < Vc::Vector<prop_type>::Size ; s++) + { + cmp[s] = (mask[cc+s] == true); + } + + // we do only id exist the point + if (Vc::none_of(cmp) == true) {continue;} + + Vc::Mask<prop_type> surround; + + Vc::Vector<prop_type> xs1[N+1]; + Vc::Vector<prop_type> xs2[N+1]; + + xs1[0] = Vc::Vector<prop_type>(&block_bord_src1[cc],Vc::Unaligned); + xs2[0] = Vc::Vector<prop_type>(&block_bord_src2[cc],Vc::Unaligned); + + for (int s = 1 ; s < N+1 ; s++) + { + xs1[s] = Vc::Vector<prop_type>(&block_bord_src1[c[s-1]],Vc::Unaligned); + xs2[s] = Vc::Vector<prop_type>(&block_bord_src2[c[s-1]],Vc::Unaligned); + } + + Vc::Vector<prop_type> vo1; + Vc::Vector<prop_type> vo2; + + func(vo1, vo2, xs1, xs2, &mask_sum[cc], args ...); + + vo1.store(&block_bord_dst1[cd],cmp,Vc::Aligned); + vo2.store(&block_bord_dst2[cd],cmp,Vc::Aligned); + + cc += Vc::Vector<prop_type>::Size; + for (int s = 0 ; s < N ; s++) + { + c[s] += Vc::Vector<prop_type>::Size; + } + cd += Vc::Vector<prop_type>::Size; + } + } + } + + 
it.template storeBlock<prop_dst1>(block_bord_dst1); + it.template storeBlock<prop_dst2>(block_bord_dst2); + + ++it; + } + } + + template<bool findNN, unsigned int prop_src1, unsigned int prop_src2, unsigned int prop_dst1, unsigned int prop_dst2, unsigned int stencil_size, typename SparseGridType, typename lambda_f, typename ... ArgsT > + static void conv_cross2(grid_key_dx<3> & start, grid_key_dx<3> & stop, SparseGridType & grid , lambda_f func, ArgsT ... args) + { + auto it = grid.template getBlockIterator<stencil_size>(start,stop); + + auto & datas = grid.private_get_data(); + auto & headers = grid.private_get_header_mask(); + + typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<0>>::type sz0; + typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<1>>::type sz1; + typedef typename boost::mpl::at<typename decltype(it)::stop_border_vmpl,boost::mpl::int_<2>>::type sz2; + + typedef typename SparseGridType::chunking_type chunking; + + typedef typename boost::mpl::at<typename SparseGridType::value_type::type, boost::mpl::int_<prop_src1>>::type prop_type; + + while (it.isNext()) + { + // Load + long int offset_jump[6]; + + size_t cid = it.getChunkId(); + + auto chunk = datas.get(cid); + auto & mask = headers.get(cid); + + bool exist; + grid_key_dx<3> p = grid.getChunkPos(cid) + grid_key_dx<3>({-1,0,0}); + long int r = grid.getChunk(p,exist); + offset_jump[0] = (r-cid)*decltype(it)::sizeBlock; + + p = grid.getChunkPos(cid) + grid_key_dx<3>({1,0,0}); + r = grid.getChunk(p,exist); + offset_jump[1] = (r-cid)*decltype(it)::sizeBlock; + + p = grid.getChunkPos(cid) + grid_key_dx<3>({0,-1,0}); + r = grid.getChunk(p,exist); + offset_jump[2] = (r-cid)*decltype(it)::sizeBlock; + + p = grid.getChunkPos(cid) + grid_key_dx<3>({0,1,0}); + r = grid.getChunk(p,exist); + offset_jump[3] = (r-cid)*decltype(it)::sizeBlock; + + p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,-1}); + r = grid.getChunk(p,exist); + 
offset_jump[4] = (r-cid)*decltype(it)::sizeBlock; + + p = grid.getChunkPos(cid) + grid_key_dx<3>({0,0,1}); + r = grid.getChunk(p,exist); + offset_jump[5] = (r-cid)*decltype(it)::sizeBlock; + + // Load offset jumps + + // construct a row mask + + long int s2 = 0; + + typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<2>>::type sz; + typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<1>>::type sy; + typedef typename boost::mpl::at<typename chunking::type,boost::mpl::int_<0>>::type sx; + + + bool mask_row[sx::value]; + + for (int k = 0 ; k < sx::value ; k++) + { + mask_row[k] = (k >= it.start(0) && k < it.stop(0))?true:false; + } + + for (int v = it.start(2) ; v < it.stop(2) ; v++) + { + for (int j = it.start(1) ; j < it.stop(1) ; j++) + { + s2 = it.Lin(0,j,v); + for (int k = 0 ; k < sx::value ; k += Vc::Vector<prop_type>::Size) + { + // we do only id exist the point + if (*(int *)&mask.mask[s2] == 0) {s2 += Vc::Vector<prop_type>::Size; continue;} + + data_il<4> mxm; + data_il<4> mxp; + data_il<4> mym; + data_il<4> myp; + data_il<4> mzm; + data_il<4> mzp; + + cross_stencil_v cs1; + cross_stencil_v cs2; + + Vc::Vector<prop_type> cmd1(&chunk.template get<prop_src1>()[s2]); + Vc::Vector<prop_type> cmd2(&chunk.template get<prop_src2>()[s2]); + + // Load x-1 + long int sumxm = s2-1; + sumxm += (k==0)?offset_jump[0] + sx::value:0; + + // Load x+1 + long int sumxp = s2+Vc::Vector<prop_type>::Size; + sumxp += (k+Vc::Vector<prop_type>::Size == sx::value)?offset_jump[1] - sx::value:0; + + long int sumym = (j == 0)?offset_jump[2] + (sy::value-1)*sx::value:-sx::value; + sumym += s2; + long int sumyp = (j == sy::value-1)?offset_jump[3] - (sy::value - 1)*sx::value:sx::value; + sumyp += s2; + long int sumzm = (v == 0)?offset_jump[4] + (sz::value-1)*sx::value*sy::value:-sx::value*sy::value; + sumzm += s2; + long int sumzp = (v == sz::value-1)?offset_jump[5] - (sz::value - 1)*sx::value*sy::value:sx::value*sy::value; + sumzp += s2; + + mxm.i 
= *(int *)&mask.mask[s2]; + mxm.i = mxm.i << 8; + mxm.i |= (int)mask.mask[sumxm]; + + mxp.i = *(int *)&mask.mask[s2]; + mxp.i = mxp.i >> 8; + mxp.i |= ((int)mask.mask[sumxp]) << (Vc::Vector<prop_type>::Size - 1)*8; + + mym.i = *(int *)&mask.mask[sumym]; + myp.i = *(int *)&mask.mask[sumyp]; + + mzm.i = *(int *)&mask.mask[sumzm]; + mzp.i = *(int *)&mask.mask[sumzp]; + + cs1.xm = cmd1; + cs1.xm = cs1.xm.shifted(-1); + cs1.xm[0] = chunk.template get<prop_src1>()[sumxm]; + + cs2.xm = cmd2; + cs2.xm = cs2.xm.shifted(-1); + cs2.xm[0] = chunk.template get<prop_src2>()[sumxm]; + + cs1.xp = cmd1; + cs1.xp = cs1.xp.shifted(1); + cs1.xp[Vc::Vector<prop_type>::Size - 1] = chunk.template get<prop_src1>()[sumxp]; + + cs2.xp = cmd2; + cs2.xp = cs2.xp.shifted(1); + cs2.xp[Vc::Vector<prop_type>::Size - 1] = chunk.template get<prop_src2>()[sumxp]; + + // Load y and z direction + + cs1.ym.load(&chunk.template get<prop_src1>()[sumym],Vc::Aligned); + cs1.yp.load(&chunk.template get<prop_src1>()[sumyp],Vc::Aligned); + cs1.zm.load(&chunk.template get<prop_src1>()[sumzm],Vc::Aligned); + cs1.zp.load(&chunk.template get<prop_src1>()[sumzp],Vc::Aligned); + + cs2.ym.load(&chunk.template get<prop_src2>()[sumym],Vc::Aligned); + cs2.yp.load(&chunk.template get<prop_src2>()[sumyp],Vc::Aligned); + cs2.zm.load(&chunk.template get<prop_src2>()[sumzm],Vc::Aligned); + cs2.zp.load(&chunk.template get<prop_src2>()[sumzp],Vc::Aligned); + + // Calculate + + data_il<4> tot_m; + tot_m.i = mxm.i + mxp.i + mym.i + myp.i + mzm.i + mzp.i; + + Vc::Vector<prop_type> res1; + Vc::Vector<prop_type> res2; + + func(res1,res2,cmd1,cmd2,cs1,cs2,tot_m.uc,args ... 
); + + Vc::Mask<prop_type> m(&mask_row[k]); + + res1.store(&chunk.template get<prop_dst1>()[s2],m,Vc::Aligned); + res2.store(&chunk.template get<prop_dst2>()[s2],m,Vc::Aligned); + + s2 += Vc::Vector<prop_type>::Size; + } + } + } + + ++it; + } + } + +}; + +#endif + + +#endif /* SPARSEGRID_CONV_OPT_HPP_ */ diff --git a/src/SparseGrid/SparseGrid_unit_tests.cpp b/src/SparseGrid/SparseGrid_unit_tests.cpp index f14815f81db813bb936281baded780f4de4d7bbd..7e3fe2ba91158c005b48ec3e402748b76776a1ae 100644 --- a/src/SparseGrid/SparseGrid_unit_tests.cpp +++ b/src/SparseGrid/SparseGrid_unit_tests.cpp @@ -1753,7 +1753,7 @@ BOOST_AUTO_TEST_CASE( sparse_grid_remove_area) Box<3,size_t> bx_create({100,100,100},{400,400,400}); - Box<3,size_t> bx_delete({150,150,150},{350,350,350}); + Box<3,long int> bx_delete({150,150,150},{350,350,350}); grid_sm<3,void> gs(sz); grid_key_dx_iterator_sub<3> sub(gs,bx_create.getKP1(),bx_create.getKP2()); diff --git a/src/SparseGrid/cp_block.hpp b/src/SparseGrid/cp_block.hpp index 68e0e818c1ce59bb5040112ce7cd513eeb866859..5b82711b7f4e18212b5bf0582f3487a909758c49 100644 --- a/src/SparseGrid/cp_block.hpp +++ b/src/SparseGrid/cp_block.hpp @@ -11,88 +11,88 @@ #include "util/create_vmpl_sequence.hpp" template<typename T, unsigned int stencil_size, typename vector_vmpl, unsigned int dim> -class cp_block +class cp_block_base { - // we create first a vector with +public: typedef typename vmpl_sum_constant<2*stencil_size,vector_vmpl>::type stop_border_vmpl; typedef typename vmpl_create_constant<dim,stencil_size>::type start_border_vmpl; static const int sizeBlock = vmpl_reduce_prod<vector_vmpl>::type::value; static const int sizeBlockBord = vmpl_reduce_prod<stop_border_vmpl>::type::value; +}; + +template<typename T, unsigned int stencil_size, typename vector_vmpl, unsigned int dim> +class cp_block: public cp_block_base<T,stencil_size,vector_vmpl,dim> +{ + // we create first a vector with + + typedef cp_block_base<T,stencil_size,vector_vmpl,dim> base; - T (& 
ptr)[sizeBlock]; + T (& ptr)[base::sizeBlock]; public: - __device__ __host__ cp_block(T (& ptr)[sizeBlock]) + __device__ __host__ explicit cp_block(T (& ptr)[base::sizeBlock]) :ptr(ptr) {} template<typename ... ArgsT> __device__ __host__ T & operator()(ArgsT ... args) { - return ptr[Lin_vmpl_off<vector_vmpl,start_border_vmpl>(args ...)]; + return ptr[Lin_vmpl_off<vector_vmpl,typename base::start_border_vmpl>(args ...)]; } }; template<typename T, unsigned int stencil_size, typename vector_vmpl> -class cp_block<T,stencil_size,vector_vmpl,2> +class cp_block<T,stencil_size,vector_vmpl,2>: public cp_block_base<T,stencil_size,vector_vmpl,2> { // we create first a vector with - typedef typename vmpl_sum_constant<2*stencil_size,vector_vmpl>::type stop_border_vmpl; - typedef typename vmpl_create_constant<2,stencil_size>::type start_border_vmpl; - - static const int sizeBlock = vmpl_reduce_prod<vector_vmpl>::type::value; - static const int sizeBlockBord = vmpl_reduce_prod<stop_border_vmpl>::type::value; + typedef cp_block_base<T,stencil_size,vector_vmpl,2> base; - T (& ptr)[sizeBlock]; + T (& ptr)[base::sizeBlock]; public: - __device__ __host__ cp_block(T (& ptr)[sizeBlock]) + __device__ __host__ explicit cp_block(T (& ptr)[base::sizeBlock]) :ptr(ptr) {} __device__ __host__ int LinId(int i , int j) { - return Lin_vmpl_off<vector_vmpl,start_border_vmpl>(i,j); + return Lin_vmpl_off<vector_vmpl,typename base::start_border_vmpl>(i,j); } __device__ __host__ T & operator()( int i , int j) { - return ptr[Lin_vmpl_off<vector_vmpl,start_border_vmpl>(i,j)]; + return ptr[Lin_vmpl_off<vector_vmpl,typename base::start_border_vmpl>(i,j)]; } }; template<typename T, unsigned int stencil_size, typename vector_vmpl> -class cp_block<T,stencil_size,vector_vmpl,3> +class cp_block<T,stencil_size,vector_vmpl,3>: public cp_block_base<T,stencil_size,vector_vmpl,3> { // we create first a vector with - typedef typename vmpl_sum_constant<2*stencil_size,vector_vmpl>::type stop_border_vmpl; - typedef 
typename vmpl_create_constant<3,stencil_size>::type start_border_vmpl; - - static const int sizeBlock = vmpl_reduce_prod<vector_vmpl>::type::value; - static const int sizeBlockBord = vmpl_reduce_prod<stop_border_vmpl>::type::value; + typedef cp_block_base<T,stencil_size,vector_vmpl,3> base; - T (& ptr)[sizeBlock]; + T (& ptr)[base::sizeBlock]; public: - __device__ __host__ cp_block(T (& ptr)[sizeBlock]) + __device__ __host__ explicit cp_block(T (& ptr)[base::sizeBlock]) :ptr(ptr) {} __device__ __host__ int LinId(int i , int j, int k) { - return Lin_vmpl_off<vector_vmpl,start_border_vmpl>(i,j,k); + return Lin_vmpl_off<vector_vmpl,typename base::start_border_vmpl>(i,j,k); } __device__ __host__ T & operator()(int i , int j, int k) { - return ptr[Lin_vmpl_off<vector_vmpl,start_border_vmpl>(i,j,k)]; + return ptr[Lin_vmpl_off<vector_vmpl,typename base::start_border_vmpl>(i,j,k)]; } }; diff --git a/src/SparseGridGpu/BlockMapGpu.hpp b/src/SparseGridGpu/BlockMapGpu.hpp index 3da9215589ddf0bc5b99565339356f9d93792365..a1d3d5199be71271c127c9a1d80698787b0adb61 100644 --- a/src/SparseGridGpu/BlockMapGpu.hpp +++ b/src/SparseGridGpu/BlockMapGpu.hpp @@ -314,9 +314,6 @@ template<typename AggregateBlockT, unsigned int threadBlockSize, typename indexT void BlockMapGpu<AggregateBlockT, threadBlockSize, indexT, layout_base>::deviceToHost() { blockMap.template deviceToHost<pMask>(); - /////////////// DEBUG //////////////////// - auto indexBuffer = blockMap.getIndexBuffer(); - ////////////////////////////////////////// } template<typename AggregateBlockT, unsigned int threadBlockSize, typename indexT, template<typename> class layout_base> diff --git a/src/SparseGridGpu/BlockMapGpu_kernels.cuh b/src/SparseGridGpu/BlockMapGpu_kernels.cuh index dfa74d8bc235aa7256b2f81ddae36548b3a70966..525cabdebded2234b3008c1437dcfe63d971d5cd 100644 --- a/src/SparseGridGpu/BlockMapGpu_kernels.cuh +++ b/src/SparseGridGpu/BlockMapGpu_kernels.cuh @@ -723,6 +723,9 @@ namespace BlockMapGpuFunctors 
p_ids.resize(mergeIndices.size()); s_ids.resize(mergeIndices.size()); + // shut-up valgrind uninitialized + + p_ids.template get<1>(p_ids.size()-1) = 0; CUDA_LAUNCH(BlockMapGpuKernels::compute_predicate,ite,keys.toKernel(),mergeIndices.toKernel(),dataOld.size(),p_ids.toKernel()); openfpm::scan((int *)p_ids.template getDeviceBuffer<0>(), diff --git a/src/SparseGridGpu/SparseGridGpu.hpp b/src/SparseGridGpu/SparseGridGpu.hpp index 1e8ef18ff87d01bef6e6b6897bc8ce505c998440..55ec25bfa48aa9ad9aadac08148463461ab73813 100644 --- a/src/SparseGridGpu/SparseGridGpu.hpp +++ b/src/SparseGridGpu/SparseGridGpu.hpp @@ -525,6 +525,8 @@ private: openfpm::vector<int> n_shift_cp_swp; openfpm::vector<int> n_shift_cp_swp_r; typedef typename aggregate_convert<dim,blockEdgeSize,aggregate<int>>::type convertAggr; + + // Map to convert blocks from missaligned chunks openfpm::vector_gpu<convertAggr> convert_blk; openfpm::vector_gpu<convertAggr> convert_blk_swp; openfpm::vector_gpu<convertAggr> convert_blk_swp_r; @@ -576,11 +578,11 @@ private: mutable openfpm::vector_gpu<Box<dim,int>> pack_subs_swp; mutable openfpm::vector_gpu<Box<dim,int>> pack_subs_swp_r; - //! Size of the index vector packed. These varaible are unsed to understand if the option + //! Size of the index vector packed. These varaible are used to understand if the option //! KEEP_GEOMETRY can be used keep geometry option infact require that when we record the //! packing variables the number of chunks (and chunks indexes) does not change - mutable int index_size_swp; - mutable int index_size_swp_r; + mutable int index_size_swp = -1; + mutable int index_size_swp_r = -1; //! 
links of the padding points with real points of a coarse sparsegrid openfpm::vector_gpu<aggregate<size_t>> links_up; @@ -602,6 +604,62 @@ private: bool findNN = false; + inline void swap_internal_remote() + { + n_cnk_cp_swp_r.swap(n_cnk_cp); + n_pnt_cp_swp_r.swap(n_pnt_cp); + n_shift_cp_swp_r.swap(n_shifts_cp); + convert_blk_swp_r.swap(convert_blk); + box_cp_swp_r.swap(box_cp); + new_map_swp_r.swap(new_map); + } + + inline void swap_internal_local() + { + offset_ptrs_cp_swp.swap(offset_ptrs_cp); + scan_ptrs_cp_swp.swap(scan_ptrs_cp); + data_base_ptr_cp_swp.swap(data_base_ptr_cp); + n_cnk_cp_swp.swap(n_cnk_cp); + n_pnt_cp_swp.swap(n_pnt_cp); + n_shift_cp_swp.swap(n_shifts_cp); + convert_blk_swp.swap(convert_blk); + box_cp_swp.swap(box_cp); + new_map_swp.swap(new_map); + } + + inline void swap_local_pack() + { + index_ptrs_swp.swap(index_ptrs); + scan_ptrs_swp.swap(scan_ptrs); + data_ptrs_swp.swap(data_ptrs); + offset_ptrs_swp.swap(offset_ptrs); + mask_ptrs_swp.swap(mask_ptrs); + + e_points_swp.swap(e_points); + pack_output_swp.swap(pack_output); + tmp_swp.swap(tmp); + + pack_subs_swp.swap(pack_subs); + index_size_swp = private_get_index_array().size(); + } + + inline void swap_remote_pack() + { + index_ptrs_swp_r.swap(index_ptrs); + scan_ptrs_swp_r.swap(scan_ptrs); + data_ptrs_swp_r.swap(data_ptrs); + offset_ptrs_swp_r.swap(offset_ptrs); + mask_ptrs_swp_r.swap(mask_ptrs); + + e_points_swp_r.swap(e_points); + pack_output_swp_r.swap(pack_output); + tmp_swp_r.swap(tmp); + + pack_subs_swp_r.swap(pack_subs); + //req_index_swp_r = req_index; + index_size_swp_r = private_get_index_array().size(); + } + protected: static constexpr unsigned int blockSize = BlockTypeOf<AggregateBlockT, 0>::size; typedef AggregateBlockT AggregateInternalT; @@ -680,56 +738,23 @@ public: } + void saveUnpackVariableIfNotKeepGeometry(int opt, bool is_unpack_remote) { if (is_unpack_remote == true) - { - n_cnk_cp_swp_r.swap(n_cnk_cp); - n_pnt_cp_swp_r.swap(n_pnt_cp); - 
n_shift_cp_swp_r.swap(n_shifts_cp); - convert_blk_swp_r.swap(convert_blk); - box_cp_swp_r.swap(box_cp); - new_map_swp_r.swap(new_map); - } + {swap_internal_remote();} if (is_unpack_remote == false) - { - offset_ptrs_cp_swp.swap(offset_ptrs_cp); - scan_ptrs_cp_swp.swap(scan_ptrs_cp); - data_base_ptr_cp_swp.swap(data_base_ptr_cp); - n_cnk_cp_swp.swap(n_cnk_cp); - n_pnt_cp_swp.swap(n_pnt_cp); - n_shift_cp_swp.swap(n_shifts_cp); - convert_blk_swp.swap(convert_blk); - box_cp_swp.swap(box_cp); - new_map_swp.swap(new_map); - } + {swap_internal_local();} } void RestoreUnpackVariableIfKeepGeometry(int opt, bool is_unpack_remote) { if (opt & KEEP_GEOMETRY && is_unpack_remote == true) - { - n_cnk_cp_swp_r.swap(n_cnk_cp); - n_pnt_cp_swp_r.swap(n_pnt_cp); - n_shift_cp_swp_r.swap(n_shifts_cp); - convert_blk_swp_r.swap(convert_blk); - box_cp_swp_r.swap(box_cp); - new_map_swp_r.swap(new_map); - } + {swap_internal_remote();} if (opt & KEEP_GEOMETRY && is_unpack_remote == false) - { - offset_ptrs_cp_swp.swap(offset_ptrs_cp); - scan_ptrs_cp_swp.swap(scan_ptrs_cp); - data_base_ptr_cp_swp.swap(data_base_ptr_cp); - n_cnk_cp_swp.swap(n_cnk_cp); - n_pnt_cp_swp.swap(n_pnt_cp); - n_shift_cp_swp.swap(n_shifts_cp); - convert_blk_swp.swap(convert_blk); - box_cp_swp.swap(box_cp); - new_map_swp.swap(new_map); - } + {swap_internal_local();} } @@ -737,36 +762,14 @@ public: { if (is_pack_remote == false) { - index_ptrs_swp.swap(index_ptrs); - scan_ptrs_swp.swap(scan_ptrs); - data_ptrs_swp.swap(data_ptrs); - offset_ptrs_swp.swap(offset_ptrs); - mask_ptrs_swp.swap(mask_ptrs); - - e_points_swp.swap(e_points); - pack_output_swp.swap(pack_output); - tmp_swp.swap(tmp); - - pack_subs_swp.swap(pack_subs); + swap_local_pack(); req_index_swp = req_index; - index_size_swp = private_get_index_array().size(); } if (is_pack_remote == true) { - index_ptrs_swp_r.swap(index_ptrs); - scan_ptrs_swp_r.swap(scan_ptrs); - data_ptrs_swp_r.swap(data_ptrs); - offset_ptrs_swp_r.swap(offset_ptrs); - 
mask_ptrs_swp_r.swap(mask_ptrs); - - e_points_swp_r.swap(e_points); - pack_output_swp_r.swap(pack_output); - tmp_swp_r.swap(tmp); - - pack_subs_swp_r.swap(pack_subs); + swap_remote_pack(); req_index_swp_r = req_index; - index_size_swp_r = private_get_index_array().size(); } } @@ -774,33 +777,13 @@ public: { if (opt & KEEP_GEOMETRY && is_pack_remote == false) { - index_ptrs_swp.swap(index_ptrs); - scan_ptrs_swp.swap(scan_ptrs); - data_ptrs_swp.swap(data_ptrs); - offset_ptrs_swp.swap(offset_ptrs); - mask_ptrs_swp.swap(mask_ptrs); - - e_points_swp.swap(e_points); - pack_output_swp.swap(pack_output); - tmp_swp.swap(tmp); - - pack_subs_swp.swap(pack_subs); + swap_local_pack(); req_index = req_index_swp; } if (opt & KEEP_GEOMETRY && is_pack_remote == true) { - index_ptrs_swp_r.swap(index_ptrs); - scan_ptrs_swp_r.swap(scan_ptrs); - data_ptrs_swp_r.swap(data_ptrs); - offset_ptrs_swp_r.swap(offset_ptrs); - mask_ptrs_swp_r.swap(mask_ptrs); - - e_points_swp_r.swap(e_points); - pack_output_swp_r.swap(pack_output); - tmp_swp_r.swap(tmp); - - pack_subs_swp_r.swap(pack_subs); + swap_remote_pack(); req_index = req_index_swp_r; } } @@ -1213,7 +1196,7 @@ private: auto & dataBuffer = private_get_data_array(); if (req_index != pack_subs.size()) - {std::cerr << __FILE__ << ":" << __LINE__ << " error the packing request number differ from the number of packed objects" << std::endl;} + {std::cerr << __FILE__ << ":" << __LINE__ << " error the packing request number differ from the number of packed objects " << req_index << " " << pack_subs.size() << std::endl;} size_t tot_pnt = 0; size_t tot_cnk = 0; @@ -2793,9 +2776,11 @@ public: size_t n_cnk = 0; tmp.template get<0>((i+1)*(indexBuffer.size() + 1)-1) = 0; + tmp.template get<1>((i+1)*(indexBuffer.size() + 1)-1) = 0; // put a zero at the end tmp.template hostToDevice<0>((i+1)*(indexBuffer.size() + 1)-1,(i+1)*(indexBuffer.size() + 1)-1); + tmp.template hostToDevice<1>((i+1)*(indexBuffer.size() + 1)-1,(i+1)*(indexBuffer.size() + 1)-1); 
openfpm::scan(((indexT *)tmp. template getDeviceBuffer<0>()) + i*(indexBuffer.size() + 1), indexBuffer.size() + 1, (indexT *)tmp. template getDeviceBuffer<0>() + i*(indexBuffer.size() + 1), context); @@ -3127,7 +3112,7 @@ public: * \param box_src box to kill the points * */ - void remove(const Box<dim,unsigned int> & section_to_delete) + void remove(const Box<dim,int> & section_to_delete) { rem_sects.add(section_to_delete); } diff --git a/src/SparseGridGpu/SparseGridGpu_ker_util.hpp b/src/SparseGridGpu/SparseGridGpu_ker_util.hpp index 06afd126b4c5e1df383db143182d20533d7ebf59..67d801da5e6db2931892896b0e9ef4c9a9bee6ea 100644 --- a/src/SparseGridGpu/SparseGridGpu_ker_util.hpp +++ b/src/SparseGridGpu/SparseGridGpu_ker_util.hpp @@ -862,14 +862,14 @@ struct loadGhostBlock_impl<7,dim,AggregateBlockT,pMask,p,ct_params,blockEdgeSize auto gmask6 = blockMap.template get_ele<pMask>(nPos6)[offset6]; auto gmask7 = blockMap.template get_ele<pMask>(nPos7)[offset7]; - if (bmask == 0) {bdata = bck.template get<p>();} - if (gmask == 0) {gdata = bck.template get<p>();} - if (gmask2 == 0) {gdata2 = bck.template get<p>();} - if (gmask3 == 0) {gdata3 = bck.template get<p>();} - if (gmask4 == 0) {gdata4 = bck.template get<p>();} - if (gmask5 == 0) {gdata5 = bck.template get<p>();} - if (gmask6 == 0) {gdata6 = bck.template get<p>();} - if (gmask7 == 0) {gdata7 = bck.template get<p>();} + if (bmask == 0) {set_compile_condition<pMask != p>::template set<p>(bdata,bck);} + if (gmask == 0) {set_compile_condition<pMask != p>::template set<p>(gdata,bck);} + if (gmask2 == 0) {set_compile_condition<pMask != p>::template set<p>(gdata2,bck);} + if (gmask3 == 0) {set_compile_condition<pMask != p>::template set<p>(gdata3,bck);} + if (gmask4 == 0) {set_compile_condition<pMask != p>::template set<p>(gdata4,bck);} + if (gmask5 == 0) {set_compile_condition<pMask != p>::template set<p>(gdata5,bck);} + if (gmask6 == 0) {set_compile_condition<pMask != p>::template set<p>(gdata6,bck);} + if (gmask7 == 0) 
{set_compile_condition<pMask != p>::template set<p>(gdata7,bck);} sharedRegionPtr[linId] = gdata; sharedRegionPtr[linId2] = gdata2; diff --git a/src/SparseGridGpu/performance/SparseGridGpu_performance_get_nn.cu b/src/SparseGridGpu/performance/SparseGridGpu_performance_get_nn.cu index 243199426db30156cc6eae1f51f634751b872c99..768f9ea8d7cee7d2bef4b540709d0744e303e25f 100644 --- a/src/SparseGridGpu/performance/SparseGridGpu_performance_get_nn.cu +++ b/src/SparseGridGpu/performance/SparseGridGpu_performance_get_nn.cu @@ -4,8 +4,6 @@ * Created on: Sep 10, 2019 * Author: i-bird */ - -#define SCAN_WITH_CUB #define BOOST_TEST_DYN_LINK #define DISABLE_MPI_WRITTERS diff --git a/src/SparseGridGpu/performance/SparseGridGpu_performance_get_single.cu b/src/SparseGridGpu/performance/SparseGridGpu_performance_get_single.cu index 43f262476374f29319bbf6c888ca25646ded71e0..a2db1e646226776ffa8778eedd32369f3dea169c 100644 --- a/src/SparseGridGpu/performance/SparseGridGpu_performance_get_single.cu +++ b/src/SparseGridGpu/performance/SparseGridGpu_performance_get_single.cu @@ -4,7 +4,6 @@ * Created on: Sep 9, 2019 * Author: i-bird */ -#define SCAN_WITH_CUB #define BOOST_TEST_DYN_LINK #define DISABLE_MPI_WRITTERS diff --git a/src/SparseGridGpu/performance/SparseGridGpu_performance_heat_stencil.cu b/src/SparseGridGpu/performance/SparseGridGpu_performance_heat_stencil.cu index d6ed35be2a2fc448e8bc9570f8837a251e0f7eea..d3d5164dc3544ae8357454e05776c26ebf7cd864 100644 --- a/src/SparseGridGpu/performance/SparseGridGpu_performance_heat_stencil.cu +++ b/src/SparseGridGpu/performance/SparseGridGpu_performance_heat_stencil.cu @@ -4,7 +4,6 @@ * Created on: Sep 10, 2019 * Author: i-bird */ -#define SCAN_WITH_CUB #define BOOST_TEST_DYN_LINK #define DISABLE_MPI_WRITTERS @@ -66,9 +65,9 @@ void testStencilHeat_perf(unsigned int i, std::string base) timer ts; ts.start(); - sparseGrid.template applyStencils<Stencil01T>(STENCIL_MODE_INPLACE, 0.1); + sparseGrid.template 
applyStencils<Stencil01T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1); cudaDeviceSynchronize(); - sparseGrid.template applyStencils<Stencil10T>(STENCIL_MODE_INPLACE, 0.1); + sparseGrid.template applyStencils<Stencil10T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1); cudaDeviceSynchronize(); ts.stop(); diff --git a/src/SparseGridGpu/performance/SparseGridGpu_performance_heat_stencil_3d.cu b/src/SparseGridGpu/performance/SparseGridGpu_performance_heat_stencil_3d.cu index ba1155eb002f881e1a319c0c4351dc8af0d64cc2..73eed9dbf79e6f4066d57e76b6629c8bdf9550a2 100644 --- a/src/SparseGridGpu/performance/SparseGridGpu_performance_heat_stencil_3d.cu +++ b/src/SparseGridGpu/performance/SparseGridGpu_performance_heat_stencil_3d.cu @@ -4,7 +4,6 @@ * Created on: Sep 10, 2019 * Author: i-bird */ -#define SCAN_WITH_CUB #define BOOST_TEST_DYN_LINK #define DISABLE_MPI_WRITTERS @@ -72,9 +71,9 @@ void testStencilHeat3D_perf(unsigned int i, std::string base) timer ts; ts.start(); - sparseGrid.template applyStencils<Stencil01T>(STENCIL_MODE_INPLACE, 0.1); + sparseGrid.template applyStencils<Stencil01T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1); cudaDeviceSynchronize(); - sparseGrid.template applyStencils<Stencil10T>(STENCIL_MODE_INPLACE, 0.1); + sparseGrid.template applyStencils<Stencil10T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1); cudaDeviceSynchronize(); ts.stop(); @@ -163,7 +162,6 @@ void testStencilHeat3DSparse_perf(unsigned int i, std::string base, float fillMu openfpm::vector<double> measures_tm; dim3 gridSize(gridEdgeSize, gridEdgeSize, gridEdgeSize); - dim3 blockSize(blockEdgeSize, blockEdgeSize, blockEdgeSize); unsigned int spatialEdgeSize = 10000; size_t sz[3] = {spatialEdgeSize, spatialEdgeSize, spatialEdgeSize}; typename SparseGridZ::grid_info blockGeometry(sz); @@ -201,23 +199,19 @@ void testStencilHeat3DSparse_perf(unsigned int i, std::string base, float fillMu unsigned long long numElements = existingElements - boundaryElements; // Now apply some boundary 
conditions - sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(STENCIL_MODE_INPLACE, + sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, centerPoint, centerPoint + 2*blockEdgeSize*gridEdgeSize, 0.0, 10.0); - cudaDeviceSynchronize(); iterations /= 2; for (unsigned int iter=0; iter<iterations; ++iter) { - cudaDeviceSynchronize(); timer ts; ts.start(); - sparseGrid.template applyStencils<Stencil01T>(STENCIL_MODE_INPLACE, 0.1); - cudaDeviceSynchronize(); - sparseGrid.template applyStencils<Stencil10T>(STENCIL_MODE_INPLACE, 0.1); - cudaDeviceSynchronize(); + sparseGrid.template applyStencils<Stencil01T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1); + sparseGrid.template applyStencils<Stencil10T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1); ts.stop(); diff --git a/src/SparseGridGpu/performance/SparseGridGpu_performance_heat_stencil_sparse.cu b/src/SparseGridGpu/performance/SparseGridGpu_performance_heat_stencil_sparse.cu index 543d3bcdb3bac6be42997a590c4433ae51e7581c..abec6ef5b6b96a058e61d47b8d2e285065d9381b 100644 --- a/src/SparseGridGpu/performance/SparseGridGpu_performance_heat_stencil_sparse.cu +++ b/src/SparseGridGpu/performance/SparseGridGpu_performance_heat_stencil_sparse.cu @@ -4,8 +4,6 @@ * Created on: Sep 10, 2019 * Author: i-bird */ - -#define SCAN_WITH_CUB #define BOOST_TEST_DYN_LINK #define DISABLE_MPI_WRITTERS @@ -43,7 +41,6 @@ void testStencilHeatSparse_perf(unsigned int i, std::string base, float fillMult openfpm::vector<double> measures_tm; dim3 gridSize(gridEdgeSize, gridEdgeSize); - dim3 blockSize(blockEdgeSize,blockEdgeSize); unsigned int spatialEdgeSize = 1000000; size_t sz[2] = {spatialEdgeSize, spatialEdgeSize}; typename SparseGridZ::grid_info blockGeometry(sz); @@ -82,7 +79,7 @@ void testStencilHeatSparse_perf(unsigned int i, std::string base, float fillMult unsigned long long numElements = existingElements - boundaryElements; // Now apply some boundary 
conditions - sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(STENCIL_MODE_INPLACE, + sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, centerPoint, centerPoint + 2*blockEdgeSize*gridEdgeSize, 0.0, 10.0); @@ -94,9 +91,9 @@ void testStencilHeatSparse_perf(unsigned int i, std::string base, float fillMult timer ts; ts.start(); - sparseGrid.template applyStencils<Stencil01T>(STENCIL_MODE_INPLACE, 0.1); + sparseGrid.template applyStencils<Stencil01T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1); cudaDeviceSynchronize(); - sparseGrid.template applyStencils<Stencil10T>(STENCIL_MODE_INPLACE, 0.1); + sparseGrid.template applyStencils<Stencil10T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1); cudaDeviceSynchronize(); ts.stop(); diff --git a/src/SparseGridGpu/performance/SparseGridGpu_performance_insert_block.cu b/src/SparseGridGpu/performance/SparseGridGpu_performance_insert_block.cu index 72997ae2f32a6f8c0a8858f96da3f03445fa7a6b..402709319e4028ce2882a1313d3ce54bce2713ef 100644 --- a/src/SparseGridGpu/performance/SparseGridGpu_performance_insert_block.cu +++ b/src/SparseGridGpu/performance/SparseGridGpu_performance_insert_block.cu @@ -4,8 +4,6 @@ * Created on: Sep 10, 2019 * Author: i-bird */ - -#define SCAN_WITH_CUB #define BOOST_TEST_DYN_LINK #define DISABLE_MPI_WRITTERS diff --git a/src/SparseGridGpu/performance/SparseGridGpu_performance_insert_single.cu b/src/SparseGridGpu/performance/SparseGridGpu_performance_insert_single.cu index c55b0be809073088ef5365d3ae169d3b414bc0f6..afacc2d3115f7c4679d098fafc5afb76bdc278b2 100644 --- a/src/SparseGridGpu/performance/SparseGridGpu_performance_insert_single.cu +++ b/src/SparseGridGpu/performance/SparseGridGpu_performance_insert_single.cu @@ -4,7 +4,6 @@ * Created on: Sep 10, 2019 * Author: i-bird */ -#define SCAN_WITH_CUB #define BOOST_TEST_DYN_LINK #define DISABLE_MPI_WRITTERS diff --git 
a/src/SparseGridGpu/performance/SparseGridGpu_performance_insert_stencil.cu b/src/SparseGridGpu/performance/SparseGridGpu_performance_insert_stencil.cu index 119e779a311baaf3ec9b861a61a9a8ed4c2ad828..94d424f07e7496e5c45fa75a394df6bd3292e79b 100644 --- a/src/SparseGridGpu/performance/SparseGridGpu_performance_insert_stencil.cu +++ b/src/SparseGridGpu/performance/SparseGridGpu_performance_insert_stencil.cu @@ -4,7 +4,6 @@ * Created on: Sep 10, 2019 * Author: i-bird */ -#define SCAN_WITH_CUB #define BOOST_TEST_DYN_LINK #define DISABLE_MPI_WRITTERS @@ -19,99 +18,6 @@ extern report_sparse_grid_tests report_sparsegrid_funcs; extern std::set<std::string> testSet; -template<unsigned int blockEdgeSize, unsigned int gridEdgeSize> -void testInsertStencil(std::string testURI, unsigned int i) -{ - auto testName = "Insert stencil"; - constexpr unsigned int dim = 2; -// constexpr unsigned int blockEdgeSize = 8; - constexpr unsigned int chunkSize = IntPow<blockEdgeSize,dim>::value; - typedef aggregate<float> AggregateT; - typedef HeatStencil<dim,0,0> StencilT; - - unsigned int iterations = 10; - - std::string base(testURI + "(" + std::to_string(i) + ")"); - report_sparsegrid_funcs.graphs.put(base + ".test.name","StencilInsertN"); - - report_sparsegrid_funcs.graphs.put(base + ".dim",2); - report_sparsegrid_funcs.graphs.put(base + ".blockSize",blockEdgeSize); - report_sparsegrid_funcs.graphs.put(base + ".gridSize.x",gridEdgeSize*blockEdgeSize); - report_sparsegrid_funcs.graphs.put(base + ".gridSize.y",gridEdgeSize*blockEdgeSize); - - dim3 gridSize(gridEdgeSize, gridEdgeSize); - dim3 blockSize(blockEdgeSize, blockEdgeSize); - grid_smb<dim, blockEdgeSize> blockGeometry(gridSize); - SparseGridGpu<dim, AggregateT, blockEdgeSize, chunkSize> sparseGrid(blockGeometry); - mgpu::ofp_context_t ctx; - sparseGrid.template setBackgroundValue<0>(0); - - // Initialize the grid - sparseGrid.setGPUInsertBuffer(gridSize, dim3(1)); - CUDA_LAUNCH_DIM3((insertConstantValue<0>),gridSize, 
blockSize,sparseGrid.toKernel(), 0); - sparseGrid.template flush < sRight_ < 0 >> (ctx, flush_type::FLUSH_ON_DEVICE); - - sparseGrid.setGPUInsertBuffer(gridSize, dim3(1)); - dim3 sourcePt(gridSize.x * blockEdgeSize / 2, gridSize.y * blockEdgeSize / 2, 0); - insertOneValue<0> << < gridSize, blockSize >> > (sparseGrid.toKernel(), sourcePt, 100); - sparseGrid.template flush < sRight_ < 0 >> (ctx, flush_type::FLUSH_ON_DEVICE); - - sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block! - - unsigned long long numElements = gridEdgeSize*blockEdgeSize*gridEdgeSize*blockEdgeSize; - - for (unsigned int iter=0; iter<5; ++iter) - { - sparseGrid.template applyStencils<StencilT>(STENCIL_MODE_INSERT, 0.1); - sparseGrid.template flush<smax_<0>>(ctx, flush_type::FLUSH_ON_DEVICE); - } - - openfpm::vector<double> gElemSMeasures; - openfpm::vector<double> gFlopsSMeasures; - - for (unsigned int iter=0; iter<iterations; ++iter) - { - timer ts; - ts.start(); - - cudaDeviceSynchronize(); - - sparseGrid.template applyStencils<StencilT>(STENCIL_MODE_INSERT, 0.1); - - cudaDeviceSynchronize(); - - ts.stop(); - - float gElemS = numElements / (1e9 * ts.getwct()); - float gFlopsS = gElemS * StencilT::flops; - - gElemSMeasures.add(gElemS); - gFlopsSMeasures.add(gFlopsS); - } - - - double elemMean=0, elemDeviation=0; - standard_deviation(gElemSMeasures, elemMean, elemDeviation); - report_sparsegrid_funcs.graphs.put(base + ".GElems.mean",elemMean); - report_sparsegrid_funcs.graphs.put(base +".GElems.dev",elemDeviation); - double flopsMean=0, flopsDeviation=0; - standard_deviation(gFlopsSMeasures, flopsMean, flopsDeviation); - report_sparsegrid_funcs.graphs.put(base + ".GFlops.mean",flopsMean); - report_sparsegrid_funcs.graphs.put(base +".GFlops.dev",flopsDeviation); - - std::cout << "Test: " << testName << "\n"; - std::cout << "Block: " << blockEdgeSize << "x" << blockEdgeSize << "\n"; - std::cout << "Grid: " << gridEdgeSize*blockEdgeSize << "x" << gridEdgeSize*blockEdgeSize 
<< "\n"; - double dataOccupancyMean, dataOccupancyDev; - sparseGrid.deviceToHost(); - sparseGrid.measureBlockOccupancy(dataOccupancyMean, dataOccupancyDev);std::cout << "Data Occupancy: " << dataOccupancyMean << " dev:" << dataOccupancyDev << std::endl; - report_sparsegrid_funcs.graphs.put(base + ".dataOccupancy.mean",dataOccupancyMean); - report_sparsegrid_funcs.graphs.put(base +".dataOccupancy.dev",dataOccupancyDev); - std::cout << "Iterations: " << iterations << "\n"; - std::cout << "Throughput:\n\t" << elemMean << " GElem/s dev: " << elemDeviation << " GElem/s" << std::endl - << "\t" << flopsMean << " GFlops/s dev: " << flopsDeviation << " GFlops/s" << std::endl; -} - BOOST_AUTO_TEST_SUITE(performance) @@ -119,53 +25,18 @@ BOOST_AUTO_TEST_SUITE(SparseGridGpu_test) BOOST_AUTO_TEST_CASE(testStencilHeatInsert_gridScaling) { - std::string testURI = suiteURI + ".device.stencilInsert.dense.N.2D.gridScaling"; - unsigned int counter = 0; - testInsertStencil<8, 64>(testURI, counter++); - testInsertStencil<8, 128>(testURI, counter++); - testInsertStencil<8, 256>(testURI, counter++); - testInsertStencil<8, 512>(testURI, counter++); - testInsertStencil<8, 1024>(testURI, counter++); - - testSet.insert(testURI); } BOOST_AUTO_TEST_CASE(testStencilHeatInsert_gridScaling_8) { - std::string testURI = suiteURI + ".device.stencilInsert.dense.N.2D.8.gridScaling"; - unsigned int counter = 0; - testInsertStencil<8, 64>(testURI, counter++); - testInsertStencil<8, 128>(testURI, counter++); - testInsertStencil<8, 256>(testURI, counter++); - testInsertStencil<8, 512>(testURI, counter++); - testInsertStencil<8, 1024>(testURI, counter++); - - testSet.insert(testURI); } BOOST_AUTO_TEST_CASE(testStencilHeatInsert_gridScaling_16) { - std::string testURI = suiteURI + ".device.stencilInsert.dense.N.2D.16.gridScaling"; - unsigned int counter = 0; - testInsertStencil<16, 32>(testURI, counter++); - testInsertStencil<16, 64>(testURI, counter++); - testInsertStencil<16, 128>(testURI, counter++); - 
testInsertStencil<16, 256>(testURI, counter++); - testInsertStencil<16, 512>(testURI, counter++); - - testSet.insert(testURI); } BOOST_AUTO_TEST_CASE(testStencilHeatInsert_blockScaling) { - std::string testURI = suiteURI + ".device.stencilInsert.dense.N.2D.blockScaling"; - unsigned int counter = 0; - testInsertStencil<4, 1024>(testURI, counter++); - testInsertStencil<8, 512>(testURI, counter++); - testInsertStencil<16, 256>(testURI, counter++); - testInsertStencil<32, 128>(testURI, counter++); - - testSet.insert(testURI); } diff --git a/src/SparseGridGpu/performance/SparseGridGpu_performance_stencil_heat_host.cu b/src/SparseGridGpu/performance/SparseGridGpu_performance_stencil_heat_host.cu index a4d3334656d202f1b32c369611f37f24578de1da..193b59b9524bfe995441dfa04e227578185d2f39 100644 --- a/src/SparseGridGpu/performance/SparseGridGpu_performance_stencil_heat_host.cu +++ b/src/SparseGridGpu/performance/SparseGridGpu_performance_stencil_heat_host.cu @@ -5,7 +5,6 @@ * Author: i-bird */ -#define SCAN_WITH_CUB #define BOOST_TEST_DYN_LINK #define DISABLE_MPI_WRITTERS @@ -19,318 +18,26 @@ extern std::string suiteURI; extern report_sparse_grid_tests report_sparsegrid_funcs; extern std::set<std::string> testSet; -template<unsigned int blockEdgeSize, unsigned int gridEdgeSize, typename SparseGridZ> -void testStencilHeatSparseHost_perf(unsigned int i, std::string base, float fillMultiplier=1, float voidMultiplier=1) -{ - auto testName = "In-place sparse stencil"; -// unsigned int gridEdgeSize = 128; - constexpr unsigned int dim = SparseGridZ::dims; -// const unsigned int blockEdgeSize = SparseGridZ::blockEdgeSize_; - - typedef HeatStencil<dim, 0, 1> Stencil01T; - typedef HeatStencil<dim, 1, 0> Stencil10T; - -// std::string base("performance.SparseGridGpu(" + std::to_string(i) + ").stencil"); - - report_sparsegrid_funcs.graphs.put(base + ".dim",2); - report_sparsegrid_funcs.graphs.put(base + ".blockSize",blockEdgeSize); - report_sparsegrid_funcs.graphs.put(base + 
".gridSize.x",gridEdgeSize*blockEdgeSize); - report_sparsegrid_funcs.graphs.put(base + ".gridSize.y",gridEdgeSize*blockEdgeSize); - - unsigned int iterations = 100; - - openfpm::vector<double> measures_gf; - openfpm::vector<double> measures_tm; - - dim3 gridSize(gridEdgeSize, gridEdgeSize); - dim3 blockSize(blockEdgeSize,blockEdgeSize); - unsigned int spatialEdgeSize = 1000000; - size_t sz[2] = {spatialEdgeSize, spatialEdgeSize}; - typename SparseGridZ::grid_info blockGeometry(sz); - SparseGridZ sparseGrid(blockGeometry); - mgpu::ofp_context_t ctx; - sparseGrid.template setBackgroundValue<0>(0); - - ///// Insert sparse content, a set of concentric spheres ///// - float allMultiplier = fillMultiplier + voidMultiplier; - const unsigned int numSpheres = gridEdgeSize / (2*allMultiplier); -// const unsigned int numSpheres = 1; - unsigned int centerPoint = spatialEdgeSize / 2; - - for (int i = 1; i <= numSpheres; ++i) - { - unsigned int rBig = allMultiplier*i * blockEdgeSize; - unsigned int rSmall = (allMultiplier*i - fillMultiplier) * blockEdgeSize; - // Sphere i-th - grid_key_dx<dim, int> start1({centerPoint, centerPoint}); - sparseGrid.setGPUInsertBuffer(gridSize, dim3(1)); - CUDA_LAUNCH_DIM3((insertSphere<0>), - gridSize, dim3(blockEdgeSize * blockEdgeSize, 1, 1), - sparseGrid.toKernel(), start1, rBig, rSmall, 5); - cudaDeviceSynchronize(); - sparseGrid.template flush<smax_<0 >>(ctx, flush_type::FLUSH_ON_DEVICE); - cudaDeviceSynchronize(); - } - ///// ///// - - sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block! - sparseGrid.tagBoundaries(ctx); - - sparseGrid.template deviceToHost<0>(); // NECESSARY as count takes place on Host! 
- auto existingElements = sparseGrid.countExistingElements(); - auto boundaryElements = sparseGrid.countBoundaryElements(); - unsigned long long numElements = existingElements - boundaryElements; - - // Now apply some boundary conditions - sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(STENCIL_MODE_INPLACE, - centerPoint, centerPoint + 2*blockEdgeSize*gridEdgeSize, - 0.0, 10.0); - cudaDeviceSynchronize(); - - sparseGrid.template deviceToHost<0>(); // NECESSARY as stencils are applied on Host! - - iterations /= 2; - for (unsigned int iter=0; iter<iterations; ++iter) - { - cudaDeviceSynchronize(); - - timer ts; - ts.start(); - - sparseGrid.template applyStencilsHost<Stencil01T>(STENCIL_MODE_INPLACE, 0.1); - cudaDeviceSynchronize(); - sparseGrid.template applyStencilsHost<Stencil10T>(STENCIL_MODE_INPLACE, 0.1); - cudaDeviceSynchronize(); - - ts.stop(); - - measures_tm.add(ts.getwct()); - - float gElemS = 2 * numElements / (1e9 * ts.getwct()); - float gFlopsS = gElemS * Stencil01T::flops; - - measures_gf.add(gFlopsS); - } - - double mean_tm = 0; - double deviation_tm = 0; - standard_deviation(measures_tm,mean_tm,deviation_tm); - - double mean_gf = 0; - double deviation_gf = 0; - standard_deviation(measures_gf,mean_gf,deviation_gf); - - // All times above are in ms - - float gElemS = 2 * numElements / (1e9 * mean_tm); - float gFlopsS = gElemS * Stencil01T::flops; - std::cout << "Test: " << testName << std::endl; - std::cout << "Block: " << blockEdgeSize << "x" << blockEdgeSize << std::endl; - std::cout << "Grid: " << gridEdgeSize*blockEdgeSize << "x" << gridEdgeSize*blockEdgeSize << std::endl; - double dataOccupancyMean, dataOccupancyDev; - sparseGrid.deviceToHost(); - sparseGrid.measureBlockOccupancy(dataOccupancyMean, dataOccupancyDev);std::cout << "Data Occupancy: " << dataOccupancyMean << " dev:" << dataOccupancyDev << std::endl; - report_sparsegrid_funcs.graphs.put(base + ".dataOccupancy.mean",dataOccupancyMean); - 
report_sparsegrid_funcs.graphs.put(base +".dataOccupancy.dev",dataOccupancyDev); - std::cout << "Iterations: " << iterations << std::endl; - std::cout << "\tStencil: " << mean_gf << " dev:" << deviation_gf << " s" << std::endl; - std::cout << "Throughput: " << std::endl << "\t " << gElemS << " GElem/s " << std::endl << "\t " << gFlopsS << " GFlops/s" << std::endl; - - report_sparsegrid_funcs.graphs.put(base + ".GFlops.mean",mean_gf); - report_sparsegrid_funcs.graphs.put(base +".GFlops.dev",deviation_gf); - report_sparsegrid_funcs.graphs.put(base + ".time.mean",mean_tm); - report_sparsegrid_funcs.graphs.put(base +".time.dev",deviation_tm); -} - - -template<unsigned int blockEdgeSize, unsigned int gridEdgeSize, typename SparseGridZ> -void testStencilHeatHost_perf(unsigned int i, std::string base) -{ - // todo: Make sure to reimplement the host stencil application function to pre-load to a block of memory both content and ghost - // this way we can avoid binary searches... - auto testName = "In-place stencil HOST"; - typedef HeatStencil<SparseGridZ::dims,0,1> Stencil01T; - typedef HeatStencil<SparseGridZ::dims,1,0> Stencil10T; - - - constexpr unsigned int dim = 2; - -// std::string base("performance.SparseGridGpu(" + std::to_string(i) + ").stencil"); - - report_sparsegrid_funcs.graphs.put(base + ".dim",dim); - report_sparsegrid_funcs.graphs.put(base + ".blockSize",blockEdgeSize); - report_sparsegrid_funcs.graphs.put(base + ".gridSize.x",gridEdgeSize*SparseGridZ::blockEdgeSize_); - report_sparsegrid_funcs.graphs.put(base + ".gridSize.y",gridEdgeSize*SparseGridZ::blockEdgeSize_); - -// unsigned int iterations = 100; - unsigned int iterations = 10; -// unsigned int iterations = 2; -// unsigned int iterations = 1; // Debug - - openfpm::vector<double> measures_gf; - openfpm::vector<double> measures_tm; - - dim3 gridSize(gridEdgeSize, gridEdgeSize); - dim3 blockSize(SparseGridZ::blockEdgeSize_,SparseGridZ::blockEdgeSize_); - typename SparseGridZ::grid_info 
blockGeometry(gridSize); - SparseGridZ sparseGrid(blockGeometry); - mgpu::ofp_context_t ctx; - sparseGrid.template setBackgroundValue<0>(0); - - unsigned long long numElements = gridEdgeSize*SparseGridZ::blockEdgeSize_*gridEdgeSize*SparseGridZ::blockEdgeSize_; - - // Initialize the grid - sparseGrid.setGPUInsertBuffer(gridSize, dim3(1)); - CUDA_LAUNCH_DIM3((insertConstantValue<0>),gridSize, blockSize,sparseGrid.toKernel(), 0); - sparseGrid.template flush < sRight_ < 0 >> (ctx, flush_type::FLUSH_ON_DEVICE); - - sparseGrid.setGPUInsertBuffer(gridSize, dim3(1)); - dim3 sourcePt(gridSize.x * SparseGridZ::blockEdgeSize_ / 2, gridSize.y * SparseGridZ::blockEdgeSize_ / 2, 0); - insertOneValue<0> << < gridSize, blockSize >> > (sparseGrid.toKernel(), sourcePt, 100); - sparseGrid.template flush < sRight_ < 0 >> (ctx, flush_type::FLUSH_ON_DEVICE); - - sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block! - cudaDeviceSynchronize(); - - sparseGrid.template deviceToHost<0,1>(); - - iterations /= 2; - for (unsigned int iter=0; iter<iterations; ++iter) - { - cudaDeviceSynchronize(); - - timer ts; - ts.start(); - - sparseGrid.template applyStencilsHost<Stencil01T>(STENCIL_MODE_INPLACE, 0.1); - cudaDeviceSynchronize(); - sparseGrid.template applyStencilsHost<Stencil10T>(STENCIL_MODE_INPLACE, 0.1); - cudaDeviceSynchronize(); - - ts.stop(); - - measures_tm.add(ts.getwct()); - - float gElemS = 2 * numElements / (1e9 * ts.getwct()); - float gFlopsS = gElemS * Stencil01T::flops; - - measures_gf.add(gFlopsS); - } - - double mean_tm = 0; - double deviation_tm = 0; - standard_deviation(measures_tm,mean_tm,deviation_tm); - - double mean_gf = 0; - double deviation_gf = 0; - standard_deviation(measures_gf,mean_gf,deviation_gf); - - // All times above are in ms - - float gElemS = 2 * numElements / (1e9 * mean_tm); - float gFlopsS = gElemS * Stencil01T::flops; - - std::cout << "Test: " << testName << std::endl; - std::cout << "Host: " << SparseGridZ::blockEdgeSize_ << 
"x" << SparseGridZ::blockEdgeSize_ << std::endl; - std::cout << "Grid: " << gridEdgeSize*SparseGridZ::blockEdgeSize_ << "x" << gridEdgeSize*SparseGridZ::blockEdgeSize_ << std::endl; - double dataOccupancyMean=0, dataOccupancyDev=0; - sparseGrid.deviceToHost(); - sparseGrid.measureBlockOccupancy(dataOccupancyMean, dataOccupancyDev);std::cout << "Data Occupancy: " << dataOccupancyMean << " dev:" << dataOccupancyDev << std::endl; - report_sparsegrid_funcs.graphs.put(base + ".dataOccupancy.mean",dataOccupancyMean); - report_sparsegrid_funcs.graphs.put(base +".dataOccupancy.dev",dataOccupancyDev); - std::cout << "Iterations: " << iterations << std::endl; - std::cout << "\tStencil: " << mean_gf << " dev:" << deviation_gf << " s" << std::endl; - std::cout << "Throughput: " << std::endl << "\t " << gElemS << " GElem/s " << std::endl - << "\t " << gFlopsS << " GFlops/s" << std::endl; - - report_sparsegrid_funcs.graphs.put(base + ".GFlops.mean",mean_gf); - report_sparsegrid_funcs.graphs.put(base +".GFlops.dev",deviation_gf); - report_sparsegrid_funcs.graphs.put(base + ".time.mean",mean_tm); - report_sparsegrid_funcs.graphs.put(base +".time.dev",deviation_tm); -} - -template<unsigned int blockEdgeSize, unsigned int gridEdgeSize> -void launch_testStencilHeatSparseHost_perf(std::string testURI, unsigned int i, - float fillMultiplier=1, float voidMultiplier=1, std::string occupancyStr="05") -{ - constexpr unsigned int dim = 2; - typedef aggregate<float,float> AggregateT; - constexpr unsigned int chunkSize = IntPow<blockEdgeSize,dim>::value; - - std::string base(testURI + "(" + std::to_string(i) + ")"); - report_sparsegrid_funcs.graphs.put(base + ".test.name","StencilNSparseHost"+occupancyStr); - - testStencilHeatSparseHost_perf<blockEdgeSize, gridEdgeSize, - SparseGridGpu<dim, AggregateT, blockEdgeSize, chunkSize, long int>>(i, base, - fillMultiplier, voidMultiplier); - cudaDeviceSynchronize(); -} - -template<unsigned int blockEdgeSize, unsigned int gridEdgeSize> -void 
launch_testStencilHeatHost_perf(std::string testURI, unsigned int i) -{ - constexpr unsigned int dim = 2; - typedef aggregate<float,float> AggregateT; - constexpr unsigned int chunkSize = IntPow<blockEdgeSize,dim>::value; - - std::string base(testURI + "(" + std::to_string(i) + ")"); - report_sparsegrid_funcs.graphs.put(base + ".test.name","StencilN_Host"); - - testStencilHeatHost_perf<blockEdgeSize, gridEdgeSize, - SparseGridGpu<dim, AggregateT, blockEdgeSize, chunkSize>>(i, base); -} - BOOST_AUTO_TEST_SUITE(performance) BOOST_AUTO_TEST_SUITE(SparseGridGpu_test) BOOST_AUTO_TEST_CASE(testStencilHeatHost_gridScaling) { - std::string testURI = suiteURI + ".host.stencil.dense.N.2D.gridScaling"; - unsigned int counter = 0; - launch_testStencilHeatHost_perf<8, 128>(testURI, counter++); - launch_testStencilHeatHost_perf<8, 256>(testURI, counter++); - launch_testStencilHeatHost_perf<8, 512>(testURI, counter++); -// launch_testStencilHeatHost_perf<8, 1024>(testURI, counter++); - testSet.insert(testURI); } BOOST_AUTO_TEST_CASE(testStencilHeatHost_blockScaling) { - std::string testURI = suiteURI + ".host.stencil.dense.N.2D.blockScaling"; - unsigned int counter = 0; - launch_testStencilHeatHost_perf<4, 2048>(testURI, counter++); - launch_testStencilHeatHost_perf<8, 1024>(testURI, counter++); - launch_testStencilHeatHost_perf<16, 512>(testURI, counter++); -// launch_testStencilHeatHost_perf<32, 256>(testURI, counter++); - testSet.insert(testURI); } BOOST_AUTO_TEST_CASE(testStencilHeatSparseHost_gridScaling) { - std::string testURI = suiteURI + ".host.stencil.sparse.N.2D.05.gridScaling"; - unsigned int counter = 0; - constexpr unsigned int blockSize = 32; - constexpr unsigned int __referenceBlockSize = 8; - constexpr unsigned int __bsFactor = blockSize / __referenceBlockSize; - launch_testStencilHeatSparseHost_perf<blockSize, 128/__bsFactor>(testURI, counter++, 1.45, 1, "05"); - launch_testStencilHeatSparseHost_perf<blockSize, 256/__bsFactor>(testURI, counter++, 1.45, 1, "05"); 
- launch_testStencilHeatSparseHost_perf<blockSize, 512/__bsFactor>(testURI, counter++, 1.45, 1, "05"); - launch_testStencilHeatSparseHost_perf<blockSize, 1024/__bsFactor>(testURI, counter++, 1.45, 1, "05"); - testSet.insert(testURI); } BOOST_AUTO_TEST_CASE(testStencilHeatSparseHost_blockScaling) { - std::string testURI = suiteURI + ".host.stencil.sparse.N.2D.05.blockScaling"; - unsigned int counter = 0; - launch_testStencilHeatSparseHost_perf<4, 2048>(testURI, counter++, 1.45, 1, "05"); - launch_testStencilHeatSparseHost_perf<8, 1024>(testURI, counter++, 1.45, 1, "05"); - launch_testStencilHeatSparseHost_perf<16, 512>(testURI, counter++, 1.45, 1, "05"); - launch_testStencilHeatSparseHost_perf<32, 256>(testURI, counter++, 1.45, 1, "05"); - testSet.insert(testURI); } BOOST_AUTO_TEST_SUITE_END() diff --git a/src/SparseGridGpu/performance/SparseGridGpu_performance_tests.cu b/src/SparseGridGpu/performance/SparseGridGpu_performance_tests.cu index a5c5f8094932822a1ff1a2c327d0414fe86e3f18..af6dcf8ecb9fe3fdbe44ec2e107afc871226728c 100644 --- a/src/SparseGridGpu/performance/SparseGridGpu_performance_tests.cu +++ b/src/SparseGridGpu/performance/SparseGridGpu_performance_tests.cu @@ -1,8 +1,6 @@ // // Created by tommaso on 4/07/19. 
// - -#define SCAN_WITH_CUB #define BOOST_TEST_DYN_LINK #define DISABLE_MPI_WRITTERS @@ -83,9 +81,9 @@ void testStencilHeatGet_perf(unsigned int i, std::string base) timer ts; ts.start(); - sparseGrid.template applyStencils<Stencil01T>(STENCIL_MODE_INPLACE, 0.1); + sparseGrid.template applyStencils<Stencil01T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1); cudaDeviceSynchronize(); - sparseGrid.template applyStencils<Stencil10T>(STENCIL_MODE_INPLACE, 0.1); + sparseGrid.template applyStencils<Stencil10T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1); cudaDeviceSynchronize(); ts.stop(); @@ -207,9 +205,9 @@ void testStencilSkeleton_perf(unsigned int i, std::string base) timer ts; ts.start(); - sparseGrid.template applyStencils<Stencil01T>(STENCIL_MODE_INPLACE, 0.1); + sparseGrid.template applyStencils<Stencil01T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1); cudaDeviceSynchronize(); - sparseGrid.template applyStencils<Stencil10T>(STENCIL_MODE_INPLACE, 0.1); + sparseGrid.template applyStencils<Stencil10T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1); cudaDeviceSynchronize(); ts.stop(); @@ -280,7 +278,6 @@ void launch_testConv3x3x3_perf_no_shared_z_morton(std::string testURI, unsigned { constexpr unsigned int dim = 3; typedef aggregate<float,float> AggregateT; - constexpr unsigned int chunkSize = IntPow<8,dim>::value; std::string base(testURI + "(" + std::to_string(i) + ")"); report_sparsegrid_funcs.graphs.put(base + ".test.name","Conv3x3x3"); @@ -292,7 +289,6 @@ void launch_testConv3x3x3_perf_no_shared(std::string testURI, unsigned int i) { constexpr unsigned int dim = 3; typedef aggregate<float,float> AggregateT; - constexpr unsigned int chunkSize = IntPow<8,dim>::value; std::string base(testURI + "(" + std::to_string(i) + ")"); report_sparsegrid_funcs.graphs.put(base + ".test.name","Conv3x3x3"); diff --git a/src/SparseGridGpu/tests/SparseGridGpu_tests.cu b/src/SparseGridGpu/tests/SparseGridGpu_tests.cu index 
3f1653eb9e0cea098343fe0b4a308171ea300fd9..7ebd947e94f96119bacfa2de265ee24ef621884e 100644 --- a/src/SparseGridGpu/tests/SparseGridGpu_tests.cu +++ b/src/SparseGridGpu/tests/SparseGridGpu_tests.cu @@ -2002,7 +2002,7 @@ BOOST_AUTO_TEST_CASE(testSparseGridGpuOutput3DHeatStencil) sparseGrid.tagBoundaries(ctx); // Now apply some boundary conditions - sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(STENCIL_MODE_INPLACE, + sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 192, 384, 0.0, 10.0); @@ -2014,9 +2014,9 @@ BOOST_AUTO_TEST_CASE(testSparseGridGpuOutput3DHeatStencil) { for (int innerIter=0; innerIter<10; ++innerIter) { - sparseGrid.applyStencils<HeatStencil<dim, 0, 1>>(STENCIL_MODE_INPLACE, 0.1); + sparseGrid.applyStencils<HeatStencil<dim, 0, 1>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1); - sparseGrid.applyStencils<HeatStencil<dim, 1, 0>>(STENCIL_MODE_INPLACE, 0.1); + sparseGrid.applyStencils<HeatStencil<dim, 1, 0>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1); } } @@ -2082,7 +2082,7 @@ BOOST_AUTO_TEST_CASE(testSparseGridGpuOutput3D) sparseGrid.findNeighbours(); // Pre-compute the neighbours pos for each block! 
sparseGrid.tagBoundaries(ctx); - sparseGrid.template applyStencils<BoundaryStencilSetX<dim,0,0>>(STENCIL_MODE_INPLACE); + sparseGrid.template applyStencils<BoundaryStencilSetX<dim,0,0>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE); sparseGrid.template deviceToHost<0>(); diff --git a/src/SparseGridGpu/tests/utils/SparseGridGpu_util_test.cuh b/src/SparseGridGpu/tests/utils/SparseGridGpu_util_test.cuh index 93ad322d07b73898acfd15e3a9f2b5b571e60aed..7e74d82e7410d5fb84bde0f11a08dc77c944b5dd 100644 --- a/src/SparseGridGpu/tests/utils/SparseGridGpu_util_test.cuh +++ b/src/SparseGridGpu/tests/utils/SparseGridGpu_util_test.cuh @@ -546,7 +546,7 @@ void testConv3x3x3_perf(std::string testName) timer ts; ts.start(); - sparseGrid.template applyStencils<Conv3x3x3<dim,0,1>>(STENCIL_MODE_INPLACE,cc); + sparseGrid.template applyStencils<Conv3x3x3<dim,0,1>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE,cc); cudaDeviceSynchronize(); ts.stop(); @@ -641,7 +641,7 @@ static void testConv3x3x3_no_shared_perf(std::string testName) timer ts; ts.start(); - sparseGrid.template applyStencils<Conv3x3x3_noshared<SparseGridZ::dims,0,1>>(STENCIL_MODE_INPLACE_NO_SHARED,cc); + sparseGrid.template applyStencils<Conv3x3x3_noshared<SparseGridZ::dims,0,1>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE_NO_SHARED,cc); cudaDeviceSynchronize(); ts.stop(); diff --git a/src/Vector/cuda/map_vector_cuda_ker.cuh b/src/Vector/cuda/map_vector_cuda_ker.cuh index 2f011c564a54a0a5889229667d2c009b1c2ced89..276a16131be4d912491aa8258b143e14d95b3e45 100644 --- a/src/Vector/cuda/map_vector_cuda_ker.cuh +++ b/src/Vector/cuda/map_vector_cuda_ker.cuh @@ -33,6 +33,38 @@ __global__ void copy_two_vectors(vector_src_type v_dst, vector_dst_type v_src) v_dst.get(i) = v_src.get(i); } +template<template<typename,typename> class op, + typename vector_src_type, + typename vector_dst_type, + typename vector_opart_type, + unsigned int ... 
args> +__global__ void merge_add_prp_device_impl_src_dst_opar_offset(vector_src_type v_src, vector_dst_type v_dst, vector_opart_type opart, unsigned int start) +{ + int i = threadIdx.x + blockIdx.x * blockDim.x; + + if (i >= v_src.size()) + {return;} + + // write the object in the last element + object_s_di_op<op,decltype(v_src.get(0)),decltype(v_dst.get(0)),OBJ_ENCAP,args...>(v_src.get(i),v_dst.get(opart.template get<1>(start + i))); +} + +template<template<typename,typename> class op, + typename vector_src_type, + typename vector_dst_type, + typename vector_opart_type, + unsigned int ... args> +__global__ void merge_add_prp_device_impl_src_offset_dst_opar(vector_src_type v_src, vector_dst_type v_dst, vector_opart_type opart, unsigned int start) +{ + int i = threadIdx.x + blockIdx.x * blockDim.x; + + if (i >= opart.size()) + {return;} + + // write the object in the last element + object_s_di_op<op,decltype(v_src.get(0)),decltype(v_dst.get(0)),OBJ_ENCAP,args...>(v_src.get(start + i),v_dst.get(opart.template get<0>(i))); +} + #endif diff --git a/src/Vector/cuda/map_vector_std_cuda.hpp b/src/Vector/cuda/map_vector_std_cuda.hpp index 7a644bc023543b870acd475f43ab2b62e3c651fa..31dcc8cc1360f2e583d16fa323d420f3d4feded8 100644 --- a/src/Vector/cuda/map_vector_std_cuda.hpp +++ b/src/Vector/cuda/map_vector_std_cuda.hpp @@ -59,9 +59,6 @@ public: //! return the size of the vector inline size_t size() const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return base.size(); } @@ -73,22 +70,7 @@ public: */ inline void resize(size_t slot) { -#ifdef SE_CLASS2 - check_valid(this,8); - - // here we have to check if the vector go into reallocation - void * ptr_old = &base[0]; -#endif - base.resize_no_device(slot); - -#ifdef SE_CLASS2 - if (ptr_old != &base[0]) - { - check_delete(ptr_old); - check_new(&base[0],slot*sizeof(T),VECTOR_STD_EVENT,1); - } -#endif } /*! 
\brief Remove all the element from the vector @@ -96,9 +78,6 @@ public: */ inline void clear() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif base.clear(); } @@ -113,23 +92,8 @@ public: */ inline void add(const T & v) { -#ifdef SE_CLASS2 - check_valid(this,8); - void * ptr_old = &base[0]; -#endif - base.add_no_device(); base.template get<0>(size()-1) = v; - -#ifdef SE_CLASS2 - - if (ptr_old != &base[0]) - { - check_delete(ptr_old); - check_new(&base[0],base.size()*sizeof(T),VECTOR_STD_EVENT,1); - } - -#endif } /*! \brief It insert a new object on the vector, eventually it reallocate the grid @@ -143,23 +107,8 @@ public: */ inline void add(T && v) { -#ifdef SE_CLASS2 - check_valid(this,8); - void * ptr_old = &base.template get<0>(0); -#endif - base.add_no_device(); base.template get<0>(size()-1).swap(v); - -#ifdef SE_CLASS2 - - if (ptr_old != &base.template get<0>(0)) - { - check_delete(ptr_old); - check_new(&base.template get<0>(0),base.size()*sizeof(T),VECTOR_STD_EVENT,1); - } - -#endif } /*! \brief Add an empty object (it call the default constructor () ) at the end of the vector @@ -167,22 +116,7 @@ public: */ inline void add() { -#ifdef SE_CLASS2 - check_valid(this,8); - void * ptr_old = &base.template get<0>(0); -#endif - base.add_no_device(); - -#ifdef SE_CLASS2 - - if (ptr_old != &base.template get<0>(0)) - { - check_delete(ptr_old); - check_new(&base.template get<0>(0),base.size()*sizeof(T),VECTOR_STD_EVENT,1); - } - -#endif } /*! 
\brief Get the last element @@ -192,9 +126,6 @@ public: */ inline T & last() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 if (base.size() == 0) std::cerr << "Error vector: " << __FILE__ << ":" << __LINE__ << " vector of size 0\n"; @@ -209,9 +140,6 @@ public: */ inline const T & last() const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 if (base.size() == 0) std::cerr << "Error vector: " << __FILE__ << ":" << __LINE__ << " vector of size 0\n"; @@ -226,9 +154,6 @@ public: */ void swap(vector<T,CudaMemory,typename memory_traits_inte<aggregate<T>>::type,memory_traits_inte,grow_policy_double,STD_VECTOR> && v) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif base.swap(v.base); } @@ -246,9 +171,6 @@ public: */ inline T& operator[](int id) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 vector_overflow(id); #endif @@ -265,9 +187,6 @@ public: */ inline T& get(int id) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 vector_overflow(id); #endif @@ -286,9 +205,6 @@ public: */ inline const T& get(int id) const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 vector_overflow(id); #endif @@ -305,9 +221,6 @@ public: */ inline T & get(size_t id) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 vector_overflow(id); #endif @@ -323,9 +236,6 @@ public: */ inline const T & get(size_t id) const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 vector_overflow(id); #endif @@ -339,9 +249,6 @@ public: */ inline void reserve(size_t ns) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif base.reserve(ns); } @@ -349,41 +256,19 @@ public: vector() noexcept :err_code(0) { -#ifdef SE_CLASS2 - check_new(this,8,VECTOR_STD_EVENT,1); -#endif } //! Constructor, vector of size sz vector(size_t sz) noexcept :base(sz),err_code(0) { -#ifdef SE_CLASS2 - check_new(this,8,VECTOR_STD_EVENT,1); - check_new(&base[0],sizeof(T)*sz,VECTOR_STD_EVENT,1); -#endif } //! 
Constructor from another vector vector(const vector<T,CudaMemory,typename memory_traits_inte<aggregate<T>>::type,memory_traits_inte,grow_policy_double,STD_VECTOR> & v) noexcept :err_code(0) { -#ifdef SE_CLASS2 - check_new(this,8,VECTOR_STD_EVENT,1); - void * ptr_old = &base[0]; -#endif - base = v.base; - -#ifdef SE_CLASS2 - - if (ptr_old != &base[0]) - { - check_delete(ptr_old); - check_new(&base[0],base.size()*sizeof(T),VECTOR_STD_EVENT,1); - } - -#endif } /*! \brief Initializer from constructor @@ -394,30 +279,18 @@ public: vector(const std::initializer_list<T> & v) :base(v) { -#ifdef SE_CLASS2 - check_new(this,8,VECTOR_STD_EVENT,1); - check_new(&base[0],sizeof(T)*v.size(),VECTOR_STD_EVENT,1); -#endif } //! Constructor from another vector vector(vector<T,CudaMemory,typename memory_traits_inte<aggregate<T>>::type,memory_traits_inte,grow_policy_double,STD_VECTOR> && v) noexcept :err_code(0) { -#ifdef SE_CLASS2 - check_new(this,8,VECTOR_STD_EVENT,1); -#endif - base.swap(v.base); } //! destructor ~vector() noexcept { -#ifdef SE_CLASS2 - check_delete(this); - check_delete(&base.template get<0>(0)); -#endif } /*! 
swap the content of the vector @@ -427,9 +300,6 @@ public: */ void swap(vector<T,CudaMemory,typename memory_traits_inte<aggregate<T>>::type,memory_traits_inte,grow_policy_double,STD_VECTOR> & v) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif base.swap(v.base); } @@ -443,23 +313,8 @@ public: vector<T,CudaMemory,typename memory_traits_inte<aggregate<T>>::type,memory_traits_inte,grow_policy_double,STD_VECTOR> & operator=(const vector<T,CudaMemory,typename memory_traits_inte<aggregate<T>>::type,memory_traits_inte,grow_policy_double,STD_VECTOR> & v) { -#ifdef SE_CLASS2 - check_valid(this,8); - void * ptr_old = &base[0]; -#endif - base = v.base; -#ifdef SE_CLASS2 - - if (ptr_old != &base[0]) - { - check_delete(ptr_old); - check_new(&base[0],base.size()*sizeof(T),VECTOR_STD_EVENT,1); - } - -#endif - return *this; } @@ -470,9 +325,6 @@ public: */ vector_key_iterator getIterator() const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return vector_key_iterator(base.size()); } @@ -485,9 +337,6 @@ public: */ vector_key_iterator getIteratorTo(size_t k) const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return vector_key_iterator(k); } @@ -498,9 +347,6 @@ public: */ void * getPointer() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return &base.template get<0>(0); } @@ -579,9 +425,6 @@ public: */ size_t getLastError() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return err_code; } @@ -601,22 +444,6 @@ public: ACTION_ON_ERROR(VECTOR_ERROR_OBJECT);\ } } - - /* \brief It return the id of structure in the allocation list - * - * \see print_alloc and SE_CLASS2 - * - * \return the allocation id of this class - * - */ - long int who() - { -#ifdef SE_CLASS2 - return check_whoami(this,8); -#else - return -1; -#endif - } }; diff --git a/src/Vector/map_vector.hpp b/src/Vector/map_vector.hpp index 487e2827ddafcbf5f261c2776ca6ebabe7bcecf0..da78c721a37be76cdffcdc01adb547748403c0d4 100644 --- a/src/Vector/map_vector.hpp +++ b/src/Vector/map_vector.hpp @@ -94,9 +94,6 @@ 
namespace openfpm unsigned int ...args> static void run(vector<T,Memory,layout,layout_base,grow_p,impl> & this_ ,const vector<S,M,typename layout_base2<S>::type,layout_base2,gp,impl> & v) { - #ifdef SE_CLASS2 - check_valid(&this_,8); - #endif // merge the data on device #if defined(CUDA_GPU) && defined(__NVCC__) @@ -129,9 +126,6 @@ namespace openfpm const vector<S,M,typename layout_base2<S>::type,layout_base2,gp,impl> & v, unsigned int offset) { - #ifdef SE_CLASS2 - check_valid(&this_,8); - #endif // merge the data on device #if defined(CUDA_GPU) && defined(__NVCC__) @@ -292,9 +286,6 @@ namespace openfpm */ size_t size() const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return v_size; } @@ -306,9 +297,6 @@ namespace openfpm size_t capacity() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return base.size(); } @@ -322,9 +310,6 @@ namespace openfpm void reserve(size_t sp) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif if (sp > base.size()) { //! Resize the memory @@ -340,9 +325,6 @@ namespace openfpm */ void clear() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif resize(0); } @@ -353,9 +335,6 @@ namespace openfpm */ void shrink_to_fit() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif size_t sz[1] = {size()}; base.resize(sz); } @@ -374,9 +353,6 @@ namespace openfpm */ void resize(size_t slot, size_t opt = DATA_ON_DEVICE | DATA_ON_HOST, unsigned int blockSize = 1) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif // If we need more space than what we allocated, allocate new memory if (slot > base.size()) @@ -410,9 +386,6 @@ namespace openfpm */ void resize_no_device(size_t slot) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif // If we need more space than what we allocated, allocate new memory if (slot > base.size()) @@ -439,9 +412,6 @@ namespace openfpm */ void add() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif //! 
Check if we have enough space if (v_size >= base.size()) @@ -464,9 +434,6 @@ namespace openfpm */ void add_no_device() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif //! Check if we have enough space if (v_size >= base.size()) @@ -491,9 +458,6 @@ namespace openfpm */ void add(const T & v) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif //! Check if we have enough space if (v_size >= base.size()) @@ -522,9 +486,6 @@ namespace openfpm */ void add(const typename grid_cpu<1,T,Memory,typename layout_base<T>::type>::container & v) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif //! Check if we have enough space if (v_size >= base.size()) @@ -549,9 +510,6 @@ namespace openfpm */ template <typename M, typename gp> void add(const vector<T, M,layout, layout_base,gp,OPENFPM_NATIVE> & v) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif //! Add the element of v for (size_t i = 0 ; i < v.size() ; i++) add(v.get(i)); @@ -596,9 +554,6 @@ namespace openfpm void merge_prp(const vector<S,M,typename layout_base<S>::type,layout_base,gp,OPENFPM_NATIVE> & v, const openfpm::vector<size_t> & opart) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 if (v.size() != opart.size()) @@ -658,10 +613,6 @@ namespace openfpm void merge_prp_device(const vector<S,M,typename layout_base<S>::type,layout_base,gp,OPENFPM_NATIVE> & v, unsigned int start) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif - merge_prp_device_impl<std::is_same<Memory,CudaMemory>::value,T,Memory,layout,layout_base,grow_p> ::template run<S,M,gp,OPENFPM_NATIVE,layout_base,args...>(*this,v,start); } @@ -707,13 +658,11 @@ namespace openfpm typename M, typename gp, template <typename> class layout_base2, + typename vector_opart_type, unsigned int ...args> void merge_prp_v(const vector<S,M,typename layout_base2<S>::type,layout_base2,gp,OPENFPM_NATIVE> & v, - const openfpm::vector<aggregate<size_t,size_t>> & opart) + const vector_opart_type & opart) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif 
#ifdef SE_CLASS1 if (v.size() != opart.size()) @@ -766,6 +715,7 @@ namespace openfpm * \tparam args one or more number that define which property to set-up * * \param v source vector + * \param offset offset from where to copy in v * \param start index from where to start the merging * */ @@ -774,13 +724,218 @@ namespace openfpm typename M, typename gp, template <typename> class layout_base2, + typename vector_opart_type, unsigned int ...args> void merge_prp_v(const vector<S,M,typename layout_base2<S>::type,layout_base2,gp,OPENFPM_NATIVE> & v, - size_t start) + unsigned int offset, + const vector_opart_type & opart) { -#ifdef SE_CLASS2 - check_valid(this,8); + size_t i2 = 0; + + for (size_t i = offset ; i < v.size() ; i++) + { + auto dst = v.get(opart.template get<0>(i2)); + auto src = v.get(i); + copy_cpu_encap_encap_op_prp<op,decltype(v.get(0)),decltype(v.get(0)),args...> cp(src,dst); + + boost::mpl::for_each_ref< boost::mpl::range_c<int,0,sizeof...(args)> >(cp); + i2++; + } + } + + /*! \brief It merge the elements of a source vector to this vector + * + * Given 2 vector v1 and v2 of size 7,3. and as merging operation the function add. + * Merging the second vector v2 to + * the first one v1 starting from the element 2. 
Mean + * + * \verbatim + * + * 6 8 3 2 1 0 3 v1 elements + * | | | + * op op op + * | | | + * 5 1 9 v2 elements + * + *------------------------------------- + * 6 8 8 3 10 0 3 updated v1 elements + * + * This operation is done for each selected property in args + * + * \endverbatim + * + * The number of properties in the source vector must be smaller than the destination + * all the properties of S must be mapped so if S has 3 properties + * 3 numbers for args are required + * + * \tparam op merging operation + * \tparam S Base object of the source vector + * \tparam M memory type of the source vector + * \tparam gp Grow policy of the source vector + * \tparam args one or more number that define which property to set-up + * + * \param v source vector + * \param opart merging indexes (property 1) + * \param start starting merging index for opart + * \param stop stop merging index for opart + * + */ + template <template<typename,typename> class op, + typename S, + typename M, + typename gp, + template <typename> class layout_base2, + typename vector_opart_type, + unsigned int ...args> + void merge_prp_v_device(const vector<S,M,typename layout_base2<S>::type,layout_base2,gp,OPENFPM_NATIVE> & v, + const vector_opart_type & opart, + unsigned int start, + unsigned int stop) + { +#ifdef SE_CLASS1 + + if (v.size() != stop - start) + std::cerr << __FILE__ << ":" << __LINE__ << " error merge_prp: v.size()=" << v.size() << " must be the same as stop - start" << stop - start << std::endl; + +#endif + +#ifdef __NVCC__ + + size_t sz[1] = {stop - start}; + grid_sm<1,void> nm(sz); + + auto ite = nm.getGPUIterator(); + + // write the object in the last element + CUDA_LAUNCH((merge_add_prp_device_impl_src_dst_opar_offset<op, + decltype(v.toKernel()), + decltype(this->toKernel()), + decltype(opart.toKernel()), + args...>),ite,v.toKernel(),this->toKernel(),opart.toKernel(),start); + + // calculate +#else + std::cout << __FILE__ << ":" << __LINE__ << " Error you have to compile
map_vector.hpp with nvcc to make GPU code working" << std::endl; + +#endif + } + + /*! \brief It merge the elements of a source vector to this vector + * + * Given 2 vector v1 and v2 of size 7,3. and as merging operation the function add. + * Merging the second vector v2 to + * the first one v1 starting from the element 2. Mean + * + * \verbatim + * + * 6 8 3 2 1 0 3 v1 elements + * | | | + * op op op + * | | | + * 5 1 9 v2 elements + * + *------------------------------------- + * 6 8 8 3 10 0 3 updated v1 elements + * + * This operation is done for each selected property in args + * + * \endverbatim + * + * The number of properties in the source vector must be smaller than the destination + * all the properties of S must be mapped so if S has 3 properties + * 3 numbers for args are required + * + * \tparam op merging operation + * \tparam S Base object of the source vector + * \tparam M memory type of the source vector + * \tparam gp Grow policy of the source vector + * \tparam args one or more number that define which property to set-up + * + * \param v source vector + * \param opart merging indexes (property 0) + * \param start starting merging index + * + */ + template <template<typename,typename> class op, + typename S, + typename M, + typename gp, + template <typename> class layout_base2, + typename vector_opart_type, + unsigned int ...args> + void merge_prp_v_device(const vector<S,M,typename layout_base2<S>::type,layout_base2,gp,OPENFPM_NATIVE> & v, + unsigned int start, + const vector_opart_type & opart) + { +#ifdef SE_CLASS1 + + if (v.size() < opart.size() + start) + std::cerr << __FILE__ << ":" << __LINE__ << " error merge_prp: v.size()=" << v.size() << " must be snaller than o_part.size() + start " << opart.size() + start << std::endl; + +#endif + +#ifdef __NVCC__ + + auto ite = opart.getGPUIterator(); + + // write the object in the last element + CUDA_LAUNCH((merge_add_prp_device_impl_src_offset_dst_opar<op, + decltype(v.toKernel()), +
decltype(this->toKernel()), + decltype(opart.toKernel()), + args... >),ite,v.toKernel(),this->toKernel(),opart.toKernel(),start); + + // calculate +#else + std::cout << __FILE__ << ":" << __LINE__ << " Error you have to compile map_vector.hpp with nvcc to make GPU code working" << std::endl; + #endif + } + + /*! \brief It merge the elements of a source vector to this vector + * + * Given 2 vector v1 and v2 of size 7,3. and as merging operation the function add. + * Merging the second vector v2 to + * the first one v1 starting from the element 2. Mean + * + * \verbatim + * + * 6 8 3 2 1 0 3 v1 elements + * | | | + * op op op + * | | | + * 5 1 9 v2 elements + * + *------------------------------------- + * 6 8 8 3 10 0 3 updated v1 elements + * + * This operation is done for each selected property in args + * + * \endverbatim + * + * The number of properties in the source vector must be smaller than the destination + * all the properties of S must be mapped so if S has 3 properties + * 3 numbers for args are required + * + * \tparam op merging operation + * \tparam S Base object of the source vector + * \tparam M memory type of the source vector + * \tparam gp Grow policy of the source vector + * \tparam args one or more number that define which property to set-up + * + * \param v source vector + * \param start index from where to start the merging + * + */ + template <template<typename,typename> class op, + typename S, + typename M, + typename gp, + template <typename> class layout_base2, + unsigned int ...args> + void merge_prp_v(const vector<S,M,typename layout_base2<S>::type,layout_base2,gp,OPENFPM_NATIVE> & v, + size_t start) + { //! Add the element of v for (size_t i = 0 ; i < v.size() ; i++) { @@ -817,9 +972,6 @@ namespace openfpm unsigned int ...args> void add_prp(const vector<S,M,typename layout_base2<S>::type,layout_base2,gp,impl> & v) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif //! 
Add the element of v for (size_t i = 0 ; i < v.size() ; i++) { @@ -864,9 +1016,6 @@ namespace openfpm */ void insert(size_t key) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif add(); long int d_k = (long int)size()-1; @@ -889,9 +1038,6 @@ namespace openfpm */ void remove(size_t key) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif size_t d_k = key; size_t s_k = key + 1; @@ -918,9 +1064,6 @@ namespace openfpm */ void remove(openfpm::vector<size_t> & keys, size_t start = 0) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif // Nothing to remove return if (keys.size() <= start ) return; @@ -967,9 +1110,6 @@ namespace openfpm template <unsigned int p> inline auto get(size_t id) const -> decltype(base.template get<p>(grid_key_dx<1>(0))) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #if defined(SE_CLASS1) && !defined(__NVCC__) check_overflow(id); #endif @@ -989,7 +1129,6 @@ namespace openfpm return false; } - /*! \brief Get an element of the vector * * Get an element of the vector @@ -1001,9 +1140,6 @@ namespace openfpm */ inline auto get(size_t id) -> decltype(base.get_o(grid_key_dx<1>(id))) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #if defined(SE_CLASS1) && !defined(__NVCC__) check_overflow(id); #endif @@ -1026,9 +1162,6 @@ namespace openfpm inline const typename grid_cpu<1,T,Memory,typename layout_base<T>::type>::container get_o(size_t id) const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #if defined(SE_CLASS1) && !defined(__NVCC__) check_overflow(id); #endif @@ -1079,9 +1212,6 @@ namespace openfpm */ inline const typename grid_cpu<1,T,Memory,layout>::container last() const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif grid_key_dx<1> key(size()-1); return base.get_o(key); @@ -1134,9 +1264,6 @@ namespace openfpm template <unsigned int p> inline auto get(size_t id) -> decltype(base.template get<p>(grid_key_dx<1>(0))) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #if defined(SE_CLASS1) && !defined(__NVCC__) check_overflow(id); #endif 
@@ -1175,9 +1302,6 @@ namespace openfpm inline typename grid_cpu<1,T,Memory,typename layout_base<T>::type >::container last() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif grid_key_dx<1> key(size()-1); return base.get_o(key); @@ -1187,9 +1311,6 @@ namespace openfpm ~vector() THROW { // Eliminate the pointer - #ifdef SE_CLASS2 - check_delete(this); - #endif } /*! \brief It duplicate the vector @@ -1199,9 +1320,6 @@ namespace openfpm */ vector<T, Memory,layout,layout_base,grow_p,OPENFPM_NATIVE> duplicate() const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif vector<T, Memory,layout,layout_base,grow_p,OPENFPM_NATIVE> dup; dup.v_size = v_size; @@ -1231,10 +1349,6 @@ namespace openfpm vector(vector<T, Memory,layout,layout_base,grow_p,OPENFPM_NATIVE> && v) :v_size(0) { - // Add this pointer -#ifdef SE_CLASS2 - check_new(this,8,VECTOR_EVENT,1); -#endif swap(v); } @@ -1246,9 +1360,6 @@ namespace openfpm vector(const vector<T, Memory,layout,layout_base,grow_p,OPENFPM_NATIVE> & v) THROW :v_size(0) { -#ifdef SE_CLASS2 - check_new(this,8,VECTOR_EVENT,1); -#endif swap(v.duplicate()); } @@ -1256,9 +1367,6 @@ namespace openfpm vector() THROW :v_size(0),base(0) { -#ifdef SE_CLASS2 - check_new(this,8,VECTOR_EVENT,1); -#endif base.setMemory(); } @@ -1266,9 +1374,6 @@ namespace openfpm vector(size_t sz) THROW :v_size(sz),base(sz) { -#ifdef SE_CLASS2 - check_new(this,8,VECTOR_EVENT,1); -#endif base.setMemory(); } @@ -1280,9 +1385,6 @@ namespace openfpm */ void set(size_t id, const typename grid_cpu<1,T,Memory,typename layout_base<T>::type>::container & obj) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 check_overflow(id); #endif @@ -1307,10 +1409,6 @@ namespace openfpm */ template <typename encap_S, unsigned int ...args> void set_o(size_t i, const encap_S & obj) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif - // write the object in the last element object_s_di<encap_S,decltype(get(i)),OBJ_ENCAP,args...>(obj,get(i)); } @@ -1323,9 +1421,6 @@ namespace 
openfpm */ void set(size_t id, const T & obj) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 check_overflow(id); #endif @@ -1342,9 +1437,6 @@ namespace openfpm */ void set(size_t id, vector<T,Memory,layout,layout_base,grow_p,OPENFPM_NATIVE> & v, size_t src) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 check_overflow(id); #endif @@ -1369,9 +1461,6 @@ namespace openfpm */ vector<T, Memory,layout,layout_base,grow_p,OPENFPM_NATIVE> & operator=(vector<T, Memory, layout, layout_base,grow_p,OPENFPM_NATIVE> && mv) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif v_size = mv.v_size; base.swap(mv.base); @@ -1389,9 +1478,6 @@ namespace openfpm */ vector<T, Memory,layout,layout_base,grow_p,OPENFPM_NATIVE> & operator=(const vector<T, Memory, layout, layout_base ,grow_p,OPENFPM_NATIVE> & mv) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif v_size = mv.v_size; size_t rsz[1] = {v_size}; base.resize(rsz); @@ -1430,9 +1516,6 @@ namespace openfpm */ template<typename Mem, typename gp> vector<T, Memory,layout,layout_base,grow_p,OPENFPM_NATIVE> & operator=(vector<T, Mem, layout, layout_base,gp,OPENFPM_NATIVE> && mv) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif v_size = mv.v_size; base.swap(mv.base); @@ -1450,9 +1533,6 @@ namespace openfpm */ template<typename Mem, typename gp> vector<T, Memory,layout,layout_base,grow_p,OPENFPM_NATIVE> & operator=(const vector<T, Mem, layout, layout_base ,gp,OPENFPM_NATIVE> & mv) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif v_size = mv.getInternal_v_size(); size_t rsz[1] = {v_size}; base.resize(rsz); @@ -1491,9 +1571,6 @@ namespace openfpm template<typename Mem, template <typename> class layout_base2> vector<T, Memory,layout,layout_base2,grow_p,OPENFPM_NATIVE> & operator=(vector<T, Mem, layout, layout_base2,grow_p,OPENFPM_NATIVE> && mv) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif v_size = mv.v_size; base.swap(mv.base); @@ -1516,9 +1593,6 @@ namespace openfpm vector<T, 
Memory,layout,layout_base,grow_p,OPENFPM_NATIVE> & operator=(const vector<T, Mem, layout2, layout_base2 ,grow_p,OPENFPM_NATIVE> & mv) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif v_size = mv.getInternal_v_size(); size_t rsz[1] = {v_size}; base.resize(rsz); @@ -1587,9 +1661,6 @@ namespace openfpm */ void swap_nomode(openfpm::vector<T,Memory,layout, layout_base,grow_p,OPENFPM_NATIVE> & v) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif size_t sz_sp = v_size; // swap the v_size @@ -1606,9 +1677,6 @@ namespace openfpm */ void swap(openfpm::vector<T,Memory,layout, layout_base,grow_p,OPENFPM_NATIVE> & v) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif size_t sz_sp = v_size; // swap the v_size @@ -1625,9 +1693,6 @@ namespace openfpm */ void swap(openfpm::vector<T,Memory,layout, layout_base,grow_p,OPENFPM_NATIVE> && v) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif size_t sz_sp = v_size; // swap the v_size @@ -1646,9 +1711,6 @@ namespace openfpm */ vector_key_iterator getIteratorFrom(size_t start) const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return vector_key_iterator(v_size,start); } @@ -1663,9 +1725,6 @@ namespace openfpm */ vector_key_iterator getIteratorTo(size_t stop) const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return vector_key_iterator(stop,0); } @@ -1709,9 +1768,6 @@ namespace openfpm vector_key_iterator getIterator() const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return vector_key_iterator(v_size); } @@ -1753,9 +1809,6 @@ namespace openfpm size_t packObjectSize() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return base.packObjectSize(); } @@ -1768,9 +1821,6 @@ namespace openfpm */ size_t packObject(void * mem) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return base.packObject(mem); } @@ -1840,9 +1890,6 @@ namespace openfpm */ template<unsigned int p = 0> void setMemory(Memory & mem) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif base.template setMemory<p>(mem); } @@ -1853,9 +1900,6 @@ namespace 
openfpm */ void setMemoryArray(Memory * mem) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif base.setMemoryArray(mem); } @@ -1868,9 +1912,6 @@ namespace openfpm */ template<unsigned int p = 0> void * getPointer() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return base.template getPointer<p>(); } @@ -1881,9 +1922,6 @@ namespace openfpm */ template<unsigned int p = 0> const void * getPointer() const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return base.getPointer(); } @@ -1897,20 +1935,6 @@ namespace openfpm return false; } - /* \brief It return the id of structure in the allocation list - * - * \see print_alloc and SE_CLASS2 - * - */ - long int who() - { -#ifdef SE_CLASS2 - return check_whoami(this,8); -#else - return -1; -#endif - } - /*! \brief Internal function * * \return the size of the vector diff --git a/src/Vector/map_vector_std.hpp b/src/Vector/map_vector_std.hpp index fb9e7987b41d6134a79baecf305a11cead2bb893..f56e558a8395a730b4878d80eec5e96592a89a01 100644 --- a/src/Vector/map_vector_std.hpp +++ b/src/Vector/map_vector_std.hpp @@ -35,10 +35,6 @@ struct add_prp_impl */ template <typename S, typename M, typename gp, unsigned int impl, unsigned int ...args> inline static void add(const vector<S,M,typename memory_traits_lin<S>::type,memory_traits_lin,gp,impl> & v_src, vect_dst & v_dst) { -#ifdef SE_CLASS2 - check_valid(&v_src,8); - check_valid(&v_dst,8); -#endif //! Add the element of v for (size_t i = 0 ; i < v_src.size() ; i++) { @@ -72,10 +68,6 @@ struct add_prp_impl<OBJECT_ADD,vect_dst> */ template <typename S, typename M, typename gp, unsigned int impl, unsigned int ...args> inline static void add(const vector<S,M,typename memory_traits_lin<S>::type,memory_traits_lin,gp,impl> & v_src, vect_dst & v_dst) { -#ifdef SE_CLASS2 - check_valid((void *)&v_dst,8); - check_valid((void *)&v_src,8); -#endif // Add a new element v_dst.add(); @@ -139,9 +131,6 @@ public: //! 
return the size of the vector inline size_t size() const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return base.size(); } @@ -153,22 +142,7 @@ public: */ inline void resize(size_t slot) { -#ifdef SE_CLASS2 - check_valid(this,8); - - // here we have to check if the vector go into reallocation - void * ptr_old = &base[0]; -#endif - base.resize(slot); - -#ifdef SE_CLASS2 - if (ptr_old != &base[0]) - { - check_delete(ptr_old); - check_new(&base[0],slot*sizeof(T),VECTOR_STD_EVENT,1); - } -#endif } /*! \brief Remove all the element from the vector @@ -176,9 +150,6 @@ public: */ inline void clear() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif base.clear(); } @@ -187,9 +158,6 @@ public: */ inline void shrink_to_fit() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif base.shrink_to_fit(); } @@ -204,11 +172,6 @@ public: */ inline void add(const T & v) { -#ifdef SE_CLASS2 - check_valid(this,8); - void * ptr_old = &base[0]; -#endif - if (std::is_same<grow_p,openfpm::grow_policy_identity>::value == true) { // we reserve just one space more to avoid the capacity to increase by two @@ -216,16 +179,6 @@ public: } base.push_back(v); - -#ifdef SE_CLASS2 - - if (ptr_old != &base[0]) - { - check_delete(ptr_old); - check_new(&base[0],base.size()*sizeof(T),VECTOR_STD_EVENT,1); - } - -#endif } /*! \brief It insert a new object on the vector, eventually it reallocate the grid @@ -239,11 +192,6 @@ public: */ inline void add(T && v) { -#ifdef SE_CLASS2 - check_valid(this,8); - void * ptr_old = &base[0]; -#endif - if (std::is_same<grow_p,openfpm::grow_policy_identity>::value == true) { // we reserve just one space more to avoid the capacity to increase by two @@ -251,16 +199,6 @@ public: } base.emplace_back(v); - -#ifdef SE_CLASS2 - - if (ptr_old != &base[0]) - { - check_delete(ptr_old); - check_new(&base[0],base.size()*sizeof(T),VECTOR_STD_EVENT,1); - } - -#endif } /*! 
\brief Add an empty object (it call the default constructor () ) at the end of the vector @@ -268,22 +206,7 @@ public: */ inline void add() { -#ifdef SE_CLASS2 - check_valid(this,8); - void * ptr_old = &base[0]; -#endif - base.emplace_back(T()); - -#ifdef SE_CLASS2 - - if (ptr_old != &base[0]) - { - check_delete(ptr_old); - check_new(&base[0],base.size()*sizeof(T),VECTOR_STD_EVENT,1); - } - -#endif } /*! \brief add elements to the vector @@ -293,12 +216,6 @@ public: */ template<typename Mem,typename l,template<typename> class lb,typename gp> inline void add(const openfpm::vector<T,Mem,l,lb,gp> & eles) { - -#ifdef SE_CLASS2 - check_valid(this,8); - void * ptr_old = &base[0]; -#endif - if (std::is_same<grow_p,openfpm::grow_policy_identity>::value == true) { // we reserve just one space more to avoid the capacity to increase by two @@ -310,16 +227,6 @@ public: // copy the elements std::copy(eles.begin(),eles.end(),base.begin()+start); - -#ifdef SE_CLASS2 - - if (ptr_old != &base[0]) - { - check_delete(ptr_old); - check_new(&base[0],base.size()*sizeof(T),VECTOR_STD_EVENT,1); - } - -#endif } /*! \brief It insert a new object on the vector, eventually it reallocate the object @@ -333,22 +240,7 @@ public: */ template<typename S> inline void add(const S & v) { -#ifdef SE_CLASS2 - check_valid(this,8); - void * ptr_old = &base[0]; -#endif - push_back_op<is_vector<T>::value,is_vector<S>::value,T,S>::push_back(base,v); - -#ifdef SE_CLASS2 - - if (ptr_old != &base[0]) - { - check_delete(ptr_old); - check_new(&base[0],base.size()*sizeof(T),VECTOR_STD_EVENT,1); - } - -#endif } /*! 
\brief It insert a new object on the vector, eventually it reallocate the grid @@ -362,11 +254,6 @@ public: */ template<typename S> inline void add(const S && v) { -#ifdef SE_CLASS2 - check_valid(this,8); - void * ptr_old = &base[0]; -#endif - if (std::is_same<grow_p,openfpm::grow_policy_identity>::value == true) { // we reserve just one space more to avoid the capacity to increase by two @@ -374,16 +261,6 @@ public: } base.push_back(v); - -#ifdef SE_CLASS2 - - if (ptr_old != &base[0]) - { - check_delete(ptr_old); - check_new(&base[0],base.size()*sizeof(T),VECTOR_STD_EVENT,1); - } - -#endif } /*! \brief It add the element of a source vector to this vector @@ -408,10 +285,6 @@ public: unsigned int ...args> void add_prp(const vector<S,M,typename layout_base<S>::type,layout_base,gp,impl> & v) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif - add_prp_impl<std::is_same<S,T>::value,typename std::remove_pointer<decltype(*this)>::type>::template add<S,M,gp,impl,args...>(v,*this); } @@ -437,10 +310,6 @@ public: unsigned int ...args> void add_prp_device(const vector<S,M,typename layout_base<S>::type,layout_base,gp,impl> & v) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif - add_prp_impl<std::is_same<S,T>::value,typename std::remove_pointer<decltype(*this)>::type>::template add<S,M,gp,impl,args...>(v,*this); } @@ -465,9 +334,6 @@ public: unsigned int ...args> void add_prp(const T & v) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif add(v); } @@ -479,10 +345,6 @@ public: */ void erase(typename std::vector<T>::iterator start, typename std::vector<T>::iterator end) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif - base.erase(start,end); } @@ -493,9 +355,6 @@ public: */ void remove(size_t key) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 vector_overflow(key); #endif @@ -512,9 +371,6 @@ public: */ void remove(openfpm::vector<size_t> & keys, size_t start = 0) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif // Nothing to remove return if 
(keys.size() <= start ) return; @@ -594,9 +450,6 @@ public: */ inline T & last() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 if (base.size() == 0) std::cerr << "Error vector: " << __FILE__ << ":" << __LINE__ << " vector of size 0\n"; @@ -611,9 +464,6 @@ public: */ inline const T & last() const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 if (base.size() == 0) std::cerr << "Error vector: " << __FILE__ << ":" << __LINE__ << " vector of size 0\n"; @@ -628,9 +478,6 @@ public: */ openfpm::vector<T> duplicate() const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return *this; } @@ -641,9 +488,6 @@ public: */ void swap(std::vector<T> && v) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif base.swap(v); } @@ -654,9 +498,6 @@ public: */ void unique() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif auto it = std::unique(base.begin(),base.end()); base.resize( std::distance(base.begin(),it) ); } @@ -668,9 +509,6 @@ public: */ void sort() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif std::sort(base.begin(), base.end()); } @@ -685,9 +523,6 @@ public: */ template <unsigned int p>inline T& get(size_t id) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 if (p != 0) {std::cerr << "Error the property does not exist" << "\n";} @@ -709,9 +544,6 @@ public: */ template <unsigned int p>inline const T& get(size_t id) const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 if (p != 0) {std::cerr << "Error the property does not exist" << "\n";} @@ -731,9 +563,6 @@ public: */ inline T & get(size_t id) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 vector_overflow(id); #endif @@ -749,9 +578,6 @@ public: */ inline const T & get(size_t id) const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 vector_overflow(id); #endif @@ -768,9 +594,6 @@ public: */ inline void fill(unsigned char fl) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif 
memset(&base[0],fl,base.size() * sizeof(T)); } @@ -781,9 +604,6 @@ public: */ inline void reserve(size_t ns) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif base.reserve(ns); } @@ -791,41 +611,19 @@ public: vector() noexcept :err_code(0) { -#ifdef SE_CLASS2 - check_new(this,8,VECTOR_STD_EVENT,1); -#endif } //! Constructor, vector of size sz vector(size_t sz) noexcept :base(sz),err_code(0) { -#ifdef SE_CLASS2 - check_new(this,8,VECTOR_STD_EVENT,1); - check_new(&base[0],sizeof(T)*sz,VECTOR_STD_EVENT,1); -#endif } //! Constructor from another vector vector(const vector<T,HeapMemory,layout,memory_traits_lin,grow_policy_double,STD_VECTOR> & v) noexcept :err_code(0) { -#ifdef SE_CLASS2 - check_new(this,8,VECTOR_STD_EVENT,1); - void * ptr_old = &base[0]; -#endif - base = v.base; - -#ifdef SE_CLASS2 - - if (ptr_old != &base[0]) - { - check_delete(ptr_old); - check_new(&base[0],base.size()*sizeof(T),VECTOR_STD_EVENT,1); - } - -#endif } /*! \brief Initializer from constructor @@ -836,30 +634,18 @@ public: vector(const std::initializer_list<T> & v) :base(v) { -#ifdef SE_CLASS2 - check_new(this,8,VECTOR_STD_EVENT,1); - check_new(&base[0],sizeof(T)*v.size(),VECTOR_STD_EVENT,1); -#endif } //! Constructor from another vector vector(vector<T,HeapMemory,layout,memory_traits_lin,grow_policy_double,STD_VECTOR> && v) noexcept :err_code(0) { -#ifdef SE_CLASS2 - check_new(this,8,VECTOR_STD_EVENT,1); -#endif - base.swap(v.base); } //! destructor ~vector() noexcept { -#ifdef SE_CLASS2 - check_delete(this); - check_delete(&base[0]); -#endif } /*! 
swap the content of the vector @@ -869,9 +655,6 @@ public: */ void swap(openfpm::vector<T,HeapMemory,layout,memory_traits_lin,grow_policy_double,STD_VECTOR> & v) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif base.swap(v.base); } @@ -882,9 +665,6 @@ public: */ void swap(openfpm::vector<T,HeapMemory,layout,memory_traits_lin,grow_policy_double,STD_VECTOR> && v) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif base.swap(v.base); } @@ -897,23 +677,8 @@ public: */ vector<T,HeapMemory,layout,memory_traits_lin,grow_policy_double,STD_VECTOR> & operator=(const vector<T,HeapMemory,layout,memory_traits_lin,grow_policy_double,STD_VECTOR> & v) { -#ifdef SE_CLASS2 - check_valid(this,8); - void * ptr_old = &base[0]; -#endif - base = v.base; -#ifdef SE_CLASS2 - - if (ptr_old != &base[0]) - { - check_delete(ptr_old); - check_new(&base[0],base.size()*sizeof(T),VECTOR_STD_EVENT,1); - } - -#endif - return *this; } @@ -924,25 +689,10 @@ public: */ template<typename Mem, typename gp> vector<T,HeapMemory,layout,memory_traits_lin,grow_policy_double,STD_VECTOR> & operator=(const vector<T,Mem,layout,memory_traits_lin,gp,STD_VECTOR> & v) { -#ifdef SE_CLASS2 - check_valid(this,8); - void * ptr_old = &base[0]; -#endif - base_copy<has_base_to_copy<vector<T,Mem,layout,memory_traits_lin,gp,STD_VECTOR>>::value, decltype(*this), vector<T,Mem,layout,memory_traits_lin,gp,STD_VECTOR> >::copy(*this,v); -#ifdef SE_CLASS2 - - if (ptr_old != &base[0]) - { - check_delete(ptr_old); - check_new(&base[0],base.size()*sizeof(T),VECTOR_STD_EVENT,1); - } - -#endif - return *this; } @@ -955,9 +705,6 @@ public: */ vector<T,HeapMemory,layout,memory_traits_lin,grow_policy_double,STD_VECTOR> & operator=(vector<T,HeapMemory,layout,memory_traits_lin,grow_policy_double,STD_VECTOR> && v) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif base.swap(v.base); return *this; @@ -972,9 +719,6 @@ public: */ template<typename Mem, typename gp> vector<T,HeapMemory,layout,memory_traits_lin,grow_policy_double,STD_VECTOR> & 
operator=(vector<T,Mem,layout,memory_traits_lin,gp,STD_VECTOR> && v) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif base.swap(v.base); return *this; @@ -1011,9 +755,6 @@ public: */ vector_key_iterator getIterator() const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return vector_key_iterator(base.size()); } @@ -1026,9 +767,6 @@ public: */ vector_key_iterator getIteratorTo(size_t k) const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return vector_key_iterator(k); } @@ -1088,9 +826,6 @@ public: */ void * getPointer() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return &base[0]; } @@ -1147,9 +882,6 @@ public: */ size_t getLastError() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return err_code; } @@ -1169,22 +901,6 @@ public: ACTION_ON_ERROR(VECTOR_ERROR_OBJECT);\ } } - - /* \brief It return the id of structure in the allocation list - * - * \see print_alloc and SE_CLASS2 - * - * \return the allocation id of this class - * - */ - long int who() - { -#ifdef SE_CLASS2 - return check_whoami(this,8); -#else - return -1; -#endif - } }; /*! \brief Implementation of 1-D std::vector like structure diff --git a/src/Vector/map_vector_std_ptr.hpp b/src/Vector/map_vector_std_ptr.hpp index 944b8eed1920fe5651bff4c353dc846e1ef971f7..00c5df01fdc35aa2f46755a1ec488c8c2d448b1d 100644 --- a/src/Vector/map_vector_std_ptr.hpp +++ b/src/Vector/map_vector_std_ptr.hpp @@ -42,9 +42,6 @@ public: //! 
return the size of the vector inline size_t size() const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return v_size; } @@ -56,9 +53,6 @@ public: */ inline void resize(size_t slot) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif // resize is valid only if v_size is 0 and it match the size of PtrMemory if (slot > mem->size()/sizeof(T)) std::cerr << __FILE__ << ":" << __LINE__ << " error: this vector cannot be bigger than " << mem->size()/sizeof(T) << " elements\n"; @@ -70,9 +64,6 @@ public: */ inline void clear() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif v_size = 0; } @@ -87,9 +78,6 @@ public: */ inline void add(const T & v) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif std::cerr << __FILE__ << ":" << __LINE__ << " error: you cannot add a new element to this vector \n"; } @@ -104,9 +92,6 @@ public: */ inline void add(T && v) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif std::cerr << __FILE__ << ":" << __LINE__ << " error: you cannot add new element to this vector \n"; } @@ -116,9 +101,6 @@ public: inline void add() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif v_size++; if (v_size > mem->size()/sizeof(T)) @@ -133,9 +115,6 @@ public: */ void erase(typename std::vector<T>::iterator start, typename std::vector<T>::iterator end) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif std::cerr << __FILE__ << ":" << __LINE__ << " error: you cannot erase element from this vector \n"; } @@ -146,9 +125,6 @@ public: */ void remove(size_t key) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 vector_overflow(key); #endif @@ -202,9 +178,6 @@ public: */ inline T & last() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 if (v_size == 0) std::cerr << "Error vector: " << __FILE__ << ":" << __LINE__ << " vector of size 0\n"; @@ -219,9 +192,6 @@ public: */ inline const T & last() const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 if (v_size == 0) std::cerr << "Error vector: " << __FILE__ << ":" << 
__LINE__ << " vector of size 0" << std::endl; @@ -240,9 +210,6 @@ public: */ template <unsigned int p>inline T& get(size_t id) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 if (p != 0) {std::cerr << "Error the property does not exist" << "\n";} @@ -269,9 +236,6 @@ public: */ template <unsigned int p>inline const T& get(size_t id) const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 if (p != 0) {std::cerr << "Error the property does not exist" << "\n";} @@ -291,9 +255,6 @@ public: */ inline T & get(size_t id) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 if (id >= v_size) std::cerr << "Error vector: " << __FILE__ << ":" << __LINE__ << " overflow id: " << id << "\n"; @@ -310,9 +271,6 @@ public: */ inline const T & get(size_t id) const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif #ifdef SE_CLASS1 vector_overflow(id); #endif @@ -330,9 +288,6 @@ public: inline void fill(unsigned char fl) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif memset(mem->getPointer(),fl,v_size * sizeof(T)); } @@ -344,37 +299,24 @@ public: inline void reserve(size_t ns) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif } //! Constructor, vector of size 0 vector() noexcept :v_size(0),mem(NULL),err_code(0) { -#ifdef SE_CLASS2 - check_new(this,8,VECTOR_STD_EVENT,1); -#endif } //! Constructor, vector of size sz vector(size_t sz) noexcept :v_size(sz),err_code(0) { -#ifdef SE_CLASS2 - check_new(this,8,VECTOR_STD_EVENT,1); -#endif } //! 
Constructor from another vector vector(const vector<T,PtrMemory,layout,layout_base,gp,STD_VECTOR> & v) noexcept :v_size(0),err_code(0) { -#ifdef SE_CLASS2 - check_new(this,8,VECTOR_STD_EVENT,1); -#endif - std::cerr << __FILE__ << ":" << __LINE__ << " error: copy constructor is not supported by this vector \n"; } @@ -383,19 +325,12 @@ public: vector(vector<T,PtrMemory,layout,layout_base,gp,STD_VECTOR> && v) noexcept :v_size(0),err_code(0) { -#ifdef SE_CLASS2 - check_new(this,8,VECTOR_STD_EVENT,1); -#endif - std::cerr << __FILE__ << ":" << __LINE__ << " error: copy constructor is not supported by this vector \n"; } //! destructor ~vector() noexcept { -#ifdef SE_CLASS2 - check_delete(this); -#endif } /*! swap the content of the vector @@ -405,9 +340,6 @@ public: */ void swap(openfpm::vector<T,PtrMemory,layout,layout_base,gp,STD_VECTOR> & v) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif std::cerr << __FILE__ << ":" << __LINE__ << " error: swap is not supported by this vector \n"; } @@ -418,9 +350,6 @@ public: */ vector<T,HeapMemory,layout,layout_base,grow_policy_double,STD_VECTOR> & operator=(const vector<T,HeapMemory,layout,layout_base,grow_policy_double,STD_VECTOR> & v) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif std::cerr << __FILE__ << ":" << __LINE__ << " error: operator= is not supported by this vector \n"; return *this; @@ -435,9 +364,6 @@ public: */ vector<T,HeapMemory,layout,layout_base,grow_policy_double,STD_VECTOR> & operator=(vector<T,HeapMemory,layout,layout_base,grow_policy_double,STD_VECTOR> && v) { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif std::cerr << __FILE__ << ":" << __LINE__ << " error: operator= is not supported by this vector \n"; return *this; @@ -478,9 +404,6 @@ public: */ vector_key_iterator getIterator() const { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return vector_key_iterator(v_size); } @@ -514,9 +437,6 @@ public: */ void * getPointer() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return mem->getPointer(); 
} @@ -547,9 +467,6 @@ public: */ size_t getLastError() { -#ifdef SE_CLASS2 - check_valid(this,8); -#endif return err_code; } @@ -568,22 +485,6 @@ public: ACTION_ON_ERROR(VECTOR_ERROR_OBJECT);\ } } - - /* \brief It return the id of structure in the allocation list - * - * \see print_alloc and SE_CLASS2 - * - * \return the allocation id - * - */ - long int who() - { -#ifdef SE_CLASS2 - return check_whoami(this,8); -#else - return -1; -#endif - } }; diff --git a/src/Vector/vector_unit_tests.hpp b/src/Vector/vector_unit_tests.hpp index 23c304c1c40cfa30bdc1deab8eea06094d3af696..b47299bf24bb187c4191d5a9a2e63ad0690de6ef 100644 --- a/src/Vector/vector_unit_tests.hpp +++ b/src/Vector/vector_unit_tests.hpp @@ -479,17 +479,6 @@ BOOST_AUTO_TEST_CASE( vector_load_and_save_check ) ////////// Test function /////////// ///////////////////////////////////// -#ifdef SE_CLASS2 - -openfpm::vector<aggregate<float>> & test_error_v() -{ - openfpm::vector<aggregate<float>> v(16); - - return v; -} - -#endif - BOOST_AUTO_TEST_CASE( vector_safety_check ) { #if defined(SE_CLASS1) && defined (THROW_ON_ERROR) @@ -567,39 +556,6 @@ BOOST_AUTO_TEST_CASE( vector_safety_check ) } BOOST_REQUIRE_EQUAL(error,true); - #if defined(SE_CLASS2) && defined (THROW_ON_ERROR) - - error = false; - - // Create a vector - - openfpm::vector<aggregate<float>> * v3 = new openfpm::vector<aggregate<float>>(16); - delete v3; - - // Try to access the class - - try - {v3->size();} - catch (std::exception & e) - { - error = true; - BOOST_REQUIRE_EQUAL(e.what(),"Runtime memory error"); - } - BOOST_REQUIRE_EQUAL(error,true); - - try - { - openfpm::vector<aggregate<float>> vr = test_error_v(); - } - catch (std::exception & e) - { - error = true; - BOOST_REQUIRE_EQUAL(e.what(),"Runtime memory error"); - } - BOOST_REQUIRE_EQUAL(error,true); - - #endif - #endif } diff --git a/src/config/config_cmake.h.in b/src/config/config_cmake.h.in index a0d62c9edc325fad8f80ce5ca11c06c068f57baf..c559b196a38fd2caa24a31fcd162be354198980c 100644 
--- a/src/config/config_cmake.h.in +++ b/src/config/config_cmake.h.in @@ -137,9 +137,6 @@ ${DEFINE_PERFORMANCE_TEST} /* Security enhancement class 1 */ ${DEFINE_SE_CLASS1} -/* Security enhancement class 2 */ -${DEFINE_SE_CLASS2} - /* Security enhancement class 3 */ ${DEFINE_SE_CLASS3} diff --git a/src/data_type/aggregate.hpp b/src/data_type/aggregate.hpp index 91e76c7e7ef761bc35a9dc9f3cf84ea124a6a659..90ba5620f5e07641e1e2ab6d8095b8dbd2edf02b 100644 --- a/src/data_type/aggregate.hpp +++ b/src/data_type/aggregate.hpp @@ -10,6 +10,7 @@ #include <boost/fusion/container/vector.hpp> #include <Packer_Unpacker/has_pack_agg.hpp> +#include "util/copy_compare/copy_compare_aggregates.hpp" /*! \brief this class is a functor for "for_each" algorithm * @@ -268,5 +269,19 @@ template<typename T> struct is_aggregate<T, typename Void< typename T::yes_is_aggregate>::type> : std::true_type {}; +namespace openfpm +{ + template<unsigned int p, typename aggr> + auto at_c(aggr & agg) -> decltype(boost::fusion::at_c<p>(agg.data)) + { + return boost::fusion::at_c<p>(agg.data); + } + + template<unsigned int p, typename aggr> + auto get(aggr & agg) -> decltype(boost::fusion::at_c<p>(agg.data)) + { + return boost::fusion::at_c<p>(agg.data); + } +} #endif /* OPENFPM_DATA_SRC_UTIL_AGGREGATE_HPP_ */ diff --git a/src/data_type/aggregate_unit_tests.cpp b/src/data_type/aggregate_unit_tests.cpp new file mode 100644 index 0000000000000000000000000000000000000000..aa038423d1bdb581814f94c00295ecffcf1ccc1a --- /dev/null +++ b/src/data_type/aggregate_unit_tests.cpp @@ -0,0 +1,52 @@ +/* + * aggregate_unit_tests.cpp + * + * Created on: Jul 20, 2020 + * Author: i-bird + */ +#define BOOST_TEST_DYN_LINK +#include <boost/test/unit_test.hpp> + +#include "config.h" +#include "data_type/aggregate.hpp" + +BOOST_AUTO_TEST_SUITE( aggregate_test ) + +BOOST_AUTO_TEST_CASE( aggregate_at_c_get_test ) +{ + aggregate<double,double[3]> data; + + openfpm::at_c<0>(data) = 1.0; + + openfpm::at_c<1>(data)[0] = 1.0; + 
openfpm::at_c<1>(data)[1] = 2.0; + openfpm::at_c<1>(data)[2] = 3.0; + + BOOST_REQUIRE_EQUAL(openfpm::get<0>(data),1.0); + + BOOST_REQUIRE_EQUAL(openfpm::get<1>(data)[0],1.0); + BOOST_REQUIRE_EQUAL(openfpm::get<1>(data)[1],2.0); + BOOST_REQUIRE_EQUAL(openfpm::get<1>(data)[2],3.0); +} + +template<unsigned int integer> +struct value_function +{ + enum + { + value = integer + }; +}; + +template<typename arg_f1,typename arg_f2, unsigned int s> +struct function +{ + typedef value_function<arg_f1::value + arg_f2::value + s> value; +}; + +BOOST_AUTO_TEST_CASE( meta_function_check ) +{ + BOOST_REQUIRE_EQUAL((function<value_function<5>,value_function<4>,3>::value::value),12); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/memory_ly/memory_array.hpp b/src/memory_ly/memory_array.hpp index c2a6c898b047881745e858ae1d9ea4ac7959abf7..736b57f1a634651bc214beda2b56861a6a7ffdec 100644 --- a/src/memory_ly/memory_array.hpp +++ b/src/memory_ly/memory_array.hpp @@ -9,7 +9,6 @@ #define MEMORY_ARRAY_HPP_ #include "memory/memory.hpp" -#include "Memleak_check.hpp" #include "util/cuda_util.hpp" @@ -73,9 +72,6 @@ class memory_array { this->ptr = static_cast<T *>(ptr); -#ifdef SE_CLASS2 - check_valid(ptr,sz); -#endif // Initialize the constructors diff --git a/src/memory_ly/memory_c.hpp b/src/memory_ly/memory_c.hpp index 0778c318cf03a39d7f94301c053c66cb341fa942..a34ffaac05c2d0e83605531e482cd053edc576da 100644 --- a/src/memory_ly/memory_c.hpp +++ b/src/memory_ly/memory_c.hpp @@ -13,7 +13,7 @@ #include <boost/mpl/pop_front.hpp> #include <boost/mpl/push_front.hpp> -#include "util/boost/boost_multi_array_openfpm.hpp" +//#include "util/boost/boost_multi_array_openfpm.hpp" #include "util/multi_array_openfpm/multi_array_ref_openfpm.hpp" #include "util/ct_array.hpp" #include "memory_array.hpp" diff --git a/src/performance.hpp b/src/performance.hpp index 1959ae68608cb5503777d282bbdb90343257dad0..f0f76de8f3e7d3ce95e2f2fb667db271cde45cc4 100644 --- a/src/performance.hpp +++ b/src/performance.hpp @@ 
-10,7 +10,6 @@ #include "Plot/GoogleChart.hpp" #include "timer.hpp" -#include "Memleak_check.hpp" #include <boost/property_tree/ptree.hpp> #include <boost/property_tree/xml_parser.hpp> #include "util/performance/performance_util.hpp" diff --git a/src/test_3.hpp b/src/test_3.hpp deleted file mode 100644 index 6f9935ea742ab9f1606ffc8579306f33c0145e4d..0000000000000000000000000000000000000000 --- a/src/test_3.hpp +++ /dev/null @@ -1,115 +0,0 @@ -/* - * test_3.hpp - * - * Created on: Feb 4, 2014 - * Author: Pietro Incardona - */ - - -void test3(layout_gpu< grid<Point<float>>, memory_gpu<memory_gpu_type<Point<float>>::type> > & c3) -{ - std::cout << "3D Array with grid_key_3 storing property: " << "\n"; - - typedef Point<float> P; - - timespec ts_start; - // clock_gettime(CLOCK_MONOTONIC, &ts); // Works on FreeBSD - clock_gettime(CLOCK_REALTIME, &ts_start); // Works on Linux - - - for (int i = 0 ; i < GS_SIZE ; i++) - { - for (int j = 0 ; j < GS_SIZE ; j++) - { - for (int k = 0 ; k < GS_SIZE ; k++) - { - grid_key_3<P::x> kx(i,j,k); - grid_key_3<P::y> ky(i,j,k); - grid_key_3<P::z> kz(i,j,k); - grid_key_3<P::s> ks(i,j,k); - grid_key_3<P::v> kv(i,j,k); - grid_key_3<P::t> kt(i,j,k); - - c3.get(kx) = 1.1f; - c3.get(ky) = 1.2f; - c3.get(kz) = 1.3f; - c3.get(ks) = 1.0f; - - c3.get(kv)[0] = 1.0f; - c3.get(kv)[1] = 2.0f; - c3.get(kv)[2] = 3.0f; - - c3.get(kt)[0][0] = 1.0f; - c3.get(kt)[0][1] = 2.0f; - c3.get(kt)[0][2] = 3.0f; - c3.get(kt)[1][0] = 4.0f; - c3.get(kt)[1][1] = 5.0f; - c3.get(kt)[1][2] = 6.0f; - c3.get(kt)[2][0] = 7.0f; - c3.get(kt)[2][1] = 8.0f; - c3.get(kt)[2][2] = 9.0f; - } - } - } - - timespec end_time; - clock_gettime(CLOCK_REALTIME, &end_time); // Works on Linux - float time_dif =(float)( end_time.tv_sec - ts_start.tv_sec + (double)(end_time.tv_nsec - ts_start.tv_nsec)/1000000000.0 ); - - std::cout << "End : " << GS_SIZE*GS_SIZE*GS_SIZE*16*4/1024/1024 << " MB " << " Bandwidth: " << GS_SIZE*GS_SIZE*GS_SIZE*16*4/1024/1024/time_dif << " MB/s \n"; -} - - - -void 
test3(layout_cpu< grid<Point<float>>, memory_cpu<memory_cpu_type<Point<float>>::type> > & c3) -{ - std::cout << "3D Array with grid_key_3 storing property: " << "\n"; - - typedef Point<float> P; - - timespec ts_start; - // clock_gettime(CLOCK_MONOTONIC, &ts); // Works on FreeBSD - clock_gettime(CLOCK_REALTIME, &ts_start); // Works on Linux - - - for (int i = 0 ; i < GS_SIZE ; i++) - { - for (int j = 0 ; j < GS_SIZE ; j++) - { - for (int k = 0 ; k < GS_SIZE ; k++) - { - grid_key_3<P::x> kx(i,j,k); - grid_key_3<P::y> ky(i,j,k); - grid_key_3<P::z> kz(i,j,k); - grid_key_3<P::s> ks(i,j,k); - grid_key_3<P::v> kv(i,j,k); - grid_key_3<P::t> kt(i,j,k); - - c3.get(kx) = 1.1f; - c3.get(ky) = 1.2f; - c3.get(kz) = 1.3f; - c3.get(ks) = 1.0f; - - c3.get(kv)[0] = 1.0f; - c3.get(kv)[1] = 2.0f; - c3.get(kv)[2] = 3.0f; - - c3.get(kt)[0][0] = 1.0f; - c3.get(kt)[0][1] = 2.0f; - c3.get(kt)[0][2] = 3.0f; - c3.get(kt)[1][0] = 4.0f; - c3.get(kt)[1][1] = 5.0f; - c3.get(kt)[1][2] = 6.0f; - c3.get(kt)[2][0] = 7.0f; - c3.get(kt)[2][1] = 8.0f; - c3.get(kt)[2][2] = 9.0f; - } - } - } - - timespec end_time; - clock_gettime(CLOCK_REALTIME, &end_time); // Works on Linux - float time_dif =(float)( end_time.tv_sec - ts_start.tv_sec + (double)(end_time.tv_nsec - ts_start.tv_nsec)/1000000000.0 ); - - std::cout << "End : " << GS_SIZE*GS_SIZE*GS_SIZE*16*4/1024/1024 << " MB " << " Bandwidth: " << GS_SIZE*GS_SIZE*GS_SIZE*16*4/1024/1024/time_dif << " MB/s \n"; -} diff --git a/src/test_4.hpp b/src/test_4.hpp deleted file mode 100644 index 133c3ac1e05dd6519d00d0deb6827cc34dddc5e3..0000000000000000000000000000000000000000 --- a/src/test_4.hpp +++ /dev/null @@ -1,112 +0,0 @@ -/* - * test_4.hpp - * - * Created on: Feb 4, 2014 - * Author: Pietro Incardona - */ - -void test4(layout_gpu< grid<Point<float>>, memory_gpu<memory_gpu_type<Point<float>>::type> > & c3) -{ - std::cout << "3D Array with grid_key_d storing property: " << "\n"; - - typedef Point<float> P; - - timespec ts_start; - // 
clock_gettime(CLOCK_MONOTONIC, &ts); // Works on FreeBSD - clock_gettime(CLOCK_REALTIME, &ts_start); // Works on Linux - - for (int i = 0 ; i < GS_SIZE ; i++) - { - for (int j = 0 ; j < GS_SIZE ; j++) - { - for (int k = 0 ; k < GS_SIZE ; k++) - { - grid_key_d<3,P::x> kx(i,j,k); - grid_key_d<3,P::y> ky(i,j,k); - grid_key_d<3,P::z> kz(i,j,k); - grid_key_d<3,P::s> ks(i,j,k); - grid_key_d<3,P::v> kv(i,j,k); - grid_key_d<3,P::t> kt(i,j,k); - - c3.get(kx) = 1.1f; - c3.get(ky) = 1.2f; - c3.get(kz) = 1.3f; - c3.get(ks) = 1.0f; - - c3.get(kv)[0] = 1.0f; - c3.get(kv)[1] = 2.0f; - c3.get(kv)[2] = 3.0f; - - c3.get(kt)[0][0] = 1.0f; - c3.get(kt)[0][1] = 2.0f; - c3.get(kt)[0][2] = 3.0f; - c3.get(kt)[1][0] = 4.0f; - c3.get(kt)[1][1] = 5.0f; - c3.get(kt)[1][2] = 6.0f; - c3.get(kt)[2][0] = 7.0f; - c3.get(kt)[2][1] = 8.0f; - c3.get(kt)[2][2] = 9.0f; - } - } - } - - timespec end_time; - clock_gettime(CLOCK_REALTIME, &end_time); // Works on Linux - float time_dif =(float)( end_time.tv_sec - ts_start.tv_sec + (double)(end_time.tv_nsec - ts_start.tv_nsec)/1000000000.0 ); - - std::cout << "End : " << GS_SIZE*GS_SIZE*GS_SIZE*16*4/1024/1024 << " MB " << " Bandwidth: " << GS_SIZE*GS_SIZE*GS_SIZE*16*4/1024/1024/time_dif << " MB/s \n"; -} - - -void test4(layout_cpu< grid<Point<float>>, memory_cpu<memory_cpu_type<Point<float>>::type> > & c3) -{ - std::cout << "3D Array with grid_key_d storing property: " << "\n"; - - typedef Point<float> P; - - timespec ts_start; - // clock_gettime(CLOCK_MONOTONIC, &ts); // Works on FreeBSD - clock_gettime(CLOCK_REALTIME, &ts_start); // Works on Linux - - for (int i = 0 ; i < GS_SIZE ; i++) - { - for (int j = 0 ; j < GS_SIZE ; j++) - { - for (int k = 0 ; k < GS_SIZE ; k++) - { - grid_key_d<3,P::x> kx(i,j,k); - grid_key_d<3,P::y> ky(i,j,k); - grid_key_d<3,P::z> kz(i,j,k); - grid_key_d<3,P::s> ks(i,j,k); - grid_key_d<3,P::v> kv(i,j,k); - grid_key_d<3,P::t> kt(i,j,k); - - c3.get(kx) = 1.1f; - c3.get(ky) = 1.2f; - c3.get(kz) = 1.3f; - c3.get(ks) = 1.0f; - - 
c3.get(kv)[0] = 1.0f; - c3.get(kv)[1] = 2.0f; - c3.get(kv)[2] = 3.0f; - - c3.get(kt)[0][0] = 1.0f; - c3.get(kt)[0][1] = 2.0f; - c3.get(kt)[0][2] = 3.0f; - c3.get(kt)[1][0] = 4.0f; - c3.get(kt)[1][1] = 5.0f; - c3.get(kt)[1][2] = 6.0f; - c3.get(kt)[2][0] = 7.0f; - c3.get(kt)[2][1] = 8.0f; - c3.get(kt)[2][2] = 9.0f; - } - } - } - - timespec end_time; - clock_gettime(CLOCK_REALTIME, &end_time); // Works on Linux - float time_dif =(float)( end_time.tv_sec - ts_start.tv_sec + (double)(end_time.tv_nsec - ts_start.tv_nsec)/1000000000.0 ); - - std::cout << "End : " << GS_SIZE*GS_SIZE*GS_SIZE*16*4/1024/1024 << " MB " << " Bandwidth: " << GS_SIZE*GS_SIZE*GS_SIZE*16*4/1024/1024/time_dif << " MB/s \n"; -} - diff --git a/src/test_5.hpp b/src/test_5.hpp deleted file mode 100644 index 94a0e7449e8231ef249dd3fe904025a74b5ce2f9..0000000000000000000000000000000000000000 --- a/src/test_5.hpp +++ /dev/null @@ -1,110 +0,0 @@ -/* - * test_5.hpp - * - * Created on: Feb 4, 2014 - * Author: i-bird - */ - -void test5(layout_gpu< grid<Point<float>>, memory_gpu<memory_gpu_type<Point<float>>::type> > & c3) -{ - std::cout << "3D Array with grid_key runtime dimensions " << "\n"; - - grid_key<Point<float>::x> kx(3); - grid_key<Point<float>::y> ky(kx); - grid_key<Point<float>::z> kz(kx); - grid_key<Point<float>::s> ks(kx); - grid_key<Point<float>::v> kv(kx); - grid_key<Point<float>::t> kt(kx); - - timespec ts_start; - // clock_gettime(CLOCK_MONOTONIC, &ts); // Works on FreeBSD - clock_gettime(CLOCK_REALTIME, &ts_start); // Works on Linux - - for (int i = 0 ; i < GS_SIZE ; i++) - { - for (int j = 0 ; j < GS_SIZE ; j++) - { - for (int k = 0 ; k < GS_SIZE ; k++) - { - kx.set(i,j,k); - - c3.get(kx) = 1.1f; - c3.get(ky) = 1.2f; - c3.get(kz) = 1.3f; - c3.get(ks) = 1.0f; - - c3.get(kv)[0] = 1.0f; - c3.get(kv)[0] = 2.0f; - c3.get(kv)[0] = 3.0f; - - c3.get(kt)[0][0] = 1.0f; - c3.get(kt)[0][1] = 2.0f; - c3.get(kt)[0][2] = 3.0f; - c3.get(kt)[1][0] = 4.0f; - c3.get(kt)[1][1] = 5.0f; - c3.get(kt)[1][2] = 6.0f; - 
c3.get(kt)[2][0] = 7.0f; - c3.get(kt)[2][1] = 8.0f; - c3.get(kt)[2][2] = 9.0f; - } - } - } - - timespec end_time; - clock_gettime(CLOCK_REALTIME, &end_time); // Works on Linux - float time_dif =(float)( end_time.tv_sec - ts_start.tv_sec + (double)(end_time.tv_nsec - ts_start.tv_nsec)/1000000000.0 ); - - std::cout << "End : " << GS_SIZE*GS_SIZE*GS_SIZE*16*4/1024/1024 << " MB " << " Bandwidth: " << GS_SIZE*GS_SIZE*GS_SIZE*16*4/1024/1024/time_dif << " MB/s \n"; -} - -void test5(layout_cpu< grid<Point<float>>, memory_cpu<memory_cpu_type<Point<float>>::type> > & c3) -{ - std::cout << "3D Array with grid_key runtime dimensions " << "\n"; - - grid_key<Point<float>::x> kx(3); - grid_key<Point<float>::y> ky(kx); - grid_key<Point<float>::z> kz(kx); - grid_key<Point<float>::s> ks(kx); - grid_key<Point<float>::v> kv(kx); - grid_key<Point<float>::t> kt(kx); - - timespec ts_start; - // clock_gettime(CLOCK_MONOTONIC, &ts); // Works on FreeBSD - clock_gettime(CLOCK_REALTIME, &ts_start); // Works on Linux - - for (int i = 0 ; i < GS_SIZE ; i++) - { - for (int j = 0 ; j < GS_SIZE ; j++) - { - for (int k = 0 ; k < GS_SIZE ; k++) - { - kx.set(i,j,k); - - c3.get(kx) = 1.1f; - c3.get(ky) = 1.2f; - c3.get(kz) = 1.3f; - c3.get(ks) = 1.0f; - - c3.get(kv)[0] = 1.0f; - c3.get(kv)[0] = 2.0f; - c3.get(kv)[0] = 3.0f; - - c3.get(kt)[0][0] = 1.0f; - c3.get(kt)[0][1] = 2.0f; - c3.get(kt)[0][2] = 3.0f; - c3.get(kt)[1][0] = 4.0f; - c3.get(kt)[1][1] = 5.0f; - c3.get(kt)[1][2] = 6.0f; - c3.get(kt)[2][0] = 7.0f; - c3.get(kt)[2][1] = 8.0f; - c3.get(kt)[2][2] = 9.0f; - } - } - } - - timespec end_time; - clock_gettime(CLOCK_REALTIME, &end_time); // Works on Linux - float time_dif =(float)( end_time.tv_sec - ts_start.tv_sec + (double)(end_time.tv_nsec - ts_start.tv_nsec)/1000000000.0 ); - - std::cout << "End : " << GS_SIZE*GS_SIZE*GS_SIZE*16*4/1024/1024 << " MB " << " Bandwidth: " << GS_SIZE*GS_SIZE*GS_SIZE*16*4/1024/1024/time_dif << " MB/s \n"; -} diff --git a/src/test_6.hpp b/src/test_6.hpp deleted file 
mode 100644 index a401d2df21ccd08ab26f5cfd99ac0b789eae439a..0000000000000000000000000000000000000000 --- a/src/test_6.hpp +++ /dev/null @@ -1,244 +0,0 @@ - - -void test_6(layout_cpu<3, grid<3,Point<float>>, memory_cpu<memory_cpu_type<Point<float>>::type> > & c3) -{ - // Create a bigger grid - - - - std::cout << "3D Array with grid_key (without redundant dimension): " << "\n"; - - typedef Point<float> P; - - timespec ts_start; - // clock_gettime(CLOCK_MONOTONIC, &ts); // Works on FreeBSD - clock_gettime(CLOCK_REALTIME, &ts_start); // Works on Linux - - grid_key_dx<3> kk; - - for (int i = 0 ; i < GS_SIZE ; i++) - { - for (int j = 0 ; j < GS_SIZE ; j++) - { - for (int k = 0 ; k < GS_SIZE ; k++) - { - kk.set(i,j,k); - - c3.get<P::x>(kk) = 1.1f; - c3.get<P::y>(kk) = 1.2f; - c3.get<P::z>(kk) = 1.3f; - c3.get<P::s>(kk) = 1.0f; - - c3.get<P::v>(kk)[0] = 1.0f; - c3.get<P::v>(kk)[1] = 2.0f; - c3.get<P::v>(kk)[2] = 3.0f; - - c3.get<P::t>(kk)[0][0] = 1.0f; - c3.get<P::t>(kk)[0][1] = 2.0f; - c3.get<P::t>(kk)[0][2] = 3.0f; - c3.get<P::t>(kk)[1][0] = 4.0f; - c3.get<P::t>(kk)[1][1] = 5.0f; - c3.get<P::t>(kk)[1][2] = 6.0f; - c3.get<P::t>(kk)[2][0] = 7.0f; - c3.get<P::t>(kk)[2][1] = 8.0f; - c3.get<P::t>(kk)[2][2] = 9.0f; - } - } - } - - timespec end_time; - clock_gettime(CLOCK_REALTIME, &end_time); // Works on Linux - float time_dif =(float)( end_time.tv_sec - ts_start.tv_sec + (double)(end_time.tv_nsec - ts_start.tv_nsec)/1000000000.0 ); - - std::cout << "End : " << GS_SIZE*GS_SIZE*GS_SIZE*16*4/1024/1024 << " MB " << " Bandwidth: " << GS_SIZE*GS_SIZE*GS_SIZE*16*4/1024/1024/time_dif << " MB/s "; - - ////////////////////////////////// MEM CHECK ////////////////////////////////////////////////////// - - bool passed = true; - - for (int i = 0 ; i < GS_SIZE ; i++) - { - for (int j = 0 ; j < GS_SIZE ; j++) - { - for (int k = 0 ; k < GS_SIZE ; k++) - { - kk.set(i,j,k); - - c3.get<P::x>(kk) = i; - c3.get<P::y>(kk) = j; - c3.get<P::z>(kk) = k; - c3.get<P::s>(kk) = i+j+k; - - 
c3.get<P::v>(kk)[0] = i; - c3.get<P::v>(kk)[1] = j; - c3.get<P::v>(kk)[2] = k; - - c3.get<P::t>(kk)[0][0] = i+i; - c3.get<P::t>(kk)[0][1] = i+j; - c3.get<P::t>(kk)[0][2] = i+k; - c3.get<P::t>(kk)[1][0] = j+i; - c3.get<P::t>(kk)[1][1] = j+j; - c3.get<P::t>(kk)[1][2] = j+k; - c3.get<P::t>(kk)[2][0] = k+i; - c3.get<P::t>(kk)[2][1] = k+j; - c3.get<P::t>(kk)[2][2] = k+k; - } - } - } - - for (int i = 0 ; i < GS_SIZE ; i++) - { - for (int j = 0 ; j < GS_SIZE ; j++) - { - for (int k = 0 ; k < GS_SIZE ; k++) - { - kk.set(i,j,k); - - if (c3.get<P::x>(kk) != i) passed = false; - if (c3.get<P::y>(kk) != j) passed = false; - if (c3.get<P::z>(kk) != k) passed = false; - if (c3.get<P::s>(kk) != i+j+k) passed = false; - - if (c3.get<P::v>(kk)[0] != i) passed = false; - if (c3.get<P::v>(kk)[1] != j) passed = false; - if (c3.get<P::v>(kk)[2] != k) passed = false; - - if (c3.get<P::t>(kk)[0][0] != i+i) passed = false; - if (c3.get<P::t>(kk)[0][1] != i+j) passed = false; - if (c3.get<P::t>(kk)[0][2] != i+k) passed = false; - if (c3.get<P::t>(kk)[1][0] != j+i) passed = false; - if (c3.get<P::t>(kk)[1][1] != j+j) passed = false; - if (c3.get<P::t>(kk)[1][2] != j+k) passed = false; - if (c3.get<P::t>(kk)[2][0] != k+i) passed = false; - if (c3.get<P::t>(kk)[2][1] != k+j) passed = false; - if (c3.get<P::t>(kk)[2][2] != k+k) passed = false; - } - } - } - - if (passed == true) - std::cout << "PASSED" << "\n"; - else - std::cout << "FAILED" << "\n"; -} - - -void test2(layout_gpu<3, grid<3,Point<float>>, memory_gpu<memory_gpu_type<Point<float>>::type> > & c3) -{ - std::cout << "3D Array with grid_key (without redundant dimension): " << "\n"; - - typedef Point<float> P; - - timespec ts_start; - // clock_gettime(CLOCK_MONOTONIC, &ts); // Works on FreeBSD - clock_gettime(CLOCK_REALTIME, &ts_start); // Works on Linux - - grid_key_dx<3> kk; - - for (int i = 0 ; i < GS_SIZE ; i++) - { - for (int j = 0 ; j < GS_SIZE ; j++) - { - for (int k = 0 ; k < GS_SIZE ; k++) - { - - kk.set(i,j,k); - - 
c3.get<P::x>(kk) = 1.1f; - c3.get<P::y>(kk) = 1.2f; - c3.get<P::z>(kk) = 1.3f; - c3.get<P::s>(kk) = 1.0f; - - c3.get<P::v>(kk)[0] = 1.0f; - c3.get<P::v>(kk)[1] = 2.0f; - c3.get<P::v>(kk)[2] = 3.0f; - - c3.get<P::t>(kk)[0][0] = 1.0f; - c3.get<P::t>(kk)[0][1] = 2.0f; - c3.get<P::t>(kk)[0][2] = 3.0f; - c3.get<P::t>(kk)[1][0] = 4.0f; - c3.get<P::t>(kk)[1][1] = 5.0f; - c3.get<P::t>(kk)[1][2] = 6.0f; - c3.get<P::t>(kk)[2][0] = 7.0f; - c3.get<P::t>(kk)[2][1] = 8.0f; - c3.get<P::t>(kk)[2][2] = 9.0f; - - } - } - } - - timespec end_time; - clock_gettime(CLOCK_REALTIME, &end_time); // Works on Linux - float time_dif =(float)( end_time.tv_sec - ts_start.tv_sec + (double)(end_time.tv_nsec - ts_start.tv_nsec)/1000000000.0 ); - - std::cout << "End : " << GS_SIZE*GS_SIZE*GS_SIZE*16*4/1024/1024 << " MB " << " Bandwidth: " << GS_SIZE*GS_SIZE*GS_SIZE*16*4/1024/1024/time_dif << " MB/s "; - - /////////////////////////////////// MEM CHECK //////////////////////////////////////////////////////// - - bool passed = true; - - for (int i = 0 ; i < GS_SIZE ; i++) - { - for (int j = 0 ; j < GS_SIZE ; j++) - { - for (int k = 0 ; k < GS_SIZE ; k++) - { - kk.set(i,j,k); - - c3.get<P::x>(kk) = i; - c3.get<P::y>(kk) = j; - c3.get<P::z>(kk) = k; - c3.get<P::s>(kk) = i+j+k; - - c3.get<P::v>(kk)[0] = i; - c3.get<P::v>(kk)[1] = j; - c3.get<P::v>(kk)[2] = k; - - c3.get<P::t>(kk)[0][0] = i+i; - c3.get<P::t>(kk)[0][1] = i+j; - c3.get<P::t>(kk)[0][2] = i+k; - c3.get<P::t>(kk)[1][0] = j+i; - c3.get<P::t>(kk)[1][1] = j+j; - c3.get<P::t>(kk)[1][2] = j+k; - c3.get<P::t>(kk)[2][0] = k+i; - c3.get<P::t>(kk)[2][1] = k+j; - c3.get<P::t>(kk)[2][2] = k+k; - } - } - } - - for (int i = 0 ; i < GS_SIZE ; i++) - { - for (int j = 0 ; j < GS_SIZE ; j++) - { - for (int k = 0 ; k < GS_SIZE ; k++) - { - kk.set(i,j,k); - - if (c3.get<P::x>(kk) != i) passed = false; - if (c3.get<P::y>(kk) != j) passed = false; - if (c3.get<P::z>(kk) != k) passed = false; - if (c3.get<P::s>(kk) != i+j+k) passed = false; - - if 
(c3.get<P::v>(kk)[0] != i) passed = false; - if (c3.get<P::v>(kk)[1] != j) passed = false; - if (c3.get<P::v>(kk)[2] != k) passed = false; - - if (c3.get<P::t>(kk)[0][0] != i+i) passed = false; - if (c3.get<P::t>(kk)[0][1] != i+j) passed = false; - if (c3.get<P::t>(kk)[0][2] != i+k) passed = false; - if (c3.get<P::t>(kk)[1][0] != j+i) passed = false; - if (c3.get<P::t>(kk)[1][1] != j+j) passed = false; - if (c3.get<P::t>(kk)[1][2] != j+k) passed = false; - if (c3.get<P::t>(kk)[2][0] != k+i) passed = false; - if (c3.get<P::t>(kk)[2][1] != k+j) passed = false; - if (c3.get<P::t>(kk)[2][2] != k+k) passed = false; - } - } - } - - if (passed == true) - std::cout << "PASSED" << "\n"; - else - std::cout << "FAILED" << "\n"; -} diff --git a/src/util/boost/boost_multi_array_base_openfpm.hpp b/src/util/boost/boost_multi_array_base_openfpm.hpp deleted file mode 100644 index efc4e3943119f40eb4b49cb20730482a4779d556..0000000000000000000000000000000000000000 --- a/src/util/boost/boost_multi_array_base_openfpm.hpp +++ /dev/null @@ -1,501 +0,0 @@ -/* - * boost_multi_array_base_openfpm.hpp - * - * Created on: Jun 6, 2018 - * Author: i-bird - */ - -#ifndef OPENFPM_DATA_SRC_UTIL_BOOST_BOOST_MULTI_ARRAY_BASE_OPENFPM_HPP_ -#define OPENFPM_DATA_SRC_UTIL_BOOST_BOOST_MULTI_ARRAY_BASE_OPENFPM_HPP_ - - -// -// base.hpp - some implementation base classes for from which -// functionality is acquired -// - -#include "boost/multi_array/extent_range.hpp" -#include "boost/multi_array/extent_gen.hpp" -#include "boost/multi_array/index_range.hpp" -#include "boost/multi_array/index_gen.hpp" -#include "boost/multi_array/storage_order.hpp" -#include "boost/multi_array/types.hpp" -#include "boost/config.hpp" -#include "boost/multi_array/concept_checks.hpp" //for ignore_unused_... 
-#include "boost/mpl/eval_if.hpp" -#include "boost/mpl/if.hpp" -#include "boost/mpl/size_t.hpp" -#include "boost/iterator/reverse_iterator.hpp" -#include "boost/static_assert.hpp" -#include "boost/type.hpp" -#include "boost/assert.hpp" -#include <cstddef> -#include <memory> - -namespace boost { - -///////////////////////////////////////////////////////////////////////// -// class declarations -///////////////////////////////////////////////////////////////////////// - -template<typename T, std::size_t NumDims, - typename Allocator = std::allocator<T> > -class multi_array_openfpm; - -// This is a public interface for use by end users! -namespace multi_array_types { - typedef boost::detail::multi_array::size_type size_type; - typedef std::ptrdiff_t difference_type; - typedef boost::detail::multi_array::index index; - typedef detail::multi_array::index_range<index,size_type> index_range; - typedef detail::multi_array::extent_range<index,size_type> extent_range; - typedef detail::multi_array::index_gen<0,0> index_gen; - typedef detail::multi_array::extent_gen<0> extent_gen; -} - - -// boost::extents and boost::indices are now a part of the public -// interface. That way users don't necessarily have to create their -// own objects. On the other hand, one may not want the overhead of -// object creation in small-memory environments. Thus, the objects -// can be left undefined by defining BOOST_MULTI_ARRAY_NO_GENERATORS -// before loading multi_array.hpp. 
-//#ifndef BOOST_MULTI_ARRAY_NO_GENERATORS -//namespace { -// multi_array_types::extent_gen extents; -// multi_array_types::index_gen indices; -//} -//#endif // BOOST_MULTI_ARRAY_NO_GENERATORS - -namespace detail { -namespace multi_array { - -template <typename T, std::size_t NumDims> -class sub_array_openfpm; - -template <typename T, std::size_t NumDims, typename TPtr = const T*> -class const_sub_array_openfpm; - - template <typename T, typename TPtr, typename NumDims, typename Reference, - typename IteratorCategory> -class array_iterator_openfpm; - -template <typename T, std::size_t NumDims, typename TPtr = const T*> -class const_multi_array_view_openfpm; - -template <typename T, std::size_t NumDims> -class multi_array_view_openfpm; - -///////////////////////////////////////////////////////////////////////// -// class interfaces -///////////////////////////////////////////////////////////////////////// - -class multi_array_base_openfpm { -public: - typedef multi_array_types::size_type size_type; - typedef multi_array_types::difference_type difference_type; - typedef multi_array_types::index index; - typedef multi_array_types::index_range index_range; - typedef multi_array_types::extent_range extent_range; - typedef multi_array_types::index_gen index_gen; - typedef multi_array_types::extent_gen extent_gen; -}; - -// -// value_accessor_n -// contains the routines for accessing elements from -// N-dimensional views. 
-// -template<typename T, std::size_t NumDims> -class value_accessor_n_openfpm : public multi_array_base_openfpm { - typedef multi_array_base_openfpm super_type; -public: - typedef typename super_type::index index; - - // - // public typedefs used by classes that inherit from this base - // - typedef T element; - typedef boost::multi_array_openfpm<T,NumDims-1> value_type; - typedef sub_array_openfpm<T,NumDims-1> reference; - typedef const_sub_array_openfpm<T,NumDims-1> const_reference; - -protected: - // used by array operator[] and iterators to get reference types. - template <typename Reference, typename TPtr> - __device__ __host__ Reference access(boost::type<Reference>,index idx,TPtr base, - const size_type* extents, - const index* strides, - const index* index_bases) const { - - BOOST_ASSERT(idx - index_bases[0] >= 0); - BOOST_ASSERT(size_type(idx - index_bases[0]) < extents[0]); - // return a sub_array<T,NDims-1> proxy object - TPtr newbase = base + idx * strides[0]; - return Reference(newbase,extents+1,strides+1,index_bases+1); - - } - - __device__ __host__ value_accessor_n_openfpm() { } - __device__ __host__ ~value_accessor_n_openfpm() { } -}; - -template <class T> inline __device__ __host__ void ignore_unused_variable_warning_ofp(T const&) {} - -// -// value_accessor_one -// contains the routines for accessing reference elements from -// 1-dimensional views. -// -template<typename T> -class value_accessor_one_openfpm : public multi_array_base_openfpm { - typedef multi_array_base_openfpm super_type; -public: - typedef typename super_type::index index; - // - // public typedefs for use by classes that inherit it. - // - typedef T element; - typedef T value_type; - typedef T& reference; - typedef T const& const_reference; - -protected: - // used by array operator[] and iterators to get reference types. 
- template <typename Reference, typename TPtr> - __device__ __host__ Reference access(boost::type<Reference>,index idx,TPtr base, - const size_type* extents, - const index* strides, - const index* index_bases) const { - - ignore_unused_variable_warning_ofp(index_bases); - ignore_unused_variable_warning_ofp(extents); - BOOST_ASSERT(idx - index_bases[0] >= 0); - BOOST_ASSERT(size_type(idx - index_bases[0]) < extents[0]); - return *(base + idx * strides[0]); - } - - __device__ __host__ value_accessor_one_openfpm() { } - __device__ __host__ ~value_accessor_one_openfpm() { } -}; - - -///////////////////////////////////////////////////////////////////////// -// choose value accessor begins -// - -template <typename T, std::size_t NumDims> -struct choose_value_accessor_n_openfpm { - typedef value_accessor_n_openfpm<T,NumDims> type; -}; - -template <typename T> -struct choose_value_accessor_one_openfpm { - typedef value_accessor_one_openfpm<T> type; -}; - -template <typename T, typename NumDims> -struct value_accessor_generator_openfpm { - BOOST_STATIC_CONSTANT(std::size_t, dimensionality = NumDims::value); - - typedef typename - mpl::eval_if_c<(dimensionality == 1), - choose_value_accessor_one_openfpm<T>, - choose_value_accessor_n_openfpm<T,dimensionality> - >::type type; -}; - - -template <class T, class NumDims> -struct associated_types_openfpm - : value_accessor_generator_openfpm<T,NumDims>::type -{}; - -// -// choose value accessor ends -///////////////////////////////////////////////////////////////////////// - -// Due to some imprecision in the C++ Standard, -// MSVC 2010 is broken in debug mode: it requires -// that an Output Iterator have output_iterator_tag in its iterator_category if -// that iterator is not bidirectional_iterator or random_access_iterator. 
-#if BOOST_WORKAROUND(BOOST_MSVC, >= 1600) -struct mutable_iterator_tag - : boost::random_access_traversal_tag, std::input_iterator_tag -{ - operator std::output_iterator_tag() const { - return std::output_iterator_tag(); - } -}; -#endif - -//////////////////////////////////////////////////////////////////////// -// multi_array_base -//////////////////////////////////////////////////////////////////////// -template <typename T, std::size_t NumDims> -class multi_array_impl_base_openfpm - : - public value_accessor_generator_openfpm<T,mpl::size_t<NumDims> >::type -{ - typedef associated_types_openfpm<T,mpl::size_t<NumDims> > types; -public: - typedef typename types::index index; - typedef typename types::size_type size_type; - typedef typename types::element element; - typedef typename types::index_range index_range; - typedef typename types::value_type value_type; - typedef typename types::reference reference; - typedef typename types::const_reference const_reference; - - template <std::size_t NDims> - struct subarray { - typedef boost::detail::multi_array::sub_array_openfpm<T,NDims> type; - }; - - template <std::size_t NDims> - struct const_subarray { - typedef boost::detail::multi_array::const_sub_array_openfpm<T,NDims> type; - }; - - template <std::size_t NDims> - struct array_view_openfpm { - typedef boost::detail::multi_array::multi_array_view_openfpm<T,NDims> type; - }; - - template <std::size_t NDims> - struct const_array_view_openfpm { - public: - typedef boost::detail::multi_array::const_multi_array_view_openfpm<T,NDims> type; - }; - - // - // iterator support - // -#if BOOST_WORKAROUND(BOOST_MSVC, >= 1600) - // Deal with VC 2010 output_iterator_tag requirement - typedef array_iterator_openfpm<T,T*,mpl::size_t<NumDims>,reference, - mutable_iterator_tag> iterator; -#else - typedef array_iterator_openfpm<T,T*,mpl::size_t<NumDims>,reference, - boost::random_access_traversal_tag> iterator; -#endif - typedef array_iterator_openfpm<T,T 
const*,mpl::size_t<NumDims>,const_reference, - boost::random_access_traversal_tag> const_iterator; - - typedef ::boost::reverse_iterator<iterator> reverse_iterator; - typedef ::boost::reverse_iterator<const_iterator> const_reverse_iterator; - - BOOST_STATIC_CONSTANT(std::size_t, dimensionality = NumDims); -protected: - - __device__ __host__ multi_array_impl_base_openfpm() { } - __device__ __host__ ~multi_array_impl_base_openfpm() { } - - // Used by operator() in our array classes - template <typename Reference, typename IndexList, typename TPtr> - Reference access_element(boost::type<Reference>, - const IndexList& indices, - TPtr base, - const size_type* extents, - const index* strides, - const index* index_bases) const { - boost::function_requires< - CollectionConcept<IndexList> >(); - ignore_unused_variable_warning(index_bases); - ignore_unused_variable_warning(extents); -#if !defined(NDEBUG) && !defined(BOOST_DISABLE_ASSERTS) - for (size_type i = 0; i != NumDims; ++i) { - BOOST_ASSERT(indices[i] - index_bases[i] >= 0); - BOOST_ASSERT(size_type(indices[i] - index_bases[i]) < extents[i]); - } -#endif - - index offset = 0; - { - typename IndexList::const_iterator i = indices.begin(); - size_type n = 0; - while (n != NumDims) { - offset += (*i) * strides[n]; - ++n; - ++i; - } - } - return base[offset]; - } - - template <typename StrideList, typename ExtentList> - void compute_strides(StrideList& stride_list, ExtentList& extent_list, - const general_storage_order<NumDims>& storage) - { - // invariant: stride = the stride for dimension n - index stride = 1; - for (size_type n = 0; n != NumDims; ++n) { - index stride_sign = +1; - - if (!storage.ascending(storage.ordering(n))) - stride_sign = -1; - - // The stride for this dimension is the product of the - // lengths of the ranks minor to it. 
- stride_list[storage.ordering(n)] = stride * stride_sign; - - stride *= extent_list[storage.ordering(n)]; - } - } - - // This calculates the offset to the array base pointer due to: - // 1. dimensions stored in descending order - // 2. non-zero dimension index bases - template <typename StrideList, typename ExtentList, typename BaseList> - index - calculate_origin_offset(const StrideList& stride_list, - const ExtentList& extent_list, - const general_storage_order<NumDims>& storage, - const BaseList& index_base_list) - { - return - calculate_descending_dimension_offset(stride_list,extent_list, - storage) + - calculate_indexing_offset(stride_list,index_base_list); - } - - // This calculates the offset added to the base pointer that are - // caused by descending dimensions - template <typename StrideList, typename ExtentList> - index - calculate_descending_dimension_offset(const StrideList& stride_list, - const ExtentList& extent_list, - const general_storage_order<NumDims>& storage) - { - index offset = 0; - if (!storage.all_dims_ascending()) - for (size_type n = 0; n != NumDims; ++n) - if (!storage.ascending(n)) - offset -= (extent_list[n] - 1) * stride_list[n]; - - return offset; - } - - // This is used to reindex array_views, which are no longer - // concerned about storage order (specifically, whether dimensions - // are ascending or descending) since the viewed array handled it. - - template <typename StrideList, typename BaseList> - index - calculate_indexing_offset(const StrideList& stride_list, - const BaseList& index_base_list) - { - index offset = 0; - for (size_type n = 0; n != NumDims; ++n) - offset -= stride_list[n] * index_base_list[n]; - return offset; - } - - // Slicing using an index_gen. - // Note that populating an index_gen creates a type that encodes - // both the number of dimensions in the current Array (NumDims), and - // the Number of dimensions for the resulting view. 
This allows the - // compiler to fail if the dimensions aren't completely accounted - // for. For reasons unbeknownst to me, a BOOST_STATIC_ASSERT - // within the member function template does not work. I should add a - // note to the documentation specifying that you get a damn ugly - // error message if you screw up in your slicing code. - template <typename ArrayRef, int NDims, typename TPtr> - ArrayRef - generate_array_view(boost::type<ArrayRef>, - const boost::detail::multi_array:: - index_gen<NumDims,NDims>& indices, - const size_type* extents, - const index* strides, - const index* index_bases, - TPtr base) const { - - boost::array<index,NDims> new_strides; - boost::array<index,NDims> new_extents; - - index offset = 0; - size_type dim = 0; - for (size_type n = 0; n != NumDims; ++n) - { - // Use array specs and input specs to produce real specs. - const index default_start = index_bases[n]; - const index default_finish = default_start+extents[n]; - const index_range& current_range = indices.ranges_[n]; - index start = current_range.get_start(default_start); - index finish = current_range.get_finish(default_finish); - index stride = current_range.stride(); - BOOST_ASSERT(stride != 0); - - // An index range indicates a half-open strided interval - // [start,finish) (with stride) which faces upward when stride - // is positive and downward when stride is negative, - - // RG: The following code for calculating length suffers from - // some representation issues: if finish-start cannot be represented as - // by type index, then overflow may result. - - index len; - if ((finish - start) / stride < 0) - { - // [start,finish) is empty according to the direction imposed by - // the stride. - len = 0; - } - else - { - // integral trick for ceiling((finish-start) / stride) - // taking into account signs. - index shrinkage = stride > 0 ? 
1 : -1; - len = (finish - start + (stride - shrinkage)) / stride; - } - - // start marks the closed side of the range, so it must lie - // exactly in the set of legal indices - // with a special case for empty arrays - BOOST_ASSERT(index_bases[n] <= start && - ((start <= index_bases[n]+index(extents[n])) || - (start == index_bases[n] && extents[n] == 0))); - -#ifndef BOOST_DISABLE_ASSERTS - // finish marks the open side of the range, so it can go one past - // the "far side" of the range (the top if stride is positive, the bottom - // if stride is negative). - index bound_adjustment = stride < 0 ? 1 : 0; - BOOST_ASSERT(((index_bases[n] - bound_adjustment) <= finish) && - (finish <= (index_bases[n] + index(extents[n]) - bound_adjustment))); -#endif // BOOST_DISABLE_ASSERTS - - - // the array data pointer is modified to account for non-zero - // bases during slicing (see [Garcia] for the math involved) - offset += start * strides[n]; - - if (!current_range.is_degenerate()) { - - // The stride for each dimension is included into the - // strides for the array_view (see [Garcia] for the math involved). 
- new_strides[dim] = stride * strides[n]; - - // calculate new extents - new_extents[dim] = len; - ++dim; - } - } - BOOST_ASSERT(dim == NDims); - - return - ArrayRef(base+offset, - new_extents, - new_strides); - } - - -}; - -} // namespace multi_array -} // namespace detail - -} // namespace boost - - -#endif /* OPENFPM_DATA_SRC_UTIL_BOOST_BOOST_MULTI_ARRAY_BASE_OPENFPM_HPP_ */ diff --git a/src/util/boost/boost_multi_array_iterator_openfpm.hpp b/src/util/boost/boost_multi_array_iterator_openfpm.hpp deleted file mode 100644 index 31dc221415cefc61c8320ec1691a3dfa55d238c0..0000000000000000000000000000000000000000 --- a/src/util/boost/boost_multi_array_iterator_openfpm.hpp +++ /dev/null @@ -1,164 +0,0 @@ -/* - * boost_multi_array_iterator_openfpm.hpp - * - * Created on: Jun 7, 2018 - * Author: i-bird - */ - -#ifndef OPENFPM_DATA_SRC_UTIL_BOOST_BOOST_MULTI_ARRAY_ITERATOR_OPENFPM_HPP_ -#define OPENFPM_DATA_SRC_UTIL_BOOST_BOOST_MULTI_ARRAY_ITERATOR_OPENFPM_HPP_ - - -// -// iterator.hpp - implementation of iterators for the -// multi-dimensional array class -// - -#include "boost/iterator/iterator_facade.hpp" -#include <algorithm> -#include <cstddef> -#include <iterator> -#include "util/boost/boost_multi_array_base_openfpm.hpp" -#include "util/cuda_util.hpp" - -namespace boost { -namespace detail { -namespace multi_array { - -template <class T> -struct operator_arrow_proxy_openfpm -{ - operator_arrow_proxy_openfpm(T const& px) : value_(px) {} - T* operator->() const { return &value_; } - // This function is needed for MWCW and BCC, which won't call operator-> - // again automatically per 13.3.1.2 para 8 - operator T*() const { return &value_; } - mutable T value_; -}; - -///////////////////////////////////////////////////////////////////////// -// iterator components -///////////////////////////////////////////////////////////////////////// - -template <typename T, typename TPtr, typename NumDims, typename Reference, - typename IteratorCategory> -class 
array_iterator_openfpm; - -template <typename T, typename TPtr, typename NumDims, typename Reference, - typename IteratorCategory> -class array_iterator_openfpm - : public - iterator_facade< - array_iterator_openfpm<T,TPtr,NumDims,Reference,IteratorCategory> - , typename associated_types_openfpm<T,NumDims>::value_type - , IteratorCategory - , Reference - > - , private - value_accessor_generator_openfpm<T,NumDims>::type -{ - friend class iterator_core_access; - typedef detail::multi_array::associated_types_openfpm<T,NumDims> access_t; - - typedef iterator_facade< - array_iterator_openfpm<T,TPtr,NumDims,Reference,IteratorCategory> - , typename detail::multi_array::associated_types_openfpm<T,NumDims>::value_type - , boost::random_access_traversal_tag - , Reference - > facade_type; - - typedef typename access_t::index index; - typedef typename access_t::size_type size_type; - -#ifndef BOOST_NO_MEMBER_TEMPLATE_FRIENDS - template <typename, typename, typename, typename, typename> - friend class array_iterator_openfpm; -#else - public: -#endif - - index idx_; - TPtr base_; - const size_type* extents_; - const index* strides_; - const index* index_base_; - -public: - // Typedefs to circumvent ambiguities between parent classes - typedef typename facade_type::reference reference; - typedef typename facade_type::value_type value_type; - typedef typename facade_type::difference_type difference_type; - - __device__ __host__ array_iterator_openfpm() {} - - __device__ __host__ array_iterator_openfpm(index idx, TPtr base, const size_type* extents, - const index* strides, - const index* index_base) : - idx_(idx), base_(base), extents_(extents), - strides_(strides), index_base_(index_base) { } - - template <typename OPtr, typename ORef, typename Cat> - array_iterator_openfpm( - const array_iterator_openfpm<T,OPtr,NumDims,ORef,Cat>& rhs - , typename boost::enable_if_convertible<OPtr,TPtr>::type* = 0 - ) - : idx_(rhs.idx_), base_(rhs.base_), extents_(rhs.extents_), - 
strides_(rhs.strides_), index_base_(rhs.index_base_) { } - - - // RG - we make our own operator-> - operator_arrow_proxy_openfpm<reference> - operator->() const - { - return operator_arrow_proxy_openfpm<reference>(this->dereference()); - } - - - reference dereference() const - { - typedef typename value_accessor_generator_openfpm<T,NumDims>::type accessor; - return accessor::access(boost::type<reference>(), - idx_, - base_, - extents_, - strides_, - index_base_); - } - - void increment() { ++idx_; } - void decrement() { --idx_; } - - template <class IteratorAdaptor> - bool equal(IteratorAdaptor& rhs) const { - const std::size_t N = NumDims::value; - return (idx_ == rhs.idx_) && - (base_ == rhs.base_) && - ( (extents_ == rhs.extents_) || - std::equal(extents_,extents_+N,rhs.extents_) ) && - ( (strides_ == rhs.strides_) || - std::equal(strides_,strides_+N,rhs.strides_) ) && - ( (index_base_ == rhs.index_base_) || - std::equal(index_base_,index_base_+N,rhs.index_base_) ); - } - - template <class DifferenceType> - void advance(DifferenceType n) { - idx_ += n; - } - - template <class IteratorAdaptor> - typename facade_type::difference_type - distance_to(IteratorAdaptor& rhs) const { - return rhs.idx_ - idx_; - } - - -}; - -} // namespace multi_array -} // namespace detail -} // namespace boost - - - -#endif /* OPENFPM_DATA_SRC_UTIL_BOOST_BOOST_MULTI_ARRAY_ITERATOR_OPENFPM_HPP_ */ diff --git a/src/util/boost/boost_multi_array_openfpm.hpp b/src/util/boost/boost_multi_array_openfpm.hpp deleted file mode 100644 index ae90ffe70b32908314dde06103724b54c848522a..0000000000000000000000000000000000000000 --- a/src/util/boost/boost_multi_array_openfpm.hpp +++ /dev/null @@ -1,500 +0,0 @@ -/* - * boost_multi_array_openfpm.hpp - * - * Created on: Jun 7, 2018 - * Author: i-bird - */ - -#ifndef OPENFPM_DATA_SRC_UTIL_BOOST_MULTI_ARRAY_OPENFPM_HPP_ -#define OPENFPM_DATA_SRC_UTIL_BOOST_MULTI_ARRAY_OPENFPM_HPP_ - -#include "util/cuda_util.hpp" - -// -// multi_array.hpp - contains the 
multi_array class template -// declaration and definition -// - -#include "boost/multi_array/collection_concept.hpp" -#include "boost/multi_array/copy_array.hpp" -#include "boost_multi_array_subarray_openfpm.hpp" -#include "boost_multi_array_ref_openfpm.hpp" -#include "boost/multi_array/algorithm.hpp" -#include "boost/array.hpp" -#include "boost/mpl/if.hpp" -#include "boost/type_traits.hpp" -#include <algorithm> -#include <cstddef> -#include <functional> -#include <numeric> -#include <vector> -#include "util/boost/boost_multi_array_base_openfpm.hpp" -#include "util/boost/boost_multi_array_iterator_openfpm.hpp" - -namespace boost { - namespace detail { - namespace multi_array { - - struct populate_index_ranges_openfpm { - multi_array_types::index_range - // RG: underscore on extent_ to stifle strange MSVC warning. - operator()(multi_array_types::index base, - multi_array_types::size_type extent_) { - return multi_array_types::index_range(base,base+extent_); - } - }; - -#ifdef BOOST_NO_FUNCTION_TEMPLATE_ORDERING -// -// Compilers that don't support partial ordering may need help to -// disambiguate multi_array's templated constructors. Even vc6/7 are -// capable of some limited SFINAE, so we take the most-general version -// out of the overload set with disable_multi_array_impl. -// -template <typename T, std::size_t NumDims, typename TPtr> -char is_multi_array_impl_help(const_multi_array_view<T,NumDims,TPtr>&); -template <typename T, std::size_t NumDims, typename TPtr> -char is_multi_array_impl_help(const_sub_array<T,NumDims,TPtr>&); -template <typename T, std::size_t NumDims, typename TPtr> -char is_multi_array_impl_help(const_multi_array_ref<T,NumDims,TPtr>&); - -char ( &is_multi_array_impl_help(...) 
)[2]; - -template <class T> -struct is_multi_array_impl -{ - static T x; - BOOST_STATIC_CONSTANT(bool, value = sizeof((is_multi_array_impl_help)(x)) == 1); - - typedef mpl::bool_<value> type; -}; - -template <bool multi_array = false> -struct disable_multi_array_impl_impl -{ - typedef int type; -}; - -template <> -struct disable_multi_array_impl_impl<true> -{ - // forming a pointer to a reference triggers SFINAE - typedef int& type; -}; - - -template <class T> -struct disable_multi_array_impl : - disable_multi_array_impl_impl<is_multi_array_impl<T>::value> -{ }; - - -template <> -struct disable_multi_array_impl<int> -{ - typedef int type; -}; - - -#endif - - } //namespace multi_array - } // namespace detail - -template<typename T, std::size_t NumDims, - typename Allocator> -class multi_array_openfpm : - public multi_array_ref_openfpm<T,NumDims> -{ - typedef multi_array_ref_openfpm<T,NumDims> super_type; -public: - typedef typename super_type::value_type value_type; - typedef typename super_type::reference reference; - typedef typename super_type::const_reference const_reference; - typedef typename super_type::iterator iterator; - typedef typename super_type::const_iterator const_iterator; - typedef typename super_type::reverse_iterator reverse_iterator; - typedef typename super_type::const_reverse_iterator const_reverse_iterator; - typedef typename super_type::element element; - typedef typename super_type::size_type size_type; - typedef typename super_type::difference_type difference_type; - typedef typename super_type::index index; - typedef typename super_type::extent_range extent_range; - - - template <std::size_t NDims> - struct const_array_view - { - typedef boost::detail::multi_array::const_multi_array_view_openfpm<T,NDims> type; - }; - - template <std::size_t NDims> - struct array_view - { - typedef boost::detail::multi_array::multi_array_view_openfpm<T,NDims> type; - }; - - explicit multi_array_openfpm() - :super_type((T*)initial_base_,c_storage_order(), - 
/*index_bases=*/0, /*extents=*/0) - { - allocate_space(); - } - - template <class ExtentList> - explicit multi_array_openfpm( - ExtentList const& extents -#ifdef BOOST_NO_FUNCTION_TEMPLATE_ORDERING - , typename mpl::if_< - detail::multi_array::is_multi_array_impl<ExtentList>, - int&,int>::type* = 0 -#endif - ) : - super_type((T*)initial_base_,extents) - { - boost::function_requires< - detail::multi_array::CollectionConcept<ExtentList> >(); - allocate_space(); - } - - - template <class ExtentList> - explicit multi_array_openfpm(ExtentList const& extents, - const general_storage_order<NumDims>& so) : - super_type((T*)initial_base_,extents,so) { - boost::function_requires< - detail::multi_array::CollectionConcept<ExtentList> >(); - allocate_space(); - } - - template <class ExtentList> - explicit multi_array_openfpm(ExtentList const& extents, - const general_storage_order<NumDims>& so, - Allocator const& alloc) : - super_type((T*)initial_base_,extents,so), allocator_(alloc) { - boost::function_requires< - detail::multi_array::CollectionConcept<ExtentList> >(); - allocate_space(); - } - - - explicit multi_array_openfpm(const detail::multi_array - ::extent_gen<NumDims>& ranges) : - super_type((T*)initial_base_,ranges) { - - allocate_space(); - } - - - explicit multi_array_openfpm(const detail::multi_array - ::extent_gen<NumDims>& ranges, - const general_storage_order<NumDims>& so) : - super_type((T*)initial_base_,ranges,so) { - - allocate_space(); - } - - - explicit multi_array_openfpm(const detail::multi_array - ::extent_gen<NumDims>& ranges, - const general_storage_order<NumDims>& so, - Allocator const& alloc) : - super_type((T*)initial_base_,ranges,so), allocator_(alloc) { - - allocate_space(); - } - - multi_array_openfpm(const multi_array_openfpm& rhs) : - super_type(rhs), allocator_(rhs.allocator_) { - allocate_space(); - boost::detail::multi_array::copy_n(rhs.base_,rhs.num_elements(),base_); - } - - - // - // A multi_array is constructible from any multi_array_ref, 
subarray, or - // array_view object. The following constructors ensure that. - // - - // Due to limited support for partial template ordering, - // MSVC 6&7 confuse the following with the most basic ExtentList - // constructor. -#ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERING - template <typename OPtr> - multi_array_openfpm(const const_multi_array_ref_openfpm<T,NumDims,OPtr>& rhs, - const general_storage_order<NumDims>& so = c_storage_order()) - : super_type(0,so,rhs.index_bases(),rhs.shape()) - { - allocate_space(); - // Warning! storage order may change, hence the following copy technique. - std::copy(rhs.begin(),rhs.end(),this->begin()); - } - - template <typename OPtr> - multi_array_openfpm(const detail::multi_array:: - const_sub_array<T,NumDims,OPtr>& rhs, - const general_storage_order<NumDims>& so = c_storage_order()) - : super_type(0,so,rhs.index_bases(),rhs.shape()) - { - allocate_space(); - std::copy(rhs.begin(),rhs.end(),this->begin()); - } - - - template <typename OPtr> - multi_array_openfpm(const detail::multi_array:: - const_multi_array_view<T,NumDims,OPtr>& rhs, - const general_storage_order<NumDims>& so = c_storage_order()) - : super_type(0,so,rhs.index_bases(),rhs.shape()) - { - allocate_space(); - std::copy(rhs.begin(),rhs.end(),this->begin()); - } - -#else // BOOST_NO_FUNCTION_TEMPLATE_ORDERING - // More limited support for MSVC - - - multi_array(const const_multi_array_ref<T,NumDims>& rhs) - : super_type(0,c_storage_order(),rhs.index_bases(),rhs.shape()) - { - allocate_space(); - // Warning! storage order may change, hence the following copy technique. - std::copy(rhs.begin(),rhs.end(),this->begin()); - } - - multi_array(const const_multi_array_ref<T,NumDims>& rhs, - const general_storage_order<NumDims>& so) - : super_type(0,so,rhs.index_bases(),rhs.shape()) - { - allocate_space(); - // Warning! storage order may change, hence the following copy technique. 
- std::copy(rhs.begin(),rhs.end(),this->begin()); - } - - multi_array(const detail::multi_array:: - const_sub_array<T,NumDims>& rhs) - : super_type(0,c_storage_order(),rhs.index_bases(),rhs.shape()) - { - allocate_space(); - std::copy(rhs.begin(),rhs.end(),this->begin()); - } - - multi_array(const detail::multi_array:: - const_sub_array<T,NumDims>& rhs, - const general_storage_order<NumDims>& so) - : super_type(0,so,rhs.index_bases(),rhs.shape()) - { - allocate_space(); - std::copy(rhs.begin(),rhs.end(),this->begin()); - } - - - multi_array(const detail::multi_array:: - const_multi_array_view<T,NumDims>& rhs) - : super_type(0,c_storage_order(),rhs.index_bases(),rhs.shape()) - { - allocate_space(); - std::copy(rhs.begin(),rhs.end(),this->begin()); - } - - multi_array(const detail::multi_array:: - const_multi_array_view<T,NumDims>& rhs, - const general_storage_order<NumDims>& so) - : super_type(0,so,rhs.index_bases(),rhs.shape()) - { - allocate_space(); - std::copy(rhs.begin(),rhs.end(),this->begin()); - } - -#endif // !BOOST_NO_FUNCTION_TEMPLATE_ORDERING - - // Thes constructors are necessary because of more exact template matches. - multi_array_openfpm(const multi_array_ref_openfpm<T,NumDims>& rhs) - : super_type(0,c_storage_order(),rhs.index_bases(),rhs.shape()) - { - allocate_space(); - // Warning! storage order may change, hence the following copy technique. - std::copy(rhs.begin(),rhs.end(),this->begin()); - } - - multi_array_openfpm(const multi_array_ref_openfpm<T,NumDims>& rhs, - const general_storage_order<NumDims>& so) - : super_type(0,so,rhs.index_bases(),rhs.shape()) - { - allocate_space(); - // Warning! storage order may change, hence the following copy technique. 
- std::copy(rhs.begin(),rhs.end(),this->begin()); - } - - - multi_array_openfpm(const detail::multi_array:: - sub_array<T,NumDims>& rhs) - : super_type(0,c_storage_order(),rhs.index_bases(),rhs.shape()) - { - allocate_space(); - std::copy(rhs.begin(),rhs.end(),this->begin()); - } - - multi_array_openfpm(const detail::multi_array:: - sub_array<T,NumDims>& rhs, - const general_storage_order<NumDims>& so) - : super_type(0,so,rhs.index_bases(),rhs.shape()) - { - allocate_space(); - std::copy(rhs.begin(),rhs.end(),this->begin()); - } - - - multi_array_openfpm(const detail::multi_array:: - multi_array_view<T,NumDims>& rhs) - : super_type(0,c_storage_order(),rhs.index_bases(),rhs.shape()) - { - allocate_space(); - std::copy(rhs.begin(),rhs.end(),this->begin()); - } - - multi_array_openfpm(const detail::multi_array:: - multi_array_view<T,NumDims>& rhs, - const general_storage_order<NumDims>& so) - : super_type(0,so,rhs.index_bases(),rhs.shape()) - { - allocate_space(); - std::copy(rhs.begin(),rhs.end(),this->begin()); - } - - // Since assignment is a deep copy, multi_array_ref - // contains all the necessary code. 
- template <typename ConstMultiArray> - multi_array_openfpm& operator=(const ConstMultiArray& other) { - super_type::operator=(other); - return *this; - } - - multi_array_openfpm& operator=(const multi_array_openfpm& other) { - if (&other != this) { - super_type::operator=(other); - } - return *this; - } - - - template <typename ExtentList> - multi_array_openfpm& resize(const ExtentList& extents) { - boost::function_requires< - detail::multi_array::CollectionConcept<ExtentList> >(); - - typedef detail::multi_array::extent_gen<NumDims> gen_type; - gen_type ranges; - - for (int i=0; i != NumDims; ++i) { - typedef typename gen_type::range range_type; - ranges.ranges_[i] = range_type(0,extents[i]); - } - - return this->resize(ranges); - } - - - - multi_array_openfpm& resize(const detail::multi_array - ::extent_gen<NumDims>& ranges) { - - - // build a multi_array with the specs given - multi_array_openfpm new_array(ranges,this->storage_order()); - - - // build a view of tmp with the minimum extents - - // Get the minimum extents of the arrays. 
- boost::array<size_type,NumDims> min_extents; - - const size_type& (*min)(const size_type&, const size_type&) = - std::min; - std::transform(new_array.extent_list_.begin(),new_array.extent_list_.end(), - this->extent_list_.begin(), - min_extents.begin(), - min); - - - // typedef boost::array<index,NumDims> index_list; - // Build index_gen objects to create views with the same shape - - // these need to be separate to handle non-zero index bases - typedef detail::multi_array::index_gen<NumDims,NumDims> index_gen; - index_gen old_idxes; - index_gen new_idxes; - - std::transform(new_array.index_base_list_.begin(), - new_array.index_base_list_.end(), - min_extents.begin(),new_idxes.ranges_.begin(), - detail::multi_array::populate_index_ranges()); - - std::transform(this->index_base_list_.begin(), - this->index_base_list_.end(), - min_extents.begin(),old_idxes.ranges_.begin(), - detail::multi_array::populate_index_ranges()); - - // Build same-shape views of the two arrays - typename - multi_array_openfpm::BOOST_NESTED_TEMPLATE array_view_openfpm<NumDims>::type view_old = (*this)[old_idxes]; - typename - multi_array_openfpm::BOOST_NESTED_TEMPLATE array_view_openfpm<NumDims>::type view_new = new_array[new_idxes]; - - // Set the right portion of the new array - view_new = view_old; - - using std::swap; - // Swap the internals of these arrays. 
- swap(this->super_type::base_,new_array.super_type::base_); - swap(this->storage_,new_array.storage_); - swap(this->extent_list_,new_array.extent_list_); - swap(this->stride_list_,new_array.stride_list_); - swap(this->index_base_list_,new_array.index_base_list_); - swap(this->origin_offset_,new_array.origin_offset_); - swap(this->directional_offset_,new_array.directional_offset_); - swap(this->num_elements_,new_array.num_elements_); - swap(this->allocator_,new_array.allocator_); - swap(this->base_,new_array.base_); - swap(this->allocated_elements_,new_array.allocated_elements_); - - return *this; - } - - - ~multi_array_openfpm() { - deallocate_space(); - } - -private: - void allocate_space() { - typename Allocator::const_pointer no_hint=0; - base_ = allocator_.allocate(this->num_elements(),no_hint); - this->set_base_ptr(base_); - allocated_elements_ = this->num_elements(); - std::uninitialized_fill_n(base_,allocated_elements_,T()); - } - - void deallocate_space() { - if(base_) { - for(T* i = base_; i != base_+allocated_elements_; ++i) - allocator_.destroy(i); - allocator_.deallocate(base_,allocated_elements_); - } - } - - typedef boost::array<size_type,NumDims> size_list; - typedef boost::array<index,NumDims> index_list; - - Allocator allocator_; - T* base_; - size_type allocated_elements_; - enum {initial_base_ = 0}; -}; - -} // namespace boost - - - -#endif /* OPENFPM_DATA_SRC_UTIL_BOOST_MULTI_ARRAY_OPENFPM_HPP_ */ diff --git a/src/util/boost/boost_multi_array_ref_openfpm.hpp b/src/util/boost/boost_multi_array_ref_openfpm.hpp deleted file mode 100644 index 6531542502a9c48abe25803259896b1fe4f1f039..0000000000000000000000000000000000000000 --- a/src/util/boost/boost_multi_array_ref_openfpm.hpp +++ /dev/null @@ -1,792 +0,0 @@ -// Copyright 2002 The Trustees of Indiana University. - -// Use, modification and distribution is subject to the Boost Software -// License, Version 1.0. 
(See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -// Boost.MultiArray Library -// Authors: Ronald Garcia -// Jeremy Siek -// Andrew Lumsdaine -// -// Modified by Pietro incardona for openfpm -// -// See http://www.boost.org/libs/multi_array for documentation. - -#ifndef OPENFPM_DATA_SRC_GRID_BOOST_MULTI_ARRAY_OPENFPM_HPP_ -#define OPENFPM_DATA_SRC_GRID_BOOST_MULTI_ARRAY_OPENFPM_HPP_ - - -// -// multi_array_ref.hpp - code for creating "views" of array data. -// - -#include "boost/multi_array/collection_concept.hpp" -#include "boost/multi_array/concept_checks.hpp" -#include "boost/multi_array/storage_order.hpp" -#include "boost_multi_array_view_openfpm.hpp" -#include "boost/multi_array/algorithm.hpp" -#include "boost/type_traits/is_integral.hpp" -#include "boost/utility/enable_if.hpp" -#include "boost_array_openfpm.hpp" -#include "boost/concept_check.hpp" -#include "boost/functional.hpp" -#include "boost/limits.hpp" -#include <algorithm> -#include <cstddef> -#include <functional> -#include <numeric> - -#include "util/boost/boost_multi_array_base_openfpm.hpp" -#include "util/boost/boost_multi_array_iterator_openfpm.hpp" -#include "util/boost/boost_multi_array_subarray_openfpm.hpp" - -namespace boost { - - // RG - This is to make things work with VC++. So sad, so sad. - class c_storage_order; - class fortran_storage_order; - class ofp_storage_order; - - template <std::size_t NumDims> - class general_storage_order_ofp - { - public: - typedef detail::multi_array::size_type size_type; - template <typename OrderingIter, typename AscendingIter> - general_storage_order_ofp(OrderingIter ordering, - AscendingIter ascending) { - boost::detail::multi_array::copy_n(ordering,NumDims,ordering_.begin()); - boost::detail::multi_array::copy_n(ascending,NumDims,ascending_.begin()); - } - - // RG - ideally these would not be necessary, but some compilers - // don't like template conversion operators. 
I suspect that not - // too many folk will feel the need to use customized - // storage_order objects, I sacrifice that feature for compiler support. - general_storage_order_ofp(const c_storage_order&) { - for (size_type i=0; i != NumDims; ++i) { - ordering_[i] = NumDims - 1 - i; - } - ascending_.assign(true); - } - - general_storage_order_ofp(const fortran_storage_order&) { - for (size_type i=0; i != NumDims; ++i) { - ordering_[i] = i; - } - ascending_.assign(true); - } - - general_storage_order_ofp(const ofp_storage_order&) { - ordering_[NumDims - 1] = 0; - - for (size_type i=0; i != NumDims; ++i) { - ordering_[i] = i + 1; - } - ascending_.assign(true); - } - - size_type ordering(size_type dim) const { return ordering_[dim]; } - bool ascending(size_type dim) const { return ascending_[dim]; } - - bool all_dims_ascending() const { - return std::accumulate(ascending_.begin(),ascending_.end(),true, - std::logical_and<bool>()); - } - - bool operator==(general_storage_order_ofp const& rhs) const { - return (ordering_ == rhs.ordering_) && - (ascending_ == rhs.ascending_); - } - - protected: - boost::array<size_type,NumDims> ordering_; - boost::array<bool,NumDims> ascending_; - }; - - class ofp_storage_order - { - typedef detail::multi_array::size_type size_type; - public: - // This is the idiom for creating your own custom storage orders. - // Not supported by all compilers though! -#ifndef __MWERKS__ // Metrowerks screams "ambiguity!" 
- template <std::size_t NumDims> - operator general_storage_order<NumDims>() const { - boost::array<size_type,NumDims> ordering; - boost::array<bool,NumDims> ascending; - - ordering[0] = 0; - ascending[0] = true; - for (size_type i=1; i != NumDims; ++i) { - ordering[i] = NumDims - i; - ascending[i] = true; - } - return general_storage_order<NumDims>(ordering.begin(), - ascending.begin()); - } -#endif - }; - -template <typename T, std::size_t NumDims, - typename TPtr = const T*> -class const_multi_array_ref_openfpm : public detail::multi_array::multi_array_impl_base_openfpm<T,NumDims> -{ - typedef detail::multi_array::multi_array_impl_base_openfpm<T,NumDims> super_type; - - public: - - typedef typename super_type::value_type value_type; - typedef typename super_type::const_reference const_reference; - typedef typename super_type::const_iterator const_iterator; - typedef typename super_type::const_reverse_iterator const_reverse_iterator; - typedef typename super_type::element element; - typedef typename super_type::size_type size_type; - typedef typename super_type::difference_type difference_type; - typedef typename super_type::index index; - typedef typename super_type::extent_range extent_range; - typedef general_storage_order<NumDims> storage_order_type; - - // template typedefs - template <std::size_t NDims> - struct const_array_view_openfpm - { - typedef boost::detail::multi_array::const_multi_array_view_openfpm<T,NDims> type; - }; - - template <std::size_t NDims> - struct array_view - { - typedef boost::detail::multi_array::multi_array_view<T,NDims> type; - }; - - #ifndef BOOST_NO_MEMBER_TEMPLATE_FRIENDS - // make const_multi_array_ref a friend of itself - template <typename,std::size_t,typename> - friend class const_multi_array_ref; - #endif - - // This ensures that const_multi_array_ref types with different TPtr - // types can convert to each other - template <typename OPtr> - const_multi_array_ref_openfpm(const const_multi_array_ref_openfpm<T,NumDims,OPtr>& 
other) - : base_(other.base_), storage_(other.storage_), - extent_list_(other.extent_list_), - stride_list_(other.stride_list_), - index_base_list_(other.index_base_list_), - origin_offset_(other.origin_offset_), - directional_offset_(other.directional_offset_), - num_elements_(other.num_elements_) { } - - template <typename ExtentList> - explicit const_multi_array_ref_openfpm(TPtr base, const ExtentList& extents) - :base_(base), storage_(c_storage_order()) - { - boost::function_requires< - CollectionConcept<ExtentList> >(); - - index_base_list_.assign(0); - init_multi_array_ref(extents.begin()); - } - - template <typename ExtentList> - explicit const_multi_array_ref_openfpm(TPtr base, const ExtentList& extents, - const general_storage_order<NumDims>& so) - :base_(base), storage_(so) - { - boost::function_requires< - CollectionConcept<ExtentList> >(); - - index_base_list_.assign(0); - init_multi_array_ref(extents.begin()); - } - - explicit const_multi_array_ref_openfpm(TPtr base, - const detail::multi_array:: - extent_gen<NumDims>& ranges) - :base_(base), storage_(c_storage_order()) - { - - init_from_extent_gen(ranges); - } - - explicit const_multi_array_ref_openfpm(TPtr base, - const detail::multi_array:: - extent_gen<NumDims>& ranges, - const general_storage_order<NumDims>& so) - :base_(base), storage_(so) - { - init_from_extent_gen(ranges); - } - - template <class InputIterator> - void assign(InputIterator begin, InputIterator end) { - boost::function_requires<InputIteratorConcept<InputIterator> >(); - - InputIterator in_iter = begin; - T* out_iter = base_; - std::size_t copy_count=0; - while (in_iter != end && copy_count < num_elements_) { - *out_iter++ = *in_iter++; - copy_count++; - } - } - - template <class BaseList> - #ifdef BOOST_NO_SFINAE - void - #else - typename - disable_if<typename boost::is_integral<BaseList>::type,void >::type - #endif // BOOST_NO_SFINAE - reindex(const BaseList& values) { - boost::function_requires< - CollectionConcept<BaseList> 
>(); - boost::detail::multi_array:: - copy_n(values.begin(),num_dimensions(),index_base_list_.begin()); - origin_offset_ = - this->calculate_origin_offset(stride_list_,extent_list_, - storage_,index_base_list_); - } - - void reindex(index value) { - index_base_list_.assign(value); - origin_offset_ = - this->calculate_origin_offset(stride_list_,extent_list_, - storage_,index_base_list_); - } - - template <typename SizeList> - void reshape(const SizeList& extents) { - boost::function_requires< - CollectionConcept<SizeList> >(); - BOOST_ASSERT(num_elements_ == - std::accumulate(extents.begin(),extents.end(), - size_type(1),std::multiplies<size_type>())); - - std::copy(extents.begin(),extents.end(),extent_list_.begin()); - this->compute_strides(stride_list_,extent_list_,storage_); - - origin_offset_ = this->calculate_origin_offset(stride_list_,extent_list_, storage_,index_base_list_); - } - - size_type num_dimensions() const { return NumDims; } - - size_type size() const { return extent_list_.front(); } - - // given reshaping functionality, this is the max possible size. 
- size_type max_size() const { return num_elements(); } - - bool empty() const { return size() == 0; } - - __device__ __host__ const size_type* shape() const { - return extent_list_.data(); - } - - __device__ __host__ const index* strides() const { - return stride_list_.data(); - } - - __device__ __host__ const element* origin() const { return base_+origin_offset_; } - __device__ __host__ const element* data() const { return base_; } - - size_type num_elements() const { return num_elements_; } - - __device__ __host__ const index* index_bases() const { - return index_base_list_.data(); - } - - - const storage_order_type& storage_order() const { - return storage_; - } - - template <typename IndexList> - const element& operator()(IndexList indices) const { - boost::function_requires< - CollectionConcept<IndexList> >(); - return super_type::access_element(boost::type<const element&>(), - indices,origin(), - shape(),strides(),index_bases()); - } - - // Only allow const element access - __device__ __host__ const_reference operator[](index idx) const { - return super_type::access(boost::type<const_reference>(), - idx,origin(), - shape(),strides(),index_bases()); - } - - // see generate_array_view in base.hpp - template <int NDims> - __device__ __host__ typename const_array_view_openfpm<NDims>::type - operator[](const detail::multi_array:: - index_gen<NumDims,NDims>& indices) - const { - typedef typename const_array_view_openfpm<NDims>::type return_type; - return - super_type::generate_array_view(boost::type<return_type>(), - indices, - shape(), - strides(), - index_bases(), - origin()); - } - - const_iterator begin() const { - return const_iterator(*index_bases(),origin(), - shape(),strides(),index_bases()); - } - - const_iterator end() const { - return const_iterator(*index_bases()+(index)*shape(),origin(), - shape(),strides(),index_bases()); - } - - const_reverse_iterator rbegin() const { - return const_reverse_iterator(end()); - } - - const_reverse_iterator rend() 
const { - return const_reverse_iterator(begin()); - } - - - template <typename OPtr> - bool operator==(const - const_multi_array_ref_openfpm<T,NumDims,OPtr>& rhs) - const { - if(std::equal(extent_list_.begin(), - extent_list_.end(), - rhs.extent_list_.begin())) - return std::equal(begin(),end(),rhs.begin()); - else return false; - } - - template <typename OPtr> - bool operator<(const - const_multi_array_ref_openfpm<T,NumDims,OPtr>& rhs) - const { - return std::lexicographical_compare(begin(),end(),rhs.begin(),rhs.end()); - } - - template <typename OPtr> - bool operator!=(const - const_multi_array_ref_openfpm<T,NumDims,OPtr>& rhs) - const { - return !(*this == rhs); - } - - template <typename OPtr> - bool operator>(const - const_multi_array_ref_openfpm<T,NumDims,OPtr>& rhs) - const { - return rhs < *this; - } - - template <typename OPtr> - bool operator<=(const - const_multi_array_ref_openfpm<T,NumDims,OPtr>& rhs) - const { - return !(*this > rhs); - } - - template <typename OPtr> - bool operator>=(const - const_multi_array_ref_openfpm<T,NumDims,OPtr>& rhs) - const { - return !(*this < rhs); - } - - - #ifndef BOOST_NO_MEMBER_TEMPLATE_FRIENDS - protected: - #else - public: - #endif - - typedef boost::array_openfpm<size_type,NumDims> size_list; - typedef boost::array_openfpm<index,NumDims> index_list; - - // This is used by multi_array, which is a subclass of this - void set_base_ptr(TPtr new_base) { base_ = new_base; } - - - // This constructor supports multi_array's default constructor - // and constructors from multi_array_ref, subarray, and array_view - explicit - const_multi_array_ref_openfpm(TPtr base, - const storage_order_type& so, - const index * index_bases, - const size_type* extents) : - base_(base), storage_(so), origin_offset_(0), directional_offset_(0) - { - // If index_bases or extents is null, then initialize the corresponding - // private data to zeroed lists. 
- if(index_bases) { - boost::detail::multi_array:: - copy_n(index_bases,NumDims,index_base_list_.begin()); - } else { - std::fill_n(index_base_list_.begin(),NumDims,0); - } - if(extents) { - init_multi_array_ref(extents); - } else { - boost::array<index,NumDims> extent_list; - extent_list.assign(0); - init_multi_array_ref(extent_list.begin()); - } - } - - - TPtr base_; - storage_order_type storage_; - size_list extent_list_; - index_list stride_list_; - index_list index_base_list_; - index origin_offset_; - index directional_offset_; - size_type num_elements_; - - private: - // const_multi_array_ref cannot be assigned to (no deep copies!) - const_multi_array_ref_openfpm& operator=(const const_multi_array_ref_openfpm & other); - - void init_from_extent_gen(const - detail::multi_array:: - extent_gen<NumDims>& ranges) { - - typedef boost::array<index,NumDims> extent_list; - - // get the index_base values - std::transform(ranges.ranges_.begin(),ranges.ranges_.end(), - index_base_list_.begin(), - boost::mem_fun_ref(&extent_range::start)); - - // calculate the extents - extent_list extents; - std::transform(ranges.ranges_.begin(),ranges.ranges_.end(), - extents.begin(), - boost::mem_fun_ref(&extent_range::size)); - - init_multi_array_ref(extents.begin()); - } - - - #ifndef BOOST_NO_MEMBER_TEMPLATE_FRIENDS - protected: - #else - public: - #endif - // RG - move me! 
- template <class InputIterator> - void init_multi_array_ref(InputIterator extents_iter) { - boost::function_requires<InputIteratorConcept<InputIterator> >(); - - boost::detail::multi_array:: - copy_n(extents_iter,num_dimensions(),extent_list_.begin()); - - // Calculate the array size - num_elements_ = std::accumulate(extent_list_.begin(),extent_list_.end(), - size_type(1),std::multiplies<size_type>()); - - this->compute_strides(stride_list_,extent_list_,storage_); - - origin_offset_ = - this->calculate_origin_offset(stride_list_,extent_list_, - storage_,index_base_list_); - directional_offset_ = - this->calculate_descending_dimension_offset(stride_list_,extent_list_, - storage_); - } - }; - -template <typename T, std::size_t NumDims> -class multi_array_ref_openfpm : - public const_multi_array_ref_openfpm<T,NumDims,T*> -{ - typedef const_multi_array_ref_openfpm<T,NumDims,T*> super_type; -public: - typedef typename super_type::value_type value_type; - typedef typename super_type::reference reference; - typedef typename super_type::iterator iterator; - typedef typename super_type::reverse_iterator reverse_iterator; - typedef typename super_type::const_reference const_reference; - typedef typename super_type::const_iterator const_iterator; - typedef typename super_type::const_reverse_iterator const_reverse_iterator; - typedef typename super_type::element element; - typedef typename super_type::size_type size_type; - typedef typename super_type::difference_type difference_type; - typedef typename super_type::index index; - typedef typename super_type::extent_range extent_range; - - typedef typename super_type::storage_order_type storage_order_type; - typedef typename super_type::index_list index_list; - typedef typename super_type::size_list size_list; - - template <std::size_t NDims> - struct const_array_view_openfpm { - typedef boost::detail::multi_array::const_multi_array_view_openfpm<T,NDims> type; - }; - - template <std::size_t NDims> - struct array_view_openfpm { 
- typedef boost::detail::multi_array::multi_array_view_openfpm<T,NDims> type; - }; - - template <class ExtentList> - explicit multi_array_ref_openfpm(T* base, const ExtentList& extents) : - super_type(base,extents) { - boost::function_requires< - CollectionConcept<ExtentList> >(); - } - - template <class ExtentList> - explicit multi_array_ref_openfpm(T* base, const ExtentList& extents, - const general_storage_order<NumDims>& so) : - super_type(base,extents,so) { - boost::function_requires< - CollectionConcept<ExtentList> >(); - } - - - explicit multi_array_ref_openfpm(T* base, - const detail::multi_array:: - extent_gen<NumDims>& ranges) : - super_type(base,ranges) { } - - - explicit multi_array_ref_openfpm(T* base, - const detail::multi_array:: - extent_gen<NumDims>& - ranges, - const general_storage_order_ofp<NumDims>& so) : - super_type(base,ranges,so) { } - - - // Assignment from other ConstMultiArray types. - template <typename ConstMultiArray> - multi_array_ref_openfpm & operator=(const ConstMultiArray& other) { - function_requires< - multi_array_concepts:: - ConstMultiArrayConcept<ConstMultiArray,NumDims> >(); - - // make sure the dimensions agree - BOOST_ASSERT(other.num_dimensions() == this->num_dimensions()); - BOOST_ASSERT(std::equal(other.shape(),other.shape()+this->num_dimensions(), - this->shape())); - // iterator-based copy - std::copy(other.begin(),other.end(),this->begin()); - return *this; - } - - multi_array_ref_openfpm & operator=(const multi_array_ref_openfpm & other) { - if (&other != this) { - // make sure the dimensions agree - - BOOST_ASSERT(other.num_dimensions() == this->num_dimensions()); - BOOST_ASSERT(std::equal(other.shape(), - other.shape()+this->num_dimensions(), - this->shape())); - // iterator-based copy - std::copy(other.begin(),other.end(),this->begin()); - } - return *this; - } - - multi_array_ref_openfpm & bind_ref(const multi_array_ref_openfpm & other) { - if (&other != this) { - - this->base_ = other.base_; - this->storage_ = 
other.storage_; - this->extent_list_ = other.extent_list_; - this->stride_list_ = other.stride_list_; - this->index_base_list_ = other.index_base_list_; - this->origin_offset_ = other.origin_offset_; - this->directional_offset_ = other.directional_offset_; - this->num_elements_ = other.num_elements_; - - } - return *this; - } - - /* \brief Set the internal pointer - * - * \param base internal pointer - * - */ - void set_pointer(void * base) - { - this->base_ = static_cast<T *>(base); - } - - multi_array_ref_openfpm & operator=(multi_array_ref_openfpm && other) { - - this->base_ = other.base_; - this->storage_ = other.storage_; - this->extent_list_ = other.extent_list_; - this->stride_list_ = other.stride_list_; - this->index_base_list_ = other.index_base_list_; - this->origin_offset_ = other.origin_offset_; - this->directional_offset_ = other.directional_offset_; - this->num_elements_ = other.num_elements_; - - return *this; - } - - void swap(multi_array_ref_openfpm & other) - { - T* base_tmp = this->base_; - this->base_ = other.base_; - other.base_ = base_tmp; - - storage_order_type storage_tmp = this->storage_; - this->storage_ = other.storage_; - other.storage_ = storage_tmp; - - size_list extent_list_tmp = this->extent_list_; - this->extent_list_ = other.extent_list_; - other.extent_list_ = extent_list_tmp; - - index_list stride_list_tmp = this->stride_list_; - this->stride_list_ = other.stride_list_; - other.stride_list_ = stride_list_tmp; - - index_list index_base_list_tmp = this->index_base_list_; - this->index_base_list_ = other.index_base_list_; - other.index_base_list_ = index_base_list_tmp; - - index origin_offset_tmp = this->origin_offset_; - this->origin_offset_ = other.origin_offset_; - other.origin_offset_ = origin_offset_tmp; - - index directional_offset_tmp = this->directional_offset_; - this->directional_offset_ = other.directional_offset_; - other.directional_offset_ = directional_offset_tmp; - - size_type num_elements_tmp = this->num_elements_; 
- this->num_elements_ = other.num_elements_; - other.num_elements_ = num_elements_tmp; - } - - __device__ __host__ element* origin() { return super_type::base_+super_type::origin_offset_; } - - element* data() { return super_type::base_; } - - template <class IndexList> - element& operator()(const IndexList& indices) { - boost::function_requires< - CollectionConcept<IndexList> >(); - return super_type::access_element(boost::type<element&>(), - indices,origin(), - this->shape(),this->strides(), - this->index_bases()); - } - - - __device__ __host__ reference operator[](index idx) { - return super_type::access(boost::type<reference>(), - idx,origin(), - this->shape(),this->strides(), - this->index_bases()); - } - - - // See note attached to generate_array_view in base.hpp - template <int NDims> - typename array_view_openfpm<NDims>::type - operator[](const detail::multi_array:: - index_gen<NumDims,NDims>& indices) { - typedef typename array_view_openfpm<NDims>::type return_type; - return - super_type::generate_array_view(boost::type<return_type>(), - indices, - this->shape(), - this->strides(), - this->index_bases(), - origin()); - } - - - iterator begin() { - return iterator(*this->index_bases(),origin(),this->shape(), - this->strides(),this->index_bases()); - } - - iterator end() { - return iterator(*this->index_bases()+(index)*this->shape(),origin(), - this->shape(),this->strides(), - this->index_bases()); - } - - // rbegin() and rend() written naively to thwart MSVC ICE. - reverse_iterator rbegin() { - reverse_iterator ri(end()); - return ri; - } - - reverse_iterator rend() { - reverse_iterator ri(begin()); - return ri; - } - - // Using declarations don't seem to work for g++ - // These are the proxies to work around this. 
- - const element* origin() const { return super_type::origin(); } - const element* data() const { return super_type::data(); } - - template <class IndexList> - const element& operator()(const IndexList& indices) const { - boost::function_requires< - CollectionConcept<IndexList> >(); - return super_type::operator()(indices); - } - - const_reference operator[](index idx) const { - return super_type::access(boost::type<const_reference>(), - idx,origin(), - this->shape(),this->strides(), - this->index_bases()); - } - - // See note attached to generate_array_view in base.hpp - template <int NDims> - typename const_array_view_openfpm<NDims>::type - operator[](const detail::multi_array:: - index_gen<NumDims,NDims>& indices) - const { - return super_type::operator[](indices); - } - - const_iterator begin() const { - return super_type::begin(); - } - - const_iterator end() const { - return super_type::end(); - } - - const_reverse_iterator rbegin() const { - return super_type::rbegin(); - } - - const_reverse_iterator rend() const { - return super_type::rend(); - } - -protected: - // This is only supplied to support multi_array's default constructor - explicit multi_array_ref_openfpm(T* base, - const storage_order_type& so, - const index* index_bases, - const size_type* extents) : - super_type(base,so,index_bases,extents) { } - -}; - -} // namespace boost - - -#endif /* OPENFPM_DATA_SRC_GRID_BOOST_MULTI_ARRAY_OPENFPM_HPP_ */ diff --git a/src/util/boost/boost_multi_array_subarray_openfpm.hpp b/src/util/boost/boost_multi_array_subarray_openfpm.hpp deleted file mode 100644 index 88450f07ad9ae03fda09a5b383ad65758612bb9d..0000000000000000000000000000000000000000 --- a/src/util/boost/boost_multi_array_subarray_openfpm.hpp +++ /dev/null @@ -1,407 +0,0 @@ -// Copyright 2002 The Trustees of Indiana University. - -// Use, modification and distribution is subject to the Boost Software -// License, Version 1.0. 
(See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) - -// Boost.MultiArray Library -// Authors: Ronald Garcia -// Jeremy Siek -// Andrew Lumsdaine -// See http://www.boost.org/libs/multi_array for documentation. - -#ifndef SUBARRAY_RG071801_OPENFPM_HPP -#define SUBARRAY_RG071801_OPENFPM_HPP - -// -// subarray.hpp - used to implement standard operator[] on -// multi_arrays -// - -#include "boost/multi_array/concept_checks.hpp" -#include "boost/limits.hpp" -#include "boost/type.hpp" -#include <algorithm> -#include <cstddef> -#include <functional> -#include "util/boost/boost_multi_array_base_openfpm.hpp" -#include "util/cuda_util.hpp" - -namespace boost { -namespace detail { -namespace multi_array { - -// -// const_sub_array -// multi_array's proxy class to allow multiple overloads of -// operator[] in order to provide a clean multi-dimensional array -// interface. -template <typename T, std::size_t NumDims, typename TPtr> -class const_sub_array_openfpm : - public boost::detail::multi_array::multi_array_impl_base_openfpm<T,NumDims> -{ - typedef boost::detail::multi_array::multi_array_impl_base_openfpm<T,NumDims> super_type; -public: - typedef typename super_type::value_type value_type; - typedef typename super_type::const_reference const_reference; - typedef typename super_type::const_iterator const_iterator; - typedef typename super_type::const_reverse_iterator const_reverse_iterator; - typedef typename super_type::element element; - typedef typename super_type::size_type size_type; - typedef typename super_type::difference_type difference_type; - typedef typename super_type::index index; - typedef typename super_type::extent_range extent_range; - - // template typedefs - template <std::size_t NDims> - struct const_array_view { - typedef boost::detail::multi_array::const_multi_array_view_openfpm<T,NDims> type; - }; - - template <std::size_t NDims> - struct array_view { - typedef 
boost::detail::multi_array::multi_array_view_openfpm<T,NDims> type; - }; - - // Allow default copy constructor as well. - - template <typename OPtr> - const_sub_array_openfpm (const const_sub_array_openfpm<T,NumDims,OPtr>& rhs) : - base_(rhs.base_), extents_(rhs.extents_), strides_(rhs.strides_), - index_base_(rhs.index_base_) { - } - - // const_sub_array always returns const types, regardless of its own - // constness. - __device__ __host__ const_reference operator[](index idx) const { - return super_type::access(boost::type<const_reference>(), - idx,base_,shape(),strides(),index_bases()); - } - - template <typename IndexList> - const element& operator()(const IndexList& indices) const { - boost::function_requires< - CollectionConcept<IndexList> >(); - return super_type::access_element(boost::type<const element&>(), - indices,origin(), - shape(),strides(),index_bases()); - } - - // see generate_array_view in base.hpp - template <int NDims> - __device__ __host__ typename const_array_view<NDims>::type - operator[](const boost::detail::multi_array:: - index_gen<NumDims,NDims>& indices) - const { - typedef typename const_array_view<NDims>::type return_type; - return - super_type::generate_array_view(boost::type<return_type>(), - indices, - shape(), - strides(), - index_bases(), - base_); - } - - template <typename OPtr> - bool operator<(const const_sub_array_openfpm<T,NumDims,OPtr>& rhs) const { - return std::lexicographical_compare(begin(),end(),rhs.begin(),rhs.end()); - } - - template <typename OPtr> - bool operator==(const const_sub_array_openfpm<T,NumDims,OPtr>& rhs) const { - if(std::equal(shape(),shape()+num_dimensions(),rhs.shape())) - return std::equal(begin(),end(),rhs.begin()); - else return false; - } - - template <typename OPtr> - bool operator!=(const const_sub_array_openfpm<T,NumDims,OPtr>& rhs) const { - return !(*this == rhs); - } - - template <typename OPtr> - bool operator>(const const_sub_array_openfpm<T,NumDims,OPtr>& rhs) const { - return rhs < 
*this; - } - - template <typename OPtr> - bool operator<=(const const_sub_array_openfpm<T,NumDims,OPtr>& rhs) const { - return !(*this > rhs); - } - - template <typename OPtr> - bool operator>=(const const_sub_array_openfpm<T,NumDims,OPtr>& rhs) const { - return !(*this < rhs); - } - - const_iterator begin() const { - return const_iterator(*index_bases(),origin(), - shape(),strides(),index_bases()); - } - - const_iterator end() const { - return const_iterator(*index_bases()+(index)*shape(),origin(), - shape(),strides(),index_bases()); - } - - const_reverse_iterator rbegin() const { - return const_reverse_iterator(end()); - } - - const_reverse_iterator rend() const { - return const_reverse_iterator(begin()); - } - - TPtr origin() const { return base_; } - __host__ __device__ size_type size() const { return extents_[0]; } - size_type max_size() const { return num_elements(); } - bool empty() const { return size() == 0; } - size_type num_dimensions() const { return NumDims; } - __host__ __device__ const size_type* shape() const { return extents_; } - __host__ __device__ const index* strides() const { return strides_; } - __host__ __device__ const index* index_bases() const { return index_base_; } - - size_type num_elements() const { - return std::accumulate(shape(),shape() + num_dimensions(), - size_type(1), std::multiplies<size_type>()); - } - - -#ifndef BOOST_NO_MEMBER_TEMPLATE_FRIENDS -protected: - template <typename,std::size_t> friend class value_accessor_n_openfpm; - template <typename,std::size_t,typename> friend class const_sub_array_openfpm; -#else -public: // Should be protected -#endif - - __device__ __host__ const_sub_array_openfpm (TPtr base, - const size_type* extents, - const index* strides, - const index* index_base) : - base_(base), extents_(extents), strides_(strides), - index_base_(index_base) { - } - - TPtr base_; - const size_type* extents_; - const index* strides_; - const index* index_base_; -private: - // const_sub_array cannot be assigned to 
(no deep copies!) - const_sub_array_openfpm& operator=(const const_sub_array_openfpm&); -}; - -// -// sub_array -// multi_array's proxy class to allow multiple overloads of -// operator[] in order to provide a clean multi-dimensional array -// interface. -template <typename T, std::size_t NumDims> -class sub_array_openfpm : public const_sub_array_openfpm<T,NumDims,T*> -{ - typedef const_sub_array_openfpm<T,NumDims,T*> super_type; -public: - typedef typename super_type::element element; - typedef typename super_type::reference reference; - typedef typename super_type::index index; - typedef typename super_type::size_type size_type; - typedef typename super_type::iterator iterator; - typedef typename super_type::reverse_iterator reverse_iterator; - typedef typename super_type::const_reference const_reference; - typedef typename super_type::const_iterator const_iterator; - typedef typename super_type::const_reverse_iterator const_reverse_iterator; - - // template typedefs - template <std::size_t NDims> - struct const_array_view { - typedef boost::detail::multi_array::const_multi_array_view_openfpm<T,NDims> type; - }; - - template <std::size_t NDims> - struct array_view { - typedef boost::detail::multi_array::multi_array_view_openfpm<T,NDims> type; - }; - - // Assignment from other ConstMultiArray types. 
- template <typename ConstMultiArray> - sub_array_openfpm& operator=(const ConstMultiArray& other) { - -#ifdef SE_CLASS1 - function_requires< boost::multi_array_concepts::ConstMultiArrayConcept< - ConstMultiArray, NumDims> >(); - - // make sure the dimensions agree - BOOST_ASSERT(other.num_dimensions() == this->num_dimensions()); -// BOOST_ASSERT(std::equal(other.shape(),other.shape()+this->num_dimensions(), -// this->shape())); - -#endif - // iterator-based copy -// std::copy(other.begin(),other.end(),begin()); - - this->operator[](0) = other[0]; - this->operator[](1) = other[1]; - this->operator[](2) = other[2]; - -// int temp = other.size(); -// for (int i = 0 ; i < (int)temp ; i++) {} -// {/*this->operator[](i) = other[i];*/} - return *this; - } - - - __device__ __host__ sub_array_openfpm& operator=(const sub_array_openfpm& other) { - if (&other != this) { -#ifdef SE_CLASS1 - // make sure the dimensions agree - BOOST_ASSERT(other.num_dimensions() == this->num_dimensions()); -// BOOST_ASSERT(std::equal(other.shape(), -// other.shape()+this->num_dimensions(), -// this->shape())); -#endif - // iterator-based copy - //std::copy(other.begin(),other.end(),begin()); - - for (int i = 0 ; i < (int)other.size() ; i++) - {this->operator[](i) = other[i];} - } - return *this; - } - - __device__ __host__ T* origin() { return this->base_; } - __device__ __host__ const T* origin() const { return this->base_; } - - __device__ __host__ reference operator[](index idx) { - return super_type::access(boost::type<reference>(), - idx,this->base_,this->shape(),this->strides(), - this->index_bases()); - } - - // see generate_array_view in base.hpp - template <int NDims> - __device__ __host__ typename array_view<NDims>::type - operator[](const boost::detail::multi_array:: - index_gen<NumDims,NDims>& indices) { - typedef typename array_view<NDims>::type return_type; - - return - super_type::generate_array_view(boost::type<return_type>(), - indices, - this->shape(), - this->strides(), - 
this->index_bases(), - origin()); - } - - template <class IndexList> - element& operator()(const IndexList& indices) { - boost::function_requires< - CollectionConcept<IndexList> >(); - return super_type::access_element(boost::type<element&>(), - indices,origin(), - this->shape(),this->strides(), - this->index_bases()); - } - - __device__ __host__ iterator begin() - { - return iterator(*this->index_bases(),origin(), - this->shape(),this->strides(),this->index_bases()); - } - - __device__ __host__ iterator end() - { - return iterator(*this->index_bases()+(index)*this->shape(),origin(), - this->shape(),this->strides(),this->index_bases()); - } - - // RG - rbegin() and rend() written naively to thwart MSVC ICE. - reverse_iterator rbegin() { - reverse_iterator ri(end()); - return ri; - } - - reverse_iterator rend() { - reverse_iterator ri(begin()); - return ri; - } - - // - // proxies - // - - template <class IndexList> - const element& operator()(const IndexList& indices) const { - boost::function_requires< - CollectionConcept<IndexList> >(); - return super_type::operator()(indices); - } - - const_reference operator[](index idx) const { - return super_type::operator[](idx); - } - - // see generate_array_view in base.hpp - template <int NDims> - typename const_array_view<NDims>::type - operator[](const boost::detail::multi_array:: - index_gen<NumDims,NDims>& indices) - const { - return super_type::operator[](indices); - } - - const_iterator begin() const { - return super_type::begin(); - } - - const_iterator end() const { - return super_type::end(); - } - - const_reverse_iterator rbegin() const { - return super_type::rbegin(); - } - - const_reverse_iterator rend() const { - return super_type::rend(); - } - -#ifndef BOOST_NO_MEMBER_TEMPLATE_FRIENDS -private: - template <typename,std::size_t> friend class value_accessor_n_openfpm; -#else -public: // should be private -#endif - - __device__ __host__ sub_array_openfpm (T* base, - const size_type* extents, - const index* 
strides, - const index* index_base) : - super_type(base,extents,strides,index_base) { - } - -}; - -} // namespace multi_array -} // namespace detail -// -// traits classes to get sub_array types -// -template <typename Array, int N> -class subarray_gen_openfpm { - typedef typename Array::element element; -public: - typedef boost::detail::multi_array::sub_array_openfpm<element,N> type; -}; - -template <typename Array, int N> -class const_subarray_gen_openfpm { - typedef typename Array::element element; -public: - typedef boost::detail::multi_array::const_sub_array_openfpm<element,N> type; -}; -} // namespace boost - -#endif // SUBARRAY_RG071801_HPP diff --git a/src/util/boost/boost_multi_array_view_openfpm.hpp b/src/util/boost/boost_multi_array_view_openfpm.hpp deleted file mode 100644 index 989bbe085293cf520aec9228181a1292074d1b44..0000000000000000000000000000000000000000 --- a/src/util/boost/boost_multi_array_view_openfpm.hpp +++ /dev/null @@ -1,457 +0,0 @@ -/* - * boost_multi_array_view.hpp - * - * Created on: Jun 7, 2018 - * Author: i-bird - */ - -#ifndef OPENFPM_DATA_SRC_UTIL_BOOST_MULTI_ARRAY_VIEW_OPENFPM_HPP_ -#define OPENFPM_DATA_SRC_UTIL_BOOST_MULTI_ARRAY_VIEW_OPENFPM_HPP_ - - - -// -// view.hpp - code for creating "views" of array data. 
-// - -#include "boost/multi_array/concept_checks.hpp" -#include "boost/multi_array/iterator.hpp" -#include "boost/multi_array/storage_order.hpp" -#include "boost/multi_array/algorithm.hpp" -#include "boost/type_traits/is_integral.hpp" -#include "boost/utility/enable_if.hpp" -#include "boost/array.hpp" -#include "boost/limits.hpp" -#include <algorithm> -#include <cstddef> -#include <functional> -#include <numeric> - -namespace boost { -namespace detail { -namespace multi_array { - -// TPtr = const T* defaulted in base.hpp -template <typename T, std::size_t NumDims, typename TPtr> -class const_multi_array_view_openfpm : - public boost::detail::multi_array::multi_array_impl_base_openfpm<T,NumDims> -{ - typedef boost::detail::multi_array::multi_array_impl_base_openfpm<T,NumDims> super_type; -public: - typedef typename super_type::value_type value_type; - typedef typename super_type::const_reference const_reference; - typedef typename super_type::const_iterator const_iterator; - typedef typename super_type::const_reverse_iterator const_reverse_iterator; - typedef typename super_type::element element; - typedef typename super_type::size_type size_type; - typedef typename super_type::difference_type difference_type; - typedef typename super_type::index index; - typedef typename super_type::extent_range extent_range; - - // template typedefs - template <std::size_t NDims> - struct const_array_view_openfpm - { - typedef boost::detail::multi_array::const_multi_array_view_openfpm<T,NDims> type; - }; - - template <std::size_t NDims> - struct array_view_openfpm - { - typedef boost::detail::multi_array::multi_array_view_openfpm<T,NDims> type; - }; - - template <typename OPtr> - const_multi_array_view_openfpm(const const_multi_array_view_openfpm<T,NumDims,OPtr>& other) - :base_(other.base_), origin_offset_(other.origin_offset_), - num_elements_(other.num_elements_), extent_list_(other.extent_list_), - stride_list_(other.stride_list_), index_base_list_(other.index_base_list_) - 
{} - - - template <class BaseList> -#ifdef BOOST_NO_SFINAE - void -#else - typename - disable_if<typename boost::is_integral<BaseList>::type,void >::type -#endif - reindex(const BaseList& values) { - boost::function_requires< - CollectionConcept<BaseList> >(); - boost::detail::multi_array:: - copy_n(values.begin(),num_dimensions(),index_base_list_.begin()); - origin_offset_ = - this->calculate_indexing_offset(stride_list_,index_base_list_); - } - - void reindex(index value) { - index_base_list_.assign(value); - origin_offset_ = - this->calculate_indexing_offset(stride_list_,index_base_list_); - } - - size_type num_dimensions() const { return NumDims; } - - size_type size() const { return extent_list_.front(); } - size_type max_size() const { return num_elements(); } - bool empty() const { return size() == 0; } - - __device__ __host__ const size_type* shape() const { - return extent_list_.data(); - } - - __device__ __host__ const index* strides() const { - return stride_list_.data(); - } - - __device__ __host__ const T* origin() const { return base_+origin_offset_; } - - size_type num_elements() const { return num_elements_; } - - __device__ __host__ const index* index_bases() const { - return index_base_list_.data(); - } - - template <typename IndexList> - const element& operator()(IndexList indices) const { - boost::function_requires< - CollectionConcept<IndexList> >(); - return super_type::access_element(boost::type<const element&>(), - indices,origin(), - shape(),strides(),index_bases()); - } - - // Only allow const element access - __device__ __host__ const_reference operator[](index idx) const { - return super_type::access(boost::type<const_reference>(), - idx,origin(), - shape(),strides(), - index_bases()); - } - - // see generate_array_view in base.hpp - template <int NDims> - __device__ __host__ typename const_array_view_openfpm<NDims>::type - operator[](const boost::detail::multi_array:: - index_gen<NumDims,NDims>& indices) - const { - typedef typename 
const_array_view_openfpm<NDims>::type return_type; - return - super_type::generate_array_view(boost::type<return_type>(), - indices, - shape(), - strides(), - index_bases(), - origin()); - } - const_iterator begin() const { - return const_iterator(*index_bases(),origin(), - shape(),strides(),index_bases()); - } - - const_iterator end() const { - return const_iterator(*index_bases()+(index)*shape(),origin(), - shape(),strides(),index_bases()); - } - - const_reverse_iterator rbegin() const { - return const_reverse_iterator(end()); - } - - const_reverse_iterator rend() const { - return const_reverse_iterator(begin()); - } - - - template <typename OPtr> - bool operator==(const - const_multi_array_view<T,NumDims,OPtr>& rhs) - const { - if(std::equal(extent_list_.begin(), - extent_list_.end(), - rhs.extent_list_.begin())) - return std::equal(begin(),end(),rhs.begin()); - else return false; - } - - template <typename OPtr> - bool operator<(const - const_multi_array_view<T,NumDims,OPtr>& rhs) - const { - return std::lexicographical_compare(begin(),end(),rhs.begin(),rhs.end()); - } - - template <typename OPtr> - bool operator!=(const - const_multi_array_view<T,NumDims,OPtr>& rhs) - const { - return !(*this == rhs); - } - - template <typename OPtr> - bool operator>(const - const_multi_array_view<T,NumDims,OPtr>& rhs) - const { - return rhs < *this; - } - - template <typename OPtr> - bool operator<=(const - const_multi_array_view<T,NumDims,OPtr>& rhs) - const { - return !(*this > rhs); - } - - template <typename OPtr> - bool operator>=(const - const_multi_array_view<T,NumDims,OPtr>& rhs) - const { - return !(*this < rhs); - } - - -#ifndef BOOST_NO_MEMBER_TEMPLATE_FRIENDS -protected: - template <typename,std::size_t> friend class multi_array_impl_base_openfpm; - template <typename,std::size_t,typename> friend class const_multi_array_view_openfpm; -#else -public: // should be protected -#endif - - // This constructor is used by multi_array_impl_base::generate_array_view - // to 
create strides - template <typename ExtentList, typename Index> - explicit const_multi_array_view_openfpm(TPtr base, - const ExtentList& extents, - const boost::array<Index,NumDims>& strides): - base_(base), origin_offset_(0) { - - index_base_list_.assign(0); - - // Get the extents and strides - boost::detail::multi_array:: - copy_n(extents.begin(),NumDims,extent_list_.begin()); - boost::detail::multi_array:: - copy_n(strides.begin(),NumDims,stride_list_.begin()); - - // Calculate the array size - num_elements_ = std::accumulate(extent_list_.begin(),extent_list_.end(), - size_type(1),std::multiplies<size_type>()); - } - - typedef boost::array<size_type,NumDims> size_list; - typedef boost::array<index,NumDims> index_list; - - TPtr base_; - index origin_offset_; - size_type num_elements_; - size_list extent_list_; - index_list stride_list_; - index_list index_base_list_; - -private: - // const_multi_array_view cannot be assigned to (no deep copies!) - const_multi_array_view_openfpm& operator=(const const_multi_array_view_openfpm& other); -}; - - -template <typename T, std::size_t NumDims> -class multi_array_view_openfpm : - public const_multi_array_view_openfpm<T,NumDims,T*> -{ - typedef const_multi_array_view_openfpm<T,NumDims,T*> super_type; -public: - typedef typename super_type::value_type value_type; - typedef typename super_type::reference reference; - typedef typename super_type::iterator iterator; - typedef typename super_type::reverse_iterator reverse_iterator; - typedef typename super_type::const_reference const_reference; - typedef typename super_type::const_iterator const_iterator; - typedef typename super_type::const_reverse_iterator const_reverse_iterator; - typedef typename super_type::element element; - typedef typename super_type::size_type size_type; - typedef typename super_type::difference_type difference_type; - typedef typename super_type::index index; - typedef typename super_type::extent_range extent_range; - - // template typedefs - template 
<std::size_t NDims> - struct const_array_view_openfpm { - typedef boost::detail::multi_array::const_multi_array_view_openfpm<T,NDims> type; - }; - - template <std::size_t NDims> - struct array_view_openfpm { - typedef boost::detail::multi_array::multi_array_view_openfpm<T,NDims> type; - }; - - // Assignment from other ConstMultiArray types. - template <typename ConstMultiArray> - multi_array_view_openfpm& operator=(const ConstMultiArray& other) { - function_requires< - boost::multi_array_concepts:: - ConstMultiArrayConcept<ConstMultiArray,NumDims> >(); - - // make sure the dimensions agree - BOOST_ASSERT(other.num_dimensions() == this->num_dimensions()); - BOOST_ASSERT(std::equal(other.shape(),other.shape()+this->num_dimensions(), - this->shape())); - // iterator-based copy - std::copy(other.begin(),other.end(),begin()); - return *this; - } - - - multi_array_view_openfpm& operator=(const multi_array_view_openfpm& other) { - if (&other != this) { - // make sure the dimensions agree - BOOST_ASSERT(other.num_dimensions() == this->num_dimensions()); - BOOST_ASSERT(std::equal(other.shape(), - other.shape()+this->num_dimensions(), - this->shape())); - // iterator-based copy - std::copy(other.begin(),other.end(),begin()); - } - return *this; - } - - element* origin() { return this->base_+this->origin_offset_; } - - template <class IndexList> - element& operator()(const IndexList& indices) { - boost::function_requires< - CollectionConcept<IndexList> >(); - return super_type::access_element(boost::type<element&>(), - indices,origin(), - this->shape(),this->strides(), - this->index_bases()); - } - - - reference operator[](index idx) { - return super_type::access(boost::type<reference>(), - idx,origin(), - this->shape(),this->strides(), - this->index_bases()); - } - - - // see generate_array_view in base.hpp - template <int NDims> - typename array_view_openfpm<NDims>::type - operator[](const boost::detail::multi_array:: - index_gen<NumDims,NDims>& indices) { - typedef 
typename array_view_openfpm<NDims>::type return_type; - return - super_type::generate_array_view(boost::type<return_type>(), - indices, - this->shape(), - this->strides(), - this->index_bases(), - origin()); - } - - - iterator begin() { - return iterator(*this->index_bases(),origin(), - this->shape(),this->strides(), - this->index_bases()); - } - - iterator end() { - return iterator(*this->index_bases()+(index)*this->shape(),origin(), - this->shape(),this->strides(), - this->index_bases()); - } - - reverse_iterator rbegin() { - return reverse_iterator(end()); - } - - reverse_iterator rend() { - return reverse_iterator(begin()); - } - - // Using declarations don't seem to work for g++ - // These are the proxies to work around this. - - const element* origin() const { return super_type::origin(); } - - template <class IndexList> - const element& operator()(const IndexList& indices) const { - boost::function_requires< - CollectionConcept<IndexList> >(); - return super_type::operator()(indices); - } - - const_reference operator[](index idx) const { - return super_type::operator[](idx); - } - - // see generate_array_view in base.hpp - template <int NDims> - typename const_array_view_openfpm<NDims>::type - operator[](const boost::detail::multi_array:: - index_gen<NumDims,NDims>& indices) - const { - return super_type::operator[](indices); - } - - const_iterator begin() const { - return super_type::begin(); - } - - const_iterator end() const { - return super_type::end(); - } - - const_reverse_iterator rbegin() const { - return super_type::rbegin(); - } - - const_reverse_iterator rend() const { - return super_type::rend(); - } - -#ifndef BOOST_NO_MEMBER_TEMPLATE_FRIENDS -private: - template <typename,std::size_t> friend class multi_array_impl_base; -#else -public: // should be private -#endif - - // constructor used by multi_array_impl_base::generate_array_view to - // generate array views - template <typename ExtentList, typename Index> - explicit 
multi_array_view_openfpm(T* base, - const ExtentList& extents, - const boost::array<Index,NumDims>& strides) : - super_type(base,extents,strides) { } - -}; - -} // namespace multi_array -} // namespace detail - -// -// traits classes to get array_view types -// -template <typename Array, int N> -class array_view_gen_openfpm { - typedef typename Array::element element; -public: - typedef boost::detail::multi_array::multi_array_view_openfpm<element,N> type; -}; - -template <typename Array, int N> -class const_array_view_gen_openfpm { - typedef typename Array::element element; -public: - typedef boost::detail::multi_array::const_multi_array_view_openfpm<element,N> type; -}; - -} // namespace boost - - -#endif /* OPENFPM_DATA_SRC_UTIL_BOOST_MULTI_ARRAY_VIEW_OPENFPM_HPP_ */ diff --git a/src/util/copy_compare/copy_general.hpp b/src/util/copy_compare/copy_general.hpp index 6907b27fed8c3564bd7ff7395f89c15d851a0a94..60e2adc53da61f77ecc8bf383eea13623e4322a3 100644 --- a/src/util/copy_compare/copy_general.hpp +++ b/src/util/copy_compare/copy_general.hpp @@ -74,12 +74,33 @@ struct add_ * \param src Source object * */ - static inline void operation(Tdst & dst, const Tsrc & src) + __device__ __host__ static inline void operation(Tdst & dst, const Tsrc & src) { dst += src; } }; +/*! \brief This structure define the operation add to use with copy general + * + * \tparam Tdst destination object type + * \tparam Tsrc source object type + * + */ +template<typename Tdst, typename Tsrc> +struct add_atomic_ +{ + /*! \brief Defition of the add operation + * + * \param dst Destination object + * \param src Source object + * + */ + __device__ __host__ static inline void operation(Tdst & dst, const Tsrc & src) + { + atomicAdd(&dst,src); + } +}; + /*! 
\brief This structure define the operation add to use with copy general * * \tparam Tdst destination object type diff --git a/src/util/cuda/cuda_kernel_error_checker.hpp b/src/util/cuda/cuda_kernel_error_checker.hpp index b7bf06035f55e0b12a92924e3744277cdd8eabde..dc603978550c9fc5d1d43c7b4d262a6f8fa94a0c 100644 --- a/src/util/cuda/cuda_kernel_error_checker.hpp +++ b/src/util/cuda/cuda_kernel_error_checker.hpp @@ -118,8 +118,13 @@ template<typename ... Args>pos_pc error_arg(void * ptr, int prp, Args ... args) std::cout << ")";\ std::cout << " thread: " << "(" << dev_mem[6+i] << "," << dev_mem[7+i] << "," << dev_mem[8+i] << ")*(" << dev_mem[9+i] << "," << dev_mem[10+i] << "," << dev_mem[11+i] << ")+(" << dev_mem[12+i] << "," << dev_mem[13+i] << "," << dev_mem[14+i] << ")" << std::endl;\ std::cout << "Internal error report: " << ea.pc.match_str << std::endl;\ + int dev_mem_null[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};\ + cudaMemcpyToSymbol(global_cuda_error_array,dev_mem_null,sizeof(dev_mem_null),0,cudaMemcpyHostToDevice);\ ACTION_ON_ERROR(CUDA_LAUNCH_ERROR_OBJECT);\ - } + }\ + + + #else #define CHECK_SE_CLASS1_PRE #define CHECK_SE_CLASS1_POST(kernel_call,...) diff --git a/src/util/cuda/ofp_context.hxx b/src/util/cuda/ofp_context.hxx index 0f87e926635e629e5e969d7b9a47cfb80c512000..fd4beb7c5514e641ccf198c9891db8aad64869e4 100644 --- a/src/util/cuda/ofp_context.hxx +++ b/src/util/cuda/ofp_context.hxx @@ -200,6 +200,8 @@ cudaEvent_t _timer[2]; cudaEvent_t _event; + openfpm::vector<aggregate<unsigned char>> tmem; + // Making this a template argument means we won't generate an instance // of dummy_k for each translation unit. 
template<int dummy_arg = 0> diff --git a/src/util/cuda/scan_sort_cuda_unit_tests.cu b/src/util/cuda/scan_sort_cuda_unit_tests.cu index 8c25cdcb4408f046c375c6aad864448d0a5cb268..97a3ef2ab0208fbace0c2409e5c9ac78f233911f 100644 --- a/src/util/cuda/scan_sort_cuda_unit_tests.cu +++ b/src/util/cuda/scan_sort_cuda_unit_tests.cu @@ -5,7 +5,6 @@ #include <boost/test/unit_test.hpp> #include "util/cuda_util.hpp" -#include "util/boost/boost_array_openfpm.hpp" #include "Vector/map_vector.hpp" #include "scan_cuda.cuh" diff --git a/src/util/multi_array_openfpm/multi_array_iterator_openfpm.hpp b/src/util/multi_array_openfpm/multi_array_iterator_openfpm.hpp index fa8d6f55f759478216896f09c51e04beba0eebf6..db4d2d6788e40b27b81192debd1f79533e3943d9 100644 --- a/src/util/multi_array_openfpm/multi_array_iterator_openfpm.hpp +++ b/src/util/multi_array_openfpm/multi_array_iterator_openfpm.hpp @@ -19,7 +19,7 @@ #include <algorithm> #include <cstddef> #include <iterator> -#include "util/boost/boost_multi_array_base_openfpm.hpp" +//#include "util/boost/boost_multi_array_base_openfpm.hpp" #include "util/cuda_util.hpp" namespace openfpm { diff --git a/src/util/multi_array_openfpm/multi_array_ref_base_openfpm.hpp b/src/util/multi_array_openfpm/multi_array_ref_base_openfpm.hpp index 10a8931bbad5964bd35e02cd62ef84da8010560b..5f307e8a9dd74b3a82be083e7ffe665cf79f3239 100644 --- a/src/util/multi_array_openfpm/multi_array_ref_base_openfpm.hpp +++ b/src/util/multi_array_openfpm/multi_array_ref_base_openfpm.hpp @@ -370,102 +370,6 @@ protected: } return base[offset]; } - - // Slicing using an index_gen. - // Note that populating an index_gen creates a type that encodes - // both the number of dimensions in the current Array (NumDims), and - // the Number of dimensions for the resulting view. This allows the - // compiler to fail if the dimensions aren't completely accounted - // for. For reasons unbeknownst to me, a BOOST_STATIC_ASSERT - // within the member function template does not work. 
I should add a - // note to the documentation specifying that you get a damn ugly - // error message if you screw up in your slicing code. -/* template <typename ArrayRef, int NDims, typename TPtr> - ArrayRef - generate_array_view(boost::type<ArrayRef>, - const boost::detail::multi_array:: - index_gen<NumDims,NDims>& indices, - const size_type* extents, - const index* strides, - const index* index_bases, - TPtr base) const - { - openfpm::array<index,NDims> new_strides; - - index offset = 0; - size_type dim = 0; - for (size_type n = 0; n != NumDims; ++n) - { - // Use array specs and input specs to produce real specs. - const index default_start = index_bases[n]; - const index default_finish = default_start+extents[n]; - const index_range& current_range = indices.ranges_[n]; - index start = current_range.get_start(default_start); - index finish = current_range.get_finish(default_finish); - index stride = current_range.stride(); - BOOST_ASSERT(stride != 0); - - // An index range indicates a half-open strided interval - // [start,finish) (with stride) which faces upward when stride - // is positive and downward when stride is negative, - - // RG: The following code for calculating length suffers from - // some representation issues: if finish-start cannot be represented as - // by type index, then overflow may result. - - index len; - if ((finish - start) / stride < 0) - { - // [start,finish) is empty according to the direction imposed by - // the stride. - len = 0; - } - else - { - // integral trick for ceiling((finish-start) / stride) - // taking into account signs. - index shrinkage = stride > 0 ? 
1 : -1; - len = (finish - start + (stride - shrinkage)) / stride; - } - - // start marks the closed side of the range, so it must lie - // exactly in the set of legal indices - // with a special case for empty arrays - BOOST_ASSERT(index_bases[n] <= start && - ((start <= index_bases[n]+index(extents[n])) || - (start == index_bases[n] && extents[n] == 0))); - - #ifndef BOOST_DISABLE_ASSERTS - // finish marks the open side of the range, so it can go one past - // the "far side" of the range (the top if stride is positive, the bottom - // if stride is negative). - index bound_adjustment = stride < 0 ? 1 : 0; - BOOST_ASSERT(((index_bases[n] - bound_adjustment) <= finish) && - (finish <= (index_bases[n] + index(extents[n]) - bound_adjustment))); - #endif // BOOST_DISABLE_ASSERTS - - - // the array data pointer is modified to account for non-zero - // bases during slicing (see [Garcia] for the math involved) - offset += start * strides[n]; - - if (!current_range.is_degenerate()) { - - // The stride for each dimension is included into the - // strides for the array_view (see [Garcia] for the math involved). 
- new_strides[dim] = stride * strides[n]; - - // calculate new extents - new_extents[dim] = len; - ++dim; - } - } - BOOST_ASSERT(dim == NDims); - - return ArrayRef(base+offset, - new_extents, - new_strides); - }*/ }; } // namespace multi_array diff --git a/src/util/multi_array_openfpm/multi_array_ref_openfpm.hpp b/src/util/multi_array_openfpm/multi_array_ref_openfpm.hpp index b945a40e871eb4e71b274cfa01e52b2c1d657676..6485832b4927a536d7da8f6f0b916a79bb7372ad 100644 --- a/src/util/multi_array_openfpm/multi_array_ref_openfpm.hpp +++ b/src/util/multi_array_openfpm/multi_array_ref_openfpm.hpp @@ -267,19 +267,6 @@ public: __device__ __host__ const element* origin() const { return super_type::origin(); } -/* element* data() { return super_type::base_; } - - template <class IndexList> - element& operator()(const IndexList& indices) { - boost::function_requires< - CollectionConcept<IndexList> >(); - return super_type::access_element(boost::type<element&>(), - indices,origin(), - this->shape(),this->strides(), - this->index_bases()); - }*/ - - __device__ __host__ reference operator[](index idx) { return super_type::access(boost::type<reference>(), @@ -289,21 +276,6 @@ public: } - // See note attached to generate_array_view in base.hpp -/* template <int NDims> - typename array_view_openfpm<NDims>::type - operator[](const detail::multi_array:: - index_gen<NumDims,NDims>& indices) { - typedef typename array_view_openfpm<NDims>::type return_type; - return - super_type::generate_array_view(boost::type<return_type>(), - indices, - this->shape(), - this->strides(), - this->index_bases(), - origin()); - }*/ - iterator begin() {return iterator(0,origin(),this->size(),this->strides());} @@ -311,30 +283,6 @@ public: iterator end() {return iterator(this->size(),origin(),this->size(),this->strides());} - // rbegin() and rend() written naively to thwart MSVC ICE. 
-/* reverse_iterator rbegin() { - reverse_iterator ri(end()); - return ri; - } - - reverse_iterator rend() { - reverse_iterator ri(begin()); - return ri; - } - - // Using declarations don't seem to work for g++ - // These are the proxies to work around this. - - const element* origin() const { return super_type::origin(); } - const element* data() const { return super_type::data(); } - - template <class IndexList> - const element& operator()(const IndexList& indices) const { - boost::function_requires< - CollectionConcept<IndexList> >(); - return super_type::operator()(indices); - }*/ - __inline__ const_reference operator[](index idx) const { return super_type::access(boost::type<const_reference>(), @@ -343,37 +291,11 @@ public: this->origin()); } - // See note attached to generate_array_view in base.hpp -/* template <int NDims> - typename const_array_view_openfpm<NDims>::type - operator[](const detail::multi_array:: - index_gen<NumDims,NDims>& indices) - const { - return super_type::operator[](indices); - }*/ - const_iterator begin() const {return super_type::begin();} const_iterator end() const {return super_type::end();} - -/* const_reverse_iterator rbegin() const { - return super_type::rbegin(); - } - - const_reverse_iterator rend() const { - return super_type::rend(); - } - -protected: - // This is only supplied to support multi_array's default constructor - explicit multi_array_ref_openfpm(T* base, - const storage_order_type& so, - const index* index_bases, - const size_type* extents) : - super_type(base,so,index_bases,extents) { }*/ - }; template<typename T, typename Sfinae = void> diff --git a/src/util/multi_array_openfpm/multi_array_ref_subarray_openfpm.hpp b/src/util/multi_array_openfpm/multi_array_ref_subarray_openfpm.hpp index fe6683cca3769aa1eb86b35f128443e274c430a6..d1c18b571f7a4654a4d06dfdc7a40c34d463f299 100644 --- a/src/util/multi_array_openfpm/multi_array_ref_subarray_openfpm.hpp +++ 
b/src/util/multi_array_openfpm/multi_array_ref_subarray_openfpm.hpp @@ -54,12 +54,12 @@ public: // template typedefs template <std::size_t NDims> struct const_array_view { - typedef boost::detail::multi_array::const_multi_array_view_openfpm<T,NDims> type; + typedef openfpm::detail::multi_array::const_multi_array_view_openfpm<T,NDims> type; }; template <std::size_t NDims> struct array_view { - typedef boost::detail::multi_array::multi_array_view_openfpm<T,NDims> type; + typedef openfpm::detail::multi_array::multi_array_view_openfpm<T,NDims> type; }; // Allow default copy constructor as well. @@ -262,15 +262,6 @@ public: return super_type::operator[](idx); } - // see generate_array_view in base.hpp -/* template <int NDims> - typename const_array_view<NDims>::type - operator[](const boost::detail::multi_array:: - index_gen<NumDims,NDims>& indices) - const { - return super_type::operator[](indices); - }*/ - const_iterator begin() const { return super_type::begin(); } diff --git a/src/util/multi_array_openfpm/multi_array_view_openfpm.hpp b/src/util/multi_array_openfpm/multi_array_view_openfpm.hpp index dfcc0f322282263b61ff5cd275716fbc806a0397..b34d63d296cf93f92259f86120ece3b72d2566a7 100644 --- a/src/util/multi_array_openfpm/multi_array_view_openfpm.hpp +++ b/src/util/multi_array_openfpm/multi_array_view_openfpm.hpp @@ -8,8 +8,9 @@ #ifndef MULTI_ARRAY_VIEW_OPENFPM_HPP_ #define MULTI_ARRAY_VIEW_OPENFPM_HPP_ -#include "util/boost/boost_multi_array_base_openfpm.hpp" +//#include "util/boost/boost_multi_array_base_openfpm.hpp" #include "boost/utility/enable_if.hpp" +#include "boost/multi_array/index_gen.hpp" namespace openfpm { namespace detail { @@ -41,19 +42,19 @@ public: // template typedefs template <std::size_t NDims> struct const_array_view_openfpm { - typedef boost::detail::multi_array::const_multi_array_view_openfpm<T,NDims,vector> type; + typedef openfpm::detail::multi_array::const_multi_array_view_openfpm<T,NDims,vector> type; }; template <std::size_t NDims> struct 
array_view_openfpm { - typedef boost::detail::multi_array::multi_array_view_openfpm<T,NDims> type; + typedef openfpm::detail::multi_array::multi_array_view_openfpm<T,NDims> type; }; template <typename OPtr> const_multi_array_view_openfpm(const const_multi_array_view_openfpm<T,NumDims,OPtr>& other) : base_(other.base_), origin_offset_(other.origin_offset_), - num_elements_(other.num_elements_), extent(extent), + num_elements_(other.num_elements_), stride_list_(other.stride_list_), index_base_list_(other.index_base_list_) { } diff --git a/src/util/object_s_di.hpp b/src/util/object_s_di.hpp index cfa53895f731582fce3a4d3aafb3a5b232b6ebf3..07c56063611f8489ab7c322881c258bfc0582e8c 100644 --- a/src/util/object_s_di.hpp +++ b/src/util/object_s_di.hpp @@ -582,7 +582,7 @@ struct object_s_di_op<op, v_src,v_dst,OBJ_ENCAP,prp...> * \param vd destination object * */ - inline object_s_di_op(const v_src & vs, v_dst && vd) + __device__ __host__ inline object_s_di_op(const v_src & vs, v_dst && vd) { object_s_di_e_op<op,v_src,v_dst,prp...> obj(vs,vd); boost::mpl::for_each_ref< boost::mpl::range_c<int,0,sizeof...(prp)> >(obj); @@ -594,7 +594,7 @@ struct object_s_di_op<op, v_src,v_dst,OBJ_ENCAP,prp...> * \param vd destination object * */ - inline object_s_di_op(const v_src & vs, v_dst & vd) + __device__ __host__ inline object_s_di_op(const v_src & vs, v_dst & vd) { object_s_di_e_op<op,v_src,v_dst,prp...> obj(vs,vd); boost::mpl::for_each_ref< boost::mpl::range_c<int,0,sizeof...(prp)> >(obj);