Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • mosaic/software/parallel-computing/openfpm/openfpm_devices
  • argupta/openfpm_devices
2 results
Show changes
Showing
with 1761 additions and 1147 deletions
# -*- mode: autoconf -*-
#
# AX_OPENCL
#
# Check for an OpenCL implementation. If CL is found, _OPENCL is defined and
# the required compiler and linker flags are included in the output variables
# "CL_CFLAGS" and "CL_LIBS", respectively. If no usable CL implementation is
# found, "no_cl" is set to "yes".
#
# If the header "CL/OpenCL.h" is found, "HAVE_CL_OPENCL_H" is defined. If the
# header "OpenCL/OpenCL.h" is found, HAVE_OPENCL_OPENCL_H is defined. These
# preprocessor definitions may not be mutually exclusive.
#
# Based on AX_CHECK_GL, version: 2.4 author: Braden McDaniel
# <braden@endoframe.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
#
# As a special exception, you may copy, distribute and modify the
# configure scripts that are the output of Autoconf when processing
# the Macro. You need not follow the terms of the GNU General Public
# License when using or distributing such scripts.
#
# AX_CUDA
#
# Look for the CUDA compiler driver "nvcc" in PATH. When it is found, the
# installation prefix is derived from its location (the nvcc path with the
# trailing "bin/nvcc" removed) and these output variables are substituted:
#
#   CUDA_CFLAGS  " -I<prefix>include"
#   CUDA_LIBS    " -L<prefix>lib" ("lib64" when $build_cpu contains "_64")
#                followed by " -lcuda -lcudart"
#   NVCC         the nvcc path as reported by "which"
#
# The preprocessor symbol NVCC is defined as well. Nothing is set when nvcc
# is not found.
AC_DEFUN([AX_CUDA],
[AC_REQUIRE([AC_CANONICAL_HOST])dnl
# Search nvcc compiler
AC_CHECK_PROG([NVCC_EXIST],[nvcc],["yes"],["no"])
AS_IF([test "x$NVCC_EXIST" = "xno"],[],[
    NVCC=`which nvcc`
    # Installation prefix: $NVCC with the trailing "bin/nvcc" (or
    # "bin//nvcc") removed. It is computed once so that the -I and -L
    # options are each prepended exactly one time.
    ax_cuda_prefix="${NVCC%bin//nvcc}"
    ax_cuda_prefix="${ax_cuda_prefix%bin/nvcc}"
    CUDA_CFLAGS=" -I${ax_cuda_prefix}include"
    CUDA_LIBS=" -L${ax_cuda_prefix}lib"
    # If $build_cpu contains "_64", append "64" to CUDA_LIBS.
    # Plain assignments are used because "+=" is not understood by every
    # shell that may run configure.
    AS_IF([echo $build_cpu | grep -q "_64"],
          [CUDA_LIBS="${CUDA_LIBS}64"])
    # Append " -lcuda -lcudart" to CUDA_LIBS
    CUDA_LIBS="${CUDA_LIBS} -lcuda -lcudart"
    # Make variables available in Makefile.am
    AC_SUBST(CUDA_CFLAGS)
    AC_SUBST(CUDA_LIBS)
    echo $NVCC
    AC_SUBST(NVCC)
    AC_DEFINE([NVCC],[],[NVCC compiling])
])dnl
])dnl
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_gcc_archflag.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_GCC_ARCHFLAG([PORTABLE?], [ACTION-SUCCESS], [ACTION-FAILURE])
#
# DESCRIPTION
#
# This macro tries to guess the "native" arch corresponding to the target
# architecture for use with gcc's -march=arch or -mtune=arch flags. If
# found, the cache variable $ax_cv_gcc_archflag is set to this flag and
# ACTION-SUCCESS is executed; otherwise $ax_cv_gcc_archflag is set to
# "unknown" and ACTION-FAILURE is executed. The default ACTION-SUCCESS is
# to add $ax_cv_gcc_archflag to the end of $CFLAGS.
#
# PORTABLE? should be either [yes] (default) or [no]. In the former case,
# the flag is set to -mtune (or equivalent) so that the architecture is
# only used for tuning, but the instruction set used is still portable. In
# the latter case, the flag is set to -march (or equivalent) so that
# architecture-specific instructions are enabled.
#
# The user can specify --with-gcc-arch=<arch> in order to override the
# macro's choice of architecture, or --without-gcc-arch to disable this.
#
# When cross-compiling, or if $CC is not gcc, then ACTION-FAILURE is
# called unless the user specified --with-gcc-arch manually.
#
# Requires macros: AX_CHECK_COMPILER_FLAGS, AX_GCC_X86_CPUID
#
# (The main emphasis here is on recent CPUs, on the principle that doing
# high-performance computing on old hardware is uncommon.)
#
# LICENSE
#
# Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu>
# Copyright (c) 2008 Matteo Frigo
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 8
# AX_GCC_ARCHFLAG(PORTABLE?, ACTION-SUCCESS, ACTION-FAILURE)
#
# Guess a list of candidate architecture names for the host CPU (x86 via
# cpuid, sparc/powerpc via /proc/cpuinfo and friends, alpha via $host_cpu),
# then probe the compiler with -mtune/-mcpu/-m<arch> (portable mode) or
# -march/-mcpu/-m<arch> and cache the first accepted flag in
# ax_cv_gcc_archflag ("unknown" when none is accepted, when the compiler is
# not gcc, or when cross-compiling without --with-gcc-arch).
#
# Inside the macro body every shell bracket expression is written with
# doubled brackets because m4 strips one level of quoting.
AC_DEFUN([AX_GCC_ARCHFLAG],
[AC_REQUIRE([AC_PROG_CC])
AC_REQUIRE([AC_CANONICAL_HOST])

AC_ARG_WITH(gcc-arch, [AS_HELP_STRING([--with-gcc-arch=<arch>], [use architecture <arch> for gcc -march/-mtune, instead of guessing])],
	ax_gcc_arch=$withval, ax_gcc_arch=yes)

AC_MSG_CHECKING([for gcc architecture flag])
AC_MSG_RESULT([])
AC_CACHE_VAL(ax_cv_gcc_archflag,
[
ax_cv_gcc_archflag="unknown"

if test "$GCC" = yes; then

if test "x$ax_gcc_arch" = xyes; then
ax_gcc_arch=""
if test "$cross_compiling" = no; then
case $host_cpu in
  i[[3456]]86*|x86_64*) # use cpuid codes, in part from x86info-1.7 by D. Jones
     AX_GCC_X86_CPUID(0)
     AX_GCC_X86_CPUID(1)
     case $ax_cv_gcc_x86_cpuid_0 in
       *:756e6547:*:*) # Intel
          case $ax_cv_gcc_x86_cpuid_1 in
	    *5[[48]]?:*:*:*) ax_gcc_arch="pentium-mmx pentium" ;;
	    *5??:*:*:*) ax_gcc_arch=pentium ;;
	    206??:*:*:*) ax_gcc_arch="corei7 native nocona core2 prescott pentium4 pentiumpro" ;;
	    106a?:*:*:*) ax_gcc_arch="corei7 native nocona core2 prescott pentium4 pentiumpro" ;;
	    106e?:*:*:*) ax_gcc_arch="corei7 native nocona core2 prescott pentium4 pentiumpro" ;;
	    *6[[520]]?:*:*:*) ax_gcc_arch="i7core nocona core2 prescott pentium4 pentiumpro" ;;
	    *6[[3456]]?:*:*:*) ax_gcc_arch="pentium2 pentiumpro" ;;
	    *6a?:*[[01]]:*:*) ax_gcc_arch="pentium2 pentiumpro" ;;
	    *6a?:*[[234]]:*:*) ax_gcc_arch="pentium3 pentiumpro" ;;
	    *6[[9d]]?:*:*:*) ax_gcc_arch="pentium-m pentium3 pentiumpro" ;;
	    *6[[78b]]?:*:*:*) ax_gcc_arch="pentium3 pentiumpro" ;;
	    *6f?:*:*:*) ax_gcc_arch="core2 native pentium-m pentium3 pentiumpro" ;;
	    *6??:*:*:*) ax_gcc_arch=pentiumpro ;;
	    # both alternatives need doubled brackets, otherwise the second
	    # one degrades to a literal string that never matches
	    *f3[[347]]:*:*:*|*f4[[1347]]:*:*:*)
		case $host_cpu in
		  x86_64*) ax_gcc_arch="nocona pentium4 pentiumpro" ;;
		  *) ax_gcc_arch="prescott pentium4 pentiumpro" ;;
		esac ;;
	    *f??:*:*:*) ax_gcc_arch="pentium4 pentiumpro";;
          esac ;;
       *:68747541:*:*) # AMD
          case $ax_cv_gcc_x86_cpuid_1 in
	    *5[[67]]?:*:*:*) ax_gcc_arch=k6 ;;
	    *5[[8d]]?:*:*:*) ax_gcc_arch="k6-2 k6" ;;
	    *5[[9]]?:*:*:*) ax_gcc_arch="k6-3 k6" ;;
	    *60?:*:*:*) ax_gcc_arch=k7 ;;
	    *6[[12]]?:*:*:*) ax_gcc_arch="athlon k7" ;;
	    *6[[34]]?:*:*:*) ax_gcc_arch="athlon-tbird k7" ;;
	    *67?:*:*:*) ax_gcc_arch="athlon-4 athlon k7" ;;
	    *6[[68a]]?:*:*:*)
	       AX_GCC_X86_CPUID(0x80000006) # L2 cache size
	       case $ax_cv_gcc_x86_cpuid_0x80000006 in
                 *:*:*[[1-9a-f]]??????:*) # (L2 = ecx >> 16) >= 256
			ax_gcc_arch="athlon-xp athlon-4 athlon k7" ;;
                 *) ax_gcc_arch="athlon-4 athlon k7" ;;
	       esac ;;
	    *f[[4cef8b]]?:*:*:*) ax_gcc_arch="athlon64 k8" ;;
	    *f5?:*:*:*) ax_gcc_arch="opteron k8" ;;
	    *f7?:*:*:*) ax_gcc_arch="athlon-fx opteron k8" ;;
	    *f??:*:*:*) ax_gcc_arch="k8" ;;
          esac ;;
	*:746e6543:*:*) # IDT
	   case $ax_cv_gcc_x86_cpuid_1 in
	     *54?:*:*:*) ax_gcc_arch=winchip-c6 ;;
	     *58?:*:*:*) ax_gcc_arch=winchip2 ;;
	     *6[[78]]?:*:*:*) ax_gcc_arch=c3 ;;
	     *69?:*:*:*) ax_gcc_arch="c3-2 c3" ;;
	   esac ;;
     esac
     if test x"$ax_gcc_arch" = x; then # fallback
	case $host_cpu in
	  i586*) ax_gcc_arch=pentium ;;
	  i686*) ax_gcc_arch=pentiumpro ;;
        esac
     fi
     ;;

  sparc*)
     AC_PATH_PROG([PRTDIAG], [prtdiag], [prtdiag], [$PATH:/usr/platform/`uname -i`/sbin/:/usr/platform/`uname -m`/sbin/])
     cputype=`(((grep cpu /proc/cpuinfo | cut -d: -f2) ; ($PRTDIAG -v |grep -i sparc) ; grep -i cpu /var/run/dmesg.boot ) | head -n 1) 2> /dev/null`
     cputype=`echo "$cputype" | tr -d ' -' |tr $as_cr_LETTERS $as_cr_letters`
     case $cputype in
         *ultrasparciv*) ax_gcc_arch="ultrasparc4 ultrasparc3 ultrasparc v9" ;;
         *ultrasparciii*) ax_gcc_arch="ultrasparc3 ultrasparc v9" ;;
         *ultrasparc*) ax_gcc_arch="ultrasparc v9" ;;
         *supersparc*|*tms390z5[[05]]*) ax_gcc_arch="supersparc v8" ;;
         *hypersparc*|*rt62[[056]]*) ax_gcc_arch="hypersparc v8" ;;
         *cypress*) ax_gcc_arch=cypress ;;
     esac ;;

  alphaev5) ax_gcc_arch=ev5 ;;
  alphaev56) ax_gcc_arch=ev56 ;;
  alphapca56) ax_gcc_arch="pca56 ev56" ;;
  alphapca57) ax_gcc_arch="pca57 pca56 ev56" ;;
  alphaev6) ax_gcc_arch=ev6 ;;
  alphaev67) ax_gcc_arch=ev67 ;;
  alphaev68) ax_gcc_arch="ev68 ev67" ;;
  alphaev69) ax_gcc_arch="ev69 ev68 ev67" ;;
  alphaev7) ax_gcc_arch="ev7 ev69 ev68 ev67" ;;
  alphaev79) ax_gcc_arch="ev79 ev7 ev69 ev68 ev67" ;;

  powerpc*)
     cputype=`((grep cpu /proc/cpuinfo | head -n 1 | cut -d: -f2 | cut -d, -f1 | sed 's/ //g') ; /usr/bin/machine ; /bin/machine; grep CPU /var/run/dmesg.boot | head -n 1 | cut -d" " -f2) 2> /dev/null`
     cputype=`echo $cputype | sed -e 's/ppc//g;s/ *//g'`
     case $cputype in
       *750*) ax_gcc_arch="750 G3" ;;
       *740[[0-9]]*) ax_gcc_arch="$cputype 7400 G4" ;;
       *74[[4-5]][[0-9]]*) ax_gcc_arch="$cputype 7450 G4" ;;
       *74[[0-9]][[0-9]]*) ax_gcc_arch="$cputype G4" ;;
       *970*) ax_gcc_arch="970 G5 power4";;
       *POWER4*|*power4*|*gq*) ax_gcc_arch="power4 970";;
       *POWER5*|*power5*|*gr*|*gs*) ax_gcc_arch="power5 power4 970";;
       603ev|8240) ax_gcc_arch="$cputype 603e 603";;
       *) ax_gcc_arch=$cputype ;;
     esac
     ax_gcc_arch="$ax_gcc_arch powerpc"
     ;;
esac
fi # not cross-compiling
fi # guess arch

if test "x$ax_gcc_arch" != x -a "x$ax_gcc_arch" != xno; then
for arch in $ax_gcc_arch; do
  if test "x[]m4_default([$1],yes)" = xyes; then # if we require portable code
    flags="-mtune=$arch"
    # -mcpu=$arch and m$arch generate nonportable code on every arch except
    # x86.  And some other arches (e.g. Alpha) don't accept -mtune.  Grrr.
    case $host_cpu in i*86|x86_64*) flags="$flags -mcpu=$arch -m$arch";; esac
  else
    flags="-march=$arch -mcpu=$arch -m$arch"
  fi
  for flag in $flags; do
    AX_CHECK_COMPILER_FLAGS($flag, [ax_cv_gcc_archflag=$flag; break])
  done
  test "x$ax_cv_gcc_archflag" = xunknown || break
done
fi

fi # $GCC=yes
])
AC_MSG_CHECKING([for gcc architecture flag])
AC_MSG_RESULT($ax_cv_gcc_archflag)
if test "x$ax_cv_gcc_archflag" = xunknown; then
  m4_default([$3],:)
else
  m4_default([$2], [CFLAGS="$CFLAGS $ax_cv_gcc_archflag"])
fi
])
# ===========================================================================
# http://www.nongnu.org/autoconf-archive/ax_lib_mysql.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_LIB_MYSQL([MINIMUM-VERSION])
#
# DESCRIPTION
#
# This macro provides tests of availability of MySQL client library of
# particular version or newer.
#
# AX_LIB_MYSQL macro takes only one argument which is optional. If there
# is no required version passed, then macro does not run version test.
#
# The --with-mysql option takes one of three possible values:
#
# no - do not check for MySQL client library
#
# yes - do check for MySQL library in standard locations (mysql_config
# should be in the PATH)
#
# path - complete path to mysql_config utility, use this option if
# mysql_config can't be found in the PATH
#
# This macro calls:
#
# AC_SUBST(MYSQL_CFLAGS)
# AC_SUBST(MYSQL_LDFLAGS)
# AC_SUBST(MYSQL_VERSION)
#
# And sets:
#
# HAVE_MYSQL
#
# LICENSE
#
# Copyright (c) 2008 Mateusz Loskot <mateusz@loskot.net>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved.
# AX_LIB_MYSQL(MINIMUM-VERSION)
#
# Handle --with-mysql, locate mysql_config (either the path given by the user
# or the first one found in PATH), query it for compiler flags, linker flags
# and version, define HAVE_MYSQL when it is available and substitute
# MYSQL_CFLAGS, MYSQL_LDFLAGS and MYSQL_VERSION. When a minimum version is
# passed, the comparison result is only reported; it does not change any
# variable.
AC_DEFUN([AX_LIB_MYSQL],
[
    AC_ARG_WITH([mysql],
        [AS_HELP_STRING([--with-mysql=@<:@ARG@:>@],
            [use MySQL client library @<:@default=yes@:>@, optionally specify path to mysql_config]
        )],
        [
        if test "$withval" = "no"; then
            want_mysql="no"
        elif test "$withval" = "yes"; then
            want_mysql="yes"
        else
            want_mysql="yes"
            MYSQL_CONFIG="$withval"
        fi
        ],
        [want_mysql="yes"]
    )

    MYSQL_CFLAGS=""
    MYSQL_LDFLAGS=""
    MYSQL_VERSION=""
    dnl Start from a known state so the version test below never reads an
    dnl unset variable when the library check is skipped
    found_mysql="no"

    dnl
    dnl Check MySQL libraries (through mysql_config)
    dnl

    if test "$want_mysql" = "yes"; then

        dnl Search PATH only when the user did not supply a mysql_config
        if test -z "$MYSQL_CONFIG"; then
            AC_PATH_PROG([MYSQL_CONFIG], [mysql_config], [no])
        fi

        if test "$MYSQL_CONFIG" != "no"; then
            AC_MSG_CHECKING([for MySQL libraries])

            MYSQL_CFLAGS="`$MYSQL_CONFIG --cflags`"
            MYSQL_LDFLAGS="`$MYSQL_CONFIG --libs`"

            MYSQL_VERSION=`$MYSQL_CONFIG --version`

            AC_DEFINE([HAVE_MYSQL], [1],
                [Define to 1 if MySQL libraries are available])

            found_mysql="yes"
            AC_MSG_RESULT([yes])
        else
            found_mysql="no"
#            AC_MSG_RESULT([no])
        fi
    fi

    dnl
    dnl Check if required version of MySQL is available
    dnl

    mysql_version_req=ifelse([$1], [], [], [$1])

    if test "$found_mysql" = "yes" -a -n "$mysql_version_req"; then

        AC_MSG_CHECKING([if MySQL version is >= $mysql_version_req])

        dnl Decompose required version string of MySQL
        dnl and calculate its number representation
        mysql_version_req_major=`expr $mysql_version_req : '\([[0-9]]*\)'`
        mysql_version_req_minor=`expr $mysql_version_req : '[[0-9]]*\.\([[0-9]]*\)'`
        mysql_version_req_micro=`expr $mysql_version_req : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'`
        if test "x$mysql_version_req_micro" = "x"; then
            mysql_version_req_micro="0"
        fi

        mysql_version_req_number=`expr $mysql_version_req_major \* 1000000 \
                                   \+ $mysql_version_req_minor \* 1000 \
                                   \+ $mysql_version_req_micro`

        dnl Decompose version string of installed MySQL
        dnl and calculate its number representation
        mysql_version_major=`expr $MYSQL_VERSION : '\([[0-9]]*\)'`
        mysql_version_minor=`expr $MYSQL_VERSION : '[[0-9]]*\.\([[0-9]]*\)'`
        mysql_version_micro=`expr $MYSQL_VERSION : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'`
        if test "x$mysql_version_micro" = "x"; then
            mysql_version_micro="0"
        fi

        mysql_version_number=`expr $mysql_version_major \* 1000000 \
                                   \+ $mysql_version_minor \* 1000 \
                                   \+ $mysql_version_micro`

        mysql_version_check=`expr $mysql_version_number \>\= $mysql_version_req_number`
        if test "$mysql_version_check" = "1"; then
            AC_MSG_RESULT([yes])
        else
            AC_MSG_RESULT([no])
        fi
    fi

    AC_SUBST([MYSQL_VERSION])
    AC_SUBST([MYSQL_CFLAGS])
    AC_SUBST([MYSQL_LDFLAGS])
])
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_openmp.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_OPENMP([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]])
#
# DESCRIPTION
#
# This macro tries to find out how to compile programs that use OpenMP, a
# standard API and set of compiler directives for parallel programming
# (see http://www-unix.mcs/)
#
# On success, it sets the OPENMP_CFLAGS/OPENMP_CXXFLAGS/OPENMP_F77FLAGS
# output variable to the flag (e.g. -omp) used both to compile *and* link
# OpenMP programs in the current language.
#
# NOTE: You are assumed to not only compile your program with these flags,
# but also link it with them as well.
#
# If you want to compile everything with OpenMP, you should set:
#
# CFLAGS="$CFLAGS $OPENMP_CFLAGS"
# #OR# CXXFLAGS="$CXXFLAGS $OPENMP_CXXFLAGS"
# #OR# FFLAGS="$FFLAGS $OPENMP_FFLAGS"
#
# (depending on the selected language).
#
# The user can override the default choice by setting the corresponding
# environment variable (e.g. OPENMP_CFLAGS).
#
# ACTION-IF-FOUND is a list of shell commands to run if an OpenMP flag is
# found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it is
# not found. If ACTION-IF-FOUND is not specified, the default action will
# define HAVE_OPENMP.
#
# LICENSE
#
# Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 8
# AX_OPENMP(ACTION-IF-FOUND, ACTION-IF-NOT-FOUND)
#
# Try a list of known OpenMP compiler flags (preceded by the user supplied
# OPENMP_<LANG>FLAGS, if any) by linking a call to omp_set_num_threads. The
# first flag that links is cached; "none" means no flag is required and
# "unknown" means nothing worked. The compiler flags of the current language
# are saved before the probe and restored afterwards.
AC_DEFUN([AX_OPENMP], [
AC_PREREQ(2.59) dnl for _AC_LANG_PREFIX

AC_CACHE_CHECK([for OpenMP flag of _AC_LANG compiler], ax_cv_[]_AC_LANG_ABBREV[]_openmp, [save[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
ax_cv_[]_AC_LANG_ABBREV[]_openmp=unknown
# Flags to try:  -fopenmp (gcc), -openmp (icc), -mp (SGI & PGI),
#                -xopenmp (Sun), -omp (Tru64), -qsmp=omp (AIX), none
ax_openmp_flags="-fopenmp -openmp -mp -xopenmp -omp -qsmp=omp none"
if test "x$OPENMP_[]_AC_LANG_PREFIX[]FLAGS" != x; then
  ax_openmp_flags="$OPENMP_[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flags"
fi
for ax_openmp_flag in $ax_openmp_flags; do
  case $ax_openmp_flag in
    dnl "none" probes with the saved flags untouched, so the variable read
    dnl here must carry the same FLAGS suffix as the one saved above
    none) []_AC_LANG_PREFIX[]FLAGS=$save[]_AC_LANG_PREFIX[]FLAGS ;;
    *) []_AC_LANG_PREFIX[]FLAGS="$save[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flag" ;;
  esac
  AC_TRY_LINK_FUNC(omp_set_num_threads,
	[ax_cv_[]_AC_LANG_ABBREV[]_openmp=$ax_openmp_flag; break])
done
[]_AC_LANG_PREFIX[]FLAGS=$save[]_AC_LANG_PREFIX[]FLAGS
])
if test "x$ax_cv_[]_AC_LANG_ABBREV[]_openmp" = "xunknown"; then
  m4_default([$2],:)
else
  if test "x$ax_cv_[]_AC_LANG_ABBREV[]_openmp" != "xnone"; then
    OPENMP_[]_AC_LANG_PREFIX[]FLAGS=$ax_cv_[]_AC_LANG_ABBREV[]_openmp
  fi
  m4_default([$1], [AC_DEFINE(HAVE_OPENMP,1,[Define if OpenMP is enabled])])
fi
])dnl AX_OPENMP
# Select the GPU related sources. CUDA_SOURCES / CUDA_SOURCES_TEST are only
# filled when CUDA_ON_BACKEND is something other than "None"; otherwise both
# lists are left empty.
if(NOT CUDA_ON_BACKEND STREQUAL "None")
set(CUDA_SOURCES memory/CudaMemory.cu )
set(CUDA_SOURCES_TEST util/cudify/cudify_unit_test.cu)
# For the SEQUENTIAL, OpenMP and HIP backends the .cu files are marked as CXX
# sources.
if (CUDA_ON_BACKEND STREQUAL "SEQUENTIAL" OR CUDA_ON_BACKEND STREQUAL "OpenMP" OR CUDA_ON_BACKEND STREQUAL "HIP")
set_source_files_properties(${CUDA_SOURCES} PROPERTIES LANGUAGE CXX)
set_source_files_properties(${CUDA_SOURCES_TEST} PROPERTIES LANGUAGE CXX)
endif()
# The SEQUENTIAL and OpenMP backends additionally compile CUDA_SOURCES with
# __NVCC__ and CUDART_VERSION=11000 defined.
if (CUDA_ON_BACKEND STREQUAL "SEQUENTIAL" OR CUDA_ON_BACKEND STREQUAL "OpenMP" )
set_source_files_properties(${CUDA_SOURCES} PROPERTIES COMPILE_FLAGS "-D__NVCC__ -DCUDART_VERSION=11000")
# NOTE(review): "-x c++" is passed through add_definitions, so it applies to
# the whole directory and not only to the .cu files - confirm this is intended.
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
add_definitions("-x c++")
endif()
endif()
else()
set(CUDA_SOURCES )
set(CUDA_SOURCES_TEST )
endif()
# Declare the three targets of this directory:
#   mem            test executable
#   ofpmmemory     static library
#   ofpmmemory_dl  shared library
# With the HIP backend (and HIP found) they are created through the
# hip_add_* commands, otherwise through the regular CMake commands.
if ( CUDA_ON_BACKEND STREQUAL "HIP" AND HIP_FOUND )
hip_add_library(ofpmmemory STATIC memory/HeapMemory.cpp util/cudify/cudify_vars.cpp memory/PtrMemory.cpp memory/mem_conf.cpp ${CUDA_SOURCES})
hip_add_library(ofpmmemory_dl SHARED memory/HeapMemory.cpp util/cudify/cudify_vars.cpp memory/PtrMemory.cpp memory/mem_conf.cpp ${CUDA_SOURCES})
# The C++ compiler is switched to hipcc after the libraries are declared.
set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE})
hip_add_executable(mem main.cpp memory/HeapMemory.cpp util/cudify/cudify_vars.cpp memory/mem_conf.cpp ${CUDA_SOURCES} ${CUDA_SOURCES_TEST})
# NOTE(review): these flags are appended after the hip_add_* calls above;
# verify that they still reach the targets already declared.
list(APPEND HIP_HIPCC_FLAGS -D__NVCC__ -D__HIP__ -DCUDART_VERSION=11000 -D__CUDACC__ -D__CUDACC_VER_MAJOR__=11 -D__CUDACC_VER_MINOR__=0 -D__CUDACC_VER_BUILD__=0 --std=c++17)
set(CMAKE_SHARED_LIBRARY_CXX_FLAGS "-fPIC")
set_property(TARGET ofpmmemory PROPERTY NO_SONAME ON)
set_property(TARGET ofpmmemory_dl PROPERTY NO_SONAME ON)
else()
# NOTE(review): util/cudify/cudify_unit_test.cu is listed explicitly here and
# is also what CUDA_SOURCES_TEST holds when a backend is enabled; with backend
# "None" the explicit entry still adds the .cu file - confirm which is wanted.
add_executable(mem main.cpp memory/HeapMemory.cpp util/cudify/cudify_vars.cpp util/cudify/cudify_unit_test.cu memory/mem_conf.cpp ${CUDA_SOURCES} ${CUDA_SOURCES_TEST})
set_property(TARGET mem PROPERTY CUDA_ARCHITECTURES OFF)
add_library(ofpmmemory STATIC memory/HeapMemory.cpp util/cudify/cudify_vars.cpp memory/PtrMemory.cpp memory/mem_conf.cpp ${CUDA_SOURCES})
set_property(TARGET ofpmmemory PROPERTY CUDA_ARCHITECTURES OFF)
add_library(ofpmmemory_dl SHARED memory/HeapMemory.cpp util/cudify/cudify_vars.cpp memory/PtrMemory.cpp memory/mem_conf.cpp ${CUDA_SOURCES})
set_property(TARGET ofpmmemory_dl PROPERTY CUDA_ARCHITECTURES OFF)
endif()
# Properties shared by both branches above: CUDA_ARCHITECTURES is switched
# off for mem and all three targets are built position independent.
set_property(TARGET mem PROPERTY CUDA_ARCHITECTURES OFF)
set_property(TARGET mem PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET ofpmmemory PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET ofpmmemory_dl PROPERTY POSITION_INDEPENDENT_CODE ON)
# NOTE(review): these two set() calls replace any linker flags configured
# earlier instead of appending to them - confirm that is intended.
if (HIP_FOUND)
SET(CMAKE_EXE_LINKER_FLAGS "--amdgpu-target=${AMD_ARCH_COMPILE}")
SET(CMAKE_SHARED_LINKER_FLAGS "--amdgpu-target=${AMD_ARCH_COMPILE}")
endif()
# GCC only: silence deprecation warnings for the test executable and, when
# TEST_COVERAGE is set, add the coverage options for C++ sources.
if ( CMAKE_COMPILER_IS_GNUCC )
target_compile_options(mem PRIVATE "-Wno-deprecated-declarations")
if (TEST_COVERAGE)
target_compile_options(mem PRIVATE $<$<COMPILE_LANGUAGE:CXX>: -fprofile-arcs -ftest-coverage>)
endif()
endif()
# When Clang is the CUDA compiler, __STRICT_ANSI__ is defined for the directory.
if (CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
add_definitions(-D__STRICT_ANSI__)
endif()
# CUDA specific settings of the test executable.
if (CUDA_FOUND)
target_include_directories(mem PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
# nvcc: print diagnostic numbers, suppress the listed diagnostics and enable
# extended lambdas for CUDA sources.
if (CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
target_compile_options(mem PUBLIC $<$<COMPILE_LANGUAGE:CUDA>:-Xcudafe "--display_error_number --diag_suppress=2885 --diag_suppress=2887 --diag_suppress=2888 --diag_suppress=186 --diag_suppress=111" --expt-extended-lambda>)
endif()
# Coverage options are forwarded to the host compiler for CUDA sources.
if (TEST_COVERAGE)
target_compile_options(mem PRIVATE $<$<COMPILE_LANGUAGE:CUDA>: -Xcompiler "-fprofile-arcs -ftest-coverage" >)
endif()
# With the CUDA backend memory/mem_conf.cpp is compiled as a CUDA source.
if (CUDA_ON_BACKEND STREQUAL "CUDA")
set_source_files_properties(memory/mem_conf.cpp PROPERTIES LANGUAGE CUDA)
endif()
endif()
# Include directories of the test executable.
target_include_directories (mem PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_include_directories (mem PUBLIC ${CMAKE_BINARY_DIR}/config)
target_include_directories (mem PUBLIC ${Boost_INCLUDE_DIRS})
if (ALPAKA_ROOT)
    target_include_directories (mem PUBLIC ${ALPAKA_ROOT}/include)
endif()

# Include directories of the static library. The alpaka directory is only
# added when ALPAKA_ROOT is set (same guard as for mem), so that an unset
# variable does not turn into the bogus public include path "/include".
target_include_directories (ofpmmemory PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
target_include_directories (ofpmmemory PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
target_include_directories (ofpmmemory PRIVATE ${CMAKE_BINARY_DIR}/config)
if (ALPAKA_ROOT)
    target_include_directories (ofpmmemory PUBLIC ${ALPAKA_ROOT}/include)
endif()
target_include_directories (ofpmmemory PUBLIC ${Boost_INCLUDE_DIRS})

# Include directories of the shared library (mirrors the static one).
target_include_directories (ofpmmemory_dl PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
target_include_directories (ofpmmemory_dl PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
target_include_directories (ofpmmemory_dl PRIVATE ${CMAKE_BINARY_DIR}/config)
if (ALPAKA_ROOT)
    target_include_directories (ofpmmemory_dl PUBLIC ${ALPAKA_ROOT}/include)
endif()
target_include_directories (ofpmmemory_dl PUBLIC ${Boost_INCLUDE_DIRS})

# Link dependencies: Boost for every target, OpenMP when it was found and
# gcov for the test executable when coverage is requested.
target_link_libraries(mem ${Boost_LIBRARIES})
if (OPENMP_FOUND)
    target_link_libraries(mem OpenMP::OpenMP_CXX)
    target_link_libraries(ofpmmemory OpenMP::OpenMP_CXX)
    target_link_libraries(ofpmmemory_dl OpenMP::OpenMP_CXX)
endif()
target_link_libraries(ofpmmemory ${Boost_LIBRARIES})
target_link_libraries(ofpmmemory_dl ${Boost_LIBRARIES})
if (TEST_COVERAGE)
    target_link_libraries(mem -lgcov)
endif()
# Install the two libraries.
install(TARGETS ofpmmemory ofpmmemory_dl
        DESTINATION openfpm_devices/lib
        COMPONENT OpenFPM)

# Headers installed into include/memory. This single rule replaces two rules
# that targeted the same destination and listed the same six headers twice;
# the set of installed files is unchanged.
# NOTE(review): util/util_unit_tests.hpp ends up in include/memory, not in
# include/util - kept as it was, confirm that this location is intended.
install(FILES memory/ExtPreAlloc.hpp
        memory/BHeapMemory.hpp
        memory/HeapMemory.hpp
        memory/memory.hpp
        memory/PtrMemory.hpp
        memory/CudaMemory.cuh
        memory/mem_conf.hpp
        util/util_unit_tests.hpp
        DESTINATION openfpm_devices/include/memory
        COMPONENT OpenFPM)

# Utility headers.
install(FILES util/ofp_context.hpp
        util/gpu_context.hpp
        util/gpu_types.hpp
        util/print_stack.hpp
        util/se_util.hpp
        util/cuda_util.hpp
        util/cuda_kernel_error_checker.hpp
        DESTINATION openfpm_devices/include/util
        COMPONENT OpenFPM)

# cudify headers, one directory per backend.
install(FILES util/cudify/alpaka/cudify_alpaka.hpp
        util/cudify/alpaka/cudify_hardware_alpaka.hpp
        DESTINATION openfpm_devices/include/util/cudify/alpaka
        COMPONENT OpenFPM)

install(FILES util/cudify/sequential/cudify_sequential.hpp
        DESTINATION openfpm_devices/include/util/cudify/sequential
        COMPONENT OpenFPM)

install(FILES util/cudify/openmp/cudify_openmp.hpp
        DESTINATION openfpm_devices/include/util/cudify/openmp
        COMPONENT OpenFPM)

install(FILES util/cudify/hip/cudify_hip.hpp
        DESTINATION openfpm_devices/include/util/cudify/hip
        COMPONENT OpenFPM)

install(FILES util/cudify/cuda/cudify_cuda.hpp
        util/cudify/cuda/operators.hpp
        DESTINATION openfpm_devices/include/util/cudify/cuda
        COMPONENT OpenFPM)

install(FILES util/cudify/cudify_hardware_cpu.hpp
        DESTINATION openfpm_devices/include/util/cudify
        COMPONENT OpenFPM)
# Libraries every program of this directory is linked against.
LINKLIBS = $(PTHREAD_LIBS) $(OPT_LIBS) $(BOOST_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIB) $(CUDA_LIBS) $(BOOST_THREAD_LIB)
# The CUDA implementation of the memory class is only compiled when the
# BUILDCUDA conditional is true.
if BUILDCUDA
CUDA_SOURCES=memory/CudaMemory.cu
else
CUDA_SOURCES=
endif
# "mem" is the only program built here.
bin_PROGRAMS = mem
mem_SOURCES = main.cpp memory/HeapMemory.cpp $(CUDA_SOURCES)
# NOTE(review): /usr/local/include and the nvidia-bumblebee library directory
# below are hard-coded, machine specific paths - confirm they are still needed.
mem_CXXFLAGS = $(INCLUDES_PATH) -I/usr/local/include
mem_CFLAGS =
mem_LDADD = $(LINKLIBS) -L/usr/lib64/nvidia-bumblebee/
# Suffix rule: compile .cu files to objects with $(NVCC).
.cu.o :
$(NVCC) $(NVCCFLAGS) -I. $(INCLUDES_PATH) -o $@ -c $<
......@@ -7,6 +7,17 @@
#include <iostream>
#if defined(__HIP__)
/*! \brief Run a HIP runtime call and report a failure on std::cerr
 *
 * `call` is evaluated once and its hipError_t result is compared with
 * hipSuccess. On mismatch the file name, the line number and the text from
 * hipGetErrorString() are written to std::cerr (without a trailing newline).
 * Nothing is thrown or returned, so execution continues after the macro.
 *
 * The expansion is a plain brace block that declares a local named `err`.
 *
 * \param call expression yielding a hipError_t
 *
 */
#define CUDA_SAFE_CALL(call) {\
hipError_t err = call;\
if (hipSuccess != err) {\
std::cerr << "HIP error in file "<< __FILE__ << " in line " << __LINE__ << ": " << hipGetErrorString(err);\
}\
}
#elif defined(CUDA_GPU)
#define CUDA_SAFE_CALL(call) {\
cudaError_t err = call;\
if (cudaSuccess != err) {\
......@@ -14,4 +25,6 @@
}\
}
#endif
/*
* OpenFPMwdeviceCudaMemory.cu
*
* Created on: Aug 11, 2014
* Author: Pietro Incardona
*/
#include <cstddef>
#include <cuda_runtime.h>
#include "CudaMemory.cuh"
/*! \brief Allocate a chunk of memory
 *
 * Creates the device buffer when the guard below finds that nothing has
 * been allocated yet; otherwise the call leaves the object untouched.
 *
 * \param sz size of the chunk of memory to allocate in byte
 *
 * \return true
 *
 */
bool CudaMemory::allocate(size_t sz)
{
	//! Allocate the device memory
	// NOTE(review): the guard tests `dm` while the buffer is stored in `dv`
	// (resize() tests `dv`) - confirm which member is the intended one.
	if (dm == NULL)
	{dv = new boost::shared_ptr<void>(new thrust::device_vector<void>(sz));}

	// The function is declared bool: flowing off the end without a return
	// statement is undefined behaviour.
	return true;
}
/*! \brief Drop the device buffer
 *
 * Resets `dv` to NULL.
 *
 * NOTE(review): allocate() assigns `dv` from a `new` expression; whether
 * resetting it here releases that object depends on the type of `dv`, which
 * is not visible from this file - verify that nothing is leaked.
 *
 */
void CudaMemory::destroy()
{
dv = NULL;
}
/*! \brief Copy from a generic memory object going through the host
 *
 * At the moment the function only invokes t.call(); the remaining steps are
 * described by the comments in the body but are not implemented.
 *
 * \param t worker whose call() is invoked
 *
 */
void CudaMemory::copyFromPointer(ThreadWorker t)
{
// check if we have a host buffer, if not allocate it
// put on queue a copy from device to host
t.call();
// put on queue a memory copy from pointers
}
/*! \brief Copy between two device buffers
 *
 * At the moment the function only invokes t.call().
 *
 * \param t worker whose call() is invoked
 *
 */
void CudaMemory::copyDeviceToDevice(ThreadWorker t)
{
// put on queue a copy from device to device
t.call();
}
/*! \brief Copy from another memory object
 *
 * When the source is a CudaMemory the device to device path is used,
 * otherwise the copy goes through the host.
 *
 * \param m memory to copy from
 * \param t worker forwarded to the selected copy routine
 *
 * \return true
 *
 */
bool CudaMemory::copy(memory m, ThreadWorker t)
{
	//! Here we try to cast memory into CudaMemory
	// dynamic_cast needs a pointer (or reference) target type.
	// NOTE(review): `m` is received by value, so its dynamic type is always
	// `memory`; the parameter probably has to become a reference for this
	// test to ever succeed - confirm against the declaration.
	CudaMemory * ofpm = dynamic_cast<CudaMemory *>(&m);

	//! if we fail we get the pointer and simply copy from the pointer
	if (ofpm == NULL)
	{
		// copy the memory from device to host and from host to device
		copyFromPointer(t);
	}
	else
	{
		// they are the same memory type, use cuda/thrust buffer copy
		copyDeviceToDevice(t);
	}

	return true;
}
/*! \brief Copy from another device memory object
 *
 * \param m memory to copy from
 *
 * \return true
 *
 */
bool CudaMemory::copy(OpenFPMwdeviceCudaMemory m)
{
	// they are the same type of memory so copy from device to device
	// NOTE(review): copyDeviceToDevice is defined with a ThreadWorker
	// parameter and none is available here - confirm the intended overload.
	copyDeviceToDevice();

	// The function is declared bool, so it must return a value.
	return true;
}
/*! \brief Size of the device buffer
 *
 * \return the value reported by the device buffer `dv`
 *
 */
size_t CudaMemory::size()
{
	// The value was previously computed and discarded, leaving the function
	// without a return statement.
	return dv->size();
}
/*! \brief Resize the device buffer
 *
 * When no buffer exists yet one of the requested size is created (same
 * construction as in allocate()), otherwise the existing one is resized.
 *
 * \param sz new size
 *
 * \return true
 *
 */
bool CudaMemory::resize(size_t sz)
{
	//! Allocate the device memory
	if (dv == NULL)
	{dv = new boost::shared_ptr<void>(new thrust::device_vector<void>(sz));}
	else
	{dv.get()->resize(sz);}

	// The function is declared bool, so it must return a value.
	return true;
}
/*
* OpenFPMwdeviceCudaMemory.h
*
* Created on: Aug 8, 2014
* Author: Pietro Incardona
*/
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/generate.h>
#include <thrust/sort.h>
#include <thrust/copy.h>
#include <boost/shared_ptr>
/**
 * \brief This class creates instructions to allocate and destroy GPU memory
 *
 * This class creates instructions to allocate, destroy and resize a GPU
 * buffer; if direct communication is not supported, it can also create
 * instructions for a host pinned memory.
 *
 * Usage:
 *
 * TO DO
 *
 * This class in general is used by the OpenFPM_data project to basically
 * record all the operations made and generate a set of instructions
 *
 * \note No access specifier precedes the members, so all of them (including
 *       the virtual functions) are private.
 *
 */
class CudaMemory : public memory
{
//!
//! device memory
// NOTE(review): no constructor is declared here, so this pointer is not
// initialized by the class itself - confirm where it gets its first value.
void * dm;
//! allocate memory of sz bytes
virtual bool allocate(size_t sz);
//! destroy memory
virtual void destroy();
//! copy memory from m
virtual bool copy(memory m);
//! get the size of the allocated memory
virtual size_t size();
//! resize the allocated memory to sz
virtual bool resize(size_t sz);
};
#include <iostream>
#include "config.h"
#define BOOST_TEST_MODULE "C++ test module for OpenFPM_data project"
#include <boost/test/included/unit_test.hpp>
#define BOOST_TEST_DYN_LINK
#include <boost/test/unit_test.hpp>
#ifndef NO_INIT_AND_MAIN
/*! \brief Initialization function of the unit test module
 *
 * There is nothing to set up, so it always reports success.
 *
 * \return true
 *
 */
bool init_unit_test()
{ return true; }
#include "memory/HeapMemory_unit_tests.hpp"
/*! \brief Entry point of the test executable
 *
 * Hands the command line and the initialization function over to the
 * Boost.Test runner and returns its result.
 *
 * \param argc number of command line arguments
 * \param argv command line arguments
 *
 * \return the value returned by boost::unit_test::unit_test_main
 *
 */
int main(int argc, char* argv[])
{
	const int exit_code = boost::unit_test::unit_test_main( &init_unit_test, argc, argv );

	return exit_code;
}
#endif
#include "config.h"
#include "memory/Memory_unit_tests.hpp"
#include "util/util_unit_tests.hpp"
/*
* BHeapMemory.hpp
*
* Created on: Aug 17, 2014
* Author: Pietro Incardona
*/
#ifndef BHEAP_MEMORY_HPP
#define BHEAP_MEMORY_HPP
#include "config.h"
#include "memory.hpp"
#include <cstddef>
#include <cstdint>
#include <iostream>
//! Byte type used by the memory classes
typedef unsigned char byte;
//! Default memory alignment in bytes (32, chosen for avx)
#define MEM_ALIGNMENT 32
/**
 * \brief It overrides the behavior of size()
 *
 * On normal memory like HeapMemory if you try to use resize to shrink the memory, nothing happens and size() returns the old size.
 * In case of BMemory<HeapMemory> if you try to shrink, the memory is still not shrunk, but size() returns the shrunk size.
 * This gives a "feeling" of shrinkage. The real internal size can be retrieved with msize(). When we use resize to increase
 * the memory size the behaviour remains the same as normal HeapMemory.
 *
 * \note this wrapper can be used in combination also with CudaMemory
 *
 * \tparam Memory wrapped memory class; it must provide allocate(), resize(), size(), destroy(), swap() and be
 *         copy constructible / assignable
 *
 * ### Allocate memory
 *
 * \snippet HeapMemory_unit_tests.hpp BAllocate some memory and fill with data
 *
 * ### Resize memory
 *
 * \snippet HeapMemory_unit_tests.hpp BResize the memory
 *
 * ### Shrink memory
 *
 * \snippet HeapMemory_unit_tests.hpp BShrink memory
 *
 */
template<typename Memory>
class BMemory : public Memory
{
	//! size reported by size(); it can be smaller than the memory really allocated
	size_t buf_sz;

public:

	/*! \brief Copy constructor
	 *
	 * \param mem memory to copy
	 *
	 */
	BMemory(const BMemory<Memory> & mem)
	:Memory(mem),buf_sz(mem.size())
	{
	}

	/*! \brief Move constructor
	 *
	 * The base part is moved; the reported size is read from mem afterwards,
	 * which only touches the (not moved) buf_sz member of mem.
	 *
	 * \param mem memory to move from
	 *
	 */
	BMemory(BMemory<Memory> && mem) noexcept
	:Memory(static_cast<Memory &&>(mem)),buf_sz(mem.size())
	{
	}

	//! Constructor, we choose a default alignment of 32 for avx
	BMemory()
	:Memory(),buf_sz(0)
	{}

	//! Destructor
	virtual ~BMemory() noexcept
	{
	}

	/*! \brief allocate the memory
	 *
	 * Forward the request to the wrapped memory and, on success, record sz
	 * as the reported size
	 *
	 * \param sz size
	 * \return true if the allocation completed correctly
	 *
	 */
	virtual bool allocate(size_t sz)
	{
		bool ret = Memory::allocate(sz);

		if (ret == true)
			buf_sz = sz;

		return ret;
	}

	/*! \brief Resize the allocated memory
	 *
	 * Forward the request to the wrapped memory and, on success, record sz
	 * as the reported size. The reported size follows sz also when sz is
	 * smaller than the memory really allocated.
	 *
	 * \param sz size
	 * \return true if the resize operation completed correctly
	 *
	 */
	virtual bool resize(size_t sz)
	{
		bool ret = Memory::resize(sz);

		// record the requested size only if the wrapped memory accepted it
		if (ret == true)
			buf_sz = sz;

		return ret;
	}

	/*! \brief Return the buffer size
	 *
	 * \return the size last requested with allocate() or resize()
	 *
	 */
	virtual size_t size() const
	{
		return buf_sz;
	}

	/*! \brief Return the memory size
	 *
	 *
	 * \return The allocated memory size, as reported by the wrapped memory
	 *
	 */
	size_t msize()
	{
		return Memory::size();
	}

	/*! \brief Copy the memory
	 *
	 * \param mem memory to copy
	 *
	 * \return itself
	 *
	 */
	BMemory & operator=(const BMemory<Memory> & mem)
	{
		buf_sz = mem.buf_sz;
		static_cast<Memory *>(this)->operator=(mem);

		return *this;
	}

	/*! \brief Move the memory
	 *
	 * \param mem memory to move from
	 *
	 * \return itself
	 *
	 */
	BMemory & operator=(BMemory<Memory> && mem)
	{
		buf_sz = mem.buf_sz;

		// mem is a named variable and therefore an lvalue: cast it back to
		// an rvalue so that a move assignment of the wrapped memory, when
		// it exists, is selected (the copy assignment is used otherwise)
		static_cast<Memory *>(this)->operator=(static_cast<Memory &&>(mem));

		return *this;
	}

	/*! \brief Destroy the internal memory
	 *
	 * The reported size is reset to zero
	 *
	 */
	void destroy()
	{
		Memory::destroy();
		buf_sz = 0;
	}

	/*! \brief swap the two memory object
	 *
	 * \param mem Memory to swap with
	 *
	 */
	void swap(BMemory<Memory> & mem)
	{
		Memory::swap(mem);

		size_t buf_sz_t = mem.buf_sz;
		mem.buf_sz = buf_sz;
		buf_sz = buf_sz_t;
	}
};
#endif
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.