Commit 01b268a3 authored by Matthias Werner's avatar Matthias Werner

initial commit.

parent bd315421
# Top-level build script: forbids in-source builds, selects the output
# directory and default build type, then descends into the helper library
# and the benchmark sources.
cmake_minimum_required(VERSION 2.8)
# NOTE(review): no project() call is visible in this file, so
# PROJECT_SOURCE_DIR below relies on CMake's implicit project() - confirm.

set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
set(CMAKE_DISABLE_SOURCE_CHANGES ON)
set(CMAKE_COLOR_MAKEFILE ON)
# Uncomment for verbose compiler/linker command lines:
#set(CMAKE_VERBOSE_MAKEFILE ON)

# Abort immediately on an in-source build. FATAL_ERROR stops processing right
# here, whereas SEND_ERROR would keep configuring the remaining script.
if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
  message(FATAL_ERROR "In-source builds are not allowed.")
endif ()

# Shared libraries get no name prefix on Windows.
if (WIN32)
  set(CMAKE_SHARED_LIBRARY_PREFIX "")
endif ()

# All executables end up in <project source dir>/build.
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/build")
set(EXECUTABLE_OUTPUT_PATH "${PROJECT_SOURCE_DIR}/build")

# Default to a Release build when neither a multi-config generator nor the
# user selected a build type.
if (NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif ()
message(">> Setting up ${CMAKE_BUILD_TYPE} build")

add_subdirectory(lib/helper/src)
add_subdirectory(src)
......@@ -175,7 +175,8 @@
END OF TERMS AND CONDITIONS
Copyright 2016 Matthias Werner
Copyright 2016 Matthias Werner <matthias dot werner1 at tu dresden de>
TU Dresden, Germany
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
......
......@@ -7,11 +7,22 @@ This project is in development, but benchmarks are already possible for cuFFT an
Timer and allocation statistics of a benchmark are stored into a csv file.
## Build
CUDA: Check src/CMakeLists.txt for device architectures
```
mkdir build && cd build
cmake ..
make -j 4
```
CMake tries to find the libraries and enables the corresponding make targets.
After make has finished, you can run e.g. `./gearshifft_cufft_float`.
## Requirements
- cmake 2.8+
- C++14 capable compiler
- CUDA FFT library cuFFT or clFFT for OpenCL
- FFTW
- boost version 1.56+
## Tested on ...
- gcc 5.3.0
......@@ -21,9 +32,13 @@ Timer and allocation statistics of a benchmark are stored into a csv file.
- OpenCL 1.2-4.4.0.117 (Nvidia)
- Nvidia Kepler K80 GPU and Kepler K20X GPU
## Issues
- clFFT does not support arbitrary transform sizes. The benchmark will print only these tests as failed.
- At the moment only single-GPU runs are supported; batched transforms are not considered.
## Roadmap
- [x] cuFFT
- [ ] clFFT: emulation of arbitrary transform sizes / non-supported radices
- [ ] liFFT: include library independent FFT framework
- [ ] scripts for creating benchmark summary of the individual results
- [ ] callbacks to benchmark a typical FFT use case
- [ ] callbacks to benchmark a typical FFT use case
\ No newline at end of file
# This comes from:
# https://github.com/elhigu/cmake-findopencl
# @todo may integrate opencl finder from cmake
# Find OpenCL
#
# To set manually the paths, define these environment variables:
# OpenCL_INCPATH - Include path (e.g. OpenCL_INCPATH=/opt/cuda/4.0/cuda/include)
# OpenCL_LIBPATH - Library path (e.g. OpenCL_LIBPATH=/usr/lib64/nvidia)
#
# Once done this will define
# OPENCL_FOUND - system has OpenCL
# OPENCL_INCLUDE_DIRS - the OpenCL include directory
# OPENCL_LIBRARIES - link these to use OpenCL
# OPENCL_HAS_CPP_BINDINGS - system has also cl.hpp
# 0.2.0 added variables for our cluster environment
# Load find_package_handle_standard_args(), which is called at the end of this
# module. It is a helper module, so it is pulled in with include() rather than
# being searched for as if it were a package.
include(FindPackageHandleStandardArgs)

# NOTE(review): this looks like the revision of this finder script (see the
# "0.2.0" changelog note above), not the version of the detected OpenCL
# installation - confirm before relying on it.
set(OPENCL_VERSION_STRING "0.2.0")
set(OPENCL_VERSION_MAJOR 0)
set(OPENCL_VERSION_MINOR 2)
set(OPENCL_VERSION_PATCH 0)
IF (APPLE)
# Library lookup: when the environment provides OpenCL_LIBPATH, search only
# there; otherwise fall back to CMake's default search.
if (DEFINED ENV{OpenCL_LIBPATH})
  find_library(OPENCL_LIBRARIES
    OpenCL
    PATHS ENV OpenCL_LIBPATH
    NO_DEFAULT_PATH)
else ()
  find_library(OPENCL_LIBRARIES
    OpenCL
    DOC "OpenCL lib for OSX")
endif ()

# Header lookup: when the environment provides OpenCL_INCPATH, look for
# CL/cl.h or OpenCL/cl.h only there, and for the C++ bindings only in the
# directory that was just found; otherwise use CMake's default search.
if (DEFINED ENV{OpenCL_INCPATH})
  find_path(OPENCL_INCLUDE_DIRS
    CL/cl.h OpenCL/cl.h
    PATHS ENV OpenCL_INCPATH
    NO_DEFAULT_PATH)
  find_path(_OPENCL_CPP_INCLUDE_DIRS
    CL/cl.hpp OpenCL/cl.hpp
    PATHS ${OPENCL_INCLUDE_DIRS}
    NO_DEFAULT_PATH)
else ()
  find_path(OPENCL_INCLUDE_DIRS
    OpenCL/cl.h
    DOC "Include for OpenCL on OSX")
  find_path(_OPENCL_CPP_INCLUDE_DIRS
    OpenCL/cl.hpp
    DOC "Include for OpenCL CPP bindings on OSX")
endif ()
ELSE (APPLE)
IF (WIN32)
# Collect include and library search directories from the environment
# variables set by the vendor SDKs, then locate the headers and both the
# 32-bit and 64-bit import libraries.
set(OPENCL_INCLUDE_SEARCH_PATHS)
set(OPENCL_LIBRARY_SEARCH_PATHS)
set(OPENCL_LIBRARY_64_SEARCH_PATHS)

# Nvidia
if (DEFINED ENV{CUDA_INC_PATH})
  list(APPEND OPENCL_INCLUDE_SEARCH_PATHS $ENV{CUDA_INC_PATH})
  list(APPEND OPENCL_LIBRARY_64_SEARCH_PATHS $ENV{CUDA_LIB_PATH}/../lib64)
  list(APPEND OPENCL_LIBRARY_SEARCH_PATHS $ENV{CUDA_LIB_PATH}/../lib)
endif ()

# Intel SDK
if (DEFINED ENV{INTELOCSDKROOT})
  list(APPEND OPENCL_INCLUDE_SEARCH_PATHS $ENV{INTELOCSDKROOT}/include)
  list(APPEND OPENCL_LIBRARY_64_SEARCH_PATHS $ENV{INTELOCSDKROOT}/lib/x64)
  list(APPEND OPENCL_LIBRARY_SEARCH_PATHS $ENV{INTELOCSDKROOT}/lib/x86)
endif ()

# AMD SDK
if (DEFINED ENV{AMDAPPSDKROOT})
  list(APPEND OPENCL_INCLUDE_SEARCH_PATHS $ENV{AMDAPPSDKROOT}/include)
  list(APPEND OPENCL_LIBRARY_64_SEARCH_PATHS $ENV{AMDAPPSDKROOT}/lib/x86_64)
  list(APPEND OPENCL_LIBRARY_SEARCH_PATHS $ENV{AMDAPPSDKROOT}/lib/x86)
endif ()

# An explicit OpenCL_INCPATH replaces every vendor include directory.
if (DEFINED ENV{OpenCL_INCPATH})
  set(OPENCL_INCLUDE_SEARCH_PATHS $ENV{OpenCL_INCPATH})
endif ()

# An explicit OpenCL_LIBPATH replaces every vendor library directory.
# The guard tests OpenCL_LIBPATH itself: keying it on OpenCL_INCPATH would
# wipe the library lists when only the include path is given and would ignore
# OpenCL_LIBPATH when it is the only variable set.
if (DEFINED ENV{OpenCL_LIBPATH})
  set(OPENCL_LIBRARY_SEARCH_PATHS $ENV{OpenCL_LIBPATH})
  set(OPENCL_LIBRARY_64_SEARCH_PATHS $ENV{OpenCL_LIBPATH})
endif ()

find_path(OPENCL_INCLUDE_DIRS CL/cl.h PATHS ${OPENCL_INCLUDE_SEARCH_PATHS})
find_path(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS ${OPENCL_INCLUDE_SEARCH_PATHS})

# Both candidates are located here; a later link test picks the usable one.
find_library(_OPENCL_32_LIBRARIES OpenCL.lib
  HINTS ${OPENCL_LIBRARY_SEARCH_PATHS}
  PATHS ${OPENCL_LIB_DIR} ENV PATH)
find_library(_OPENCL_64_LIBRARIES OpenCL.lib
  HINTS ${OPENCL_LIBRARY_64_SEARCH_PATHS}
  PATHS ${OPENCL_LIB_DIR} ENV PATH)
# Decide between the 64-bit and the 32-bit import library by compiling and
# linking a tiny probe program against each candidate, 64-bit first.
set(_OPENCL_VERSION_SOURCE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/openclversion.c")
file(WRITE "${_OPENCL_VERSION_SOURCE}"
"
#if __APPLE__
#include <OpenCL/cl.h>
#else /* !__APPLE__ */
#include <CL/cl.h>
#endif /* __APPLE__ */
int main()
{
cl_int result;
cl_platform_id id;
result = clGetPlatformIDs(1, &id, NULL);
return result != CL_SUCCESS;
}
")

try_compile(_OPENCL_64_COMPILE_SUCCESS
  ${CMAKE_BINARY_DIR}
  "${_OPENCL_VERSION_SOURCE}"
  CMAKE_FLAGS
  "-DINCLUDE_DIRECTORIES:STRING=${OPENCL_INCLUDE_DIRS}"
  CMAKE_FLAGS
  "-DLINK_LIBRARIES:STRING=${_OPENCL_64_LIBRARIES}"
)

if (_OPENCL_64_COMPILE_SUCCESS)
  message(STATUS "OpenCL 64bit lib found.")
  set(OPENCL_LIBRARIES ${_OPENCL_64_LIBRARIES})
else ()
  # The 64-bit probe did not build; retry with the 32-bit candidate.
  try_compile(_OPENCL_32_COMPILE_SUCCESS
    ${CMAKE_BINARY_DIR}
    "${_OPENCL_VERSION_SOURCE}"
    CMAKE_FLAGS
    "-DINCLUDE_DIRECTORIES:STRING=${OPENCL_INCLUDE_DIRS}"
    CMAKE_FLAGS
    "-DLINK_LIBRARIES:STRING=${_OPENCL_32_LIBRARIES}"
  )
  if (_OPENCL_32_COMPILE_SUCCESS)
    message(STATUS "OpenCL 32bit lib found.")
    set(OPENCL_LIBRARIES ${_OPENCL_32_LIBRARIES})
  else ()
    # Neither candidate links; OPENCL_LIBRARIES is left unset by this block.
    message(STATUS "Couldn't link opencl..")
  endif ()
endif ()
ELSE (WIN32) # linux
# Cygwin links against Windows-style import libraries.
if (CYGWIN)
  set(CMAKE_FIND_LIBRARY_SUFFIXES .lib)
  set(OCL_LIB_SUFFIX .lib)
endif ()

# Import OPENCL_ROOT / OPENCL_INC / OPENCL_LIB from the environment unless
# they were already defined as CMake variables.
# NOTE(review): OPENCL_ROOT is imported here but not used by any search in
# this block - confirm whether it should contribute search paths.
if (NOT DEFINED OPENCL_ROOT AND DEFINED ENV{OPENCL_ROOT})
  set(OPENCL_ROOT $ENV{OPENCL_ROOT})
endif ()
if (NOT DEFINED OPENCL_INC AND DEFINED ENV{OPENCL_INC})
  set(OPENCL_INC $ENV{OPENCL_INC})
endif ()
if (NOT DEFINED OPENCL_LIB AND DEFINED ENV{OPENCL_LIB})
  set(OPENCL_LIB $ENV{OPENCL_LIB})
endif ()

# Unix style platforms.
# ${OPENCL_LIB} is expanded here; the bare word OPENCL_LIB would be treated
# as a literal directory name and the imported value would never be searched.
find_library(OPENCL_LIBRARIES OpenCL${OCL_LIB_SUFFIX}
  PATHS ENV LD_LIBRARY_PATH
        ENV OpenCL_LIBPATH
        ${OPENCL_LIB}
)

# Derive an include candidate two levels above the library directory.
get_filename_component(OPENCL_LIB_DIR "${OPENCL_LIBRARIES}" PATH)
get_filename_component(_OPENCL_INC_CAND "${OPENCL_LIB_DIR}/../../include" ABSOLUTE)

# The AMD SDK currently does not place its headers
# in /usr/include, therefore also search relative
# to the library.
# NOTE(review): this looks for "cl.h" with the path suffix "CL", so the result
# is the directory that directly contains cl.h, while the other platform
# branches search for "CL/cl.h" - confirm this is intended.
find_path(OPENCL_INCLUDE_DIRS
  NAMES "cl.h"
  PATHS ${_OPENCL_INC_CAND}
  PATHS "/usr/local/cuda/include"
        "/opt/AMDAPP/include"
        ENV OpenCL_INCPATH
  PATHS ${OPENCL_INC}
  PATH_SUFFIXES "CL")

# C++ bindings: same candidates, with ${OPENCL_INC} expanded as well.
find_path(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp
  PATHS ${_OPENCL_INC_CAND}
        "/usr/local/cuda/include"
        "/opt/AMDAPP/include"
        ENV OpenCL_INCPATH
        ${OPENCL_INC})
ENDIF (WIN32)
ENDIF (APPLE)
# Report the search result; both the library and the include directory must
# have been found for the package to count as found.
find_package_handle_standard_args(OpenCL
  DEFAULT_MSG
  OPENCL_LIBRARIES
  OPENCL_INCLUDE_DIRS)

# When cl.hpp was located, advertise the C++ bindings and add their directory
# to the include list.
if (_OPENCL_CPP_INCLUDE_DIRS)
  set(OPENCL_HAS_CPP_BINDINGS TRUE)
  list(APPEND OPENCL_INCLUDE_DIRS ${_OPENCL_CPP_INCLUDE_DIRS})
  # Both lookups often yield the same directory, so drop the duplicate.
  list(REMOVE_DUPLICATES OPENCL_INCLUDE_DIRS)
endif ()

mark_as_advanced(OPENCL_INCLUDE_DIRS)
# - Find clFFT, AMD's OpenCL FFT library
# This script defines the following variables:
# CLFFT_INCLUDE_DIRS - Location of clFFT's include directory.
# CLFFT_LIBRARIES - Location of clFFT's libraries
# CLFFT_FOUND - True if clFFT has been located
#
# If your clFFT installation is not in a standard installation directory, you
# may provide a hint to where it may be found. Simply set the value CLFFT_ROOT
# to the directory containing 'include/clFFT.h' prior to calling this script.
#
# By default this script will attempt to find the 32-bit version of clFFT.
# If you desire to use the 64-bit version instead, set
# set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS ON)
# prior to calling this script.
#
#=============================================================================
# Copyright 2014 Brian Kloppenborg
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#=============================================================================
#
# Take CLFFT_ROOT from the environment unless it was already defined as a
# CMake variable.
if (NOT DEFINED CLFFT_ROOT AND DEFINED ENV{CLFFT_ROOT})
  set(CLFFT_ROOT $ENV{CLFFT_ROOT})
endif ()

# Headers. First pass: look only below CLFFT_ROOT so that an explicit hint
# always wins over anything installed system-wide.
find_path(CLFFT_INCLUDE_DIRS
  NAMES "clFFT.h"
  PATHS ${CLFFT_ROOT}
  PATH_SUFFIXES "include"
  NO_DEFAULT_PATH
)
# Second pass: fall back to CMake's default search locations. This call does
# nothing when the first pass already found the header.
find_path(CLFFT_INCLUDE_DIRS
  NAMES "clFFT.h"
)

# Library: same two-pass scheme as for the headers.
find_library(CLFFT_LIBRARIES
  NAMES "clFFT"
  PATHS ${CLFFT_ROOT}
  PATH_SUFFIXES "lib" "lib64"
  NO_DEFAULT_PATH
)
find_library(CLFFT_LIBRARIES
  NAMES "clFFT"
)

# handle the QUIETLY and REQUIRED arguments and set CLFFT_FOUND to TRUE if
# all listed variables are TRUE
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(CLFFT DEFAULT_MSG CLFFT_LIBRARIES CLFFT_INCLUDE_DIRS)
mark_as_advanced(CLFFT_LIBRARIES CLFFT_INCLUDE_DIRS)
This diff is collapsed.
#ifndef CLFFT_HELPER_HPP_
#define CLFFT_HELPER_HPP_
#include <CL/cl.h>
#include <clFFT.h>
#include <cstdio>  // fprintf
#include <cstdlib> // malloc, free
#include <sstream>
#include <stdexcept>
#include <string>  // string, to_string
#include <utility> // pair
#include <vector>
// Error-checking wrappers. All three forward the given status code, together
// with the call site's file name and line number, to
// gearshifft::ClFFT::__clSafeCall.
#define clSafeCall( err ) gearshifft::ClFFT::__clSafeCall( err, __FILE__, __LINE__ )
#define clFFTSafeCall( err ) gearshifft::ClFFT::__clSafeCall( err, __FILE__, __LINE__ )
#define clCheckError(err) gearshifft::ClFFT::__clSafeCall( err, __FILE__, __LINE__ )
// Turns its argument into a string literal. The argument is not macro-expanded
// first, so a macro name is stringified as written.
#define STRINGIFY(A) #A
// Emits a switch label that returns the spelled-out name of the constant s.
#define clFFTStatusCase(s) case s: return STRINGIFY(s)
namespace gearshifft {
namespace ClFFT {
/**
 * Translates an OpenCL or clFFT status code into the name of its constant.
 *
 * Numeric OpenCL codes are resolved through a lookup table; clFFT specific
 * codes are matched by their enumerators afterwards.
 *
 * @param error status code to translate
 * @return name of the matching constant, or "Unknown OpenCL error" when the
 *         code is not known to this function
 */
inline const char *getOpenCLErrorString(cl_int error)
{
  struct CodeName {
    cl_int code;
    const char* name;
  };
  static const CodeName kOpenCLCodes[] = {
    // run-time and JIT compiler errors
    {0, "CL_SUCCESS"},
    {-1, "CL_DEVICE_NOT_FOUND"},
    {-2, "CL_DEVICE_NOT_AVAILABLE"},
    {-3, "CL_COMPILER_NOT_AVAILABLE"},
    {-4, "CL_MEM_OBJECT_ALLOCATION_FAILURE"},
    {-5, "CL_OUT_OF_RESOURCES"},
    {-6, "CL_OUT_OF_HOST_MEMORY"},
    {-7, "CL_PROFILING_INFO_NOT_AVAILABLE"},
    {-8, "CL_MEM_COPY_OVERLAP"},
    {-9, "CL_IMAGE_FORMAT_MISMATCH"},
    {-10, "CL_IMAGE_FORMAT_NOT_SUPPORTED"},
    {-11, "CL_BUILD_PROGRAM_FAILURE"},
    {-12, "CL_MAP_FAILURE"},
    {-13, "CL_MISALIGNED_SUB_BUFFER_OFFSET"},
    {-14, "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"},
    {-15, "CL_COMPILE_PROGRAM_FAILURE"},
    {-16, "CL_LINKER_NOT_AVAILABLE"},
    {-17, "CL_LINK_PROGRAM_FAILURE"},
    {-18, "CL_DEVICE_PARTITION_FAILED"},
    {-19, "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"},
    // compile-time errors
    {-30, "CL_INVALID_VALUE"},
    {-31, "CL_INVALID_DEVICE_TYPE"},
    {-32, "CL_INVALID_PLATFORM"},
    {-33, "CL_INVALID_DEVICE"},
    {-34, "CL_INVALID_CONTEXT"},
    {-35, "CL_INVALID_QUEUE_PROPERTIES"},
    {-36, "CL_INVALID_COMMAND_QUEUE"},
    {-37, "CL_INVALID_HOST_PTR"},
    {-38, "CL_INVALID_MEM_OBJECT"},
    {-39, "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"},
    {-40, "CL_INVALID_IMAGE_SIZE"},
    {-41, "CL_INVALID_SAMPLER"},
    {-42, "CL_INVALID_BINARY"},
    {-43, "CL_INVALID_BUILD_OPTIONS"},
    {-44, "CL_INVALID_PROGRAM"},
    {-45, "CL_INVALID_PROGRAM_EXECUTABLE"},
    {-46, "CL_INVALID_KERNEL_NAME"},
    {-47, "CL_INVALID_KERNEL_DEFINITION"},
    {-48, "CL_INVALID_KERNEL"},
    {-49, "CL_INVALID_ARG_INDEX"},
    {-50, "CL_INVALID_ARG_VALUE"},
    {-51, "CL_INVALID_ARG_SIZE"},
    {-52, "CL_INVALID_KERNEL_ARGS"},
    {-53, "CL_INVALID_WORK_DIMENSION"},
    {-54, "CL_INVALID_WORK_GROUP_SIZE"},
    {-55, "CL_INVALID_WORK_ITEM_SIZE"},
    {-56, "CL_INVALID_GLOBAL_OFFSET"},
    {-57, "CL_INVALID_EVENT_WAIT_LIST"},
    {-58, "CL_INVALID_EVENT"},
    {-59, "CL_INVALID_OPERATION"},
    {-60, "CL_INVALID_GL_OBJECT"},
    {-61, "CL_INVALID_BUFFER_SIZE"},
    {-62, "CL_INVALID_MIP_LEVEL"},
    {-63, "CL_INVALID_GLOBAL_WORK_SIZE"},
    {-64, "CL_INVALID_PROPERTY"},
    {-65, "CL_INVALID_IMAGE_DESCRIPTOR"},
    {-66, "CL_INVALID_COMPILER_OPTIONS"},
    {-67, "CL_INVALID_LINKER_OPTIONS"},
    {-68, "CL_INVALID_DEVICE_PARTITION_COUNT"},
    // extension errors
    {-1000, "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR"},
    {-1001, "CL_PLATFORM_NOT_FOUND_KHR"},
    {-1002, "CL_INVALID_D3D10_DEVICE_KHR"},
    {-1003, "CL_INVALID_D3D10_RESOURCE_KHR"},
    {-1004, "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR"},
    {-1005, "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR"}
  };

  for (const CodeName& entry : kOpenCLCodes) {
    if (entry.code == error) {
      return entry.name;
    }
  }

  // CLFFT: matched by enumerator, so the values come from clFFT.h.
  switch (error) {
    clFFTStatusCase(CLFFT_BUGCHECK);
    clFFTStatusCase(CLFFT_NOTIMPLEMENTED);
    clFFTStatusCase(CLFFT_TRANSPOSED_NOTIMPLEMENTED);
    clFFTStatusCase(CLFFT_FILE_NOT_FOUND);
    clFFTStatusCase(CLFFT_FILE_CREATE_FAILURE);
    clFFTStatusCase(CLFFT_VERSION_MISMATCH);
    clFFTStatusCase(CLFFT_INVALID_PLAN);
    clFFTStatusCase(CLFFT_DEVICE_NO_DOUBLE);
    clFFTStatusCase(CLFFT_DEVICE_MISMATCH);
    default:
      return "Unknown OpenCL error";
  }
}
/**
 * Checks a status code and reports a failure.
 *
 * Does nothing when err equals CL_SUCCESS. Otherwise the failure is printed
 * to stderr with the given source location and a std::runtime_error is thrown
 * that carries the error name and its numeric value.
 *
 * @param err  status code to check
 * @param file source file of the call site (for the stderr message)
 * @param line source line of the call site (for the stderr message)
 * @throws std::runtime_error when err differs from CL_SUCCESS
 */
template<typename T>
inline void __clSafeCall( T err, const char *file, const int line )
{
  if ( err == CL_SUCCESS )
  {
    return;
  }
  const char* errName = getOpenCLErrorString( err );
  fprintf( stderr, "clSafeCall() failed at %s:%i : %s\n", file, line, errName );

  std::string what("OpenCL Error: ");
  what += errName;
  what += " ";
  what += std::to_string(err);
  throw std::runtime_error(what);
}
inline std::stringstream getClDeviceInformations(cl_device_id dev_id)
{
std::stringstream info;
std::vector<std::pair<std::string,std::string> > values;
char* value = nullptr;
size_t valueSize = 0;
cl_uint maxComputeUnits;
// print device name
clGetDeviceInfo(dev_id, CL_DEVICE_NAME, 0, NULL, &valueSize);
value = (char*) malloc(valueSize);
clGetDeviceInfo(dev_id, CL_DEVICE_NAME, valueSize, value, NULL);
values.emplace_back("Device", value);
free(value);
// print hardware device version
clGetDeviceInfo(dev_id, CL_DEVICE_VERSION, 0, NULL, &valueSize);
value = (char*) malloc(valueSize);
clGetDeviceInfo(dev_id, CL_DEVICE_VERSION, valueSize, value, NULL);
values.emplace_back("Hardware", value);
free(value);
// print software driver version
clGetDeviceInfo(dev_id, CL_DRIVER_VERSION, 0, NULL, &valueSize);
value = (char*) malloc(valueSize);
clGetDeviceInfo(dev_id, CL_DRIVER_VERSION, valueSize, value, NULL);
values.emplace_back("Software", value);
free(value);
// print c version supported by compiler for device
clGetDeviceInfo(dev_id, CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &valueSize);
value = (char*) malloc(valueSize);
clGetDeviceInfo(dev_id, CL_DEVICE_OPENCL_C_VERSION, valueSize, value, NULL);
values.emplace_back("OpenCL", value);
free(value);
// print parallel compute units
clGetDeviceInfo(dev_id, CL_DEVICE_MAX_COMPUTE_UNITS,
sizeof(maxComputeUnits), &maxComputeUnits, NULL);
values.emplace_back("ComputeUnits", std::to_string(maxComputeUnits));
info << "\"ClFFT Informations\"";
for(auto pair : values) {
info << ",\"" << pair.first << "\",\"" << pair.second << '"';
}
return info;
}
/**
 * Selects an OpenCL platform and device of the requested kind.
 *
 * All platforms are enumerated and the first one that offers a device of type
 * devkind is chosen. When no platform has such a device, the first platform
 * and its CL_DEVICE_TYPE_DEFAULT device are used instead.
 *
 * @param devkind  requested device type
 * @param platform receives the selected platform
 * @param device   receives the selected device
 * @return 0 on success, 1 when the platforms could not be enumerated
 * @throws std::runtime_error when the fallback lookup fails
 */
inline int findClDevice(cl_device_type devkind, cl_platform_id* platform, cl_device_id* device)
{
  cl_uint num_of_platforms = 0;
  if (clGetPlatformIDs(0, NULL, &num_of_platforms) != CL_SUCCESS)
  {
    fprintf(stderr, "Unable to get platform_id\n");
    return 1;
  }

  // The vector owns the platform list, so it is released on every exit path,
  // including the exception path of the fallback below.
  std::vector<cl_platform_id> platform_ids(num_of_platforms);
  if (clGetPlatformIDs(num_of_platforms, platform_ids.data(), NULL) != CL_SUCCESS)
  {
    fprintf(stderr,"Unable to get platform_id\n");
    return 1;
  }

  for(cl_platform_id candidate : platform_ids)
  {
    cl_device_id device_id = NULL;
    if(clGetDeviceIDs(candidate, devkind, 1, &device_id, NULL) == CL_SUCCESS){
      *platform = candidate;
      *device = device_id;
      return 0;
    }
  }

  // No platform offers the requested device kind: use the default device of
  // the first platform.
  clSafeCall(clGetPlatformIDs( 1, platform, NULL ));
  clSafeCall(clGetDeviceIDs( *platform, CL_DEVICE_TYPE_DEFAULT, 1, device, NULL ));
  return 0;
}
} // ClFFT
} // gearshifft
#endif
#ifndef CUFFT_HPP_
#define CUFFT_HPP_
#include "helper.h"
#include "fft_abstract.hpp"
#include "fixture_test_suite.hpp"
#include "cufft_helper.hpp"
#include <array>
#include <cufft.h>
#include <vector_types.h>
namespace gearshifft {
namespace CuFFT {
namespace traits{
// @todo simplify constants
/**
 * cuFFT types, transform kinds and execute functors for one precision.
 *
 * This primary template (default: float) uses the single-precision cuFFT
 * types and calls; the double-precision variant is a specialization.
 */
template<typename T_Precision=float>
struct Types
{
  using ComplexType = cufftComplex;
  using RealType = cufftReal;

  /// Transform kind constants, exposed as integral_constant types.
  struct FFTForward: std::integral_constant< cufftType, CUFFT_R2C >{};
  struct FFTComplex: std::integral_constant< cufftType, CUFFT_C2C >{};
  struct FFTBackward: std::integral_constant< cufftType, CUFFT_C2R >{};

  /// Runs the forward transform; the overload is chosen by the input type.
  struct FFTExecuteForward{
    /// real -> complex
    void operator()(cufftHandle plan, RealType* src, ComplexType* dst){
      CHECK_CUFFT(cufftExecR2C(plan, src, dst));
    }
    /// complex -> complex
    void operator()(cufftHandle plan, ComplexType* src, ComplexType* dst){
      CHECK_CUFFT(cufftExecC2C(plan, src, dst, CUFFT_FORWARD));
    }
  };

  /// Runs the inverse transform; the overload is chosen by the output type.
  struct FFTExecuteBackward{
    /// complex -> real
    void operator()(cufftHandle plan, ComplexType* src, RealType* dst){
      CHECK_CUFFT(cufftExecC2R(plan, src, dst));
    }
    /// complex -> complex
    void operator()(cufftHandle plan, ComplexType* src, ComplexType* dst){
      CHECK_CUFFT(cufftExecC2C(plan, src, dst, CUFFT_INVERSE));
    }
  };
};
template<>
struct Types<double>
{
using ComplexType = cufftDoubleComplex;
using RealType = cufftDoubleReal;
struct FFTForward: std::integral_constant< cufftType, CUFFT_D2Z >{};
struct FFTComplex: std::integral_constant< cufftType, CUFFT_Z2Z >{};
struct FFTBackward: std::integral_constant< cufftType, CUFFT_Z2D >{};
struct FFTExecuteForward{
void operator()(cufftHandle plan, RealType* in, ComplexType* out){
CHECK_CUFFT(cufftExecD2Z(plan, in, out));
}
void operator()(cufftHandle plan, ComplexType* in, ComplexType* out){
CHECK_CUFFT(cufftExecZ2Z(plan, in, out, CUFFT_FORWARD));
}
};
struct FFTExecuteBackward{
void operator()(cufftHandle plan, ComplexType* in, RealType* out){
CHECK_CUFFT(cufftExecZ2D(plan, in, out));
}
void operator()(cufftHandle plan, ComplexType* in, ComplexType* out){
CHECK_CUFFT(cufftExecZ2Z(plan, in, out, CUFFT_INVERSE));
}
};
};
} // namespace traits
/**
 * Queries the work-area size cuFFT reports for a plan.
 *
 * Depending on the number of dimensions (1, 2 or 3) the matching
 * cufftGetSize{1,2,3}d call is issued for the transform kind FFTType::value;
 * a 1D query uses a batch count of 1. Any other NDim yields 0.
 *
 * @tparam FFTType type whose ::value is the cufftType of the transform
 * @tparam NDim    number of dimensions
 * @param e    extent per dimension
 * @param plan plan handle passed to cufftGetSize*d
 * @return size reported by cuFFT, or 0 when NDim is not 1, 2 or 3
 */
template<typename FFTType, size_t NDim>
size_t estimateAllocSize(const std::array<unsigned,NDim>& e, cufftHandle& plan)
{
  size_t workSize = 0;
  switch(NDim){
    case 1:
      CHECK_CUFFT( cufftGetSize1d(plan, e[0], FFTType::value, 1, &workSize) );
      break;
    case 2:
      CHECK_CUFFT( cufftGetSize2d(plan, e[0], e[1], FFTType::value, &workSize) );
      break;
    case 3:
      CHECK_CUFFT( cufftGetSize3d(plan, e[0], e[1], e[2], FFTType::value, &workSize) );
      break;
    default:
      break;
  }
  return workSize;
}
/**
* Plan Creator depending on FFT transform type (CUFFT_R2C, ...).
*/
template<typename FFTType>
void makePlan(cufftHandle& plan, const std::array<unsigned,3>& e){
CHECK_CUFFT(cufftPlan3d(&plan, e[0], e[1], e[2], FFTType::value));
}
template<typename FFTType>
void makePlan(cufftHandle& plan, const std::array<unsigned,1>& e){
CHECK_CUFFT(cufftPlan1d(&plan, e[0], FFTType::value, 1));
}
template<typename FFTType>
void makePlan(cufftHandle& plan, const std::array<unsigned,2>& e){
CHECK_CUFFT(cufftPlan2d(&plan, e[0], e[1], FFTType::value));
}