CudaMemory.cu

#include <cstddef>
#include <cuda_runtime.h>
#include "CudaMemory.cuh"
#include "cuda_macro.h"
#include <cstring>


/*! \brief Allocate a chunk of memory
 *
 * Allocate a chunk of memory
 *
 * \param sz size of the chunk of memory to allocate in byte
 *
 */
bool CudaMemory::allocate(size_t sz)
{
	//! Allocate the device memory
	if (dm == NULL)
	{CUDA_SAFE_CALL(cudaMalloc(&dm,sz));}

	this->sz = sz;

	return true;
}

/*! \brief destroy a chunk of memory
 *
 * Destroy a chunk of memory
 *
 */
void CudaMemory::destroy()
{
	CUDA_SAFE_CALL(cudaFree(dm));
}

/*! \brief Allocate the host buffer
 *
 * Allocate the host buffer
 *
 */

void CudaMemory::allocate_host(size_t sz)
{
	if (hm == NULL)
	{CUDA_SAFE_CALL(cudaHostAlloc(&hm,sz,cudaHostAllocMapped))}
}

/*! \brief copy the data from a pointer
 *
 * copy the data from a pointer
 *
 *	\param ptr
 *	\return true if success
 */
bool CudaMemory::copyFromPointer(void * ptr)
{
	// check if we have a host buffer, if not allocate it

	allocate_host(sz);

	// get the device pointer

	void * dvp;
	CUDA_SAFE_CALL(cudaHostGetDevicePointer(&dvp,hm,0));

	// memory copy

	memcpy(ptr,dvp,sz);

	return true;
}

/*! \brief copy from device to device
 *
 * copy a piece of memory from device to device
 *
 * \param CudaMemory from where to copy
 *
 * \return true is success
 */
bool CudaMemory::copyDeviceToDevice(CudaMemory & m)
{
	//! The source buffer is too big to copy it

	if (m.sz > sz)
	{
		std::cerr << "Error " << __LINE__ << __FILE__ << ": source buffer is too big to copy";
		return false;
	}

	//! Copy the memory
	CUDA_SAFE_CALL(cudaMemcpy(m.dm,dm,m.sz,cudaMemcpyDeviceToDevice));

	return true;
}

/*! \brief copy from memory
 *
 * copy from memory
 *
 * \param m a memory interface
 *
 */
bool CudaMemory::copy(memory & m)
{
	//! Here we try to cast memory into OpenFPMwdeviceCudaMemory
	CudaMemory * ofpm = dynamic_cast<CudaMemory *>(&m);

	//! if we fail we get the pointer and simply copy from the pointer

	if (ofpm == NULL)
	{
		// copy the memory from device to host and from host to device

		return copyFromPointer(m.getPointer());
	}
	else
	{
		// they are the same memory type, use cuda/thrust buffer copy

		return copyDeviceToDevice(*ofpm);
	}
}

/*! \brief Get the size of the allocated memory
 *
 * Get the size of the allocated memory
 *
 * \return the size of the allocated memory
 *
 */

size_t CudaMemory::size()
{
	return sz;
}

/*! \brief Resize the allocated memory
 *
 * Resize the allocated memory, if request is smaller than the allocated memory
 * is not resized
 *
 * \param sz size
 * \return true if the resize operation complete correctly
 *
 */

bool CudaMemory::resize(size_t sz)
{
	//! Allocate the device memory if not done yet

	if (size() == 0)
		return allocate(sz);

	//! Create a new buffer if sz is bigger than the actual size
	void * tdm;

	if (this->sz < sz)
		CUDA_SAFE_CALL(cudaMalloc(&tdm,sz));

	//! copy from the old buffer to the new one

	CUDA_SAFE_CALL(cudaMemcpy(dm,tdm,size(),cudaMemcpyDeviceToDevice));

	//! free the old buffer

	destroy();

	//! change to the new buffer

	dm = tdm;
	this->sz = sz;

	return true;
}

/*! \brief Return a readable pointer with your data
 *
 * Return a readable pointer with your data
 *
 */

void * CudaMemory::getPointer()
{
	//! if the host buffer is synchronized with the device buffer return the host buffer

	if (is_hm_sync)
		return hm;

	//! we have to synchronize


	//| allocate an host mempory
	if (hm == NULL)
		allocate_host(sz);

	//! copy from device to host memory

	CUDA_SAFE_CALL(cudaMemcpy(dm,hm,sz,cudaMemcpyDeviceToHost));

	return hm;
}