// Simplified illustration of the problem: the identically written expression
// is stored through two different destinations, and the two stored floats are
// then compared for exact equality.
// NOTE(review): the return type and the definitions of i, j and Result are not
// part of this snippet.
func(float** newVar, float* oldVar) {
/* Code to calculate result */
// Store the value into element [i][j] of the two-level array ...
newVar[i][j] = 1000 - Result;
// ... and the same expression into element [j] of the flat array.
oldVar[j] = 1000 - Result;
// Exact (bitwise-value) float comparison of the two stored results.
assert(newVar[i][j] == oldVar[j]);
}
#include <cassert>
// Stand-alone host check: for every loop index the same float expression is
// evaluated twice and the two results are asserted to compare equal.
int main()
{
    const float divisor = 1.999f;
    const float factor = 8.349f;
    const float offset = 17534.1f;

    int step = 0;
    while (step < 999999)
    {
        // Two textually identical evaluations; the expression shape is kept
        // exactly as written so the compiler evaluates both the same way.
        float x = ((float)(1000 * step)) / divisor * factor + offset;
        float y = ((float)(1000 * step)) / divisor * factor + offset;
        assert(x==y);
        ++step;
    }

    return 0;
}
// File-scope state: func1 assigns newVar and oldVar, func2 assigns
// kernel_oldVar.
CudaArray<float>* newVar;
// Both names must be pointers. In `float* a, b;` the `*` binds only to `a`,
// which would make kernel_oldVar a plain float even though it receives the
// result of CudaMalloc<float>() and is passed to CudaCopyDtH as a device
// pointer.
float *oldVar, *kernel_oldVar;
srand(1000); //So the error doesn't move around.
// Creates the CudaArray for `size` elements, records the caller's buffer as
// oldVar, and forwards both to func2.
//   inData - caller-provided float buffer; stored in oldVar without copying
//   size   - element count handed to the CudaArray constructor and to func2
// NOTE(review): the return type is omitted in this snippet, and the CudaArray
// allocated here is not released anywhere in the visible code.
func1(float* inData, int size) {
    newVar = new CudaArray<float>(size);  // the terminating ';' was missing
    oldVar = inData;
    func2(newVar, oldVar, size);
}
// Launches CudaKernel, copies both of its outputs back to the host and
// compares them element by element.
//   newVar - array set whose device pointer table is given to the kernel and
//            whose host rows are refreshed by CopyToHost()
//   oldVar - host buffer that receives the kernel's oldVar output
//   size   - element count used for the device allocation and the copy back
// NOTE(review): the return type, the launch configuration inside <<<>>> and
// the remaining kernel arguments are elided in this snippet.
func2(CudaArray<float>* newVar, float* oldVar, int size) {
    /* Snip */
    // Device-side buffer that the kernel writes its oldVar output into.
    kernel_oldVar = CudaMalloc<float>(size);
    // The kernel must receive the device buffer. oldVar is host memory here
    // (it is the destination of CudaCopyDtH below and is dereferenced on the
    // host), so passing it to the kernel would leave kernel_oldVar unwritten
    // and the copy back would return uninitialized data.
    CudaKernel<<<>>>(newVar->DevicePtr(), kernel_oldVar, size, /*Other params*/);
    //Strange if you're unfamiliar with Cuda, I know. But the <> should be there.
    newVar->CopyToHost();
    CudaCopyDtH<float>(kernel_oldVar, oldVar, size);
    // NOTE(review): popsize and result are not defined in the visible code;
    // presumably they come from the snipped parts - confirm that comparing
    // against result[i] (rather than oldVar[i]) is intended.
    for(int i = 0; i < popsize; i++) {
        if(newVar->HostPtr()[0][i] != result[i]) {
            printf("\n%d\n", i);
            assert(newVar->HostPtr()[0][i] == oldVar[i]); //This was the real failing assert. i == 4. 6th run.
        }
    }
    /* Snip */
}
// Kernel that writes the identically written expression to row 0 of newVar
// and to oldVar, then checks in device code that the two stored values are
// equal.
// NOTE(review): the index i, the array result and the remaining parameters are
// elided from this snippet, so the expected grid/block layout and any bounds
// handling cannot be determined from here.
__global__ static void CudaKernel(float** newVar, float* oldVar, /* Other params */) {
/* Lots of calculations */
// Both destinations receive the same expression.
newVar[0][i] = 1000000 - result[i];
oldVar[i] = 1000000 - result[i];
// Device-side exact equality check; per the author's note it does not fire.
assert(newVar[0][i] == oldVar[i]); //This one doesn't fail. My mistake.
}
//The templates used:
template <class T>
T* CudaMalloc(const size_t size) {
T* devicePtr = NULL;
cudaError_t error = cudaMalloc((void**)&devicePtr, size * sizeof(T));
if (error == cudaErrorMemoryAllocation) {
cout << "CudaMalloc error: " << error << " (" << cudaGetErrorString(error) << ")" << ". Exiting" << endl;
assert(error == 0);
}
return devicePtr;
}
template <class T>
void CudaCopyHtD(const T* hostPtr, T* devicePtr, const size_t size) {
cudaError_t error = cudaMemcpy(devicePtr, hostPtr, sizeof(T)*size, cudaMemcpyHostToDevice);
if(error != 0) {
cout << "Copy HtD error: " << error << " (" << cudaGetErrorString(error) << ")" << ". Exiting" << endl;
assert(error == 0);
}
}
template <class T>
void CudaCopyDtH(const T* devicePtr, T* hostPtr, const size_t size) {
cudaError_t error = cudaMemcpy(hostPtr, devicePtr, sizeof(T)*size, cudaMemcpyDeviceToHost);
if(error != 0) {
cout << "Copy DtT error: " << error << " (" << cudaGetErrorString(error) << ")" << ". Exiting" << endl;
assert(error == 0);
}
}
// Holds a fixed number of equally sized rows of T twice: once in host memory
// and once in device memory, plus a device-resident table of the device row
// pointers that a kernel can index as T**.
template <class T>
class CudaArray {
public:
    // Allocates 3 host rows and 3 device rows of `size` elements each and
    // uploads the table of device row pointers.
    CudaArray(unsigned int size) {
        arrays = 3;
        arraySize = size;

        // Host side: pointer table plus one buffer per row.
        ArrayHost = (T**)malloc(arrays * sizeof(T*));
        for(unsigned int i = 0; i < arrays; i++)
            ArrayHost[i] = (T*)malloc(arraySize * sizeof(T));

        // Device-side pointer table. CudaMalloc<U>(n) takes an element count
        // and multiplies by sizeof(U) itself, so passing
        // arrays * sizeof(T*) applied sizeof twice and over-allocated.
        ArrayDevice = CudaMalloc<T*>(arrays);

        // Host-side copy of the device row pointers; needed for the per-row
        // copies and uploaded once into ArrayDevice.
        PtrHolder = (T**)malloc(arrays * sizeof(T*));
        for(unsigned int i = 0; i < arrays; i++)
            PtrHolder[i] = CudaMalloc<T>(arraySize);
        CudaCopyHtD<T*>(PtrHolder, ArrayDevice, arrays);
    }

    // Releases every host and device buffer allocated by the constructor.
    // cudaFree results are intentionally not checked: there is no useful
    // recovery inside a destructor.
    ~CudaArray() {
        for(unsigned int i = 0; i < arrays; i++) {
            cudaFree(PtrHolder[i]);
            free(ArrayHost[i]);
        }
        cudaFree(ArrayDevice);
        free(PtrHolder);
        free(ArrayHost);
    }

    // Device-resident table of device row pointers (for kernel arguments).
    T** DevicePtr() { return ArrayDevice; }
    // Host-resident table of host row pointers.
    T** HostPtr() { return ArrayHost; }

    // Copies every row from device memory into the matching host row.
    void CopyToHost() {
        for(unsigned int i = 0; i < arrays; i++) CudaCopyDtH<T>(PtrHolder[i], ArrayHost[i], arraySize);
    }
    // Copies every row from host memory into the matching device row.
    void CopyToDevice() {
        for(unsigned int i = 0; i < arrays; i++) CudaCopyHtD<T>(ArrayHost[i], PtrHolder[i], arraySize);
    }
private:
    // Not copyable: a copy would share the raw pointers and the destructor
    // would release them twice. Declared but intentionally not defined.
    CudaArray(const CudaArray&);
    CudaArray& operator=(const CudaArray&);

    unsigned int arrays;     // number of rows (fixed to 3 by the constructor)
    unsigned int arraySize;  // elements per row
    T **ArrayHost;           // host table of host row buffers
    T **ArrayDevice;         // device table of device row pointers
    T **PtrHolder;           // host table of device row pointers
};