Is your cloud always on? With an Always On cloud, you won't have to worry about downtime for maintenance or application code updates, so your bottom line isn't affected.
// Pseudo-code fragment: stores the same computed value into both output
// buffers and asserts they agree.
// NOTE(review): no return type, and `i`, `j`, `Result` are not declared
// here — presumably they come from the snipped calculation. TODO confirm.
func(float** newVar, float* oldVar) {
/* Code to calculate result */
newVar[i][j] = 1000 - Result;
oldVar[j] = 1000 - Result;
// Exact float equality is fine HERE: both stores come from the same
// expression with (assumed) unchanged Result, so the stored bits match.
// This does not generalize to values computed by different code paths.
assert(newVar[i][j] == oldVar[j]);
}
#include <cassert>
// Host-only repro: evaluates the identical float expression twice per
// iteration and asserts the results are bit-identical, for ~1e6 inputs.
// (1000 * i stays below INT_MAX for the whole range, so no overflow.)
int main()
{
const float divisor = 1.999f;
const float factor = 8.349f;
const float offset = 17534.1f;
for (int n = 0; n < 999999; ++n)
{
const float first = static_cast<float>(1000 * n) / divisor * factor + offset;
const float second = static_cast<float>(1000 * n) / divisor * factor + offset;
// Same expression, same inputs, same compilation unit => same bits.
assert(first == second);
}
return 0;
}
// File-scope state shared by func1/func2.
CudaArray<float>* newVar;
// BUG FIX: the original `float* oldVar, kernel_oldVar;` declared
// kernel_oldVar as a plain float — in C/C++ the `*` binds to each
// declarator, not the type — yet func2 assigns it the device pointer
// returned by CudaMalloc<float>() and passes it to CudaCopyDtH.
float *oldVar, *kernel_oldVar;
// NOTE(review): a function call at file scope is not valid C++ — this
// presumably lives inside an init function in the real file.
srand(1000); //So the error doesn't move around.
// Wraps inData in a freshly allocated CudaArray and hands both to func2.
// FIXES(review): original fragment had no return type and was missing the
// semicolon after the `new` expression.
// NOTE(review): the CudaArray is never deleted — this leaks on every call.
// TODO confirm ownership against the snipped code before adding a delete.
void func1(float* inData, int size) {
newVar = new CudaArray<float>(size);
oldVar = inData;
func2(newVar, oldVar, size);
}
// Launches the kernel and compares its two outputs on the host.
// NOTE(review): fragment — no return type, missing semicolon after the
// launch, empty <<< >>> config, and `popsize`/`result` come from snipped
// code. Comments below flag the likely failure points.
func2(CudaArray<float>* newVar, float* oldVar, int size) {
/* Snip */
kernel_oldVar = CudaMalloc<float>(size);
// BUG(review): `oldVar` is a HOST pointer (it is func1's inData), but it is
// passed straight to the kernel, which writes oldVar[i] on the device —
// an illegal address. The device buffer `kernel_oldVar` allocated on the
// line above looks like what was meant to be passed. TODO confirm.
CudaKernel<<<>>>(newVar->DevicePtr(), oldVar, size, /*Other params*/)
//Looks odd if you're unfamiliar with CUDA, but the <<< >>> launch syntax is required.
// NOTE(review): no cudaGetLastError()/cudaDeviceSynchronize() after the
// launch — a failed launch or an in-kernel fault goes unnoticed here.
newVar->CopyToHost();
// NOTE(review): this overwrites oldVar with kernel_oldVar, a device buffer
// the visible code never writes (the kernel received oldVar instead) — so
// the assert below compares against stale/uninitialized data. TODO confirm
// against the snipped code.
CudaCopyDtH<float>(kernel_oldVar, oldVar, size);
// NOTE(review): bound is `popsize`, not the `size` parameter, and the guard
// compares against `result[i]` while the assert compares against oldVar[i]
// — two different references. Both come from snipped code; verify.
for(int i = 0; i < popsize; i++) {
if(newVar->HostPtr()[0][i] != result[i]) {
printf("\n%d\n", i);
assert(newVar->HostPtr()[0][i] == oldVar[i]); //This was the real failing assert. i == 4. 6th run.
}
}
/* Snip */
}
// Device kernel: stores the computed value into both output buffers and
// asserts they agree (device-side assert traps and surfaces at next sync).
// NOTE(review): fragment — `i` and `result` come from the snipped
// calculations, and no `if (i < n)` bounds guard is visible. Both
// parameters must be DEVICE pointers; the visible call site in func2
// passes a host pointer for oldVar. TODO confirm.
__global__ static void CudaKernel(float** newVar, float* oldVar, /* Other params */) {
/* Lots of calculations */
newVar[0][i] = 1000000 - result[i];
oldVar[i] = 1000000 - result[i];
// Exact equality is fine here: the same thread stores the same expression
// into both locations.
assert(newVar[0][i] == oldVar[i]); //This one doesn't fail. My mistake.
}
//The templates used:
// Allocates `size` elements of T on the device and returns the device
// pointer (NULL only if cudaMalloc failed without tripping the assert).
// Note: callers pass an ELEMENT count; sizeof(T) is applied here.
template <class T>
T* CudaMalloc(const size_t size) {
T* devicePtr = NULL;
cudaError_t error = cudaMalloc((void**)&devicePtr, size * sizeof(T));
// BUG FIX: the original only tested cudaErrorMemoryAllocation, so any
// other failure (invalid value, a sticky error from an earlier call, ...)
// returned a bogus pointer silently. Check against cudaSuccess instead.
if (error != cudaSuccess) {
cout << "CudaMalloc error: " << error << " (" << cudaGetErrorString(error) << ")" << ". Exiting" << endl;
// NOTE(review): assert is compiled out under NDEBUG — release builds
// would print and then continue with a bad pointer.
assert(error == 0);
}
return devicePtr;
}
// Copies `size` elements of T from host memory to device memory.
// On failure, logs the CUDA error and trips the assert.
template <class T>
void CudaCopyHtD(const T* hostPtr, T* devicePtr, const size_t size) {
const cudaError_t status = cudaMemcpy(devicePtr, hostPtr, sizeof(T)*size, cudaMemcpyHostToDevice);
if (status == 0)
return;
cout << "Copy HtD error: " << status << " (" << cudaGetErrorString(status) << ")" << ". Exiting" << endl;
assert(status == 0);
}
// Copies `size` elements of T from device memory to host memory.
// On failure, logs the CUDA error and trips the assert.
template <class T>
void CudaCopyDtH(const T* devicePtr, T* hostPtr, const size_t size) {
cudaError_t error = cudaMemcpy(hostPtr, devicePtr, sizeof(T)*size, cudaMemcpyDeviceToHost);
if(error != cudaSuccess) {
// BUG FIX: message said "Copy DtT" — wrong direction label made failures
// here look like they came from a different helper.
cout << "Copy DtH error: " << error << " (" << cudaGetErrorString(error) << ")" << ". Exiting" << endl;
assert(error == 0);
}
}
// Owns `arrays` parallel buffers of `arraySize` T's, mirrored on host and
// device. DevicePtr() returns a device-resident T** whose entries point at
// the per-array device buffers (suitable to pass to a kernel); HostPtr()
// returns the host-side mirror. CopyToHost/CopyToDevice move the payload
// buffers; the pointer table itself is uploaded once in the constructor.
template <class T>
class CudaArray {
public:
CudaArray(unsigned int size) {
arrays = 3; // NOTE(review): hard-coded buffer count
arraySize = size;
ArrayHost = (T**)malloc(arrays * sizeof(T*));
for(unsigned int i = 0; i < arrays; i++)
ArrayHost[i] = (T*)malloc(arraySize * sizeof(T));
// BUG FIX: CudaMalloc takes an ELEMENT count and multiplies by
// sizeof(T) itself; the original passed `arrays * sizeof(T*)`,
// over-allocating the device pointer table by a factor of sizeof(T*).
ArrayDevice = CudaMalloc<T*>(arrays);
PtrHolder = (T**)malloc(arrays * sizeof(T*));
for(unsigned int i = 0; i < arrays; i++)
PtrHolder[i] = CudaMalloc<T>(arraySize);
// Upload the table of device buffer pointers so kernels can index it.
CudaCopyHtD<T*>(PtrHolder, ArrayDevice, arrays);
}
// BUG FIX: the original had no destructor, leaking every host and device
// allocation per instance. NOTE(review): with a destructor, copying a
// CudaArray would now double-free (rule of three) — avoid copies, or
// disable the copy constructor/assignment if callers permit.
~CudaArray() {
for(unsigned int i = 0; i < arrays; i++) {
free(ArrayHost[i]);
cudaFree(PtrHolder[i]);
}
cudaFree(ArrayDevice);
free(ArrayHost);
free(PtrHolder);
}
// Device-resident pointer table (T** living in device memory).
T** DevicePtr() { return ArrayDevice; }
// Host-resident mirror of the payload buffers.
T** HostPtr() { return ArrayHost; }
// Copy every device payload buffer into its host mirror (blocking).
void CopyToHost() {
for(unsigned int i = 0; i < arrays; i++) CudaCopyDtH<T>(PtrHolder[i], ArrayHost[i], arraySize);
}
// Copy every host buffer into its device counterpart (blocking).
void CopyToDevice() {
for(unsigned int i = 0; i < arrays; i++) CudaCopyHtD<T>(ArrayHost[i], PtrHolder[i], arraySize);
}
private:
unsigned int arrays;    // number of parallel buffers (fixed at 3)
unsigned int arraySize; // elements per buffer
T **ArrayHost;          // host payload buffers
T **ArrayDevice;        // device-resident pointer table
T **PtrHolder;          // host copy of the device buffer pointers
};