Skip to content
Snippets Groups Projects
Commit b77d161e authored by Nordine Feddal
Browse files

add vecAdd for test

parent d5f1dda0
Branches
No related tags found
No related merge requests found
#include <stdio.h>
#include <stdlib.h>
// Start vectorAdd_float
/**
 * CUDA kernel: element-wise float vector addition, C[k] = A[k] + B[k].
 *
 * Every thread derives a single flat index from the x components of its
 * block and thread coordinates and handles at most one element. Threads
 * whose index is at or beyond numElements return without touching memory.
 *
 * @param A           first input vector
 * @param B           second input vector
 * @param C           output vector receiving the sums
 * @param numElements number of elements to add
 */
__global__ void vectorAdd_float(const float *A, const float *B, float *C, int numElements)
{
    const int idx = blockDim.x * blockIdx.x + threadIdx.x;

    // Surplus threads past the end of the data have nothing to do.
    if (idx >= numElements)
    {
        return;
    }

    C[idx] = A[idx] + B[idx];
}
// Start Parameters
// Host buffers for the float test. initialize_vectorAdd_float() allocates all
// three and fills ha and hb; hc is only allocated there.
float *vectorAdd_float_ha = nullptr;
float *vectorAdd_float_hb = nullptr;
float *vectorAdd_float_hc = nullptr;
// Number of elements in each float buffer.
constexpr int SIZE_float = 10000;
// End Parameters
// Start Init function
/**
 * Prepares the data for the float vector-add test.
 *
 * Allocates the three host buffers (vectorAdd_float_ha/hb/hc) with
 * SIZE_float elements each, allocates three device buffers of the same size,
 * fills ha with 1.0f and hb with 2.0f, and copies ha and hb to the first two
 * device buffers. The third device buffer and hc are allocated but not
 * written here.
 *
 * Any failed host allocation or CUDA runtime call prints a diagnostic to
 * stderr and terminates the process with EXIT_FAILURE.
 */
void initialize_vectorAdd_float() {
    const size_t bytes = static_cast<size_t>(SIZE_float) * sizeof(float);

    // CUDA runtime calls report failure only through their return code, so
    // every call is routed through this check.
    auto cudaOrDie = [](cudaError_t status, const char *what) {
        if (status != cudaSuccess) {
            fprintf(stderr, "initialize_vectorAdd_float: %s failed: %s\n",
                    what, cudaGetErrorString(status));
            exit(EXIT_FAILURE);
        }
    };

    vectorAdd_float_ha = (float*)malloc(bytes);
    vectorAdd_float_hb = (float*)malloc(bytes);
    vectorAdd_float_hc = (float*)malloc(bytes);
    // The fill loop below dereferences ha and hb, so a failed malloc must be
    // caught before it.
    if (vectorAdd_float_ha == NULL || vectorAdd_float_hb == NULL || vectorAdd_float_hc == NULL) {
        fprintf(stderr, "initialize_vectorAdd_float: host allocation of %zu bytes failed\n", bytes);
        exit(EXIT_FAILURE);
    }

    // NOTE(review): these device pointers are locals, so the allocations are
    // unreachable once this function returns. Confirm who is meant to own
    // them (file-scope variables or an out-parameter) before relying on the
    // staged data.
    float *vectorAdd_float_da = NULL;
    float *vectorAdd_float_db = NULL;
    float *vectorAdd_float_dc = NULL;
    cudaOrDie(cudaMalloc(&vectorAdd_float_da, bytes), "cudaMalloc(da)");
    cudaOrDie(cudaMalloc(&vectorAdd_float_db, bytes), "cudaMalloc(db)");
    cudaOrDie(cudaMalloc(&vectorAdd_float_dc, bytes), "cudaMalloc(dc)");

    for (int i = 0; i < SIZE_float; i++) {
        vectorAdd_float_ha[i] = 1.0f;
        vectorAdd_float_hb[i] = 2.0f;
    }

    cudaOrDie(cudaMemcpy(vectorAdd_float_da, vectorAdd_float_ha, bytes, cudaMemcpyHostToDevice),
              "cudaMemcpy(ha -> da)");
    cudaOrDie(cudaMemcpy(vectorAdd_float_db, vectorAdd_float_hb, bytes, cudaMemcpyHostToDevice),
              "cudaMemcpy(hb -> db)");
}
// End Init Function
// End vectorAdd_float
// Start vectorAdd_int
/**
 * CUDA kernel: element-wise int vector addition, C[k] = A[k] + B[k].
 *
 * Every thread derives a single flat index from the x components of its
 * block and thread coordinates and handles at most one element. Threads
 * whose index is at or beyond numElements return without touching memory.
 *
 * @param A           first input vector
 * @param B           second input vector
 * @param C           output vector receiving the sums
 * @param numElements number of elements to add
 */
__global__ void vectorAdd_int(const int *A, const int *B, int *C, int numElements)
{
    const int idx = blockDim.x * blockIdx.x + threadIdx.x;

    // Surplus threads past the end of the data have nothing to do.
    if (idx >= numElements)
    {
        return;
    }

    C[idx] = A[idx] + B[idx];
}
// Start Parameters
// Host buffers for the int test. initialize_vectorAdd_int() allocates all
// three and fills ha and hb; hc is only allocated there.
int *vectorAdd_int_ha = nullptr;
int *vectorAdd_int_hb = nullptr;
int *vectorAdd_int_hc = nullptr;
// Number of elements in each int buffer.
constexpr int SIZE_int = 10000;
// End Parameters
// Start Init function
/**
 * Prepares the data for the int vector-add test.
 *
 * Allocates the three host buffers (vectorAdd_int_ha/hb/hc) with SIZE_int
 * elements each, allocates three device buffers of the same size, fills ha
 * with 1 and hb with 2, and copies ha and hb to the first two device
 * buffers. The third device buffer and hc are allocated but not written
 * here.
 *
 * Any failed host allocation or CUDA runtime call prints a diagnostic to
 * stderr and terminates the process with EXIT_FAILURE.
 */
void initialize_vectorAdd_int() {
    const size_t bytes = static_cast<size_t>(SIZE_int) * sizeof(int);

    // CUDA runtime calls report failure only through their return code, so
    // every call is routed through this check.
    auto cudaOrDie = [](cudaError_t status, const char *what) {
        if (status != cudaSuccess) {
            fprintf(stderr, "initialize_vectorAdd_int: %s failed: %s\n",
                    what, cudaGetErrorString(status));
            exit(EXIT_FAILURE);
        }
    };

    vectorAdd_int_ha = (int*)malloc(bytes);
    vectorAdd_int_hb = (int*)malloc(bytes);
    vectorAdd_int_hc = (int*)malloc(bytes);
    // The fill loop below dereferences ha and hb, so a failed malloc must be
    // caught before it.
    if (vectorAdd_int_ha == NULL || vectorAdd_int_hb == NULL || vectorAdd_int_hc == NULL) {
        fprintf(stderr, "initialize_vectorAdd_int: host allocation of %zu bytes failed\n", bytes);
        exit(EXIT_FAILURE);
    }

    // NOTE(review): these device pointers are locals, so the allocations are
    // unreachable once this function returns. Confirm who is meant to own
    // them (file-scope variables or an out-parameter) before relying on the
    // staged data.
    int *vectorAdd_int_da = NULL;
    int *vectorAdd_int_db = NULL;
    int *vectorAdd_int_dc = NULL;
    cudaOrDie(cudaMalloc(&vectorAdd_int_da, bytes), "cudaMalloc(da)");
    cudaOrDie(cudaMalloc(&vectorAdd_int_db, bytes), "cudaMalloc(db)");
    cudaOrDie(cudaMalloc(&vectorAdd_int_dc, bytes), "cudaMalloc(dc)");

    // Integer literals: the buffers hold int, so no double-to-int conversion
    // is involved.
    for (int i = 0; i < SIZE_int; i++) {
        vectorAdd_int_ha[i] = 1;
        vectorAdd_int_hb[i] = 2;
    }

    cudaOrDie(cudaMemcpy(vectorAdd_int_da, vectorAdd_int_ha, bytes, cudaMemcpyHostToDevice),
              "cudaMemcpy(ha -> da)");
    cudaOrDie(cudaMemcpy(vectorAdd_int_db, vectorAdd_int_hb, bytes, cudaMemcpyHostToDevice),
              "cudaMemcpy(hb -> db)");
}
// End Init Function
// End vectorAdd_int
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment