Skip to content
Snippets Groups Projects
Commit e78a9a7f authored by Nordine Feddal's avatar Nordine Feddal
Browse files

-

parent d14d4d69
Branches
No related tags found
No related merge requests found
#include <stdio.h>
#include <stdlib.h>
#include "kernels.h"
// Start vectorAdd_float
/**
* CUDA Kernel Device code
......@@ -18,11 +18,14 @@ __global__ void vectorAdd_float(const float *A, const float *B, float *C, int nu
}
// Start Parameters
// Device-side pointers for the vectorAdd_float buffers.
float * vectorAdd_float_da;
float * vectorAdd_float_db;
float * vectorAdd_float_dc;
// Number of elements in each vector. Defined exactly once: a second
// definition of the same constexpr is a redefinition error.
constexpr int SIZE_float = 10000;
// End Parameters
// Host-side pointers for the vectorAdd_float buffers (hb and hc are
// malloc'ed in initialize_vectorAdd_float()).
float * vectorAdd_float_ha;
float * vectorAdd_float_hb;
float * vectorAdd_float_hc;
// Start Init function
void initialize_vectorAdd_float() {
......@@ -30,9 +33,6 @@ void initialize_vectorAdd_float() {
vectorAdd_float_hb = (float*)malloc(SIZE_float*sizeof(float));
vectorAdd_float_hc = (float*)malloc(SIZE_float*sizeof(float));
float * vectorAdd_float_da;
float * vectorAdd_float_db;
float * vectorAdd_float_dc;
cudaMalloc(&vectorAdd_float_da, SIZE_float * sizeof(float));
cudaMalloc(&vectorAdd_float_db, SIZE_float * sizeof(float));
......@@ -62,11 +62,14 @@ __global__ void vectorAdd_int(const int *A, const int *B, int *C, int numElement
}
// Start Parameters
// Device-side pointers for the vectorAdd_int buffers; they are passed to
// cudaMalloc in initialize_vectorAdd_int().
int * vectorAdd_int_da;
int * vectorAdd_int_db;
int * vectorAdd_int_dc;
// Number of elements in each vector. Defined exactly once: a second
// definition of the same constexpr is a redefinition error.
constexpr int SIZE_int = 10000;
// End Parameters
// Host-side pointers for the vectorAdd_int buffers (hb and hc are
// malloc'ed in initialize_vectorAdd_int()).
int * vectorAdd_int_ha;
int * vectorAdd_int_hb;
int * vectorAdd_int_hc;
// Start Init function
void initialize_vectorAdd_int() {
......@@ -74,9 +77,6 @@ void initialize_vectorAdd_int() {
vectorAdd_int_hb = (int*)malloc(SIZE_int*sizeof(int));
vectorAdd_int_hc = (int*)malloc(SIZE_int*sizeof(int));
int * vectorAdd_int_da;
int * vectorAdd_int_db;
int * vectorAdd_int_dc;
cudaMalloc(&vectorAdd_int_da, SIZE_int * sizeof(int));
cudaMalloc(&vectorAdd_int_db, SIZE_int * sizeof(int));
cudaMalloc(&vectorAdd_int_dc, SIZE_int * sizeof(int));
......@@ -92,7 +92,48 @@ void initialize_vectorAdd_int() {
// End Init Function
// End vectorAdd_int
// Start saxpy
// Start vectorAdd_double
/**
 * Element-wise addition of two double vectors: C[i] = A[i] + B[i].
 *
 * Each thread handles at most one element, selected by its flat 1D index
 * (blockDim.x * blockIdx.x + threadIdx.x). Threads whose index is not
 * strictly below numElements do nothing, so the grid may be larger than
 * the data.
 *
 * @param A            first input vector
 * @param B            second input vector
 * @param C            output vector
 * @param numElements  element count used as the upper bound for the index
 *                     (declared as double; the index is compared against it
 *                     as a floating-point value)
 */
__global__ void vectorAdd_double(const double *A, const double *B, double *C, double numElements)
{
    const int idx = blockDim.x * blockIdx.x + threadIdx.x;

    // Written as a negated "<" so that the comparison behaves exactly like
    // the plain in-range test for every value of numElements.
    if (!(idx < numElements))
    {
        return;
    }

    C[idx] = A[idx] + B[idx];
}
// Start Parameters
// Device-side pointers for the vectorAdd_double buffers. They are
// cudaMalloc'ed in initialize_vectorAdd_double(); da and db additionally
// receive copies of the host buffers ha and hb there.
double * vectorAdd_double_da;
double * vectorAdd_double_db;
double * vectorAdd_double_dc;
// Number of elements in each vector.
// NOTE(review): declared as double rather than an integer type, so byte
// sizes and loop bounds derived from it go through floating point — confirm
// this is intended.
constexpr double SIZE_double = 10000;
// End Parameters
// Host-side pointers for the vectorAdd_double buffers. They are malloc'ed in
// initialize_vectorAdd_double(), which fills ha with 1.0 and hb with 2.0.
double * vectorAdd_double_ha;
double * vectorAdd_double_hb;
double * vectorAdd_double_hc;
// Start Init function
/**
 * Allocates and fills the buffers used by vectorAdd_double.
 *
 * Host side: vectorAdd_double_ha/hb/hc are malloc'ed with SIZE_double
 * elements each; ha is filled with 1.0 and hb with 2.0 (hc is left
 * uninitialized).
 * Device side: vectorAdd_double_da/db/dc are cudaMalloc'ed with the same
 * size, then the contents of ha and hb are copied into da and db.
 *
 * Any allocation or copy failure is reported on stderr and terminates the
 * process with EXIT_FAILURE instead of being silently ignored.
 */
void initialize_vectorAdd_double() {
    // SIZE_double is stored as a double; convert it once so that byte sizes
    // and the loop bound are computed with integer arithmetic.
    const size_t count = static_cast<size_t>(SIZE_double);
    const size_t bytes = count * sizeof(double);

    // Aborts with a readable message when a CUDA runtime call fails.
    auto checkCuda = [](cudaError_t status, const char *what) {
        if (status != cudaSuccess) {
            fprintf(stderr, "initialize_vectorAdd_double: %s failed: %s\n",
                    what, cudaGetErrorString(status));
            exit(EXIT_FAILURE);
        }
    };

    vectorAdd_double_ha = (double*)malloc(bytes);
    vectorAdd_double_hb = (double*)malloc(bytes);
    vectorAdd_double_hc = (double*)malloc(bytes);
    if (vectorAdd_double_ha == nullptr || vectorAdd_double_hb == nullptr ||
        vectorAdd_double_hc == nullptr) {
        fprintf(stderr, "initialize_vectorAdd_double: host allocation of %zu bytes failed\n", bytes);
        exit(EXIT_FAILURE);
    }

    checkCuda(cudaMalloc(&vectorAdd_double_da, bytes), "cudaMalloc(da)");
    checkCuda(cudaMalloc(&vectorAdd_double_db, bytes), "cudaMalloc(db)");
    checkCuda(cudaMalloc(&vectorAdd_double_dc, bytes), "cudaMalloc(dc)");

    for (size_t i = 0; i < count; i++) {
        vectorAdd_double_ha[i] = 1.0;
        vectorAdd_double_hb[i] = 2.0;
    }

    checkCuda(cudaMemcpy(vectorAdd_double_da, vectorAdd_double_ha, bytes, cudaMemcpyHostToDevice),
              "cudaMemcpy(ha -> da)");
    checkCuda(cudaMemcpy(vectorAdd_double_db, vectorAdd_double_hb, bytes, cudaMemcpyHostToDevice),
              "cudaMemcpy(hb -> db)");
}
// End Init Function
// End vectorAdd_double
// Start saxpy_parallel
__global__ void saxpy_parallel(int n, float a, float *x, float *y)
{
......@@ -103,21 +144,50 @@ __global__ void saxpy_parallel(int n, float a, float *x, float *y)
// Start Parameters
// Number of elements in the saxpy x and y vectors.
int SIZE_SAXPY = 10000;
// Scalar factor; presumably passed as the `a` argument of saxpy_parallel —
// verify against the launch site.
float SAXPY_a = 1.5;
// Device-side pointers for the x and y vectors.
// NOTE(review): confirm these are the pointers that initialize_saxpy()
// actually hands to cudaMalloc.
float * SAXPY_dx;
float * SAXPY_dy;
// End Parameters
// Host-side pointers for the x and y vectors; malloc'ed in initialize_saxpy().
float * SAXPY_hx;
float * SAXPY_hy;
// NOTE(review): this second end marker duplicates the one above — confirm
// which of the two is meant to close the parameter section.
// End Parameters
// Start Init function
/**
 * Allocates the host and device buffers used by saxpy_parallel.
 *
 * Host side: SAXPY_hx and SAXPY_hy are malloc'ed with SIZE_SAXPY floats
 * each (contents are left uninitialized).
 * Device side: the file-scope pointers SAXPY_dx and SAXPY_dy are
 * cudaMalloc'ed with the same size. No local variables of the same name are
 * declared here: such locals would shadow the file-scope pointers, leaving
 * them unset and leaking the device allocations when the function returns.
 *
 * Any allocation failure is reported on stderr and terminates the process
 * with EXIT_FAILURE instead of being silently ignored.
 */
void initialize_saxpy() {
    const size_t bytes = static_cast<size_t>(SIZE_SAXPY) * sizeof(float);

    // Aborts with a readable message when a CUDA runtime call fails.
    auto checkCuda = [](cudaError_t status, const char *what) {
        if (status != cudaSuccess) {
            fprintf(stderr, "initialize_saxpy: %s failed: %s\n",
                    what, cudaGetErrorString(status));
            exit(EXIT_FAILURE);
        }
    };

    SAXPY_hx = (float*)malloc(bytes);
    SAXPY_hy = (float*)malloc(bytes);
    if (SAXPY_hx == nullptr || SAXPY_hy == nullptr) {
        fprintf(stderr, "initialize_saxpy: host allocation of %zu bytes failed\n", bytes);
        exit(EXIT_FAILURE);
    }

    checkCuda(cudaMalloc(&SAXPY_dx, bytes), "cudaMalloc(dx)");
    checkCuda(cudaMalloc(&SAXPY_dy, bytes), "cudaMalloc(dy)");
}
// End Init Function
// End saxpy
// Start saxpy_parallel_double
/**
 * Double-precision a*x + y, computed in place on y: y[i] = a*x[i] + y[i].
 *
 * Each thread handles at most one element, selected by its flat 1D index
 * (blockIdx.x * blockDim.x + threadIdx.x). Threads whose index is n or
 * larger do nothing, so the grid may be larger than the data.
 *
 * @param n  number of elements to process
 * @param a  scalar multiplier applied to x
 * @param x  input vector
 * @param y  input/output vector, updated in place
 */
__global__ void saxpy_parallel_double(int n, double a, double *x, double *y)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Out-of-range threads leave y untouched.
    if (idx >= n)
    {
        return;
    }

    y[idx] = a * x[idx] + y[idx];
}
// Start Parameters
// Number of elements in the double-precision saxpy x and y vectors.
int SIZE_SAXPY_double = 10000;
// Scalar factor; presumably passed as the `a` argument of
// saxpy_parallel_double — verify against the launch site.
double SAXPY_double_a = 1.5;
// Device-side pointers for the x and y vectors; cudaMalloc'ed in
// initialize_saxpy_double().
double * SAXPY_double_dx;
double * SAXPY_double_dy;
// End Parameters
// Host-side pointers for the x and y vectors; malloc'ed in
// initialize_saxpy_double().
double * SAXPY_double_hx;
double * SAXPY_double_hy;
// Start Init function
/**
 * Allocates the host and device buffers used by saxpy_parallel_double.
 *
 * Host side: SAXPY_double_hx and SAXPY_double_hy are malloc'ed with
 * SIZE_SAXPY_double doubles each (contents are left uninitialized).
 * Device side: SAXPY_double_dx and SAXPY_double_dy are cudaMalloc'ed with
 * the same size.
 *
 * Any allocation failure is reported on stderr and terminates the process
 * with EXIT_FAILURE instead of being silently ignored.
 */
void initialize_saxpy_double() {
    const size_t bytes = static_cast<size_t>(SIZE_SAXPY_double) * sizeof(double);

    // Aborts with a readable message when a CUDA runtime call fails.
    auto checkCuda = [](cudaError_t status, const char *what) {
        if (status != cudaSuccess) {
            fprintf(stderr, "initialize_saxpy_double: %s failed: %s\n",
                    what, cudaGetErrorString(status));
            exit(EXIT_FAILURE);
        }
    };

    SAXPY_double_hx = (double*)malloc(bytes);
    SAXPY_double_hy = (double*)malloc(bytes);
    if (SAXPY_double_hx == nullptr || SAXPY_double_hy == nullptr) {
        fprintf(stderr, "initialize_saxpy_double: host allocation of %zu bytes failed\n", bytes);
        exit(EXIT_FAILURE);
    }

    checkCuda(cudaMalloc(&SAXPY_double_dx, bytes), "cudaMalloc(dx)");
    checkCuda(cudaMalloc(&SAXPY_double_dy, bytes), "cudaMalloc(dy)");
}
// End Init Function
// End saxpy_parallel_double
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment