Skip to content
Snippets Groups Projects
Commit 1ae6f17d authored by zahoussem's avatar zahoussem
Browse files

adding free user parameters mechanisms

parent 9502a355
No related branches found
No related tags found
No related merge requests found
CC = nvcc
main_1: runqueue.o tools.o timeops.o fp_single.o
$(CC) tools.o runqueue.o timeops.o test.o -o testing
main_2: runqueue.o tools.o timeops.o fp_multiple.o
$(CC) tools.o runqueue.o timeops.o test.o -o testing
fp_multiple.o : examples/fp_multiple.cu
$(CC) -c examples/fp_multiple.cu -o test.o -dc
main_1: user.o runqueue.o tools.o timeops.o fp_single.o user.o
$(CC) user.o tools.o runqueue.o timeops.o test.o -o testing
fp_single.o : examples/fp_single.cu
$(CC) -c examples/fp_single.cu -o test.o -dc
user.o : src/user.cu
$(CC) -c src/user.cu -o user.o
timeops.o : src/timeops.cu
$(CC) -c src/timeops.cu -o timeops.o -dc
......@@ -27,5 +21,7 @@ runqueue.o : src/runqueue.cu
tools.o : src/tools.cu
$(CC) -c src/tools.cu -o tools.o -dc
clean:
rm -f *.o testing *~
#include "../inc/tools.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
// // Kernel function to add the elements of two arrays
// __global__ void add()
// {
// __allocate_to_sm(0);
// printf("I am processed by %lu\n",(unsigned long)__get_smid());
// }
#include "../inc/user.h"
#include "../inc/tools.h"
#define N 8
template<typename ...Arguments>
void create_kernel(struct kernel_t<Arguments ...> * k, void kernel_c(Arguments...), int gs, int bs,
Arguments...args){
k->kernel_c = kernel_c;
k->args = std::tuple<Arguments...>(args...);
k->gs = gs;
k->bs = bs;
/*
 * Placeholder workload kernel "a": every launched thread prints a banner and
 * then runs a fixed-length arithmetic loop.
 *
 * The accumulator and loop counter are unsigned so the running sum wraps
 * modulo 2^32. With signed int, i * 5 exceeds INT_MAX long before the loop
 * ends, which is undefined behaviour.
 *
 * NOTE(review): the accumulated value is never stored or read, so the
 * compiler is free to remove the loop entirely -- confirm whether the loop is
 * meant to keep the kernel busy for a measurable time.
 */
__global__ void kernel_a_code(){
    printf(" ******** this is the kernel a code \n");
    unsigned int res = 0;
    for (unsigned int i = 0; i < 500000000u; i++){
        res += i * 5u;
    }
}
__global__ void kernel_b_code(){
printf(" ****************************** this is the kernel b code \n");
int res = 0;
for (int i=0;i<500000000;i++){
res +=i * 5;
/*
 * Element-wise integer addition: c[i] = a[i] + b[i] for every 0 <= i < N.
 *
 * a, b : input arrays, read at indexes 0 .. N-1 from device code.
 * c    : output array, written at indexes 0 .. N-1 from device code.
 *
 * Every launched thread also prints a trace line.
 *
 * Each thread starts at its global index and advances by the total number of
 * launched threads (blockDim.x * gridDim.x), so each element is handled by
 * exactly one thread for any launch configuration. Advancing by blockDim.x
 * alone made threads of different blocks revisit the same elements as soon
 * as more than one block was launched.
 */
__global__ void add( int *a, int *b, int *c ) {
    printf("here 2 \n");
    const int stride = blockDim.x * gridDim.x;
    for (int tid = blockDim.x * blockIdx.x + threadIdx.x; tid < N; tid += stride) {
        c[tid] = a[tid] + b[tid];
    }
}
__global__ void kernel_c_code(){
printf(" ***************************************** this is the kernel c code \n");
int res = 0;
for (int i=0;i<500000000;i++){
res +=i * 5;
/*
 * Element-wise integer multiplication: c[i] = a[i] * b[i] for every 0 <= i < N.
 *
 * a, b : input arrays, read at indexes 0 .. N-1 from device code.
 * c    : output array, written at indexes 0 .. N-1 from device code.
 * h    : accepted but not used by the kernel body.
 *
 * Every launched thread also prints a trace line.
 *
 * Each thread starts at its global index and advances by the total number of
 * launched threads (blockDim.x * gridDim.x), so each element is handled by
 * exactly one thread for any launch configuration. Advancing by blockDim.x
 * alone made threads of different blocks revisit the same elements as soon
 * as more than one block was launched.
 */
__global__ void mul( int *a, int *b, int *c, int h ) {
    printf("here mul \n");
    const int stride = blockDim.x * gridDim.x;
    for (int tid = blockDim.x * blockIdx.x + threadIdx.x; tid < N; tid += stride) {
        c[tid] = a[tid] * b[tid];
    }
}
int main(int argc, char ** argv){
int *a, *b, *c;
int *dev_a, *dev_b, *dev_c;
a = (int*)malloc( N * sizeof(int) );
b = (int*)malloc( N * sizeof(int) );
c = (int*)malloc( N * sizeof(int) );
for (int i=0; i<N; i++) {
a[i] = i;
b[i] = i;
}
cudaMalloc( (void**)&dev_a, N * sizeof(int) );
cudaMalloc( (void**)&dev_b, N * sizeof(int) );
cudaMalloc( (void**)&dev_c, N * sizeof(int) );
cudaMemcpy( dev_a, a, N * sizeof(int),cudaMemcpyHostToDevice );
cudaMemcpy( dev_b, b, N * sizeof(int),cudaMemcpyHostToDevice );
struct gpu_sched_param b;
b.period_us = 30000;
b.deadline_us= 30000;
b.priority = 20;
int ac=5;
init_kernel_listing();
create_kernel(std::get<1>(get_listing()),add,2,5,dev_a,dev_b,dev_c);
create_kernel(std::get<0>(get_listing()),mul,2,5,dev_a,dev_b,dev_c,ac);
struct gpu_sched_param a;
a.period_us = 30000;
a.deadline_us= 30000;
a.priority = 15;
struct gpu_sched_param gb;
gb.period_us = 3000000;
gb.deadline_us= 3000000;
gb.priority = 20;
struct gpu_sched_param c;
c.period_us = 30000;
c.deadline_us= 30000;
c.priority = 2;
struct gpu_sched_param ga;
ga.period_us = 6000000;
ga.deadline_us= 6000000;
ga.priority = 15;
struct pruda_task_t * p_task_b = create_pruda_task(1, kernel_b_code, b, 1, 1);
struct pruda_task_t * p_task_a = create_pruda_task(0, kernel_a_code, a, 1, 1);
struct pruda_task_t * p_task_c = create_pruda_task(2, kernel_c_code, c, 1, 1);
struct pruda_task_t * p_task_b = create_pruda_task(1, gb, 1, 1);
struct pruda_task_t * p_task_a = create_pruda_task(0, ga, 1, 1);
init_scheduler(SINGLE, FP);
add_pruda_task(p_task_a);
add_pruda_task(p_task_b);
add_pruda_task(p_task_c);
// gpu_call_params(p_task_a,params);
printf("Sched initialed, creating cpu threads \n");
create_cpu_threads();
sleep(5);
sleep(9);
cudaMemcpy(c, dev_c, N * sizeof(int), cudaMemcpyDeviceToHost );
free( a );
free( b );
free( c );
cudaFree( dev_a );
cudaFree( dev_b );
cudaFree( dev_c );
return 0;
}
// struct gpu_sched_param c;
// c.period_us = 30000;
// c.deadline_us= 30000;
// c.priority = 2;
// struct pruda_task_t * p_task_b = create_pruda_task(1, kernel_b_code, b, 1, 1);
// struct pruda_task_t * p_task_a = create_pruda_task(0, kernel_a_code, a, 1, 1);
// struct pruda_task_t * p_task_c = create_pruda_task(2, kernel_c_code, c, 1, 1);
// init_scheduler(SINGLE, FP);
// add_pruda_task(p_task_a);
// add_pruda_task(p_task_b);
// add_pruda_task(p_task_c);
// // gpu_call_params(p_task_a,params);
// printf("Sched initialed, creating cpu threads \n");
// create_cpu_threads();
// sleep(5);
// }
......@@ -12,7 +12,7 @@
#include <stdlib.h>
#include <stdio.h>
#include "tools.h"
#include "task.h"
......
#ifndef TASK_H
#define TASK_H
#include <pthread.h>
#include <semaphore.h>
// Scheduling parameters attached to a task.
struct gpu_sched_param {
// Task period; presumably microseconds, judging by the _us suffix -- TODO confirm.
long period_us;
// Task deadline; presumably microseconds, judging by the _us suffix -- TODO confirm.
long deadline_us;
// Task priority. NOTE(review): whether a larger or a smaller value means
// "more urgent" is not visible here -- confirm against the run-queue code.
long priority;
};
// Per-task record: task identity, scheduling parameters, launch dimensions,
// CPU-thread bookkeeping and the CUDA stream the task is bound to.
struct pruda_task_t {
// Task identifier; the scheduling code hands it to submit_task() to select
// which registered kernel gets launched.
int id;
// Period / deadline / priority of the task.
struct gpu_sched_param gpu_params;
// Launch dimensions; presumably bs = threads per block and gs = blocks per
// grid -- TODO confirm.
int bs;
int gs;
// POSIX scheduling parameter; presumably applied to the CPU thread `th` --
// TODO confirm.
struct sched_param param;
// CPU thread handle and its attributes.
pthread_t th;
pthread_attr_t attr;
// Semaphore; presumably what the task's CPU thread blocks on until it is
// allowed to execute -- TODO confirm.
sem_t wait_exec;
// Pointer to the CUDA stream associated with the task (not owned here as far
// as this header shows).
cudaStream_t *str;
// Link to another task record; presumably the next element of a
// singly-linked queue -- TODO confirm.
struct pruda_task_t * next;
};
#endif
......@@ -7,12 +7,16 @@
#include <semaphore.h>
#include <cstdint>
//#include "task.h"
#include "runqueue.h"
#include "timeops.h"
#include "user.h"
#include "task.h"
#define SINGLE 1
#define MULTIPLE 2
......@@ -24,60 +28,18 @@
// Scheduling parameters attached to a task.
struct gpu_sched_param {
// Task period; presumably microseconds, judging by the _us suffix -- TODO confirm.
long period_us;
// Task deadline; presumably microseconds, judging by the _us suffix -- TODO confirm.
long deadline_us;
// Task priority. NOTE(review): whether a larger or a smaller value means
// "more urgent" is not visible here -- confirm against the run-queue code.
long priority;
};
// Houssam : Use templates !! I need to use this
// Fixed-shape bundle of kernel call parameters: one integer plus one int
// pointer and one float pointer.
struct kernel_call_param {
// Integer parameter; presumably an element count -- TODO confirm.
int N;
// Pointer to int data. NOTE(review): host vs. device memory and ownership
// are not visible here.
int * int_param;
// Pointer to float data. NOTE(review): host vs. device memory and ownership
// are not visible here.
float * float_param;
};
typedef void (*kernel_t)();
struct pruda_task_t {
int id;
kernel_t kernel_func;
struct gpu_sched_param gpu_params;
int bs;
int gs;
template<typename ...Arguments>
void create_kernel(struct kernel_t<Arguments ...> * k, void kernel_c(Arguments...), int gs, int bs,
Arguments...args);
struct sched_param param;
// CPU Thread params
pthread_t th;
pthread_attr_t attr;
sem_t wait_exec;
// stream
cudaStream_t *str;
struct pruda_task_t * next;
};
struct scheduler_t {
int strategy;
int policy;
......@@ -131,11 +93,14 @@ void pruda_kernel_abort();
void submit_task(int indexex);
struct pruda_task_t * create_pruda_task(int id, kernel_t kernel_func,
struct pruda_task_t * create_pruda_task(int id,
struct gpu_sched_param gpu_params,
int bs, int gs);
void init_scheduler(int strategy, int policy);
......
#ifndef VAR_DEFS
#define VAR_DEFS
#include <tuple>
#include <iostream>
template<typename ...Arguments>
struct kernel_t {
void (*kernel_c)(Arguments...);
std::tuple<Arguments...> args;
int bs;
int gs;
};
std::tuple<struct kernel_t<int *,int*,int *, int> * , struct kernel_t<int *,int*,int *> * > get_listing();
void init_kernel_listing();
#endif
#include "../inc/tools.h"
#include "../inc/user.h"
// User prototypes must be defined after this line
// template<typename ...Arguments>
// struct kernel_t<Arguments...> * create_kernel_t(void kernel_c(Arguments...), int gs, int bs,
// Arguments...args){
// struct kernel_t<Arguments...> * tau = (struct kernel_t<Arguments...> *)
// (malloc(sizeof(struct kernel_t<Arguments...> )));
// tau->kernel_c = kernel_c;
// tau->args = std::tuple<Arguments...>(args...);
// tau->gs = gs;
// tau->bs = bs;
// return tau;
// }
// Fills the descriptor pointed to by k with the entry point kernel_c, a tuple
// built from args, and the launch dimensions gs and bs.
// k is dereferenced unconditionally, so it must point to a valid kernel_t.
template<typename ...Arguments>
void create_kernel(struct kernel_t<Arguments ...> * k, void kernel_c(Arguments...), int gs, int bs,
                   Arguments...args){
    struct kernel_t<Arguments ...> & slot = *k;

    slot.kernel_c = kernel_c;
    slot.args = std::tuple<Arguments...>(args...);

    // Launch dimensions are independent of each other; store both.
    slot.bs = bs;
    slot.gs = gs;
}
// Empty tag type carrying a compile-time list of integers.
template<int... Is>
struct index_tuple {};

// Builder for index_tuple<0, 1, ..., n-1>, where n is the number of types that
// follow the accumulator. Only the specializations below are defined.
template<int Next, typename Collected, typename... Remaining>
struct make_indexes_impl;

// Recursive step: one type is still pending, so append the current index to
// the accumulated list and continue with the remaining types.
template<int Next, int... Done, typename Head, typename... Tail>
struct make_indexes_impl<Next, index_tuple<Done...>, Head, Tail...>
{
    using type = typename make_indexes_impl<Next + 1, index_tuple<Done..., Next>, Tail...>::type;
};

// Terminal step: no types left, the accumulated list is the result.
template<int Next, int... Done>
struct make_indexes_impl<Next, index_tuple<Done...> >
{
    using type = index_tuple<Done...>;
};

// make_indexes<T0, ..., Tn-1>::type is index_tuple<0, ..., n-1>.
template<typename... Types>
struct make_indexes : make_indexes_impl<0, index_tuple<>, Types...>
{};
// Launches the kernel pointed to by pf with the launch configuration
// <<<gs, bs>>> (gs blocks, bs threads per block), passing the elements of tup
// as the kernel arguments.
//
// The unnamed index_tuple parameter only carries the Indexes pack, which is
// expanded below to pull each element out of the tuple in order.
//
// The launch configuration names no shared-memory size and no stream, so the
// defaults of the <<<...>>> syntax apply.
//
// NOTE(review): the function is declared to return Ret but contains no return
// statement; that is only well-defined when Ret is void.
template<class Ret, class... Args, int... Indexes >
Ret apply_helper(int gs, int bs, Ret (*pf)(Args...), index_tuple< Indexes... >, std::tuple<Args...>&& tup)
{
// Expands to pf<<<gs,bs>>>(get<0>(tup), get<1>(tup), ...).
(*pf)<<<gs,bs>>>( std::forward<Args>( std::get<Indexes>(tup))... );
}
// Calls pf through apply_helper with the launch dimensions gs and bs, using
// the elements of tup as the call arguments.
// tup itself is left untouched: apply_helper receives a temporary copy.
template<class Ret, class ... Args>
Ret apply(int gs, int bs, Ret (*pf)(Args...), const std::tuple<Args...>& tup)
{
    // One index per tuple element: index_tuple<0, 1, ..., sizeof...(Args) - 1>.
    typedef typename make_indexes<Args...>::type indexes_t;

    return apply_helper(gs, bs, pf, indexes_t(), std::tuple<Args...>(tup));
}
static __device__ __inline__ uint32_t __get_smid(){
uint32_t smid;
......@@ -21,20 +97,26 @@ static __device__ __inline__ void __allocate_to_sm(int sm){
struct scheduler_t * scheduler;
// Houssam : need to declare indexes methods
// Stub: accepts an SM index and currently does nothing.
void pruda_alloc_sm(int sm){}
// Stub: currently always returns 0.
int pruda_get_sm(){
return 0;
}
// Stub: ignores sm and currently always returns 0.
int pruda_check_sm(int sm){
return 0;
}
// Stub: currently does nothing.
void pruda_thread_exit(){}
// Stub: currently does nothing.
void pruda_kernel_abort(){}
void init_scheduler(int strategy, int policy){
scheduler = (struct scheduler_t *) (malloc(sizeof(struct scheduler_t)));
scheduler->strategy = strategy;
......@@ -97,6 +179,34 @@ void pruda_subscribe_fp(struct pruda_task_t *tau){
}
// Launches the kernel registered at position indexex of the kernel listing,
// using the grid size, block size and arguments stored in its descriptor.
// Only positions 0 and 1 exist; any other value prints a message and
// terminates the process with exit(-1).
//
// The tuple position has to be a compile-time constant, which is why every
// supported index gets its own branch; a further listing entry would be
// dispatched the same way through std::get<2>.
//
// NOTE(review): apply() takes no stream, so the launch does not use the
// stream stored in the task record -- confirm this is intended.
void submit_task(int indexex){
    auto listing = get_listing();

    if (indexex == 0) {
        auto *entry = std::get<0>(listing);
        apply(entry->gs, entry->bs, entry->kernel_c, entry->args);
        return;
    }

    if (indexex == 1) {
        auto *entry = std::get<1>(listing);
        apply(entry->gs, entry->bs, entry->kernel_c, entry->args);
        return;
    }

    printf("unknown task, exitting \n");
    exit(-1);
}
void sched_on_lsq(){
struct pruda_runqueue_t * rq_h = get_most_priority_queue_fixed_priority(scheduler->rql);
......@@ -108,8 +218,8 @@ void sched_on_lsq(){
mp->str = &(scheduler->lsq);
(*(mp->kernel_func))<<<mp->gs,mp->bs,0,(*(mp->str)) >>>();
// (*(mp->kernel_func))<<<mp->gs,mp->bs,0,(*(mp->str)) >>>();
submit_task(mp->id);
cudaError_t code2= cudaGetLastError();
if (code2 != cudaSuccess)
{
......@@ -134,7 +244,9 @@ void sched_on_hsq(){
mp->str = &(scheduler->hsq);
(*(mp->kernel_func))<<<mp->gs,mp->bs,0,(*(mp->str)) >>>();
// (*(mp->kernel_func))<<<mp->gs,mp->bs,0,(*(mp->str)) >>>();
submit_task(mp->id);
cudaError_t code2= cudaGetLastError();
if (code2 != cudaSuccess)
......@@ -242,7 +354,7 @@ int del_tail_pruda_task_from_tq(){
return del_tail_pruda_task_runqueue(scheduler->tq);
}
struct pruda_task_t * create_pruda_task(int id, kernel_t kernel_func,
struct pruda_task_t * create_pruda_task(int id,
struct gpu_sched_param gpu_params,
int bs, int gs){
......@@ -250,7 +362,6 @@ struct pruda_task_t * create_pruda_task(int id, kernel_t kernel_func,
struct pruda_task_t * task = (struct pruda_task_t *)(malloc(sizeof(struct pruda_task_t)));
task->id = id;
task->kernel_func=kernel_func;
task->gpu_params.period_us=gpu_params.period_us;
task->gpu_params.deadline_us=gpu_params.deadline_us;
task->gpu_params.priority=gpu_params.priority;
......
#include "../inc/user.h"
struct kernel_t<int *,int*,int *, int> m_1;
struct kernel_t<int *,int*,int *> m_2;
std::tuple<decltype(m_1) * , decltype(m_2) * > tasks;
// Returns a copy of the global listing tuple, i.e. the pointers to the kernel
// descriptors m_1 and m_2 once init_kernel_listing() has run.
// The pointers refer to the global descriptors, so writes made through them
// are visible to every later caller.
decltype(tasks) get_listing(){
return tasks;
}
// Points each slot of the global listing at its kernel descriptor:
// slot 0 -> m_1, slot 1 -> m_2.
void init_kernel_listing(){
    std::get<0>(tasks) = &m_1;
    std::get<1>(tasks) = &m_2;
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment