adding multiple strategy and restylling the code

a0a5da20 · zahoussem · c8404926 · a0a5da20 · a0a5da20 · a0a5da20
Commit a0a5da20 authored Sep 24, 2019 by zahoussem
--- a/Makefile
+++ b/Makefile
 CC      =  nvcc

-main: runqueue.o tools.o timeops.o test.o
+main_1: runqueue.o tools.o timeops.o fp_single.o
 	$(CC)   tools.o runqueue.o timeops.o test.o -o  testing  


-test.o : test.cu
-	$(CC)  -c test.cu -o test.o -dc
+main_2: runqueue.o tools.o timeops.o fp_multiple.o
+	$(CC)   tools.o runqueue.o timeops.o test.o -o  testing  
+
+
+
+
+fp_multiple.o : examples/fp_multiple.cu
+	$(CC)  -c examples/fp_multiple.cu -o test.o -dc 
+
+
+fp_single.o : examples/fp_single.cu
+	$(CC)  -c examples/fp_single.cu -o test.o -dc

-timeops.o : timeops.cu
-	$(CC) -c timeops.cu -o timeops.o -dc
+timeops.o : src/timeops.cu
+	$(CC) -c src/timeops.cu -o timeops.o -dc

-runqueue.o : runqueue.cu
-	$(CC)  -c runqueue.cu -o runqueue.o -dc
+runqueue.o : src/runqueue.cu
+	$(CC)  -c src/runqueue.cu -o runqueue.o -dc


-tools.o : tools.cu
-	$(CC) -c tools.cu -o tools.o -dc 
+tools.o : src/tools.cu
+	$(CC) -c src/tools.cu -o tools.o -dc 

 clean:
 	rm -f *.o testing *~
--- a/examples/fp_multiple.cu
+++ b/examples/fp_multiple.cu
+#include "../inc/tools.h"
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+
+
+// // Kernel function to add the elements of two arrays
+// __global__ void add()
+// {
+//   __allocate_to_sm(0);
+//   printf("I am processed by %lu\n",(unsigned long)__get_smid());
+// }
+
+
+
+
+__global__ void kernel_a_code(){
+    printf(" ******** this is the kernel a  code \n");
+    int res = 0; 
+    for (int i=0;i<500000000;i++){
+      res +=i * 5;
+    }
+}
+
+
+
+__global__  void kernel_b_code(){
+printf(" ****************************** this is the kernel b code \n");
+    int res = 0; 
+    for (int i=0;i<500000000;i++){
+      res +=i * 5;
+    }
+}
+
+__global__  void kernel_c_code(){
+  printf(" ***************************************** this is the kernel c code \n");
+    int res = 0; 
+    for (int i=0;i<500000000;i++){
+      res +=i * 5;
+    }
+}
+
+
+
+int main(int argc, char  ** argv){
+
+
+
+  struct pruda_task_t * p_task_a = create_pruda_task(0, kernel_a_code, 1000000, 1000000, 15, 1, 1);
+  struct pruda_task_t * p_task_b = create_pruda_task(1, kernel_b_code, 2500000, 2500000 , 20, 1, 1);
+
+  init_scheduler(MULTIPLE, FP);
+  add_pruda_task(p_task_a);
+  add_pruda_task(p_task_b);
+  
+  printf("Sched initialed, creating cpu threads \n");
+
+  create_cpu_threads();
+
+  sleep(20);
+}
--- a/test.cu
+++ b/test.cu
-#include "runqueue.h"
-#include "tools.h"
+#include "../inc/tools.h"



@@ -50,10 +49,26 @@ printf(" ***************************************** this is the kernel c code \n"
 int main(int argc, char  ** argv){

  
+  struct gpu_sched_param b;
+  b.period_us =  30000;
+  b.deadline_us= 30000;
+  b.priority = 20;
  
-  struct pruda_task_t * p_task_a = create_pruda_task(0, kernel_a_code, 600000, 6000000, 15, 1, 1);
-  struct pruda_task_t * p_task_b = create_pruda_task(1, kernel_b_code, 600000, 6000000, 20, 1, 1);
-  struct pruda_task_t * p_task_c = create_pruda_task(2, kernel_c_code, 600000, 6000000, 2, 1, 1);
+
+  struct gpu_sched_param a;
+  a.period_us =  30000;
+  a.deadline_us= 30000;
+  a.priority = 15;
+
+  struct gpu_sched_param c;
+  c.period_us =  30000;
+  c.deadline_us= 30000;
+  c.priority = 2;
+
+
+  struct pruda_task_t * p_task_b = create_pruda_task(1, kernel_b_code, b, 1, 1);
+  struct pruda_task_t * p_task_a = create_pruda_task(0, kernel_a_code, a, 1, 1);
+  struct pruda_task_t * p_task_c = create_pruda_task(2, kernel_c_code, c, 1, 1);


  init_scheduler(SINGLE, FP);
@@ -65,6 +80,6 @@ int main(int argc, char  ** argv){

  create_cpu_threads();

-  sleep(20);
+  sleep(5);
 
 }
--- a/image.c
+++ b/image.c
--- a/image.cu
+++ b/image.cu
--- a/runqueue.h
+++ b/runqueue.h
--- a/timeops.h
+++ b/timeops.h
--- a/tools.h
+++ b/tools.h
@@ -24,19 +24,42 @@



+struct gpu_sched_param {
+  long period_us;
+  long deadline_us;
+  long priority;
+};
+
+ 
+
+// Houssam : Use templates !!  I need to use this 
+struct kernel_call_param {
+  int N;
+  int * int_param; 
+  float * float_param;
+};
+
+
+
+
 typedef void (*kernel_t)();


 struct pruda_task_t {
  int id;
  kernel_t kernel_func;
-  long period_us;
-  long deadline_us;
-  long priority;
+
+
+  
+  struct gpu_sched_param gpu_params;
  int bs;
  int gs;


+  
+
+  
+  
  struct sched_param param;

  // CPU Thread params
@@ -48,6 +71,9 @@ struct pruda_task_t {

  // stream
  cudaStream_t *str; 
+
+  struct pruda_task_t * next;
+
 };


@@ -79,6 +105,8 @@ struct scheduler_t {
    int hsq_free;
    int lsq_free;

+  struct pruda_task_t * lsq_current;
+  struct pruda_task_t * hsq_current;


  // protect resched from multiple accesses
@@ -104,8 +132,10 @@ void pruda_kernel_abort();



-struct pruda_task_t * create_pruda_task(int id, kernel_t kernel_func, long period_us, long deadline_us, long priority, int bs, int gs);

+struct pruda_task_t * create_pruda_task(int id, kernel_t kernel_func,
+					struct gpu_sched_param gpu_params,
+					int bs, int gs);
 void init_scheduler(int strategy, int policy);


@@ -121,6 +151,8 @@ void reset_pruda_task_queue();
 int add_pruda_task(struct pruda_task_t *);
 int del_tail_pruda_task_from_tq();

+void sched_on_lsq();
+void sched_on_hsq();

 void print_pruda_task(const struct pruda_task_t *task);


--- a/runqueue.cu
+++ b/runqueue.cu
-#include "runqueue.h"
+#include "../inc/runqueue.h"



@@ -71,7 +71,7 @@ void destroy_pruda_runqueue_list_t(struct  pruda_runqueue_list_t * rql){


 void add_pruda_task_fixed_priority(struct pruda_task_t * tau, struct pruda_runqueue_list_t * rql){
-  add_tail_pruda_task_runqueue(tau,rql->list[tau->priority]);
+  add_tail_pruda_task_runqueue(tau,rql->list[tau->gpu_params.priority]);
 }



--- a/timeops.cu
+++ b/timeops.cu
-#include "timeops.h"
+#include "../inc/timeops.h"

 int cmp_spec(struct timespec *a, struct timespec *b){
 return 0;

--- a/tools.cu
+++ b/tools.cu
-#include "tools.h"
+#include "../inc/tools.h"


 static __device__ __inline__ uint32_t __get_smid(){    
@@ -68,8 +68,25 @@ void init_scheduler(int strategy, int policy){
    }
   

-  cudaStreamCreate(&(scheduler->hsq));
-  cudaStreamCreate(&(scheduler->lsq));
+
+  int lp,hp ;
+  cudaDeviceGetStreamPriorityRange(&lp,&hp); 
+
+  
+  cudaStreamCreateWithPriority(&(scheduler->hsq), cudaStreamNonBlocking,hp);
+  cudaStreamCreateWithPriority(&(scheduler->lsq), cudaStreamNonBlocking,lp);
+
+
+  
+  
+  // cudaStreamCreate(&(scheduler->hsq));
+  // cudaStreamCreate(&(scheduler->lsq));
+
+    
+
+  
+
+  
  // Houssam : Need to set the priority between streams 
  scheduler->lsq_free = 0;
  scheduler->hsq_free = 0;
@@ -80,7 +97,7 @@ void init_scheduler(int strategy, int policy){
 void pruda_subscribe_fp(struct pruda_task_t *tau){

  // pthread_mutex_lock(&(scheduler->mut));
-  if (tau->priority < 0 || tau->priority >= RUNQUEUES_NMB ){
+  if (tau->gpu_params.priority < 0 || tau->gpu_params.priority >= RUNQUEUES_NMB ){
    printf("Task priority out of Range, exiting \n");
    exit(-1);
  }
@@ -90,18 +107,10 @@ void pruda_subscribe_fp(struct pruda_task_t *tau){
 }


+void sched_on_lsq(){

-
-void pruda_resched_fp_single(){
-  pthread_mutex_lock(&(scheduler->mut));
-  if (scheduler->lsq_free == 1) {
-     pthread_mutex_unlock(&(scheduler->mut));
-    return;
-  }
-  print_rq_state(scheduler->rql);
  struct pruda_runqueue_t * rq_h = get_most_priority_queue_fixed_priority(scheduler->rql);
 
-
  if (rq_h != NULL) {
    
    struct pruda_task_t * mp  = rq_h->list[rq_h->size-1];
@@ -117,11 +126,85 @@ void pruda_resched_fp_single(){
      }
    sem_post(&(mp->wait_exec));
    scheduler->lsq_free =1;
+    scheduler->lsq_current = mp;
+  }
+
+}
+
+
+
+// Houssam : May be I will merge it with sched_on_lsq
+void sched_on_hsq(){
+  struct pruda_runqueue_t * rq_h = get_most_priority_queue_fixed_priority(scheduler->rql);
+  if (rq_h != NULL) {
+    struct pruda_task_t * mp  = rq_h->list[rq_h->size-1];
+    del_tail_pruda_task_runqueue(rq_h);
+    
+    mp->str = &(scheduler->hsq);
+    (*(mp->kernel_func))<<<mp->gs,mp->bs,0,(*(mp->str)) >>>();
+    cudaError_t code2= cudaGetLastError(); 
+    if (code2 != cudaSuccess) 
+      {
+	printf("Running error: %s \n", cudaGetErrorString(code2));
+	exit(-1);
+      }
+
+    printf("task is %d for hsq  \n", mp->id);
+    sem_post(&(mp->wait_exec));
+    scheduler->hsq_free =1;
+    scheduler->hsq_current = mp;
+  }
+}
+
+void pruda_resched_fp_single(){
+  pthread_mutex_lock(&(scheduler->mut));
+  if (scheduler->lsq_free == 1) {
+     pthread_mutex_unlock(&(scheduler->mut));
+    return;
+  }
+  sched_on_lsq();
+  pthread_mutex_unlock(&(scheduler->mut));
+}
+
+void pruda_resched_fp_multiple(){
+  pthread_mutex_lock(&(scheduler->mut));
+  printf("resched multiple called \n");
+  if (scheduler->hsq_free == 1)
+    {
+      pthread_mutex_unlock(&(scheduler->mut));
+      return;
+
+    }
+  printf("hi not occupied \n");
+  if (scheduler->lsq_free==0)
+    {
+      printf("calling to schedule on lsq \n");
+      sched_on_lsq();
+      printf("scheduled on lsq \n");
+    }
+  else
+    {
+      printf("calling to schedule on hsq \n");
+      struct pruda_runqueue_t * rq_h = get_most_priority_queue_fixed_priority(scheduler->rql);
+      if (rq_h == NULL)
+	{
+	  pthread_mutex_unlock(&(scheduler->mut));
+	  return;
+	}
+      struct pruda_task_t * mp  = rq_h->list[rq_h->size-1];
+      if (mp->gpu_params.priority<scheduler->lsq_current->gpu_params.priority)
+	{
+	  printf("passed hsq tests \n");
+	  sched_on_hsq();
+	  printf("scheduled on hsq \n");
+	}
+      else {
+	printf("hsq testes not passed \n");
+      }
    }
  pthread_mutex_unlock(&(scheduler->mut));
 }

-void pruda_resched_fp_multiple(){}
 void pruda_resched_multiproc(){}

 void pruda_resched_fp(){  
@@ -165,17 +248,18 @@ int del_tail_pruda_task_from_tq(){
  return del_tail_pruda_task_runqueue(scheduler->tq); 
 }

-struct pruda_task_t * create_pruda_task(int id, kernel_t kernel_func, long period_us,
-					long deadline_us, long priority, int bs, int gs){
+struct pruda_task_t * create_pruda_task(int id, kernel_t kernel_func,
+					struct gpu_sched_param gpu_params,
+					int bs, int gs){

  
  struct pruda_task_t * task = (struct pruda_task_t *)(malloc(sizeof(struct pruda_task_t)));
  
  task->id = id;
  task->kernel_func=kernel_func;
-  task->period_us=period_us;
-  task->deadline_us=deadline_us;
-  task->priority=priority;
+  task->gpu_params.period_us=gpu_params.period_us;
+  task->gpu_params.deadline_us=gpu_params.deadline_us;
+  task->gpu_params.priority=gpu_params.priority;
  task->bs=bs;
  task->gs=gs;
  task->str = NULL;
@@ -190,7 +274,7 @@ void print_pruda_task(const struct pruda_task_t *task){
    return;
  }
  printf("[Task: id= %d, T= %lu , D= %lu, P= %lu, gs= %d, bs= %d ] \n",
-	 task->id, task->period_us, task->deadline_us, task->priority,
+	 task->id, task->gpu_params.period_us, task->gpu_params.deadline_us, task->gpu_params.priority,
 	 task->gs, task->bs);
 }

@@ -210,12 +294,13 @@ void *pruda_task(void *args){

    
    clock_gettime(CLOCK_REALTIME, &next);
-    add_spec_us(&next,task->period_us);
+    add_spec_us(&next,task->gpu_params.period_us);

    scheduler->pruda_subscribe(task);

    // Sync task execution 
    sem_wait(&(task->wait_exec));
+    // check if the next task is active, if yes !! resched it without waiting 
    cudaError_t code= cudaStreamSynchronize(*(task->str));
    if (code != cudaSuccess) 
      {
@@ -246,7 +331,7 @@ void create_cpu_threads(){
  
  pthread_attr_init(&(scheduler->tq->list[i]->attr));
  pthread_attr_setschedpolicy(&(scheduler->tq->list[i]->attr), SCHED_FIFO);
-  scheduler->tq->list[i]->param.sched_priority =  scheduler->tq->list[i]->priority;
+  scheduler->tq->list[i]->param.sched_priority =  scheduler->tq->list[i]->gpu_params.priority;
  pthread_attr_setschedparam(&(scheduler->tq->list[i]->attr), &(scheduler->tq->list[i]->param));   

   
@@ -254,8 +339,6 @@ void create_cpu_threads(){
  }
 }

-
-
 int pruda_task_queue_size(){
  return scheduler->tq->size;
 }