/* main.c */
  #include <starpu.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <sys/time.h>
  // platform independent data types:
  #include <stdint.h>
  // printf format macros (PRIu32, PRIu64, PRId64) for the fixed-width types:
  #include <inttypes.h>
  // Number of benchmark iterations executed by main()
  #define NUM_ITERATIONS 10
  // The window height is equal to the number of streams
  #define WINDOW_HEIGHT 4
  #define NUM_INPUT_STREAMS (WINDOW_HEIGHT)
  // The window width is equal to the number of tuples required to fill in a window
  #define WINDOW_WIDTH 100
  // The total number of elements is equal to window_height times window_width.
  // The expansion is parenthesized so that it behaves as a single value inside
  // larger expressions (e.g. `x / elements`). The lowercase name is kept because
  // the rest of the file refers to it by this name.
  #define elements ((WINDOW_HEIGHT) * (WINDOW_WIDTH))
  16. // measure time:
  17. struct timeval start[NUM_ITERATIONS], end[NUM_ITERATIONS];
  18. extern void cpu_output(void *buffers[], void *_args);
  19. extern void output_thread_aggregation(void *buffers[], void *_args);
  20. static struct starpu_perfmodel perf_model = {
  21. .type = STARPU_HISTORY_BASED,
  22. .symbol = "main",
  23. };
  24. static struct starpu_codelet cl =
  25. {
  26. /*CPU implementation of the codelet */
  27. .cpu_funcs = { cpu_output },
  28. .cpu_funcs_name = { "cpu_output" },
  29. #ifdef STARPU_USE_CUDA
  30. /* CUDA implementation of the codelet */
  31. .cuda_funcs = { output_thread_aggregation },
  32. #endif
  33. .nbuffers = 1,
  34. .modes = { STARPU_RW },
  35. .model = &perf_model
  36. };
  37. int main(int argc, char **argv)
  38. {
  39. uint32_t iterations_id;
  40. // create input streams:
  41. for (iterations_id=0; iterations_id<NUM_ITERATIONS; iterations_id++) {
  42. // dynamic allocation of the memory needed for all the elements
  43. uint32_t *window;
  44. window = (uint32_t*)calloc(elements, sizeof(uint32_t));
  45. // check if there's enough space for the allocation
  46. if(!window){
  47. printf("Allocation error for window - aborting.\n");
  48. exit(1);
  49. }
  50. uint64_t ag_val = 0; // test variable to check if the cuda sum is equal to the cpu sum
  51. // initialization - fill in the window with random numbers:
  52. for (int i = 0; i < elements; i++) {
  53. window[i] = (rand()%1000);
  54. ag_val += window[i];
  55. }
  56. printf("TEST %lu\n", ag_val);
  57. gettimeofday(&start[iterations_id], NULL); // start time for each iteration only for StarPU initialization and time to calculate aggregated value
  58. /* initialize StarPU */
  59. starpu_init(NULL);
  60. /* initialize performance model */
  61. starpu_perfmodel_init(&perf_model);
  62. /* Tell StaPU to associate the "window" vector with the "vector_handle" */
  63. starpu_data_handle_t vector_handle;
  64. starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)window, elements, sizeof(window[0]));
  65. /* create a synchronous task: any call to starpu_task_submit will block
  66. * until it is terminated */
  67. struct starpu_task *task = starpu_task_create();
  68. task->synchronous = 1;
  69. task->cl = &cl; /* Pointer to the codelet defined above */
  70. /* the codelet manipulates one buffer in RW mode */
  71. task->handles[0] = vector_handle;
  72. uint64_t aggregated_value = 0;
  73. /* an argument is passed to the codelet, beware that this is a
  74. * READ-ONLY buffer and that the codelet may be given a pointer to a
  75. * COPY of the argument */
  76. task->cl_arg = &aggregated_value;
  77. task->cl_arg_size = sizeof(aggregated_value);
  78. /* submit the task to StarPU */
  79. starpu_task_submit(task);
  80. /* StarPU does not need to manipulate the array anymore so we can stop monitoring it */
  81. starpu_data_unregister(vector_handle);
  82. /* terminate StarPU */
  83. starpu_shutdown();
  84. gettimeofday(&end[iterations_id], NULL); // stop time for each iteration after aggregation value has been calculated and StarPU has been shutted down
  85. printf("iter: %d - aggregated value: %lu\n", iterations_id, aggregated_value);
  86. //free the memory allocated on the CPU
  87. free(window);
  88. }
  89. uint64_t time = 0; // variable that holds the time
  90. // calculate the time required for the calculation of the aggregated value for all iterations
  91. for (iterations_id=0; iterations_id<NUM_ITERATIONS; iterations_id++) {
  92. time += ((end[iterations_id].tv_sec * 1000000 + end[iterations_id].tv_usec) - (start[iterations_id].tv_sec * 1000000 + start[iterations_id].tv_usec));
  93. }
  94. printf("usec: %ld\n", time);
  95. return 0;
  96. }