123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128 |
- #include<starpu.h>
- #include <stdio.h>
- #include<stdlib.h>
- #include <sys/time.h>
- // platform independent data types:
- #include <stdint.h>
/* Number of benchmark iterations (one window is generated and aggregated per iteration). */
#define NUM_ITERATIONS 10

/* Window height, in rows. */
#define WINDOW_HEIGHT 4

/* The number of input streams is equal to the window height. */
#define NUM_INPUT_STREAMS WINDOW_HEIGHT

/* Window width: the number of tuples required to fill in a window. */
#define WINDOW_WIDTH 100

/*
 * Total number of elements in a window (height times width).
 * The expansion is parenthesized so that the macro behaves as a single
 * operand inside larger expressions such as `x / elements` or `x % elements`.
 */
#define elements (WINDOW_HEIGHT * WINDOW_WIDTH)
- // measure time:
- struct timeval start[NUM_ITERATIONS], end[NUM_ITERATIONS];
/*
 * Kernel entry points referenced by the codelet below. They are only
 * declared here; their definitions are not part of this listing.
 * Both take the buffer array and an opaque argument pointer.
 */
void cpu_output(void *buffers[], void *_args);                /* CPU kernel */
void output_thread_aggregation(void *buffers[], void *_args); /* kernel used for the CUDA slot */
- static struct starpu_perfmodel perf_model = {
- .type = STARPU_HISTORY_BASED,
- .symbol = "main",
- };
- static struct starpu_codelet cl =
- {
- /*CPU implementation of the codelet */
- .cpu_funcs = { cpu_output },
- .cpu_funcs_name = { "cpu_output" },
- #ifdef STARPU_USE_CUDA
- /* CUDA implementation of the codelet */
- .cuda_funcs = { output_thread_aggregation },
- #endif
- .nbuffers = 1,
- .modes = { STARPU_RW },
- .model = &perf_model
- };
- int main(int argc, char **argv)
- {
- uint32_t iterations_id;
-
- // create input streams:
- for (iterations_id=0; iterations_id<NUM_ITERATIONS; iterations_id++) {
- // dynamic allocation of the memory needed for all the elements
- uint32_t *window;
- window = (uint32_t*)calloc(elements, sizeof(uint32_t));
-
- // check if there's enough space for the allocation
- if(!window){
- printf("Allocation error for window - aborting.\n");
- exit(1);
- }
-
- uint64_t ag_val = 0; // test variable to check if the cuda sum is equal to the cpu sum
-
- // initialization - fill in the window with random numbers:
- for (int i = 0; i < elements; i++) {
- window[i] = (rand()%1000);
- ag_val += window[i];
- }
- printf("TEST %lu\n", ag_val);
-
- gettimeofday(&start[iterations_id], NULL); // start time for each iteration only for StarPU initialization and time to calculate aggregated value
-
- /* initialize StarPU */
- starpu_init(NULL);
-
- /* initialize performance model */
- starpu_perfmodel_init(&perf_model);
-
- /* Tell StaPU to associate the "window" vector with the "vector_handle" */
- starpu_data_handle_t vector_handle;
- starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)window, elements, sizeof(window[0]));
-
- /* create a synchronous task: any call to starpu_task_submit will block
- * until it is terminated */
- struct starpu_task *task = starpu_task_create();
- task->synchronous = 1;
- task->cl = &cl; /* Pointer to the codelet defined above */
-
- /* the codelet manipulates one buffer in RW mode */
- task->handles[0] = vector_handle;
-
- uint64_t aggregated_value = 0;
-
- /* an argument is passed to the codelet, beware that this is a
- * READ-ONLY buffer and that the codelet may be given a pointer to a
- * COPY of the argument */
- task->cl_arg = &aggregated_value;
- task->cl_arg_size = sizeof(aggregated_value);
-
- /* submit the task to StarPU */
- starpu_task_submit(task);
-
- /* StarPU does not need to manipulate the array anymore so we can stop monitoring it */
- starpu_data_unregister(vector_handle);
-
- /* terminate StarPU */
- starpu_shutdown();
- gettimeofday(&end[iterations_id], NULL); // stop time for each iteration after aggregation value has been calculated and StarPU has been shutted down
-
- printf("iter: %d - aggregated value: %lu\n", iterations_id, aggregated_value);
-
- //free the memory allocated on the CPU
- free(window);
- }
- uint64_t time = 0; // variable that holds the time
-
- // calculate the time required for the calculation of the aggregated value for all iterations
- for (iterations_id=0; iterations_id<NUM_ITERATIONS; iterations_id++) {
- time += ((end[iterations_id].tv_sec * 1000000 + end[iterations_id].tv_usec) - (start[iterations_id].tv_sec * 1000000 + start[iterations_id].tv_usec));
- }
- printf("usec: %ld\n", time);
- return 0;
- }
|