#include #include #include #include // platform independent data types: #include // Number of iterations #define NUM_ITERATIONS 10 // The window height is equal to the number of streams #define NUM_INPUT_STREAMS WINDOW_HEIGHT #define WINDOW_HEIGHT 4 // The window width is equal to number of tuples required to fill in a window #define WINDOW_WIDTH 100 // The total number of elements is equal to the window_height times window_width #define elements WINDOW_HEIGHT*WINDOW_WIDTH // measure time: struct timeval start[NUM_ITERATIONS], end[NUM_ITERATIONS]; extern void cpu_output(void *buffers[], void *_args); extern void output_thread_aggregation(void *buffers[], void *_args); static struct starpu_perfmodel perf_model = { .type = STARPU_HISTORY_BASED, .symbol = "main", }; static struct starpu_codelet cl = { /*CPU implementation of the codelet */ .cpu_funcs = { cpu_output }, .cpu_funcs_name = { "cpu_output" }, #ifdef STARPU_USE_CUDA /* CUDA implementation of the codelet */ .cuda_funcs = { output_thread_aggregation }, #endif .nbuffers = 1, .modes = { STARPU_RW }, .model = &perf_model }; int main(int argc, char **argv) { uint32_t iterations_id; // create input streams: for (iterations_id=0; iterations_idsynchronous = 1; task->cl = &cl; /* Pointer to the codelet defined above */ /* the codelet manipulates one buffer in RW mode */ task->handles[0] = vector_handle; uint64_t aggregated_value = 0; /* an argument is passed to the codelet, beware that this is a * READ-ONLY buffer and that the codelet may be given a pointer to a * COPY of the argument */ task->cl_arg = &aggregated_value; task->cl_arg_size = sizeof(aggregated_value); /* submit the task to StarPU */ starpu_task_submit(task); /* StarPU does not need to manipulate the array anymore so we can stop monitoring it */ starpu_data_unregister(vector_handle); /* terminate StarPU */ starpu_shutdown(); gettimeofday(&end[iterations_id], NULL); // stop time for each iteration after aggregation value has been calculated and StarPU has 
been shut down printf("iter: %d - aggregated value: %lu\n", iterations_id, aggregated_value); //free the memory allocated on the CPU free(window); } uint64_t time = 0; // variable that holds the time // calculate the time required for the calculation of the aggregated value for all iterations for (iterations_id=0; iterations_id