#include #include #include #include #include "../helper.h" #include "StreamFMA.h" #include "MaxSLiCInterface.h" #define SIZE (192/sizeof(int32_t)) void cpu_func(void *buffers[], void *cl_arg) { int *a = (int*) STARPU_VECTOR_GET_PTR(buffers[0]); int *b = (int*) STARPU_VECTOR_GET_PTR(buffers[1]); int *c = (int*) STARPU_VECTOR_GET_PTR(buffers[2]); int size = STARPU_VECTOR_GET_NX(buffers[0]); (void)buffers; (void)cl_arg; int i; for (i = 0; i < size; i++) c[i] = a[i] + b[i]; } void fpga_impl(void *buffers[], void *cl_arg) { (void)cl_arg; int32_t *ptrA = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[0]); int32_t *ptrB = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[1]); int32_t *ptrC = (int32_t*) STARPU_VECTOR_GET_PTR(buffers[2]); int32_t *poubelle_cpu = malloc(SIZE * sizeof(int32_t)); int size = STARPU_VECTOR_GET_NX(buffers[0]); int sizeBytes=SIZE *sizeof(int32_t); size_t LMemsize= SIZE *sizeof(int32_t); size_t poubelle = 0xc0000; size_t ptrCT1 = 0x00000000000000c0; size_t ptrAT2 = ptrCT1; size_t ptrBT2 = ptrCT1; size_t ptrCT2 = 0x0000000000000180; size_t ptrAT3 = ptrCT2; size_t ptrBT3 = ptrCT2; printf("Loading DFE memory.\n"); StreamFMA(SIZE, ptrA, sizeBytes, ptrB, sizeBytes, poubelle_cpu, sizeBytes, poubelle, LMemsize, poubelle, LMemsize, poubelle, LMemsize, poubelle, LMemsize, ptrCT1, LMemsize, poubelle, LMemsize); printf("T1 finished\n"); StreamFMA(SIZE, poubelle_cpu, sizeBytes, poubelle_cpu, sizeBytes, poubelle_cpu, sizeBytes, ptrAT2, LMemsize, poubelle, LMemsize, ptrBT2, LMemsize, poubelle, LMemsize, poubelle, LMemsize, ptrCT2, LMemsize); printf("T2 finished\n"); StreamFMA(SIZE, poubelle_cpu, sizeBytes, poubelle_cpu, sizeBytes, ptrC, sizeBytes, poubelle, LMemsize, ptrAT3, LMemsize, poubelle, LMemsize, ptrBT3, LMemsize, poubelle, LMemsize, poubelle, LMemsize); printf("T3 finished\n"); printf("Running DFE.\n"); } static struct starpu_codelet cl = { .cpu_funcs = {cpu_func}, .cpu_funcs_name = {"cpu_func"}, .fpga_funcs = {fpga_impl}, .nbuffers = 3, .modes = {STARPU_R, STARPU_R, STARPU_W} }; int main(int argc, char **argv) { /* Enable profiling */ starpu_profiling_status_set(1); struct starpu_conf conf; starpu_data_handle_t handle_a, handle_b, handle_c; int ret; int size=1234; starpu_conf_init(&conf); conf.sched_policy_name = "eager"; conf.calibrate = 0; ret = starpu_initialize(&conf, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); int a[SIZE]; int b[SIZE]; int c[SIZE]; int i; for(i = 0; i < SIZE; ++i) { a[i] = random() % 100; b[i] = random() % 100; } starpu_vector_data_register(&handle_a, STARPU_MAIN_RAM, (uintptr_t) &a, SIZE, sizeof(int)); starpu_vector_data_register(&handle_b, STARPU_MAIN_RAM, (uintptr_t) &b, SIZE, sizeof(int)); starpu_vector_data_register(&handle_c, STARPU_MAIN_RAM, (uintptr_t) &c, SIZE, sizeof(int)); struct starpu_task *task = starpu_task_create(); task->cl = &cl; task->handles[0] = handle_a; task->handles[1] = handle_b; task->handles[2] = handle_c; task->synchronous = 1; task->destroy = 0; /* submit the task to StarPU */ //starpu_task_destroy(task); ret = starpu_task_submit(task); fprintf(stderr,"task submitted %d\n", ret); starpu_data_unregister(handle_a); starpu_data_unregister(handle_b); starpu_data_unregister(handle_c); int mysize = SIZE; if (mysize > 10) mysize = 10; for (i = 0; i < mysize; ++i) { int ct1 = a[i] + b[i]; int ct2 = ct1 * ct1; int ct3 = ct2 + ct2; printf("%d == %d\n", c[i], ct3); } starpu_shutdown(); return EXIT_SUCCESS; }