/* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2011-2012, 2014 Université de Bordeaux 1 * Copyright (C) 2012 inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */ #include #include "../helper.h" #include "scal.h" #if ! (defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA)) int main(int argc, char **argv) { return STARPU_TEST_SKIPPED; } #else static int submit_tasks(starpu_data_handle_t handle, int pieces, int n) { int i, ret; for (i = 0; i < pieces; i++) { struct starpu_task *task = starpu_task_create(); task->handles[0] = starpu_data_get_sub_data(handle, 1, i); task->cl = &scal_codelet; task->execute_on_a_specific_worker = 1; task->workerid = i%n; ret = starpu_task_submit(task); if (ret == -ENODEV) return -ENODEV; STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit"); } ret = starpu_task_wait_for_all(); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); return 0; } static int find_a_worker(enum starpu_worker_archtype type) { int worker[STARPU_NMAXWORKERS]; int ret = starpu_worker_get_ids_by_type(type, worker, STARPU_NMAXWORKERS); if (ret == 0) return -ENODEV; if (ret == -ERANGE) return worker[STARPU_NMAXWORKERS-1]; return worker[ret-1]; } static int check_result(unsigned *t, size_t size) { unsigned i; for (i = 0; i < size; i++) { if (t[i] != i*2) { FPRINTF(stderr,"t[%d] is %u instead of %u\n", i, t[i], 2*i); return 1; } } return 0; } #ifdef STARPU_USE_CUDA #ifdef HAVE_CUDA_MEMCPY_PEER static int test_cuda(void) { int ret; unsigned *foo_gpu; unsigned *foo; int n, i, size, pieces; int devid; int chosen; cudaError_t cures; starpu_data_handle_t handle; /* Find a CUDA worker */ chosen = find_a_worker(STARPU_CUDA_WORKER); if (chosen == -ENODEV) return -ENODEV; n = starpu_worker_get_count(); size = 10 * n; devid = starpu_worker_get_devid(chosen); foo_gpu = (void*) starpu_malloc_on_node(starpu_worker_get_memory_node(chosen), size * sizeof(*foo_gpu)); foo = calloc(size, sizeof(*foo)); for (i = 0; i < size; i++) foo[i] = i; starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)foo, size, sizeof(*foo)); starpu_vector_ptr_register(handle, starpu_worker_get_memory_node(chosen), (uintptr_t)foo_gpu, (uintptr_t)foo_gpu, 0); /* Broadcast the data to force in-place partitioning */ for (i = 0; i < n; i++) starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(i), 0); /* Even with just one worker, split in at least two */ if (n == 1) pieces = 2; else pieces = n; struct starpu_data_filter f = { .filter_func = starpu_vector_filter_block, .nchildren = pieces, }; starpu_data_partition(handle, &f); ret = submit_tasks(handle, pieces, n); if (ret == -ENODEV) return -ENODEV; starpu_data_unpartition(handle, starpu_worker_get_memory_node(chosen)); starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(chosen), 0); starpu_data_unregister(handle); starpu_cuda_set_device(devid); cures = cudaMemcpy(foo, foo_gpu, size * sizeof(*foo_gpu), cudaMemcpyDeviceToHost); if (STARPU_UNLIKELY(cures)) STARPU_CUDA_REPORT_ERROR(cures); return check_result(foo, size); } #endif #endif #ifdef STARPU_USE_OPENCL static int test_opencl(void) { int i; int ret; int chosen; int n; int size; int pieces; cl_mem foo_gpu; starpu_data_handle_t handle; ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); /* Find an OpenCL worker */ chosen = find_a_worker(STARPU_OPENCL_WORKER); if (chosen == -ENODEV) return -ENODEV; n = starpu_worker_get_count(); size = 10 * n; int devid; cl_int err; cl_context context; cl_command_queue queue; devid = starpu_worker_get_devid(chosen); starpu_opencl_get_context(devid, &context); starpu_opencl_get_queue(devid, &queue); foo_gpu = (void*) starpu_malloc_on_node(starpu_worker_get_memory_node(chosen), size * sizeof(int)); unsigned int *foo = malloc(size*sizeof(*foo)); for (i = 0; i < size; i++) foo[i] = i; starpu_vector_data_register(&handle, STARPU_MAIN_RAM, (uintptr_t)foo, size, sizeof(int)); starpu_vector_ptr_register(handle, starpu_worker_get_memory_node(chosen), (uintptr_t)foo_gpu, (uintptr_t)foo_gpu, 0); /* Broadcast the data to force in-place partitioning */ for (i = 0; i < n; i++) starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(i), 0); /* Even with just one worker, split in at least two */ if (n == 1) pieces = 2; else pieces = n; struct starpu_data_filter f = { .filter_func = starpu_vector_filter_block, .nchildren = pieces, }; starpu_data_partition(handle, &f); ret = submit_tasks(handle, pieces, n); if (ret == -ENODEV) return -ENODEV; starpu_data_unpartition(handle, starpu_worker_get_memory_node(chosen)); starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(chosen), 0); starpu_data_unregister(handle); STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all"); ret = starpu_opencl_unload_opencl(&opencl_program); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); err = clEnqueueReadBuffer(queue, foo_gpu, CL_FALSE, 0, size*sizeof(*foo), foo, 0, NULL, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); clFinish(queue); return check_result(foo, size); } #endif /* !STARPU_USE_OPENCL */ int main(int argc, char **argv) { int skipped_cuda = 1, skipped_opencl = 1; int ret; ret = starpu_initialize(NULL, &argc, &argv); if (ret == -ENODEV) return STARPU_TEST_SKIPPED; STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); #ifdef STARPU_USE_OPENCL ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL); STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file"); #endif #ifdef STARPU_USE_CUDA #ifdef HAVE_CUDA_MEMCPY_PEER ret = test_cuda(); if (ret == 1) goto fail; else if (ret == 0) skipped_cuda = 0; #endif #endif #ifdef STARPU_USE_OPENCL ret = test_opencl(); if (ret == 1) goto fail; else if (ret == 0) skipped_opencl = 0; #endif starpu_shutdown(); if (skipped_cuda == 1 && skipped_opencl == 1) return STARPU_TEST_SKIPPED; return EXIT_SUCCESS; fail: starpu_shutdown(); return EXIT_FAILURE; } #endif /* defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) */