123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211 |
- /* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010-2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- * Copyright (C) 2013 Thibaut Lambert
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
- #include <starpu.h>
- #include <common/config.h>
- #include <core/task.h>
- #include <core/workers.h>
- #include <datawizard/datawizard.h>
- #include <util/starpu_data_cpy.h>
- #include <starpu_mic.h>
- #include <datawizard/memory_nodes.h>
- static void common_data_cpy_func(void *descr[], void *cl_arg)
- {
- unsigned interface_id = *(unsigned *)cl_arg;
- const struct starpu_data_interface_ops *interface_ops = _starpu_data_interface_get_ops(interface_id);
- const struct starpu_data_copy_methods *copy_methods = interface_ops->copy_methods;
- int workerid = starpu_worker_get_id_check();
- enum starpu_worker_archtype type = starpu_worker_get_type(workerid);
- unsigned memory_node = starpu_worker_get_memory_node(workerid);
- void *dst_interface = descr[0];
- void *src_interface = descr[1];
- switch (type)
- {
- case STARPU_CPU_WORKER:
- if (copy_methods->ram_to_ram)
- {
- copy_methods->ram_to_ram(src_interface, memory_node, dst_interface, memory_node);
- return;
- }
- break;
- #ifdef STARPU_USE_CUDA
- case STARPU_CUDA_WORKER:
- {
- cudaStream_t stream = starpu_cuda_get_local_stream();
- if (copy_methods->cuda_to_cuda_async)
- {
- copy_methods->cuda_to_cuda_async(src_interface, memory_node, dst_interface, memory_node, stream);
- return;
- }
- else if (copy_methods->cuda_to_cuda)
- {
- copy_methods->cuda_to_cuda(src_interface, memory_node, dst_interface, memory_node);
- return;
- }
- break;
- }
- #endif
- case STARPU_OPENCL_WORKER:
- if (copy_methods->opencl_to_opencl)
- {
- copy_methods->opencl_to_opencl(src_interface, memory_node, dst_interface, memory_node);
- return;
- }
- break;
- default:
- /* unknown architecture */
- STARPU_ABORT();
- }
- STARPU_ASSERT(copy_methods->any_to_any);
- copy_methods->any_to_any(src_interface, memory_node, dst_interface, memory_node, NULL);
- }
- void mp_cpy_kernel(void *descr[], void *cl_arg)
- {
- unsigned interface_id = *(unsigned *)cl_arg;
- const struct starpu_data_interface_ops *interface_ops = _starpu_data_interface_get_ops(interface_id);
- const struct starpu_data_copy_methods *copy_methods = interface_ops->copy_methods;
-
- void *dst_interface = descr[0];
- void *src_interface = descr[1];
- if(copy_methods->ram_to_ram)
- copy_methods->ram_to_ram(src_interface, STARPU_MAIN_RAM, dst_interface, STARPU_MAIN_RAM);
- else if(copy_methods->any_to_any)
- copy_methods->any_to_any(src_interface, STARPU_MAIN_RAM, dst_interface, STARPU_MAIN_RAM, NULL);
- else
- STARPU_ABORT();
- }
- static starpu_mic_kernel_t mic_cpy_func()
- {
- #ifdef STARPU_USE_MIC
- starpu_mic_func_symbol_t mic_symbol = NULL;
- starpu_mic_register_kernel(&mic_symbol, "mp_cpy_kernel");
- return starpu_mic_get_kernel(mic_symbol);
- #else
- STARPU_ABORT();
- return NULL;
- #endif
- }
- struct starpu_perfmodel copy_model =
- {
- .type = STARPU_HISTORY_BASED,
- .symbol = "starpu_data_cpy"
- };
- static struct starpu_codelet copy_cl =
- {
- .where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL|STARPU_MIC,
- .cpu_funcs = {common_data_cpy_func},
- .cuda_funcs = {common_data_cpy_func},
- .opencl_funcs = {common_data_cpy_func},
- .mic_funcs = {mic_cpy_func},
- .nbuffers = 2,
- .modes = {STARPU_W, STARPU_R},
- .model = ©_model
- };
- int _starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle,
- int asynchronous, void (*callback_func)(void*), void *callback_arg,
- int reduction, struct starpu_task *reduction_dep_task)
- {
- struct starpu_task *task = starpu_task_create();
- STARPU_ASSERT(task);
- task->name = "data_cpy";
- struct _starpu_job *j = _starpu_get_job_associated_to_task(task);
- if (reduction)
- {
- j->reduction_task = reduction;
- if (reduction_dep_task)
- starpu_task_declare_deps_array(task, 1, &reduction_dep_task);
- }
- task->cl = ©_cl;
- unsigned *interface_id;
- _STARPU_MALLOC(interface_id, sizeof(*interface_id));
- *interface_id = dst_handle->ops->interfaceid;
- task->cl_arg = interface_id;
- task->cl_arg_size = sizeof(*interface_id);
- task->cl_arg_free = 1;
- task->priority = STARPU_MAX_PRIO; //TODO: make it as a parameter
- task->callback_func = callback_func;
- task->callback_arg = callback_arg;
- STARPU_TASK_SET_HANDLE(task, dst_handle, 0);
- STARPU_TASK_SET_HANDLE(task, src_handle, 1);
- task->synchronous = !asynchronous;
- int ret = _starpu_task_submit_internally(task);
- STARPU_ASSERT(!ret);
- return 0;
- }
- int starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_handle,
- int asynchronous, void (*callback_func)(void*), void *callback_arg)
- {
- return _starpu_data_cpy(dst_handle, src_handle, asynchronous, callback_func, callback_arg, 0, NULL);
- }
- /* TODO: implement copy on write, and introduce starpu_data_dup as well */
- int starpu_data_dup_ro(starpu_data_handle_t *dst_handle, starpu_data_handle_t src_handle, int asynchronous)
- {
- _starpu_spin_lock(&src_handle->header_lock);
- if (src_handle->readonly_dup) {
- /* Already a ro duplicate, just return it with one more ref */
- *dst_handle = src_handle->readonly_dup;
- _starpu_spin_unlock(&src_handle->header_lock);
- _starpu_spin_lock(&(*dst_handle)->header_lock);
- (*dst_handle)->aliases++;
- _starpu_spin_unlock(&(*dst_handle)->header_lock);
- return 0;
- }
- if (src_handle->readonly) {
- src_handle->aliases++;
- _starpu_spin_unlock(&src_handle->header_lock);
- *dst_handle = src_handle;
- return 0;
- }
- _starpu_spin_unlock(&src_handle->header_lock);
- starpu_data_register_same(dst_handle, src_handle);
- _starpu_data_cpy(*dst_handle, src_handle, asynchronous, NULL, NULL, 0, NULL);
- (*dst_handle)->readonly = 1;
- _starpu_spin_lock(&src_handle->header_lock);
- src_handle->readonly_dup = (*dst_handle);
- (*dst_handle)->readonly_dup_of = src_handle;
- _starpu_spin_unlock(&src_handle->header_lock);
- return 0;
- }
|