driver_mic_source.c 17 KB


  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2012 INRIA
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <stdio.h>
  17. #include <scif.h>
  18. #include <starpu.h>
  19. #include <starpu_profiling.h>
  20. #include <core/sched_policy.h>
  21. #include <core/workers.h>
  22. #include <common/uthash.h>
  23. #include <datawizard/memory_nodes.h>
  24. #include <drivers/driver_common/driver_common.h>
  25. #include <drivers/mp_common/source_common.h>
  26. #include "driver_mic_common.h"
  27. #include "driver_mic_source.h"
  28. /* Array of structures containing all the informations useful to send
  29. * and receive informations with devices */
  30. struct _starpu_mp_node *mic_nodes[STARPU_MAXMICDEVS];
  31. static COIENGINE handles[STARPU_MAXMICDEVS];
  32. /* Structure used by host to store informations about a kernel executable on
  33. * a MIC device : its name, and its address on each device.
  34. * If a kernel has been initialized, then a lookup has already been achieved and the
  35. * device knows how to call it, else the host still needs to do a lookup.
  36. */
  37. struct _starpu_mic_kernel
  38. {
  39. UT_hash_handle hh;
  40. char *name;
  41. starpu_mic_kernel_t func[STARPU_MAXMICDEVS];
  42. } *kernels;
  43. /* Mutex for concurrent access to the table.
  44. */
  45. starpu_pthread_mutex_t htbl_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
  46. /* Number of MIC worker initialized.
  47. */
  48. unsigned int nb_mic_worker_init = 0;
  49. starpu_pthread_mutex_t nb_mic_worker_init_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER;
  50. /* Returns the ID of the MIC device controlled by the caller.
  51. * if the worker doesn't control a MIC device -ENODEV is returned
  52. */
  53. //static int _starpu_mic_get_devid(void)
  54. //{
  55. // struct _starpu_machine_config *config = _starpu_get_machine_config();
  56. // int workerid = starpu_worker_get_id();
  57. //
  58. // if (config->workers[workerid].arch != STARPU_MIC_WORKER)
  59. // return -ENODEV;
  60. //
  61. // return config->workers[workerid].devid;
  62. //}
  63. struct _starpu_mp_node *_starpu_mic_src_get_actual_thread_mp_node()
  64. {
  65. struct _starpu_worker *actual_worker = _starpu_get_local_worker_key();
  66. STARPU_ASSERT(actual_worker);
  67. int devid = actual_worker->devid;
  68. STARPU_ASSERT(devid >= 0 && devid < STARPU_MAXMICDEVS);
  69. return mic_nodes[devid];
  70. }
  71. const struct _starpu_mp_node *_starpu_mic_src_get_mp_node_from_memory_node(int memory_node)
  72. {
  73. int devid = _starpu_memory_node_get_devid(memory_node);
  74. STARPU_ASSERT_MSG(devid >= 0 && devid < STARPU_MAXMICDEVS, "bogus devid %d for memory node %d\n", devid, memory_node);
  75. return mic_nodes[devid];
  76. }
  77. static void _starpu_mic_src_free_kernel(void *kernel)
  78. {
  79. struct _starpu_mic_kernel *k = kernel;
  80. free(k->name);
  81. free(kernel);
  82. }
  83. void _starpu_mic_clear_kernels(void)
  84. {
  85. struct _starpu_mic_kernel *kernel, *tmp;
  86. HASH_ITER(hh, kernels, kernel, tmp)
  87. {
  88. HASH_DEL(kernels, kernel);
  89. _starpu_mic_src_free_kernel(kernel);
  90. }
  91. }
  92. int _starpu_mic_src_register_kernel(starpu_mic_func_symbol_t *symbol, const char *func_name)
  93. {
  94. unsigned int func_name_size = (strlen(func_name) + 1) * sizeof(char);
  95. STARPU_PTHREAD_MUTEX_LOCK(&htbl_mutex);
  96. struct _starpu_mic_kernel *kernel;
  97. HASH_FIND_STR(kernels, func_name, kernel);
  98. if (kernel != NULL)
  99. {
  100. STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
  101. // Function already in the table.
  102. *symbol = kernel;
  103. return 0;
  104. }
  105. kernel = malloc(sizeof(*kernel));
  106. if (kernel == NULL)
  107. {
  108. STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
  109. return -ENOMEM;
  110. }
  111. kernel->name = malloc(func_name_size);
  112. if (kernel->name == NULL)
  113. {
  114. STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
  115. free(kernel);
  116. return -ENOMEM;
  117. }
  118. memcpy(kernel->name, func_name, func_name_size);
  119. HASH_ADD_STR(kernels, name, kernel);
  120. unsigned int nb_mic_devices = _starpu_mic_src_get_device_count();
  121. unsigned int i;
  122. for (i = 0; i < nb_mic_devices; ++i)
  123. kernel->func[i] = NULL;
  124. STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
  125. *symbol = kernel;
  126. return 0;
  127. }
  128. starpu_mic_kernel_t _starpu_mic_src_get_kernel(starpu_mic_func_symbol_t symbol)
  129. {
  130. int workerid = starpu_worker_get_id();
  131. /* This function has to be called in the codelet only, by the thread
  132. * which will handle the task */
  133. if (workerid < 0)
  134. return NULL;
  135. int devid = starpu_worker_get_devid(workerid);
  136. struct _starpu_mic_kernel *kernel = symbol;
  137. if (kernel->func[devid] == NULL)
  138. {
  139. struct _starpu_mp_node *node = mic_nodes[devid];
  140. int ret = _starpu_src_common_lookup(node, (void (**)(void))&kernel->func[devid], kernel->name);
  141. if (ret)
  142. return NULL;
  143. }
  144. return kernel->func[devid];
  145. }
  146. /* Report an error which occured when using a MIC device
  147. * and print this error in a human-readable style.
  148. * It hanbles errors occuring when using COI.
  149. */
  150. void _starpu_mic_src_report_coi_error(const char *func, const char *file,
  151. const int line, const COIRESULT status)
  152. {
  153. const char *errormsg = COIResultGetName(status);
  154. printf("SRC: oops in %s (%s:%u)... %d: %s \n", func, file, line, status, errormsg);
  155. STARPU_ASSERT(0);
  156. }
  /* Report an error which occurred when using a MIC device through SCIF,
   * print it in a human-readable form, then abort via STARPU_ASSERT(0).
   *
   * func, file, line: location of the failing call.
   * status: an errno value; it is translated with strerror().
   *
   * The message is written to stderr: the assertion that follows terminates
   * the process, and text still sitting in a buffered stdout could be lost.
   */
  void _starpu_mic_src_report_scif_error(const char *func, const char *file, const int line, const int status)
  {
      const char *errormsg = strerror(status);

      /* line is an int, hence %d. */
      fprintf(stderr, "SRC: oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
      STARPU_ASSERT(0);
  }
  167. /* Return the number of MIC devices in the system.
  168. * If the number of devices is already known, we use the cached value
  169. * without calling again COI. */
  170. unsigned _starpu_mic_src_get_device_count(void)
  171. {
  172. static unsigned short cached = 0;
  173. static unsigned nb_devices = 0;
  174. /* We don't need to call the COI API again if we already
  175. * have the result in cache */
  176. if (!cached)
  177. {
  178. COIRESULT res;
  179. res = COIEngineGetCount(COI_ISA_MIC, &nb_devices);
  180. /* If something is wrong with the COI engine, we shouldn't
  181. * use MIC devices (if there is any...) */
  182. if (res != COI_SUCCESS)
  183. nb_devices = 0;
  184. cached = 1;
  185. }
  186. return nb_devices;
  187. }
  188. unsigned starpu_mic_device_get_count(void)
  189. {
  190. // Return the number of configured MIC devices.
  191. struct _starpu_machine_config *config = _starpu_get_machine_config ();
  192. struct _starpu_machine_topology *topology = &config->topology;
  193. return topology->nmicdevices;
  194. }
  195. starpu_mic_kernel_t _starpu_mic_src_get_kernel_from_codelet(struct starpu_codelet *cl, unsigned nimpl)
  196. {
  197. starpu_mic_kernel_t kernel = NULL;
  198. starpu_mic_func_t func = _starpu_task_get_mic_nth_implementation(cl, nimpl);
  199. if (func)
  200. {
  201. /* We execute the function contained in the codelet, it must return a
  202. * pointer to the function to execute on the device, either specified
  203. * directly by the user or by a call to starpu_mic_get_func().
  204. */
  205. kernel = func();
  206. }
  207. else
  208. {
  209. /* If user dont define any starpu_mic_fun_t in cl->mic_func we try to use
  210. * cpu_func_name.
  211. */
  212. char *func_name = _starpu_task_get_cpu_name_nth_implementation(cl, nimpl);
  213. if (func_name)
  214. {
  215. starpu_mic_func_symbol_t symbol;
  216. _starpu_mic_src_register_kernel(&symbol, func_name);
  217. kernel = _starpu_mic_src_get_kernel(symbol);
  218. }
  219. }
  220. STARPU_ASSERT_MSG(kernel, "when STARPU_MIC is defined in 'where', mic_funcs or cpu_funcs_name has to be defined and the function be non-static");
  221. return kernel;
  222. }
  223. void(* _starpu_mic_src_get_kernel_from_job(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, struct _starpu_job *j))(void)
  224. {
  225. starpu_mic_kernel_t kernel = NULL;
  226. starpu_mic_func_t func = _starpu_task_get_mic_nth_implementation(j->task->cl, j->nimpl);
  227. if (func)
  228. {
  229. /* We execute the function contained in the codelet, it must return a
  230. * pointer to the function to execute on the device, either specified
  231. * directly by the user or by a call to starpu_mic_get_func().
  232. */
  233. kernel = func();
  234. }
  235. else
  236. {
  237. /* If user dont define any starpu_mic_fun_t in cl->mic_func we try to use
  238. * cpu_func_name.
  239. */
  240. char *func_name = _starpu_task_get_cpu_name_nth_implementation(j->task->cl, j->nimpl);
  241. if (func_name)
  242. {
  243. starpu_mic_func_symbol_t symbol;
  244. _starpu_mic_src_register_kernel(&symbol, func_name);
  245. kernel = _starpu_mic_src_get_kernel(symbol);
  246. }
  247. }
  248. STARPU_ASSERT(kernel);
  249. return (void (*)(void))kernel;
  250. }
  251. /* Initialize the node structure describing the MIC source.
  252. */
  253. void _starpu_mic_src_init(struct _starpu_mp_node *node)
  254. {
  255. extern COIPROCESS _starpu_mic_process[STARPU_MAXMICDEVS];
  256. /* Let's initialize the connection with the peered sink device */
  257. _starpu_mic_common_connect(&node->mp_connection.mic_endpoint,
  258. STARPU_TO_MIC_ID(node->peer_id),
  259. _starpu_mic_process[node->peer_id],
  260. STARPU_MIC_SINK_PORT_NUMBER(node->peer_id),
  261. STARPU_MIC_SOURCE_PORT_NUMBER);
  262. _starpu_mic_common_connect(&node->host_sink_dt_connection.mic_endpoint,
  263. STARPU_TO_MIC_ID(node->peer_id),
  264. _starpu_mic_process[node->peer_id],
  265. STARPU_MIC_SINK_DT_PORT_NUMBER(node->peer_id),
  266. STARPU_MIC_SOURCE_DT_PORT_NUMBER);
  267. }
  268. /* Deinitialize the MIC sink, close all the connections.
  269. */
  270. void _starpu_mic_src_deinit(struct _starpu_mp_node *node)
  271. {
  272. scif_close(node->host_sink_dt_connection.mic_endpoint);
  273. scif_close(node->mp_connection.mic_endpoint);
  274. }
  275. /* Get infos of the MIC associed to memory_node */
  276. static void _starpu_mic_get_engine_info(COI_ENGINE_INFO *info, int devid)
  277. {
  278. STARPU_ASSERT(devid >= 0 && devid < STARPU_MAXMICDEVS);
  279. if (COIEngineGetInfo(handles[devid], sizeof(*info), info) != COI_SUCCESS)
  280. STARPU_MIC_SRC_REPORT_COI_ERROR(errno);
  281. }
  282. /* TODO: call _starpu_memory_manager_set_global_memory_size instead */
  283. /* Return the size of the memory on the MIC associed to memory_node */
  284. size_t _starpu_mic_get_global_mem_size(int devid)
  285. {
  286. COI_ENGINE_INFO infos;
  287. _starpu_mic_get_engine_info(&infos, devid);
  288. return infos.PhysicalMemory;
  289. }
  290. /* Return the size of the free memory on the MIC associed to memory_node */
  291. size_t _starpu_mic_get_free_mem_size(int devid)
  292. {
  293. COI_ENGINE_INFO infos;
  294. _starpu_mic_get_engine_info(&infos, devid);
  295. return infos.PhysicalMemoryFree;
  296. }
  /* Allocate size bytes on the MIC device behind memory_node and store the
   * resulting address in *addr.
   * The return value is that of _starpu_src_common_allocate(); the original
   * note on this function reads "Return 0 if OK or 1 if not".
   */
  int _starpu_mic_allocate_memory(void **addr, size_t size, unsigned memory_node)
  {
      /* A check that the MIC has (1.25 * size) bytes free used to live here,
       * because a SCIF transfer is not possible when the MIC does not have
       * enough free memory, and in that case nothing can be reported to the
       * host. It is currently disabled: */
      //int devid = _starpu_memory_node_get_devid(memory_node);
      //if (_starpu_mic_get_free_mem_size(devid) < size * 1.25)
      // return 1;

      return _starpu_src_common_allocate(_starpu_mic_src_get_mp_node_from_memory_node(memory_node), addr, size);
  }
  312. /* Free memory on MIC.
  313. * Mic need size to free memory for use the function scif_unregister.
  314. */
  315. void _starpu_mic_free_memory(void *addr, size_t size, unsigned memory_node)
  316. {
  317. const struct _starpu_mp_node *mp_node = _starpu_mic_src_get_mp_node_from_memory_node(memory_node);
  318. struct _starpu_mic_free_command cmd = {addr, size};
  319. return _starpu_mp_common_send_command(mp_node, STARPU_FREE, &cmd, sizeof(cmd));
  320. }
  321. /* Transfert SIZE bytes from the address pointed by SRC in the SRC_NODE memory
  322. * node to the address pointed by DST in the DST_NODE memory node
  323. */
  324. int _starpu_mic_copy_ram_to_mic(void *src, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst, unsigned dst_node, size_t size)
  325. {
  326. const struct _starpu_mp_node *mp_node = _starpu_mic_src_get_mp_node_from_memory_node(dst_node);
  327. return _starpu_src_common_copy_host_to_sink(mp_node, src, dst, size);
  328. }
  329. /* Transfert SIZE bytes from the address pointed by SRC in the SRC_NODE memory
  330. * node to the address pointed by DST in the DST_NODE memory node
  331. */
  332. int _starpu_mic_copy_mic_to_ram(void *src, unsigned src_node, void *dst, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size)
  333. {
  334. const struct _starpu_mp_node *mp_node = _starpu_mic_src_get_mp_node_from_memory_node(src_node);
  335. return _starpu_src_common_copy_sink_to_host(mp_node, src, dst, size);
  336. }
  337. /* Asynchronous transfers */
  338. int _starpu_mic_copy_ram_to_mic_async(void *src, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst, unsigned dst_node, size_t size)
  339. {
  340. const struct _starpu_mp_node *mp_node = _starpu_mic_src_get_mp_node_from_memory_node(dst_node);
  341. if (scif_vwriteto(mp_node->host_sink_dt_connection.mic_endpoint, src, size, (off_t)dst, 0) < 0)
  342. STARPU_MIC_SRC_REPORT_SCIF_ERROR(errno);
  343. return 0;
  344. }
  345. int _starpu_mic_copy_mic_to_ram_async(void *src, unsigned src_node, void *dst, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size)
  346. {
  347. const struct _starpu_mp_node *mp_node = _starpu_mic_src_get_mp_node_from_memory_node(src_node);
  348. if (scif_vreadfrom(mp_node->host_sink_dt_connection.mic_endpoint, dst, size, (off_t)src, 0) < 0)
  349. STARPU_MIC_SRC_REPORT_SCIF_ERROR(errno);
  350. return 0;
  351. }
  352. /* Initialize a _starpu_mic_async_event. */
  353. int _starpu_mic_init_event(struct _starpu_mic_async_event *event, unsigned memory_node)
  354. {
  355. const struct _starpu_mp_node *mp_node = _starpu_mic_src_get_mp_node_from_memory_node(memory_node);
  356. scif_epd_t epd = mp_node->host_sink_dt_connection.mic_endpoint;
  357. event->memory_node = memory_node;
  358. /* Address of allocation must be multiple of the page size. */
  359. if (posix_memalign((void **)&(event->signal), 0x1000, sizeof(*(event->signal))) != 0)
  360. return -ENOMEM;
  361. *(event->signal) = 0;
  362. /* The size pass to scif_register is 0x1000 because it should be a multiple of the page size. */
  363. if (scif_register(epd, event->signal, 0x1000, (off_t)(event->signal), SCIF_PROT_WRITE, SCIF_MAP_FIXED) < 0)
  364. STARPU_MIC_SRC_REPORT_SCIF_ERROR(errno);
  365. /* Mark for a futur wait. */
  366. if (scif_fence_mark(epd, SCIF_FENCE_INIT_SELF, &(event->mark)) < 0)
  367. STARPU_MIC_SRC_REPORT_SCIF_ERROR(errno);
  368. /* Tell to scif to write STARPU_MIC_REQUEST_COMPLETE in event->signal when the transfer is complete.
  369. * We use this for test the end of a transfer. */
  370. if (scif_fence_signal(epd, (off_t)event->signal, STARPU_MIC_REQUEST_COMPLETE, 0, 0, SCIF_FENCE_INIT_SELF | SCIF_SIGNAL_LOCAL) < 0)
  371. STARPU_MIC_SRC_REPORT_SCIF_ERROR(errno);
  372. return 0;
  373. }
  374. /* Wait the end of the asynchronous request */
  375. void _starpu_mic_wait_request_completion(struct _starpu_mic_async_event *event)
  376. {
  377. if (event->signal != NULL)
  378. {
  379. const struct _starpu_mp_node *mp_node = _starpu_mic_src_get_mp_node_from_memory_node(event->memory_node);
  380. scif_epd_t epd = mp_node->host_sink_dt_connection.mic_endpoint;
  381. if (scif_fence_wait(epd, event->mark) < 0)
  382. STARPU_MIC_SRC_REPORT_SCIF_ERROR(errno);
  383. if (scif_unregister(epd, (off_t)(event->signal), 0x1000) < 0)
  384. STARPU_MIC_SRC_REPORT_SCIF_ERROR(errno);
  385. free(event->signal);
  386. event->signal = NULL;
  387. }
  388. }
  389. /* Test if a asynchronous request is end.
  390. * Return 1 if is end, 0 else. */
  391. int _starpu_mic_request_is_complete(struct _starpu_mic_async_event *event)
  392. {
  393. if (event->signal != NULL && *(event->signal) != STARPU_MIC_REQUEST_COMPLETE)
  394. return 0;
  395. const struct _starpu_mp_node *mp_node = _starpu_mic_src_get_mp_node_from_memory_node(event->memory_node);
  396. scif_epd_t epd = mp_node->host_sink_dt_connection.mic_endpoint;
  397. if (scif_unregister(epd, (off_t)(event->signal), 0x1000) < 0)
  398. STARPU_MIC_SRC_REPORT_SCIF_ERROR(errno);
  399. free(event->signal);
  400. event->signal = NULL;
  401. return 1;
  402. }
  403. void *_starpu_mic_src_worker(void *arg)
  404. {
  405. struct _starpu_worker_set *worker_set = arg;
  406. /* As all workers of a set share common data, we just use the first
  407. * one for intializing the following stuffs. */
  408. struct _starpu_worker *baseworker = &worker_set->workers[0];
  409. struct _starpu_machine_config *config = baseworker->config;
  410. unsigned baseworkerid = baseworker - config->workers;
  411. unsigned devid = baseworker->devid;
  412. unsigned i;
  413. /* unsigned memnode = baseworker->memory_node; */
  414. _starpu_driver_start(baseworker, _STARPU_FUT_MIC_KEY, 0);
  415. #ifdef STARPU_USE_FXT
  416. for (i = 1; i < worker_set->nworkers; i++)
  417. _starpu_worker_start(&worker_set->workers[i], _STARPU_FUT_MIC_KEY, 0);
  418. #endif
  419. // Current task for a thread managing a worker set has no sense.
  420. _starpu_set_current_task(NULL);
  421. for (i = 0; i < config->topology.nmiccores[devid]; i++)
  422. {
  423. struct _starpu_worker *worker = &config->workers[baseworkerid+i];
  424. snprintf(worker->name, sizeof(worker->name), "MIC %d core %u", devid, i);
  425. snprintf(worker->short_name, sizeof(worker->short_name), "MIC %d.%u", devid, i);
  426. }
  427. {
  428. char thread_name[16];
  429. snprintf(thread_name, sizeof(thread_name), "MIC %d", devid);
  430. starpu_pthread_setname(thread_name);
  431. }
  432. for (i = 0; i < worker_set->nworkers; i++)
  433. {
  434. struct _starpu_worker *worker = &worker_set->workers[i];
  435. _STARPU_TRACE_WORKER_INIT_END(worker->workerid);
  436. }
  437. /* tell the main thread that this one is ready */
  438. STARPU_PTHREAD_MUTEX_LOCK(&worker_set->mutex);
  439. baseworker->status = STATUS_UNKNOWN;
  440. worker_set->set_is_initialized = 1;
  441. STARPU_PTHREAD_COND_SIGNAL(&worker_set->ready_cond);
  442. STARPU_PTHREAD_MUTEX_UNLOCK(&worker_set->mutex);
  443. _starpu_src_common_worker(worker_set, baseworkerid, mic_nodes[devid]);
  444. return NULL;
  445. }