driver_mic_source.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2012 Inria
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <stdio.h>
  17. #include <scif.h>
  18. #include <starpu.h>
  19. #include <starpu_profiling.h>
  20. #include <core/sched_policy.h>
  21. #include <core/workers.h>
  22. #include <common/uthash.h>
  23. #include <drivers/driver_common/driver_common.h>
  24. #include <drivers/mp_common/source_common.h>
  25. #include "driver_mic_common.h"
  26. #include "driver_mic_source.h"
  27. /* Array of node descriptors, indexed by MIC device id, holding the
  28. * information used to send data to and receive data from each device. */
  29. struct _starpu_mp_node *mic_nodes[STARPU_MAXMICDEVS];
  30. static COIENGINE handles[STARPU_MAXMICDEVS]; /* COI engine handles, indexed by device id */
  31. /* Structure used by the host to store information about a kernel executable
  32. * on a MIC device: its name, and its address on each device.
  33. * If a kernel has been initialized, a lookup has already been done and the
  34. * device knows how to call it; otherwise the host still needs to do a lookup.
  35. */
  36. struct _starpu_mic_kernel
  37. {
  38. UT_hash_handle hh; /* uthash bookkeeping for the kernels table */
  39. char *name; /* heap-allocated kernel name, used as the hash key */
  40. starpu_mic_kernel_t func[STARPU_MAXMICDEVS]; /* per-device address, NULL until looked up */
  41. } *kernels;
  42. /* Mutex protecting concurrent access to the kernels hash table.
  43. */
  44. starpu_pthread_mutex_t htbl_mutex = PTHREAD_MUTEX_INITIALIZER;
  45. /* Number of MIC workers initialized so far.
  46. */
  47. unsigned int nb_mic_worker_init = 0;
  48. starpu_pthread_mutex_t nb_mic_worker_init_mutex = PTHREAD_MUTEX_INITIALIZER; /* presumably guards nb_mic_worker_init -- TODO confirm */
  49. /* Returns the ID of the MIC device controlled by the caller.
  50. * If the worker does not control a MIC device, -ENODEV is returned.
  51. */
  52. //static int _starpu_mic_get_devid(void)
  53. //{
  54. // struct _starpu_machine_config *config = _starpu_get_machine_config();
  55. // int workerid = starpu_worker_get_id();
  56. //
  57. // if (config->workers[workerid].arch != STARPU_MIC_WORKER)
  58. // return -ENODEV;
  59. //
  60. // return config->workers[workerid].devid;
  61. //}
  62. struct _starpu_mp_node *_starpu_mic_src_get_actual_thread_mp_node()
  63. {
  64. struct _starpu_worker *actual_worker = _starpu_get_local_worker_key();
  65. STARPU_ASSERT(actual_worker);
  66. int nodeid = actual_worker->mp_nodeid;
  67. STARPU_ASSERT(nodeid >= 0 && nodeid < STARPU_MAXMICDEVS);
  68. return mic_nodes[nodeid];
  69. }
  70. const struct _starpu_mp_node *_starpu_mic_src_get_mp_node_from_memory_node(int memory_node)
  71. {
  72. int nodeid = _starpu_memory_node_get_devid(memory_node);
  73. STARPU_ASSERT_MSG(nodeid >= 0 && nodeid < STARPU_MAXMICDEVS, "bogus nodeid %d for memory node %d\n", nodeid, memory_node);
  74. return mic_nodes[nodeid];
  75. }
  76. static void _starpu_mic_src_free_kernel(void *kernel)
  77. {
  78. struct _starpu_mic_kernel *k = kernel;
  79. free(k->name);
  80. free(kernel);
  81. }
  82. void _starpu_mic_clear_kernels(void)
  83. {
  84. struct _starpu_mic_kernel *kernel, *tmp;
  85. HASH_ITER(hh, kernels, kernel, tmp)
  86. {
  87. HASH_DEL(kernels, kernel);
  88. free(kernel);
  89. }
  90. }
  91. int _starpu_mic_src_register_kernel(starpu_mic_func_symbol_t *symbol, const char *func_name)
  92. {
  93. unsigned int func_name_size = (strlen(func_name) + 1) * sizeof(char);
  94. STARPU_PTHREAD_MUTEX_LOCK(&htbl_mutex);
  95. struct _starpu_mic_kernel *kernel;
  96. HASH_FIND_STR(kernels, func_name, kernel);
  97. if (kernel != NULL)
  98. {
  99. STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
  100. // Function already in the table.
  101. *symbol = kernel;
  102. return 0;
  103. }
  104. kernel = malloc(sizeof(*kernel));
  105. if (kernel == NULL)
  106. {
  107. STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
  108. return -ENOMEM;
  109. }
  110. kernel->name = malloc(func_name_size);
  111. if (kernel->name == NULL)
  112. {
  113. STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
  114. free(kernel);
  115. return -ENOMEM;
  116. }
  117. memcpy(kernel->name, func_name, func_name_size);
  118. HASH_ADD_STR(kernels, name, kernel);
  119. unsigned int nb_mic_devices = _starpu_mic_src_get_device_count();
  120. unsigned int i;
  121. for (i = 0; i < nb_mic_devices; ++i)
  122. kernel->func[i] = NULL;
  123. STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
  124. *symbol = kernel;
  125. return 0;
  126. }
  127. starpu_mic_kernel_t _starpu_mic_src_get_kernel(starpu_mic_func_symbol_t symbol)
  128. {
  129. int workerid = starpu_worker_get_id();
  130. /* This function has to be called in the codelet only, by the thread
  131. * which will handle the task */
  132. if (workerid < 0)
  133. return NULL;
  134. int nodeid = starpu_worker_get_mp_nodeid(workerid);
  135. struct _starpu_mic_kernel *kernel = symbol;
  136. if (kernel->func[nodeid] == NULL)
  137. {
  138. struct _starpu_mp_node *node = mic_nodes[nodeid];
  139. int ret = _starpu_src_common_lookup(node, (void (**)(void))&kernel->func[nodeid], kernel->name);
  140. if (ret)
  141. return NULL;
  142. }
  143. return kernel->func[nodeid];
  144. }
  145. /* Report an error which occured when using a MIC device
  146. * and print this error in a human-readable style.
  147. * It hanbles errors occuring when using COI.
  148. */
  149. void _starpu_mic_src_report_coi_error(const char *func, const char *file,
  150. const int line, const COIRESULT status)
  151. {
  152. const char *errormsg = COIResultGetName(status);
  153. printf("SRC: oops in %s (%s:%u)... %d: %s \n", func, file, line, status, errormsg);
  154. STARPU_ASSERT(0);
  155. }
  /* Report an error that occurred while using a MIC device through SCIF and
   * print it in a human-readable form, then abort through STARPU_ASSERT(0).
   *
   * func, file, line: location of the failing call.
   * status:           error number, turned into text with strerror.
   *
   * The message is written to stderr so that it is not left sitting in a
   * buffered stdout when the assertion fires. */
  void _starpu_mic_src_report_scif_error(const char *func, const char *file, const int line, const int status)
  {
      const char *errormsg = strerror(status);

      /* line is an int, so it is printed with %d. */
      fprintf(stderr, "SRC: oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
      STARPU_ASSERT(0);
  }
  166. /* Return the number of MIC devices in the system.
  167. * If the number of devices is already known, we use the cached value
  168. * without calling again COI. */
  169. unsigned _starpu_mic_src_get_device_count(void)
  170. {
  171. static unsigned short cached = 0;
  172. static unsigned nb_devices = 0;
  173. /* We don't need to call the COI API again if we already
  174. * have the result in cache */
  175. if (!cached)
  176. {
  177. COIRESULT res;
  178. res = COIEngineGetCount(COI_ISA_MIC, &nb_devices);
  179. /* If something is wrong with the COI engine, we shouldn't
  180. * use MIC devices (if there is any...) */
  181. if (res != COI_SUCCESS)
  182. nb_devices = 0;
  183. cached = 1;
  184. }
  185. return nb_devices;
  186. }
  187. unsigned starpu_mic_device_get_count(void)
  188. {
  189. // Return the number of configured MIC devices.
  190. struct _starpu_machine_config *config = _starpu_get_machine_config ();
  191. struct _starpu_machine_topology *topology = &config->topology;
  192. return topology->nmicdevices;
  193. }
  194. starpu_mic_kernel_t _starpu_mic_src_get_kernel_from_codelet(struct starpu_codelet *cl, unsigned nimpl)
  195. {
  196. starpu_mic_kernel_t kernel = NULL;
  197. starpu_mic_func_t func = _starpu_task_get_mic_nth_implementation(cl, nimpl);
  198. if (func)
  199. {
  200. /* We execute the function contained in the codelet, it must return a
  201. * pointer to the function to execute on the device, either specified
  202. * directly by the user or by a call to starpu_mic_get_func().
  203. */
  204. kernel = func();
  205. }
  206. else
  207. {
  208. /* If user dont define any starpu_mic_fun_t in cl->mic_func we try to use
  209. * cpu_func_name.
  210. */
  211. char *func_name = _starpu_task_get_cpu_name_nth_implementation(cl, nimpl);
  212. if (func_name)
  213. {
  214. starpu_mic_func_symbol_t symbol;
  215. _starpu_mic_src_register_kernel(&symbol, func_name);
  216. kernel = _starpu_mic_src_get_kernel(symbol);
  217. }
  218. }
  219. STARPU_ASSERT(kernel);
  220. return kernel;
  221. }
  222. void(* _starpu_mic_src_get_kernel_from_job(const struct _starpu_mp_node *node STARPU_ATTRIBUTE_UNUSED, struct _starpu_job *j))(void)
  223. {
  224. starpu_mic_kernel_t kernel = NULL;
  225. starpu_mic_func_t func = _starpu_task_get_mic_nth_implementation(j->task->cl, j->nimpl);
  226. if (func)
  227. {
  228. /* We execute the function contained in the codelet, it must return a
  229. * pointer to the function to execute on the device, either specified
  230. * directly by the user or by a call to starpu_mic_get_func().
  231. */
  232. kernel = func();
  233. }
  234. else
  235. {
  236. /* If user dont define any starpu_mic_fun_t in cl->mic_func we try to use
  237. * cpu_func_name.
  238. */
  239. char *func_name = _starpu_task_get_cpu_name_nth_implementation(j->task->cl, j->nimpl);
  240. if (func_name)
  241. {
  242. starpu_mic_func_symbol_t symbol;
  243. _starpu_mic_src_register_kernel(&symbol, func_name);
  244. kernel = _starpu_mic_src_get_kernel(symbol);
  245. }
  246. }
  247. STARPU_ASSERT(kernel);
  248. return (void (*)(void))kernel;
  249. }
  250. /* Initialize the node structure describing the MIC source.
  251. */
  252. void _starpu_mic_src_init(struct _starpu_mp_node *node)
  253. {
  254. /* Let's initialize the connection with the peered sink device */
  255. _starpu_mic_common_connect(&node->mp_connection.mic_endpoint,
  256. STARPU_TO_MIC_ID(node->peer_id),
  257. STARPU_MIC_SINK_PORT_NUMBER(node->peer_id),
  258. STARPU_MIC_SOURCE_PORT_NUMBER);
  259. _starpu_mic_common_connect(&node->host_sink_dt_connection.mic_endpoint,
  260. STARPU_TO_MIC_ID(node->peer_id),
  261. STARPU_MIC_SINK_DT_PORT_NUMBER(node->peer_id),
  262. STARPU_MIC_SOURCE_DT_PORT_NUMBER);
  263. }
  264. /* Deinitialize the MIC sink, close all the connections.
  265. */
  266. void _starpu_mic_src_deinit(struct _starpu_mp_node *node)
  267. {
  268. scif_close(node->host_sink_dt_connection.mic_endpoint);
  269. scif_close(node->mp_connection.mic_endpoint);
  270. }
  271. /* Get infos of the MIC associed to memory_node */
  272. static void _starpu_mic_get_engine_info(COI_ENGINE_INFO *info, int devid)
  273. {
  274. STARPU_ASSERT(devid >= 0 && devid < STARPU_MAXMICDEVS);
  275. if (COIEngineGetInfo(handles[devid], sizeof(*info), info) != COI_SUCCESS)
  276. STARPU_MIC_SRC_REPORT_COI_ERROR(errno);
  277. }
  278. /* TODO: call _starpu_memory_manager_set_global_memory_size instead */
  279. /* Return the size of the memory on the MIC associed to memory_node */
  280. size_t _starpu_mic_get_global_mem_size(int devid)
  281. {
  282. COI_ENGINE_INFO infos;
  283. _starpu_mic_get_engine_info(&infos, devid);
  284. return infos.PhysicalMemory;
  285. }
  286. /* Return the size of the free memory on the MIC associed to memory_node */
  287. size_t _starpu_mic_get_free_mem_size(int devid)
  288. {
  289. COI_ENGINE_INFO infos;
  290. _starpu_mic_get_engine_info(&infos, devid);
  291. return infos.PhysicalMemoryFree;
  292. }
  /* Allocate size bytes on the MIC device backing memory_node and store the
   * resulting address in *addr.
   *
   * Returns whatever _starpu_src_common_allocate returns (the original
   * comment states: 0 if OK, 1 if not).
   *
   * A check that the device has at least 1.25 * size bytes free used to be
   * planned here, because a SCIF transfer is not possible when the MIC does
   * not have enough free memory and the host cannot be told about it. That
   * check is currently disabled. */
  int _starpu_mic_allocate_memory(void **addr, size_t size, unsigned memory_node)
  {
      return _starpu_src_common_allocate(
          _starpu_mic_src_get_mp_node_from_memory_node(memory_node),
          addr, size);
  }
  308. /* Free memory on MIC.
  309. * Mic need size to free memory for use the function scif_unregister.
  310. */
  311. void _starpu_mic_free_memory(void *addr, size_t size, unsigned memory_node)
  312. {
  313. const struct _starpu_mp_node *mp_node = _starpu_mic_src_get_mp_node_from_memory_node(memory_node);
  314. struct _starpu_mic_free_command cmd = {addr, size};
  315. return _starpu_mp_common_send_command(mp_node, STARPU_FREE, &cmd, sizeof(cmd));
  316. }
  317. /* Transfert SIZE bytes from the address pointed by SRC in the SRC_NODE memory
  318. * node to the address pointed by DST in the DST_NODE memory node
  319. */
  320. int _starpu_mic_copy_ram_to_mic(void *src, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst, unsigned dst_node, size_t size)
  321. {
  322. const struct _starpu_mp_node *mp_node = _starpu_mic_src_get_mp_node_from_memory_node(dst_node);
  323. return _starpu_src_common_copy_host_to_sink(mp_node, src, dst, size);
  324. }
  325. /* Transfert SIZE bytes from the address pointed by SRC in the SRC_NODE memory
  326. * node to the address pointed by DST in the DST_NODE memory node
  327. */
  328. int _starpu_mic_copy_mic_to_ram(void *src, unsigned src_node, void *dst, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size)
  329. {
  330. const struct _starpu_mp_node *mp_node = _starpu_mic_src_get_mp_node_from_memory_node(src_node);
  331. return _starpu_src_common_copy_sink_to_host(mp_node, src, dst, size);
  332. }
  333. /* Asynchronous transfers */
  334. int _starpu_mic_copy_ram_to_mic_async(void *src, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst, unsigned dst_node, size_t size)
  335. {
  336. const struct _starpu_mp_node *mp_node = _starpu_mic_src_get_mp_node_from_memory_node(dst_node);
  337. if (scif_vwriteto(mp_node->host_sink_dt_connection.mic_endpoint, src, size, (off_t)dst, 0) < 0)
  338. STARPU_MIC_SRC_REPORT_SCIF_ERROR(errno);
  339. return 0;
  340. }
  341. int _starpu_mic_copy_mic_to_ram_async(void *src, unsigned src_node, void *dst, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size)
  342. {
  343. const struct _starpu_mp_node *mp_node = _starpu_mic_src_get_mp_node_from_memory_node(src_node);
  344. if (scif_vreadfrom(mp_node->host_sink_dt_connection.mic_endpoint, dst, size, (off_t)src, 0) < 0)
  345. STARPU_MIC_SRC_REPORT_SCIF_ERROR(errno);
  346. return 0;
  347. }
  348. /* Initialize a _starpu_mic_async_event. */
  349. int _starpu_mic_init_event(struct _starpu_mic_async_event *event, unsigned memory_node)
  350. {
  351. const struct _starpu_mp_node *mp_node = _starpu_mic_src_get_mp_node_from_memory_node(memory_node);
  352. scif_epd_t epd = mp_node->host_sink_dt_connection.mic_endpoint;
  353. event->memory_node = memory_node;
  354. /* Address of allocation must be multiple of the page size. */
  355. if (posix_memalign((void **)&(event->signal), 0x1000, sizeof(*(event->signal))) != 0)
  356. return -ENOMEM;
  357. *(event->signal) = 0;
  358. /* The size pass to scif_register is 0x1000 because it should be a multiple of the page size. */
  359. if (scif_register(epd, event->signal, 0x1000, (off_t)(event->signal), SCIF_PROT_WRITE, SCIF_MAP_FIXED) < 0)
  360. STARPU_MIC_SRC_REPORT_SCIF_ERROR(errno);
  361. /* Mark for a futur wait. */
  362. if (scif_fence_mark(epd, SCIF_FENCE_INIT_SELF, &(event->mark)) < 0)
  363. STARPU_MIC_SRC_REPORT_SCIF_ERROR(errno);
  364. /* Tell to scif to write STARPU_MIC_REQUEST_COMPLETE in event->signal when the transfer is complete.
  365. * We use this for test the end of a transfer. */
  366. if (scif_fence_signal(epd, (off_t)event->signal, STARPU_MIC_REQUEST_COMPLETE, 0, 0, SCIF_FENCE_INIT_SELF | SCIF_SIGNAL_LOCAL) < 0)
  367. STARPU_MIC_SRC_REPORT_SCIF_ERROR(errno);
  368. return 0;
  369. }
  370. /* Wait the end of the asynchronous request */
  371. void _starpu_mic_wait_request_completion(struct _starpu_mic_async_event *event)
  372. {
  373. if (event->signal != NULL)
  374. {
  375. const struct _starpu_mp_node *mp_node = _starpu_mic_src_get_mp_node_from_memory_node(event->memory_node);
  376. scif_epd_t epd = mp_node->host_sink_dt_connection.mic_endpoint;
  377. if (scif_fence_wait(epd, event->mark) < 0)
  378. STARPU_MIC_SRC_REPORT_SCIF_ERROR(errno);
  379. if (scif_unregister(epd, (off_t)(event->signal), 0x1000) < 0)
  380. STARPU_MIC_SRC_REPORT_SCIF_ERROR(errno);
  381. free(event->signal);
  382. event->signal = NULL;
  383. }
  384. }
  385. /* Test if a asynchronous request is end.
  386. * Return 1 if is end, 0 else. */
  387. int _starpu_mic_request_is_complete(struct _starpu_mic_async_event *event)
  388. {
  389. if (event->signal != NULL && *(event->signal) != STARPU_MIC_REQUEST_COMPLETE)
  390. return 0;
  391. const struct _starpu_mp_node *mp_node = _starpu_mic_src_get_mp_node_from_memory_node(event->memory_node);
  392. scif_epd_t epd = mp_node->host_sink_dt_connection.mic_endpoint;
  393. if (scif_unregister(epd, (off_t)(event->signal), 0x1000) < 0)
  394. STARPU_MIC_SRC_REPORT_SCIF_ERROR(errno);
  395. free(event->signal);
  396. event->signal = NULL;
  397. return 1;
  398. }
  399. void *_starpu_mic_src_worker(void *arg)
  400. {
  401. struct _starpu_worker_set *worker_set = arg;
  402. /* As all workers of a set share common data, we just use the first
  403. * one for intializing the following stuffs. */
  404. struct _starpu_worker *baseworker = &worker_set->workers[0];
  405. struct _starpu_machine_config *config = baseworker->config;
  406. unsigned baseworkerid = baseworker - config->workers;
  407. unsigned mp_nodeid = baseworker->mp_nodeid;
  408. unsigned i;
  409. /* unsigned memnode = baseworker->memory_node; */
  410. _starpu_worker_init(baseworker, _STARPU_FUT_MIC_KEY);
  411. // Current task for a thread managing a worker set has no sense.
  412. _starpu_set_current_task(NULL);
  413. for (i = 0; i < config->topology.nmiccores[mp_nodeid]; i++)
  414. {
  415. struct _starpu_worker *worker = &config->workers[baseworkerid+i];
  416. snprintf(worker->name, sizeof(worker->name), "MIC %d core %u", mp_nodeid, i);
  417. }
  418. baseworker->status = STATUS_UNKNOWN;
  419. _STARPU_TRACE_WORKER_INIT_END;
  420. /* tell the main thread that this one is ready */
  421. STARPU_PTHREAD_MUTEX_LOCK(&worker_set->mutex);
  422. worker_set->set_is_initialized = 1;
  423. STARPU_PTHREAD_COND_SIGNAL(&worker_set->ready_cond);
  424. STARPU_PTHREAD_MUTEX_UNLOCK(&worker_set->mutex);
  425. _starpu_src_common_worker(worker_set, baseworkerid, mic_nodes[mp_nodeid]);
  426. _STARPU_TRACE_WORKER_DEINIT_START;
  427. _STARPU_TRACE_WORKER_DEINIT_END(_STARPU_FUT_CUDA_KEY);
  428. return NULL;
  429. }