  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2010, 2012-2013 Université de Bordeaux 1
  4. * Copyright (C) 2010, 2011, 2012, 2013 Centre National de la Recherche Scientifique
  5. *
  6. * StarPU is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation; either version 2.1 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * StarPU is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. *
  15. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. */
  17. #include <errno.h>
  18. #include <core/workers.h>
  19. #include <common/config.h>
  20. #include <starpu.h>
  21. #include <drivers/opencl/driver_opencl.h>
  22. #include <datawizard/memory_manager.h>
  23. static size_t malloc_align = sizeof(void*);
  24. void starpu_malloc_set_align(size_t align)
  25. {
  26. STARPU_ASSERT_MSG(!(align & (align - 1)), "Alignment given to starpu_malloc_set_align must be a power of two");
  27. if (malloc_align < align)
  28. malloc_align = align;
  29. }
#if (defined(STARPU_USE_CUDA) && !defined(HAVE_CUDA_MEMCPY_PEER))// || defined(STARPU_USE_OPENCL)
/* Argument bundle passed to the pinned-allocation codelet: where to store
 * the resulting pointer, and how many bytes to allocate. */
struct malloc_pinned_codelet_struct
{
	void **ptr;	/* out: receives the address of the allocated area */
	size_t dim;	/* in: requested size in bytes */
};
#endif
  37. /* Would be difficult to do it this way, we need to remember the cl_mem to be able to free it later... */
  38. //#ifdef STARPU_USE_OPENCL
  39. //static void malloc_pinned_opencl_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *arg)
  40. //{
  41. // struct malloc_pinned_codelet_struct *s = arg;
  42. // // *(s->ptr) = malloc(s->dim);
  43. // starpu_opencl_allocate_memory((void **)(s->ptr), s->dim, CL_MEM_READ_WRITE|CL_MEM_ALLOC_HOST_PTR);
  44. //}
  45. //#endif
  46. #if defined(STARPU_USE_CUDA) && !defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
  47. static void malloc_pinned_cuda_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *arg)
  48. {
  49. struct malloc_pinned_codelet_struct *s = arg;
  50. cudaError_t cures;
  51. cures = cudaHostAlloc((void **)(s->ptr), s->dim, cudaHostAllocPortable);
  52. if (STARPU_UNLIKELY(cures))
  53. STARPU_CUDA_REPORT_ERROR(cures);
  54. }
  55. #endif
#if (defined(STARPU_USE_CUDA) && !defined(HAVE_CUDA_MEMCPY_PEER)) && !defined(STARPU_SIMGRID)// || defined(STARPU_USE_OPENCL)
/* History-based performance model so the scheduler can account for the
 * cost of pinned allocations. */
static struct starpu_perfmodel malloc_pinned_model =
{
	.type = STARPU_HISTORY_BASED,
	.symbol = "malloc_pinned"
};
/* Codelet used by starpu_malloc() to perform the pinned allocation on a
 * CUDA worker when cudaHostAlloc cannot be called from any thread
 * (no memcpy-peer support). Takes no data buffers; its argument is a
 * struct malloc_pinned_codelet_struct. */
static struct starpu_codelet malloc_pinned_cl =
{
	.cuda_funcs = {malloc_pinned_cuda_codelet, NULL},
//#ifdef STARPU_USE_OPENCL
//	.opencl_funcs = {malloc_pinned_opencl_codelet, NULL},
//#endif
	.nbuffers = 0,
	.model = &malloc_pinned_model
};
#endif
  72. int starpu_malloc(void **A, size_t dim)
  73. {
  74. if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
  75. return -EDEADLK;
  76. STARPU_ASSERT(A);
  77. if (_starpu_memory_manager_can_allocate_size(dim, 0) == 0)
  78. {
  79. size_t freed;
  80. size_t reclaim = 0.25*_starpu_memory_manager_get_global_memory_size(0);
  81. if (2*dim > reclaim)
  82. reclaim = 2*dim;
  83. _STARPU_DEBUG("There is not enough memory left, we are going to reclaim %ld\n", 2*reclaim);
  84. _STARPU_TRACE_START_MEMRECLAIM(0);
  85. freed = _starpu_memory_reclaim_generic(0, 0, 2*reclaim);
  86. _STARPU_TRACE_END_MEMRECLAIM(0);
  87. if (freed < dim)
  88. // We could not reclaim enough memory
  89. return 1;
  90. }
  91. #ifndef STARPU_SIMGRID
  92. if (_starpu_can_submit_cuda_task())
  93. {
  94. #ifdef STARPU_USE_CUDA
  95. #ifdef HAVE_CUDA_MEMCPY_PEER
  96. cudaError_t cures;
  97. cures = cudaHostAlloc(A, dim, cudaHostAllocPortable);
  98. if (STARPU_UNLIKELY(cures))
  99. STARPU_CUDA_REPORT_ERROR(cures);
  100. #else
  101. int push_res;
  102. struct malloc_pinned_codelet_struct s =
  103. {
  104. .ptr = A,
  105. .dim = dim
  106. };
  107. malloc_pinned_cl.where = STARPU_CUDA;
  108. struct starpu_task *task = starpu_task_create();
  109. task->callback_func = NULL;
  110. task->cl = &malloc_pinned_cl;
  111. task->cl_arg = &s;
  112. task->synchronous = 1;
  113. _starpu_exclude_task_from_dag(task);
  114. push_res = _starpu_task_submit_internally(task);
  115. STARPU_ASSERT(push_res != -ENODEV);
  116. #endif /* HAVE_CUDA_MEMCPY_PEER */
  117. #endif /* STARPU_USE_CUDA */
  118. }
  119. // else if (_starpu_can_submit_opencl_task())
  120. // {
  121. //#ifdef STARPU_USE_OPENCL
  122. // int push_res;
  123. //
  124. // struct malloc_pinned_codelet_struct s =
  125. // {
  126. // .ptr = A,
  127. // .dim = dim
  128. // };
  129. //
  130. // malloc_pinned_cl.where = STARPU_OPENCL;
  131. // struct starpu_task *task = starpu_task_create();
  132. // task->callback_func = NULL;
  133. // task->cl = &malloc_pinned_cl;
  134. // task->cl_arg = &s;
  135. //
  136. // task->synchronous = 1;
  137. //
  138. // _starpu_exclude_task_from_dag(task);
  139. //
  140. // push_res = _starpu_task_submit_internally(task);
  141. // STARPU_ASSERT(push_res != -ENODEV);
  142. //#endif /* STARPU_USE_OPENCL */
  143. // }
  144. else
  145. #endif /* STARPU_SIMGRID */
  146. {
  147. #ifdef STARPU_HAVE_POSIX_MEMALIGN
  148. if (malloc_align != sizeof(void*))
  149. {
  150. if (posix_memalign(A, malloc_align, dim))
  151. *A = NULL;
  152. }
  153. else
  154. #elif defined(STARPU_HAVE_MEMALIGN)
  155. if (malloc_align != sizeof(void*))
  156. {
  157. *A = memalign(malloc_align, dim);
  158. }
  159. else
  160. #endif /* STARPU_HAVE_POSIX_MEMALIGN */
  161. {
  162. *A = malloc(dim);
  163. }
  164. }
  165. STARPU_ASSERT(*A);
  166. return 0;
  167. }
  168. #if defined(STARPU_USE_CUDA) && !defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
  169. static void free_pinned_cuda_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *arg)
  170. {
  171. cudaError_t cures;
  172. cures = cudaFreeHost(arg);
  173. if (STARPU_UNLIKELY(cures))
  174. STARPU_CUDA_REPORT_ERROR(cures);
  175. }
  176. #endif
  177. //#ifdef STARPU_USE_OPENCL
  178. //static void free_pinned_opencl_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *arg)
  179. //{
  180. // // free(arg);
  181. // int err = clReleaseMemObject(arg);
  182. // if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
  183. //}
  184. //#endif
#if defined(STARPU_USE_CUDA) && !defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID) // || defined(STARPU_USE_OPENCL)
/* History-based performance model for the pinned-free codelet. */
static struct starpu_perfmodel free_pinned_model =
{
	.type = STARPU_HISTORY_BASED,
	.symbol = "free_pinned"
};
/* Codelet used by starpu_free() to release pinned memory from a CUDA
 * worker when cudaFreeHost cannot be called from any thread (no
 * memcpy-peer support). Takes no data buffers; its argument is the
 * pointer to free. */
static struct starpu_codelet free_pinned_cl =
{
	.cuda_funcs = {free_pinned_cuda_codelet, NULL},
//#ifdef STARPU_USE_OPENCL
//	.opencl_funcs = {free_pinned_opencl_codelet, NULL},
//#endif
	.nbuffers = 0,
	.model = &free_pinned_model
};
#endif
/* Free a buffer obtained with starpu_malloc(). When CUDA workers are in
 * use, pinned buffers must be released with cudaFreeHost(), possibly from
 * a CUDA worker (via a synchronous internal task) when memcpy-peer support
 * is not available. Returns 0 on success, -EDEADLK when called from a
 * context that must not block. */
int starpu_free(void *A)
{
	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
		return -EDEADLK;
#ifndef STARPU_SIMGRID
#ifdef STARPU_USE_CUDA
	if (_starpu_can_submit_cuda_task())
	{
#ifndef HAVE_CUDA_MEMCPY_PEER
	if (!_starpu_is_initialized())
	{
#endif
		/* This is especially useful when starpu_free is called from
		 * the GCC-plugin. starpu_shutdown will probably have already
		 * been called, so we will not be able to submit a task. */
		cudaError_t err = cudaFreeHost(A);
		if (STARPU_UNLIKELY(err))
			STARPU_CUDA_REPORT_ERROR(err);
#ifndef HAVE_CUDA_MEMCPY_PEER
	}
	else
	{
		/* Release from a CUDA worker through a synchronous internal task. */
		int push_res;
		free_pinned_cl.where = STARPU_CUDA;
		struct starpu_task *task = starpu_task_create();
		task->callback_func = NULL;
		task->cl = &free_pinned_cl;
		task->cl_arg = A;
		task->synchronous = 1;
		/* Internal task: keep it out of the application DAG and traces. */
		_starpu_exclude_task_from_dag(task);
		push_res = _starpu_task_submit_internally(task);
		STARPU_ASSERT(push_res != -ENODEV);
	}
#endif
//	else if (_starpu_can_submit_opencl_task())
//	{
//#ifdef STARPU_USE_OPENCL
//		int push_res;
//
//		free_pinned_cl.where = STARPU_OPENCL;
//		struct starpu_task *task = starpu_task_create();
//		task->callback_func = NULL;
//		task->cl = &free_pinned_cl;
//		task->cl_arg = A;
//
//		task->synchronous = 1;
//
//		_starpu_exclude_task_from_dag(task);
//
//		push_res = starpu_task_submit(task);
//		STARPU_ASSERT(push_res != -ENODEV);
//#endif
//	}
	} else
#endif
#endif
	{
		/* No CUDA involved: the buffer came from plain malloc/memalign. */
		free(A);
	}
#ifdef STARPU_DEVEL
#warning FIXME: how do we know the size
#endif
	/* NOTE(review): the size of the freed area is unknown here, so the
	 * memory-manager accounting cannot be updated (see FIXME above). */
//	_starpu_memory_manager_deallocate_size(size, 0);
	return 0;
}
#ifdef STARPU_SIMGRID
/* Mutexes protecting the simulated (sleep-based) CUDA/OpenCL allocations
 * and frees in starpu_malloc_on_node()/starpu_free_on_node() below. */
static _starpu_pthread_mutex_t cuda_alloc_mutex = _STARPU_PTHREAD_MUTEX_INITIALIZER;
static _starpu_pthread_mutex_t opencl_alloc_mutex = _STARPU_PTHREAD_MUTEX_INITIALIZER;
#endif
  270. uintptr_t
  271. starpu_malloc_on_node(unsigned dst_node, size_t size)
  272. {
  273. uintptr_t addr = 0;
  274. #ifdef STARPU_USE_CUDA
  275. cudaError_t status;
  276. #endif
  277. if (_starpu_memory_manager_can_allocate_size(size, dst_node) == 0)
  278. return 0;
  279. switch(starpu_node_get_kind(dst_node))
  280. {
  281. case STARPU_CPU_RAM:
  282. {
  283. addr = (uintptr_t)malloc(size);
  284. break;
  285. }
  286. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  287. case STARPU_CUDA_RAM:
  288. #ifdef STARPU_SIMGRID
  289. #ifdef STARPU_DEVEL
  290. #warning TODO: record used memory, using a simgrid property to know the available memory
  291. #endif
  292. /* Sleep 10µs for the allocation */
  293. _STARPU_PTHREAD_MUTEX_LOCK(&cuda_alloc_mutex);
  294. MSG_process_sleep(0.000010);
  295. addr = 1;
  296. _STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_alloc_mutex);
  297. #else
  298. status = cudaMalloc((void **)&addr, size);
  299. if (!addr || (status != cudaSuccess))
  300. {
  301. if (STARPU_UNLIKELY(status != cudaErrorMemoryAllocation))
  302. STARPU_CUDA_REPORT_ERROR(status);
  303. addr = 0;
  304. }
  305. #endif
  306. break;
  307. #endif
  308. #if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
  309. case STARPU_OPENCL_RAM:
  310. {
  311. #ifdef STARPU_SIMGRID
  312. /* Sleep 10µs for the allocation */
  313. _STARPU_PTHREAD_MUTEX_LOCK(&opencl_alloc_mutex);
  314. MSG_process_sleep(0.000010);
  315. addr = 1;
  316. _STARPU_PTHREAD_MUTEX_UNLOCK(&opencl_alloc_mutex);
  317. #else
  318. int ret;
  319. cl_mem ptr;
  320. ret = starpu_opencl_allocate_memory(&ptr, size, CL_MEM_READ_WRITE);
  321. if (ret)
  322. {
  323. addr = 0;
  324. }
  325. else
  326. {
  327. addr = (uintptr_t)ptr;
  328. }
  329. break;
  330. #endif
  331. }
  332. #endif
  333. default:
  334. STARPU_ABORT();
  335. }
  336. if (addr == 0)
  337. {
  338. // Allocation failed, gives the memory back to the memory manager
  339. _starpu_memory_manager_deallocate_size(size, dst_node);
  340. }
  341. return addr;
  342. }
/* Free an area of SIZE bytes at address ADDR previously allocated on
 * memory node DST_NODE with starpu_malloc_on_node(), and return the
 * corresponding amount to the memory manager. In simgrid mode the free is
 * only simulated. */
void
starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size)
{
	enum starpu_node_kind kind = starpu_node_get_kind(dst_node);
	switch(kind)
	{
#ifdef STARPU_DEVEL
#warning TODO we need to call starpu_free
#endif
	case STARPU_CPU_RAM:
		free((void*)addr);
		_starpu_memory_manager_deallocate_size(size, dst_node);
		break;
#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
	case STARPU_CUDA_RAM:
	{
#ifdef STARPU_SIMGRID
		/* NOTE(review): the simgrid path does not call
		 * _starpu_memory_manager_deallocate_size, unlike the real path
		 * below — verify whether the simulated accounting leaks. */
		_STARPU_PTHREAD_MUTEX_LOCK(&cuda_alloc_mutex);
		/* Sleep 10µs for the free */
		MSG_process_sleep(0.000010);
		_STARPU_PTHREAD_MUTEX_UNLOCK(&cuda_alloc_mutex);
#else
		cudaError_t err;
		err = cudaFree((void*)addr);
		if (STARPU_UNLIKELY(err != cudaSuccess))
			STARPU_CUDA_REPORT_ERROR(err);
		_starpu_memory_manager_deallocate_size(size, dst_node);
#endif
		break;
	}
#endif
#if defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
	case STARPU_OPENCL_RAM:
	{
#ifdef STARPU_SIMGRID
		_STARPU_PTHREAD_MUTEX_LOCK(&opencl_alloc_mutex);
		/* Sleep 10µs for the free */
		MSG_process_sleep(0.000010);
		_STARPU_PTHREAD_MUTEX_UNLOCK(&opencl_alloc_mutex);
#else
		cl_int err;
		/* addr holds a cl_mem handle stored as an integer by
		 * starpu_malloc_on_node. */
		err = clReleaseMemObject((void*)addr);
		if (STARPU_UNLIKELY(err != CL_SUCCESS))
			STARPU_OPENCL_REPORT_ERROR(err);
		_starpu_memory_manager_deallocate_size(size, dst_node);
#endif
		break;
	}
#endif
	default:
		STARPU_ABORT();
	}
}