memalloc.c 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769
  /* StarPU --- Runtime system for heterogeneous multicore architectures.
   *
   * Copyright (C) 2009-2011 Université de Bordeaux 1
   * Copyright (C) 2010, 2011 Centre National de la Recherche Scientifique
   *
   * StarPU is free software; you can redistribute it and/or modify
   * it under the terms of the GNU Lesser General Public License as published by
   * the Free Software Foundation; either version 2.1 of the License, or (at
   * your option) any later version.
   *
   * StarPU is distributed in the hope that it will be useful, but
   * WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
   *
   * See the GNU Lesser General Public License in COPYING.LGPL for more details.
   */
  17. #include <datawizard/memalloc.h>
  18. #include <datawizard/footprint.h>
  /* Per-node read/write locks: mc_rwlock[n] protects the entries of both
   * mc_list[n] and memchunk_cache[n]. */
  static pthread_rwlock_t mc_rwlock[STARPU_MAXNODES];
  /* Per-node lists of the memory chunks that are potentially in use */
  static starpu_mem_chunk_list_t mc_list[STARPU_MAXNODES];
  /* Per-node lists of the memory chunks that were explicitly cached so that
   * they can be reused */
  static starpu_mem_chunk_list_t memchunk_cache[STARPU_MAXNODES];
  25. void _starpu_init_mem_chunk_lists(void)
  26. {
  27. unsigned i;
  28. for (i = 0; i < STARPU_MAXNODES; i++)
  29. {
  30. PTHREAD_RWLOCK_INIT(&mc_rwlock[i], NULL);
  31. mc_list[i] = starpu_mem_chunk_list_new();
  32. memchunk_cache[i] = starpu_mem_chunk_list_new();
  33. }
  34. }
  35. void _starpu_deinit_mem_chunk_lists(void)
  36. {
  37. unsigned i;
  38. for (i = 0; i < STARPU_MAXNODES; i++)
  39. {
  40. starpu_mem_chunk_list_delete(mc_list[i]);
  41. starpu_mem_chunk_list_delete(memchunk_cache[i]);
  42. }
  43. }
  /*
   * Manipulate subtrees
   */
  47. static void lock_all_subtree(starpu_data_handle handle)
  48. {
  49. if (handle->nchildren == 0)
  50. {
  51. /* this is a leaf */
  52. while (_starpu_spin_trylock(&handle->header_lock))
  53. _starpu_datawizard_progress(_starpu_get_local_memory_node(), 0);
  54. }
  55. else {
  56. /* lock all sub-subtrees children */
  57. unsigned child;
  58. for (child = 0; child < handle->nchildren; child++)
  59. {
  60. lock_all_subtree(&handle->children[child]);
  61. }
  62. }
  63. }
  64. static void unlock_all_subtree(starpu_data_handle handle)
  65. {
  66. if (handle->nchildren == 0)
  67. {
  68. /* this is a leaf */
  69. _starpu_spin_unlock(&handle->header_lock);
  70. }
  71. else {
  72. /* lock all sub-subtrees children
  73. * Note that this is done in the reverse order of the
  74. * lock_all_subtree so that we avoid deadlock */
  75. unsigned i;
  76. for (i =0; i < handle->nchildren; i++)
  77. {
  78. unsigned child = handle->nchildren - 1 - i;
  79. unlock_all_subtree(&handle->children[child]);
  80. }
  81. }
  82. }
  83. static unsigned may_free_subtree(starpu_data_handle handle, unsigned node)
  84. {
  85. /* we only free if no one refers to the leaf */
  86. uint32_t refcnt = _starpu_get_data_refcnt(handle, node);
  87. if (refcnt)
  88. return 0;
  89. if (!handle->nchildren)
  90. return 1;
  91. /* look into all sub-subtrees children */
  92. unsigned child;
  93. for (child = 0; child < handle->nchildren; child++)
  94. {
  95. unsigned res;
  96. res = may_free_subtree(&handle->children[child], node);
  97. if (!res) return 0;
  98. }
  99. /* no problem was found */
  100. return 1;
  101. }
  102. static void transfer_subtree_to_node(starpu_data_handle handle, unsigned src_node,
  103. unsigned dst_node)
  104. {
  105. unsigned i;
  106. unsigned last = 0;
  107. unsigned cnt;
  108. int ret;
  109. if (handle->nchildren == 0)
  110. {
  111. struct starpu_data_replicate_s *src_replicate = &handle->per_node[src_node];
  112. struct starpu_data_replicate_s *dst_replicate = &handle->per_node[dst_node];
  113. /* this is a leaf */
  114. switch(src_replicate->state) {
  115. case STARPU_OWNER:
  116. /* the local node has the only copy */
  117. /* the owner is now the destination_node */
  118. src_replicate->state = STARPU_INVALID;
  119. dst_replicate->state = STARPU_OWNER;
  120. #ifdef STARPU_DEVEL
  121. #warning we should use requests during memory reclaim
  122. #endif
  123. /* TODO use request !! */
  124. src_replicate->refcnt++;
  125. dst_replicate->refcnt++;
  126. ret = _starpu_driver_copy_data_1_to_1(handle, src_replicate, dst_replicate, 0, NULL, 1);
  127. STARPU_ASSERT(ret == 0);
  128. src_replicate->refcnt--;
  129. dst_replicate->refcnt--;
  130. break;
  131. case STARPU_SHARED:
  132. /* some other node may have the copy */
  133. src_replicate->state = STARPU_INVALID;
  134. /* count the number of copies */
  135. cnt = 0;
  136. for (i = 0; i < STARPU_MAXNODES; i++)
  137. {
  138. if (handle->per_node[i].state == STARPU_SHARED) {
  139. cnt++;
  140. last = i;
  141. }
  142. }
  143. if (cnt == 1)
  144. handle->per_node[last].state = STARPU_OWNER;
  145. break;
  146. case STARPU_INVALID:
  147. /* nothing to be done */
  148. break;
  149. default:
  150. STARPU_ABORT();
  151. break;
  152. }
  153. }
  154. else {
  155. /* lock all sub-subtrees children */
  156. unsigned child;
  157. for (child = 0; child < handle->nchildren; child++)
  158. {
  159. transfer_subtree_to_node(&handle->children[child],
  160. src_node, dst_node);
  161. }
  162. }
  163. }
  164. static size_t free_memory_on_node(starpu_mem_chunk_t mc, uint32_t node)
  165. {
  166. size_t freed = 0;
  167. STARPU_ASSERT(mc->ops);
  168. STARPU_ASSERT(mc->ops->free_data_on_node);
  169. starpu_data_handle handle = mc->data;
  170. /* Does this memory chunk refers to a handle that does not exist
  171. * anymore ? */
  172. unsigned data_was_deleted = mc->data_was_deleted;
  173. struct starpu_data_replicate_s *replicate = mc->replicate;
  174. // while (_starpu_spin_trylock(&handle->header_lock))
  175. // _starpu_datawizard_progress(_starpu_get_local_memory_node());
  176. #ifdef STARPU_DEVEL
  177. #warning can we block here ?
  178. #endif
  179. // _starpu_spin_lock(&handle->header_lock);
  180. if (mc->automatically_allocated &&
  181. (!handle || data_was_deleted || replicate->refcnt == 0))
  182. {
  183. if (handle && !data_was_deleted)
  184. STARPU_ASSERT(replicate->allocated);
  185. #if defined(STARPU_USE_CUDA) && defined(HAVE_CUDA_MEMCPY_PEER)
  186. if (_starpu_get_node_kind(node) == STARPU_CUDA_RAM)
  187. {
  188. /* To facilitate the design of interface, we set the
  189. * proper CUDA device in case it is needed. This avoids
  190. * having to set it again in the free method of each
  191. * interface. */
  192. cudaError_t err = cudaSetDevice(starpu_memory_node_to_devid(node));
  193. STARPU_ASSERT(err == cudaSuccess);
  194. }
  195. #endif
  196. mc->ops->free_data_on_node(mc->chunk_interface, node);
  197. if (handle && !data_was_deleted)
  198. {
  199. replicate->allocated = 0;
  200. /* XXX why do we need that ? */
  201. replicate->automatically_allocated = 0;
  202. }
  203. freed = mc->size;
  204. if (handle && !data_was_deleted)
  205. STARPU_ASSERT(replicate->refcnt == 0);
  206. }
  207. // _starpu_spin_unlock(&handle->header_lock);
  208. return freed;
  209. }
  210. static size_t do_free_mem_chunk(starpu_mem_chunk_t mc, unsigned node)
  211. {
  212. size_t size;
  213. /* free the actual buffer */
  214. size = free_memory_on_node(mc, node);
  215. /* remove the mem_chunk from the list */
  216. starpu_mem_chunk_list_erase(mc_list[node], mc);
  217. free(mc->chunk_interface);
  218. starpu_mem_chunk_delete(mc);
  219. return size;
  220. }
  221. /* This function is called for memory chunks that are possibly in used (ie. not
  222. * in the cache). They should therefore still be associated to a handle. */
  223. static size_t try_to_free_mem_chunk(starpu_mem_chunk_t mc, unsigned node)
  224. {
  225. size_t freed = 0;
  226. starpu_data_handle handle;
  227. handle = mc->data;
  228. STARPU_ASSERT(handle);
  229. /* Either it's a "relaxed coherency" memchunk, or it's a memchunk that
  230. * could be used with filters. */
  231. if (mc->relaxed_coherency)
  232. {
  233. STARPU_ASSERT(mc->replicate);
  234. while (_starpu_spin_trylock(&handle->header_lock))
  235. _starpu_datawizard_progress(_starpu_get_local_memory_node(), 0);
  236. if (mc->replicate->refcnt == 0)
  237. {
  238. /* Note taht there is no need to transfer any data or
  239. * to update the status in terms of MSI protocol
  240. * because this memchunk is associated to a replicate
  241. * in "relaxed coherency" mode. */
  242. freed = do_free_mem_chunk(mc, node);
  243. }
  244. _starpu_spin_unlock(&handle->header_lock);
  245. }
  246. else {
  247. /* try to lock all the leafs of the subtree */
  248. lock_all_subtree(handle);
  249. /* check if they are all "free" */
  250. if (may_free_subtree(handle, node))
  251. {
  252. STARPU_ASSERT(handle->per_node[node].refcnt == 0);
  253. /* in case there was nobody using that buffer, throw it
  254. * away after writing it back to main memory */
  255. transfer_subtree_to_node(handle, node, 0);
  256. STARPU_ASSERT(handle->per_node[node].refcnt == 0);
  257. /* now the actual buffer may be freed */
  258. freed = do_free_mem_chunk(mc, node);
  259. }
  260. /* unlock the leafs */
  261. unlock_all_subtree(handle);
  262. }
  263. return freed;
  264. }
  #ifdef STARPU_USE_ALLOCATION_CACHE
  /* Hand the buffer described by mc over to new_replicate.
   *
   * mc_rwlock[node] is assumed to be taken. is_already_in_mc_list tells that
   * mc currently sits in mc_list[node] (the buffers that are possibly in
   * use); otherwise mc is expected to be linked in memchunk_cache[node], from
   * which it is moved to mc_list[node].
   *
   * The previous replicate is only updated while its handle still exists:
   * once data_was_deleted is set, mc->replicate must not be dereferenced
   * (free_memory_on_node() follows the same rule). */
  static void reuse_mem_chunk(unsigned node, struct starpu_data_replicate_s *new_replicate, starpu_mem_chunk_t mc, unsigned is_already_in_mc_list)
  {
      STARPU_ASSERT(new_replicate);
      STARPU_ASSERT(mc->ops);

      /* we found an appropriate mem chunk: so we get it out
       * of the "to free" list, and reassign it to the new
       * piece of data */
      if (!is_already_in_mc_list)
      {
          starpu_mem_chunk_list_erase(memchunk_cache[node], mc);
      }

      /* the previous owner (if it is still alive) loses its buffer */
      if (mc->data && !mc->data_was_deleted)
      {
          struct starpu_data_replicate_s *old_replicate = mc->replicate;
          old_replicate->allocated = 0;
          old_replicate->automatically_allocated = 0;
          old_replicate->initialized = 0;
      }

      new_replicate->allocated = 1;
      new_replicate->automatically_allocated = 1;
      new_replicate->initialized = 0;

      /* make the new replicate point to the recycled buffer: the chunk
       * keeps a private copy of the interface that described it */
      STARPU_ASSERT(new_replicate->data_interface);
      STARPU_ASSERT(mc->chunk_interface);
      memcpy(new_replicate->data_interface, mc->chunk_interface, mc->ops->interface_size);

      /* the chunk now belongs to the new replicate */
      mc->data = new_replicate->handle;
      mc->data_was_deleted = 0;
      mc->replicate = new_replicate;
      mc->relaxed_coherency = new_replicate->relaxed_coherency;
      /* mc->ops, mc->size, mc->footprint and mc->chunk_interface should be
       * unchanged ! */

      /* reinsert the mem chunk in the list of active memory chunks */
      if (!is_already_in_mc_list)
      {
          starpu_mem_chunk_list_push_front(mc_list[node], mc);
      }
  }

  /* Try to steal the buffer of mc (currently attached to another piece of
   * data) for new_data on memory node "node".
   * The old data is written back to node 0 first. Returns 1 on success, 0 if
   * the old data is still referenced on that node.
   *
   * NOTE(review): only the handle is available here, so the buffer is given
   * to the per-node replicate of new_data; confirm that no other kind of
   * replicate is expected to go through this path. */
  static unsigned try_to_reuse_mem_chunk(starpu_mem_chunk_t mc, unsigned node, starpu_data_handle new_data, unsigned is_already_in_mc_list)
  {
      unsigned success = 0;

      starpu_data_handle old_data = mc->data;
      STARPU_ASSERT(old_data);

      /* try to lock all the leaves of the subtree */
      lock_all_subtree(old_data);

      /* check if they are all "free" */
      if (may_free_subtree(old_data, node))
      {
          success = 1;

          /* in case there was nobody using that buffer, throw it
           * away after writing it back to main memory */
          transfer_subtree_to_node(old_data, node, 0);

          /* now replace the previous data */
          reuse_mem_chunk(node, &new_data->per_node[node], mc, is_already_in_mc_list);
      }

      /* unlock the leaves */
      unlock_all_subtree(old_data);

      return success;
  }

  /* Compare two data interfaces.
   * Returns -1 when they do not belong to the same kind of interface,
   * otherwise whatever the compare method of the interface returns.
   * NOTE(review): the return convention of ops->compare is not visible from
   * this file; the caller below treats any non-zero value as a mismatch --
   * confirm against the interface implementations. */
  static int _starpu_data_interface_compare(void *data_interface_a, struct starpu_data_interface_ops_t *ops_a,
                                            void *data_interface_b, struct starpu_data_interface_ops_t *ops_b)
  {
      if (ops_a->interfaceid != ops_b->interfaceid)
          return -1;

      return ops_a->compare(data_interface_a, data_interface_b);
  }

  /* Look in memchunk_cache[node] for a chunk that can hold handle.
   * Returns the matching chunk, or NULL on a cache miss. The chunk is left
   * linked in the cache: reuse_mem_chunk() is the one that unlinks it, so
   * that it is never erased twice.
   * This function must be called with mc_rwlock[node] taken in write mode. */
  static starpu_mem_chunk_t _starpu_memchunk_cache_lookup_locked(uint32_t node, starpu_data_handle handle)
  {
      uint32_t footprint = _starpu_compute_data_footprint(handle);

      /* go through all buffers in the cache */
      starpu_mem_chunk_t mc;
      for (mc = starpu_mem_chunk_list_begin(memchunk_cache[node]);
           mc != starpu_mem_chunk_list_end(memchunk_cache[node]);
           mc = starpu_mem_chunk_list_next(mc))
      {
          if (mc->footprint != footprint)
              continue;

          /* Is that a false hit ? (this is _very_ unlikely) */
          if (_starpu_data_interface_compare(handle->per_node[node].data_interface, handle->ops,
                                             mc->chunk_interface, mc->ops))
              continue;

          /* Cache hit */
          return mc;
      }

      /* This is a cache miss */
      return NULL;
  }

  /* Look for a memory chunk that matches the given footprint, first in the
   * cache, then among the active chunks whose data is flagged as not
   * important. On success the buffer is attached to the per-node replicate of
   * data and 1 is returned; 0 is returned otherwise.
   * This function must be called with mc_rwlock[node] taken in write mode. */
  static unsigned try_to_find_reusable_mem_chunk(unsigned node, starpu_data_handle data, uint32_t footprint)
  {
      starpu_mem_chunk_t mc, next_mc;

      /* go through all buffers in the cache */
      mc = _starpu_memchunk_cache_lookup_locked(node, data);
      if (mc)
      {
          /* We found an entry in the cache so we can reuse it */
          reuse_mem_chunk(node, &data->per_node[node], mc, 0);
          return 1;
      }

      /* now look for some non essential data in the active list */
      for (mc = starpu_mem_chunk_list_begin(mc_list[node]);
           mc != starpu_mem_chunk_list_end(mc_list[node]);
           mc = next_mc)
      {
          /* there is a risk that the memory chunk is freed before next
           * iteration starts: so we compute the next element of the list
           * now */
          next_mc = starpu_mem_chunk_list_next(mc);

          if (mc->data->is_not_important && (mc->footprint == footprint))
          {
              if (try_to_reuse_mem_chunk(mc, node, data, 1))
                  return 1;
          }
      }

      return 0;
  }
  #endif
  387. /*
  388. * Free the memory chuncks that are explicitely tagged to be freed. The
  389. * mc_rwlock[node] rw-lock should be taken prior to calling this function.
  390. */
  391. static size_t flush_memchunk_cache(uint32_t node)
  392. {
  393. starpu_mem_chunk_t mc, next_mc;
  394. size_t freed = 0;
  395. for (mc = starpu_mem_chunk_list_begin(memchunk_cache[node]);
  396. mc != starpu_mem_chunk_list_end(memchunk_cache[node]);
  397. mc = next_mc)
  398. {
  399. next_mc = starpu_mem_chunk_list_next(mc);
  400. freed += free_memory_on_node(mc, node);
  401. starpu_mem_chunk_list_erase(memchunk_cache[node], mc);
  402. free(mc->chunk_interface);
  403. starpu_mem_chunk_delete(mc);
  404. }
  405. return freed;
  406. }
  407. /*
  408. * Try to free the buffers currently in use on the memory node. If the force
  409. * flag is set, the memory is freed regardless of coherency concerns (this
  410. * should only be used at the termination of StarPU for instance). The
  411. * mc_rwlock[node] rw-lock should be taken prior to calling this function.
  412. */
  413. static size_t free_potentially_in_use_mc(uint32_t node, unsigned force)
  414. {
  415. size_t freed = 0;
  416. starpu_mem_chunk_t mc, next_mc;
  417. for (mc = starpu_mem_chunk_list_begin(mc_list[node]);
  418. mc != starpu_mem_chunk_list_end(mc_list[node]);
  419. mc = next_mc)
  420. {
  421. /* there is a risk that the memory chunk is freed
  422. before next iteration starts: so we compute the next
  423. element of the list now */
  424. next_mc = starpu_mem_chunk_list_next(mc);
  425. if (!force)
  426. {
  427. freed += try_to_free_mem_chunk(mc, node);
  428. #if 0
  429. if (freed > toreclaim)
  430. break;
  431. #endif
  432. }
  433. else {
  434. /* We must free the memory now: note that data
  435. * coherency is not maintained in that case ! */
  436. freed += do_free_mem_chunk(mc, node);
  437. }
  438. }
  439. return freed;
  440. }
  441. static size_t reclaim_memory_generic(uint32_t node, unsigned force)
  442. {
  443. size_t freed = 0;
  444. PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
  445. /* remove all buffers for which there was a removal request */
  446. freed += flush_memchunk_cache(node);
  447. /* try to free all allocated data potentially in use */
  448. freed += free_potentially_in_use_mc(node, force);
  449. PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
  450. return freed;
  451. }
  /*
   * Free all the memory that StarPU implicitly allocated on "node" for the
   * data replicates, by running the generic reclaim with the force flag set.
   * Data coherency is not ensured, so this should only be called while StarPU
   * is getting shut down.
   */
  size_t _starpu_free_all_automatically_allocated_buffers(uint32_t node)
  {
      const unsigned force = 1;

      return reclaim_memory_generic(node, force);
  }
  461. static starpu_mem_chunk_t _starpu_memchunk_init(struct starpu_data_replicate_s *replicate, size_t size, size_t interface_size, unsigned automatically_allocated)
  462. {
  463. starpu_mem_chunk_t mc = starpu_mem_chunk_new();
  464. starpu_data_handle handle = replicate->handle;
  465. STARPU_ASSERT(handle);
  466. STARPU_ASSERT(handle->ops);
  467. mc->data = handle;
  468. mc->size = size;
  469. mc->footprint = _starpu_compute_data_footprint(handle);
  470. mc->ops = handle->ops;
  471. mc->data_was_deleted = 0;
  472. mc->automatically_allocated = automatically_allocated;
  473. mc->relaxed_coherency = replicate->relaxed_coherency;
  474. mc->replicate = replicate;
  475. /* Save a copy of the interface */
  476. mc->chunk_interface = malloc(interface_size);
  477. STARPU_ASSERT(mc->chunk_interface);
  478. memcpy(mc->chunk_interface, replicate->data_interface, interface_size);
  479. return mc;
  480. }
  481. static void register_mem_chunk(struct starpu_data_replicate_s *replicate, size_t size, unsigned automatically_allocated)
  482. {
  483. unsigned dst_node = replicate->memory_node;
  484. starpu_mem_chunk_t mc;
  485. /* the interface was already filled by ops->allocate_data_on_node */
  486. size_t interface_size = replicate->handle->ops->interface_size;
  487. /* Put this memchunk in the list of memchunk in use */
  488. mc = _starpu_memchunk_init(replicate, size, interface_size, automatically_allocated);
  489. PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[dst_node]);
  490. starpu_mem_chunk_list_push_front(mc_list[dst_node], mc);
  491. PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[dst_node]);
  492. }
  493. /* This function is called when the handle is destroyed (eg. when calling
  494. * unregister or unpartition). It puts all the memchunks that refer to the
  495. * specified handle into the cache. */
  496. void _starpu_request_mem_chunk_removal(starpu_data_handle handle, unsigned node)
  497. {
  498. PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
  499. /* iterate over the list of memory chunks and remove the entry */
  500. starpu_mem_chunk_t mc, next_mc;
  501. for (mc = starpu_mem_chunk_list_begin(mc_list[node]);
  502. mc != starpu_mem_chunk_list_end(mc_list[node]);
  503. mc = next_mc)
  504. {
  505. next_mc = starpu_mem_chunk_list_next(mc);
  506. if (mc->data == handle) {
  507. /* we found the data */
  508. mc->data_was_deleted = 1;
  509. /* remove it from the main list */
  510. starpu_mem_chunk_list_erase(mc_list[node], mc);
  511. /* put it in the list of buffers to be removed */
  512. starpu_mem_chunk_list_push_front(memchunk_cache[node], mc);
  513. /* Note that we do not stop here because there can be
  514. * multiple replicates associated to the same handle on
  515. * the same memory node. */
  516. }
  517. }
  518. /* there was no corresponding buffer ... */
  519. PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
  520. }
  521. /*
  522. * In order to allocate a piece of data, we try to reuse existing buffers if
  523. * its possible.
  524. * 1 - we try to reuse a memchunk that is explicitely unused.
  525. * 2 - we go through the list of memory chunks and find one that is not
  526. * referenced and that has the same footprint to reuse it.
  527. * 3 - we call the usual driver's alloc method
  528. * 4 - we go through the list of memory chunks and release those that are
  529. * not referenced (or part of those).
  530. *
  531. */
  532. static ssize_t _starpu_allocate_interface(starpu_data_handle handle, struct starpu_data_replicate_s *replicate, uint32_t dst_node)
  533. {
  534. unsigned attempts = 0;
  535. ssize_t allocated_memory;
  536. _starpu_data_allocation_inc_stats(dst_node);
  537. #ifdef STARPU_USE_ALLOCATION_CACHE
  538. /* perhaps we can directly reuse a buffer in the free-list */
  539. uint32_t footprint = _starpu_compute_data_footprint(handle);
  540. STARPU_TRACE_START_ALLOC_REUSE(dst_node);
  541. PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
  542. if (try_to_find_reusable_mem_chunk(dst_node, handle, footprint))
  543. {
  544. PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
  545. _starpu_allocation_cache_hit(dst_node);
  546. ssize_t data_size = _starpu_data_get_size(handle);
  547. return data_size;
  548. }
  549. PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
  550. STARPU_TRACE_END_ALLOC_REUSE(dst_node);
  551. #endif
  552. do {
  553. STARPU_ASSERT(handle->ops);
  554. STARPU_ASSERT(handle->ops->allocate_data_on_node);
  555. STARPU_TRACE_START_ALLOC(dst_node);
  556. STARPU_ASSERT(replicate->data_interface);
  557. #if defined(STARPU_USE_CUDA) && defined(HAVE_CUDA_MEMCPY_PEER)
  558. if (_starpu_get_node_kind(dst_node) == STARPU_CUDA_RAM)
  559. {
  560. /* To facilitate the design of interface, we set the
  561. * proper CUDA device in case it is needed. This avoids
  562. * having to set it again in the malloc method of each
  563. * interface. */
  564. cudaError_t err = cudaSetDevice(starpu_memory_node_to_devid(dst_node));
  565. STARPU_ASSERT(err == cudaSuccess);
  566. }
  567. #endif
  568. allocated_memory = handle->ops->allocate_data_on_node(replicate->data_interface, dst_node);
  569. STARPU_TRACE_END_ALLOC(dst_node);
  570. if (allocated_memory == -ENOMEM)
  571. {
  572. replicate->refcnt++;
  573. _starpu_spin_unlock(&handle->header_lock);
  574. STARPU_TRACE_START_MEMRECLAIM(dst_node);
  575. reclaim_memory_generic(dst_node, 0);
  576. STARPU_TRACE_END_MEMRECLAIM(dst_node);
  577. while (_starpu_spin_trylock(&handle->header_lock))
  578. _starpu_datawizard_progress(_starpu_get_local_memory_node(), 0);
  579. replicate->refcnt--;
  580. }
  581. } while((allocated_memory == -ENOMEM) && attempts++ < 2);
  582. return allocated_memory;
  583. }
  584. int _starpu_allocate_memory_on_node(starpu_data_handle handle, struct starpu_data_replicate_s *replicate)
  585. {
  586. ssize_t allocated_memory;
  587. unsigned dst_node = replicate->memory_node;
  588. STARPU_ASSERT(handle);
  589. /* A buffer is already allocated on the node */
  590. if (replicate->allocated)
  591. return 0;
  592. STARPU_ASSERT(replicate->data_interface);
  593. allocated_memory = _starpu_allocate_interface(handle, replicate, dst_node);
  594. /* perhaps we could really not handle that capacity misses */
  595. if (allocated_memory == -ENOMEM)
  596. return -ENOMEM;
  597. register_mem_chunk(replicate, allocated_memory, 1);
  598. replicate->allocated = 1;
  599. replicate->automatically_allocated = 1;
  600. if (dst_node == 0)
  601. {
  602. void *ptr = starpu_handle_to_pointer(handle, 0);
  603. if (ptr != NULL)
  604. {
  605. _starpu_data_register_ram_pointer(handle, ptr);
  606. }
  607. }
  608. return 0;
  609. }
  610. unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle handle, uint32_t memory_node)
  611. {
  612. return handle->per_node[memory_node].allocated;
  613. }