memalloc.c 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749
  1. /*
  2. * StarPU
  3. * Copyright (C) Université Bordeaux 1, CNRS 2008-2010 (see AUTHORS file)
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <datawizard/memalloc.h>
  17. #include <datawizard/footprint.h>
  18. /* This per-node RW-locks protect mc_list and memchunk_cache entries */
  19. static pthread_rwlock_t mc_rwlock[STARPU_MAXNODES];
  20. /* Potentially in use memory chunks */
  21. static starpu_mem_chunk_list_t mc_list[STARPU_MAXNODES];
  22. /* Explicitly caches memory chunks that can be reused */
  23. static starpu_mem_chunk_list_t memchunk_cache[STARPU_MAXNODES];
  24. void _starpu_init_mem_chunk_lists(void)
  25. {
  26. unsigned i;
  27. for (i = 0; i < STARPU_MAXNODES; i++)
  28. {
  29. PTHREAD_RWLOCK_INIT(&mc_rwlock[i], NULL);
  30. mc_list[i] = starpu_mem_chunk_list_new();
  31. memchunk_cache[i] = starpu_mem_chunk_list_new();
  32. }
  33. }
  34. void _starpu_deinit_mem_chunk_lists(void)
  35. {
  36. unsigned i;
  37. for (i = 0; i < STARPU_MAXNODES; i++)
  38. {
  39. starpu_mem_chunk_list_delete(mc_list[i]);
  40. starpu_mem_chunk_list_delete(memchunk_cache[i]);
  41. }
  42. }
  43. /*
  44. * Manipulate subtrees
  45. */
  46. static void lock_all_subtree(starpu_data_handle handle)
  47. {
  48. if (handle->nchildren == 0)
  49. {
  50. /* this is a leaf */
  51. while (_starpu_spin_trylock(&handle->header_lock))
  52. _starpu_datawizard_progress(_starpu_get_local_memory_node(), 0);
  53. }
  54. else {
  55. /* lock all sub-subtrees children */
  56. unsigned child;
  57. for (child = 0; child < handle->nchildren; child++)
  58. {
  59. lock_all_subtree(&handle->children[child]);
  60. }
  61. }
  62. }
  63. static void unlock_all_subtree(starpu_data_handle handle)
  64. {
  65. if (handle->nchildren == 0)
  66. {
  67. /* this is a leaf */
  68. _starpu_spin_unlock(&handle->header_lock);
  69. }
  70. else {
  71. /* lock all sub-subtrees children
  72. * Note that this is done in the reverse order of the
  73. * lock_all_subtree so that we avoid deadlock */
  74. unsigned i;
  75. for (i =0; i < handle->nchildren; i++)
  76. {
  77. unsigned child = handle->nchildren - 1 - i;
  78. unlock_all_subtree(&handle->children[child]);
  79. }
  80. }
  81. }
  82. static unsigned may_free_subtree(starpu_data_handle handle, unsigned node)
  83. {
  84. /* we only free if no one refers to the leaf */
  85. uint32_t refcnt = _starpu_get_data_refcnt(handle, node);
  86. if (refcnt)
  87. return 0;
  88. if (!handle->nchildren)
  89. return 1;
  90. /* look into all sub-subtrees children */
  91. unsigned child;
  92. for (child = 0; child < handle->nchildren; child++)
  93. {
  94. unsigned res;
  95. res = may_free_subtree(&handle->children[child], node);
  96. if (!res) return 0;
  97. }
  98. /* no problem was found */
  99. return 1;
  100. }
  101. static void transfer_subtree_to_node(starpu_data_handle handle, unsigned src_node,
  102. unsigned dst_node)
  103. {
  104. unsigned i;
  105. unsigned last = 0;
  106. unsigned cnt;
  107. int ret;
  108. if (handle->nchildren == 0)
  109. {
  110. struct starpu_data_replicate_s *src_replicate = &handle->per_node[src_node];
  111. struct starpu_data_replicate_s *dst_replicate = &handle->per_node[dst_node];
  112. /* this is a leaf */
  113. switch(src_replicate->state) {
  114. case STARPU_OWNER:
  115. /* the local node has the only copy */
  116. /* the owner is now the destination_node */
  117. src_replicate->state = STARPU_INVALID;
  118. dst_replicate->state = STARPU_OWNER;
  119. #warning we should use requests during memory reclaim
  120. /* TODO use request !! */
  121. src_replicate->refcnt++;
  122. dst_replicate->refcnt++;
  123. ret = _starpu_driver_copy_data_1_to_1(handle, src_replicate, dst_replicate, 0, NULL, 1);
  124. STARPU_ASSERT(ret == 0);
  125. src_replicate->refcnt--;
  126. dst_replicate->refcnt--;
  127. break;
  128. case STARPU_SHARED:
  129. /* some other node may have the copy */
  130. src_replicate->state = STARPU_INVALID;
  131. /* count the number of copies */
  132. cnt = 0;
  133. for (i = 0; i < STARPU_MAXNODES; i++)
  134. {
  135. if (handle->per_node[i].state == STARPU_SHARED) {
  136. cnt++;
  137. last = i;
  138. }
  139. }
  140. if (cnt == 1)
  141. handle->per_node[last].state = STARPU_OWNER;
  142. break;
  143. case STARPU_INVALID:
  144. /* nothing to be done */
  145. break;
  146. default:
  147. STARPU_ABORT();
  148. break;
  149. }
  150. }
  151. else {
  152. /* lock all sub-subtrees children */
  153. unsigned child;
  154. for (child = 0; child < handle->nchildren; child++)
  155. {
  156. transfer_subtree_to_node(&handle->children[child],
  157. src_node, dst_node);
  158. }
  159. }
  160. }
  161. static size_t free_memory_on_node(starpu_mem_chunk_t mc, uint32_t node)
  162. {
  163. size_t freed = 0;
  164. STARPU_ASSERT(mc->ops);
  165. STARPU_ASSERT(mc->ops->free_data_on_node);
  166. starpu_data_handle handle = mc->data;
  167. /* Does this memory chunk refers to a handle that does not exist
  168. * anymore ? */
  169. unsigned data_was_deleted = mc->data_was_deleted;
  170. struct starpu_data_replicate_s *replicate = mc->replicate;
  171. // while (_starpu_spin_trylock(&handle->header_lock))
  172. // _starpu_datawizard_progress(_starpu_get_local_memory_node());
  173. #warning can we block here ?
  174. // _starpu_spin_lock(&handle->header_lock);
  175. if (mc->automatically_allocated &&
  176. (!handle || data_was_deleted || replicate->refcnt == 0))
  177. {
  178. if (handle && !data_was_deleted)
  179. STARPU_ASSERT(replicate->allocated);
  180. mc->ops->free_data_on_node(mc->interface, node);
  181. if (handle && !data_was_deleted)
  182. {
  183. replicate->allocated = 0;
  184. /* XXX why do we need that ? */
  185. replicate->automatically_allocated = 0;
  186. }
  187. freed = mc->size;
  188. if (handle && !data_was_deleted)
  189. STARPU_ASSERT(replicate->refcnt == 0);
  190. }
  191. // _starpu_spin_unlock(&handle->header_lock);
  192. return freed;
  193. }
  194. static size_t do_free_mem_chunk(starpu_mem_chunk_t mc, unsigned node)
  195. {
  196. size_t size;
  197. /* free the actual buffer */
  198. size = free_memory_on_node(mc, node);
  199. /* remove the mem_chunk from the list */
  200. starpu_mem_chunk_list_erase(mc_list[node], mc);
  201. free(mc->interface);
  202. starpu_mem_chunk_delete(mc);
  203. return size;
  204. }
  205. /* This function is called for memory chunks that are possibly in used (ie. not
  206. * in the cache). They should therefore still be associated to a handle. */
  207. static size_t try_to_free_mem_chunk(starpu_mem_chunk_t mc, unsigned node)
  208. {
  209. size_t freed = 0;
  210. starpu_data_handle handle;
  211. handle = mc->data;
  212. STARPU_ASSERT(handle);
  213. /* Either it's a "relaxed coherency" memchunk, or it's a memchunk that
  214. * could be used with filters. */
  215. if (mc->relaxed_coherency)
  216. {
  217. STARPU_ASSERT(mc->replicate);
  218. while (_starpu_spin_trylock(&handle->header_lock))
  219. _starpu_datawizard_progress(_starpu_get_local_memory_node(), 0);
  220. if (mc->replicate->refcnt == 0)
  221. {
  222. /* Note taht there is no need to transfer any data or
  223. * to update the status in terms of MSI protocol
  224. * because this memchunk is associated to a replicate
  225. * in "relaxed coherency" mode. */
  226. freed = do_free_mem_chunk(mc, node);
  227. }
  228. _starpu_spin_unlock(&handle->header_lock);
  229. }
  230. else {
  231. /* try to lock all the leafs of the subtree */
  232. lock_all_subtree(handle);
  233. /* check if they are all "free" */
  234. if (may_free_subtree(handle, node))
  235. {
  236. STARPU_ASSERT(handle->per_node[node].refcnt == 0);
  237. /* in case there was nobody using that buffer, throw it
  238. * away after writing it back to main memory */
  239. transfer_subtree_to_node(handle, node, 0);
  240. STARPU_ASSERT(handle->per_node[node].refcnt == 0);
  241. /* now the actual buffer may be freed */
  242. freed = do_free_mem_chunk(mc, node);
  243. }
  244. /* unlock the leafs */
  245. unlock_all_subtree(handle);
  246. }
  247. return freed;
  248. }
  249. #ifdef STARPU_USE_ALLOCATION_CACHE
  250. /* We assume that mc_rwlock[node] is taken. is_already_in_mc_list indicates
  251. * that the mc is already in the list of buffers that are possibly used, and
  252. * therefore not in the cache. */
  253. static void reuse_mem_chunk(unsigned node, struct starpu_data_replicate_s *new_replicate, starpu_mem_chunk_t mc, unsigned is_already_in_mc_list)
  254. {
  255. starpu_data_handle old_data;
  256. old_data = mc->data;
  257. /* we found an appropriate mem chunk: so we get it out
  258. * of the "to free" list, and reassign it to the new
  259. * piece of data */
  260. if (!is_already_in_mc_list)
  261. {
  262. starpu_mem_chunk_list_erase(memchunk_cache[node], mc);
  263. }
  264. struct starpu_data_replicate_s *old_replicate = mc->replicate;
  265. old_replicate->allocated = 0;
  266. old_replicate->automatically_allocated = 0;
  267. old_replicate->initialized = 0;
  268. new_replicate->allocated = 1;
  269. new_replicate->automatically_allocated = 1;
  270. new_replicate->initialized = 0;
  271. STARPU_ASSERT(new_replicate->interface);
  272. STARPU_ASSERT(mc->interface);
  273. memcpy(new_replicate->interface, mc->interface, old_replicate->ops->interface_size);
  274. mc->data = new_replicate->handle;
  275. mc->data_was_deleted = 0;
  276. /* mc->ops, mc->size, mc->footprint and mc->interface should be
  277. * unchanged ! */
  278. /* reinsert the mem chunk in the list of active memory chunks */
  279. if (!is_already_in_mc_list)
  280. {
  281. starpu_mem_chunk_list_push_front(mc_list[node], mc);
  282. }
  283. }
  284. static unsigned try_to_reuse_mem_chunk(starpu_mem_chunk_t mc, unsigned node, starpu_data_handle new_data, unsigned is_already_in_mc_list)
  285. {
  286. unsigned success = 0;
  287. starpu_data_handle old_data;
  288. old_data = mc->data;
  289. STARPU_ASSERT(old_data);
  290. /* try to lock all the leafs of the subtree */
  291. lock_all_subtree(old_data);
  292. /* check if they are all "free" */
  293. if (may_free_subtree(old_data, node))
  294. {
  295. success = 1;
  296. /* in case there was nobody using that buffer, throw it
  297. * away after writing it back to main memory */
  298. transfer_subtree_to_node(old_data, node, 0);
  299. /* now replace the previous data */
  300. reuse_mem_chunk(node, new_data, mc, is_already_in_mc_list);
  301. }
  302. /* unlock the leafs */
  303. unlock_all_subtree(old_data);
  304. return success;
  305. }
  306. /* this function looks for a memory chunk that matches a given footprint in the
  307. * list of mem chunk that need to be freed. This function must be called with
  308. * mc_rwlock[node] taken in write mode. */
  309. static unsigned try_to_find_reusable_mem_chunk(unsigned node, starpu_data_handle data, uint32_t footprint)
  310. {
  311. starpu_mem_chunk_t mc, next_mc;
  312. /* go through all buffers in the cache */
  313. mc = _starpu_memchunk_cache_lookup_locked(node, handle);
  314. if (mc)
  315. {
  316. /* We found an entry in the cache so we can reuse it */
  317. reuse_mem_chunk(node, data, mc, 0);
  318. return 1;
  319. }
  320. /* now look for some non essential data in the active list */
  321. for (mc = starpu_mem_chunk_list_begin(mc_list[node]);
  322. mc != starpu_mem_chunk_list_end(mc_list[node]);
  323. mc = next_mc)
  324. {
  325. /* there is a risk that the memory chunk is freed before next
  326. * iteration starts: so we compute the next element of the list
  327. * now */
  328. next_mc = starpu_mem_chunk_list_next(mc);
  329. if (mc->data->is_not_important && (mc->footprint == footprint))
  330. {
  331. // fprintf(stderr, "found a candidate ...\n");
  332. if (try_to_reuse_mem_chunk(mc, node, data, 1))
  333. return 1;
  334. }
  335. }
  336. return 0;
  337. }
  338. #endif
  339. static int _starpu_data_interface_compare(void *interface_a, struct starpu_data_interface_ops_t *ops_a,
  340. void *interface_b, struct starpu_data_interface_ops_t *ops_b)
  341. {
  342. if (ops_a->interfaceid != ops_b->interfaceid)
  343. return -1;
  344. int ret = ops_a->compare(interface_a, interface_b);
  345. return ret;
  346. }
  347. /* This function must be called with mc_rwlock[node] taken in write mode */
  348. starpu_mem_chunk_t _starpu_memchunk_cache_lookup_locked(uint32_t node, starpu_data_handle handle)
  349. {
  350. uint32_t footprint = _starpu_compute_data_footprint(handle);
  351. /* go through all buffers in the cache */
  352. starpu_mem_chunk_t mc;
  353. for (mc = starpu_mem_chunk_list_begin(memchunk_cache[node]);
  354. mc != starpu_mem_chunk_list_end(memchunk_cache[node]);
  355. mc = starpu_mem_chunk_list_next(mc))
  356. {
  357. if (mc->footprint == footprint)
  358. {
  359. /* Is that a false hit ? (this is _very_ unlikely) */
  360. if (_starpu_data_interface_compare(handle->per_node[node].interface, handle->ops, mc->interface, mc->ops))
  361. continue;
  362. /* Cache hit */
  363. /* Remove from the cache */
  364. starpu_mem_chunk_list_erase(memchunk_cache[node], mc);
  365. return mc;
  366. }
  367. }
  368. /* This is a cache miss */
  369. return NULL;
  370. }
  371. starpu_mem_chunk_t _starpu_memchunk_cache_lookup(uint32_t node, starpu_data_handle handle)
  372. {
  373. starpu_mem_chunk_t mc;
  374. PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
  375. mc = _starpu_memchunk_cache_lookup_locked(node, handle);
  376. PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
  377. return mc;
  378. }
  379. void _starpu_memchunk_cache_insert(uint32_t node, starpu_mem_chunk_t mc)
  380. {
  381. PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
  382. mc->data = NULL;
  383. starpu_mem_chunk_list_push_front(memchunk_cache[node], mc);
  384. PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
  385. }
  386. /*
  387. * Free the memory chuncks that are explicitely tagged to be freed. The
  388. * mc_rwlock[node] rw-lock should be taken prior to calling this function.
  389. */
  390. static size_t flush_memchunk_cache(uint32_t node)
  391. {
  392. starpu_mem_chunk_t mc, next_mc;
  393. size_t freed = 0;
  394. for (mc = starpu_mem_chunk_list_begin(memchunk_cache[node]);
  395. mc != starpu_mem_chunk_list_end(memchunk_cache[node]);
  396. mc = next_mc)
  397. {
  398. next_mc = starpu_mem_chunk_list_next(mc);
  399. freed += free_memory_on_node(mc, node);
  400. starpu_mem_chunk_list_erase(memchunk_cache[node], mc);
  401. free(mc->interface);
  402. starpu_mem_chunk_delete(mc);
  403. }
  404. return freed;
  405. }
  406. /*
  407. * Try to free the buffers currently in use on the memory node. If the force
  408. * flag is set, the memory is freed regardless of coherency concerns (this
  409. * should only be used at the termination of StarPU for instance). The
  410. * mc_rwlock[node] rw-lock should be taken prior to calling this function.
  411. */
  412. static size_t free_potentially_in_use_mc(uint32_t node, unsigned force)
  413. {
  414. size_t freed = 0;
  415. starpu_mem_chunk_t mc, next_mc;
  416. for (mc = starpu_mem_chunk_list_begin(mc_list[node]);
  417. mc != starpu_mem_chunk_list_end(mc_list[node]);
  418. mc = next_mc)
  419. {
  420. /* there is a risk that the memory chunk is freed
  421. before next iteration starts: so we compute the next
  422. element of the list now */
  423. next_mc = starpu_mem_chunk_list_next(mc);
  424. if (!force)
  425. {
  426. freed += try_to_free_mem_chunk(mc, node);
  427. #if 0
  428. if (freed > toreclaim)
  429. break;
  430. #endif
  431. }
  432. else {
  433. /* We must free the memory now: note that data
  434. * coherency is not maintained in that case ! */
  435. freed += do_free_mem_chunk(mc, node);
  436. }
  437. }
  438. return freed;
  439. }
  440. static size_t reclaim_memory_generic(uint32_t node, unsigned force)
  441. {
  442. size_t freed = 0;
  443. PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
  444. /* remove all buffers for which there was a removal request */
  445. freed += flush_memchunk_cache(node);
  446. /* try to free all allocated data potentially in use */
  447. freed += free_potentially_in_use_mc(node, force);
  448. PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
  449. return freed;
  450. }
  /*
   * This function frees all the memory that was implicitly allocated by StarPU
   * (for the data replicates) on the given node. This is not ensuring data
   * coherency, and should only be called while StarPU is getting shut down.
   *
   * Returns the amount of memory that was freed.
   */
  size_t _starpu_free_all_automatically_allocated_buffers(uint32_t node)
  {
      /* forced reclaim: chunks are freed regardless of their references */
      const unsigned force = 1;

      return reclaim_memory_generic(node, force);
  }
  460. static starpu_mem_chunk_t _starpu_memchunk_init(struct starpu_data_replicate_s *replicate, size_t size, size_t interface_size, unsigned automatically_allocated)
  461. {
  462. starpu_mem_chunk_t mc = starpu_mem_chunk_new();
  463. starpu_data_handle handle = replicate->handle;
  464. STARPU_ASSERT(handle);
  465. STARPU_ASSERT(handle->ops);
  466. mc->data = handle;
  467. mc->size = size;
  468. mc->footprint = _starpu_compute_data_footprint(handle);
  469. mc->ops = handle->ops;
  470. mc->data_was_deleted = 0;
  471. mc->automatically_allocated = automatically_allocated;
  472. mc->relaxed_coherency = replicate->relaxed_coherency;
  473. mc->replicate = replicate;
  474. /* Save a copy of the interface */
  475. mc->interface = malloc(interface_size);
  476. STARPU_ASSERT(mc->interface);
  477. memcpy(mc->interface, replicate->interface, interface_size);
  478. return mc;
  479. }
  480. static void register_mem_chunk(struct starpu_data_replicate_s *replicate, size_t size, unsigned automatically_allocated)
  481. {
  482. unsigned dst_node = replicate->memory_node;
  483. starpu_mem_chunk_t mc;
  484. /* the interface was already filled by ops->allocate_data_on_node */
  485. size_t interface_size = replicate->handle->ops->interface_size;
  486. /* Put this memchunk in the list of memchunk in use */
  487. mc = _starpu_memchunk_init(replicate, size, interface_size, automatically_allocated);
  488. PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[dst_node]);
  489. starpu_mem_chunk_list_push_front(mc_list[dst_node], mc);
  490. PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[dst_node]);
  491. }
  492. /* This function is called when the handle is destroyed (eg. when calling
  493. * unregister or unpartition). It puts all the memchunks that refer to the
  494. * specified handle into the cache. */
  495. void _starpu_request_mem_chunk_removal(starpu_data_handle handle, unsigned node)
  496. {
  497. PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
  498. /* iterate over the list of memory chunks and remove the entry */
  499. starpu_mem_chunk_t mc, next_mc;
  500. for (mc = starpu_mem_chunk_list_begin(mc_list[node]);
  501. mc != starpu_mem_chunk_list_end(mc_list[node]);
  502. mc = next_mc)
  503. {
  504. next_mc = starpu_mem_chunk_list_next(mc);
  505. if (mc->data == handle) {
  506. /* we found the data */
  507. mc->data_was_deleted = 1;
  508. /* remove it from the main list */
  509. starpu_mem_chunk_list_erase(mc_list[node], mc);
  510. /* put it in the list of buffers to be removed */
  511. starpu_mem_chunk_list_push_front(memchunk_cache[node], mc);
  512. /* Note that we do not stop here because there can be
  513. * multiple replicates associated to the same handle on
  514. * the same memory node. */
  515. }
  516. }
  517. /* there was no corresponding buffer ... */
  518. PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
  519. }
  520. /*
  521. * In order to allocate a piece of data, we try to reuse existing buffers if
  522. * its possible.
  523. * 1 - we try to reuse a memchunk that is explicitely unused.
  524. * 2 - we go through the list of memory chunks and find one that is not
  525. * referenced and that has the same footprint to reuse it.
  526. * 3 - we call the usual driver's alloc method
  527. * 4 - we go through the list of memory chunks and release those that are
  528. * not referenced (or part of those).
  529. *
  530. */
  531. static ssize_t _starpu_allocate_interface(starpu_data_handle handle, struct starpu_data_replicate_s *replicate, uint32_t dst_node)
  532. {
  533. unsigned attempts = 0;
  534. ssize_t allocated_memory;
  535. _starpu_data_allocation_inc_stats(dst_node);
  536. #ifdef STARPU_USE_ALLOCATION_CACHE
  537. /* perhaps we can directly reuse a buffer in the free-list */
  538. uint32_t footprint = _starpu_compute_data_footprint(handle);
  539. STARPU_TRACE_START_ALLOC_REUSE(dst_node);
  540. PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
  541. if (try_to_find_reusable_mem_chunk(dst_node, handle, footprint))
  542. {
  543. PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
  544. _starpu_allocation_cache_hit(dst_node);
  545. ssize_t data_size = _starpu_data_get_size(handle);
  546. return data_size;
  547. }
  548. PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
  549. STARPU_TRACE_END_ALLOC_REUSE(dst_node);
  550. #endif
  551. do {
  552. STARPU_ASSERT(handle->ops);
  553. STARPU_ASSERT(handle->ops->allocate_data_on_node);
  554. STARPU_TRACE_START_ALLOC(dst_node);
  555. STARPU_ASSERT(replicate->interface);
  556. allocated_memory = handle->ops->allocate_data_on_node(replicate->interface, dst_node);
  557. STARPU_TRACE_END_ALLOC(dst_node);
  558. if (allocated_memory == -ENOMEM)
  559. {
  560. replicate->refcnt++;
  561. _starpu_spin_unlock(&handle->header_lock);
  562. STARPU_TRACE_START_MEMRECLAIM(dst_node);
  563. reclaim_memory_generic(dst_node, 0);
  564. STARPU_TRACE_END_MEMRECLAIM(dst_node);
  565. while (_starpu_spin_trylock(&handle->header_lock))
  566. _starpu_datawizard_progress(_starpu_get_local_memory_node(), 0);
  567. replicate->refcnt--;
  568. }
  569. } while((allocated_memory == -ENOMEM) && attempts++ < 2);
  570. return allocated_memory;
  571. }
  572. int _starpu_allocate_memory_on_node(starpu_data_handle handle, struct starpu_data_replicate_s *replicate)
  573. {
  574. ssize_t allocated_memory;
  575. unsigned dst_node = replicate->memory_node;
  576. STARPU_ASSERT(handle);
  577. /* A buffer is already allocated on the node */
  578. if (replicate->allocated)
  579. return 0;
  580. STARPU_ASSERT(replicate->interface);
  581. allocated_memory = _starpu_allocate_interface(handle, replicate, dst_node);
  582. /* perhaps we could really not handle that capacity misses */
  583. if (allocated_memory == -ENOMEM)
  584. return -ENOMEM;
  585. register_mem_chunk(replicate, allocated_memory, 1);
  586. replicate->allocated = 1;
  587. replicate->automatically_allocated = 1;
  588. return 0;
  589. }
  590. unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle handle, uint32_t memory_node)
  591. {
  592. return handle->per_node[memory_node].allocated;
  593. }