memalloc.c

/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2009-2013 Université de Bordeaux 1
 * Copyright (C) 2010, 2011, 2012, 2013 Centre National de la Recherche Scientifique
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */

#include <datawizard/memory_manager.h>
#include <datawizard/memalloc.h>
#include <datawizard/footprint.h>
#include <core/disk.h>
#include <starpu.h>
#include <common/uthash.h>

/* These per-node RW-locks protect the mc_list and memchunk_cache entries */
/* Note: the handle header lock is always taken before this one */
static starpu_pthread_rwlock_t mc_rwlock[STARPU_MAXNODES];

/* Potentially in-use memory chunks */
static struct _starpu_mem_chunk_list *mc_list[STARPU_MAXNODES];

/* Explicitly cached memory chunks that can be reused, hashed by footprint */
struct mc_cache_entry
{
	UT_hash_handle hh;
	struct _starpu_mem_chunk_list *list;
	/* hash key; it must live inside the entry, since uthash keeps a
	 * pointer to the key rather than a copy of it */
	uint32_t footprint;
};
static struct mc_cache_entry *mc_cache[STARPU_MAXNODES];

/* When reclaiming memory to allocate, we reclaim
 * MAX(what_is_to_reclaim_on_device, data_size_coefficient*data_size) */
const unsigned starpu_memstrategy_data_size_coefficient = 2;
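
/* Worked example (illustrative numbers only): on a device with 100 MB of
 * memory, _starpu_allocate_interface below asks to reclaim a quarter of it,
 * i.e. 25 MB. For a 4 MB handle we thus reclaim MAX(25 MB, 2*4 MB) = 25 MB,
 * while for a 20 MB handle we would reclaim MAX(25 MB, 2*20 MB) = 40 MB. */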

static int get_better_disk_can_accept_size(starpu_data_handle_t handle, unsigned node);
/* returns -1 when no target is found, hence the int return type */
static int choose_target(starpu_data_handle_t handle, unsigned node);

void _starpu_init_mem_chunk_lists(void)
{
	unsigned i;
	for (i = 0; i < STARPU_MAXNODES; i++)
	{
		STARPU_PTHREAD_RWLOCK_INIT(&mc_rwlock[i], NULL);
		mc_list[i] = _starpu_mem_chunk_list_new();
	}
}

void _starpu_deinit_mem_chunk_lists(void)
{
	unsigned i;
	for (i = 0; i < STARPU_MAXNODES; i++)
	{
		struct mc_cache_entry *entry, *tmp;
		_starpu_mem_chunk_list_delete(mc_list[i]);
		HASH_ITER(hh, mc_cache[i], entry, tmp)
		{
			HASH_DEL(mc_cache[i], entry);
			_starpu_mem_chunk_list_delete(entry->list);
			free(entry);
		}
		STARPU_PTHREAD_RWLOCK_DESTROY(&mc_rwlock[i]);
	}
}

/*
 * Manipulate subtrees
 */

static void unlock_all_subtree(starpu_data_handle_t handle)
{
	/* unlock all sub-tree children; this is done in the reverse order of
	 * lock_all_subtree so that we avoid deadlocks */
	unsigned i;
	for (i = 0; i < handle->nchildren; i++)
	{
		unsigned child = handle->nchildren - 1 - i;
		starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
		unlock_all_subtree(child_handle);
	}
	_starpu_spin_unlock(&handle->header_lock);
}

static int lock_all_subtree(starpu_data_handle_t handle)
{
	int child;

	/* lock the parent */
	if (_starpu_spin_trylock(&handle->header_lock))
		/* the handle is busy, abort */
		return 0;

	/* lock all sub-tree children */
	for (child = 0; child < (int) handle->nchildren; child++)
	{
		if (!lock_all_subtree(starpu_data_get_child(handle, child)))
		{
			/* Some child is busy, abort */
			while (--child >= 0)
				/* Unlock what we have already uselessly locked */
				unlock_all_subtree(starpu_data_get_child(handle, child));
			/* and release the parent lock taken above, otherwise
			 * it would leak since callers treat a 0 return as
			 * "nothing locked" */
			_starpu_spin_unlock(&handle->header_lock);
			return 0;
		}
	}

	return 1;
}
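
/* Illustration: for a handle H with children A and B, lock_all_subtree
 * try-locks H, then A (and recursively A's children), then B; if any
 * try-lock fails, everything already locked, including H itself, is
 * released and the traversal reports failure. unlock_all_subtree releases
 * the children in reverse order (B, then A) before H itself, mirroring the
 * locking order. */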

static unsigned may_free_subtree(starpu_data_handle_t handle, unsigned node)
{
	/* we only free if no one refers to the leaf */
	uint32_t refcnt = _starpu_get_data_refcnt(handle, node);
	if (refcnt)
		return 0;

	if (!handle->nchildren)
		return 1;

	/* look into all sub-tree children */
	unsigned child;
	for (child = 0; child < handle->nchildren; child++)
	{
		unsigned res;
		starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
		res = may_free_subtree(child_handle, node);
		if (!res)
			return 0;
	}

	/* no problem was found */
	return 1;
}

static void transfer_subtree_to_node(starpu_data_handle_t handle, unsigned src_node,
				     unsigned dst_node)
{
	unsigned i;
	unsigned last = 0;
	unsigned cnt;
	int ret;

	STARPU_ASSERT(dst_node != src_node);

	if (handle->nchildren == 0)
	{
		struct _starpu_data_replicate *src_replicate = &handle->per_node[src_node];
		struct _starpu_data_replicate *dst_replicate = &handle->per_node[dst_node];

		/* this is a leaf */
		switch (src_replicate->state)
		{
		case STARPU_OWNER:
			/* the local node has the only copy */
			/* the owner is now the destination node */
			src_replicate->state = STARPU_INVALID;
			dst_replicate->state = STARPU_OWNER;

#ifdef STARPU_DEVEL
#warning we should use requests during memory reclaim
#endif
			/* TODO use a request!! */
			/* Take temporary references on the replicates */
			_starpu_spin_checklocked(&handle->header_lock);
			src_replicate->refcnt++;
			dst_replicate->refcnt++;
			handle->busy_count += 2;

			ret = _starpu_driver_copy_data_1_to_1(handle, src_replicate, dst_replicate, 0, NULL, 1);
			STARPU_ASSERT(ret == 0);

			src_replicate->refcnt--;
			dst_replicate->refcnt--;
			STARPU_ASSERT(handle->busy_count >= 2);
			handle->busy_count -= 2;
			ret = _starpu_data_check_not_busy(handle);
			STARPU_ASSERT(ret == 0);

			break;
		case STARPU_SHARED:
			/* some other node may have the copy */
			src_replicate->state = STARPU_INVALID;

			/* count the number of copies */
			cnt = 0;
			for (i = 0; i < STARPU_MAXNODES; i++)
			{
				if (handle->per_node[i].state == STARPU_SHARED)
				{
					cnt++;
					last = i;
				}
			}
			STARPU_ASSERT(cnt > 0);

			if (cnt == 1)
				handle->per_node[last].state = STARPU_OWNER;

			break;
		case STARPU_INVALID:
			/* nothing to be done */
			break;
		default:
			STARPU_ABORT();
			break;
		}
	}
	else
	{
		/* transfer all sub-tree children recursively */
		unsigned child;
		for (child = 0; child < handle->nchildren; child++)
		{
			starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
			transfer_subtree_to_node(child_handle, src_node, dst_node);
		}
	}
}

static void notify_handle_children(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node)
{
	unsigned child;

	replicate->allocated = 0;

	/* XXX why do we need that ? */
	replicate->automatically_allocated = 0;

	for (child = 0; child < handle->nchildren; child++)
	{
		/* Notify children that their buffer has been deallocated too */
		starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
		notify_handle_children(child_handle, &child_handle->per_node[node], node);
	}
}

static size_t free_memory_on_node(struct _starpu_mem_chunk *mc, unsigned node)
{
	size_t freed = 0;

	STARPU_ASSERT(mc->ops);
	STARPU_ASSERT(mc->ops->free_data_on_node);

	starpu_data_handle_t handle = mc->data;
	struct _starpu_data_replicate *replicate = mc->replicate;

	if (handle)
		_starpu_spin_checklocked(&handle->header_lock);

	if (mc->automatically_allocated &&
	    (!handle || replicate->refcnt == 0))
	{
		if (handle)
			STARPU_ASSERT(replicate->allocated);

#if defined(STARPU_USE_CUDA) && defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
		if (starpu_node_get_kind(node) == STARPU_CUDA_RAM)
		{
			/* To facilitate the design of interfaces, we set the
			 * proper CUDA device in case it is needed. This avoids
			 * having to set it again in the free method of each
			 * interface. */
			starpu_cuda_set_device(_starpu_memory_node_get_devid(node));
		}
#endif

		mc->ops->free_data_on_node(mc->chunk_interface, node);

		if (handle)
			notify_handle_children(handle, replicate, node);

		freed = mc->size;

		if (handle)
			STARPU_ASSERT(replicate->refcnt == 0);
	}

	return freed;
}

static size_t do_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
{
	size_t size;
	starpu_data_handle_t handle = mc->data;

	if (handle)
	{
		_starpu_spin_checklocked(&handle->header_lock);
		mc->size = _starpu_data_get_size(handle);
	}

	mc->replicate->mc = NULL;

	/* free the actual buffer */
	size = free_memory_on_node(mc, node);

	/* remove the mem_chunk from the list */
	_starpu_mem_chunk_list_erase(mc_list[node], mc);

	free(mc->chunk_interface);
	_starpu_mem_chunk_delete(mc);

	return size;
}

/* This function is called for memory chunks that are possibly in use (i.e. not
 * in the cache). They should therefore still be associated to a handle. */
static size_t try_to_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
{
	size_t freed = 0;

	starpu_data_handle_t handle;
	handle = mc->data;
	STARPU_ASSERT(handle);

	/* This data should be written through to this node, avoid dropping it! */
	if (handle->wt_mask & (1<<node))
		return 0;

	/* This data was registered from this node, we will not be able to drop it anyway */
	if ((int) node == handle->home_node)
		return 0;

	/* REDUX memchunk */
	if (mc->relaxed_coherency == 2)
	{
		/* TODO: reduce it back to e.g. main memory */
	}
	else
	/* Either it's a "relaxed coherency" memchunk (SCRATCH), or it's a
	 * memchunk that could be used with filters. */
	if (mc->relaxed_coherency == 1)
	{
		STARPU_ASSERT(mc->replicate);

		if (_starpu_spin_trylock(&handle->header_lock))
			/* Handle is busy, abort */
			return 0;

		if (mc->replicate->refcnt == 0)
		{
			/* Note that there is no need to transfer any data or
			 * to update the status in terms of MSI protocol
			 * because this memchunk is associated to a replicate
			 * in "relaxed coherency" mode. */
			freed = do_free_mem_chunk(mc, node);
		}

		_starpu_spin_unlock(&handle->header_lock);
	}
	/* try to lock all the subtree */
	else if (lock_all_subtree(handle))
	{
		/* check if they are all "free" */
		if (may_free_subtree(handle, node))
		{
			int target = -1;

			/* XXX Considering only the owner to invalidate */
			STARPU_ASSERT(handle->per_node[node].refcnt == 0);

			/* in case there was nobody using that buffer, throw it
			 * away after writing it back to main memory */

			/* choose the best target */
			target = choose_target(handle, node);

			if (target != -1)
			{
#ifdef STARPU_MEMORY_STATS
				if (handle->per_node[node].state == STARPU_OWNER)
					_starpu_memory_handle_stats_invalidated(handle, node);
#endif
				transfer_subtree_to_node(handle, node, target);
#ifdef STARPU_MEMORY_STATS
				_starpu_memory_handle_stats_loaded_owner(handle, target);
#endif
				STARPU_ASSERT(handle->per_node[node].refcnt == 0);

				/* now the actual buffer may be freed */
				freed = do_free_mem_chunk(mc, node);
			}
		}

		/* unlock the tree */
		unlock_all_subtree(handle);
	}
	return freed;
}

#ifdef STARPU_USE_ALLOCATION_CACHE
/* We assume that mc_rwlock[node] is taken. is_already_in_mc_list indicates
 * that the mc is already in the list of buffers that are possibly used, and
 * therefore not in the cache. */
static void reuse_mem_chunk(unsigned node, struct _starpu_data_replicate *new_replicate, struct _starpu_mem_chunk *mc, unsigned is_already_in_mc_list)
{
	/* we found an appropriate mem chunk: so we get it out
	 * of the "to free" list, and reassign it to the new
	 * piece of data */
	/* Note: when the chunk comes from the cache,
	 * _starpu_memchunk_cache_lookup_locked has already removed it from
	 * the cache list, so there is nothing to erase here (erasing it a
	 * second time would corrupt the list). */

	struct _starpu_data_replicate *old_replicate = mc->replicate;
	old_replicate->allocated = 0;
	old_replicate->automatically_allocated = 0;
	old_replicate->initialized = 0;

	new_replicate->allocated = 1;
	new_replicate->automatically_allocated = 1;
	new_replicate->initialized = 0;

	STARPU_ASSERT(new_replicate->data_interface);
	STARPU_ASSERT(mc->chunk_interface);
	memcpy(new_replicate->data_interface, mc->chunk_interface, old_replicate->handle->ops->interface_size);

	mc->data = new_replicate->handle;
	/* mc->ops, mc->footprint and mc->interface should be unchanged ! */

	/* reinsert the mem chunk in the list of active memory chunks */
	if (!is_already_in_mc_list)
	{
		_starpu_mem_chunk_list_push_back(mc_list[node], mc);
	}
}

static unsigned try_to_reuse_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node, struct _starpu_data_replicate *replicate, unsigned is_already_in_mc_list)
{
	unsigned success = 0;

	starpu_data_handle_t old_data;
	old_data = mc->data;
	STARPU_ASSERT(old_data);

	/* try to lock all the subtree */
	/* and check if they are all "free" */
	if (lock_all_subtree(old_data))
	{
		if (may_free_subtree(old_data, node))
		{
			success = 1;

			/* in case there was nobody using that buffer, throw it
			 * away after writing it back to main memory */
			transfer_subtree_to_node(old_data, node, 0);

			/* now replace the previous data */
			reuse_mem_chunk(node, replicate, mc, is_already_in_mc_list);
		}

		/* unlock the tree */
		unlock_all_subtree(old_data);
	}

	return success;
}

static int _starpu_data_interface_compare(void *data_interface_a, struct starpu_data_interface_ops *ops_a,
					  void *data_interface_b, struct starpu_data_interface_ops *ops_b)
{
	if (ops_a->interfaceid != ops_b->interfaceid)
		return -1;

	int ret = ops_a->compare(data_interface_a, data_interface_b);

	return ret;
}

/* This function must be called with mc_rwlock[node] taken in write mode */
static struct _starpu_mem_chunk *_starpu_memchunk_cache_lookup_locked(unsigned node, starpu_data_handle_t handle, uint32_t footprint)
{
	/* go through all buffers in the cache */
	struct mc_cache_entry *entry;

	HASH_FIND(hh, mc_cache[node], &footprint, sizeof(footprint), entry);
	if (!entry)
		/* No data with that footprint */
		return NULL;

	struct _starpu_mem_chunk *mc;
	for (mc = _starpu_mem_chunk_list_begin(entry->list);
	     mc != _starpu_mem_chunk_list_end(entry->list);
	     mc = _starpu_mem_chunk_list_next(mc))
	{
		/* Is that a false hit ? (this is _very_ unlikely) */
		/* compare returns 1 when the interfaces match, so skip
		 * anything else */
		if (_starpu_data_interface_compare(handle->per_node[node].data_interface, handle->ops, mc->chunk_interface, mc->ops) != 1)
			continue;

		/* Cache hit */

		/* Remove from the cache */
		_starpu_mem_chunk_list_erase(entry->list, mc);
		return mc;
	}

	/* This is a cache miss */
	return NULL;
}

/* This function looks for a memory chunk that matches a given footprint in
 * the list of mem chunks that need to be freed. It must be called with
 * mc_rwlock[node] taken in write mode. */
static unsigned try_to_find_reusable_mem_chunk(unsigned node, starpu_data_handle_t data, struct _starpu_data_replicate *replicate, uint32_t footprint)
{
	struct _starpu_mem_chunk *mc, *next_mc;

	/* go through all buffers in the cache */
	mc = _starpu_memchunk_cache_lookup_locked(node, data, footprint);
	if (mc)
	{
		/* We found an entry in the cache so we can reuse it */
		reuse_mem_chunk(node, replicate, mc, 0);
		return 1;
	}

	if (!_starpu_has_not_important_data)
		return 0;

	/* now look for some non-essential data in the active list */
	for (mc = _starpu_mem_chunk_list_begin(mc_list[node]);
	     mc != _starpu_mem_chunk_list_end(mc_list[node]);
	     mc = next_mc)
	{
		/* there is a risk that the memory chunk is freed before the
		 * next iteration starts: so we compute the next element of
		 * the list now */
		next_mc = _starpu_mem_chunk_list_next(mc);

		if (mc->data->is_not_important && (mc->footprint == footprint))
		{
			/* we found a candidate */
			if (try_to_reuse_mem_chunk(mc, node, replicate, 1))
				return 1;
		}
	}

	return 0;
}
#endif

/*
 * Free the memory chunks that are explicitly tagged to be freed.
 * Takes and releases the mc_rwlock[node] rw-lock itself.
 */
static size_t flush_memchunk_cache(unsigned node, size_t reclaim)
{
	struct _starpu_mem_chunk *mc;
	struct _starpu_mem_chunk_list *busy_mc_cache;
	struct mc_cache_entry *entry, *tmp;

	size_t freed = 0;

	STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
	HASH_ITER(hh, mc_cache[node], entry, tmp)
	{
		busy_mc_cache = _starpu_mem_chunk_list_new();

		while (!_starpu_mem_chunk_list_empty(entry->list))
		{
			mc = _starpu_mem_chunk_list_pop_front(entry->list);
			starpu_data_handle_t handle = mc->data;

			if (handle && _starpu_spin_trylock(&handle->header_lock))
			{
				/* The handle is still busy, leave this chunk for later */
				_starpu_mem_chunk_list_push_back(busy_mc_cache, mc);
				continue;
			}

			freed += free_memory_on_node(mc, node);
			if (handle)
				_starpu_spin_unlock(&handle->header_lock);

			free(mc->chunk_interface);
			_starpu_mem_chunk_delete(mc);
			if (reclaim && freed >= reclaim)
				break;
		}
		_starpu_mem_chunk_list_push_list_front(busy_mc_cache, entry->list);
		_starpu_mem_chunk_list_delete(busy_mc_cache);

		if (reclaim && freed >= reclaim)
			break;
	}
	STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
	return freed;
}

/*
 * Try to free the buffers currently in use on the memory node. If the force
 * flag is set, the memory is freed regardless of coherency concerns (this
 * should only be used at the termination of StarPU for instance).
 * Takes and releases the mc_rwlock[node] rw-lock itself.
 */
static size_t free_potentially_in_use_mc(unsigned node, unsigned force, size_t reclaim)
{
	size_t freed = 0;

	struct _starpu_mem_chunk *mc, *next_mc;

	/*
	 * We have to unlock mc_rwlock before locking header_lock, so we have
	 * to be careful with the list. We try to do just one pass, by
	 * remembering the next mc to be tried. If it gets dropped, we restart
	 * from zero. So we continue until we go through the whole list without
	 * finding anything to free.
	 */

restart:
	STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);

	for (mc = _starpu_mem_chunk_list_begin(mc_list[node]);
	     mc != _starpu_mem_chunk_list_end(mc_list[node]);
	     mc = next_mc)
	{
		/* mc hopefully gets out of the list, we thus need to prefetch
		 * the next element */
		next_mc = _starpu_mem_chunk_list_next(mc);

		if (!force)
		{
			freed += try_to_free_mem_chunk(mc, node);

			if (reclaim && freed >= reclaim)
				break;
		}
		else
		{
			starpu_data_handle_t handle = mc->data;

			if (_starpu_spin_trylock(&handle->header_lock))
			{
				/* Ergl. We are shutting down, but somebody is
				 * still locking the handle. That's not
				 * supposed to happen, but better be safe by
				 * letting it go through. */
				STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
				goto restart;
			}

			/* We must free the memory now, because we are
			 * terminating the drivers: note that data coherency is
			 * not maintained in that case! */
			freed += do_free_mem_chunk(mc, node);
			_starpu_spin_unlock(&handle->header_lock);
		}
	}

	STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
	return freed;
}

size_t _starpu_memory_reclaim_generic(unsigned node, unsigned force, size_t reclaim)
{
	size_t freed = 0;

	if (reclaim && !force)
	{
		static int warned;
		if (!warned)
		{
			_STARPU_DISP("Not enough memory left on node %u. Trying to purge %lu bytes out\n", node, (unsigned long) reclaim);
			warned = 1;
		}
	}

	/* remove all buffers for which there was a removal request */
	freed += flush_memchunk_cache(node, reclaim);

	/* try to free all allocated data potentially in use */
	if (reclaim && freed < reclaim)
		freed += free_potentially_in_use_mc(node, force, reclaim);

	return freed;
}

/*
 * This function frees all the memory that was implicitly allocated by StarPU
 * (for the data replicates). It does not ensure data coherency, and should
 * only be called while StarPU is shutting down.
 */
size_t _starpu_free_all_automatically_allocated_buffers(unsigned node)
{
	return _starpu_memory_reclaim_generic(node, 1, 0);
}

static struct _starpu_mem_chunk *_starpu_memchunk_init(struct _starpu_data_replicate *replicate, size_t interface_size, unsigned automatically_allocated)
{
	struct _starpu_mem_chunk *mc = _starpu_mem_chunk_new();
	starpu_data_handle_t handle = replicate->handle;

	STARPU_ASSERT(handle);
	STARPU_ASSERT(handle->ops);

	mc->data = handle;
	mc->footprint = _starpu_compute_data_footprint(handle);
	mc->ops = handle->ops;
	mc->automatically_allocated = automatically_allocated;
	mc->relaxed_coherency = replicate->relaxed_coherency;
	mc->replicate = replicate;
	mc->replicate->mc = mc;

	/* Save a copy of the interface */
	mc->chunk_interface = malloc(interface_size);
	STARPU_ASSERT(mc->chunk_interface);
	memcpy(mc->chunk_interface, replicate->data_interface, interface_size);

	return mc;
}

static void register_mem_chunk(struct _starpu_data_replicate *replicate, unsigned automatically_allocated)
{
	unsigned dst_node = replicate->memory_node;

	struct _starpu_mem_chunk *mc;

	/* the interface was already filled by ops->allocate_data_on_node */
	size_t interface_size = replicate->handle->ops->interface_size;

	/* Put this memchunk in the list of memchunks in use */
	mc = _starpu_memchunk_init(replicate, interface_size, automatically_allocated);

	STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[dst_node]);
	_starpu_mem_chunk_list_push_back(mc_list[dst_node], mc);
	STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[dst_node]);
}
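
/* Life cycle of a memchunk, as implemented in this file: it is created by
 * register_mem_chunk when a replicate gets its buffer and stays on
 * mc_list[node] while the data may be in use; when the handle is destroyed,
 * _starpu_request_mem_chunk_removal below either frees the buffer
 * immediately (CPU RAM) or moves the memchunk to mc_cache[node] for possible
 * reuse; eviction (free_potentially_in_use_mc) or cache flushing
 * (flush_memchunk_cache) finally releases it. */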

/* This function is called when the handle is destroyed (e.g. when calling
 * unregister or unpartition). It puts all the memchunks that refer to the
 * specified handle into the cache.
 */
void _starpu_request_mem_chunk_removal(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node, size_t size)
{
	struct _starpu_mem_chunk *mc = replicate->mc;

	STARPU_ASSERT(mc->data == handle);

	/* Record the allocated size, so that later in memory
	 * reclaiming we can estimate how much memory we free
	 * by freeing this. */
	mc->size = size;

	/* This memchunk doesn't have to do with the data any more. */
	replicate->mc = NULL;
	replicate->allocated = 0;
	replicate->automatically_allocated = 0;

	STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);

	mc->data = NULL;
	/* remove it from the main list */
	_starpu_mem_chunk_list_erase(mc_list[node], mc);

	STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);

	/* We would only flush the RAM node's cache if memory gets tight,
	 * either because StarPU automatically knows the total memory size of
	 * the machine, or because the user has provided a limitation.
	 *
	 * We don't really want the former scenario to eat a lot of memory
	 * just for caching allocations. Allocating main memory is cheap
	 * anyway. */
	/* This is particularly important when STARPU_USE_ALLOCATION_CACHE is
	 * not enabled, as we wouldn't even reuse these allocations! */
	if (starpu_node_get_kind(node) == STARPU_CPU_RAM)
	{
		/* Free the data immediately */
		free_memory_on_node(mc, node);

		free(mc->chunk_interface);
		_starpu_mem_chunk_delete(mc);
	}
	else
	{
		/* put it in the list of buffers to be removed */
		uint32_t footprint = mc->footprint;
		struct mc_cache_entry *entry;
		STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
		HASH_FIND(hh, mc_cache[node], &footprint, sizeof(footprint), entry);
		if (!entry)
		{
			entry = malloc(sizeof(*entry));
			entry->list = _starpu_mem_chunk_list_new();
			/* the key must live inside the entry: uthash stores a
			 * pointer to the key, so pointing it at the stack
			 * variable would leave it dangling */
			entry->footprint = footprint;
			HASH_ADD(hh, mc_cache[node], footprint, sizeof(entry->footprint), entry);
		}
		_starpu_mem_chunk_list_push_front(entry->list, mc);
		STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
	}
}

/*
 * In order to allocate a piece of data, we try to reuse existing buffers if
 * possible:
 * 1 - we try to reuse a memchunk that is explicitly unused;
 * 2 - we go through the list of memory chunks and find one that is not
 *     referenced and that has the same footprint, to reuse it;
 * 3 - we call the usual driver's alloc method;
 * 4 - we go through the list of memory chunks and release those that are
 *     not referenced (or part of them).
 */
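
/* In _starpu_allocate_interface below, steps 1 and 2 correspond to the
 * STARPU_USE_ALLOCATION_CACHE block (try_to_find_reusable_mem_chunk), step 3
 * to the ops->allocate_data_on_node() call, and step 4 to the
 * flush_memchunk_cache()/_starpu_memory_reclaim_generic() calls performed
 * when the driver allocation returns -ENOMEM. */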

static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned dst_node, unsigned is_prefetch)
{
	unsigned attempts = 0;
	starpu_ssize_t allocated_memory;
	int ret;

	starpu_ssize_t data_size = _starpu_data_get_size(handle);

	_starpu_spin_checklocked(&handle->header_lock);

	_starpu_data_allocation_inc_stats(dst_node);

#ifdef STARPU_USE_ALLOCATION_CACHE
	/* perhaps we can directly reuse a buffer in the free-list */
	uint32_t footprint = _starpu_compute_data_footprint(handle);

	_STARPU_TRACE_START_ALLOC_REUSE(dst_node, data_size);
	STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[dst_node]);

	if (try_to_find_reusable_mem_chunk(dst_node, handle, replicate, footprint))
	{
		STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[dst_node]);
		_starpu_allocation_cache_hit(dst_node);
		return data_size;
	}

	STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[dst_node]);
	_STARPU_TRACE_END_ALLOC_REUSE(dst_node);
#endif
	STARPU_ASSERT(handle->ops);
	STARPU_ASSERT(handle->ops->allocate_data_on_node);
	STARPU_ASSERT(replicate->data_interface);

	char interface[handle->ops->interface_size];
	memcpy(interface, replicate->data_interface, handle->ops->interface_size);

	/* Take a temporary reference on the replicate */
	replicate->refcnt++;
	handle->busy_count++;
	_starpu_spin_unlock(&handle->header_lock);

	do
	{
		_STARPU_TRACE_START_ALLOC(dst_node, data_size);

#if defined(STARPU_USE_CUDA) && defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
		if (starpu_node_get_kind(dst_node) == STARPU_CUDA_RAM)
		{
			/* To facilitate the design of interfaces, we set the
			 * proper CUDA device in case it is needed. This avoids
			 * having to set it again in the malloc method of each
			 * interface. */
			starpu_cuda_set_device(_starpu_memory_node_get_devid(dst_node));
		}
#endif

		allocated_memory = handle->ops->allocate_data_on_node(interface, dst_node);
		_STARPU_TRACE_END_ALLOC(dst_node);

		if (allocated_memory == -ENOMEM)
		{
			size_t reclaim = 0.25 * _starpu_memory_manager_get_global_memory_size(dst_node);
			size_t handle_size = handle->ops->get_size(handle);
			if (starpu_memstrategy_data_size_coefficient * handle_size > reclaim)
				reclaim = starpu_memstrategy_data_size_coefficient * handle_size;

			_STARPU_TRACE_START_MEMRECLAIM(dst_node, is_prefetch);
			if (is_prefetch)
			{
				flush_memchunk_cache(dst_node, reclaim);
			}
			else
				_starpu_memory_reclaim_generic(dst_node, 0, reclaim);
			_STARPU_TRACE_END_MEMRECLAIM(dst_node, is_prefetch);
		}
	}
	while ((allocated_memory == -ENOMEM) && attempts++ < 2);

	int cpt = 0;
	while (cpt < STARPU_SPIN_MAXTRY && _starpu_spin_trylock(&handle->header_lock))
	{
		cpt++;
		_starpu_datawizard_progress(_starpu_memory_node_get_local_key(), 0);
	}
	if (cpt == STARPU_SPIN_MAXTRY)
		_starpu_spin_lock(&handle->header_lock);

	replicate->refcnt--;
	STARPU_ASSERT(replicate->refcnt >= 0);
	STARPU_ASSERT(handle->busy_count > 0);
	handle->busy_count--;
	ret = _starpu_data_check_not_busy(handle);
	STARPU_ASSERT(ret == 0);

	if (replicate->allocated)
	{
		/* Argl, somebody allocated it in between already, drop this one */
		handle->ops->free_data_on_node(interface, dst_node);
		allocated_memory = 0;
	}
	else
		/* Install the allocated interface */
		memcpy(replicate->data_interface, interface, handle->ops->interface_size);

	return allocated_memory;
}

int _starpu_allocate_memory_on_node(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned is_prefetch)
{
	starpu_ssize_t allocated_memory;

	unsigned dst_node = replicate->memory_node;

	STARPU_ASSERT(handle);

	/* A buffer is already allocated on the node */
	if (replicate->allocated)
		return 0;

	STARPU_ASSERT(replicate->data_interface);
	allocated_memory = _starpu_allocate_interface(handle, replicate, dst_node, is_prefetch);

	/* we may not be able to handle such a capacity miss */
	if (allocated_memory == -ENOMEM)
		return -ENOMEM;

	register_mem_chunk(replicate, 1);

	replicate->allocated = 1;
	replicate->automatically_allocated = 1;

	if (dst_node == STARPU_MAIN_RAM)
	{
		void *ptr = starpu_data_handle_to_pointer(handle, STARPU_MAIN_RAM);
		if (ptr != NULL)
		{
			_starpu_data_register_ram_pointer(handle, ptr);
		}
	}

	return 0;
}

unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, unsigned memory_node)
{
	return handle->per_node[memory_node].allocated;
}

/* This memchunk has been recently used, put it last on the mc_list, so we
 * will try to evict it as late as possible */
void _starpu_memchunk_recently_used(struct _starpu_mem_chunk *mc, unsigned node)
{
	if (!mc)
		/* user-allocated memory */
		return;

	STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
	_starpu_mem_chunk_list_erase(mc_list[node], mc);
	_starpu_mem_chunk_list_push_back(mc_list[node], mc);
	STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
}
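
/* Note: since free_potentially_in_use_mc scans mc_list[node] from the front,
 * keeping recently-used chunks at the back yields an LRU-like eviction
 * order: the chunks considered first for eviction are those that have not
 * been used for the longest time. */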

#ifdef STARPU_MEMORY_STATS
void _starpu_memory_display_stats_by_node(int node)
{
	STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);

	if (!_starpu_mem_chunk_list_empty(mc_list[node]))
	{
		struct _starpu_mem_chunk *mc;

		fprintf(stderr, "#-------\n");
		fprintf(stderr, "Data on Node #%d\n", node);

		for (mc = _starpu_mem_chunk_list_begin(mc_list[node]);
		     mc != _starpu_mem_chunk_list_end(mc_list[node]);
		     mc = _starpu_mem_chunk_list_next(mc))
		{
			if (mc->automatically_allocated == 0)
				_starpu_memory_display_handle_stats(mc->data);
		}
	}

	STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
}
#endif

void starpu_data_display_memory_stats(void)
{
#ifdef STARPU_MEMORY_STATS
	unsigned node;

	fprintf(stderr, "\n#---------------------\n");
	fprintf(stderr, "Memory stats:\n");
	for (node = 0; node < STARPU_MAXNODES; node++)
	{
		_starpu_memory_display_stats_by_node(node);
	}
	fprintf(stderr, "\n#---------------------\n");
#endif
}

static int
get_better_disk_can_accept_size(starpu_data_handle_t handle, unsigned node)
{
	int target = -1;
	unsigned nnodes = starpu_memory_nodes_get_count();
	unsigned int i;
	double time_disk = 0;

	for (i = 0; i < nnodes; i++)
	{
		if (starpu_node_get_kind(i) == STARPU_DISK_RAM && i != node &&
		    (_starpu_memory_manager_test_allocate_size_(_starpu_data_get_size(handle), i) == 1 ||
		     handle->per_node[i].allocated))
		{
			/* only if we can write on the disk */
			if (_starpu_get_disk_flag(i) != STARPU_DISK_NO_RECLAIM)
			{
				/* when comparing disks, only the disk <->
				 * main_ram transfer time matters; the
				 * main_ram <-> worker time is the same for
				 * all of them */
				double time_tmp = starpu_transfer_predict(i, STARPU_MAIN_RAM, _starpu_data_get_size(handle));
				if (target == -1 || time_disk > time_tmp)
				{
					target = i;
					time_disk = time_tmp;
				}
			}
		}
	}
	return target;
}

static int
choose_target(starpu_data_handle_t handle, unsigned node)
{
	int target = -1;
	size_t size_handle = _starpu_data_get_size(handle);

	if (handle->home_node != -1)
	{
		/* try to push to RAM if we can, before pushing to disk */
		if (starpu_node_get_kind(handle->home_node) == STARPU_DISK_RAM && node != STARPU_MAIN_RAM)
		{
			if (handle->per_node[STARPU_MAIN_RAM].allocated ||
			    _starpu_memory_manager_test_allocate_size_(size_handle, STARPU_MAIN_RAM) == 1)
			{
				target = STARPU_MAIN_RAM;
			}
			else
			{
				target = get_better_disk_can_accept_size(handle, node);
			}
		}
		/* other memory nodes */
		else
		{
			target = handle->home_node;
		}
	}
	else
	{
		/* handle->home_node == -1 */
		/* no place for the data in RAM, we push to disk */
		if (node == STARPU_MAIN_RAM)
		{
			target = get_better_disk_can_accept_size(handle, node);
		}
		/* node != STARPU_MAIN_RAM */
		/* try to push the data to RAM if we can, before pushing to disk */
		else if (handle->per_node[STARPU_MAIN_RAM].allocated ||
			 _starpu_memory_manager_test_allocate_size_(size_handle, STARPU_MAIN_RAM) == 1)
		{
			target = STARPU_MAIN_RAM;
		}
		/* no room in RAM */
		else
		{
			target = get_better_disk_can_accept_size(handle, node);
		}
	}

	/* we are not allowed to write on the disk */
	if (target != -1 && starpu_node_get_kind(target) == STARPU_DISK_RAM && _starpu_get_disk_flag(target) == STARPU_DISK_NO_RECLAIM)
		target = -1;

	return target;
}
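
/* Summary of the eviction-target policy implemented above:
 *  - data homed on a disk node: prefer main RAM when the data fits there,
 *    otherwise the best available disk;
 *  - data homed on any other node: fall back to the home node itself;
 *  - data without a home node: from main RAM go to the best disk, from any
 *    other node prefer main RAM, then a disk;
 *  - a disk flagged STARPU_DISK_NO_RECLAIM is never selected. */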