memalloc.c 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2009-2013 Université de Bordeaux 1
  4. * Copyright (C) 2010, 2011, 2012, 2013 Centre National de la Recherche Scientifique
  5. *
  6. * StarPU is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation; either version 2.1 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * StarPU is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. *
  15. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. */
  17. #include <datawizard/memory_manager.h>
  18. #include <datawizard/memalloc.h>
  19. #include <datawizard/footprint.h>
  20. #include <starpu.h>
  21. /* This per-node RW-locks protect mc_list and memchunk_cache entries */
  22. /* Note: handle header lock is always taken before this */
  23. static starpu_pthread_rwlock_t mc_rwlock[STARPU_MAXNODES];
  24. /* This per-node spinlock protect lru_list */
  25. static struct _starpu_spinlock lru_rwlock[STARPU_MAXNODES];
  26. /* Last Recently used memory chunkgs */
  27. static struct _starpu_mem_chunk_lru_list *starpu_lru_list[STARPU_MAXNODES];
  28. /* Potentially in use memory chunks */
  29. static struct _starpu_mem_chunk_list *mc_list[STARPU_MAXNODES];
  30. /* Explicitly caches memory chunks that can be reused */
  31. static struct _starpu_mem_chunk_list *memchunk_cache[STARPU_MAXNODES];
  32. /* When reclaiming memory to allocate, we reclaim MAX(what_is_to_reclaim_on_device, data_size_coefficient*data_size) */
  33. const unsigned starpu_memstrategy_data_size_coefficient=2;
  34. static void starpu_lru(unsigned node);
  35. static int get_better_disk_can_accept_size(starpu_data_handle_t handle);
  36. void _starpu_init_mem_chunk_lists(void)
  37. {
  38. unsigned i;
  39. for (i = 0; i < STARPU_MAXNODES; i++)
  40. {
  41. STARPU_PTHREAD_RWLOCK_INIT(&mc_rwlock[i], NULL);
  42. _starpu_spin_init(&lru_rwlock[i]);
  43. mc_list[i] = _starpu_mem_chunk_list_new();
  44. starpu_lru_list[i] = _starpu_mem_chunk_lru_list_new();
  45. memchunk_cache[i] = _starpu_mem_chunk_list_new();
  46. }
  47. }
  48. void _starpu_deinit_mem_chunk_lists(void)
  49. {
  50. unsigned i;
  51. for (i = 0; i < STARPU_MAXNODES; i++)
  52. {
  53. _starpu_mem_chunk_list_delete(mc_list[i]);
  54. _starpu_mem_chunk_list_delete(memchunk_cache[i]);
  55. _starpu_mem_chunk_lru_list_delete(starpu_lru_list[i]);
  56. _starpu_spin_destroy(&lru_rwlock[i]);
  57. STARPU_PTHREAD_RWLOCK_DESTROY(&mc_rwlock[i]);
  58. }
  59. }
  60. /*
  61. * Manipulate subtrees
  62. */
  63. static void lock_all_subtree(starpu_data_handle_t handle)
  64. {
  65. unsigned child;
  66. /* lock parent */
  67. while (_starpu_spin_trylock(&handle->header_lock))
  68. _starpu_datawizard_progress(_starpu_memory_node_get_local_key(), 0);
  69. /* lock all sub-subtrees children */
  70. for (child = 0; child < handle->nchildren; child++)
  71. {
  72. starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
  73. lock_all_subtree(child_handle);
  74. }
  75. }
  76. static void unlock_all_subtree(starpu_data_handle_t handle)
  77. {
  78. /* lock all sub-subtrees children
  79. * Note that this is done in the reverse order of the
  80. * lock_all_subtree so that we avoid deadlock */
  81. unsigned i;
  82. for (i =0; i < handle->nchildren; i++)
  83. {
  84. unsigned child = handle->nchildren - 1 - i;
  85. starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
  86. unlock_all_subtree(child_handle);
  87. }
  88. _starpu_spin_unlock(&handle->header_lock);
  89. }
  90. static unsigned may_free_subtree(starpu_data_handle_t handle, unsigned node)
  91. {
  92. /* we only free if no one refers to the leaf */
  93. uint32_t refcnt = _starpu_get_data_refcnt(handle, node);
  94. if (refcnt)
  95. return 0;
  96. if (!handle->nchildren)
  97. return 1;
  98. /* look into all sub-subtrees children */
  99. unsigned child;
  100. for (child = 0; child < handle->nchildren; child++)
  101. {
  102. unsigned res;
  103. starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
  104. res = may_free_subtree(child_handle, node);
  105. if (!res) return 0;
  106. }
  107. /* no problem was found */
  108. return 1;
  109. }
  110. static void transfer_subtree_to_node(starpu_data_handle_t handle, unsigned src_node,
  111. unsigned dst_node)
  112. {
  113. unsigned i;
  114. unsigned last = 0;
  115. unsigned cnt;
  116. int ret;
  117. STARPU_ASSERT(dst_node != src_node);
  118. if (handle->nchildren == 0)
  119. {
  120. struct _starpu_data_replicate *src_replicate = &handle->per_node[src_node];
  121. struct _starpu_data_replicate *dst_replicate = &handle->per_node[dst_node];
  122. /* this is a leaf */
  123. switch(src_replicate->state)
  124. {
  125. case STARPU_OWNER:
  126. /* the local node has the only copy */
  127. /* the owner is now the destination_node */
  128. src_replicate->state = STARPU_INVALID;
  129. dst_replicate->state = STARPU_OWNER;
  130. #ifdef STARPU_DEVEL
  131. #warning we should use requests during memory reclaim
  132. #endif
  133. /* TODO use request !! */
  134. /* Take temporary references on the replicates */
  135. _starpu_spin_checklocked(&handle->header_lock);
  136. src_replicate->refcnt++;
  137. dst_replicate->refcnt++;
  138. handle->busy_count+=2;
  139. ret = _starpu_driver_copy_data_1_to_1(handle, src_replicate, dst_replicate, 0, NULL, 1);
  140. STARPU_ASSERT(ret == 0);
  141. src_replicate->refcnt--;
  142. dst_replicate->refcnt--;
  143. STARPU_ASSERT(handle->busy_count >= 2);
  144. handle->busy_count -= 2;
  145. ret = _starpu_data_check_not_busy(handle);
  146. STARPU_ASSERT(ret == 0);
  147. break;
  148. case STARPU_SHARED:
  149. /* some other node may have the copy */
  150. src_replicate->state = STARPU_INVALID;
  151. /* count the number of copies */
  152. cnt = 0;
  153. for (i = 0; i < STARPU_MAXNODES; i++)
  154. {
  155. if (handle->per_node[i].state == STARPU_SHARED)
  156. {
  157. cnt++;
  158. last = i;
  159. }
  160. }
  161. STARPU_ASSERT(cnt > 0);
  162. if (cnt == 1)
  163. handle->per_node[last].state = STARPU_OWNER;
  164. break;
  165. case STARPU_INVALID:
  166. /* nothing to be done */
  167. break;
  168. default:
  169. STARPU_ABORT();
  170. break;
  171. }
  172. }
  173. else
  174. {
  175. /* lock all sub-subtrees children */
  176. unsigned child;
  177. for (child = 0; child < handle->nchildren; child++)
  178. {
  179. starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
  180. transfer_subtree_to_node(child_handle, src_node, dst_node);
  181. }
  182. }
  183. }
  184. static void notify_handle_children(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node)
  185. {
  186. unsigned child;
  187. replicate->allocated = 0;
  188. /* XXX why do we need that ? */
  189. replicate->automatically_allocated = 0;
  190. for (child = 0; child < handle->nchildren; child++)
  191. {
  192. /* Notify children that their buffer has been deallocated too */
  193. starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
  194. notify_handle_children(child_handle, &child_handle->per_node[node], node);
  195. }
  196. }
  197. static size_t free_memory_on_node(struct _starpu_mem_chunk *mc, unsigned node)
  198. {
  199. size_t freed = 0;
  200. STARPU_ASSERT(mc->ops);
  201. STARPU_ASSERT(mc->ops->free_data_on_node);
  202. starpu_data_handle_t handle = mc->data;
  203. struct _starpu_data_replicate *replicate = mc->replicate;
  204. if (handle)
  205. _starpu_spin_checklocked(&handle->header_lock);
  206. if (mc->automatically_allocated &&
  207. (!handle || replicate->refcnt == 0))
  208. {
  209. if (handle)
  210. STARPU_ASSERT(replicate->allocated);
  211. #if defined(STARPU_USE_CUDA) && defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
  212. if (starpu_node_get_kind(node) == STARPU_CUDA_RAM)
  213. {
  214. /* To facilitate the design of interface, we set the
  215. * proper CUDA device in case it is needed. This avoids
  216. * having to set it again in the free method of each
  217. * interface. */
  218. starpu_cuda_set_device(_starpu_memory_node_get_devid(node));
  219. }
  220. #endif
  221. mc->ops->free_data_on_node(mc->chunk_interface, node);
  222. if (handle)
  223. notify_handle_children(handle, replicate, node);
  224. freed = mc->size;
  225. if (handle)
  226. STARPU_ASSERT(replicate->refcnt == 0);
  227. }
  228. return freed;
  229. }
  230. static size_t do_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
  231. {
  232. size_t size;
  233. starpu_data_handle_t handle = mc->data;
  234. if (handle) {
  235. _starpu_spin_checklocked(&handle->header_lock);
  236. mc->size = _starpu_data_get_size(handle);
  237. }
  238. mc->replicate->mc=NULL;
  239. /* free the actual buffer */
  240. size = free_memory_on_node(mc, node);
  241. /* remove the mem_chunk from the list */
  242. _starpu_mem_chunk_list_erase(mc_list[node], mc);
  243. free(mc->chunk_interface);
  244. _starpu_mem_chunk_delete(mc);
  245. return size;
  246. }
  247. /* This function is called for memory chunks that are possibly in used (ie. not
  248. * in the cache). They should therefore still be associated to a handle. */
  249. static size_t try_to_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
  250. {
  251. size_t freed = 0;
  252. starpu_data_handle_t handle;
  253. handle = mc->data;
  254. STARPU_ASSERT(handle);
  255. /* This data should be written through to this node, avoid dropping it! */
  256. if (handle->wt_mask & (1<<node))
  257. return 0;
  258. /* This data was registered from this node, we will not be able to drop it anyway */
  259. if ((int) node == handle->home_node)
  260. return 0;
  261. /* REDUX memchunk */
  262. if (mc->relaxed_coherency == 2)
  263. {
  264. /* TODO: reduce it back to e.g. main memory */
  265. }
  266. else
  267. /* Either it's a "relaxed coherency" memchunk (SCRATCH), or it's a
  268. * memchunk that could be used with filters. */
  269. if (mc->relaxed_coherency == 1)
  270. {
  271. STARPU_ASSERT(mc->replicate);
  272. while (_starpu_spin_trylock(&handle->header_lock))
  273. _starpu_datawizard_progress(_starpu_memory_node_get_local_key(), 0);
  274. if (mc->replicate->refcnt == 0)
  275. {
  276. /* Note taht there is no need to transfer any data or
  277. * to update the status in terms of MSI protocol
  278. * because this memchunk is associated to a replicate
  279. * in "relaxed coherency" mode. */
  280. freed = do_free_mem_chunk(mc, node);
  281. }
  282. _starpu_spin_unlock(&handle->header_lock);
  283. }
  284. else
  285. {
  286. /* try to lock all the subtree */
  287. lock_all_subtree(handle);
  288. /* check if they are all "free" */
  289. if (may_free_subtree(handle, node))
  290. {
  291. int target = -1;
  292. /* XXX Considering only owner to invalidate */
  293. STARPU_ASSERT(handle->per_node[node].refcnt == 0);
  294. /* in case there was nobody using that buffer, throw it
  295. * away after writing it back to main memory */
  296. if (handle->home_node != -1)
  297. target = handle->home_node;
  298. else
  299. {
  300. /* handle->home_node == -1 */
  301. size_t size_handle = _starpu_data_get_size(handle);
  302. /* no place for datas in RAM, we push on disk */
  303. if (node == 0)
  304. {
  305. target = get_better_disk_can_accept_size(handle);
  306. }
  307. /* node != 0 */
  308. /* try to push data to RAM if we can before to push on disk*/
  309. else if (handle->per_node[STARPU_MAIN_RAM].allocated ||
  310. _starpu_memory_manager_test_allocate_size_(size_handle, STARPU_MAIN_RAM) == 1)
  311. {
  312. target = STARPU_MAIN_RAM;
  313. }
  314. /* no place in RAM */
  315. else
  316. {
  317. target = get_better_disk_can_accept_size(handle);
  318. }
  319. }
  320. if (target != -1) {
  321. #ifdef STARPU_MEMORY_STATS
  322. if (handle->per_node[node].state == STARPU_OWNER)
  323. _starpu_memory_handle_stats_invalidated(handle, node);
  324. #endif
  325. transfer_subtree_to_node(handle, node, target);
  326. #ifdef STARPU_MEMORY_STATS
  327. _starpu_memory_handle_stats_loaded_owner(handle, target);
  328. #endif
  329. STARPU_ASSERT(handle->per_node[node].refcnt == 0);
  330. /* now the actual buffer may be freed */
  331. freed = do_free_mem_chunk(mc, node);
  332. }
  333. }
  334. /* unlock the leafs */
  335. unlock_all_subtree(handle);
  336. }
  337. return freed;
  338. }
  339. #ifdef STARPU_USE_ALLOCATION_CACHE
  340. /* We assume that mc_rwlock[node] is taken. is_already_in_mc_list indicates
  341. * that the mc is already in the list of buffers that are possibly used, and
  342. * therefore not in the cache. */
  343. static void reuse_mem_chunk(unsigned node, struct _starpu_data_replicate *new_replicate, struct _starpu_mem_chunk *mc, unsigned is_already_in_mc_list)
  344. {
  345. /* we found an appropriate mem chunk: so we get it out
  346. * of the "to free" list, and reassign it to the new
  347. * piece of data */
  348. if (!is_already_in_mc_list)
  349. {
  350. _starpu_mem_chunk_list_erase(memchunk_cache[node], mc);
  351. }
  352. struct _starpu_data_replicate *old_replicate = mc->replicate;
  353. old_replicate->allocated = 0;
  354. old_replicate->automatically_allocated = 0;
  355. old_replicate->initialized = 0;
  356. new_replicate->allocated = 1;
  357. new_replicate->automatically_allocated = 1;
  358. new_replicate->initialized = 0;
  359. STARPU_ASSERT(new_replicate->data_interface);
  360. STARPU_ASSERT(mc->chunk_interface);
  361. memcpy(new_replicate->data_interface, mc->chunk_interface, old_replicate->handle->ops->interface_size);
  362. mc->data = new_replicate->handle;
  363. /* mc->ops, mc->footprint and mc->interface should be
  364. * unchanged ! */
  365. /* reinsert the mem chunk in the list of active memory chunks */
  366. if (!is_already_in_mc_list)
  367. {
  368. _starpu_mem_chunk_list_push_front(mc_list[node], mc);
  369. }
  370. }
  371. static unsigned try_to_reuse_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node, struct _starpu_data_replicate *replicate, unsigned is_already_in_mc_list)
  372. {
  373. unsigned success = 0;
  374. starpu_data_handle_t old_data;
  375. old_data = mc->data;
  376. STARPU_ASSERT(old_data);
  377. /* try to lock all the subtree */
  378. lock_all_subtree(old_data);
  379. /* check if they are all "free" */
  380. if (may_free_subtree(old_data, node))
  381. {
  382. success = 1;
  383. /* in case there was nobody using that buffer, throw it
  384. * away after writing it back to main memory */
  385. transfer_subtree_to_node(old_data, node, 0);
  386. /* now replace the previous data */
  387. reuse_mem_chunk(node, replicate, mc, is_already_in_mc_list);
  388. }
  389. /* unlock the leafs */
  390. unlock_all_subtree(old_data);
  391. return success;
  392. }
  393. static int _starpu_data_interface_compare(void *data_interface_a, struct starpu_data_interface_ops *ops_a,
  394. void *data_interface_b, struct starpu_data_interface_ops *ops_b)
  395. {
  396. if (ops_a->interfaceid != ops_b->interfaceid)
  397. return -1;
  398. int ret = ops_a->compare(data_interface_a, data_interface_b);
  399. return ret;
  400. }
  401. /* This function must be called with mc_rwlock[node] taken in write mode */
  402. static struct _starpu_mem_chunk *_starpu_memchunk_cache_lookup_locked(unsigned node, starpu_data_handle_t handle)
  403. {
  404. uint32_t footprint = _starpu_compute_data_footprint(handle);
  405. /* go through all buffers in the cache */
  406. struct _starpu_mem_chunk *mc;
  407. for (mc = _starpu_mem_chunk_list_begin(memchunk_cache[node]);
  408. mc != _starpu_mem_chunk_list_end(memchunk_cache[node]);
  409. mc = _starpu_mem_chunk_list_next(mc))
  410. {
  411. if (mc->footprint == footprint)
  412. {
  413. /* Is that a false hit ? (this is _very_ unlikely) */
  414. if (_starpu_data_interface_compare(handle->per_node[node].data_interface, handle->ops, mc->chunk_interface, mc->ops))
  415. continue;
  416. /* Cache hit */
  417. /* Remove from the cache */
  418. _starpu_mem_chunk_list_erase(memchunk_cache[node], mc);
  419. return mc;
  420. }
  421. }
  422. /* This is a cache miss */
  423. return NULL;
  424. }
  425. /* this function looks for a memory chunk that matches a given footprint in the
  426. * list of mem chunk that need to be freed. This function must be called with
  427. * mc_rwlock[node] taken in write mode. */
  428. static unsigned try_to_find_reusable_mem_chunk(unsigned node, starpu_data_handle_t data, struct _starpu_data_replicate *replicate, uint32_t footprint)
  429. {
  430. struct _starpu_mem_chunk *mc, *next_mc;
  431. /* go through all buffers in the cache */
  432. mc = _starpu_memchunk_cache_lookup_locked(node, data);
  433. if (mc)
  434. {
  435. /* We found an entry in the cache so we can reuse it */
  436. reuse_mem_chunk(node, replicate, mc, 0);
  437. return 1;
  438. }
  439. /* now look for some non essential data in the active list */
  440. for (mc = _starpu_mem_chunk_list_begin(mc_list[node]);
  441. mc != _starpu_mem_chunk_list_end(mc_list[node]);
  442. mc = next_mc)
  443. {
  444. /* there is a risk that the memory chunk is freed before next
  445. * iteration starts: so we compute the next element of the list
  446. * now */
  447. next_mc = _starpu_mem_chunk_list_next(mc);
  448. if (mc->data->is_not_important && (mc->footprint == footprint))
  449. {
  450. // fprintf(stderr, "found a candidate ...\n");
  451. if (try_to_reuse_mem_chunk(mc, node, replicate, 1))
  452. return 1;
  453. }
  454. }
  455. return 0;
  456. }
  457. #endif
  458. /*
  459. * Free the memory chuncks that are explicitely tagged to be freed. The
  460. * mc_rwlock[node] rw-lock should be taken prior to calling this function.
  461. */
  462. static size_t flush_memchunk_cache(unsigned node, size_t reclaim)
  463. {
  464. struct _starpu_mem_chunk *mc;
  465. size_t freed = 0;
  466. STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
  467. while (!_starpu_mem_chunk_list_empty(memchunk_cache[node])) {
  468. mc = _starpu_mem_chunk_list_pop_front(memchunk_cache[node]);
  469. STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
  470. starpu_data_handle_t handle = mc->data;
  471. if (handle)
  472. while (_starpu_spin_trylock(&handle->header_lock))
  473. _starpu_datawizard_progress(_starpu_memory_node_get_local_key(), 0);
  474. freed += free_memory_on_node(mc, node);
  475. if (handle)
  476. _starpu_spin_unlock(&handle->header_lock);
  477. free(mc->chunk_interface);
  478. _starpu_mem_chunk_delete(mc);
  479. STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
  480. if (reclaim && freed>reclaim)
  481. break;
  482. }
  483. STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
  484. return freed;
  485. }
  486. /*
  487. * Try to free the buffers currently in use on the memory node. If the force
  488. * flag is set, the memory is freed regardless of coherency concerns (this
  489. * should only be used at the termination of StarPU for instance). The
  490. * mc_rwlock[node] rw-lock should be taken prior to calling this function.
  491. */
  492. static size_t free_potentially_in_use_mc(unsigned node, unsigned force, size_t reclaim)
  493. {
  494. size_t freed = 0;
  495. struct _starpu_mem_chunk *mc, *next_mc = (void*) -1;
  496. /*
  497. * We have to unlock mc_rwlock before locking header_lock, so we have
  498. * to be careful with the list. We try to do just one pass, by
  499. * remembering the next mc to be tried. If it gets dropped, we restart
  500. * from zero. So we continue until we go through the whole list without
  501. * finding anything to free.
  502. */
  503. while (1)
  504. {
  505. STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
  506. if (_starpu_mem_chunk_list_empty(mc_list[node]) || !next_mc)
  507. {
  508. STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
  509. /* We reached the end of the list :/ */
  510. break;
  511. }
  512. if (next_mc == (void*) -1) {
  513. /* First iteration ever, start from beginning */
  514. mc = _starpu_mem_chunk_list_begin(mc_list[node]);
  515. } else {
  516. /* Try to restart from where we were */
  517. for (mc = _starpu_mem_chunk_list_begin(mc_list[node]);
  518. mc != _starpu_mem_chunk_list_end(mc_list[node]);
  519. mc = _starpu_mem_chunk_list_next(mc))
  520. if (mc == next_mc)
  521. /* Found it, restart from there. */
  522. break;
  523. if (mc == _starpu_mem_chunk_list_end(mc_list[node]))
  524. /* Couldn't find next_mc, restart from the beginning :/ */
  525. mc = _starpu_mem_chunk_list_begin(mc_list[node]);
  526. }
  527. /* Remember where to try next */
  528. next_mc = _starpu_mem_chunk_list_next(mc);
  529. STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
  530. if (!force)
  531. {
  532. freed += try_to_free_mem_chunk(mc, node);
  533. if (reclaim && freed > reclaim)
  534. break;
  535. }
  536. else
  537. {
  538. starpu_data_handle_t handle = mc->data;
  539. _starpu_spin_lock(&handle->header_lock);
  540. /* We must free the memory now, because we are
  541. * terminating the drivers: note that data coherency is
  542. * not maintained in that case ! */
  543. freed += do_free_mem_chunk(mc, node);
  544. _starpu_spin_unlock(&handle->header_lock);
  545. }
  546. }
  547. return freed;
  548. }
  /* Reclaim memory on `node`.
   *
   * The cached chunks are flushed first. The chunks that are potentially in
   * use are then considered in two situations:
   *  - `force` is set: they are freed regardless of coherency concerns. This
   *    has to happen even when `reclaim` is 0, since that is how
   *    _starpu_free_all_automatically_allocated_buffers() asks for everything
   *    to be released;
   *  - a non-zero amount `reclaim` was requested and flushing the cache did
   *    not free that much.
   *
   * Returns the total number of bytes freed. */
  size_t _starpu_memory_reclaim_generic(unsigned node, unsigned force, size_t reclaim)
  {
      size_t freed = 0;

      starpu_lru(node);

      /* remove all buffers for which there was a removal request */
      freed += flush_memchunk_cache(node, reclaim);

      /* try to free all allocated data potentially in use */
      if (force || (reclaim && freed<reclaim))
          freed += free_potentially_in_use_mc(node, force, reclaim);

      return freed;
  }
  /*
   * Shutdown-time reclaim of the memory that StarPU allocated implicitly for
   * the data replicates of `node`: _starpu_memory_reclaim_generic() is called
   * with the force flag set and no reclaim target (0).
   * This is not ensuring data coherency, and should only be called while
   * StarPU is getting shut down.
   * Returns the number of bytes reported as freed.
   */
  size_t _starpu_free_all_automatically_allocated_buffers(unsigned node)
  {
      const unsigned force = 1;
      const size_t no_target = 0;

      return _starpu_memory_reclaim_generic(node, force, no_target);
  }
  569. static struct _starpu_mem_chunk *_starpu_memchunk_init(struct _starpu_data_replicate *replicate, size_t interface_size, unsigned automatically_allocated)
  570. {
  571. struct _starpu_mem_chunk *mc = _starpu_mem_chunk_new();
  572. starpu_data_handle_t handle = replicate->handle;
  573. STARPU_ASSERT(handle);
  574. STARPU_ASSERT(handle->ops);
  575. mc->data = handle;
  576. mc->footprint = _starpu_compute_data_footprint(handle);
  577. mc->ops = handle->ops;
  578. mc->automatically_allocated = automatically_allocated;
  579. mc->relaxed_coherency = replicate->relaxed_coherency;
  580. mc->replicate = replicate;
  581. mc->replicate->mc = mc;
  582. /* Save a copy of the interface */
  583. mc->chunk_interface = malloc(interface_size);
  584. STARPU_ASSERT(mc->chunk_interface);
  585. memcpy(mc->chunk_interface, replicate->data_interface, interface_size);
  586. return mc;
  587. }
  588. static void register_mem_chunk(struct _starpu_data_replicate *replicate, unsigned automatically_allocated)
  589. {
  590. unsigned dst_node = replicate->memory_node;
  591. struct _starpu_mem_chunk *mc;
  592. /* the interface was already filled by ops->allocate_data_on_node */
  593. size_t interface_size = replicate->handle->ops->interface_size;
  594. /* Put this memchunk in the list of memchunk in use */
  595. mc = _starpu_memchunk_init(replicate, interface_size, automatically_allocated);
  596. STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[dst_node]);
  597. _starpu_mem_chunk_list_push_back(mc_list[dst_node], mc);
  598. STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[dst_node]);
  599. }
  600. /* This function is called when the handle is destroyed (eg. when calling
  601. * unregister or unpartition). It puts all the memchunks that refer to the
  602. * specified handle into the cache.
  603. */
  604. void _starpu_request_mem_chunk_removal(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned node, size_t size)
  605. {
  606. struct _starpu_mem_chunk *mc = replicate->mc;
  607. STARPU_ASSERT(mc->data == handle);
  608. /* Record the allocated size, so that later in memory
  609. * reclaiming we can estimate how much memory we free
  610. * by freeing this. */
  611. mc->size = size;
  612. /* This memchunk doesn't have to do with the data any more. */
  613. replicate->mc = NULL;
  614. replicate->allocated = 0;
  615. replicate->automatically_allocated = 0;
  616. STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
  617. mc->data = NULL;
  618. /* remove it from the main list */
  619. _starpu_mem_chunk_list_erase(mc_list[node], mc);
  620. STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
  621. /* We would only flush the RAM nodes cache if memory gets tight, either
  622. * because StarPU automatically knows the total memory size of the
  623. * machine, or because the user has provided a limitation.
  624. *
  625. * We don't really want the former scenario to be eating a lot of
  626. * memory just for caching allocations. Allocating main memory is cheap
  627. * anyway.
  628. */
  629. /* This is particularly important when
  630. * STARPU_USE_ALLOCATION_CACHE is not enabled, as we
  631. * wouldn't even re-use these allocations! */
  632. if (starpu_node_get_kind(node) == STARPU_CPU_RAM)
  633. {
  634. /* Free data immediately */
  635. free_memory_on_node(mc, node);
  636. free(mc->chunk_interface);
  637. _starpu_mem_chunk_delete(mc);
  638. }
  639. else
  640. /* put it in the list of buffers to be removed */
  641. _starpu_mem_chunk_list_push_front(memchunk_cache[node], mc);
  642. }
  643. /*
  644. * In order to allocate a piece of data, we try to reuse existing buffers if
  645. * its possible.
  646. * 1 - we try to reuse a memchunk that is explicitely unused.
  647. * 2 - we go through the list of memory chunks and find one that is not
  648. * referenced and that has the same footprint to reuse it.
  649. * 3 - we call the usual driver's alloc method
  650. * 4 - we go through the list of memory chunks and release those that are
  651. * not referenced (or part of those).
  652. *
  653. */
  654. static starpu_ssize_t _starpu_allocate_interface(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned dst_node, unsigned is_prefetch)
  655. {
  656. unsigned attempts = 0;
  657. starpu_ssize_t allocated_memory;
  658. int ret;
  659. _starpu_spin_checklocked(&handle->header_lock);
  660. _starpu_data_allocation_inc_stats(dst_node);
  661. #ifdef STARPU_USE_ALLOCATION_CACHE
  662. /* perhaps we can directly reuse a buffer in the free-list */
  663. uint32_t footprint = _starpu_compute_data_footprint(handle);
  664. _STARPU_TRACE_START_ALLOC_REUSE(dst_node);
  665. STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[dst_node]);
  666. if (try_to_find_reusable_mem_chunk(dst_node, handle, replicate, footprint))
  667. {
  668. STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[dst_node]);
  669. _starpu_allocation_cache_hit(dst_node);
  670. starpu_ssize_t data_size = _starpu_data_get_size(handle);
  671. return data_size;
  672. }
  673. STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[dst_node]);
  674. _STARPU_TRACE_END_ALLOC_REUSE(dst_node);
  675. #endif
  676. do
  677. {
  678. STARPU_ASSERT(handle->ops);
  679. STARPU_ASSERT(handle->ops->allocate_data_on_node);
  680. _STARPU_TRACE_START_ALLOC(dst_node);
  681. STARPU_ASSERT(replicate->data_interface);
  682. #if defined(STARPU_USE_CUDA) && defined(HAVE_CUDA_MEMCPY_PEER) && !defined(STARPU_SIMGRID)
  683. if (starpu_node_get_kind(dst_node) == STARPU_CUDA_RAM)
  684. {
  685. /* To facilitate the design of interface, we set the
  686. * proper CUDA device in case it is needed. This avoids
  687. * having to set it again in the malloc method of each
  688. * interface. */
  689. starpu_cuda_set_device(_starpu_memory_node_get_devid(dst_node));
  690. }
  691. #endif
  692. allocated_memory = handle->ops->allocate_data_on_node(replicate->data_interface, dst_node);
  693. _STARPU_TRACE_END_ALLOC(dst_node);
  694. if (allocated_memory == -ENOMEM)
  695. {
  696. size_t reclaim = 0.25*_starpu_memory_manager_get_global_memory_size(dst_node);
  697. size_t handle_size = handle->ops->get_size(handle);
  698. if (starpu_memstrategy_data_size_coefficient*handle_size > reclaim)
  699. reclaim = starpu_memstrategy_data_size_coefficient*handle_size;
  700. /* Take temporary reference on the replicate */
  701. replicate->refcnt++;
  702. handle->busy_count++;
  703. _starpu_spin_unlock(&handle->header_lock);
  704. _STARPU_TRACE_START_MEMRECLAIM(dst_node);
  705. if (is_prefetch)
  706. {
  707. flush_memchunk_cache(dst_node, reclaim);
  708. }
  709. else
  710. _starpu_memory_reclaim_generic(dst_node, 0, reclaim);
  711. _STARPU_TRACE_END_MEMRECLAIM(dst_node);
  712. while (_starpu_spin_trylock(&handle->header_lock))
  713. _starpu_datawizard_progress(_starpu_memory_node_get_local_key(), 0);
  714. replicate->refcnt--;
  715. STARPU_ASSERT(replicate->refcnt >= 0);
  716. STARPU_ASSERT(handle->busy_count > 0);
  717. handle->busy_count--;
  718. ret = _starpu_data_check_not_busy(handle);
  719. STARPU_ASSERT(ret == 0);
  720. }
  721. }
  722. while((allocated_memory == -ENOMEM) && attempts++ < 2);
  723. return allocated_memory;
  724. }
  725. int _starpu_allocate_memory_on_node(starpu_data_handle_t handle, struct _starpu_data_replicate *replicate, unsigned is_prefetch)
  726. {
  727. starpu_ssize_t allocated_memory;
  728. unsigned dst_node = replicate->memory_node;
  729. STARPU_ASSERT(handle);
  730. /* A buffer is already allocated on the node */
  731. if (replicate->allocated)
  732. return 0;
  733. STARPU_ASSERT(replicate->data_interface);
  734. allocated_memory = _starpu_allocate_interface(handle, replicate, dst_node, is_prefetch);
  735. /* perhaps we could really not handle that capacity misses */
  736. if (allocated_memory == -ENOMEM)
  737. return -ENOMEM;
  738. register_mem_chunk(replicate, 1);
  739. replicate->allocated = 1;
  740. replicate->automatically_allocated = 1;
  741. if (dst_node == 0)
  742. {
  743. void *ptr = starpu_data_handle_to_pointer(handle, 0);
  744. if (ptr != NULL)
  745. {
  746. _starpu_data_register_ram_pointer(handle, ptr);
  747. }
  748. }
  749. return 0;
  750. }
  751. unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, unsigned memory_node)
  752. {
  753. return handle->per_node[memory_node].allocated;
  754. }
  755. void _starpu_memchunk_recently_used(struct _starpu_mem_chunk *mc, unsigned node)
  756. {
  757. _starpu_spin_lock(&lru_rwlock[node]);
  758. struct _starpu_mem_chunk_lru *mc_lru=_starpu_mem_chunk_lru_new();
  759. mc_lru->mc=mc;
  760. _starpu_mem_chunk_lru_list_push_front(starpu_lru_list[node],mc_lru);
  761. _starpu_spin_unlock(&lru_rwlock[node]);
  762. }
  763. /* The mc_rwlock[node] rw-lock should be taken prior to calling this function.*/
  764. static void _starpu_memchunk_recently_used_move(struct _starpu_mem_chunk *mc, unsigned node)
  765. {
  766. /* XXX Sometimes the memchunk is not in the list... */
  767. struct _starpu_mem_chunk *mc_iter;
  768. for (mc_iter = _starpu_mem_chunk_list_begin(mc_list[node]);
  769. mc_iter != _starpu_mem_chunk_list_end(mc_list[node]);
  770. mc_iter = _starpu_mem_chunk_list_next(mc_iter) )
  771. {
  772. if (mc_iter==mc)
  773. {
  774. _starpu_mem_chunk_list_erase(mc_list[node], mc);
  775. _starpu_mem_chunk_list_push_back(mc_list[node], mc);
  776. return;
  777. }
  778. }
  779. }
  780. static void starpu_lru(unsigned node)
  781. {
  782. STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);
  783. _starpu_spin_lock(&lru_rwlock[node]);
  784. while (!_starpu_mem_chunk_lru_list_empty(starpu_lru_list[node]))
  785. {
  786. struct _starpu_mem_chunk_lru *mc_lru=_starpu_mem_chunk_lru_list_front(starpu_lru_list[node]);
  787. _starpu_memchunk_recently_used_move(mc_lru->mc, node);
  788. _starpu_mem_chunk_lru_list_erase(starpu_lru_list[node], mc_lru);
  789. _starpu_mem_chunk_lru_delete(mc_lru);
  790. }
  791. _starpu_spin_unlock(&lru_rwlock[node]);
  792. STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
  793. }
  #ifdef STARPU_MEMORY_STATS
  /*
   * Print on stderr the statistics of the data handles attached to the
   * memory chunks of mc_list[node]. Only chunks whose
   * automatically_allocated flag is 0 are reported. Nothing is printed
   * when the list is empty.
   *
   * The mc_rwlock[node] write lock is held while the list is walked.
   */
  void _starpu_memory_display_stats_by_node(int node)
  {
      struct _starpu_mem_chunk *chunk;

      STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);

      if (!_starpu_mem_chunk_list_empty(mc_list[node]))
      {
          fprintf(stderr, "#-------\n");
          fprintf(stderr, "Data on Node #%d\n",node);

          chunk = _starpu_mem_chunk_list_begin(mc_list[node]);
          while (chunk != _starpu_mem_chunk_list_end(mc_list[node]))
          {
              if (!chunk->automatically_allocated)
                  _starpu_memory_display_handle_stats(chunk->data);

              chunk = _starpu_mem_chunk_list_next(chunk);
          }
      }

      STARPU_PTHREAD_RWLOCK_UNLOCK(&mc_rwlock[node]);
  }
  #endif
  /*
   * Print on stderr the memory statistics of every memory node, from 0 up
   * to STARPU_MAXNODES - 1, framed by a header and a footer line.
   *
   * This function does nothing unless STARPU_MEMORY_STATS is defined at
   * build time.
   */
  void starpu_data_display_memory_stats(void)
  {
  #ifdef STARPU_MEMORY_STATS
      unsigned node = 0;

      fprintf(stderr, "\n#---------------------\n");
      fprintf(stderr, "Memory stats :\n");

      while (node < STARPU_MAXNODES)
      {
          _starpu_memory_display_stats_by_node(node);
          node++;
      }

      fprintf(stderr, "\n#---------------------\n");
  #endif
  }
  827. static int
  828. get_better_disk_can_accept_size(starpu_data_handle_t handle)
  829. {
  830. int target = -1;
  831. unsigned nnodes = starpu_memory_nodes_get_count();
  832. unsigned int i;
  833. double time_disk = 0;
  834. for (i = 0; i < nnodes; i++)
  835. {
  836. if (starpu_node_get_kind(i) == STARPU_DISK_RAM &&
  837. (_starpu_memory_manager_test_allocate_size_(_starpu_data_get_size(handle), i) == 1 ||
  838. handle->per_node[i].allocated))
  839. {
  840. /* only time can change between disk <-> main_ram
  841. * and not between main_ram <-> worker if we compare diks*/
  842. double time_tmp = _starpu_predict_transfer_time(i, STARPU_MAIN_RAM, _starpu_data_get_size(handle));
  843. if (target == -1 || time_disk > time_tmp)
  844. {
  845. target = i;
  846. time_disk = time_tmp;
  847. }
  848. }
  849. }
  850. return target;
  851. }