joblib.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. # StarPU --- Runtime system for heterogeneous multicore architectures.
  2. #
  3. # Copyright (C) 2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. #
  5. # StarPU is free software; you can redistribute it and/or modify
  6. # it under the terms of the GNU Lesser General Public License as published by
  7. # the Free Software Foundation; either version 2.1 of the License, or (at
  8. # your option) any later version.
  9. #
  10. # StarPU is distributed in the hope that it will be useful, but
  11. # WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. #
  14. # See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. #
  16. from starpu import starpupy
  17. import asyncio
  18. import math
  19. import os
  20. import pickle
  21. import json
  22. import functools
# get the number of CPUs controlled by StarPU
# (queried once at import time; used by future_generator to interpret n_jobs)
n_cpus=starpupy.cpu_worker_get_count()
  25. #class perfmodel
  26. class Perfmodel(object):
  27. def __init__(self, symbol):
  28. self.symbol=symbol
  29. self.pstruct=starpupy.init_perfmodel(self.symbol)
  30. def get_struct(self):
  31. return self.pstruct
  32. def __del__(self):
  33. #def free_struct(self):
  34. starpupy.free_perfmodel(self.pstruct)
  35. # split a list ls into n_block numbers of sub-lists
  36. def partition(ls, n_block):
  37. if len(ls)>=n_block:
  38. # there are n1 sub-lists which contain q1 elements, and (n_block-n1) sublists which contain q2 elements (n1 can be 0)
  39. q1=math.ceil(len(ls)/n_block)
  40. q2=math.floor(len(ls)/n_block)
  41. n1=len(ls)%n_block
  42. #n2=n_block-n1
  43. # generate n1 sub-lists in L1, and (n_block-n1) sub-lists in L2
  44. L1=[ls[i:i+q1] for i in range(0, n1*q1, q1)]
  45. L2=[ls[i:i+q2] for i in range(n1*q1, len(ls), q2)]
  46. L=L1+L2
  47. else:
  48. # if the block number is larger than the length of list, each element in the list is a sub-list
  49. L=[ls[i:i+1] for i in range (len(ls))]
  50. return L
  51. # generate the dictionary which contains the perfmodel symbol and its struct pointer
  52. dict_perf={}
  53. def dict_perf_generator(perfsymbol):
  54. if dict_perf.get(perfsymbol)==None:
  55. p=Perfmodel(perfsymbol)
  56. dict_perf[perfsymbol]=p
  57. else:
  58. p=dict_perf[perfsymbol]
  59. return p
  60. def future_generator(g, n_jobs, perfsymbol):
  61. # g is generated by delayed function, after converting to a list, the format is [function, (arg1, arg2, ... ,)]
  62. L=list(g)
  63. # generate a list of function according to g
  64. def lf(ls):
  65. L_func=[]
  66. for i in range(len(ls)):
  67. # the first element is the function
  68. f=ls[i][0]
  69. # the second element is the args list of a type tuple
  70. L_args=list(ls[i][1])
  71. # generate a list of function
  72. L_func.append(f(*L_args))
  73. return L_func
  74. # get the number of block
  75. if n_jobs<-n_cpus-1 or n_jobs>n_cpus:
  76. print("Error: n_jobs is out of range, number of CPUs is", n_cpus)
  77. elif n_jobs<0:
  78. n_block=n_cpus+1+n_jobs
  79. else:
  80. n_block=n_jobs
  81. # generate the split function list
  82. L_split=partition(L,n_block)
  83. # operation in each split list
  84. L_fut=[]
  85. for i in range(len(L_split)):
  86. if perfsymbol==None:
  87. fut=starpupy.task_submit(lf, L_split[i])
  88. L_fut.append(fut)
  89. else:
  90. p=dict_perf_generator(perfsymbol)
  91. fut=starpupy.task_submit(lf, L_split[i], p.get_struct())
  92. L_fut.append(fut)
  93. return L_fut
  94. def parallel(*, mode="normal", n_jobs=1, perfmodel=None, end_msg=None,\
  95. backend=None, verbose=0, timeout=None, pre_dispatch='2 * n_jobs',\
  96. batch_size='auto', temp_folder=None, max_nbytes='1M',\
  97. mmap_mode='r', prefer=None, require=None):
  98. # the mode normal, user can call the function directly without using async
  99. if mode=="normal":
  100. def parallel_normal(g):
  101. async def asy_main():
  102. L_fut=future_generator(g, n_jobs, perfmodel)
  103. res=[]
  104. for i in range(len(L_fut)):
  105. L_res=await L_fut[i]
  106. res.extend(L_res)
  107. #print(res)
  108. return res
  109. asyncio.run(asy_main())
  110. return asy_main
  111. return parallel_normal
  112. # the mode future, user needs to use asyncio module and await the Future result in main function
  113. elif mode=="future":
  114. def parallel_future(g):
  115. L_fut=future_generator(g, n_jobs, perfmodel)
  116. fut=asyncio.gather(*L_fut)
  117. if end_msg==None:
  118. return fut
  119. else:
  120. fut.add_done_callback(functools.partial(print, end_msg))
  121. return fut
  122. #return fut
  123. return parallel_future
  124. def delayed(f):
  125. def delayed_func(*args):
  126. return f, args
  127. return delayed_func
  128. ######################################################################
  129. # dump performance model
  130. def perfmodel_plot(perfmodel):
  131. p=dict_perf[perfmodel]
  132. starpupy.save_history_based_model(p.get_struct())
  133. os.system('starpu_perfmodel_plot -s "' + perfmodel +'"')
  134. os.system('gnuplot starpu_'+perfmodel+'.gp')
  135. os.system('gv starpu_'+perfmodel+'.eps')