simple_cpu_gpu_sched.c

/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2012 Inria
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */
#include <stdlib.h> /* malloc(), setenv() */
#include <string.h> /* memset() */

#include <starpu.h>
#include <starpu_scheduler.h>
#include "../helper.h"
#include <core/perfmodel/perfmodel.h>
/*
 * Schedulers that are aware of the expected task length provided by the
 * perfmodels must make sure that:
 * - cpu_task is scheduled on a CPU.
 * - gpu_task is scheduled on a GPU.
 *
 * Applies to: dmda, and to which other schedulers?
 */
void dummy(void *buffers[], void *args)
{
        (void) buffers;
        (void) args;
}
/*
 * Fake cost functions: cpu_task is predicted to be cheap on a CPU (1.0)
 * and expensive on a GPU (1000.0), while gpu_task is the opposite.
 */
static double
cpu_task_cpu(struct starpu_task *task,
             struct starpu_perfmodel_arch *arch,
             unsigned nimpl)
{
        (void) task;
        (void) arch;
        (void) nimpl;
        return 1.0;
}

static double
cpu_task_gpu(struct starpu_task *task,
             struct starpu_perfmodel_arch *arch,
             unsigned nimpl)
{
        (void) task;
        (void) arch;
        (void) nimpl;
        return 1000.0;
}

static double
gpu_task_cpu(struct starpu_task *task,
             struct starpu_perfmodel_arch *arch,
             unsigned nimpl)
{
        (void) task;
        (void) arch;
        (void) nimpl;
        return 1000.0;
}

static double
gpu_task_gpu(struct starpu_task *task,
             struct starpu_perfmodel_arch *arch,
             unsigned nimpl)
{
        (void) task;
        (void) arch;
        (void) nimpl;
        return 1.0;
}
static struct starpu_perfmodel model_cpu_task =
{
        .type = STARPU_PER_ARCH,
        .symbol = "model_cpu_task"
};

static struct starpu_perfmodel model_gpu_task =
{
        .type = STARPU_PER_ARCH,
        .symbol = "model_gpu_task"
};
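
/*
 * For each GPU worker of the given type, build the corresponding one-device
 * architecture combination, make sure both performance models have a
 * per_arch slot for it, and plug in the fake cost functions so that
 * cpu_task looks expensive and gpu_task looks cheap on that GPU.
 */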
static void
init_perfmodels_gpu(int gpu_type)
{
        int nb_worker_gpu = starpu_worker_get_count_by_type(gpu_type);
        int *worker_gpu_ids = malloc(nb_worker_gpu * sizeof(int));
        int worker_gpu;

        starpu_worker_get_ids_by_type(gpu_type, worker_gpu_ids, nb_worker_gpu);
        for (worker_gpu = 0; worker_gpu < nb_worker_gpu; worker_gpu++)
        {
                struct starpu_perfmodel_arch arch_gpu;
                arch_gpu.ndevices = 1;
                arch_gpu.devices = (struct starpu_perfmodel_device*)malloc(sizeof(struct starpu_perfmodel_device));
                arch_gpu.devices[0].type = gpu_type;
                arch_gpu.devices[0].devid = starpu_worker_get_devid(worker_gpu_ids[worker_gpu]);
                arch_gpu.devices[0].ncores = 1;

                int comb_gpu = starpu_get_arch_comb(arch_gpu.ndevices, arch_gpu.devices);
                if (comb_gpu == -1)
                        comb_gpu = starpu_add_arch_comb(arch_gpu.ndevices, arch_gpu.devices);

                /* FIXME: per_arch[comb_gpu] may not be allocated yet; public
                 * functions should be provided so that we do not poke into it
                 * directly, and such a function would allocate
                 * per_arch[comb_gpu] when needed. */
                if (comb_gpu >= model_cpu_task.ncombs_set)
                        _starpu_perfmodel_realloc(&model_cpu_task, comb_gpu+1);
                if (comb_gpu >= model_gpu_task.ncombs_set)
                        _starpu_perfmodel_realloc(&model_gpu_task, comb_gpu+1);

                model_cpu_task.per_arch[comb_gpu] = (struct starpu_perfmodel_per_arch*)malloc(sizeof(struct starpu_perfmodel_per_arch));
                memset(&model_cpu_task.per_arch[comb_gpu][0], 0, sizeof(struct starpu_perfmodel_per_arch));
                model_cpu_task.nimpls[comb_gpu] = 1;
                model_cpu_task.per_arch[comb_gpu][0].cost_function = cpu_task_gpu;

                model_gpu_task.per_arch[comb_gpu] = (struct starpu_perfmodel_per_arch*)malloc(sizeof(struct starpu_perfmodel_per_arch));
                memset(&model_gpu_task.per_arch[comb_gpu][0], 0, sizeof(struct starpu_perfmodel_per_arch));
                model_gpu_task.nimpls[comb_gpu] = 1;
                model_gpu_task.per_arch[comb_gpu][0].cost_function = gpu_task_gpu;
        }
        free(worker_gpu_ids);
}
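
/*
 * Register the fake performance models: attach the CPU cost functions to
 * the single-CPU architecture combination, then let init_perfmodels_gpu()
 * do the same for every CUDA and OpenCL worker.
 */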
static void
init_perfmodels(void)
{
        starpu_perfmodel_init(NULL, &model_cpu_task);
        starpu_perfmodel_init(NULL, &model_gpu_task);

        struct starpu_perfmodel_arch arch_cpu;
        arch_cpu.ndevices = 1;
        arch_cpu.devices = (struct starpu_perfmodel_device*)malloc(sizeof(struct starpu_perfmodel_device));
        arch_cpu.devices[0].type = STARPU_CPU_WORKER;
        arch_cpu.devices[0].devid = 0;
        arch_cpu.devices[0].ncores = 1;

        int comb_cpu = starpu_get_arch_comb(arch_cpu.ndevices, arch_cpu.devices);
        if (comb_cpu == -1)
                comb_cpu = starpu_add_arch_comb(arch_cpu.ndevices, arch_cpu.devices);

        /* FIXME: per_arch[comb_cpu] may not be allocated yet; public functions
         * should be provided so that we do not poke into it directly, and such
         * a function would allocate per_arch[comb_cpu] when needed. */
        if (comb_cpu >= model_cpu_task.ncombs_set)
                _starpu_perfmodel_realloc(&model_cpu_task, comb_cpu+1);
        if (comb_cpu >= model_gpu_task.ncombs_set)
                _starpu_perfmodel_realloc(&model_gpu_task, comb_cpu+1);

        model_cpu_task.per_arch[comb_cpu] = (struct starpu_perfmodel_per_arch*)malloc(sizeof(struct starpu_perfmodel_per_arch));
        memset(&model_cpu_task.per_arch[comb_cpu][0], 0, sizeof(struct starpu_perfmodel_per_arch));
        model_cpu_task.nimpls[comb_cpu] = 1;
        model_cpu_task.per_arch[comb_cpu][0].cost_function = cpu_task_cpu;

        model_gpu_task.per_arch[comb_cpu] = (struct starpu_perfmodel_per_arch*)malloc(sizeof(struct starpu_perfmodel_per_arch));
        memset(&model_gpu_task.per_arch[comb_cpu][0], 0, sizeof(struct starpu_perfmodel_per_arch));
        model_gpu_task.nimpls[comb_cpu] = 1;
        model_gpu_task.per_arch[comb_cpu][0].cost_function = gpu_task_cpu;

        /* We need to set the cost function for each combination with a CUDA
         * or an OpenCL worker. */
        init_perfmodels_gpu(STARPU_CUDA_WORKER);
        init_perfmodels_gpu(STARPU_OPENCL_WORKER);

        /* if (model_cpu_task.per_arch[STARPU_CPU_WORKER] != NULL) */
        /* { */
        /*         for (devid = 0; model_cpu_task.per_arch[STARPU_CPU_WORKER][devid] != NULL; devid++) */
        /*         { */
        /*                 for (ncore = 0; model_cpu_task.per_arch[STARPU_CPU_WORKER][devid][ncore] != NULL; ncore++) */
        /*                 { */
        /*                         model_cpu_task.per_arch[STARPU_CPU_WORKER][devid][ncore][0].cost_function = cpu_task_cpu; */
        /*                         model_gpu_task.per_arch[STARPU_CPU_WORKER][devid][ncore][0].cost_function = gpu_task_cpu; */
        /*                 } */
        /*         } */
        /* } */

        /* if (model_cpu_task.per_arch[STARPU_CUDA_WORKER] != NULL) */
        /* { */
        /*         for (devid = 0; model_cpu_task.per_arch[STARPU_CUDA_WORKER][devid] != NULL; devid++) */
        /*         { */
        /*                 for (ncore = 0; model_cpu_task.per_arch[STARPU_CUDA_WORKER][devid][ncore] != NULL; ncore++) */
        /*                 { */
        /*                         model_cpu_task.per_arch[STARPU_CUDA_WORKER][devid][ncore][0].cost_function = cpu_task_gpu; */
        /*                         model_gpu_task.per_arch[STARPU_CUDA_WORKER][devid][ncore][0].cost_function = gpu_task_gpu; */
        /*                 } */
        /*         } */
        /* } */

        /* if (model_cpu_task.per_arch[STARPU_OPENCL_WORKER] != NULL) */
        /* { */
        /*         for (devid = 0; model_cpu_task.per_arch[STARPU_OPENCL_WORKER][devid] != NULL; devid++) */
        /*         { */
        /*                 for (ncore = 0; model_cpu_task.per_arch[STARPU_OPENCL_WORKER][devid][ncore] != NULL; ncore++) */
        /*                 { */
        /*                         model_cpu_task.per_arch[STARPU_OPENCL_WORKER][devid][ncore][0].cost_function = cpu_task_gpu; */
        /*                         model_gpu_task.per_arch[STARPU_OPENCL_WORKER][devid][ncore][0].cost_function = gpu_task_gpu; */
        /*                 } */
        /*         } */
        /* } */
}
/*
 * Dummy codelets.
 */
static struct starpu_codelet cpu_cl =
{
        .cpu_funcs = { dummy, NULL },
        .cuda_funcs = { dummy, NULL },
        .opencl_funcs = { dummy, NULL },
        .nbuffers = 0,
        .model = &model_cpu_task
};

static struct starpu_codelet gpu_cl =
{
        .cpu_funcs = { dummy, NULL },
        .cuda_funcs = { dummy, NULL },
        .opencl_funcs = { dummy, NULL },
        .nbuffers = 0,
        .model = &model_gpu_task
};
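
/*
 * Initialize StarPU with the given scheduling policy, skip the test if no
 * CPU worker or no CUDA/OpenCL worker is available, submit one cpu_task and
 * one gpu_task, and use the profiling information to check that each task
 * was executed by the expected kind of worker.
 */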
static int
run(struct starpu_sched_policy *policy)
{
        struct starpu_conf conf;
        starpu_conf_init(&conf);
        conf.sched_policy = policy;

        int ret = starpu_init(&conf);
        if (ret == -ENODEV)
                exit(STARPU_TEST_SKIPPED);

        /* At least 1 CPU and 1 GPU are needed. */
        if (starpu_cpu_worker_get_count() == 0)
        {
                starpu_shutdown();
                exit(STARPU_TEST_SKIPPED);
        }
        if (starpu_cuda_worker_get_count() == 0 && starpu_opencl_worker_get_count() == 0)
        {
                starpu_shutdown();
                exit(STARPU_TEST_SKIPPED);
        }

        starpu_profiling_status_set(1);
        init_perfmodels();

        struct starpu_task *cpu_task = starpu_task_create();
        cpu_task->cl = &cpu_cl;
        cpu_task->destroy = 0;

        struct starpu_task *gpu_task = starpu_task_create();
        gpu_task->cl = &gpu_cl;
        gpu_task->destroy = 0;

        ret = starpu_task_submit(cpu_task);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
        ret = starpu_task_submit(gpu_task);
        STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");

        starpu_task_wait_for_all();

        enum starpu_worker_archtype cpu_task_worker, gpu_task_worker;
        cpu_task_worker = starpu_worker_get_type(cpu_task->profiling_info->workerid);
        gpu_task_worker = starpu_worker_get_type(gpu_task->profiling_info->workerid);
        if (cpu_task_worker != STARPU_CPU_WORKER ||
            (gpu_task_worker != STARPU_CUDA_WORKER && gpu_task_worker != STARPU_OPENCL_WORKER))
        {
                FPRINTF(stderr, "Task did not execute on expected worker\n");
                ret = 1;
        }
        else
        {
                FPRINTF(stderr, "Task DID execute on expected worker\n");
                ret = 0;
        }

        starpu_task_destroy(cpu_task);
        starpu_task_destroy(gpu_task);
        starpu_shutdown();

        return ret;
}
/*
extern struct starpu_sched_policy _starpu_sched_ws_policy;
extern struct starpu_sched_policy _starpu_sched_prio_policy;
extern struct starpu_sched_policy _starpu_sched_random_policy;
extern struct starpu_sched_policy _starpu_sched_dm_policy;
extern struct starpu_sched_policy _starpu_sched_dmda_ready_policy;
extern struct starpu_sched_policy _starpu_sched_dmda_sorted_policy;
extern struct starpu_sched_policy _starpu_sched_eager_policy;
extern struct starpu_sched_policy _starpu_sched_parallel_heft_policy;
extern struct starpu_sched_policy _starpu_sched_peager_policy;
*/
extern struct starpu_sched_policy _starpu_sched_dmda_policy;

/* XXX: which policies are we interested in? */
static struct starpu_sched_policy *policies[] =
{
        //&_starpu_sched_ws_policy,
        //&_starpu_sched_prio_policy,
        //&_starpu_sched_dm_policy,
        &_starpu_sched_dmda_policy,
        //&_starpu_sched_dmda_ready_policy,
        //&_starpu_sched_dmda_sorted_policy,
        //&_starpu_sched_random_policy,
        //&_starpu_sched_eager_policy,
        //&_starpu_sched_parallel_heft_policy,
        //&_starpu_sched_peager_policy
};
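
/*
 * Run the check with every policy listed above. Setting STARPU_SCHED_BETA
 * to 0 is meant to zero the data-transfer term in the dm/dmda cost
 * estimate, so that only the fake computation costs drive the placement
 * decision.
 */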
int
main(void)
{
#ifndef STARPU_HAVE_SETENV
/* XXX: is this macro used by all the schedulers we are interested in? */
#warning "setenv() is not available, skipping this test"
        return STARPU_TEST_SKIPPED;
#else
        setenv("STARPU_SCHED_BETA", "0", 1);

        int i;
        int n_policies = sizeof(policies)/sizeof(policies[0]);
        for (i = 0; i < n_policies; ++i)
        {
                struct starpu_sched_policy *policy = policies[i];
                FPRINTF(stdout, "Running with policy %s.\n", policy->policy_name);

                int ret = run(policy);
                if (ret == 1)
                        return EXIT_FAILURE;
        }
        return EXIT_SUCCESS;
#endif
}