/* parallel_tasks_with_cluster_api.c */
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2015 Université de Bordeaux
  4. * Copyright (C) 2015 INRIA
  5. * Copyright (C) 2015 CNRS
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
#include <string.h>
#include <starpu.h>
#include <omp.h>
/* Number of tasks to submit; reduced when StarPU is built with --enable-quick-check. */
#ifdef STARPU_QUICK_CHECK
#define NTASKS 8
#else
#define NTASKS 32
#endif
/* Number of double elements in each registered vector. */
#define SIZE 4000
  26. /* Codelet SUM */
  27. static void sum_cpu(void * descr[], void *cl_arg)
  28. {
  29. double * v_dst = (double *) STARPU_VECTOR_GET_PTR(descr[0]);
  30. double * v_src0 = (double *) STARPU_VECTOR_GET_PTR(descr[1]);
  31. double * v_src1 = (double *) STARPU_VECTOR_GET_PTR(descr[1]);
  32. int size;
  33. starpu_codelet_unpack_args(cl_arg, &size);
  34. int i, k;
  35. for (k=0;k<10;k++)
  36. {
  37. #pragma omp parallel for
  38. for (i=0; i<size; i++)
  39. {
  40. v_dst[i]+=v_src0[i]+v_src1[i];
  41. }
  42. }
  43. }
/* Codelet descriptor: one CPU implementation (NULL-terminated list) and
 * three data buffers — the destination accessed read-write, followed by
 * two read-only sources. */
static struct starpu_codelet sum_cl =
{
.cpu_funcs = {sum_cpu, NULL},
.nbuffers = 3,
.modes={STARPU_RW,STARPU_R, STARPU_R}
};
  50. int main(int argc, char **argv)
  51. {
  52. int ntasks = NTASKS;
  53. int ret, i;
  54. struct starpu_cluster_machine *clusters;
  55. ret = starpu_init(NULL);
  56. if (ret == -ENODEV)
  57. return 77;
  58. STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
  59. /* We regroup resources under each sockets into a cluster. We express a partition
  60. * of one socket to create two internal clusters */
  61. clusters = starpu_cluster_machine(HWLOC_OBJ_SOCKET,
  62. STARPU_CLUSTER_PARTITION_ONE, STARPU_CLUSTER_NB, 2,
  63. 0);
  64. starpu_cluster_print(clusters);
  65. /* Data preparation */
  66. double array1[SIZE];
  67. double array2[SIZE];
  68. memset(array1, 0, sizeof(double));
  69. for (i=0;i<SIZE;i++)
  70. {
  71. array2[i]=i*2;
  72. }
  73. starpu_data_handle_t handle1;
  74. starpu_data_handle_t handle2;
  75. starpu_vector_data_register(&handle1, 0, (uintptr_t)array1, SIZE, sizeof(double));
  76. starpu_vector_data_register(&handle2, 0, (uintptr_t)array2, SIZE, sizeof(double));
  77. int size = SIZE;
  78. for (i = 0; i < ntasks; i++)
  79. {
  80. struct starpu_task * t;
  81. t=starpu_task_build(&sum_cl,
  82. STARPU_RW,handle1,
  83. STARPU_R,handle2,
  84. STARPU_R,handle1,
  85. STARPU_VALUE,&size,sizeof(int),
  86. 0);
  87. t->destroy = 1;
  88. /* For two tasks, try out the case when the task isn't parallel and expect
  89. the configuration to be sequential due to this, then automatically changed
  90. back to the parallel one */
  91. if (i<=4 || i > 6)
  92. t->possibly_parallel = 1;
  93. /* Note that this mode requires that you put a prologue callback managing
  94. this on all tasks to be taken into account. */
  95. t->prologue_callback_pop_func = &starpu_openmp_prologue;
  96. ret=starpu_task_submit(t);
  97. if (ret == -ENODEV)
  98. goto out;
  99. STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
  100. }
  101. out:
  102. /* wait for all tasks at the end*/
  103. starpu_task_wait_for_all();
  104. starpu_data_unregister(handle1);
  105. starpu_data_unregister(handle2);
  106. starpu_uncluster_machine(clusters);
  107. starpu_shutdown();
  108. return 0;
  109. }