retry.c 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011-2013,2015,2017 CNRS
  4. * Copyright (C) 2017 Inria
  5. * Copyright (C) 2019 Université de Bordeaux
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  /*
   * This tests the fault tolerance interface: it submits a task which
   * repeatedly fails until it eventually succeeds.
   */
  22. #include <starpu.h>
  23. #include "../helper.h"
  /* Count of the failures faked so far by cpu_increment(); main() resets it
   * to 0 before inserting the task. */
  static int retry;

  /*
   * CPU implementation of the codelet: this task fakes some repeated errors.
   *
   * descr[0] is the input variable and descr[1] the output variable; the
   * output is set to input + 1 on every execution.  The first 10 executions
   * then flag the current task as failed through starpu_task_ft_failed();
   * later executions just report success.
   *
   * arg is unused.
   */
  void cpu_increment(void *descr[], void *arg)
  {
      unsigned *in = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
      unsigned *out = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]);

      (void)arg;

      FPRINTF(stderr,"computing\n");
      *out = *in + 1;

      if (retry >= 10)
      {
          /* Enough failures were faked, let this execution go through */
          FPRINTF(stderr,"succeed\n");
          return;
      }

      FPRINTF(stderr,"failing\n");
      retry++;
      /* Fake failure */
      starpu_task_ft_failed(starpu_task_get_current());
  }
  43. static struct starpu_codelet my_codelet =
  44. {
  45. .cpu_funcs = {cpu_increment},
  46. .cpu_funcs_name = {"cpu_increment"},
  47. .modes = { STARPU_R, STARPU_W },
  48. .nbuffers = 2
  49. };
  50. /* This implements the retry strategy
  51. * (Identical to the default implementation: just retry) */
  52. static void check_ft(void *arg)
  53. {
  54. struct starpu_task *meta_task = arg;
  55. struct starpu_task *current_task = starpu_task_get_current();
  56. struct starpu_task *new_task;
  57. int ret;
  58. if (!current_task->failed)
  59. {
  60. FPRINTF(stderr,"didn't fail, release main task\n");
  61. starpu_task_ft_success(meta_task);
  62. return;
  63. }
  64. FPRINTF(stderr,"failed, try again\n");
  65. new_task = starpu_task_ft_create_retry(meta_task, current_task, check_ft);
  66. /* Here we could e.g. force the task to use only a CPU implementation
  67. * known to be failsafe */
  68. ret = starpu_task_submit_nodeps(new_task);
  69. STARPU_ASSERT(!ret);
  70. }
  71. int main(void)
  72. {
  73. int x = 12;
  74. int y = 1;
  75. starpu_data_handle_t h_x, h_y;
  76. int ret, ret1;
  77. ret = starpu_init(NULL);
  78. if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
  79. STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
  80. starpu_variable_data_register(&h_x, STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x));
  81. starpu_variable_data_register(&h_y, STARPU_MAIN_RAM, (uintptr_t)&y, sizeof(y));
  82. retry = 0;
  83. ret1 = starpu_task_insert(&my_codelet,
  84. STARPU_PROLOGUE_CALLBACK, starpu_task_ft_prologue,
  85. STARPU_PROLOGUE_CALLBACK_ARG, check_ft,
  86. STARPU_R, h_x,
  87. STARPU_W, h_y,
  88. 0);
  89. if (ret1 != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret1, "starpu_task_insert");
  90. starpu_task_wait_for_all();
  91. starpu_data_unregister(h_x);
  92. starpu_data_unregister(h_y);
  93. starpu_shutdown();
  94. if (x != 12)
  95. ret = 1;
  96. FPRINTF(stderr, "Value x = %d (expected 12)\n", x);
  97. if (ret1 != -ENODEV)
  98. {
  99. if (y != 13)
  100. ret = 1;
  101. FPRINTF(stderr, "Value y = %d (expected 13)\n", y);
  102. }
  103. STARPU_RETURN(ret);
  104. }