retry.c 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. /*
  17. * This tests the fault tolerance interface: it submits a task which repeatedly
  18. * fails until it eventually succeeds
  19. */
  20. #include <starpu.h>
  21. #include "../helper.h"
  /* Number of attempts that cpu_increment() has already reported as failed. */
  static int retry;

  /*
   * CPU kernel that fakes some repeated errors.
   *
   * It stores (*descr[0] + 1) into descr[1] on every run.  While fewer than
   * 10 failures have been reported so far, it then bumps the failure counter
   * and flags the current task through starpu_task_ft_failed(); once the
   * counter has reached 10 the run is reported as a success.
   *
   * descr: task buffers; [0] is read, [1] is written.
   * arg:   unused.
   */
  void cpu_increment(void *descr[], void *arg)
  {
      unsigned *input = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
      unsigned *output = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]);

      (void)arg;

      FPRINTF(stderr,"computing\n");
      *output = *input + 1;

      if (retry >= 10)
      {
          FPRINTF(stderr,"succeed\n");
          return;
      }

      FPRINTF(stderr,"failing\n");
      retry++;
      /* Fake failure */
      starpu_task_ft_failed(starpu_task_get_current());
  }
  41. static struct starpu_codelet my_codelet =
  42. {
  43. .cpu_funcs = {cpu_increment},
  44. //.cpu_funcs_name = {"cpu_increment"},
  45. .modes = { STARPU_R, STARPU_W },
  46. .nbuffers = 2
  47. };
  48. /* This implements the retry strategy
  49. * (Identical to the default implementation: just retry) */
  50. static void check_ft(void *arg)
  51. {
  52. struct starpu_task *meta_task = arg;
  53. struct starpu_task *current_task = starpu_task_get_current();
  54. struct starpu_task *new_task;
  55. int ret;
  56. if (!current_task->failed)
  57. {
  58. FPRINTF(stderr,"didn't fail, release main task\n");
  59. starpu_task_ft_success(meta_task);
  60. return;
  61. }
  62. FPRINTF(stderr,"failed, try again\n");
  63. new_task = starpu_task_ft_create_retry(meta_task, current_task, check_ft);
  64. /* Here we could e.g. force the task to use only a CPU implementation
  65. * known to be failsafe */
  66. ret = starpu_task_submit_nodeps(new_task);
  67. STARPU_ASSERT(!ret);
  68. }
  69. int main(void)
  70. {
  71. int x = 12;
  72. int y = 1;
  73. starpu_data_handle_t h_x, h_y;
  74. int ret, ret1;
  75. if (starpu_get_env_number_default("STARPU_GLOBAL_ARBITER", 0) > 0)
  76. /* TODO _submit_job_take_data_deps */
  77. return STARPU_TEST_SKIPPED;
  78. ret = starpu_init(NULL);
  79. if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
  80. STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
  81. starpu_variable_data_register(&h_x, STARPU_MAIN_RAM, (uintptr_t)&x, sizeof(x));
  82. starpu_variable_data_register(&h_y, STARPU_MAIN_RAM, (uintptr_t)&y, sizeof(y));
  83. retry = 0;
  84. ret1 = starpu_task_insert(&my_codelet,
  85. STARPU_PROLOGUE_CALLBACK, starpu_task_ft_prologue,
  86. STARPU_PROLOGUE_CALLBACK_ARG_NFREE, check_ft,
  87. STARPU_R, h_x,
  88. STARPU_W, h_y,
  89. 0);
  90. if (ret1 != -ENODEV) STARPU_CHECK_RETURN_VALUE(ret1, "starpu_task_insert");
  91. starpu_task_wait_for_all();
  92. starpu_data_unregister(h_x);
  93. starpu_data_unregister(h_y);
  94. starpu_shutdown();
  95. if (x != 12)
  96. ret = 1;
  97. FPRINTF(stderr, "Value x = %d (expected 12)\n", x);
  98. if (ret1 != -ENODEV)
  99. {
  100. if (y != 13)
  101. ret = 1;
  102. FPRINTF(stderr, "Value y = %d (expected 13)\n", y);
  103. }
  104. STARPU_RETURN(ret);
  105. }