loader.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #include <common/config.h>
  17. #include <sys/types.h>
  18. #include <sys/stat.h>
  19. #include <sys/wait.h>
  20. #include <sys/resource.h>
  21. #include <unistd.h>
  22. #include <stdio.h>
  23. #include <stdlib.h>
  24. #include <signal.h>
  25. #include <string.h>
  26. #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__)
  27. #include <windows.h>
  28. #else
  29. #include <sys/time.h>
  30. #endif
  /*
   * Default time budget, in seconds, granted to a test when neither the -t
   * option nor the environment provides a positive value.
   */
  #if defined(STARPU_QUICK_CHECK)
  /* Quick checks should be really quick: usually below 1s, sometimes 10s */
  #define DEFAULT_TIMEOUT 60
  #elif defined(STARPU_LONG_CHECK)
  /* Long checks can be very long */
  #define DEFAULT_TIMEOUT 1000
  #else
  /* Normal checks should be short: usually below 10s, sometimes 1-2 minutes */
  #define DEFAULT_TIMEOUT 300
  #endif

  /* Exit status for which main() does not print the "exited with return code" message */
  #define AUTOTEST_SKIPPED_TEST 77

  /* PID of the forked test process; stays 0 until main() has forked */
  static pid_t child_pid = 0;

  /* Effective timeout in seconds, computed by main() */
  static int timeout;
  #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__)
  /*
   * Minimal gettimeofday() replacement for native Windows builds.
   *
   * tv: receives the current time as seconds/microseconds since the Unix
   *     epoch; may be NULL, in which case nothing is stored.
   * tz: ignored.
   *
   * Always returns 0.
   */
  static int mygettimeofday(struct timeval *tv, void *tz)
  {
      (void) tz;

      if (tv)
      {
          FILETIME ft;
          unsigned long long res;

          GetSystemTimeAsFileTime(&ft);
          /* 100-nanosecond intervals since January 1, 1601 */
          res = ft.dwHighDateTime;
          res <<= 32;
          res |= ft.dwLowDateTime;
          res /= 10;
          /* Now we have microseconds */
          /* 369 years of 365 days plus 89 leap days separate 1601 from 1970 */
          res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL;
          /* Now we are based on epoch */
          tv->tv_sec = res / 1000000ULL;
          tv->tv_usec = res % 1000000ULL;
      }
      /* The function is declared int: callers must get a defined value */
      return 0;
  }
  #else
  /* Everywhere else the system gettimeofday() is used directly */
  #define mygettimeofday(tv,tz) gettimeofday(tv,tz)
  #endif
  #ifdef STARPU_GDB_PATH
  /*
   * Run gdb in batch mode on the core file `core` of program `exe` and wait
   * for it to finish, so that backtraces end up in the test output.
   *
   * When $top_builddir is set, gdb is started through
   * "$top_builddir/libtool --mode=execute"; otherwise STARPU_GDB_PATH is
   * executed directly.
   *
   * Returns -1 when `core` cannot be stat()ed, is not a regular file, or when
   * fork() fails; returns 0 otherwise (the exit status of gdb is not examined).
   */
  static int try_launch_gdb(const char *exe, const char *core)
  {
  # define GDB_ALL_COMMAND "thread apply all bt full"
  # define GDB_COMMAND "bt full"
      int err;
      pid_t pid;
      struct stat st;
      const char *top_builddir;

      err = stat(core, &st);
      if (err != 0)
      {
          /* %m is the glibc conversion that prints strerror(errno) */
          fprintf(stderr, "while looking for core file of %s: %s: %m\n",
                  exe, core);
          return -1;
      }

      /* S_IFREG is a value of the file-type field, not a flag: a plain bit
       * test would also accept other types sharing that bit (e.g. sockets) */
      if (!S_ISREG(st.st_mode))
      {
          fprintf(stderr, "%s: not a regular file\n", core);
          return -1;
      }

      top_builddir = getenv("top_builddir");

      pid = fork();
      switch (pid)
      {
      case 0: /* kid */
      {
          const char *gdb;

          if (top_builddir != NULL)
          {
              /* Run gdb with Libtool. */
              static const char suffix[] = "/libtool";
              char *wrapper = malloc(strlen(top_builddir) + sizeof(suffix));

              if (wrapper == NULL)
              {
                  fprintf(stderr, "while launching gdb: out of memory\n");
                  exit(EXIT_FAILURE);
              }
              strcpy(wrapper, top_builddir);
              strcat(wrapper, suffix);
              gdb = wrapper;
              /* The list terminator must have pointer type in a variadic call */
              execl(gdb, "gdb", "--mode=execute",
                    STARPU_GDB_PATH, "--batch",
                    "-ex", GDB_COMMAND,
                    "-ex", GDB_ALL_COMMAND,
                    exe, core, (char *) NULL);
          }
          else
          {
              /* Run gdb directly */
              gdb = STARPU_GDB_PATH;
              execl(gdb, "gdb", "--batch",
                    "-ex", GDB_COMMAND,
                    "-ex", GDB_ALL_COMMAND,
                    exe, core, (char *) NULL);
          }

          /* execl() only returns when it failed */
          fprintf(stderr, "while launching `%s': %m\n", gdb);
          exit(EXIT_FAILURE);
      }

      case -1:
          fprintf(stderr, "fork: %m\n");
          return -1;

      default: /* parent */
      {
          pid_t who;
          int status;

          who = waitpid(pid, &status, 0);
          if (who != pid)
              fprintf(stderr, "while waiting for gdb "
                      "process %d: %m\n", (int) pid);
      }
      }

      return 0;
  # undef GDB_COMMAND
  # undef GDB_ALL_COMMAND
  }
  #endif /* STARPU_GDB_PATH */
  /*
   * Print backtraces of the crashed test `exe` when gdb support was
   * configured (STARPU_GDB_PATH defined): the core file "core.<child_pid>" is
   * tried first, then plain "core". Without gdb support this does nothing.
   */
  static void launch_gdb(const char *exe)
  {
  #ifdef STARPU_GDB_PATH
      char s[32];

      /* pid_t is not guaranteed to be int: convert explicitly for %d */
      snprintf(s, sizeof(s), "core.%d", (int) child_pid);
      if (try_launch_gdb(exe, s) < 0)
          try_launch_gdb(exe, "core");
  #else
      (void) exe;
  #endif /* STARPU_GDB_PATH */
  }
  150. static char *test_name;
  151. static void test_cleaner(int sig)
  152. {
  153. pid_t child_gid;
  154. int status;
  155. (void) sig;
  156. // send signal to all loader family members
  157. fprintf(stderr, "[error] test %s has been blocked for %d seconds. Mark it as failed\n", test_name, timeout);
  158. child_gid = getpgid(child_pid);
  159. kill(-child_gid, SIGQUIT);
  160. waitpid(child_pid, &status, 0);
  161. launch_gdb(test_name);
  162. raise(SIGALRM);
  163. exit(EXIT_FAILURE);
  164. }
  165. static void forwardsig(int sig)
  166. {
  167. pid_t child_gid;
  168. child_gid = getpgid(child_pid);
  169. kill(-child_gid, sig);
  170. }
  /*
   * Replace every occurrence of `motif` in the string *src with `value`.
   *
   * The input is scanned once from left to right, so text inserted from
   * `value` is never scanned again (a `value` containing `motif` is fine).
   *
   * Returns 0 and leaves *src untouched when `motif` is empty or does not
   * occur. Otherwise stores a newly malloc()ed string in *src and returns 1;
   * the previous string is not freed since its ownership is unknown here
   * (it may for instance come from getenv()).
   *
   * *src and `motif` must not be NULL; `value` must not be NULL when `motif`
   * occurs in *src. The process exits if memory cannot be allocated.
   */
  static int _decode(char **src, const char *motif, const char *value)
  {
      const size_t motif_len = strlen(motif);
      size_t value_len;
      size_t count = 0;
      const char *scan;
      const char *hit;
      char *result;
      char *out;

      if (motif_len == 0)
          return 0;

      for (scan = *src; (hit = strstr(scan, motif)) != NULL; scan = hit + motif_len)
          count++;
      if (count == 0)
          return 0;

      value_len = strlen(value);
      result = malloc(strlen(*src) - count * motif_len + count * value_len + 1);
      if (result == NULL)
      {
          fprintf(stderr, "error: out of memory while expanding %s\n", motif);
          exit(EXIT_FAILURE);
      }

      out = result;
      for (scan = *src; (hit = strstr(scan, motif)) != NULL; scan = hit + motif_len)
      {
          /* Text before the occurrence, then the replacement */
          memcpy(out, scan, (size_t) (hit - scan));
          out += hit - scan;
          memcpy(out, value, value_len);
          out += value_len;
      }
      /* Remaining tail, including the terminating NUL */
      strcpy(out, scan);

      *src = result;
      return 1;
  }

  /*
   * Expand `motif` into `value` everywhere in *src.
   *
   * Nothing happens when *src is NULL or does not contain `motif`. If `motif`
   * occurs but `value` is NULL, an error is printed and the process exits
   * with EXIT_FAILURE.
   */
  static void decode(char **src, const char *motif, const char *value)
  {
      if (*src == NULL || strstr(*src, motif) == NULL)
          return;

      if (value == NULL)
      {
          fprintf(stderr, "error: $%s undefined\n", motif);
          exit(EXIT_FAILURE);
      }

      _decode(src, motif, value);
  }
  197. int main(int argc, char *argv[])
  198. {
  199. int child_exit_status;
  200. char *test_args;
  201. char *launcher;
  202. char *launcher_args;
  203. char *libtool;
  204. const char *top_builddir = getenv ("top_builddir");
  205. struct sigaction sa;
  206. int ret;
  207. struct timeval start;
  208. struct timeval end;
  209. double timing;
  210. int x=1;
  211. (void) argc;
  212. test_args = NULL;
  213. timeout = 0;
  214. launcher=getenv("STARPU_CHECK_LAUNCHER");
  215. launcher_args=getenv("STARPU_CHECK_LAUNCHER_ARGS");
  216. if (argv[x] && strcmp(argv[x], "-t") == 0)
  217. {
  218. timeout = strtol(argv[x+1], NULL, 10);
  219. x += 2;
  220. }
  221. else if (getenv("STARPU_TIMEOUT_ENV"))
  222. {
  223. /* get user-defined iter_max value */
  224. timeout = strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10);
  225. }
  226. if (timeout <= 0)
  227. {
  228. timeout = DEFAULT_TIMEOUT;
  229. if ((launcher && strstr(launcher, "valgrind")) ||
  230. (launcher && strstr(launcher, "helgrind")) ||
  231. getenv("TSAN_OPTIONS") != NULL)
  232. timeout *= 20;
  233. if (getenv("ASAN_OPTIONS") != NULL ||
  234. getenv("USAN_OPTIONS") != NULL ||
  235. getenv("LSAN_OPTIONS") != NULL)
  236. timeout *= 5;
  237. }
  238. #ifdef STARPU_SIMGRID
  239. #ifdef STARPU_DEBUG
  240. timeout *= 20;
  241. #endif
  242. #endif
  243. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  244. /* compare values between the 2 values of timeout */
  245. if (getenv("MPIEXEC_TIMEOUT"))
  246. {
  247. int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10);
  248. if (mpiexec_timeout != timeout)
  249. fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout);
  250. }
  251. #endif
  252. if (argv[x] && strcmp(argv[x], "-p") == 0)
  253. {
  254. test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1);
  255. sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]);
  256. x += 3;
  257. }
  258. else
  259. {
  260. test_name = argv[x];
  261. x += 1;
  262. }
  263. if (!test_name)
  264. {
  265. fprintf(stderr, "[error] Need name of program to start\n");
  266. exit(EXIT_FAILURE);
  267. }
  268. size_t len = strlen(test_name);
  269. if (len >= 3 &&
  270. test_name[len-3] == '.' &&
  271. test_name[len-2] == 's' &&
  272. test_name[len-1] == 'h')
  273. {
  274. /* This is a shell script, don't run ourself on bash, but make
  275. * the script call us for each program invocation */
  276. setenv("STARPU_LAUNCH", argv[0], 1);
  277. execvp(test_name, argv+x-1);
  278. fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name);
  279. exit(EXIT_FAILURE);
  280. }
  281. if (strstr(test_name, "spmv/dw_block_spmv"))
  282. {
  283. test_args = (char *) calloc(512, sizeof(char));
  284. snprintf(test_args, 512, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR);
  285. }
  286. else if (strstr(test_name, "starpu_perfmodel_display"))
  287. {
  288. if (x >= argc)
  289. test_args = strdup("-l");
  290. }
  291. else if (strstr(test_name, "starpu_perfmodel_plot"))
  292. {
  293. if (x >= argc)
  294. test_args = strdup("-l");
  295. }
  296. /* get launcher program */
  297. if (launcher_args)
  298. launcher_args=strdup(launcher_args);
  299. if (top_builddir == NULL)
  300. {
  301. fprintf(stderr,
  302. "warning: $top_builddir undefined, "
  303. "so $STARPU_CHECK_LAUNCHER ignored\n");
  304. launcher = NULL;
  305. launcher_args = NULL;
  306. libtool = NULL;
  307. }
  308. else
  309. {
  310. libtool = malloc(strlen(top_builddir) + 1 + strlen("libtool") + 1);
  311. strcpy(libtool, top_builddir);
  312. strcat(libtool, "/libtool");
  313. }
  314. if (launcher)
  315. {
  316. const char *top_srcdir = getenv("top_srcdir");
  317. decode(&launcher, "@top_srcdir@", top_srcdir);
  318. decode(&launcher_args, "@top_srcdir@", top_srcdir);
  319. }
  320. setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1);
  321. /* set SIGALARM handler */
  322. sa.sa_flags = SA_RESETHAND | SA_NODEFER;
  323. sigemptyset(&sa.sa_mask);
  324. sa.sa_handler = test_cleaner;
  325. if (-1 == sigaction(SIGALRM, &sa, NULL))
  326. perror("sigaction");
  327. signal(SIGINT, forwardsig);
  328. signal(SIGHUP, forwardsig);
  329. signal(SIGPIPE, forwardsig);
  330. signal(SIGTERM, forwardsig);
  331. child_pid = fork();
  332. if (child_pid == 0)
  333. {
  334. char *launcher_argv[100];
  335. int i=0;
  336. setpgid(0, 0);
  337. /* "Launchers" such as Valgrind need to be inserted
  338. * after the Libtool-generated wrapper scripts, hence
  339. * this special-case. */
  340. if (launcher && top_builddir != NULL)
  341. {
  342. launcher_argv[i++] = libtool;
  343. launcher_argv[i++] = "--mode=execute";
  344. launcher_argv[i++] = launcher;
  345. if (launcher_args)
  346. {
  347. launcher_argv[i++] = strtok(launcher_args, " ");
  348. while (launcher_argv[i-1])
  349. {
  350. launcher_argv[i++] = strtok(NULL, " ");
  351. }
  352. }
  353. }
  354. launcher_argv[i++] = test_name;
  355. if (test_args)
  356. launcher_argv[i++] = test_args;
  357. else while (argv[x])
  358. {
  359. launcher_argv[i++] = argv[x++];
  360. }
  361. #ifdef STARPU_SIMGRID
  362. #ifdef STARPU_DEBUG
  363. launcher_argv[i++] = "--cfg=contexts/factory:thread";
  364. #endif
  365. #endif
  366. launcher_argv[i++] = NULL;
  367. execvp(*launcher_argv, launcher_argv);
  368. fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name);
  369. exit(EXIT_FAILURE);
  370. }
  371. if (child_pid == -1)
  372. {
  373. fprintf(stderr, "[error] fork. test marked as failed\n");
  374. exit(EXIT_FAILURE);
  375. }
  376. free(test_args);
  377. free(libtool);
  378. ret = EXIT_SUCCESS;
  379. gettimeofday(&start, NULL);
  380. alarm(timeout);
  381. if (child_pid == waitpid(child_pid, &child_exit_status, 0))
  382. {
  383. if (WIFEXITED(child_exit_status))
  384. {
  385. int status = WEXITSTATUS(child_exit_status);
  386. if (status == EXIT_SUCCESS)
  387. {
  388. alarm(0);
  389. }
  390. else
  391. {
  392. if (status != AUTOTEST_SKIPPED_TEST)
  393. fprintf(stdout, "`%s' exited with return code %d\n",
  394. test_name, status);
  395. ret = status;
  396. }
  397. }
  398. else if (WIFSIGNALED(child_exit_status))
  399. {
  400. fprintf(stderr, "[error] `%s' killed with signal %d; test marked as failed\n",
  401. test_name, WTERMSIG(child_exit_status));
  402. launch_gdb(test_name);
  403. ret = EXIT_FAILURE;
  404. }
  405. else
  406. {
  407. fprintf(stderr, "[error] `%s' did not terminate normally; test marked as failed\n",
  408. test_name);
  409. ret = EXIT_FAILURE;
  410. }
  411. }
  412. gettimeofday(&end, NULL);
  413. timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
  414. fprintf(stderr, "#Execution_time_in_seconds %f %s\n", timing/1000000, test_name);
  415. return ret;
  416. }