loader.c 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2011,2012,2017 Inria
  4. * Copyright (C) 2011-2019 CNRS
  5. * Copyright (C) 2010,2014-2018 Université de Bordeaux
  6. *
  7. * StarPU is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation; either version 2.1 of the License, or (at
  10. * your option) any later version.
  11. *
  12. * StarPU is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. *
  16. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  17. */
  18. #include <common/config.h>
  19. #include <sys/types.h>
  20. #include <sys/stat.h>
  21. #include <sys/wait.h>
  22. #include <sys/resource.h>
  23. #include <unistd.h>
  24. #include <stdio.h>
  25. #include <stdlib.h>
  26. #include <signal.h>
  27. #include <string.h>
  28. #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__)
  29. #include <windows.h>
  30. #else
  31. #include <sys/time.h>
  32. #endif
  /*
   * Default per-test timeout, in seconds, selected at build time by the
   * check level.
   */
  #if defined(STARPU_QUICK_CHECK)
  /* Quick checks are supposed to be real quick, typically less than 1s each, sometimes 10s */
  #define DEFAULT_TIMEOUT 60
  #elif defined(STARPU_LONG_CHECK)
  /* Long checks can be very long */
  #define DEFAULT_TIMEOUT 1800
  #else
  /* Normal checks are supposed to be short enough, typically less than 10s each, sometimes 1-2m */
  #define DEFAULT_TIMEOUT 300
  #endif

  /* Exit status that main() forwards without printing the "exited with return code" message. */
  #define AUTOTEST_SKIPPED_TEST 77

  /* PID of the forked test process; stays 0 until main() has forked. */
  static pid_t child_pid;

  /* Effective timeout in seconds, handed to alarm() by main(). */
  static int timeout;
  #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__)
  /*
   * gettimeofday() replacement for native Windows builds.
   *
   * Fills *tv (when non-NULL) with the current time expressed as seconds and
   * microseconds since the Unix epoch, derived from GetSystemTimeAsFileTime().
   * tz is ignored.  Always returns 0.
   */
  static int mygettimeofday(struct timeval *tv, void *tz)
  {
      (void) tz;
      if (tv)
      {
          FILETIME ft;
          unsigned long long res;

          GetSystemTimeAsFileTime(&ft);
          /* 100-nanosecond intervals since January 1, 1601 */
          res = ft.dwHighDateTime;
          res <<= 32;
          res |= ft.dwLowDateTime;
          res /= 10;
          /* Now we have microseconds */
          res -= (((1970-1601)*365) + 89) * 24ULL * 3600ULL * 1000000ULL;
          /* Now we are based on epoch */
          tv->tv_sec = (long) (res / 1000000ULL);
          tv->tv_usec = (long) (res % 1000000ULL);
      }
      /* The function is declared int: report success explicitly instead of
       * falling off the end. */
      return 0;
  }
  #else
  #define mygettimeofday(tv,tz) gettimeofday(tv,tz)
  #endif
  69. #ifdef STARPU_GDB_PATH
  70. static int try_launch_gdb(const char *exe, const char *core)
  71. {
  72. # define GDB_ALL_COMMAND "thread apply all bt full"
  73. # define GDB_COMMAND "bt full"
  74. int err;
  75. pid_t pid;
  76. struct stat st;
  77. const char *top_builddir;
  78. char *gdb;
  79. err = stat(core, &st);
  80. if (err != 0)
  81. {
  82. fprintf(stderr, "while looking for core file of %s: %s: %m\n",
  83. exe, core);
  84. return -1;
  85. }
  86. if (!(st.st_mode & S_IFREG))
  87. {
  88. fprintf(stderr, "%s: not a regular file\n", core);
  89. return -1;
  90. }
  91. top_builddir = getenv("top_builddir");
  92. pid = fork();
  93. switch (pid)
  94. {
  95. case 0: /* kid */
  96. if (top_builddir != NULL)
  97. {
  98. /* Run gdb with Libtool. */
  99. gdb = alloca(strlen(top_builddir)
  100. + sizeof("/libtool") + 1);
  101. strcpy(gdb, top_builddir);
  102. strcat(gdb, "/libtool");
  103. err = execl(gdb, "gdb", "--mode=execute",
  104. STARPU_GDB_PATH, "--batch",
  105. "-ex", GDB_COMMAND,
  106. "-ex", GDB_ALL_COMMAND,
  107. exe, core, NULL);
  108. }
  109. else
  110. {
  111. /* Run gdb directly */
  112. gdb = STARPU_GDB_PATH;
  113. err = execl(gdb, "gdb", "--batch",
  114. "-ex", GDB_COMMAND,
  115. "-ex", GDB_ALL_COMMAND,
  116. exe, core, NULL);
  117. }
  118. if (err != 0)
  119. {
  120. fprintf(stderr, "while launching `%s': %m\n", gdb);
  121. exit(EXIT_FAILURE);
  122. }
  123. exit(EXIT_SUCCESS);
  124. break;
  125. case -1:
  126. fprintf(stderr, "fork: %m\n");
  127. return -1;
  128. default: /* parent */
  129. {
  130. pid_t who;
  131. int status;
  132. who = waitpid(pid, &status, 0);
  133. if (who != pid)
  134. fprintf(stderr, "while waiting for gdb "
  135. "process %d: %m\n", pid);
  136. }
  137. }
  138. return 0;
  139. # undef GDB_COMMAND
  140. # undef GDB_ALL_COMMAND
  141. }
  142. #endif /* STARPU_GDB_PATH */
  /*
   * Ask gdb for backtraces of the test program EXE.
   *
   * The core file "core.<child_pid>" is tried first; if that attempt reports
   * a failure, plain "core" is tried.  Does nothing when the build does not
   * define STARPU_GDB_PATH.
   */
  static void launch_gdb(const char *exe)
  {
  #ifdef STARPU_GDB_PATH
      char core_name[32];
      int rc;

      snprintf(core_name, sizeof(core_name), "core.%d", child_pid);
      rc = try_launch_gdb(exe, core_name);
      if (rc >= 0)
          return;
      try_launch_gdb(exe, "core");
  #endif /* STARPU_GDB_PATH */
  }
  152. static char *test_name;
  153. static void test_cleaner(int sig)
  154. {
  155. pid_t child_gid;
  156. int status;
  157. // send signal to all loader family members
  158. fprintf(stderr, "[error] test %s has been blocked for %d seconds. Mark it as failed\n", test_name, timeout);
  159. child_gid = getpgid(child_pid);
  160. kill(-child_gid, SIGQUIT);
  161. waitpid(child_pid, &status, 0);
  162. launch_gdb(test_name);
  163. exit(EXIT_FAILURE);
  164. }
  /*
   * Replace the first occurrence of MOTIF in *SRC by VALUE.
   *
   * Returns 0 and leaves *SRC untouched when MOTIF does not occur.
   * Otherwise *SRC is pointed at a newly allocated string holding the result
   * and 1 is returned.  The previous *SRC is not freed here because it may
   * not be heap memory.  VALUE must not be NULL when MOTIF occurs.
   * Terminates the program if memory cannot be allocated.
   */
  static int _decode(char **src, char *motif, const char *value)
  {
      const char *found = strstr(*src, motif);
      size_t head_len, motif_len, value_len, tail_len;
      char *new_src;

      if (found == NULL)
          return 0;

      head_len = (size_t) (found - *src);
      motif_len = strlen(motif);
      value_len = strlen(value);
      tail_len = strlen(found + motif_len);

      /* calloc() zero-fills, which provides the terminating NUL. */
      new_src = calloc(1, head_len + value_len + tail_len + 1);
      if (new_src == NULL)
      {
          fprintf(stderr, "error: out of memory while expanding %s\n", motif);
          exit(EXIT_FAILURE);
      }

      memcpy(new_src, *src, head_len);
      memcpy(new_src + head_len, value, value_len);
      memcpy(new_src + head_len + value_len, found + motif_len, tail_len);

      *src = new_src;
      return 1;
  }

  /*
   * Replace every occurrence of MOTIF in *SRC by VALUE.
   *
   * Nothing happens when *SRC is NULL, when MOTIF is empty, or when MOTIF
   * does not occur in *SRC.  When MOTIF occurs but VALUE is NULL, an error
   * is printed and the program exits.  When at least one substitution is
   * made, *SRC ends up pointing at a heap-allocated string; the string it
   * pointed at on entry is never freed.
   */
  static void decode(char **src, char *motif, const char *value)
  {
      char *expanded = NULL;

      /* An empty motif matches everywhere and would be substituted forever. */
      if (*src == NULL || *motif == '\0' || strstr(*src, motif) == NULL)
          return;

      if (value == NULL)
      {
          fprintf(stderr, "error: $%s undefined\n", motif);
          exit(EXIT_FAILURE);
      }

      /* Each substitution would re-insert the motif: the loop below would
       * never finish. */
      if (strstr(value, motif) != NULL)
      {
          fprintf(stderr, "error: value of %s contains %s itself\n", motif, motif);
          exit(EXIT_FAILURE);
      }

      while (_decode(src, motif, value))
      {
          /* Release the intermediate string produced by the previous
           * round; the caller's original string is left alone. */
          free(expanded);
          expanded = *src;
      }
  }
  191. int main(int argc, char *argv[])
  192. {
  193. int child_exit_status;
  194. char *test_args;
  195. char *launcher;
  196. char *launcher_args;
  197. struct sigaction sa;
  198. int ret;
  199. struct timeval start;
  200. struct timeval end;
  201. double timing;
  202. int x=1;
  203. test_args = NULL;
  204. timeout = 0;
  205. if (argv[x] && strcmp(argv[x], "-t") == 0)
  206. {
  207. timeout = strtol(argv[x+1], NULL, 10);
  208. x += 2;
  209. }
  210. else if (getenv("STARPU_TIMEOUT_ENV"))
  211. {
  212. /* get user-defined iter_max value */
  213. timeout = strtol(getenv("STARPU_TIMEOUT_ENV"), NULL, 10);
  214. }
  215. if (timeout <= 0)
  216. timeout = DEFAULT_TIMEOUT;
  217. #ifdef STARPU_USE_MPI_MASTER_SLAVE
  218. /* compare values between the 2 values of timeout */
  219. if (getenv("MPIEXEC_TIMEOUT"))
  220. {
  221. int mpiexec_timeout = strtol(getenv("MPIEXEC_TIMEOUT"), NULL, 10);
  222. if (mpiexec_timeout != timeout)
  223. fprintf(stderr, "[warning] MPIEXEC_TIMEOUT and STARPU_TIMEOUT_ENV values are different (%d and %d). The behavior may be different than expected !\n", mpiexec_timeout, timeout);
  224. }
  225. #endif
  226. if (argv[x] && strcmp(argv[x], "-p") == 0)
  227. {
  228. test_name = malloc(strlen(argv[x+1]) + 1 + strlen(argv[x+2]) + 1);
  229. sprintf(test_name, "%s/%s", argv[x+1], argv[x+2]);
  230. }
  231. else
  232. test_name = argv[x];
  233. if (!test_name)
  234. {
  235. fprintf(stderr, "[error] Need name of program to start\n");
  236. exit(EXIT_FAILURE);
  237. }
  238. if (strstr(test_name, "tasks_size_overhead_scheds.sh") || strstr(test_name, "schedulers.sh"))
  239. /* This extensively tests various schedulers, let it run longer */
  240. timeout *= 10;
  241. if (strstr(test_name, "spmv/dw_block_spmv"))
  242. {
  243. test_args = (char *) calloc(150, sizeof(char));
  244. snprintf(test_args, 150, "%s/examples/spmv/matrix_market/examples/fidapm05.mtx", STARPU_SRC_DIR);
  245. }
  246. else if (strstr(test_name, "starpu_perfmodel_display"))
  247. {
  248. test_args = strdup("-l");
  249. }
  250. else if (strstr(test_name, "starpu_perfmodel_plot"))
  251. {
  252. test_args = strdup("-l");
  253. }
  254. /* get launcher program */
  255. launcher=getenv("STARPU_CHECK_LAUNCHER");
  256. launcher_args=getenv("STARPU_CHECK_LAUNCHER_ARGS");
  257. if (launcher_args)
  258. launcher_args=strdup(launcher_args);
  259. setenv("STARPU_OPENCL_PROGRAM_DIR", STARPU_SRC_DIR, 1);
  260. /* set SIGALARM handler */
  261. sa.sa_flags = 0;
  262. sigemptyset(&sa.sa_mask);
  263. sa.sa_handler = test_cleaner;
  264. if (-1 == sigaction(SIGALRM, &sa, NULL))
  265. perror("sigaction");
  266. child_pid = fork();
  267. if (child_pid == 0)
  268. {
  269. if (launcher)
  270. {
  271. /* "Launchers" such as Valgrind need to be inserted
  272. * after the Libtool-generated wrapper scripts, hence
  273. * this special-case. */
  274. const char *top_builddir = getenv ("top_builddir");
  275. const char *top_srcdir = getenv("top_srcdir");
  276. if (top_builddir != NULL)
  277. {
  278. char *launcher_argv[100];
  279. int i=3;
  280. char libtool[strlen(top_builddir)
  281. + sizeof("libtool") + 1];
  282. strcpy(libtool, top_builddir);
  283. strcat(libtool, "/libtool");
  284. decode(&launcher, "@top_srcdir@", top_srcdir);
  285. decode(&launcher_args, "@top_srcdir@", top_srcdir);
  286. launcher_argv[0] = libtool;
  287. launcher_argv[1] = "--mode=execute";
  288. launcher_argv[2] = launcher;
  289. if (launcher_args)
  290. {
  291. launcher_argv[i] = strtok(launcher_args, " ");
  292. while (launcher_argv[i])
  293. {
  294. i++;
  295. launcher_argv[i] = strtok(NULL, " ");
  296. }
  297. }
  298. launcher_argv[i] = test_name;
  299. launcher_argv[i+1] = test_args;
  300. launcher_argv[i+2] = NULL;
  301. execvp(*launcher_argv, launcher_argv);
  302. }
  303. else
  304. {
  305. fprintf(stderr,
  306. "warning: $top_builddir undefined, "
  307. "so $STARPU_CHECK_LAUNCHER ignored\n");
  308. execl(test_name, test_name, test_args, NULL);
  309. }
  310. }
  311. else
  312. execl(test_name, test_name, test_args, NULL);
  313. fprintf(stderr, "[error] '%s' failed to exec. test marked as failed\n", test_name);
  314. exit(EXIT_FAILURE);
  315. }
  316. if (child_pid == -1)
  317. {
  318. fprintf(stderr, "[error] fork. test marked as failed\n");
  319. exit(EXIT_FAILURE);
  320. }
  321. free(test_args);
  322. ret = EXIT_SUCCESS;
  323. gettimeofday(&start, NULL);
  324. alarm(timeout);
  325. if (child_pid == waitpid(child_pid, &child_exit_status, 0))
  326. {
  327. if (WIFEXITED(child_exit_status))
  328. {
  329. int status = WEXITSTATUS(child_exit_status);
  330. if (status == EXIT_SUCCESS)
  331. {
  332. alarm(0);
  333. }
  334. else
  335. {
  336. if (status != AUTOTEST_SKIPPED_TEST)
  337. fprintf(stdout, "`%s' exited with return code %d\n",
  338. test_name, status);
  339. ret = status;
  340. }
  341. }
  342. else if (WIFSIGNALED(child_exit_status))
  343. {
  344. fprintf(stderr, "[error] `%s' killed with signal %d; test marked as failed\n",
  345. test_name, WTERMSIG(child_exit_status));
  346. launch_gdb(test_name);
  347. ret = EXIT_FAILURE;
  348. }
  349. else
  350. {
  351. fprintf(stderr, "[error] `%s' did not terminate normally; test marked as failed\n",
  352. test_name);
  353. ret = EXIT_FAILURE;
  354. }
  355. }
  356. gettimeofday(&end, NULL);
  357. timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
  358. fprintf(stderr, "#Execution_time_in_seconds %f %s\n", timing/1000000, test_name);
  359. return ret;
  360. }