Browse Source

Add checkpoint template struct and constructor, with test file.

Romain LION 5 years ago
parent
commit
3af9ff3cfd

+ 4 - 0
mpi/include/starpu_mpi.h

@@ -24,6 +24,10 @@
 #include <mpi.h>
 #include <stdint.h>
 
+//TODO: #if defined(STARPU_USE_MPI_FT)
+#include <starpu_mpi_ft.h>
+//#endif
+
 #ifdef __cplusplus
 extern "C"
 {

+ 26 - 0
mpi/include/starpu_mpi_ft.h

@@ -0,0 +1,26 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2013-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef FT_STARPU_STARPU_MPI_FT_H
+#define FT_STARPU_STARPU_MPI_FT_H
+
+/* This header is pulled in by starpu_mpi.h before that file opens its own
+ * extern "C" block, so it has to provide C linkage for C++ users itself. */
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/* Opaque handle on a checkpoint template; the structure itself is private
+ * to the library. */
+struct _starpu_mpi_checkpoint_template;
+typedef struct _starpu_mpi_checkpoint_template* starpu_mpi_checkpoint_template;
+
+/* Build a checkpoint template from a 0-terminated argument list and store
+ * the newly allocated template in *cp_template. Each entry is one of:
+ *   STARPU_R,     <pointer to a data handle>, <backup rank>
+ *   STARPU_VALUE, <pointer to the value>, <size>, <backup rank>
+ * Returns 0. */
+int starpu_mpi_checkpoint_template_register(starpu_mpi_checkpoint_template* cp_template, ...);
+
+/* Debug helper: print one line per item of the template, interpreting the
+ * referenced data as an int. Returns 0. */
+int starpu_mpi_checkpoint_template_print(starpu_mpi_checkpoint_template cp_template);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //FT_STARPU_STARPU_MPI_FT_H

+ 2 - 0
mpi/src/Makefile.am

@@ -64,6 +64,7 @@ noinst_HEADERS =					\
 	starpu_mpi_stats.h				\
 	starpu_mpi_datatype.h				\
 	starpu_mpi_cache.h				\
+	starpu_mpi_checkpoint.h		\
 	starpu_mpi_select_node.h			\
 	starpu_mpi_cache_stats.h			\
 	starpu_mpi_task_insert.h			\
@@ -92,6 +93,7 @@ libstarpumpi_@STARPU_EFFECTIVE_VERSION@_la_SOURCES =	\
 	starpu_mpi_stats.c				\
 	starpu_mpi_private.c				\
 	starpu_mpi_cache.c				\
+	starpu_mpi_checkpoint.c				\
 	starpu_mpi_select_node.c			\
 	starpu_mpi_cache_stats.c			\
 	starpu_mpi_fortran.c				\

+ 93 - 0
mpi/src/starpu_mpi_checkpoint.c

@@ -0,0 +1,93 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2014-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+
+#include <stdarg.h>
+#include <common/utils.h>
+
+#include <starpu_mpi_checkpoint.h>
+
+/*
+ * Build a checkpoint template from a 0-terminated argument list.
+ *
+ * cp_template: output; receives the newly allocated template. No release
+ *              function is visible in this file, so the caller keeps the
+ *              allocation.
+ * varg_list:   sequence of entries, terminated by 0:
+ *                STARPU_R,     void* (pointer to a data handle), int backup_rank
+ *                STARPU_VALUE, void* (pointer to the value), size_t size,
+ *                              int backup_rank
+ *
+ * Aborts on more than CHECKPOINT_STRUCTURE_MAX_SIZE entries, on an access
+ * mode carrying STARPU_COMMUTE, on STARPU_DATA_ARRAY (not supported yet) and
+ * on any unrecognized entry type. Returns 0 otherwise.
+ */
+int _starpu_mpi_checkpoint_template_register(starpu_mpi_checkpoint_template* cp_template, va_list varg_list) {
+
+    int i = 0;
+    int arg_type;
+
+    starpu_mpi_checkpoint_template _cp_template;
+    _STARPU_MALLOC(_cp_template, sizeof(*_cp_template));
+
+    /* Work on a copy so the caller's va_list is left untouched. */
+    va_list varg_list_copy;
+    va_copy(varg_list_copy, varg_list);
+
+    while ((arg_type = va_arg(varg_list_copy, int)) != 0) {
+
+        if (i == CHECKPOINT_STRUCTURE_MAX_SIZE) {
+            STARPU_ABORT_MSG("Unable to treat more data (CHECKPOINT_STRUCTURE_MAX_SIZE == %d).\n", \
+                   CHECKPOINT_STRUCTURE_MAX_SIZE);
+        }
+
+        if (arg_type & STARPU_COMMUTE) {
+            STARPU_ABORT_MSG("Unable to checkpoint non sequential task flow.\n");
+        } else if (arg_type==STARPU_R) {
+            _cp_template->items[i].type = STARPU_R;
+            _cp_template->items[i].ptr = va_arg(varg_list_copy, void*);
+            /* No size is given for a data handle; keep the field defined. */
+            _cp_template->items[i].count = 0;
+            _cp_template->items[i].backup_rank = va_arg(varg_list_copy, int);
+        } else if (arg_type==STARPU_VALUE) {
+            _cp_template->items[i].type = STARPU_VALUE;
+            _cp_template->items[i].ptr = va_arg(varg_list_copy, void*);
+            /* Callers pass sizeof(...), i.e. a size_t: fetching it as an int
+             * would be undefined behaviour and misread the following
+             * arguments on ABIs where the two types differ in size. */
+            _cp_template->items[i].count = (int) va_arg(varg_list_copy, size_t);
+            _cp_template->items[i].backup_rank = va_arg(varg_list_copy, int);
+        } else if (arg_type==STARPU_DATA_ARRAY) {
+            /* Silently skipping this entry would record an uninitialized
+             * item and leave its arguments unconsumed, desynchronizing the
+             * rest of the list. */
+            STARPU_ABORT_MSG("STARPU_DATA_ARRAY is not supported in checkpoint templates yet.\n");
+        } else {
+            STARPU_ABORT_MSG("Unrecognized argument %d, did you perhaps forget to end arguments with 0?\n", \
+                   arg_type);
+        }
+
+        i ++;
+    }
+    va_end(varg_list_copy);
+
+    _cp_template->size = i;
+    /* NOTE(review): hard-coded identifier; presumably a placeholder until
+     * real checkpoint ids are generated -- confirm. */
+    _cp_template->checkpoint_id = 50909;
+
+    *cp_template = _cp_template;
+
+    return 0;
+}
+
+/*
+ * Public variadic entry point: packs the trailing arguments into a va_list
+ * and forwards them to _starpu_mpi_checkpoint_template_register(), whose
+ * return value is passed back unchanged.
+ */
+int starpu_mpi_checkpoint_template_register(starpu_mpi_checkpoint_template* cp_template, ...) {
+    va_list args;
+    int status;
+
+    va_start(args, cp_template);
+    status = _starpu_mpi_checkpoint_template_register(cp_template, args);
+    va_end(args);
+
+    return status;
+}
+
+/*
+ * Debug helper: print one line per template item on stderr.
+ *
+ * STARPU_VALUE items are printed by reading an int through the stored
+ * pointer; STARPU_R items by reading an int from the memory-node-0 pointer of
+ * the referenced data handle. Any other type is reported as unrecognized.
+ * Always returns 0.
+ */
+int starpu_mpi_checkpoint_template_print(starpu_mpi_checkpoint_template cp_template) {
+    for (int i = 0; i < cp_template->size; i++) {
+        const struct _starpu_mpi_checkpoint_template_item *item = &cp_template->items[i];
+
+        /* Everything goes to stderr: splitting the prefix and the value
+         * between stderr and stdout breaks each line apart as soon as the
+         * two streams are buffered or redirected differently. */
+        fprintf(stderr, "Item %2d: ", i);
+        if (item->type == STARPU_VALUE) {
+            fprintf(stderr, "STARPU_VALUE - Value=%d\n", *(int *)(item->ptr));
+        } else if (item->type == STARPU_R) {
+            /* ptr holds the address of the caller's data handle. */
+            int val = *(int *)starpu_data_handle_to_pointer(*(starpu_data_handle_t *)(item->ptr), 0);
+            fprintf(stderr, "STARPU_R - Value=%d\n", val);
+        } else {
+            fprintf(stderr, "Unrecognized type.\n");
+        }
+    }
+    return 0;
+}

+ 38 - 0
mpi/src/starpu_mpi_checkpoint.h

@@ -0,0 +1,38 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2014-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef FT_STARPU_STARPU_MPI_CHECKPOINT_H
+#define FT_STARPU_STARPU_MPI_CHECKPOINT_H
+
+#include <starpu_mpi.h>
+
+/* Maximum number of items a checkpoint template can hold; it sizes the
+ * items[] array of struct _starpu_mpi_checkpoint_template. */
+#define CHECKPOINT_STRUCTURE_MAX_SIZE 32
+
+/* One entry of a checkpoint template, filled from the argument list given to
+ * the template register function. */
+struct _starpu_mpi_checkpoint_template_item{
+    /* Entry kind as given in the argument list: STARPU_R or STARPU_VALUE. */
+    int type;
+    /* Pointer argument following the type. For STARPU_R the print routine
+     * reads it as a starpu_data_handle_t*; for STARPU_VALUE it points to the
+     * value itself. */
+    void* ptr;
+    /* Size argument of a STARPU_VALUE entry; only meaningful for
+     * STARPU_VALUE items. */
+    int count;
+    /* Last argument of the entry; presumably the MPI rank that keeps the
+     * backup copy -- TODO confirm. */
+    int backup_rank;
+};
+
+/* Checkpoint template: a fixed-capacity list of items plus an identifier.
+ * The public API exposes it only as the opaque handle type
+ * starpu_mpi_checkpoint_template. */
+struct _starpu_mpi_checkpoint_template{
+    /* Item storage; only the first `size` entries are filled in. */
+    struct _starpu_mpi_checkpoint_template_item items[CHECKPOINT_STRUCTURE_MAX_SIZE];
+    /* Number of items recorded by the register function. */
+    int size;
+    /* Identifier of the checkpoint; the register function currently stores a
+     * fixed value here. */
+    int checkpoint_id;
+};
+
+
+#endif //FT_STARPU_STARPU_MPI_CHECKPOINT_H

+ 1 - 0
mpi/tests/Makefile.am

@@ -194,6 +194,7 @@ noinst_PROGRAMS =				\
 	cache					\
 	cache_disable				\
 	callback				\
+	checkpoints				\
 	matrix					\
 	matrix2					\
 	insert_task				\

+ 59 - 0
mpi/tests/checkpoints.c

@@ -0,0 +1,59 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2013-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu_mpi.h>
+#include "helper.h"
+
+/*
+ * Smoke test for checkpoint templates: registers a template made of one
+ * plain value and one StarPU variable handle, then prints it.
+ *
+ * Returns 77 (test skipped) when StarPU cannot be initialized on this
+ * machine or when no CPU worker is available, 0 otherwise.
+ */
+int main(int argc, char* argv[]) {
+
+
+    starpu_data_handle_t h;
+    starpu_mpi_checkpoint_template cp_template;
+    int val = 42;
+    int val2 = 1234;
+    int ret;
+    struct starpu_conf conf;
+
+    starpu_conf_init(&conf);
+    conf.nmic = 0;
+    conf.nmpi_ms = 0;
+
+    FPRINTF(stderr, "Go\n");
+    ret = starpu_init(&conf);
+    if (STARPU_UNLIKELY(ret == -ENODEV))
+    {
+        return 77;
+    }
+    STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+    if (starpu_cpu_worker_get_count() < 1)
+    {
+        FPRINTF(stderr, "This application requires at least 1 cpu worker\n");
+        starpu_shutdown();
+        return 77;
+    }
+
+    FPRINTF(stderr, "init\n");
+    starpu_variable_data_register(&h, STARPU_MAIN_RAM, (uintptr_t)&val2, sizeof(int));
+    FPRINTF(stderr, "registered\n");
+    ret = starpu_mpi_checkpoint_template_register(&cp_template,
+           STARPU_VALUE, &val, sizeof(int), 1,
+           STARPU_R, &h, 1,
+           0);
+    STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_checkpoint_template_register");
+    starpu_mpi_checkpoint_template_print(cp_template);
+
+    /* Release what was set up above so the success path shuts StarPU down
+     * cleanly, like the early-exit path does. The template itself has no
+     * public release function yet and is left to process exit. */
+    starpu_data_unregister(h);
+    starpu_shutdown();
+    return 0;
+}
+