| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208 | /* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2013-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */#include <starpu_mpi.h>#include "helper.h"#include <mpi_failure_tolerance/starpu_mpi_checkpoint_template.h>#define ARRAY_SIZE 12#define STARPU_MPI_INIT(void) do{struct starpu_conf conf; int ret; \starpu_conf_init(&conf); \conf.nmic = 0; \conf.nmpi_ms = 0; \ret = starpu_init(NULL); \if (STARPU_UNLIKELY(ret == -ENODEV)) \{ \return 77; \} \STARPU_CHECK_RETURN_VALUE(ret, "starpu_init"); \if (starpu_cpu_worker_get_count() < 1) \{ \FPRINTF(stderr, "This application requires at least 1 cpu worker\n"); \starpu_shutdown(); \return 77; \} \starpu_mpi_init(&argc, &argv, 1); \starpu_mpi_comm_size(MPI_COMM_WORLD, &nb_nodes); \starpu_mpi_comm_rank(MPI_COMM_WORLD, &me); \}while(0)int nb_nodes;int me;int backup_of(int _me){	if (_me==0)		return 1;	else		return 0;	return (_me+1)%nb_nodes;}int pseudotest_checkpoint_template_register(int argc, char* argv[]){	starpu_data_handle_t             h;	starpu_data_handle_t             h_array[ARRAY_SIZE];	starpu_mpi_checkpoint_template_t cp_template1, cp_template2;	int                              val = 42;	int                              val2 = 1234;	int array[ARRAY_SIZE];	int ret;	//init array	for (int i=0 ; i<ARRAY_SIZE ; i++)	{		array[i] = i*1111+42;	}	for (int i=0 ; i<ARRAY_SIZE ; i++)	{		h_array[i] = NULL;	}	FPRINTF(stderr, "Go\n");	STARPU_MPI_INIT();	FPRINTF_MPI(stderr, "Init ok - my rnk %d - size %d\n", me, nb_nodes);	starpu_variable_data_register(&h, STARPU_MAIN_RAM, (uintptr_t)&val2, sizeof(int));	starpu_mpi_data_register(h, 56, 0);	fprintf(stderr, "&h: %p, h:%p\n", &h, h);	for (int i=0 ; i<ARRAY_SIZE ; i++)	{		starpu_variable_data_register(&h_array[i], STARPU_MAIN_RAM, (uintptr_t)&array[i], sizeof(int));		starpu_mpi_data_register(h_array[i], 42+i, 1); //42 to 54	}	starpu_mpi_checkpoint_template_register(&cp_template1, 123486, 0,	                                        STARPU_VALUE, &val, sizeof(int), 84, backup_of,	                                        STARPU_R, h, 1,	                                        0);	FPRINTF(stderr, "registered!\n");	_starpu_mpi_checkpoint_template_print(cp_template1);	starpu_mpi_checkpoint_template_create(&cp_template2, 98765, 0);	starpu_mpi_checkpoint_template_add_entry(&cp_template2, STARPU_R, h, 1);	starpu_mpi_checkpoint_template_add_entry(&cp_template2, STARPU_VALUE, &val, sizeof(int), 84, backup_of);	starpu_mpi_checkpoint_template_freeze(&cp_template2);	FPRINTF(stderr, "registered 2!\n");	_starpu_mpi_checkpoint_template_print(cp_template1);	starpu_shutdown();	return 0;}int test_checkpoint_submit(int argc, char* argv[]){	int ret;	starpu_data_handle_t handle0, handle1;	starpu_mpi_checkpoint_template_t cp_template;	int val0 = 0;	int val1 = 0;	int stage = 10;	FPRINTF(stderr, "Go\n");	ret = starpu_mpi_init_conf(&argc, &argv, 1, MPI_COMM_WORLD, NULL);	STARPU_CHECK_RETURN_VALUE(ret, "starpu_mpi_init_conf");	starpu_mpi_comm_size(MPI_COMM_WORLD, &nb_nodes);	starpu_mpi_comm_rank(MPI_COMM_WORLD, &me);	stage+=me;	FPRINTF_MPI(stderr, "Init ok - my rnk %d - size %d\n", me, nb_nodes);	starpu_variable_data_register(&handle0, STARPU_MAIN_RAM, (uintptr_t)&val0, sizeof(int));	starpu_mpi_data_register(handle0, 100, 0);	starpu_variable_data_register(&handle1, STARPU_MAIN_RAM, (uintptr_t)&val1, sizeof(int));	starpu_mpi_data_register(handle1, 200, 1);	FPRINTF_MPI(stderr, "Registering\n");	starpu_mpi_checkpoint_template_register(&cp_template, 321, 0,			STARPU_R, handle0, 1,			STARPU_R, handle1, 0,            STARPU_VALUE, &stage, sizeof(int), 300, &backup_of,            STARPU_VALUE, &stage, sizeof(int), 301, &backup_of,            STARPU_VALUE, &stage, sizeof(int), 302, &backup_of,			0);	FPRINTF_MPI(stderr, "Registered\n");	_starpu_mpi_checkpoint_template_print(cp_template);	switch (me)	{		case 0:			val0 = 42;			break;		case 1:			val1 = 1000;			break;	}	FPRINTF_MPI(stderr, "Submitting\n");	starpu_mpi_submit_checkpoint_template(cp_template,0);	FPRINTF_MPI(stderr, "Submitted\n");	usleep(150000);	stage++;	fprintf(stderr, "\n\n");	usleep(150000);	if (me==0)	{		starpu_data_acquire(handle0, STARPU_RW);		val0*=2;		starpu_data_release(handle0);	}	if (me==1)	{		starpu_data_acquire(handle1, STARPU_RW);		val1*=2;		starpu_data_release(handle1);	}	FPRINTF_MPI(stderr, "Submitting\n");	starpu_mpi_submit_checkpoint_template(cp_template, 0);	FPRINTF_MPI(stderr, "Submitted\n");	usleep(150000);	fprintf(stderr, "\n\n");	starpu_mpi_wait_for_all(MPI_COMM_WORLD);	FPRINTF_MPI(stderr, "Bye!\n");	starpu_mpi_shutdown();	return 0;}int main(int argc, char* argv[]){	//pseudotest_checkpoint_template_register(argc, argv);	test_checkpoint_submit(argc, argv);	return 0;}
 |