| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146 | /* StarPU --- Runtime system for heterogeneous multicore architectures. * * Copyright (C) 2015-2020  Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria * * StarPU is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * StarPU is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * See the GNU Lesser General Public License in COPYING.LGPL for more details. */#include <starpu.h>#include <omp.h>#if !defined(STARPU_CLUSTER)int main(void){	return 77;}#else#ifdef STARPU_QUICK_CHECK#define NTASKS 8#else#define NTASKS 32#endif#define SIZE 4000/* Codelet SUM */static void sum_cpu(void * descr[], void *cl_arg){	double * v_dst = (double *) STARPU_VECTOR_GET_PTR(descr[0]);	double * v_src0 = (double *) STARPU_VECTOR_GET_PTR(descr[1]);	double * v_src1 = (double *) STARPU_VECTOR_GET_PTR(descr[1]);	int size;	starpu_codelet_unpack_args(cl_arg, &size);	fprintf(stderr, "sum_cpu\n");	int i, k;#pragma omp parallel	fprintf(stderr, "hello from the task %d\n", omp_get_thread_num());	for (k=0;k<10;k++)	{#pragma omp parallel for		for (i=0; i<size; i++)		{			v_dst[i]+=v_src0[i]+v_src1[i];		}	}}static struct starpu_codelet sum_cl ={	.cpu_funcs = {sum_cpu, NULL},	.nbuffers = 3,	.modes={STARPU_RW,STARPU_R, STARPU_R}};int main(void){	int ntasks = NTASKS;	int ret, i;	struct starpu_cluster_machine *clusters;	setenv("STARPU_NMIC","0",1);	setenv("STARPU_NMPI_MS","0",1);	ret = starpu_init(NULL);	if (ret == -ENODEV)		return 77;	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");	/* We regroup resources under each sockets into a cluster. We express a partition	 * of one socket to create two internal clusters */	clusters = starpu_cluster_machine(HWLOC_OBJ_SOCKET,					  STARPU_CLUSTER_PARTITION_ONE,					  STARPU_CLUSTER_NEW,//					  STARPU_CLUSTER_TYPE, STARPU_CLUSTER_OPENMP,//					  STARPU_CLUSTER_TYPE, STARPU_CLUSTER_INTEL_OPENMP_MKL,					  STARPU_CLUSTER_NB, 2,					  STARPU_CLUSTER_NCORES, 1,					  0);	starpu_cluster_print(clusters);	/* Data preparation */	double array1[SIZE];	double array2[SIZE];	memset(array1, 0, sizeof(double));	for (i=0;i<SIZE;i++)	{		array2[i]=i*2;	}	starpu_data_handle_t handle1;	starpu_data_handle_t handle2;	starpu_vector_data_register(&handle1, 0, (uintptr_t)array1, SIZE, sizeof(double));	starpu_vector_data_register(&handle2, 0, (uintptr_t)array2, SIZE, sizeof(double));	int size = SIZE;	for (i = 0; i < ntasks; i++)	{		ret = starpu_task_insert(&sum_cl,					 STARPU_RW, handle1,					 STARPU_R, handle2,					 STARPU_R, handle1,					 STARPU_VALUE, &size, sizeof(int),					 /* For two tasks, try out the case when the task isn't parallel and expect					    the configuration to be sequential due to this, then automatically changed					    back to the parallel one */					 STARPU_POSSIBLY_PARALLEL, (i<=4 || i > 6) ? 1 : 0,					 /* Note that this mode requires that you put a prologue callback managing					    this on all tasks to be taken into account. */					 STARPU_PROLOGUE_CALLBACK_POP, &starpu_openmp_prologue,					 0);		if (ret == -ENODEV)			goto out;		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");	}out:	/* wait for all tasks at the end*/	starpu_task_wait_for_all();	starpu_data_unregister(handle1);	starpu_data_unregister(handle2);	starpu_uncluster_machine(clusters);	starpu_shutdown();	return 0;}#endif
 |