123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145 |
- /* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2015-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
- #include <starpu.h>
- #include <omp.h>
- #if !defined(STARPU_CLUSTER)
/*
 * Fallback entry point used when STARPU_CLUSTER is not defined: the test
 * cannot run, so it exits immediately with status 77. This is the same
 * status the full variant of this test returns when starpu_init() fails
 * with -ENODEV (presumably the harness's "skipped" code -- confirm).
 */
int main(void)
{
	const int skip_status = 77;

	return skip_status;
}
- #else
- #ifdef STARPU_QUICK_CHECK
- #define NTASKS 8
- #else
- #define NTASKS 32
- #endif
- #define SIZE 4000
/*
 * Codelet SUM, CPU implementation.
 *
 * Adds the two source vectors into the destination vector, i.e.
 *     v_dst[i] += v_src0[i] + v_src1[i]    for every i < size,
 * and repeats that pass 10 times. Each pass is an OpenMP parallel loop.
 *
 * descr[0]: destination vector (declared STARPU_RW in sum_cl)
 * descr[1]: first source vector (declared STARPU_R)
 * descr[2]: second source vector (declared STARPU_R)
 * cl_arg:   packed codelet arguments holding one int, the number of
 *           elements to process.
 */
static void sum_cpu(void *descr[], void *cl_arg)
{
	double *v_dst = (double *) STARPU_VECTOR_GET_PTR(descr[0]);
	double *v_src0 = (double *) STARPU_VECTOR_GET_PTR(descr[1]);
	/* The second source is the codelet's third buffer. main() submits the
	 * same handle for buffers 0 and 2, so v_src1 may alias v_dst; this is
	 * safe because iteration i only reads and writes element i. */
	double *v_src1 = (double *) STARPU_VECTOR_GET_PTR(descr[2]);
	int size;
	int i, k;

	starpu_codelet_unpack_args(cl_arg, &size);

	fprintf(stderr, "sum_cpu\n");

	/* Each thread of the OpenMP team prints its own thread number, which
	 * shows how many threads the task was given. */
#pragma omp parallel
	fprintf(stderr, "hello from the task %d\n", omp_get_thread_num());

	for (k = 0; k < 10; k++)
	{
#pragma omp parallel for
		for (i = 0; i < size; i++)
		{
			v_dst[i] += v_src0[i] + v_src1[i];
		}
	}
}
- static struct starpu_codelet sum_cl =
- {
- .cpu_funcs = {sum_cpu, NULL},
- .nbuffers = 3,
- .modes={STARPU_RW,STARPU_R, STARPU_R}
- };
- int main(void)
- {
- int ntasks = NTASKS;
- int ret, i;
- struct starpu_cluster_machine *clusters;
- setenv("STARPU_NMPI_MS","0",1);
- ret = starpu_init(NULL);
- if (ret == -ENODEV)
- return 77;
- STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
- /* We regroup resources under each sockets into a cluster. We express a partition
- * of one socket to create two internal clusters */
- clusters = starpu_cluster_machine(HWLOC_OBJ_SOCKET,
- STARPU_CLUSTER_PARTITION_ONE,
- STARPU_CLUSTER_NEW,
- // STARPU_CLUSTER_TYPE, STARPU_CLUSTER_OPENMP,
- // STARPU_CLUSTER_TYPE, STARPU_CLUSTER_INTEL_OPENMP_MKL,
- STARPU_CLUSTER_NB, 2,
- STARPU_CLUSTER_NCORES, 1,
- 0);
- starpu_cluster_print(clusters);
- /* Data preparation */
- double array1[SIZE];
- double array2[SIZE];
- memset(array1, 0, sizeof(double));
- for (i=0;i<SIZE;i++)
- {
- array2[i]=i*2;
- }
- starpu_data_handle_t handle1;
- starpu_data_handle_t handle2;
- starpu_vector_data_register(&handle1, 0, (uintptr_t)array1, SIZE, sizeof(double));
- starpu_vector_data_register(&handle2, 0, (uintptr_t)array2, SIZE, sizeof(double));
- int size = SIZE;
- for (i = 0; i < ntasks; i++)
- {
- ret = starpu_task_insert(&sum_cl,
- STARPU_RW, handle1,
- STARPU_R, handle2,
- STARPU_R, handle1,
- STARPU_VALUE, &size, sizeof(int),
- /* For two tasks, try out the case when the task isn't parallel and expect
- the configuration to be sequential due to this, then automatically changed
- back to the parallel one */
- STARPU_POSSIBLY_PARALLEL, (i<=4 || i > 6) ? 1 : 0,
- /* Note that this mode requires that you put a prologue callback managing
- this on all tasks to be taken into account. */
- STARPU_PROLOGUE_CALLBACK_POP, &starpu_openmp_prologue,
- 0);
- if (ret == -ENODEV)
- goto out;
- STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
- }
- out:
- /* wait for all tasks at the end*/
- starpu_task_wait_for_all();
- starpu_data_unregister(handle1);
- starpu_data_unregister(handle2);
- starpu_uncluster_machine(clusters);
- starpu_shutdown();
- return 0;
- }
- #endif
|