/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2010-2012, 2014  Université de Bordeaux
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */

/*
 * Helpers to run one or two instances of a benchmark, optionally each inside
 * its own scheduling context, and to print the accumulated results.
 */

#include "sched_ctx_utils.h"
/* NOTE(review): the operand of this include was lost in the copy under
 * review; <starpu.h> is assumed because of the starpu_* calls below. */
#include <starpu.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Benchmark parameters, overridable from the command line
 * (see parse_args_ctx()). */
unsigned size1;
unsigned size2;
unsigned nblocks1;
unsigned nblocks2;
/* Worker counts: CPUs private to context 1 / 2, GPUs shared by both
 * contexts, and GPUs private to context 1 / 2. */
unsigned cpu1;
unsigned cpu2;
unsigned gpu;
unsigned gpu1;
unsigned gpu2;

/* Everything one benchmark instance needs to run. */
struct params
{
	unsigned id;			/* index of this instance in rv[] */
	unsigned ctx;			/* scheduling context, 0 when none was created */
	int the_other_ctx;		/* context of the other instance */
	int *procs;			/* worker ids given to the context */
	int nprocs;			/* number of entries in procs */
	void (*bench)(unsigned, unsigned);	/* called as bench(size, nblocks) */
	unsigned size;
	unsigned nblocks;
};

/* Results accumulated by one benchmark instance. */
struct retvals
{
	double flops;
	double avg_timing;
};

#define NSAMPLES 1
/* Non-zero until one context-bound instance has finished. */
int first = 1;
/* Protects 'first'. */
starpu_pthread_mutex_t mut;
struct retvals rv[2];
struct params p1, p2;
/* Thread-specific pointer to the 'id' of the instance run by the thread. */
starpu_pthread_key_t key;

/*
 * Reset every tunable and result to its default value and create the
 * thread-specific key used to find the current instance.
 */
void init(void)
{
	size1 = 4*1024;
	size2 = 4*1024;

	nblocks1 = 16;
	nblocks2 = 16;

	cpu1 = 0;
	cpu2 = 0;

	gpu = 0;
	gpu1 = 0;
	gpu2 = 0;

	rv[0].flops = 0.0;
	rv[1].flops = 0.0;
	/* Both slots are reset (the second assignment used to target rv[1]
	 * twice, leaving rv[0].avg_timing untouched). */
	rv[0].avg_timing = 0.0;
	rv[1].avg_timing = 0.0;

	p1.ctx = 0;
	p2.ctx = 0;

	p1.id = 0;
	p2.id = 1;
	starpu_pthread_key_create(&key, NULL);
}

/*
 * Add one sample to the results of the instance run by the calling thread.
 *
 * The instance is found through the thread-specific 'key' set by
 * start_bench(); the call is ignored when the calling thread has no instance
 * attached.
 */
void update_sched_ctx_timing_results(double flops, double avg_timing)
{
	unsigned *id = starpu_pthread_getspecific(key);

	if (id == NULL)
	{
		return;
	}

	rv[*id].flops += flops;
	rv[*id].avg_timing += avg_timing;
}

/*
 * Run the benchmark described by 'val' (a struct params *) NSAMPLES times and
 * average its results. Has the signature of a thread entry point.
 *
 * When the instance has its own context, the calling thread is bound to it
 * first. Afterwards, only the first context-bound instance to finish deletes
 * its context; a later one leaves its context alone.
 *
 * Always returns NULL.
 */
void* start_bench(void *val)
{
	struct params *p = (struct params*)val;
	int i;

	starpu_pthread_setspecific(key, &p->id);

	if (p->ctx != 0)
	{
		starpu_sched_ctx_set_context(&p->ctx);
	}

	for (i = 0; i < NSAMPLES; i++)
	{
		p->bench(p->size, p->nblocks);
	}

	if (p->ctx != 0)
	{
		starpu_pthread_mutex_lock(&mut);
		if (first)
		{
			starpu_sched_ctx_delete(p->ctx);
		}

		first = 0;
		starpu_pthread_mutex_unlock(&mut);
	}

	rv[p->id].flops /= NSAMPLES;
	rv[p->id].avg_timing /= NSAMPLES;

	return NULL;
}

/*
 * Run 'bench' twice concurrently, once per parameter set, each in its own
 * thread, then print both results followed by the overall elapsed time.
 */
void start_2benchs(void (*bench)(unsigned, unsigned))
{
	p1.bench = bench;
	p1.size = size1;
	printf("size %u\n", size1);
	p1.nblocks = nblocks1;

	p2.bench = bench;
	p2.size = size2;
	printf("size %u\n", size2);
	p2.nblocks = nblocks2;

	starpu_pthread_t tid[2];
	starpu_pthread_mutex_init(&mut, NULL);

	double start;
	double end;

	start = starpu_timing_now();

	/* start_bench already has the thread-routine type, no cast needed. */
	starpu_pthread_create(&tid[0], NULL, start_bench, &p1);
	starpu_pthread_create(&tid[1], NULL, start_bench, &p2);

	starpu_pthread_join(tid[0], NULL);
	starpu_pthread_join(tid[1], NULL);

	end = starpu_timing_now();

	starpu_pthread_mutex_destroy(&mut);

	/* presumably microseconds -> seconds; confirm the unit of
	 * starpu_timing_now() */
	double timing = end - start;
	timing /= 1000000;

	printf("%2.2f %2.2f ", rv[0].flops, rv[1].flops);
	printf("%2.2f %2.2f %2.2f\n", rv[0].avg_timing, rv[1].avg_timing, timing);
}

/*
 * Run 'bench' once with the first parameter set, in the calling thread, then
 * print its results followed by the elapsed time.
 */
void start_1stbench(void (*bench)(unsigned, unsigned))
{
	p1.bench = bench;
	p1.size = size1;
	p1.nblocks = nblocks1;

	/* start_bench() may lock 'mut' and it is destroyed below, so it has to
	 * be initialized on this path too. */
	starpu_pthread_mutex_init(&mut, NULL);

	double start;
	double end;

	start = starpu_timing_now();

	start_bench((void*)&p1);

	end = starpu_timing_now();

	starpu_pthread_mutex_destroy(&mut);

	double timing = end - start;
	timing /= 1000000;

	printf("%2.2f ", rv[0].flops);
	printf("%2.2f %2.2f\n", rv[0].avg_timing, timing);
}

/*
 * Run 'bench' once with the second parameter set, in the calling thread, then
 * print its results followed by the elapsed time.
 */
void start_2ndbench(void (*bench)(unsigned, unsigned))
{
	p2.bench = bench;
	p2.size = size2;
	p2.nblocks = nblocks2;

	/* start_bench() may lock 'mut' and it is destroyed below, so it has to
	 * be initialized on this path too. */
	starpu_pthread_mutex_init(&mut, NULL);

	double start;
	double end;

	start = starpu_timing_now();

	start_bench((void*)&p2);

	end = starpu_timing_now();

	starpu_pthread_mutex_destroy(&mut);

	double timing = end - start;
	timing /= 1000000;

	printf("%2.2f ", rv[1].flops);
	printf("%2.2f %2.2f\n", rv[1].avg_timing, timing);
}

/* Allocate room for 'n' worker ids (at least one); exits on failure. */
static int *alloc_worker_ids(unsigned n)
{
	int *ids = calloc(n > 0 ? n : 1, sizeof *ids);

	if (ids == NULL)
	{
		fprintf(stderr, "construct_contexts: out of memory\n");
		exit(EXIT_FAILURE);
	}
	return ids;
}

/* Store and print the ids [begin, end) into ids[] from index k on; returns
 * the next free index. */
static int append_worker_range(int *ids, int k, unsigned begin, unsigned end)
{
	unsigned w;

	for (w = begin; w < end; w++)
	{
		ids[k++] = (int)w;
		printf("%u ", w);
	}
	return k;
}

/*
 * Create the two scheduling contexts and record them in p1 and p2.
 *
 * Worker ids are laid out as: the 'gpu' shared GPUs, the 'gpu1' GPUs of
 * context 1, the 'gpu2' GPUs of context 2, the 'cpu1' CPUs of context 1 and
 * finally the 'cpu2' CPUs of context 2. Each context receives the shared GPUs
 * plus its own GPUs and CPUs, and each is declared the inheritor of the
 * other.
 *
 * The id arrays stored in p1.procs / p2.procs are heap-allocated so that they
 * stay valid after this function returns; they are never freed here.
 *
 * 'bench' is unused.
 */
void construct_contexts(void (*bench)(unsigned, unsigned))
{
	(void)bench;

	unsigned nprocs1 = cpu1 + gpu + gpu1;
	unsigned nprocs2 = cpu2 + gpu + gpu2;
	unsigned n_all_gpus = gpu + gpu1 + gpu2;
	int k;

	int *procs = alloc_worker_ids(nprocs1);
	k = append_worker_range(procs, 0, 0, gpu);
	k = append_worker_range(procs, k, gpu, gpu + gpu1);
	k = append_worker_range(procs, k, n_all_gpus, n_all_gpus + cpu1);
	printf("\n ");

	p1.ctx = starpu_sched_ctx_create(procs, nprocs1, "sched_ctx1", STARPU_SCHED_CTX_POLICY_NAME, "heft", 0);
	p2.the_other_ctx = (int)p1.ctx;
	p1.procs = procs;
	p1.nprocs = nprocs1;

	int *procs2 = alloc_worker_ids(nprocs2);
	k = append_worker_range(procs2, 0, 0, gpu);
	k = append_worker_range(procs2, k, gpu + gpu1, gpu + gpu1 + gpu2);
	k = append_worker_range(procs2, k, n_all_gpus + cpu1, n_all_gpus + cpu1 + cpu2);
	(void)k;
	printf("\n");

	p2.ctx = starpu_sched_ctx_create(procs2, nprocs2, "sched_ctx2", STARPU_SCHED_CTX_POLICY_NAME, "heft", 0);
	p1.the_other_ctx = (int)p2.ctx;
	p2.procs = procs2;
	p2.nprocs = nprocs2;

	starpu_sched_ctx_set_inheritor(p1.ctx, p2.ctx);
	starpu_sched_ctx_set_inheritor(p2.ctx, p1.ctx);
}

/*
 * Reset all settings with init(), then override them from the command line.
 *
 * Recognized options, each followed by a decimal value: -size1, -nblocks1,
 * -size2, -nblocks2, -cpu1, -cpu2, -gpu, -gpu1, -gpu2. Unknown arguments are
 * ignored. An option given as the very last argument, without a value, is
 * reported on stderr and ignored.
 */
void parse_args_ctx(int argc, char **argv)
{
	const struct
	{
		const char *flag;
		unsigned *dest;
	} options[] =
	{
		{ "-size1",    &size1 },
		{ "-nblocks1", &nblocks1 },
		{ "-size2",    &size2 },
		{ "-nblocks2", &nblocks2 },
		{ "-cpu1",     &cpu1 },
		{ "-cpu2",     &cpu2 },
		{ "-gpu",      &gpu },
		{ "-gpu1",     &gpu1 },
		{ "-gpu2",     &gpu2 },
	};
	const size_t noptions = sizeof options / sizeof options[0];
	size_t o;
	int i;

	init();

	for (i = 1; i < argc; i++)
	{
		for (o = 0; o < noptions; o++)
		{
			if (strcmp(argv[i], options[o].flag) != 0)
			{
				continue;
			}

			/* Never read past the end of argv for the value. */
			if (i + 1 >= argc)
			{
				fprintf(stderr, "missing value for option %s\n", argv[i]);
				return;
			}

			*options[o].dest = (unsigned)strtol(argv[++i], NULL, 10);
			break;
		}
	}
}