12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748 |
- /**
- *
- * @file time_main.c
- *
- * PLASMA auxiliary routines
- * PLASMA is a software package provided by Univ. of Tennessee,
- * Univ. of California Berkeley and Univ. of Colorado Denver
- *
- * @version 2.3.1
- * @author ???
- * @author Mathieu Faverge
- * @date 2010-11-15
- *
- **/
- /* Define these so that the Microsoft VC compiler stops complaining
- about scanf and friends */
- #define _CRT_SECURE_NO_DEPRECATE
- #define _CRT_SECURE_NO_WARNINGS
- #include <math.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #ifdef PLASMA_EZTRACE
- #include <eztrace.h>
- #endif
- #if defined( _WIN32 ) || defined( _WIN64 )
- #include <windows.h>
- #include <time.h>
- #include <sys/timeb.h>
- #if defined(_MSC_VER) || defined(_MSC_EXTENSIONS)
- #define DELTA_EPOCH_IN_MICROSECS 11644473600000000Ui64
- #else
- #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
- #endif
/* Time-zone record filled in by the Windows gettimeofday() replacement. */
struct timezone
{
    int tz_minuteswest;   /* minutes W of Greenwich */
    int tz_dsttime;       /* type of dst correction */
};
- int gettimeofday(struct timeval* tv, struct timezone* tz)
- {
- FILETIME ft;
- unsigned __int64 tmpres = 0;
- static int tzflag;
- if (NULL != tv)
- {
- GetSystemTimeAsFileTime(&ft);
- tmpres |= ft.dwHighDateTime;
- tmpres <<= 32;
- tmpres |= ft.dwLowDateTime;
- /*converting file time to unix epoch*/
- tmpres /= 10; /*convert into microseconds*/
- tmpres -= DELTA_EPOCH_IN_MICROSECS;
- tv->tv_sec = (long)(tmpres / 1000000UL);
- tv->tv_usec = (long)(tmpres % 1000000UL);
- }
- if (NULL != tz)
- {
- if (!tzflag)
- {
- _tzset();
- tzflag++;
- }
- tz->tz_minuteswest = _timezone / 60;
- tz->tz_dsttime = _daylight;
- }
- return 0;
- }
- #else /* Non-Windows */
- #include <unistd.h>
- #include <sys/time.h>
- #include <sys/resource.h>
- #endif
- #include <cblas.h>
- #include <lapacke.h>
- #include <plasma.h>
- #include <core_blas.h>
- #include <magma_morse.h>
- #include <sched_ctx_hypervisor.h>
- #include "timing.h"
- #include "auxiliary.h"
- #include <pthread.h>
- static int RunTest(int *iparam, _PREC *dparam, double *t_);
- double cWtime(void);
- int ISEED[4] = {0,0,0,1}; /* initial seed for zlarnv() */
/*
 * Wall-clock time in seconds, built from gettimeofday():
 * whole seconds plus the microsecond part scaled by 1e-6.
 *
 * struct timeval {time_t tv_sec; suseconds_t tv_usec;};
 */
double cWtime(void)
{
    struct timeval now;
    double seconds;
    double micros;

    gettimeofday( &now, NULL );
    seconds = now.tv_sec;
    micros  = now.tv_usec;
    return seconds + 1e-6 * micros;
}
- double *t1, *t2;
- _PREC dparam1[TIMING_DNBPARAM];
- _PREC dparam2[TIMING_DNBPARAM];
- static int
- Test(int64_t n, int *iparam) {
- int i, j, iter, m;
- int thrdnbr, niter, nrhs;
- double *t;
- _PREC eps = _LAMCH( 'e' );
- _PREC dparam[TIMING_DNBPARAM];
- double flops, fmuls, fadds, fp_per_mul, fp_per_add;
- double sumgf, sumgf2, sumt, sd, gflops;
- double flops_2, fmuls_2, fadds_2;
- double sumgf_2, sumgf2_2, sumt_2, sd_2, gflops_2;
-
- char *s;
- char *env[] = {
- "OMP_NUM_THREADS",
- "MKL_NUM_THREADS",
- "GOTO_NUM_THREADS",
- "ACML_NUM_THREADS",
- "ATLAS_NUM_THREADS",
- "BLAS_NUM_THREADS", ""
- };
- int gnuplot = 0;
- thrdnbr = iparam[TIMING_THRDNBR];
- niter = iparam[TIMING_NITER];
- nrhs = iparam[TIMING_NRHS];
- if (n < 0 || thrdnbr < 0) {
- const char *bound_header = iparam[TIMING_BOUND] ? " thGflop/s" : "";
- const char *check_header = iparam[TIMING_CHECK] ? " ||Ax-b|| ||A|| ||x|| ||b|| eps ||Ax-b||/N/eps/(||A||||x||+||b||)" : "";
- const char *peak_header = iparam[TIMING_PEAK] ? " (\% of peak) peak" : "";
- printf( "# N NRHS threads seconds Gflop/s Deviation %s%s%s\n", bound_header, peak_header, check_header);
- if (gnuplot) {
- printf( "set title '%d_NUM_THREADS: ", thrdnbr );
- for (i = 0; env[i][0]; ++i) {
- s = getenv( env[i] );
- if (i) printf( " " ); /* separating space */
- for (j = 0; j < 5 && env[i][j] && env[i][j] != '_'; ++j)
- printf( "%c", env[i][j] );
- if (s)
- printf( "=%s", s );
- else
- printf( "->%s", "?" );
- }
- printf( "'\n" );
- printf( "%s\n%s\n%s\n%s\n%s%s%s\n",
- "set xlabel 'Matrix size'",
- "set ylabel 'Gflop/s'",
- "set key bottom",
- gnuplot > 1 ? "set terminal png giant\nset output 'timeplot.png'" : "",
- "plot '-' using 1:5 title '", _NAME, "' with linespoints" );
- }
- return 0;
- }
- printf( "%5d %4d %5d ", iparam[TIMING_N], iparam[TIMING_NRHS], iparam[TIMING_THRDNBR] );
- printf( "%5d %4d %5d ", iparam[TIMING_N2], iparam[TIMING_NRHS], iparam[TIMING_THRDNBR] );
- fflush( stdout );
- t = (double*)malloc(niter*sizeof(double));
- memset(t, 0, niter*sizeof(double));
- t1 = (double*)malloc(niter*sizeof(double));
- memset(t, 0, niter*sizeof(double));
- t2 = (double*)malloc(niter*sizeof(double));
- memset(t, 0, niter*sizeof(double));
- if (sizeof(_TYPE) == sizeof(_PREC)) {
- fp_per_mul = 1;
- fp_per_add = 1;
- } else {
- fp_per_mul = 6;
- fp_per_add = 2;
- }
- m = iparam[TIMING_M];
- n = iparam[TIMING_N];
- fadds = _FADDS;
- fmuls = _FMULS;
- flops = fmuls * fp_per_mul + fadds * fp_per_add;
- gflops = 0.0;
- m = iparam[TIMING_M2];
- n = iparam[TIMING_N2];
- fadds_2 = _FADDS;
- fmuls_2 = _FMULS;
- flops_2 = fmuls_2 * fp_per_mul + fadds_2 * fp_per_add;
- gflops_2 = 0.0;
- if ( iparam[TIMING_WARMUP] ) {
- RunTest( iparam, dparam, &(t[0]));
- }
- sumgf = 0.0;
- double sumgf_upper = 0.0;
- sumgf2 = 0.0;
- sumt = 0.0;
- sumgf_2 = 0.0;
- double sumgf_upper_2 = 0.0;
- sumgf2_2 = 0.0;
- sumt_2 = 0.0;
-
- for (iter = 0; iter < niter; iter++)
- {
- #ifdef PLASMA_EZTRACE
- if( iter == 0 ) {
- eztrace_start();
- RunTest( iparam, dparam, &(t[iter]));
- eztrace_stop();
- }
- else
- #endif
- RunTest( iparam, dparam, &(t[iter]));
- double tmin = 0.0;
- double integer_tmin = 0.0;
- double upper_gflops = 0.0;
- double tmin_2 = 0.0;
- double integer_tmin_2 = 0.0;
- double upper_gflops_2 = 0.0;
- #if 0
- if (iparam[TIMING_BOUND])
- {
- if (iparam[TIMING_BOUNDDEPS]) {
- FILE *out = fopen("bounddeps.pl", "w");
- starpu_bound_print_lp(out);
- fclose(out);
- out = fopen("bound.dot", "w");
- starpu_bound_print_dot(out);
- fclose(out);
- } else {
- #if 0
- FILE *out = fopen("bound.pl", "w");
- starpu_bound_print_lp(out);
- fclose(out);
- #endif
- starpu_bound_compute(&tmin, &integer_tmin, 0);
- upper_gflops = ((1e-6 * flops) / tmin);
- starpu_bound_compute(&tmin_2, &integer_tmin_2, 0);
- upper_gflops_2 = ((1e-6 * flops_2) / tmin_2);
- }
- }
- #endif
- printf("t1 = %lf t2 = %lf \n", t1[0], t2[0]);
- gflops = (1e-9 * flops) / t1[iter];
- sumt += t1[iter];
- sumgf_upper += upper_gflops;
- sumgf += gflops;
- sumgf2 += gflops*gflops;
- gflops_2 = (1e-9 * flops_2) / t2[iter];
- sumt_2 += t2[iter];
- sumgf_upper_2 += upper_gflops_2;
- sumgf_2 += gflops_2;
- sumgf2_2 += gflops_2*gflops_2;
- }
- gflops = sumgf / niter;
- sd = sqrt((sumgf2 - (sumgf*sumgf)/niter)/niter);
- gflops_2 = sumgf_2 / niter;
- sd_2 = sqrt((sumgf2_2 - (sumgf_2*sumgf_2)/niter)/niter);
- printf( "%9.3f %9.2f +-%7.2f ", sumt/niter, gflops, sd);
- if (iparam[TIMING_BOUND] && !iparam[TIMING_BOUNDDEPS])
- printf(" %9.2f", sumgf_upper/niter);
- if ( iparam[TIMING_PEAK] )
- {
- if (dparam1[TIMING_ESTIMATED_PEAK]<0.0f)
- printf(" n/a n/a ");
- else
- printf(" %2.2f\%% %9.2f ", 100.0f*(gflops/dparam1[TIMING_ESTIMATED_PEAK]), dparam1[TIMING_ESTIMATED_PEAK]);
- }
- if ( iparam[TIMING_CHECK] )
- printf( "%8.5e %8.5e %8.5e %8.5e %8.5e %8.5e",
- dparam1[TIMING_RES], dparam1[TIMING_ANORM], dparam1[TIMING_XNORM], dparam1[TIMING_BNORM], eps,
- dparam1[TIMING_RES] / n / eps / (dparam1[TIMING_ANORM] * dparam1[TIMING_XNORM] + dparam1[TIMING_BNORM] ));
- printf("\n");
- printf( "%9.3f %9.2f +-%7.2f ", sumt_2/niter, gflops_2, sd_2);
- if (iparam[TIMING_BOUND] && !iparam[TIMING_BOUNDDEPS])
- printf(" %9.2f", sumgf_upper_2/niter);
- if ( iparam[TIMING_PEAK] )
- {
- if (dparam2[TIMING_ESTIMATED_PEAK]<0.0f)
- printf(" n/a n/a ");
- else
- printf(" %2.2f\%% %9.2f ", 100.0f*(gflops_2/dparam2[TIMING_ESTIMATED_PEAK]), dparam2[TIMING_ESTIMATED_PEAK]);
- }
- if ( iparam[TIMING_CHECK] )
- printf( "%8.5e %8.5e %8.5e %8.5e %8.5e %8.5e",
- dparam2[TIMING_RES], dparam2[TIMING_ANORM], dparam2[TIMING_XNORM], dparam2[TIMING_BNORM], eps,
- dparam2[TIMING_RES] / n / eps / (dparam2[TIMING_ANORM] * dparam2[TIMING_XNORM] + dparam2[TIMING_BNORM] ));
- printf("\n");
- fflush( stdout );
- free(t);
- free(t1);
- free(t2);
- return 0;
- }
/*
 * Return 1 when string s begins with prefix, 0 otherwise.
 * An empty prefix matches every string.
 */
static int
startswith(const char *s, const char *prefix) {
    return strncmp( s, prefix, strlen( prefix ) ) == 0;
}
/*
 * Copy the characters in [src, end) into dst, keeping at most 20 of them,
 * and NUL-terminate.  dst must hold at least 21 bytes.
 * Returns the number of characters kept.
 */
static int
get_range_copy_field(char *dst, const char *src, const char *end) {
    int len = (int)(end - src);

    if (len > 20)
        len = 20;
    memcpy( dst, src, (size_t)len );
    dst[len] = '\0';
    return len;
}

/*
 * Parse a range specification of the form "Start", "Start:Stop" or
 * "Start:Stop:Step".
 *
 *   "Start"            step = max(Start/10, 1), stop = Start + 10*step
 *   "Start:Stop"       step = max((Stop-Start)/10, 1)
 *   "Start:Stop:Step"  an empty Start or Stop field is taken as 0
 *
 * The outputs are written only on success.
 * Returns 0 on success, -1 when the text is malformed or out of range.
 */
static int
get_range(char *range, int *start_p, int *stop_p, int *step_p) {
    char field[21];
    const char *first = strchr( range, ':' );
    const char *second;
    const char *p;
    int ncolons = 0;
    int lo = 1000, hi = 10000, inc = 1000;

    for (p = first; p != NULL; p = strchr( p + 1, ':' ))
        ncolons++;

    switch (ncolons) {
    case 0:
        /* Single number: derive step and stop from it. */
        if (sscanf( range, "%d", &lo ) < 1 || lo < 1)
            return -1;
        inc = lo / 10;
        if (inc < 1)
            inc = 1;
        hi = lo + 10 * inc;
        break;

    case 1:
        /* The stop value (after the colon) is read first, then the start. */
        if (sscanf( first + 1, "%d", &hi ) < 1 || hi < 1)
            return -1;
        get_range_copy_field( field, range, first );
        if (sscanf( field, "%d", &lo ) < 1 || lo > hi || lo < 1)
            return -1;
        /* Let's have 10 steps or less. */
        inc = (hi - lo) / 10;
        if (inc < 1)
            inc = 1;
        break;

    case 2:
        /* Start: text before the first colon. */
        if (get_range_copy_field( field, range, first ) == 0)
            lo = 0;
        else if (sscanf( field, "%d", &lo ) < 1 || lo < 1)
            return -1;

        /* Stop: text between the two colons. */
        second = strchr( first + 1, ':' );
        if (get_range_copy_field( field, first + 1, second ) == 0)
            hi = 0;
        else if (sscanf( field, "%d", &hi ) < 1 || hi < lo)
            return -1;

        /* Step: text after the second colon. */
        if (sscanf( second + 1, "%d", &inc ) < 1 || inc < 1)
            return -1;
        break;

    default:
        return -1;
    }

    *start_p = lo;
    *stop_p  = hi;
    *step_p  = inc;
    return 0;
}
/*
 * Print the usage banner followed by the list of command-line options
 * to standard output.
 */
static void
show_help(char *prog_name) {
    /* One entry per output line; the --gnuplot entry is intentionally not listed. */
    static const char *const option_lines[] = {
        "Options are:\n",
        " --threads=C Number of threads (default: 1)\n",
        " --n_range=R Range of N values: Start:Stop:Step (default: 500:5000:500)\n",
        " --[no]check Check result (default: nocheck)\n",
        " --[no]warmup Perform a warmup run to pre-load libraries (default: warmup)\n",
        " --parallel=N Use parallel tasks of size N (default: no)\n",
        " --niter=N Number of iterations (default: 1)\n",
        " --nb=N Nb size. Not used if autotuning is activated (default: 128)\n",
        " --ib=N IB size. Not used if autotuning is activated (default: 32)\n",
        " --nrhs=N Number of right-hand size (default: 1)\n",
        " --[no]dyn Activate Dynamic scheduling (default: nodyn)\n",
        " --[no]atun Activate autotuning (default: noatun)\n",
        " --ifmt Input format. 0: CM, 1: CCRB, 2: CRRB, 3: RCRB, 4: RRRB, 5: RM (default: 0)\n",
        " --ofmt Output format. 0: CM, 1: CCRB, 2: CRRB, 3: RCRB, 4: RRRB, 5: RM (default: 1)\n",
        " --thrdbypb Number of threads per subproblem for inplace transformation (default: 1)\n",
        " --[no]profile Profile kernels with StarPU (default: no)\n",
        " --[no]peak Evalue sustained peak performance (default: no)\n"
    };
    size_t k;

    printf( "Usage:\n%s [options]\n\n", prog_name );
    for (k = 0; k < sizeof option_lines / sizeof option_lines[0]; ++k)
        fputs( option_lines[k], stdout );
}
/*
 * Store the number of available processors in *thrdnbr.
 *
 * On Windows the value comes from the NUMBER_OF_PROCESSORS environment
 * variable; elsewhere from sysconf(_SC_NPROCESSORS_ONLN).
 *
 * If the count cannot be determined (variable missing or unparsable,
 * sysconf failure, non-positive value), *thrdnbr is left untouched so the
 * caller's default stays in effect.
 */
static void
get_thread_count(int *thrdnbr) {
#if defined(_WIN32) || defined(_WIN64) || defined(WIN32) || defined(WIN64)
    const char *nproc = getenv( "NUMBER_OF_PROCESSORS" );
    int count = 0;

    /* getenv() returns NULL when the variable is not set. */
    if (nproc != NULL && sscanf( nproc, "%d", &count ) == 1 && count > 0)
        *thrdnbr = count;
#else
    /* sysconf() returns -1 on error. */
    long count = sysconf( _SC_NPROCESSORS_ONLN );

    if (count > 0)
        *thrdnbr = (int)count;
#endif
}
- typedef struct {
- PLASMA_enum uplo;
- magma_desc_t *descA;
- unsigned ctx;
- unsigned the_other_ctx;
- real_Double_t t;
- } params;
- double compute_flops(int n, int m)
- {
- double fp_per_mul, fp_per_add;
- if (sizeof(_TYPE) == sizeof(_PREC)) {
- fp_per_mul = 1;
- fp_per_add = 1;
- } else {
- fp_per_mul = 6;
- fp_per_add = 2;
- }
-
- double fmuls = (n * (1.0 / 6.0 * n + 0.5) * n);
- double fadds = (n * (1.0 / 6.0 * n ) * n);
- double flops = fmuls * fp_per_mul + fadds * fp_per_add;
- return flops;
- }
- params p1, p2;
- int
- main(int argc, char *argv[]) {
- int i;
- int start = 500;
- int stop = 5000;
- int step = 500;
- int start1 = 500;
- int stop1 = 5000;
- int step1 = 500;
- int start2 = 500;
- int stop2 = 5000;
- int step2 = 500;
- int start_cpus1 = 0, start_cpus2 = 0, start_gpus1 = 0, start_gpus2 = 0;
- int stop_cpus1 = -1, stop_cpus2 = -1, stop_gpus1 = -1, stop_gpus2 = -1;
- int step_cpus1 = 1, step_cpus2 = 1, step_gpus1 = 1, step_gpus2 = 1;
- int iparam[TIMING_INBPARAM];
- memset(iparam, 0, TIMING_INBPARAM*sizeof(int));
- iparam[TIMING_CHECK ] = 0;
- iparam[TIMING_WARMUP ] = 1;
- iparam[TIMING_NITER ] = 1;
- iparam[TIMING_N ] = 500;
- iparam[TIMING_N2 ] = 500;
- iparam[TIMING_NB ] = 128;
- iparam[TIMING_IB ] = 32;
- iparam[TIMING_NRHS ] = 1;
- iparam[TIMING_THRDNBR ] = 1;
- iparam[TIMING_NCUDAS ] = 0;
- iparam[TIMING_THRDNBR_SUBGRP] = 1;
- iparam[TIMING_SCHEDULER ] = 0;
- iparam[TIMING_AUTOTUNING ] = 1;
- iparam[TIMING_INPUTFMT ] = 0;
- iparam[TIMING_OUTPUTFMT ] = 0;
- iparam[TIMING_NDOM ] = 1;
- iparam[TIMING_PROFILE ] = 0;
- iparam[TIMING_PEAK ] = 0;
- iparam[TIMING_PARALLEL_TASKS] = 0;
- iparam[TIMING_NO_CPU ] = 0;
- iparam[TIMING_BOUND ] = 0;
- iparam[TIMING_BOUNDDEPS ] = 0;
- iparam[TIMING_BOUNDDEPSPRIO ] = 0;
- iparam[TIMING_WITH_CTXS ] = 1;
- get_thread_count( &(iparam[TIMING_THRDNBR]) );
- for (i = 1; i < argc && argv[i]; ++i) {
- if (startswith( argv[i], "--help" )) {
- show_help( argv[0] );
- return EXIT_SUCCESS;
- } else if (startswith( argv[i], "--n_cpus1=" )) {
- get_range( strchr( argv[i], '=' ) + 1, &start_cpus1, &stop_cpus1, &step_cpus1 );
- } else if (startswith( argv[i], "--n_cpus2=" )) {
- get_range( strchr( argv[i], '=' ) + 1, &start_cpus2, &stop_cpus2, &step_cpus2 );
- } else if (startswith( argv[i], "--n_gpus1=" )) {
- get_range( strchr( argv[i], '=' ) + 1, &start_gpus1, &stop_gpus1, &step_gpus1 );
- } else if (startswith( argv[i], "--n_gpus2=" )) {
- get_range( strchr( argv[i], '=' ) + 1, &start_gpus2, &stop_gpus2, &step_gpus2 );
- } else if (startswith( argv[i], "--n_range=" )) {
- get_range( strchr( argv[i], '=' ) + 1, &start, &stop, &step );
- } else if (startswith( argv[i], "--n_range1=" )) {
- get_range( strchr( argv[i], '=' ) + 1, &start1, &stop1, &step1 );
- } else if (startswith( argv[i], "--n_range2=" )) {
- get_range( strchr( argv[i], '=' ) + 1, &start2, &stop2, &step2 );
- } else if (startswith( argv[i], "--threads=" )) {
- sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[TIMING_THRDNBR]) );
- /* } else if (startswith( argv[i], "--gnuplot-png" )) { */
- /* gnuplot = 2; */
- /* } else if (startswith( argv[i], "--gnuplot" )) { */
- /* gnuplot = 1; */
- } else if (startswith( argv[i], "--noctxs" )) {
- iparam[TIMING_WITH_CTXS] = 0;
- } else if (startswith( argv[i], "--check" )) {
- iparam[TIMING_CHECK] = 1;
- } else if (startswith( argv[i], "--nocheck" )) {
- iparam[TIMING_CHECK] = 0;
- } else if (startswith( argv[i], "--warmup" )) {
- iparam[TIMING_WARMUP] = 1;
- } else if (startswith( argv[i], "--nowarmup" )) {
- iparam[TIMING_WARMUP] = 0;
- } else if (startswith( argv[i], "--dyn" )) {
- iparam[TIMING_SCHEDULER] = 1;
- } else if (startswith( argv[i], "--nodyn" )) {
- iparam[TIMING_SCHEDULER] = 0;
- } else if (startswith( argv[i], "--atun" )) {
- iparam[TIMING_AUTOTUNING] = 1;
- } else if (startswith( argv[i], "--noatun" )) {
- iparam[TIMING_AUTOTUNING] = 0;
- } else if (startswith( argv[i], "--profile" )) {
- iparam[TIMING_PROFILE] = 1;
- } else if (startswith( argv[i], "--peak" )) {
- iparam[TIMING_PEAK] = 1;
- } else if (startswith( argv[i], "--noprofile" )) {
- iparam[TIMING_PROFILE] = 0;
- } else if (startswith( argv[i], "--parallel=" )) {
- sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[TIMING_PARALLEL_TASKS]) );
- } else if (startswith( argv[i], "--noparallel" )) {
- iparam[TIMING_PARALLEL_TASKS] = 0;
- } else if (startswith( argv[i], "--nocpu" )) {
- iparam[TIMING_NO_CPU] = 1;
- } else if (startswith( argv[i], "--nb=" )) {
- sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[TIMING_NB]) );
- } else if (startswith( argv[i], "--m=" )) {
- sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[TIMING_M]) );
- } else if (startswith( argv[i], "--ib=" )) {
- sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[TIMING_IB]) );
- } else if (startswith( argv[i], "--nrhs=" )) {
- sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[TIMING_NRHS]) );
- } else if (startswith( argv[i], "--ifmt=" )) {
- sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[TIMING_INPUTFMT]) );
- } else if (startswith( argv[i], "--ofmt=" )) {
- sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[TIMING_OUTPUTFMT]) );
- } else if (startswith( argv[i], "--thrdbypb=" )) {
- sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[TIMING_THRDNBR_SUBGRP]) );
- } else if (startswith( argv[i], "--niter=" )) {
- sscanf( strchr( argv[i], '=' ) + 1, "%d", &iparam[TIMING_NITER] );
- } else if (startswith( argv[i], "--ndom=" )) {
- sscanf( strchr( argv[i], '=' ) + 1, "%d", &iparam[TIMING_NDOM] );
- } else if (startswith( argv[i], "--bounddepsprio" )) {
- iparam[TIMING_BOUND] = 1;
- iparam[TIMING_BOUNDDEPS] = 1;
- iparam[TIMING_BOUNDDEPSPRIO] = 1;
- } else if (startswith( argv[i], "--bounddeps" )) {
- iparam[TIMING_BOUND] = 1;
- iparam[TIMING_BOUNDDEPS] = 1;
- } else if (startswith( argv[i], "--bound" )) {
- iparam[TIMING_BOUND] = 1;
- } else {
- fprintf( stderr, "Unknown option: %s\n", argv[i] );
- }
- }
- if (step < 1) step = 1;
- if (step1 < 1) step1 = 1;
- if (step2 < 1) step2 = 1;
- /* TODO : correct into plasma */
- if ( iparam[TIMING_IB] > iparam[TIMING_NB] )
- iparam[TIMING_IB] = iparam[TIMING_NB];
- /* TODO */
- if (iparam[TIMING_PARALLEL_TASKS]) {
- MAGMA_InitPar(iparam[TIMING_THRDNBR]/iparam[TIMING_PARALLEL_TASKS],
- iparam[TIMING_NCUDAS],
- iparam[TIMING_PARALLEL_TASKS]);
- }
- else {
- MAGMA_Init( iparam[TIMING_THRDNBR],
- iparam[TIMING_NCUDAS]);
-
- }
- MAGMA_Disable(MAGMA_AUTOTUNING);
- MAGMA_Set(MAGMA_TILE_SIZE, iparam[TIMING_NB] );
- MAGMA_Set(MAGMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );
- if(iparam[TIMING_WITH_CTXS])
- {
- int nprocs1 = (stop_cpus1 - start_cpus1 + 1)/step_cpus1 + (stop_gpus1 - start_gpus1 + 1)/step_gpus1;
- int nprocs2 = (stop_cpus2 - start_cpus2 + 1)/step_cpus2 + (stop_gpus2 - start_gpus2 + 1)/step_gpus2;
- int procs1[nprocs1];
- int procs2[nprocs2];
- int i, j = 0;
- printf("%d: ", nprocs1);
- for (i = start_gpus1; i <= stop_gpus1; i += step_gpus1)
- {
- printf("%d ", i);
- procs1[j++] = i;
- }
-
- for (i = start_cpus1; i <= stop_cpus1; i += step_cpus1)
- {
- printf("%d ", i);
- procs1[j++] = i;
- }
- printf("\n");
-
- printf("%d: ", nprocs2);
- j = 0;
- for (i = start_gpus2; i <= stop_gpus2; i += step_gpus2)
- {
- printf("%d ", i);
- procs2[j++] = i;
- }
-
- for (i = start_cpus2; i <= stop_cpus2; i += step_cpus2)
- {
- printf("%d ", i);
- procs2[j++] = i;
- }
- printf("\n");
-
- struct starpu_sched_ctx_hypervisor_criteria *criteria = sched_ctx_hypervisor_init(SIMPLE_POLICY);
- p1.ctx = starpu_create_sched_ctx_with_criteria("heft", procs1, nprocs1, "sched_ctx1", &criteria);
-
- p2.ctx = starpu_create_sched_ctx_with_criteria("heft", procs2, nprocs2, "sched_ctx2", &criteria);
- /* p1.ctx = starpu_create_sched_ctx("heft", procs1, nprocs1, "sched_ctx1"); */
-
- /* p2.ctx = starpu_create_sched_ctx("heft", procs2, nprocs2, "sched_ctx2"); */
- double flops1 = compute_flops(start1, start1);
- double flops2 = compute_flops(start2, start2);
- printf("flops1 = %lf flops2 = %lf\n", flops1, flops2);
- sched_ctx_hypervisor_handle_ctx(p1.ctx, compute_flops(start1, start1));
- sched_ctx_hypervisor_handle_ctx(p2.ctx, compute_flops(start2, start2));
-
- p1.the_other_ctx = p2.ctx;
- p2.the_other_ctx = p1.ctx;
-
- int procs[12];
- for(i = 0; i < 12; i++)
- procs[i] = i;
- int gpus[3];
- for(i = 0; i < 3; i++)
- gpus[i] = i;
- sched_ctx_hypervisor_ioctl(p1.ctx,
- HYPERVISOR_GRANULARITY, 2,
- HYPERVISOR_MIN_TASKS, 10,
- HYPERVISOR_MIN_WORKERS, 3,
- HYPERVISOR_MAX_WORKERS, 12,
- HYPERVISOR_FIXED_WORKERS, gpus, 3,
- // HYPERVISOR_MAX_IDLE, procs, 12, 40000.0,
- // HYPERVISOR_MAX_IDLE, gpus, 3, 10000.0,
- NULL);
-
- sched_ctx_hypervisor_ioctl(p2.ctx,
- HYPERVISOR_GRANULARITY, 2,
- HYPERVISOR_MIN_TASKS, 10,
- HYPERVISOR_MIN_WORKERS, 0,
- HYPERVISOR_MAX_WORKERS, 12,
- HYPERVISOR_FIXED_WORKERS, gpus, 3,
- // HYPERVISOR_MAX_IDLE, procs, 12, 40000.0,
- // HYPERVISOR_MAX_IDLE, gpus, 3, 10000.0,
- NULL);
-
- }
- else
- {
- p1.ctx = 0;
- p2.ctx = 0;
- }
-
- Test( -1, iparam ); /* print header */
- iparam[TIMING_N] = start1;
- iparam[TIMING_N2] = start2;
-
- if ( iparam[TIMING_M] == 0 )
- iparam[TIMING_M] = iparam[TIMING_N];
- if ( iparam[TIMING_M2] == 0 )
- iparam[TIMING_M2] = iparam[TIMING_N2];
- Test( start1, iparam );
-
- MAGMA_Finalize();
- if(iparam[TIMING_WITH_CTXS])
- sched_ctx_hypervisor_shutdown();
-
- /* if (gnuplot) { */
- /* printf( "%s\n%s\n", */
- /* "e", */
- /* gnuplot > 1 ? "" : "pause 10" ); */
- /* } */
- return EXIT_SUCCESS;
- }
|