15 年之前 · a4ea56d983
--- a/ChangeLog
+++ b/ChangeLog
@@ -23,7 +23,7 @@ The asynchronous heterogeneous multi-accelerator release
 
				     - Model data transfer overhead
			
 
				     - One model is created for each accelerator
			
 
				   * Support for CUDA's driver API is deprecated
			
 
				-  * The WORKERS_GPUID and WORKERS_CPUID env. variables make it possible to
			
 
				+  * The STARPU_WORKERS_GPUID and STARPU_WORKERS_CPUID env. variables make it possible to
			
 
				     specify where to bind the workers
			
 
				   * Use the hwloc library to detect the actual number of cores
			
 
				 
			
--- a/doc/starpu.texi
+++ b/doc/starpu.texi
@@ -233,15 +233,15 @@ TODO, explicit configuration (passed to starpu_init) overrides env variables.
 
				 @section Configuring workers
			
 
				 
			
 
				 @menu
			
 
				-* NCPUS     :: Number of CPU workers
			
 
				-* NCUDA     :: Number of CUDA workers
			
 
				-* NGORDON   :: Number of SPU workers (Cell)
			
 
				-* WORKERS_CPUID  :: Bind workers to specific CPUs
			
 
				-* WORKERS_GPUID  :: Select specific CUDA devices
			
 
				+* STARPU_NCPUS     :: Number of CPU workers
			
 
				+* STARPU_NCUDA     :: Number of CUDA workers
			
 
				+* STARPU_NGORDON   :: Number of SPU workers (Cell)
			
 
				+* STARPU_WORKERS_CPUID  :: Bind workers to specific CPUs
			
 
				+* STARPU_WORKERS_GPUID  :: Select specific CUDA devices
			
 
				 @end menu
			
 
				 
			
 
				-@node NCPUS
			
 
				-@subsection @code{NCPUS} -- Number of CPU workers
			
 
				+@node STARPU_NCPUS
			
 
				+@subsection @code{STARPU_NCPUS} -- Number of CPU workers
			
 
				 @table @asis
			
 
				 
			
 
				 @item @emph{Description}:
			
@@ -249,8 +249,8 @@ TODO
 
				 
			
 
				 @end table
			
 
				 
			
 
				-@node NCUDA
			
 
				-@subsection @code{NCUDA} -- Number of CUDA workers
			
 
				+@node STARPU_NCUDA
			
 
				+@subsection @code{STARPU_NCUDA} -- Number of CUDA workers
			
 
				 @table @asis
			
 
				 
			
 
				 @item @emph{Description}:
			
@@ -258,8 +258,8 @@ TODO
 
				 
			
 
				 @end table
			
 
				 
			
 
				-@node NGORDON
			
 
				-@subsection @code{NGORDON} -- Number of SPU workers (Cell)
			
 
				+@node STARPU_NGORDON
			
 
				+@subsection @code{STARPU_NGORDON} -- Number of SPU workers (Cell)
			
 
				 @table @asis
			
 
				 
			
 
				 @item @emph{Description}:
			
@@ -268,8 +268,8 @@ TODO
 
				 @end table
			
 
				 
			
 
				 
			
 
				-@node WORKERS_CPUID
			
 
				-@subsection @code{WORKERS_CPUID} -- Bind workers to specific CPUs
			
 
				+@node STARPU_WORKERS_CPUID
			
 
				+@subsection @code{STARPU_WORKERS_CPUID} -- Bind workers to specific CPUs
			
 
				 @table @asis
			
 
				 
			
 
				 @item @emph{Description}:
			
@@ -277,8 +277,8 @@ TODO
 
				 
			
 
				 @end table
			
 
				 
			
 
				-@node WORKERS_GPUID
			
 
				-@subsection @code{WORKERS_GPUID} -- Select specific CUDA devices
			
 
				+@node STARPU_WORKERS_GPUID
			
 
				+@subsection @code{STARPU_WORKERS_GPUID} -- Select specific CUDA devices
			
 
				 @table @asis
			
 
				 
			
 
				 @item @emph{Description}:
			
@@ -422,15 +422,15 @@ This is the name of the scheduling policy. This can also be specified with the
 
				 
			
 
				 @item @code{ncpus} (default = -1):
			
 
				 This is the maximum number of CPU cores that StarPU can use. This can also be
			
 
				-specified with the @code{NCPUS} environment variable.
			
 
				+specified with the @code{STARPU_NCPUS} environment variable.
			
 
				 
			
 
				 @item @code{ncuda} (default = -1):
			
 
				 This is the maximum number of CUDA devices that StarPU can use. This can also be
			
 
				-specified with the @code{NCUDA} environment variable.
			
 
				+specified with the @code{STARPU_NCUDA} environment variable.
			
 
				 
			
 
				 @item @code{nspus} (default = -1):
			
 
				 This is the maximum number of Cell SPUs that StarPU can use. This can also be
			
 
				-specified with the @code{NGORDON} environment variable.
			
 
				+specified with the @code{STARPU_NGORDON} environment variable.
			
 
				 
			
 
				 @item @code{calibrate} (default = 0):
			
 
				 If this flag is set, StarPU will calibrate the performance models when
			
--- a/mpi/examples/perf.sh
+++ b/mpi/examples/perf.sh
@@ -42,7 +42,7 @@ echo "*******************************************">> log
 
				 cat log
			
 
				 cat log >> log.all
			
 
				 
			
 
				-NCPUS=0 NCUDA=$ngpus SCHED="dmda" PREFETCH=1 mpirun -machinefile machinefile.${PBS_JOBID} -np $np ./mpi_lu/plu_example_float -p $p -q $q -nblocks $nblocks -size $(($nblocks * $BLOCKSIZE)) -numa > log.out 2> log.err
			
 
				+STARPU_NCPUS=0 STARPU_NCUDA=$ngpus SCHED="dmda" PREFETCH=1 mpirun -machinefile machinefile.${PBS_JOBID} -np $np ./mpi_lu/plu_example_float -p $p -q $q -nblocks $nblocks -size $(($nblocks * $BLOCKSIZE)) -numa > log.out 2> log.err
			
 
				 cat log.out > log
			
 
				 cat log.err >> log
			
 
				 cat log
			
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -54,7 +54,7 @@ static void _starpu_initialize_workers_gpuid(struct machine_config_s *config)
 
				 	/* conf->workers_bindid indicates the successive cpu identifier that
			
 
				 	 * should be used to bind the workers. It should be either filled
			
 
				 	 * according to the user's explicit parameters (from starpu_conf) or
			
 
				-	 * according to the WORKERS_CPUID env. variable. Otherwise, a
			
 
				+	 * according to the STARPU_WORKERS_CPUID env. variable. Otherwise, a
			
 
				 	 * round-robin policy is used to distribute the workers over the
			
 
				 	 * cpus. */
			
 
				 
			
@@ -66,16 +66,16 @@ static void _starpu_initialize_workers_gpuid(struct machine_config_s *config)
 
				 			config->user_conf->workers_gpuid,
			
 
				 			STARPU_NMAXWORKERS*sizeof(unsigned));
			
 
				 	}
			
 
				-	else if ((strval = getenv("WORKERS_GPUID")))
			
 
				+	else if ((strval = getenv("STARPU_WORKERS_GPUID")))
			
 
				 	{
			
 
				-		/* WORKERS_GPUID certainly contains less entries than
			
 
				+		/* STARPU_WORKERS_GPUID certainly contains fewer entries than
			
 
				 		 * STARPU_NMAXWORKERS, so we reuse its entries in a round robin
			
 
				 		 * fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1 2". */
			
 
				 		unsigned wrap = 0;
			
 
				 		unsigned number_of_entries = 0;
			
 
				 
			
 
				 		char *endptr;
			
 
				-		/* we use the content of the WORKERS_GPUID env. variable */
			
 
				+		/* we use the content of the STARPU_WORKERS_GPUID env. variable */
			
 
				 		for (i = 0; i < STARPU_NMAXWORKERS; i++)
			
 
				 		{
			
 
				 			if (!wrap) {
			
@@ -188,7 +188,7 @@ static int _starpu_init_machine_config(struct machine_config_s *config,
 
				 			explicitval = user_conf->ncuda;
			
 
				 		}
			
 
				 		else {
			
 
				-			explicitval = starpu_get_env_number("NCUDA");
			
 
				+			explicitval = starpu_get_env_number("STARPU_NCUDA");
			
 
				 		}
			
 
				 
			
 
				 		if (explicitval < 0) {
			
@@ -227,7 +227,7 @@ static int _starpu_init_machine_config(struct machine_config_s *config,
 
				 		explicitval = user_conf->ncuda;
			
 
				 	}
			
 
				 	else {
			
 
				-		explicitval = starpu_get_env_number("NGORDON");
			
 
				+		explicitval = starpu_get_env_number("STARPU_NGORDON");
			
 
				 	}
			
 
				 
			
 
				 	if (explicitval < 0) {
			
@@ -263,7 +263,7 @@ static int _starpu_init_machine_config(struct machine_config_s *config,
 
				 		explicitval = user_conf->ncpus;
			
 
				 	}
			
 
				 	else {
			
 
				-		explicitval = starpu_get_env_number("NCPUS");
			
 
				+		explicitval = starpu_get_env_number("STARPU_NCPUS");
			
 
				 	}
			
 
				 
			
 
				 	if (explicitval < 0) {
			
@@ -314,7 +314,7 @@ static void _starpu_initialize_workers_bindid(struct machine_config_s *config)
 
				 	/* conf->workers_bindid indicates the successive cpu identifier that
			
 
				 	 * should be used to bind the workers. It should be either filled
			
 
				 	 * according to the user's explicit parameters (from starpu_conf) or
			
 
				-	 * according to the WORKERS_CPUID env. variable. Otherwise, a
			
 
				+	 * according to the STARPU_WORKERS_CPUID env. variable. Otherwise, a
			
 
				 	 * round-robin policy is used to distribute the workers over the
			
 
				 	 * cpus. */
			
 
				 
			
@@ -326,16 +326,16 @@ static void _starpu_initialize_workers_bindid(struct machine_config_s *config)
 
				 			config->user_conf->workers_bindid,
			
 
				 			STARPU_NMAXWORKERS*sizeof(unsigned));
			
 
				 	}
			
 
				-	else if ((strval = getenv("WORKERS_CPUID")))
			
 
				+	else if ((strval = getenv("STARPU_WORKERS_CPUID")))
			
 
				 	{
			
 
				-		/* WORKERS_CPUID certainly contains less entries than
			
 
				+		/* STARPU_WORKERS_CPUID certainly contains fewer entries than
			
 
				 		 * STARPU_NMAXWORKERS, so we reuse its entries in a round robin
			
 
				 		 * fashion: "1 2" is equivalent to "1 2 1 2 1 2 .... 1 2". */
			
 
				 		unsigned wrap = 0;
			
 
				 		unsigned number_of_entries = 0;
			
 
				 
			
 
				 		char *endptr;
			
 
				-		/* we use the content of the WORKERS_GPUID env. variable */
			
 
				+		/* we use the content of the STARPU_WORKERS_CPUID env. variable */
			
 
				 		for (i = 0; i < STARPU_NMAXWORKERS; i++)
			
 
				 		{
			
 
				 			if (!wrap) {
			
--- a/tests/cholesky/sched-one-gpu.sh
+++ b/tests/cholesky/sched-one-gpu.sh
@@ -20,7 +20,7 @@ maxiter=5
 
				 ROOTDIR=../../
			
 
				 TIMINGDIR=$PWD/timing/
			
 
				 
			
 
				-export WORKERS_GPUID="1"
			
 
				+export STARPU_WORKERS_GPUID="1"
			
 
				 
			
 
				 trace_sched()
			
 
				 {
			
@@ -57,7 +57,7 @@ trace_sched()
 
				 
			
 
				 schedlist='dm dm dm dm greedy dm'
			
 
				 
			
 
				-export NCUDA=1
			
 
				+export STARPU_NCUDA=1
			
 
				 export CALIBRATE=1
			
 
				 
			
 
				 mkdir -p $TIMINGDIR
			
--- a/tests/coverage/coverage-test.sh
+++ b/tests/coverage/coverage-test.sh
@@ -67,11 +67,11 @@ save_cov "tag_example2";
 
				 # save_cov "spmv";
			
 
				 # 
			
 
				 # echo "spmv.gpu"
			
 
				-# timing=`NCPUS=0 $BUILDDIR/examples/spmv/dw_spmv 2> /dev/null`
			
 
				+# timing=`STARPU_NCPUS=0 $BUILDDIR/examples/spmv/dw_spmv 2> /dev/null`
			
 
				 # save_cov "spmv.gpu";
			
 
				 # 
			
 
				 # echo "spmv.cpu"
			
 
				-# timing=`NCUDA=0 $BUILDDIR/examples/spmv/dw_spmv 2> /dev/null`
			
 
				+# timing=`STARPU_NCUDA=0 $BUILDDIR/examples/spmv/dw_spmv 2> /dev/null`
			
 
				 # save_cov "spmv.cpu";
			
 
				 # 
			
 
				 # echo "spmv.dm"
			
@@ -106,11 +106,11 @@ timing=`CALIBRATE=1 SCHED="dmda" $EXAMPLEDIR/dw_cholesky -pin 2> /dev/null`
 
				 save_cov "chol.dmda";
			
 
				 
			
 
				 echo "chol.cpu"
			
 
				-timing=`CALIBRATE=1 NCUDA=0 SCHED="dm" $EXAMPLEDIR/dw_cholesky -pin 2> /dev/null`
			
 
				+timing=`CALIBRATE=1 STARPU_NCUDA=0 SCHED="dm" $EXAMPLEDIR/dw_cholesky -pin 2> /dev/null`
			
 
				 save_cov "chol.cpu";
			
 
				 
			
 
				 echo "chol.gpu"
			
 
				-timing=`CALIBRATE=1 NCPUS=0 SCHED="dm" $EXAMPLEDIR/dw_cholesky -pin 2> /dev/null`
			
 
				+timing=`CALIBRATE=1 STARPU_NCPUS=0 SCHED="dm" $EXAMPLEDIR/dw_cholesky -pin 2> /dev/null`
			
 
				 save_cov "chol.gpu";
			
 
				 
			
 
				 echo "chol"
			
--- a/tests/heat/dmda.sh
+++ b/tests/heat/dmda.sh
@@ -55,8 +55,8 @@ mkdir -p $SAMPLINGDIR
 
				 #schedlist="ws no-prio greedy prio dm random"
			
 
				 #schedlist="random random random random"
			
 
				 
			
 
				-export NCUDA=3
			
 
				-export NCPUS=8 
			
 
				+export STARPU_NCUDA=3
			
 
				+export STARPU_NCPUS=8 
			
 
				 
			
 
				 #sizelist="2048 4096 6144 8192 10240 12288 14336 16384 24576 28672 30720"
			
 
				 sizelist=`seq 2048 2048 30720`
			
--- a/tests/heat/granularity.sh
+++ b/tests/heat/granularity.sh
@@ -60,7 +60,7 @@ trace_granularity()
 
				 		for iter in `seq 1 $maxiter`
			
 
				 		do
			
 
				 			echo "$iter / $maxiter"
			
 
				-			 val=`NCPUS=8 NCUDA=3 SCHED="dmda" PREFETCH=1 CALIBRATE=1 $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null`
			
 
				+			 val=`STARPU_NCPUS=8 STARPU_NCUDA=3 SCHED="dmda" PREFETCH=1 CALIBRATE=1 $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null`
			
 
				 			 echo "$val"
			
 
				 			 echo "$val" >> $filename
			
 
				 		done
			
@@ -121,12 +121,12 @@ calibrate_grain()
 
				 
			
 
				 	OPTIONS="-pin -nblocks $blocks -size $size -v3"
			
 
				 
			
 
				-	NCUDA=3 NCPUS=8 CALIBRATE=1 SCHED="dm" $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null 
			
 
				-	NCUDA=3 NCPUS=8 CALIBRATE=1 PREFETCH=1 SCHED="dmda" $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
			
 
				-	NCUDA=3 NCPUS=8 CALIBRATE=1 PREFETCH=1 SCHED="dmda" $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
			
 
				-	NCUDA=3 NCPUS=8 CALIBRATE=1 PREFETCH=1 SCHED="dmda" $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
			
 
				-	NCUDA=3 NCPUS=8 CALIBRATE=1 PREFETCH=1 SCHED="dmda" $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
			
 
				-	NCUDA=3 NCPUS=8 CALIBRATE=1 PREFETCH=1 SCHED="dmda" $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
			
 
				+	STARPU_NCUDA=3 STARPU_NCPUS=8 CALIBRATE=1 SCHED="dm" $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null 
			
 
				+	STARPU_NCUDA=3 STARPU_NCPUS=8 CALIBRATE=1 PREFETCH=1 SCHED="dmda" $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
			
 
				+	STARPU_NCUDA=3 STARPU_NCPUS=8 CALIBRATE=1 PREFETCH=1 SCHED="dmda" $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
			
 
				+	STARPU_NCUDA=3 STARPU_NCPUS=8 CALIBRATE=1 PREFETCH=1 SCHED="dmda" $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
			
 
				+	STARPU_NCUDA=3 STARPU_NCPUS=8 CALIBRATE=1 PREFETCH=1 SCHED="dmda" $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
			
 
				+	STARPU_NCUDA=3 STARPU_NCPUS=8 CALIBRATE=1 PREFETCH=1 SCHED="dmda" $ROOTDIR/examples/heat/heat $OPTIONS 2> /dev/null
			
 
				 }
			
 
				 
			
 
				 mkdir -p $TIMINGDIR
			
--- a/tests/heat/heat.sh
+++ b/tests/heat/heat.sh
@@ -44,8 +44,8 @@ do
 
				 			blocks=$BLOCKS
			
 
				 			filename=$TIMINGDIR/timing.$cpus.$cublas.$size.$blocks
			
 
				 
			
 
				-			export NCPUS=$cpus
			
 
				-			export NCUDA=$cublas
			
 
				+			export STARPU_NCPUS=$cpus
			
 
				+			export STARPU_NCUDA=$cublas
			
 
				 
			
 
				 			echo "size $size cpus $cpus cublas $cublas blocks $blocks" 
			
 
				 			$ROOTDIR/examples/heat -nthick 34 -ntheta $(($theta+2)) -nblocks $BLOCKS 2>/dev/null| tee $filename
			
@@ -60,8 +60,8 @@ do
 
				 			blocks=$BLOCKS
			
 
				 			filename=$TIMINGDIR/timing.$cpus.$cublas.$size.$blocks
			
 
				 
			
 
				-			export NCPUS=$cpus
			
 
				-			export NCUDA=$cublas
			
 
				+			export STARPU_NCPUS=$cpus
			
 
				+			export STARPU_NCUDA=$cublas
			
 
				 
			
 
				 			echo "size $size cpus $cpus cublas $cublas blocks $blocks" 
			
 
				 			$ROOTDIR/examples/heat -nthick 34 -ntheta $(($theta+2)) -nblocks $BLOCKS 2>/dev/null| tee $filename
			
@@ -86,15 +86,15 @@ do
 
				 	do
			
 
				 		size=$(($theta*32))
			
 
				 
			
 
				-		export NCPUS=4
			
 
				-		export NCUDA=0
			
 
				+		export STARPU_NCPUS=4
			
 
				+		export STARPU_NCUDA=0
			
 
				 
			
 
				 		echo "size $size cpus 4 cublas 0 blocks $blocks"
			
 
				 		filename=$TIMINGDIR/timing.4.0.$size.$blocks
			
 
				 		$ROOTDIR/examples/heat -nthick 34 -ntheta $(($theta+2)) -nblocks $blocks 2>/dev/null| tee $filename
			
 
				 
			
 
				-		export NCPUS=3
			
 
				-		export NCUDA=1
			
 
				+		export STARPU_NCPUS=3
			
 
				+		export STARPU_NCUDA=1
			
 
				 
			
 
				 		echo "size $size cpus 3 cublas 1 blocks $blocks"
			
 
				 		filename=$TIMINGDIR/timing.3.1.$size.$blocks
			
--- a/tests/heat/sched.sh
+++ b/tests/heat/sched.sh
@@ -97,8 +97,8 @@ mkdir -p $SAMPLINGDIR
 
				 #schedlist="ws no-prio greedy prio dm random"
			
 
				 #schedlist="random random random random"
			
 
				 
			
 
				-export NCUDA=3
			
 
				-export NCPUS=8 
			
 
				+export STARPU_NCUDA=3
			
 
				+export STARPU_NCPUS=8 
			
 
				 
			
 
				 trace_sched "greedy" 0 0;
			
 
				 trace_sched "dm" 0 1;
			
--- a/tests/heat/speedup.sh
+++ b/tests/heat/speedup.sh
@@ -37,7 +37,7 @@ do
 
				 
			
 
				 	for cpus in 1 2 4 6 8 10 12 14 16
			
 
				 	do
			
 
				-		export NCPUS=$cpus
			
 
				+		export STARPU_NCPUS=$cpus
			
 
				 
			
 
				 		echo "ncpus $cpus size $size"
			
 
				 
			
--- a/tests/memory/memstress.sh
+++ b/tests/memory/memstress.sh
@@ -31,8 +31,8 @@ trace_stress()
 
				 {
			
 
				 	memstress=$1
			
 
				 
			
 
				-	export NCPUS=0
			
 
				-	export NCUDA=1
			
 
				+	export STARPU_NCPUS=0
			
 
				+	export STARPU_NCUDA=1
			
 
				 	export STRESS_MEM=$memstress
			
 
				 
			
 
				 	line="$memstress"
			
--- a/tests/mult/sched.sh
+++ b/tests/mult/sched.sh
@@ -37,7 +37,7 @@ trace_sched()
 
				 		do
			
 
				 			echo "$iter / $maxiter"
			
 
				 			#echo "$ROOTDIR/examples/mult/sgemm $OPTIONS 2> /dev/null"
			
 
				-			NCUDA=1 CALIBRATE=1 SCHED="$sched" $ROOTDIR/examples/mult/sgemm $OPTIONS 2> /dev/null >> $filename
			
 
				+			STARPU_NCUDA=1 CALIBRATE=1 SCHED="$sched" $ROOTDIR/examples/mult/sgemm $OPTIONS 2> /dev/null >> $filename
			
 
				 			tail -1 $filename
			
 
				 		done
			
 
				 	done
			
--- a/tests/mult/speedup.sh
+++ b/tests/mult/speedup.sh
@@ -35,7 +35,7 @@ do
 
				 
			
 
				 	for cpus in 1 2 4 6 8 10 12 14 16
			
 
				 	do
			
 
				-		export NCPUS=$cpus
			
 
				+		export STARPU_NCPUS=$cpus
			
 
				 
			
 
				 		echo "ncpus $cpus size $size"
			
 
				 
			
--- a/tests/perf-models/bench_sgemm.sh
+++ b/tests/perf-models/bench_sgemm.sh
@@ -40,7 +40,7 @@ gpu_output=$DIR/output.gpu
 
				 rm -f $cpu_output
			
 
				 rm -f $gpu_output
			
 
				 
			
 
				-export WORKERS_CPUID="2"
			
 
				+export STARPU_WORKERS_CPUID="2"
			
 
				 export CALIBRATE=1
			
 
				 export SCHED="dm"
			
 
				 
			
@@ -56,7 +56,7 @@ do
 
				 	fi
			
 
				 
			
 
				 	echo "GotoBLAS -> size $size niter $niter"
			
 
				-	timing=`NCPUS=1 NCUDA=0 $INSTALLDIR/lib/starpu/examples/dw_mult_no_filters -x $size -y $size -z $size -nblocks 1 -iter $niter 2> /dev/null`
			
 
				+	timing=`STARPU_NCPUS=1 STARPU_NCUDA=0 $INSTALLDIR/lib/starpu/examples/dw_mult_no_filters -x $size -y $size -z $size -nblocks 1 -iter $niter 2> /dev/null`
			
 
				 	echo "$size	$timing	$niter" >> $cpu_output
			
 
				 done
			
 
				 
			
@@ -73,7 +73,7 @@ do
 
				 	fi
			
 
				 
			
 
				 	echo "CUBLAS -> size $size niter $niter"
			
 
				-	timing=`NCPUS=0 NCUDA=1 $INSTALLDIR/lib/starpu/examples/dw_mult_no_filters -x $size -y $size -z $size -nblocks 1 -iter $niter 2 -pin 2> /dev/null`
			
 
				+	timing=`STARPU_NCPUS=0 STARPU_NCUDA=1 $INSTALLDIR/lib/starpu/examples/dw_mult_no_filters -x $size -y $size -z $size -nblocks 1 -iter $niter 2 -pin 2> /dev/null`
			
 
				 	echo "$size	$timing	$niter" >> $gpu_output
			
 
				 done