Andra Hugo 13 years ago
Parent
Current commit
38e00e9d11
100 files changed, 7350 insertions and 1006 deletions
  1. 63 1
      .gitignore
  2. 2 0
      AUTHORS
  3. 85 1
      ChangeLog
  4. 22 8
      Makefile.am
  5. 17 1
      README
  6. 43 159
      README.dev
  7. 21 0
      STARPU-VERSION
  8. 9 0
      TODO
  9. 0 95
      acinclude.m4
  10. 326 107
      configure.ac
  11. 2 2
      libstarpu.pc.in
  12. 35 0
      starpu-1.0.pc.in
  13. 1 1
      starpu-top/StarPU-Top-common.pri
  14. 0 2
      starpu-top/StarPU-Top-qwt-system.pri
  15. 2 0
      starpu-top/StarPU-Top-qwt-system.pri.in
  16. 1 1
      starpu-top/aboutdialog.ui
  17. 3 3
      starpu-top/communicationmanager.cpp
  18. 3 3
      starpu-top/communicationmanager.h
  19. 3 3
      starpu-top/communicationthread.cpp
  20. 1 1
      starpu-top/configurationmanager.h
  21. 1 1
      starpu-top/dataaggregatorwidget.h
  22. 1 1
      starpu-top/datawidget.h
  23. 2 4
      starpu-top/extradist
  24. 9 9
      starpu-top/ganttwidget.cpp
  25. 6 6
      starpu-top/ganttwidget.h
  26. 0 0
      starpu-top/images/starpu_top.png
  27. 1 1
      starpu-top/interactivewidget.h
  28. 11 11
      starpu-top/mainwindow.cpp
  29. 4 4
      starpu-top/mainwindow.h
  30. 3 3
      starpu-top/mainwindow.ui
  31. 1 1
      starpu-top/preferencesdialog.h
  32. 1 1
      starpu-top/resources.qrc
  33. 1 1
      starpu-top/sessionsetupmanager.h
  34. 7 7
      starpu-top/starputoptypes.h
  35. 7 7
      starpu-top/taskmanager.cpp
  36. 3 3
      starpu-top/taskmanager.h
  37. 1 0
      starpufft/.gitignore
  38. 97 0
      starpufft/Makefile.am
  39. 19 0
      starpufft/cuda_kernels.cu
  40. 19 0
      starpufft/cudaf_kernels.cu
  41. 156 0
      starpufft/cudax_kernels.cu
  42. 23 0
      starpufft/cudax_kernels.h
  43. 51 0
      starpufft/double.h
  44. 19 0
      starpufft/examples/test.c
  45. 19 0
      starpufft/examples/test_threads.c
  46. 19 0
      starpufft/examples/testf.c
  47. 19 0
      starpufft/examples/testf_threads.c
  48. 283 0
      starpufft/examples/testx.c
  49. 113 0
      starpufft/examples/testx_threads.c
  50. 51 0
      starpufft/float.h
  51. 27 0
      starpufft/libstarpufft.pc.in
  52. 27 0
      starpufft/starpufft-1.0.pc.in
  53. 19 0
      starpufft/starpufft.c
  54. 60 0
      starpufft/starpufft.h
  55. 21 0
      starpufft/starpufft_common.c
  56. 19 0
      starpufft/starpufftf.c
  57. 454 0
      starpufft/starpufftx.c
  58. 847 0
      starpufft/starpufftx1d.c
  59. 850 0
      starpufft/starpufftx2d.c
  60. 272 32
      tests/Makefile.am
  61. 23 3
      tests/cholesky/prio.r
  62. 23 3
      tests/cholesky/sched.r
  63. 0 65
      tests/core/multithreaded_init.c
  64. 0 121
      tests/core/task_wait_api.c
  65. 8 5
      tests/datawizard/acquire_cb.c
  66. 43 25
      tests/datawizard/acquire_cb_insert.c
  67. 46 18
      tests/datawizard/acquire_release.c
  68. 42 16
      tests/datawizard/acquire_release2.c
  69. 34 30
      tests/datawizard/copy.c
  70. 24 15
      tests/datawizard/critical_section_with_void_interface.c
  71. 66 32
      tests/datawizard/data_implicit_deps.c
  72. 46 30
      tests/datawizard/data_invalidation.c
  73. 48 39
      tests/datawizard/data_lookup.c
  74. 35 18
      tests/datawizard/dining_philosophers.c
  75. 174 0
      tests/datawizard/double_parameter.c
  76. 142 28
      tests/datawizard/dsm_stress.c
  77. 139 0
      tests/datawizard/gpu_register.c
  78. 23 17
      tests/datawizard/handle_to_pointer.c
  79. 102 0
      tests/datawizard/in_place_partition.c
  80. 78 31
      tests/datawizard/increment_redux.c
  81. 255 0
      tests/datawizard/increment_redux_lazy.c
  82. 102 30
      tests/datawizard/increment_redux_v2.c
  83. 70 0
      tests/datawizard/interfaces/bcsr/bcsr_cuda.cu
  84. 198 0
      tests/datawizard/interfaces/bcsr/bcsr_interface.c
  85. 130 0
      tests/datawizard/interfaces/bcsr/bcsr_opencl.c
  86. 29 0
      tests/datawizard/interfaces/bcsr/bcsr_opencl_kernel.cl
  87. 80 0
      tests/datawizard/interfaces/block/block_cuda.cu
  88. 163 0
      tests/datawizard/interfaces/block/block_interface.c
  89. 120 0
      tests/datawizard/interfaces/block/block_opencl.c
  90. 46 0
      tests/datawizard/interfaces/block/block_opencl_kernel.cl
  91. 106 0
      tests/datawizard/interfaces/copy_interfaces.c
  92. 68 0
      tests/datawizard/interfaces/csr/csr_cuda.cu
  93. 170 0
      tests/datawizard/interfaces/csr/csr_interface.c
  94. 130 0
      tests/datawizard/interfaces/csr/csr_opencl.c
  95. 29 0
      tests/datawizard/interfaces/csr/csr_opencl_kernel.cl
  96. 71 0
      tests/datawizard/interfaces/matrix/matrix_cuda.cu
  97. 145 0
      tests/datawizard/interfaces/matrix/matrix_interface.c
  98. 129 0
      tests/datawizard/interfaces/matrix/matrix_opencl.c
  99. 31 0
      tests/datawizard/interfaces/matrix/matrix_opencl_kernel.cl
  100. 0 0
      tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets.c

+ 63 - 1
.gitignore

@@ -3,7 +3,6 @@
 /config.status
 /autom4te.cache
 /libtool
-/libstarpu.pc
 /aclocal.m4
 /build-aux
 /GPATH
@@ -186,3 +185,66 @@ starpu.log
 /gcc-plugin/tests/lib-user
 /gcc-plugin/examples/matrix-mult
 /gcc-plugin/src/c-expr.c
+/gcc-plugin/tests/heap-allocated
+/gcc-plugin/tests/output-pointer
+/gcc-plugin/examples/vector_scal/vector_scal
+/doc/starpu.info-1
+/doc/starpu.info-2
+/examples/axpy/axpy
+/examples/basic_examples/mult_impl
+/examples/basic_examples/multiformat
+/examples/cg/cg
+/examples/cholesky/cholesky_grain_tag
+/examples/cholesky/cholesky_implicit
+/examples/cholesky/cholesky_tag
+/examples/cholesky/cholesky_tile_tag
+/examples/cpp/incrementer_cpp
+/examples/filters/custom_mf/custom_mf_filter
+/examples/filters/multiformat/multiformat_filter
+/examples/heat/heat
+/examples/lu/lu_example_double
+/examples/lu/lu_example_float
+/examples/lu/lu_implicit_example_double
+/examples/lu/lu_implicit_example_float
+/examples/mult/dgemm
+/examples/mult/sgemm
+/mpi/starpumpi-1.0.pc
+/socl/socl-1.0.pc
+/starpufft/starpufft-1.0.pc
+/tests/core/deprecated
+/tests/core/deprecated_buffer
+/tests/core/deprecated_func
+/tests/core/multiformat_data_release
+/tests/core/multiformat_handle_conversion
+/tests/core/starpu_init
+/tests/core/starpu_task_bundle
+/tests/core/starpu_worker_exists
+/tests/datawizard/copy
+/tests/datawizard/double_parameter
+/tests/datawizard/gpu_register
+/tests/datawizard/in_place_partition
+/tests/datawizard/increment_redux_lazy
+/tests/datawizard/interfaces/bcsr/bcsr_interface
+/tests/datawizard/interfaces/block/block_interface
+/tests/datawizard/interfaces/csr/csr_interface
+/tests/datawizard/interfaces/matrix/matrix_interface
+/tests/datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl
+/tests/datawizard/interfaces/multiformat/advanced/multiformat_data_release
+/tests/datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion
+/tests/datawizard/interfaces/multiformat/advanced/multiformat_worker
+/tests/datawizard/interfaces/multiformat/advanced/same_handle
+/tests/datawizard/interfaces/multiformat/multiformat_interface
+/tests/datawizard/interfaces/test_interfaces
+/tests/datawizard/interfaces/test_vector_interface
+/tests/datawizard/interfaces/variable/variable_interface
+/tests/datawizard/interfaces/vector/test_vector_interface
+/tests/datawizard/interfaces/void/void_interface
+/tests/datawizard/partition_lazy
+/tests/loader
+/tests/starpu_machine_display
+/tools/starpu_calibrate_bus.1
+/tools/starpu_machine_display.1
+/tools/starpu_perfmodel_display.1
+/tools/starpu_perfmodel_plot.1
+/starpu-1.0.pc
+/gcc-plugin/examples/cholesky/cholesky

+ 2 - 0
AUTHORS

@@ -12,3 +12,5 @@ Jean-Marie Couteyen <jm.couteyen@gmail.com>
 Anthony Roy <theanthony33@gmail.com>
 David Gómez <david_gomez1380@yahoo.com.mx>
 Nguyen Quôc Dinh <nguyen.quocdinh@gmail.com>
+Antoine Lucas <antoine.lucas.33@gmail.com>
+Pierre André Wacrenier <wacrenier@labri.fr>

+ 85 - 1
ChangeLog

@@ -1,3 +1,87 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
+# Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+
+StarPU 1.0 (svn revision xxxx)
+==============================================
+The extensions-again release
+
+  * struct starpu_data_interface_ops --- operations on a data
+        interface --- define a new function pointer allocate_new_data
+        which creates a new data interface of the given type based on
+        an existing handle
+  * Make environment variables take precedence over the configuration
+        passed to starpu_init()
+  * Add man pages for some of the tools
+  * Add reduction mode to starpu_mpi_insert_task
+  * Add C++ application example in examples/cpp/
+  * Increase default value for STARPU_MAXCPUS -- Maximum number of
+        CPUs supported -- to 64.
+  * Libtool interface versioning has been included in libraries names
+        (libstarpu-1.0.so, libstarpumpi-1.0.so,
+        libstarpufft-1.0.so, libsocl-1.0.so)
+  * Enable by default the SOCL extension.
+  * Enable by default the GCC plug-in extension.
+  * Add a field named magic to struct starpu_task which is set when
+        initialising the task. starpu_task_submit will fail if the
+        field does not have the right value. This will hence avoid
+        submitting tasks which have not been properly initialised.
+  * Make where field for struct starpu_codelet optional. When unset, its
+	value will be automatically set based on the availability of the
+	different XXX_funcs fields of the codelet.
+  * Add a hook function pre_exec_hook in struct starpu_sched_policy.
+        The function is meant to be called in drivers. Schedulers
+        can use it to be notified when a task is about being computed.
+  * Define access modes for data handles into starpu_codelet and no longer
+	in starpu_task. Hence mark (struct starpu_task).buffers as
+	deprecated, and add (struct starpu_task).handles and (struct
+	starpu_codelet).modes
+  * Install headers under $includedir/starpu/1.0.
+  * Deprecate cost_model, and introduce cost_function, which is provided
+	with the whole task structure, the target arch and implementation
+	number
+  * Permit the application to provide its own size base for performance
+	models
+  * Fields xxx_func of struct starpu_codelet are made deprecated. One
+	should use instead fields xxx_funcs.
+  * Applications can provide several implementations of a codelet for the
+	same architecture.
+  * A new multi-format interface permits to use different binary formats
+	on CPUs & GPUs, the conversion functions being provided by the
+	application and called by StarPU as needed (and as less as
+	possible).
+  * Add a gcc plugin to extend the C interface with pragmas which allows to
+	easily define codelets and issue tasks.
+  * Add codelet execution time statistics plot.
+  * Add bus speed in starpu_machine_display.
+  * Add a StarPU-Top feedback and steering interface.
+  * Documentation improvement.
+  * Add a STARPU_DATA_ACQUIRE_CB which permits to inline the code to be
+	done.
+  * Permit to specify MPI tags for more efficient starpu_mpi_insert_task
+  * Add SOCL, an OpenCL interface on top of StarPU.
+  * Add gdb functions.
+  * Add complex support to LU example.
+  * Add an OpenMP fork-join example.
+  * Permit to use the same data several times in write mode in the
+	parameters of the same task.
+  * Some types were renamed for consistency. The tools/dev/rename.sh
+	script can be used to port code using former names. You can also
+	choose to include starpu_deprecated_api.h (after starpu.h) to keep
+	using the old types.
+
 StarPU 0.9 (svn revision 3721)
 ==============================================
 The extensions release
@@ -58,7 +142,7 @@ The asynchronous heterogeneous multi-accelerator release
     - Implement starpu_worker_get_count
     - Implement starpu_display_codelet_stats
     - Implement starpu_data_prefetch_on_node
-    - Expose the starpu_data_set_wb_mask function
+    - Expose the starpu_data_set_wt_mask function
   * Support nvidia (heterogeneous) multi-GPU
   * Add the data request mechanism
     - All data transfers use data requests now

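As a rough illustration of the 1.0-style submission path summarised in the ChangeLog above (access modes declared on the codelet, data handles attached to the task, xxx_funcs instead of the deprecated xxx_func), a codelet and task might be set up as sketched below. Only the names quoted in the ChangeLog come from this commit; nbuffers, STARPU_RW and starpu_task_create() are assumed from the public StarPU API rather than shown here.

    static void scal_cpu(void *buffers[], void *cl_arg)
    {
            /* kernel body operating on the data registered for the task */
    }

    static struct starpu_codelet scal_cl =
    {
            .cpu_funcs = { scal_cpu, NULL },  /* xxx_funcs replaces the deprecated xxx_func */
            .nbuffers = 1,                    /* assumed field name */
            .modes = { STARPU_RW },           /* (struct starpu_codelet).modes, per the ChangeLog */
    };

    struct starpu_task *task = starpu_task_create();
    task->cl = &scal_cl;
    task->handles[0] = handle;                /* (struct starpu_task).handles replaces .buffers */
    starpu_task_submit(task);                 /* rejects tasks whose magic field was not initialised */
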
+ 22 - 8
Makefile.am

@@ -1,7 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2009-2011  Université de Bordeaux 1
-# Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+# Copyright (C) 2009-2012  Université de Bordeaux 1
+# Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -34,7 +34,6 @@ if COND_OPT
 SUBDIRS += tests/opt examples/opt
 endif
 
-
 if BUILD_GCC_PLUGIN
 SUBDIRS += gcc-plugin
 endif
@@ -43,12 +42,16 @@ if BUILD_SCHED_CTX_HYPERVISOR
 SUBDIRS += sched_ctx_hypervisor
 endif
 
+if BUILD_STARPUFFT
+SUBDIRS += starpufft
+endif
+
 pkgconfigdir = $(libdir)/pkgconfig
-pkgconfig_DATA = libstarpu.pc
+pkgconfig_DATA = libstarpu.pc starpu-1.0.pc
 
-include_HEADERS = 				\
+versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION)
+versinclude_HEADERS = 				\
 	include/starpu.h			\
-	include/starpu_config.h			\
 	include/starpu_data_filters.h		\
 	include/starpu_data_interfaces.h	\
 	include/starpu_task.h			\
@@ -57,13 +60,19 @@ include_HEADERS = 				\
 	include/starpu_data.h			\
 	include/starpu_perfmodel.h		\
 	include/starpu_util.h			\
+	include/starpu_fxt.h			\
 	include/starpu_cuda.h			\
 	include/starpu_opencl.h			\
 	include/starpu_expert.h			\
 	include/starpu_profiling.h		\
 	include/starpu_bound.h			\
 	include/starpu_scheduler.h		\
-	include/starpu_top.h
+	include/starpu_top.h			\
+	include/starpu_deprecated_api.h         \
+	include/starpu_hash.h
+
+nodist_versinclude_HEADERS = 			\
+	include/starpu_config.h
 
 if BUILD_STARPU_TOP
 all-local:
@@ -86,6 +95,11 @@ else
 txtdir = ${docdir}
 endif
 txt_DATA = AUTHORS COPYING.LGPL README
-EXTRA_DIST = AUTHORS COPYING.LGPL README
+EXTRA_DIST = AUTHORS COPYING.LGPL README STARPU-VERSION
 
 include starpu-top/extradist
+
+showcheck:
+	for i in $(SUBDIRS) ; do \
+		make -C $$i showcheck ; \
+	done

+ 17 - 1
README

@@ -1,3 +1,19 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
+# Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+
 ++=================++
 || I. Introduction ||
 ++=================++
@@ -134,7 +150,7 @@ Some examples ready to run are installed into $prefix/lib/starpu/{examples,mpi}
 ++==============++
 
 To upgrade your source code from older version (there were quite a few
-renamings), use the tools/rename.sh script
+renamings), use the tools/dev/rename.sh script
 
 ++===============++
 || VIII. Contact ||

+ 43 - 159
README.dev

@@ -1,169 +1,53 @@
-Installing StarPU on windows
-----------------------------
-
-If you are building from a tarball downloaded from the website, you can skip the
-cygwin part.
-
-1. Install cygwin
-
-http://cygwin.com/install.html
-
-Make sure the following packages are available:
-- (Devel)/subversion
-- (Devel)/libtool
-- (Devel)/gcc
-- (Devel)/make
-- your favorite editor (vi, emacs, ...)
-- (Devel)/gdb
-- (Archive)/zip
-- (Devel)/pkg-config
-
-2. Install mingw
-
-http://sourceforge.net/projects/mingw/
-
-3. Install hwloc (not mandatory)
-
-http://www.open-mpi.org/projects/hwloc
-
-4. Install Microsoft Visual C++ Studio Express
-
-   http://www.microsoft.com/express/Downloads
-
-   Add in your path the following directories.
-   (adjusting where necessary for the Installation location according to VC
-    version and on 64 and 32bit Windows versions)
-
-   On cygwin, with Visual C++ 2010 e.g.;
-
-   export PATH="/cygdrive/c/Program Files (x86)/Microsoft Visual Studio 10.0/Common7/IDE":$PATH
-   export PATH="/cygdrive/c/Program Files (x86)/Microsoft Visual Studio 10.0/VC/bin":$PATH
-
-   On MingW, with Visual C++ 2010, e.g.;
-
-   export PATH="/c/Program Files (x86)/Microsoft Visual Studio 10.0/Common7/IDE":$PATH
-   export PATH="/c/Program Files (x86)/Microsoft Visual Studio 10.0/VC/bin":$PATH
-
-   Try to call <lib.exe> and <link.exe> without any option to make sure these
-   dump their help output, else no .def or .lib file will be produced.
-
-5. Install GPU Drivers (not mandatory)
-
-  5.1 Install Cuda
-
-      http://developer.nvidia.com/object/cuda_3_2_downloads.html
-
-      You need to install at least the CUDA toolkit.
-
-      libtool is not able to find the libraries automatically, you
-      need to make some copies:
-
-      copy c:\cuda\lib\cuda.lib c:\cuda\lib\libcuda.lib
-      copy c:\cuda\lib\cudart.lib c:\cuda\lib\libcudart.lib
-      copy c:\cuda\lib\cublas.lib c:\cuda\lib\libcublas.lib
-      copy c:\cuda\lib\cufft.lib c:\cuda\lib\libcufft.lib
-      copy c:\cuda\lib\OpenCL.lib c:\cuda\lib\libOpenCL.lib
-
-      (and if the version of your CUDA driver is >= 3.2)
-
-      copy c:\cuda\lib\curand.lib c:\cuda\lib\libcurand.lib
-
-      Add the CUDA bin directory in your path
-
-      export PATH=/cygdrive/c/CUDA/bin:$PATH
-
-      Since we tell nvcc to build CUDA code with gcc instead of Visual studio,
-      a fix is needed: c:\cuda\include\host_defines.h has a bogus CUDARTAPI
-      definition which makes linking fail completely. Replace the first
-      occurence of
-
-      #define CUDARTAPI
-
-      with
-
-      #ifdef _WIN32
-      #define CUDARTAPI __stdcall
-      #else
-      #define CUDARTAPI
-      #endif
-
-      While at it, you can also comment the __cdecl definition to avoid spurious
-      warnings.
-
-
-  5.2 Install OpenCL
-
-      http://developer.nvidia.com/object/opencl-download.html
-
-      You need to download the NVIDIA Drivers for your version of
-      Windows. Executing the file will extract all files in a given
-      directory. The the driver installation will start, it will fail
-      if no compatibles drivers can be found on your system.
-
-      Anyway, you should copy the *.dl_ files from the directory
-      (extraction path) in the bin directory of the CUDA installation
-      directory (the directory should be v3.2/bin/)
-
-  5.3 Install MsCompress
-
-      http://gnuwin32.sourceforge.net/packages/mscompress.htm
-
-      Go in the CUDA bin directory, uncompress .dl_ files and rename
-      them in .dll files
-
-      cp /cygdrive/c/NVIDIA/DisplayDriver/190.89/International/*.dl_ .
-      for i in *.dl_ ; do /cygdrive/c/Program\ Files/GnuWin32/bin/msexpand.exe  $i ; mv ${i%_} ${i%_}l ; done
-
-If you are building from a tarball downloaded from the website, you can skip the
-autogen.sh part.
-
-6. Start autogen.sh from cygwin
-
-   cd starpu-trunk
-   ./autogen.sh
-
-7. Start a MinGW shell
-
-   /cygdrive/c/MinGW/msys/1.0/bin/sh.exe --login -i
-
-8. Configure, make, install from MinGW
-
-   If you have a non-english version of windows, use
-
-     export LANG=C
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
+# Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+
+Contents
+========
+
+- Developer Warnings
+- Naming Conventions
+- Coding Style
+
+Developer Warnings
+------------------
 
-   else libtool has troubles parsing the translated output of the toolchain.
+They are enabled only if the STARPU_DEVEL environment variable is
+defined to a non-empty value, when calling configure.
 
-   cd starpu-trunk
-   mkdir build
-   cd build
-   ../configure --prefix=$PWD/target --disable-default-drand48 \
-        --with-hwloc=<HWLOC installation directory> \
-        --with-cuda-dir=<CUDA installation directory> \
-        --with-cuda-lib-dir=<CUDA installation directory>/lib/Win32 \
-	--with-opencl-dir=<CUDA installation directory>
-   make
-   make install
+
 
-   Also convert a couple of files to CRLF:
+Naming Conventions
+------------------
 
-   sed -e 's/$/'$'\015'/ < README > $PWD/target/README.txt
-   sed -e 's/$/'$'\015'/ < AUTHORS > $PWD/target/AUTHORS.txt
-   sed -e 's/$/'$'\015'/ < COPYING.LGPL > $PWD/target/COPYING.LGPL.txt
+* Prefix names of public objects (types, functions, etc.) with "starpu"
 
-9. If you want your StarPU installation to be standalone, you need to
-   copy the DLL files from hwloc, Cuda, and OpenCL into the StarPU
-   installation bin directory, as well as MinGW/bin/libpthread*dll
+* Prefix names of internal objects (types, functions, etc.) with "_starpu"
 
-   cp <CUDA directory>/bin/*dll target/bin
-   cp <HWLOC directory>/bin/*dll target/bin
-   cp /cygdrive/c/MinGW/bin/libpthread*dll target/bin
+* Names for qualified types (struct, union, enum) do not end with _t, _s or similar.
+  Use _t only for typedef types, such as opaque public types, e.g
+       typedef struct _starpu_data_state* starpu_data_handle_t;
+  or
+       typedef uint64_t starpu_tag_t;
 
-   and set the StarPU bin directory in your path.
+* When a variable can only take a finite set of values, use an enum
+  type instead of defining macros for each of the values.
 
-   export PATH=<StarPU installation directory>/bin:$PATH
+
 
+Coding Style
+------------
 
-Developers warning
-------------------
-They are only enabled if the STARPU_DEVEL environment is defined to a non-empty value.
+* Curly braces always go on a new line

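Applied together, these conventions could look like the following sketch (all identifiers below are made up purely for illustration):

    /* public object: "starpu" prefix; _t is reserved for typedefs such as starpu_tag_t */
    void starpu_example_submit(starpu_data_handle_t handle, starpu_tag_t tag);

    /* finite set of values: prefer an enum over per-value macros */
    enum _starpu_example_state { _STARPU_EXAMPLE_IDLE, _STARPU_EXAMPLE_BUSY };

    /* internal helper: "_starpu" prefix, curly braces on their own line */
    static void _starpu_example_helper(void)
    {
            /* ... */
    }
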
+ 21 - 0
STARPU-VERSION

@@ -0,0 +1,21 @@
+# -*- sh -*-
+
+# Versioning (SONAMEs) for StarPU libraries.
+
+# Libtool interface versioning (info "(libtool) Versioning").
+LIBSTARPU_INTERFACE_CURRENT=0	# increment upon ABI change
+LIBSTARPU_INTERFACE_REVISION=0	# increment upon implementation change
+LIBSTARPU_INTERFACE_AGE=0	# set to CURRENT - PREVIOUS interface
+STARPU_EFFECTIVE_VERSION=1.0
+
+LIBSTARPUFFT_INTERFACE_CURRENT=0	# increment upon ABI change
+LIBSTARPUFFT_INTERFACE_REVISION=0	# increment upon implementation change
+LIBSTARPUFFT_INTERFACE_AGE=0		# set to CURRENT - PREVIOUS interface
+
+LIBSTARPUMPI_INTERFACE_CURRENT=0	# increment upon ABI change
+LIBSTARPUMPI_INTERFACE_REVISION=0	# increment upon implementation change
+LIBSTARPUMPI_INTERFACE_AGE=0		# set to CURRENT - PREVIOUS interface
+
+LIBSOCL_INTERFACE_CURRENT=0	# increment upon ABI change
+LIBSOCL_INTERFACE_REVISION=0	# increment upon implementation change
+LIBSOCL_INTERFACE_AGE=0		# set to CURRENT - PREVIOUS interface

+ 9 - 0
TODO

@@ -0,0 +1,9 @@
+
+Moving access modes for data handles from struct starpu_task to struct starpu_codelet
+=====================================================================================
+
+TODO list
+
+- Make struct starpu_buffer_descr private (or not, as it can still be used in tests and examples)
+
+- When cost_model is provided, but not cost_function, need to rebuild a struct starpu_buffer_descr

+ 0 - 95
acinclude.m4

@@ -1,95 +0,0 @@
-dnl Copyright (C) Free Software Foundation, Inc.
-dnl
-dnl This program is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU General Public License as published by
-dnl the Free Software Foundation; either version 2 of the License, or
-dnl (at your option) any later version.
-dnl 
-dnl This program is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-dnl GNU General Public License for more details.
-dnl 
-dnl You should have received a copy of the GNU General Public License
-dnl along with this program; if not, write to the Free Software
-dnl Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
-dnl
-dnl This test is taken from libgfortran
-
-dnl Check whether the target supports __sync_val_compare_and_swap.
-AC_DEFUN([STARPU_CHECK_SYNC_VAL_COMPARE_AND_SWAP], [
-  AC_CACHE_CHECK([whether the target supports __sync_val_compare_and_swap],
-		 ac_cv_have_sync_val_compare_and_swap, [
-  AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar;],
-			[bar = __sync_val_compare_and_swap(&foo, 0, 1);])],
-			[ac_cv_have_sync_val_compare_and_swap=yes],
-			[ac_cv_have_sync_val_compare_and_swap=no])])
-  if test $ac_cv_have_sync_val_compare_and_swap = yes; then
-    AC_DEFINE(STARPU_HAVE_SYNC_VAL_COMPARE_AND_SWAP, 1,
-	      [Define to 1 if the target supports __sync_val_compare_and_swap])
-  fi])
-
-dnl Check whether the target supports __sync_bool_compare_and_swap.
-AC_DEFUN([STARPU_CHECK_SYNC_BOOL_COMPARE_AND_SWAP], [
-  AC_CACHE_CHECK([whether the target supports __sync_bool_compare_and_swap],
-		 ac_cv_have_sync_bool_compare_and_swap, [
-  AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar;],
-			[bar = __sync_bool_compare_and_swap(&foo, 0, 1);])],
-			[ac_cv_have_sync_bool_compare_and_swap=yes],
-			[ac_cv_have_sync_bool_compare_and_swap=no])])
-  if test $ac_cv_have_sync_bool_compare_and_swap = yes; then
-    AC_DEFINE(STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP, 1,
-	      [Define to 1 if the target supports __sync_bool_compare_and_swap])
-  fi])
-
-dnl Check whether the target supports __sync_fetch_and_add.
-AC_DEFUN([STARPU_CHECK_SYNC_FETCH_AND_ADD], [
-  AC_CACHE_CHECK([whether the target supports __sync_fetch_and_add],
-		 ac_cv_have_sync_fetch_and_add, [
-  AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar;],
-			[bar = __sync_fetch_and_add(&foo, 1);])],
-			[ac_cv_have_sync_fetch_and_add=yes],
-			[ac_cv_have_sync_fetch_and_add=no])])
-  if test $ac_cv_have_sync_fetch_and_add = yes; then
-    AC_DEFINE(STARPU_HAVE_SYNC_FETCH_AND_ADD, 1,
-	      [Define to 1 if the target supports __sync_fetch_and_add])
-  fi])
-
-dnl Check whether the target supports __sync_fetch_and_or.
-AC_DEFUN([STARPU_CHECK_SYNC_FETCH_AND_OR], [
-  AC_CACHE_CHECK([whether the target supports __sync_fetch_and_or],
-		 ac_cv_have_sync_fetch_and_or, [
-  AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar;],
-			[bar = __sync_fetch_and_or(&foo, 1);])],
-			[ac_cv_have_sync_fetch_and_or=yes],
-			[ac_cv_have_sync_fetch_and_or=no])])
-  if test $ac_cv_have_sync_fetch_and_or = yes; then
-    AC_DEFINE(STARPU_HAVE_SYNC_FETCH_AND_OR, 1,
-	      [Define to 1 if the target supports __sync_fetch_and_or])
-  fi])
-
-dnl Check whether the target supports __sync_lock_test_and_set.
-AC_DEFUN([STARPU_CHECK_SYNC_LOCK_TEST_AND_SET], [
-  AC_CACHE_CHECK([whether the target supports __sync_lock_test_and_set],
-		 ac_cv_have_sync_lock_test_and_set, [
-  AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo, bar;],
-			[bar = __sync_lock_test_and_set(&foo, 1);])],
-			[ac_cv_have_sync_lock_test_and_set=yes],
-			[ac_cv_have_sync_lock_test_and_set=no])])
-  if test $ac_cv_have_sync_lock_test_and_set = yes; then
-    AC_DEFINE(STARPU_HAVE_SYNC_LOCK_TEST_AND_SET, 1,
-	      [Define to 1 if the target supports __sync_lock_test_and_set])
-  fi])
-
-dnl Check whether the target supports __sync_synchronize.
-AC_DEFUN([STARPU_CHECK_SYNC_SYNCHRONIZE], [
-  AC_CACHE_CHECK([whether the target supports __sync_synchronize],
-		 ac_cv_have_sync_synchronize, [
-  AC_LINK_IFELSE([AC_LANG_PROGRAM(,
-			[__sync_synchronize();])],
-			[ac_cv_have_sync_synchronize=yes],
-			[ac_cv_have_sync_synchronize=no])])
-  if test $ac_cv_have_sync_synchronize = yes; then
-    AC_DEFINE(STARPU_HAVE_SYNC_SYNCHRONIZE, 1,
-	      [Define to 1 if the target supports __sync_synchronize])
-  fi])

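For reference, each of these deleted macros wraps a small link test; the __sync_fetch_and_add probe, for instance, amounts to compiling roughly the following C program (reconstructed from the AC_LINK_IFELSE body above) and defining STARPU_HAVE_SYNC_FETCH_AND_ADD when it links:

    int foo, bar;

    int main(void)
    {
            /* succeeds only if the target supports the GCC __sync builtins */
            bar = __sync_fetch_and_add(&foo, 1);
            return bar;
    }
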
+ 326 - 107
configure.ac

@@ -1,9 +1,9 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
-# Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+# Copyright (C) 2009-2012  Université de Bordeaux 1
+# Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
 # Copyright (C) 2011  Télécom-SudParis
-# Copyright (C) 2011  INRIA
+# Copyright (C) 2011, 2012  Institut National de Recherche en Informatique et Automatique
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -16,20 +16,51 @@
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
-AC_INIT([StarPU],0.9.2, [starpu-bugs@lists.gforge.inria.fr], starpu)
+AC_INIT([StarPU],1.0.0rc2, [starpu-devel@lists.gforge.inria.fr], starpu)
 AC_CONFIG_SRCDIR(include/starpu.h)
 AC_CONFIG_AUX_DIR([build-aux])
+
+dnl Versioning.
+
+STARPU_MAJOR_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 1`"
+STARPU_MINOR_VERSION="`echo $PACKAGE_VERSION | cut -d . -f 2`"
+AC_SUBST([STARPU_MAJOR_VERSION])
+AC_SUBST([STARPU_MINOR_VERSION])
+AC_SUBST([STARPU_EFFECTIVE_VERSION])
+AC_DEFINE_UNQUOTED([STARPU_MAJOR_VERSION], [$STARPU_MAJOR_VERSION],
+  [Major version number of StarPU.])
+AC_DEFINE_UNQUOTED([STARPU_MINOR_VERSION], [$STARPU_MINOR_VERSION],
+  [Major version number of StarPU.])
+
+. "$srcdir/STARPU-VERSION"
+AC_SUBST([LIBSTARPU_INTERFACE_CURRENT])
+AC_SUBST([LIBSTARPU_INTERFACE_REVISION])
+AC_SUBST([LIBSTARPU_INTERFACE_AGE])
+AC_SUBST([LIBSTARPUMPI_INTERFACE_CURRENT])
+AC_SUBST([LIBSTARPUMPI_INTERFACE_REVISION])
+AC_SUBST([LIBSTARPUMPI_INTERFACE_AGE])
+AC_SUBST([LIBSTARPUFFT_INTERFACE_CURRENT])
+AC_SUBST([LIBSTARPUFFT_INTERFACE_REVISION])
+AC_SUBST([LIBSTARPUFFT_INTERFACE_AGE])
+AC_SUBST([LIBSOCL_INTERFACE_CURRENT])
+AC_SUBST([LIBSOCL_INTERFACE_REVISION])
+AC_SUBST([LIBSOCL_INTERFACE_AGE])
+
 AC_CANONICAL_SYSTEM
 
 dnl Automake 1.11 introduced `silent-rules' and `color-tests'.  Use them
 dnl when they're available.
 m4_ifdef([AM_SILENT_RULES],
-  [AM_INIT_AUTOMAKE([1.11 -Wall -Werror foreign silent-rules color-tests])],
+  [AM_INIT_AUTOMAKE([1.11 -Wall -Werror foreign silent-rules color-tests parallel-tests])],
   [AM_INIT_AUTOMAKE([1.10 -Wall -Werror foreign])])
 
+m4_ifdef([AM_SILENT_RULES],
+  [AM_SILENT_RULES(yes)])
+
 AC_PREREQ(2.60)
 
 AC_PROG_CC
+AC_PROG_CXX
 AC_PROG_CPP
 AC_PROG_SED
 AC_PROG_LN_S
@@ -61,13 +92,18 @@ AM_CONDITIONAL([STARPU_HAVE_WINDOWS], [test "x$starpu_windows" = "xyes"])
 # on Darwin, GCC targets i386 by default, so we don't have atomic ops
 AC_CHECK_SIZEOF([void *])
 SIZEOF_VOID_P=$ac_cv_sizeof_void_p
-if test x$SIZEOF_VOID_P = x4; then
-	case "$target" in
-	i386-*darwin*) CFLAGS+=" -march=i686 " ;;
-	esac
-fi
-
-
+case $SIZEOF_VOID_P in
+	4)
+		case "$target" in
+		i386-*darwin*) CFLAGS+=" -march=i686 " ;;
+		esac
+		STARPU_MS_LIB_ARCH=X86
+		;;
+	8)
+		STARPU_MS_LIB_ARCH=X64
+		;;
+esac
+AC_SUBST(STARPU_MS_LIB_ARCH)
 
 # This will be useful for program which use CUDA (and .cubin files) which need
 # some path to the CUDA code at runtime.
@@ -122,8 +158,14 @@ else
   AC_DEFINE([starpu_erand48_r(xsubi, buffer, result)],[do {*(result) = ((double)(rand()) / RAND_MAX);} while (0);],[erand48_r equivalent function])
 fi
 
+# Some systems do not define strerror_r
+AC_CHECK_FUNC([strerror_r], [AC_DEFINE([STARPU_HAVE_STRERROR_R], [1], [Define to 1 if the function strerro_r is available.])])
+
+# Some systems do not define unsetenv
+AC_CHECK_FUNC([unsetenv], [AC_DEFINE([STARPU_HAVE_UNSETENV], [1], [Define to 1 if the function unsetenv is available.])])
+
 # Define slow machine
-AC_ARG_ENABLE(slow-machine, [AS_HELP_STRING([--disable-slow-machine],
+AC_ARG_ENABLE(slow-machine, [AS_HELP_STRING([--enable-slow-machine],
 				   [Lower default values for the testcases run by make check])],
 				   enable_slow_machine=$enableval, enable_slow_machine=false)
 if  test x$enable_slow_machine = xyes; then
@@ -132,6 +174,8 @@ fi
 
 AC_CHECK_HEADERS([malloc.h], [AC_DEFINE([STARPU_HAVE_MALLOC_H], [1], [Define to 1 if you have the <malloc.h> header file.])])
 
+AC_CHECK_HEADERS([valgrind/valgrind.h], [AC_DEFINE([STARPU_HAVE_VALGRIND_H], [1], [Define to 1 if you have the <valgrind/valgrind.h> header file.])])
+
 # This defines HAVE_SYNC_VAL_COMPARE_AND_SWAP
 STARPU_CHECK_SYNC_VAL_COMPARE_AND_SWAP
 
@@ -198,7 +242,7 @@ AM_CONDITIONAL([STARPU_USE_SCHED_CTX_HYPERVISOR], [test "x$build_sched_ctx_hyper
 AC_MSG_CHECKING(maximum number of CPUs)
 AC_ARG_ENABLE(maxcpus, [AS_HELP_STRING([--enable-maxcpus=<number>],
 			[maximum number of CPUs])],
-			maxcpus=$enableval, maxcpus=16)
+			maxcpus=$enableval, maxcpus=64)
 AC_MSG_RESULT($maxcpus)
 AC_DEFINE_UNQUOTED(STARPU_MAXCPUS, [$maxcpus], [Maximum number of CPUs supported])
 
@@ -312,7 +356,7 @@ AC_DEFUN([STARPU_CHECK_CUDA_RUNTIME],
     __cuda_include_dir=$2
     __cuda_lib_dir=$3
 
-    if test "$__cuda_dir" != "no" ; then
+    if test "$__cuda_dir" != "no" -a "$__cuda_dir" != "" ; then
 	AC_MSG_CHECKING(whether CUDA RT is available in $__cuda_dir)
     else
 	AC_MSG_CHECKING(whether CUDA RT is available)
@@ -349,8 +393,8 @@ AC_DEFUN([STARPU_CHECK_CUDA_RUNTIME],
         if test "$have_valid_cuda" = "no" ; then
             if test "$3" = "no" -a "$__cuda_dir" != "no" ; then
                 __cuda_lib_dir="$__cuda_dir/lib64"
+		LDFLAGS="${SAVED_LDFLAGS} -L$__cuda_lib_dir"
 	        STARPU_CUDA_LDFLAGS="${SAVED_STARPU_CUDA_LDFLAGS} -L$__cuda_lib_dir"
-	        LDFLAGS="${SAVED_LDFLAGS} -L$__cuda_lib_dir"
 	        AC_HAVE_LIBRARY([cudart],[have_valid_cuda=yes],[have_valid_cuda=no])
                 unset ac_cv_lib_cudart_main
             fi
@@ -359,6 +403,7 @@ AC_DEFUN([STARPU_CHECK_CUDA_RUNTIME],
 
     if test "$have_valid_cuda" = "yes" ; then
         STARPU_CUDA_LDFLAGS="$STARPU_CUDA_LDFLAGS -lcudart"
+	LDFLAGS="${SAVED_LDFLAGS} ${STARPU_CUDA_LDFLAGS}"
 	# we also check that CUBLAS is available
 	AC_HAVE_LIBRARY([cublas],[have_valid_cuda=yes],[have_valid_cuda=no])
         unset ac_cv_lib_cublas_main
@@ -379,7 +424,7 @@ AC_DEFUN([STARPU_CHECK_CUDA_RUNTIME],
 if test x$enable_cuda = xyes -o x$enable_cuda = xmaybe; then
     STARPU_CHECK_CUDA($cuda_dir, $cuda_lib_dir)
     if test "$have_valid_cuda" = "no" ; then
-        for f in "/usr/local/cuda" "/c/cuda" "/cygdrive/c/cuda" "/opt/cuda" "$CUDA_INC_PATH" "$CUDA_INSTALL_PATH"; do
+        for f in "/usr/local/cuda" "/c/cuda" "/cygdrive/c/cuda" "/opt/cuda" "$CUDA_INC_PATH/.." "$CUDA_INSTALL_PATH" "$CUDA_TOOLKIT"; do
             STARPU_CHECK_CUDA($f, "no")
             if test "$have_valid_cuda" = "yes" ; then
                 break
@@ -390,7 +435,7 @@ if test x$enable_cuda = xyes -o x$enable_cuda = xmaybe; then
     if test "$have_valid_cuda" = "yes" ; then
         STARPU_CHECK_CUDA_RUNTIME($cuda_dir, $cuda_include_dir, $cuda_lib_dir)
         if test "$have_valid_cuda" = "no" ; then
-            for f in "/usr/local/cuda" "/c/cuda" "/cygdrive/c/cuda" "/opt/cuda" "$CUDA_INC_PATH" "$CUDA_INSTALL_PATH"; do
+            for f in "/usr/local/cuda" "/c/cuda" "/cygdrive/c/cuda" "/opt/cuda" "$CUDA_INC_PATH/.." "$CUDA_INSTALL_PATH" "$CUDA_TOOLKIT"; do
                 STARPU_CHECK_CUDA_RUNTIME($f, "no", "no")
                 if test "$have_valid_cuda" = "yes" ; then
                     break
@@ -399,8 +444,24 @@ if test x$enable_cuda = xyes -o x$enable_cuda = xmaybe; then
         fi
     fi
 
+    # Check cuda is compatible with the C compiler
+    AC_MSG_CHECKING(whether CUDA is working)
+    if test "$have_valid_cuda" = "yes" ; then
+        SAVED_CPPFLAGS="${CPPFLAGS}"
+        CPPFLAGS="${CPPFLAGS} ${STARPU_CUDA_CPPFLAGS}"
+	AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
+		[[#include <cuda.h>]],
+		[[]]
+		),
+	    [have_valid_cuda="yes"],
+	    [have_valid_cuda="no"]
+	])
+        CPPFLAGS="${SAVED_CPPFLAGS}"
+    fi
+    AC_MSG_RESULT($have_valid_cuda)
+
     # in case CUDA was explicitely required, but is not available, this is an error
-    if test x$enable_cuda = xyes -a x$have_valid_cuda = no; then
+    if test x$enable_cuda = xyes -a x$have_valid_cuda = xno; then
 	AC_MSG_ERROR([cannot find CUDA])
     fi
     # now we enable CUDA if and only if a proper setup is available
@@ -609,21 +670,28 @@ AC_ARG_WITH(opencl-lib-dir,
 		enable_opencl=yes
 	], [opencl_lib_dir=no])
 
-if test x$enable_opencl = xyes -o x$enable_opencl = xmaybe; then
-    	STARPU_CHECK_OPENCL($opencl_dir, $opencl_include_dir, $opencl_lib_dir)
-        if test "$have_valid_opencl" = "no" ; then
-            for f in "/usr/local/cuda" "/c/cuda" "/cygdrive/c/cuda" "/opt/cuda" "$CUDA_INC_PATH" "$CUDA_INSTALL_PATH" ; do
-                if test -n $f ; then
-    	            STARPU_CHECK_OPENCL($f, "no", "no")
-                    if test "$have_valid_opencl" = "yes" ; then
-                        break
-                    fi
-                fi
-            done
-        fi
+AC_DEFUN([STARPU_LOOK_FOR_OPENCL],
+[
+    	if test "x$has_opencl_being_checked" != "xyes" ; then
+    	    STARPU_CHECK_OPENCL($opencl_dir, $opencl_include_dir, $opencl_lib_dir)
+	    if test "$have_valid_opencl" = "no" ; then
+            	for f in "/usr/local/cuda" "/c/cuda" "/cygdrive/c/cuda" "/opt/cuda" "$CUDA_INC_PATH/.." "$CUDA_INSTALL_PATH" "$CUDA_TOOLKIT"; do
+		    if test -n $f ; then
+    			STARPU_CHECK_OPENCL($f, "no", "no")
+			if test "$have_valid_opencl" = "yes" ; then
+			    break
+			fi
+		    fi
+		done
+	    fi
+	    has_opencl_being_checked=yes
+	fi
+])
 
+if test x$enable_opencl = xyes -o x$enable_opencl = xmaybe; then
+	STARPU_LOOK_FOR_OPENCL()
 	# in case OpenCL was explicitely required, but is not available, this is an error
-	if test x$enable_opencl = xyes -a x$have_valid_opencl = no; then
+	if test x$enable_opencl = xyes -a x$have_valid_opencl = xno; then
 	    AC_MSG_ERROR([cannot find OpenCL])
 	fi
 
@@ -684,7 +752,7 @@ if test x$enable_gordon = xyes -o x$enable_gordon = xmaybe; then
 	# AC_CHECK_FUNC(gordon_init, [gordon], [have_valid_gordon=no])
 
 	# in case Gordon was explicitely required, but is not available, this is an error
-	if test x$enable_gordon = xyes -a x$have_valid_gordon = no; then
+	if test x$enable_gordon = xyes -a x$have_valid_gordon = xno; then
 		AC_MSG_ERROR([cannot find Gordon])
 	fi
 
@@ -727,6 +795,7 @@ AC_MSG_RESULT($enable_debug)
 
 if test x$enable_debug = xyes; then
 	CFLAGS="$CFLAGS -O0"
+	AC_DEFINE(STARPU_SPINLOCK_CHECK, [1], [check spinlock use])
 else
 	CFLAGS="$CFLAGS -O3"
 fi
@@ -741,6 +810,14 @@ if test x$enable_fast = xyes; then
 	AC_DEFINE(STARPU_NO_ASSERT, [1], [disable assertions])
 fi
 
+AC_MSG_CHECKING(whether memory status should be displayed)
+AC_ARG_ENABLE(memory-status, [AS_HELP_STRING([--enable-memory-status],
+			     [display memory status at the end of execution])],
+			     enable_memory_status=$enableval, enable_memory_status=no)
+AC_MSG_RESULT($enable_memory_status)
+if test x$enable_memory_status = xyes; then
+        AC_DEFINE(STARPU_MEMORY_STATUS, [1], [display memory status])
+fi
 
 
 AC_MSG_CHECKING(whether debug messages should be displayed)
@@ -927,7 +1004,7 @@ AC_DEFINE_UNQUOTED(STARPU_NMAXWORKERS, [$nmaxworkers], [Maximum number of worker
 AC_MSG_CHECKING(maximum number of implementations)
 AC_ARG_ENABLE(maximplementations, [AS_HELP_STRING([--enable-maximplementations=<number>],
 		[maximum number of implementations])],
-		maximplementations=$enableval, maximplementations=1)
+		maximplementations=$enableval, maximplementations=4)
 AC_MSG_RESULT($maximplementations)
 AC_DEFINE_UNQUOTED(STARPU_MAXIMPLEMENTATIONS, [$maximplementations],
 		[maximum number of implementations])
@@ -1031,45 +1108,63 @@ fi
 #                                                                             #
 ###############################################################################
 
-build_starpu_top=no
-AC_PATH_PROGS([QMAKE], [qmake-qt4 qmake], [not-found])
-if test x$QMAKE != xnot-found; then
-	QMAKE_VERSION=`$QMAKE --version 2>&1 | head -n 1 | cut -d '.' -f 1 | cut -d ' ' -f 3`
-	if test $QMAKE_VERSION -ge 2 ; then
-		PKG_CHECK_EXISTS([QtGui QtNetwork QtOpenGL QtSql], [
-			QT_MAJVERSION=`$PKG_CONFIG --modversion QtGui | cut -d '.' -f 1`
-			QT_MINVERSION=`$PKG_CONFIG --modversion QtGui | cut -d '.' -f 2`
-			if test $QT_MAJVERSION -gt 4 -o \( $QT_MAJVERSION -eq 4 -a $QT_MINVERSION -ge 7 \) ; then
-				build_starpu_top=yes
-			fi
-			QWT_PRI=embed
-			AC_ARG_WITH(qwt-include-dir,
-				[AS_HELP_STRING([--with-qwt-include-dir=<path>],
-				[specify installed libqwt include path])],
-				[
-					STARPU_QWT_CPPFLAGS="-I$withval"
-					AC_SUBST(STARPU_QWT_CPPFLAGS)
-					QWT_PRI=system
-				])
-			AC_ARG_WITH(qwt-lib-dir,
-				[AS_HELP_STRING([--with-qwt-lib-dir=<path>],
-				[specify installed libqwt library path])],
-				[
-					STARPU_QWT_LDFLAGS="-L$withval"
-					QWT_PRI=system
-				])
-			AC_ARG_WITH(qwt-lib,
-				[AS_HELP_STRING([--with-qwt-lib=<path>],
-				[specify installed libqwt library name])],
-				[
-					STARPU_QWT_LDFLAGS="${STARPU_QWT_LDFLAGS} -l$withval"
-					QWT_PRI=system
-				])
-			AC_SUBST(QWT_PRI)
-		])
+AC_ARG_ENABLE([starpu-top],
+  [AS_HELP_STRING([--disable-starpu-top],
+    [build StarPU-Top])],
+  [enable_starpu_top="no"],
+  [enable_starpu_top="maybe"])
+
+# Check whether StarPU-Top can be built
+AC_MSG_CHECKING(for StarPU-Top)
+
+if test "x$enable_starpu_top" = "xmaybe" ; then
+	can_build_starpu_top=no
+	AC_PATH_PROGS([QMAKE], [qmake-qt4 qmake], [not-found])
+	if test x$QMAKE != xnot-found; then
+		QMAKE_VERSION=`$QMAKE --version 2>&1 | head -n 1 | cut -d '.' -f 1 | cut -d ' ' -f 3`
+		if test $QMAKE_VERSION -ge 2 ; then
+			PKG_CHECK_EXISTS([QtGui QtNetwork QtOpenGL QtSql], [
+				QT_MAJVERSION=`$PKG_CONFIG --modversion QtGui | cut -d '.' -f 1`
+				QT_MINVERSION=`$PKG_CONFIG --modversion QtGui | cut -d '.' -f 2`
+				if test $QT_MAJVERSION -gt 4 -o \( $QT_MAJVERSION -eq 4 -a $QT_MINVERSION -ge 7 \) ; then
+					can_build_starpu_top=yes
+				fi
+				QWT_PRI=embed
+				AC_ARG_WITH(qwt-include-dir,
+					[AS_HELP_STRING([--with-qwt-include-dir=<path>],
+					[specify installed libqwt include path])],
+					[
+						STARPU_QWT_INCLUDE="$withval"
+						AC_SUBST(STARPU_QWT_INCLUDE)
+						QWT_PRI=system
+					])
+				AC_ARG_WITH(qwt-lib-dir,
+					[AS_HELP_STRING([--with-qwt-lib-dir=<path>],
+					[specify installed libqwt library path])],
+					[
+						STARPU_QWT_LDFLAGS="-L$withval"
+						QWT_PRI=system
+					])
+				AC_ARG_WITH(qwt-lib,
+					[AS_HELP_STRING([--with-qwt-lib=<name>],
+					[specify installed libqwt library name])],
+					[
+						STARPU_QWT_LDFLAGS="${STARPU_QWT_LDFLAGS} -l$withval"
+						QWT_PRI=system
+					])
+				AC_SUBST(STARPU_QWT_LDFLAGS)
+				AC_SUBST(QWT_PRI)
+			])
+		fi
 	fi
 fi
 
+if test "x$enable_starpu_top" = "xmaybe" ; then
+  build_starpu_top=$can_build_starpu_top
+else
+  build_starpu_top=no
+fi
+
 AM_CONDITIONAL(BUILD_STARPU_TOP, test x$build_starpu_top = xyes)
 
 ###############################################################################
@@ -1088,7 +1183,7 @@ AC_DEFUN([IS_SUPPORTED_CFLAG],
 	AC_MSG_CHECKING([whether compiler support $1])
 
 	SAVED_CFLAGS="$CFLAGS"
-	CFLAGS="$1 -we10006"
+	CFLAGS="$1" # -we10006"
 
 	AC_COMPILE_IFELSE(
 		AC_LANG_PROGRAM(
@@ -1117,6 +1212,11 @@ if test "x$STARPU_DEVEL" != x; then
 	AC_DEFINE(STARPU_DEVEL, [1], [enable developer warnings])
 fi
 
+# Same value as Automake's, for use in other places.
+pkglibdir="\${libdir}/$PACKAGE"
+AC_SUBST([pkglibdir])
+
+
 ###############################################################################
 #                                                                             #
 #                               GCC extensions                                #
@@ -1127,61 +1227,114 @@ AC_ARG_ENABLE([gcc-extensions],
   [AS_HELP_STRING([--enable-gcc-extensions],
     [build the GCC plug-in that provides C language extensions (experimental)])],
   [enable_gcc_plugin="$enableval"],
-  [enable_gcc_plugin="no"])
+  [enable_gcc_plugin="maybe"])
 
-if test "x$enable_gcc_plugin" = "xyes"; then
-   STARPU_GCC_PLUGIN_SUPPORT
+if test "x$enable_gcc_plugin" = "xyes" -o "x$enable_gcc_plugin" = "xmaybe" ; then
+    STARPU_GCC_PLUGIN_SUPPORT
 
-   if test "x$ac_cv_have_gcc_plugins" != "xyes"; then
-     AC_MSG_ERROR([This compiler lacks GCC plug-in support.])
-   fi
+    if test "x$ac_cv_have_gcc_plugins" = "xno" ; then
+        if test "x$enable_gcc_plugin" = "xyes" ; then
+    	    # Since this was explicitly asked for, error out.
+            AC_MSG_ERROR([This compiler lacks GCC plug-in support.])
+	else
+	    AC_MSG_WARN([GCC plug-ins not supported; StarPU's GCC plug-in will not be built])
+        fi
+    else
+        # What GCC version are we using?
+        STARPU_GCC_VERSION
+
+        # The `.so' itself cannot be called `starpu-gcc.so' (because
+	# `-fplugin-arg-' option names and such must match the `.so'
+	# name), so use a meaningful directory name.
+	gccplugindir="\${pkglibdir}/${STARPU_EFFECTIVE_VERSION}/gcc/${STARPU_GCC_VERSION_MAJOR}.${STARPU_GCC_VERSION_MINOR}"
+	AC_SUBST([gccplugindir])
+
+	# Lines to be inserted in the `.pc' file.
+	GCC_PLUGIN_DIR_PKGCONFIG="gccplugindir=$gccplugindir"
+	GCC_PLUGIN_PKGCONFIG="gccplugin=\${gccplugindir}/starpu.so"
+	AC_SUBST([GCC_PLUGIN_DIR_PKGCONFIG])
+	AC_SUBST([GCC_PLUGIN_PKGCONFIG])
+    fi
+fi
 
-   build_gcc_plugin="yes"
 
-   # GNU Guile 1.8/2.0 is used to run the test suite.
-   AC_PATH_PROG([GUILE], [guile])
-   if test "x$GUILE" != "x"; then
-      run_gcc_plugin_test_suite="yes"
-   else
-      run_gcc_plugin_test_suite="no"
-   fi
+if test "x$ac_cv_have_gcc_plugins" = "xyes" ; then
+    build_gcc_plugin="yes"
+
+    # GNU Guile 1.8/2.0 is used to run the test suite.
+    AC_PATH_PROG([GUILE], [guile])
+    if test "x$GUILE" != "x"; then
+        if test "x$enable_cpu" = "xyes"; then
+	   run_gcc_plugin_test_suite="yes"
+	else
+	   AC_MSG_WARN([CPU back-end disabled; GCC plug-in test suite will not be run])
+	   run_gcc_plugin_test_suite="no"
+	fi
+    else
+	run_gcc_plugin_test_suite="no"
+    fi
 else
-   build_gcc_plugin="no"
-   run_gcc_plugin_test_suite="no"
+    build_gcc_plugin="no"
+    run_gcc_plugin_test_suite="no"
 fi
 
 # Bison is used to generate the C expression parser.  The generated
 # parser is part of the distribution, though.
-AC_PROG_YACC
+AM_MISSING_PROG([YACC], [bison])
 
 AM_CONDITIONAL([BUILD_GCC_PLUGIN], [test "x$build_gcc_plugin" = "xyes"])
 AM_CONDITIONAL([HAVE_GUILE], [test "x$GUILE" != "x"])
 
 ###############################################################################
 #                                                                             #
-#                               OpenCL interface                              #
+#                               SOCL interface                                #
 #                                                                             #
 ###############################################################################
 
 AC_ARG_ENABLE([socl],
   [AS_HELP_STRING([--enable-socl],
-    [build the OpenCL interface (SOCL)])],
+    [build the OpenCL interface (experimental)])],
   [enable_socl="$enableval"],
-  [enable_socl="no"])
+  [enable_socl="maybe"])
 
-if test "x$enable_socl" = "xyes"; then
-   STARPU_SOCL_SUPPORT
-   build_socl="yes"
+AC_MSG_CHECKING(for SOCL)
+
+if test "x$enable_socl" = "xyes" -o "x$enable_socl" = "xmaybe" ; then
+    if test "$have_valid_opencl" = "no" ; then
+	STARPU_LOOK_FOR_OPENCL()
+    fi
+fi
+
+# in case SOCL was explicitely required, but is not available, this is an error
+if test "x$enable_socl" = "xyes" -a "$have_valid_opencl" = "no" ; then
+    AC_MSG_ERROR([SOCL cannot be enabled without OpenCL])
+fi
+
+# now we enable SOCL if and only if a proper setup is available
+if test "x$enable_socl" = "xyes" -o "x$enable_socl" = "xmaybe" ; then
+   build_socl=$have_valid_opencl
 else
-   build_socl="no"
-   run_socl_test_suite="no"
+   build_socl=no
 fi
 
+AC_MSG_RESULT($build_socl)
 AM_CONDITIONAL([BUILD_SOCL], [test "x$build_socl" = "xyes"])
 AM_CONDITIONAL([STARPU_USE_SOCL], [test "x$build_socl" = "xyes"])
 
 ###############################################################################
 #                                                                             #
+#                                 Debugging                                   #
+#                                                                             #
+###############################################################################
+
+AC_PATH_PROG([GDB], [gdb], [not-found])
+if test "x$GDB" != "xnot-found"; then
+   AC_DEFINE_UNQUOTED([STARPU_GDB_PATH], ["$GDB"],
+     [Path to the GNU debugger.])
+fi
+
+###############################################################################
+#                                                                             #
 #                                  Examples                                   #
 #                                                                             #
 ###############################################################################
@@ -1203,10 +1356,10 @@ AC_SUBST(STARPU_OPENGL_RENDER, $enable_opengl_render)
 AC_MSG_RESULT($enable_opengl_render)

 AC_PATH_XTRA
-if test "x$x_includes" != "xNONE"; then
+if test "x$no_x" != "xyes"; then
 	AC_DEFINE(STARPU_HAVE_X11, [1], [enable X11])
 fi
-AM_CONDITIONAL([HAVE_X11], [test "x$x_includes" != "xNONE"])
+AM_CONDITIONAL([HAVE_X11], [test "x$no_x" != "xyes"])

 # In case there are BLAS kernels that are used by the example applications
 # we may specify which library to use. Note that this is not used for StarPU
@@ -1330,6 +1483,11 @@ AC_SUBST(BLAS_LIB,$blas_lib)
 have_fftw=no
 have_fftwf=no
 have_fftwl=no
+fft_support=no
+
+AC_ARG_ENABLE(starpufft, [AS_HELP_STRING([--disable-starpufft],
+			[Disable build of StarPU-FFT])],
+			enable_starpufft=$enableval,enable_starpufft=yes)

 PKG_CHECK_MODULES([FFTW],  [fftw3],  [
   AC_DEFINE([STARPU_HAVE_FFTW], [1], [Define to 1 if you have the libfftw3 library.])
@@ -1337,7 +1495,7 @@ PKG_CHECK_MODULES([FFTW],  [fftw3],  [
   have_fftw=yes
 ], [:])
 AM_CONDITIONAL(STARPU_HAVE_FFTW, [test x$have_fftw = xyes])
- 
+
 PKG_CHECK_MODULES([FFTWF], [fftw3f], [
   AC_DEFINE([STARPU_HAVE_FFTWF], [1], [Define to 1 if you have the libfftw3f library.])
   AC_SUBST([STARPU_HAVE_FFTWF], [1])
@@ -1352,6 +1510,11 @@ PKG_CHECK_MODULES([FFTWL], [fftw3l], [
 ], [:])
 AM_CONDITIONAL(STARPU_HAVE_FFTWL, [test x$have_fftwl = xyes])

+if test x$enable_starpufft = xyes -a \( \( x$enable_cpu = xyes -a x$have_fftw = xyes -a x$have_fftwf = xyes \) -o x$have_cufftdoublecomplex = xyes \); then
+   fft_support=yes
+fi
+AM_CONDITIONAL(BUILD_STARPUFFT, [test x$fft_support = xyes])
+
 ##########################################
 # hwloc                                  #
 ##########################################
@@ -1407,10 +1570,56 @@ AC_ARG_ENABLE(optional_tests, [AS_HELP_STRING([--optional-tests],
 AC_MSG_RESULT($want_optional_tests)
 AM_CONDITIONAL([COND_OPT], [test "$want_optional_tests" = yes])

+# Check if icc is available
+AC_CHECK_PROGS([ICC], [icc])
+
+# If cuda and icc are both available, check they are compatible
+if test "$enable_cuda" = "yes" -a "$ICC" != ""; then
+   AC_MSG_CHECKING(whether CUDA and ICC are compatible)
+   OLD_CC="$CC"
+   CC="$ICC"
+   AC_COMPILE_IFELSE(
+       AC_LANG_PROGRAM(
+	   [[#include <cuda.h>]],
+	   [[]]
+	   ),
+       AC_MSG_RESULT(yes),
+       [ICC=""
+           AC_MSG_RESULT(no)]
+   )
+   CC="$OLD_CC"
+fi
+
+# Disable ICC on windows
+if test "x$ICC" != "x" -a "$starpu_windows" = "yes" ; then
+    ICC=""
+fi
+if test "x$ICC" != "x"; then
+  AC_DEFINE(STARPU_HAVE_ICC, [], [Define this if icc is available])
+fi
+AM_CONDITIONAL([STARPU_HAVE_ICC], [test "x$ICC" != "x"])
+
+# Do not generate manpages for the tools if we do not have help2man
+AC_CHECK_PROGS([HELP2MAN], [help2man])
+# Disable on windows
+if test "$starpu_windows" = "yes" ; then
+    HELP2MAN=""
+fi
+AM_CONDITIONAL([STARPU_HAVE_HELP2MAN], [test "x$HELP2MAN" != "x"])
+
+AC_CHECK_MEMBER([struct cudaDeviceProp.pciDomainID],
+  AC_DEFINE([STARPU_HAVE_DOMAINID],[1],[Define to 1 if CUDA device properties include DomainID]),
+  , [[#include <cuda_runtime_api.h>]])
+
+AC_CHECK_MEMBER([struct cudaDeviceProp.pciBusID],
+  AC_DEFINE([STARPU_HAVE_BUSID],[1],[Define to 1 if CUDA device properties include BusID]),
+  , [[#include <cuda_runtime_api.h>]])
+
 # File configuration
 AC_CONFIG_COMMANDS([executable-scripts], [
   chmod +x tests/regression/regression.sh
   chmod +x gcc-plugin/tests/run-test
+  chmod +x tools/starpu_workers_activity
 ])

 AC_CONFIG_FILES(tests/regression/regression.sh tests/regression/profiles tests/regression/profiles.build.only)
@@ -1420,19 +1629,27 @@ AC_OUTPUT([
 	Makefile
 	src/Makefile
 	tools/Makefile
+	tools/starpu_workers_activity
 	socl/Makefile
 	socl/src/Makefile
+	socl/examples/Makefile
+        socl/socl-1.0.pc
 	libstarpu.pc
+	starpu-1.0.pc
+	mpi/libstarpumpi.pc
+	mpi/starpumpi-1.0.pc
+	starpufft/Makefile
+	starpufft/libstarpufft.pc
+	starpufft/starpufft-1.0.pc
 	examples/Makefile
        examples/opt/Makefile
-	examples/starpufft/Makefile
 	examples/stencil/Makefile
-	examples/socl/Makefile
 	tests/Makefile
        tests/opt/Makefile
 	doc/Makefile
 	mpi/Makefile
 	starpu-top/StarPU-Top.pro
+	starpu-top/StarPU-Top-qwt-system.pri
        gcc-plugin/Makefile
 	gcc-plugin/src/Makefile
 	gcc-plugin/tests/Makefile
@@ -1450,9 +1667,6 @@ AC_MSG_NOTICE([
 	OpenCL enabled: $enable_opencl
 	Cell   enabled: $enable_gordon

-	GCC plug-in: $build_gcc_plugin
-	GCC plug-in test suite: $run_gcc_plugin_test_suite
-
 	Compile-time limits
 	(change these with --enable-maxcpus, --enable-maxcudadev,
 	--enable-maxopencldev, --enable-maxbuffers)
@@ -1466,12 +1680,17 @@ AC_MSG_NOTICE([
 	GPU-GPU transfers: $have_cuda_memcpy_peer
 	Allocation cache:  $enable_allocation_cache

-	MPI enabled:   $use_mpi
-	SOCL enabled:  $build_socl
 	Magma enabled: $have_magma
 	BLAS library:  $blas_lib
 	hwloc:         $have_valid_hwloc
-
 	FxT trace enabled: $use_fxt
 	StarPU-Top:        $build_starpu_top
+
+	StarPU Extensions:
+	       MPI enabled:   $use_mpi
+	       MPI test suite: $running_mpi_check
+	       FFT Support: $fft_support
+	       GCC plug-in: $build_gcc_plugin
+	       GCC plug-in test suite: $run_gcc_plugin_test_suite
+	       SOCL enabled:  $build_socl
 ])

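Taken together, the configure.ac changes above turn SOCL into an auto-detected feature (it is built whenever a usable OpenCL setup is found, and --enable-socl only turns a missing OpenCL into a hard error), probe for gdb, icc and help2man, wire in the new StarPU-FFT library and the versioned pkg-config files, and regroup the extension status lines in the final summary. A minimal sketch of exercising the new switches, assuming a source tree where the usual autotools bootstrap has already been run:

    # Default: OpenCL is probed and SOCL is enabled only if the probe succeeds.
    ./configure
    # Force the decision instead of relying on auto-detection:
    ./configure --enable-socl          # abort if no usable OpenCL is found
    ./configure --disable-starpufft    # skip building the new StarPU-FFT library
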
+ 2 - 2
libstarpu.pc.in

@@ -6,8 +6,8 @@ includedir=@includedir@
 Name: starpu
 Description: offers support for heterogeneous multicore architecture
 Version: @PACKAGE_VERSION@
-Cflags: -I${includedir} @STARPU_CUDA_CPPFLAGS@
-Libs: -L${libdir} -lstarpu @STARPU_CUDA_LDFLAGS@ @STARPU_OPENCL_LDFLAGS@ @STARPU_SCHED_CTX_HYPERVISOR@
+Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_API
+Libs: -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_LDFLAGS@ @STARPU_OPENCL_LDFLAGS@ @STARPU_SCHED_CTX_HYPERVISOR@
 Libs.private: @LDFLAGS@ @LIBS@
 Requires: @HWLOC_REQUIRES@
 Requires.private: @GORDON_REQUIRES@

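The legacy libstarpu.pc now points at the versioned include directory and the renamed library, and adds -DSTARPU_USE_DEPRECATED_API so that applications written against the old headers keep building unchanged. A hedged usage sketch, assuming the installed .pc files are on PKG_CONFIG_PATH and a hypothetical legacy_app.c:

    gcc legacy_app.c -o legacy_app \
        $(pkg-config --cflags libstarpu) $(pkg-config --libs libstarpu)
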
+ 35 - 0
starpu-1.0.pc.in

@@ -0,0 +1,35 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
+# Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+pkglibdir=@pkglibdir@
+includedir=@includedir@
+
+# When the GCC plug-in is available, the following lines indicate
+# where it is installed.
+@GCC_PLUGIN_DIR_PKGCONFIG@
+@GCC_PLUGIN_PKGCONFIG@
+
+Name: starpu
+Description: offers support for heterogeneous multicore architecture
+Version: @PACKAGE_VERSION@
+Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@
+Libs: -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_LDFLAGS@ @STARPU_OPENCL_LDFLAGS@ @STARPU_SCHED_CTX_HYPERVISOR@
+Libs.private: @LDFLAGS@ @LIBS@
+Requires: @HWLOC_REQUIRES@
+Requires.private: @GORDON_REQUIRES@

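starpu-1.0.pc is the new versioned module: the same layout as libstarpu.pc but without the deprecated-API define, and it also exports the GCC plug-in location when that plug-in is built. New code would typically be compiled against it; an illustrative one-liner for a hypothetical app.c:

    gcc app.c -o app $(pkg-config --cflags starpu-1.0) $(pkg-config --libs starpu-1.0)
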
+ 1 - 1
starpu-top/StarPU-Top-common.pri

@@ -28,7 +28,7 @@ SOURCES += $$SRCDIR/main.cpp \
     $$SRCDIR/aboutdialog.cpp
 HEADERS += $$SRCDIR/mainwindow.h \
 #STARPU-TOP
-    $$SRCDIR/starputoptypes.h \
+    $$SRCDIR/starpu_top_types.h \
     $$SRCDIR/widgetwindowsmanager.h \
     $$SRCDIR/configurationmanager.h \
     $$SRCDIR/communicationthread.h \

+ 0 - 2
starpu-top/StarPU-Top-qwt-system.pri

@@ -1,2 +0,0 @@
-LIBS += -lqwt-qt4
-INCLUDEPATH += /usr/include/qwt-qt4

+ 2 - 0
starpu-top/StarPU-Top-qwt-system.pri.in

@@ -0,0 +1,2 @@
+LIBS += @STARPU_QWT_LDFLAGS@
+INCLUDEPATH += @STARPU_QWT_INCLUDE@

+ 1 - 1
starpu-top/aboutdialog.ui

@@ -112,7 +112,7 @@
       <string/>
      </property>
      <property name="pixmap">
-      <pixmap resource="resources.qrc">:/images/starputop.png</pixmap>
+      <pixmap resource="resources.qrc">:/images/starpu_top.png</pixmap>
      </property>
      <property name="scaledContents">
       <bool>true</bool>

+ 3 - 3
starpu-top/communicationmanager.cpp

@@ -70,7 +70,7 @@ void CommunicationManager::initializeSession()
 {
     _dataDescriptions = new QList<DataDescription*> ();
     _paramDescriptions = new QList<ParamDescription*> ();
-    _serverDevices = new QList<StarputopDevice> ;
+    _serverDevices = new QList<starpu_top_device> ;

     _serverInfoMsgCount = 0;
     _state = COM_STATE_INIT;
@@ -665,7 +665,7 @@ void CommunicationManager::parseInitDevMessage(QString messageString)
         Q_ASSERT_X(ok == true, "CommunicationManager::parseInitDevMessage()",
                    "Bogus message received in INIT DEV");

-        StarputopDeviceType deviceType;
+        starpu_top_device_type deviceType;

         Q_ASSERT_X(
                 deviceTypeString.compare(
@@ -701,7 +701,7 @@ void CommunicationManager::parseInitDevMessage(QString messageString)
             deviceType = SERVERDEVICE_GORDON;
         }

-        StarputopDevice device;
+        starpu_top_device device;
         device.id = deviceId;
         device.type = deviceType;
         device.name = deviceNameString;

+ 3 - 3
starpu-top/communicationmanager.h

@@ -27,7 +27,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #define COMMUNICATIONMANAGER_H

 #include <QTcpSocket>
-#include "starputoptypes.h"
+#include "starpu_top_types.h"

 class CommunicationManager : public QTcpSocket
 { /* Receives protocol messages from server, parses them
@@ -54,7 +54,7 @@ private:
     qlonglong _serverTimestamp;
     QList<DataDescription*> *_dataDescriptions;
     QList<ParamDescription*> *_paramDescriptions;
-    QList<StarputopDevice> *_serverDevices;
+    QList<starpu_top_device> *_serverDevices;
     // Communication states
     CommunicationState _state;
     bool _initServerInfoCompleted;
@@ -125,7 +125,7 @@ signals:
     void serverInitCompleted(QString serverID,
                              QList<DataDescription*> *dataDescriptions,
                              QList<ParamDescription*> *paramDescriptions,
-                             QList<StarputopDevice> *serverDevices);
+                             QList<starpu_top_device> *serverDevices);
     // Notify GUI with a protocol message
     // Protocol error
     void protocolError(QString errorMessage);

+ 3 - 3
starpu-top/communicationthread.cpp

@@ -30,7 +30,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include "configurationmanager.h"
 #include "mainwindow.h"
 #include "communicationmanager.h"
-#include "starputoptypes.h"
+#include "starpu_top_types.h"

 const int MAX_CONNECTION_ATTEMPTS = 10;

@@ -103,12 +103,12 @@ void CommunicationThread::createNewCommunicationManager(void)
                      SIGNAL(serverInitCompleted(QString,
                                                 QList<DataDescription*>*,
                                                 QList<ParamDescription*>*,
-                                                QList<StarputopDevice>*)),
+                                                QList<Starpu_TopDevice>*)),
                      _mainWindow, SLOT(initClient(
                              QString,
                              QList<DataDescription*>*,
                              QList<ParamDescription*>*,
-                             QList<StarputopDevice>*)));
+                             QList<Starpu_TopDevice>*)));
     // Output data
     QObject::connect(_mainWindow, SIGNAL(clientLaunched()),
                      _communicationManager, SLOT(sendGoMessage()));

+ 1 - 1
starpu-top/configurationmanager.h

@@ -29,7 +29,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include <QSettings>

 static const QString CONFIG_FILE_DIR = ".";
-static const QString CONFIG_FILE_NAME = "starputop.cfg";
+static const QString CONFIG_FILE_NAME = "starpu_top.cfg";

 class ConfigurationManager
 { /* Contains and manages all the application settings

+ 1 - 1
starpu-top/dataaggregatorwidget.h

@@ -34,7 +34,7 @@ class QwtPlot;

 #include <QHash>
 #include <QAction>
-#include "starputoptypes.h"
+#include "starpu_top_types.h"
 #include "abstractwidgetwindow.h"

 class DataAggregatorWidget : public AbstractWidgetWindow

+ 1 - 1
starpu-top/datawidget.h

@@ -31,7 +31,7 @@ class WidgetWindowsManager;
 class QwtPlotCurve;
 class QwtPlot;

-#include "starputoptypes.h"
+#include "starpu_top_types.h"
 #include "abstractwidgetwindow.h"

 class DataWidget : public AbstractWidgetWindow

+ 2 - 4
starpu-top/extradist

@@ -9,9 +9,8 @@ EXTRA_DIST	+=	\
                 starpu-top/abstractwidgetwindow.cpp     \
                 starpu-top/communicationthread.h        \
                 starpu-top/configurationmanager.cpp     \
-                starpu-top/starputoptypes.h             \
+                starpu-top/starpu_top_types.h             \
                 starpu-top/mainwindow.ui                \
-                starpu-top/debug                        \
                 starpu-top/mainwindow.cpp               \
                 starpu-top/sessionsetupmanager.cpp      \
                 starpu-top/resources.qrc                \
@@ -19,7 +18,7 @@ EXTRA_DIST	+=	\
                 starpu-top/images/connect.png           \
                 starpu-top/images/debugon.png           \
                 starpu-top/images/help.png              \
-                starpu-top/images/starputop.png         \
+                starpu-top/images/starpu_top.png         \
                 starpu-top/images/widget.png            \
                 starpu-top/images/lock.png              \
                 starpu-top/images/about.png             \
@@ -45,7 +44,6 @@ EXTRA_DIST	+=	\
                 starpu-top/debugconsole.ui                      \
                 starpu-top/dataaggregatorwidget.cpp             \
                 starpu-top/datawidget.cpp                       \
-                starpu-top/release                              \
                 starpu-top/datawidget.h                         \
                 starpu-top/debugconsole.cpp                     \
                 starpu-top/ganttwidget.h                        \

+ 9 - 9
starpu-top/ganttwidget.cpp

@@ -469,7 +469,7 @@ void GanttWidget::drawFromTime(QPainter *painter, qlonglong timestamp)
         borneBefore = 0;
     }
     _tasks = _taskManager->tasks(borneBefore, _timePresent);
-    foreach(StarputopTask t, _tasks)
+    foreach(starpu_top_task t, _tasks)
     {
 	drawWorkPU(painter,t);
     }
@@ -526,7 +526,7 @@
 }

 /* draw forecasted working time for each processor */
-void GanttWidget::drawPrevWorkPU(QPainter *painter, StarputopTask t)
+void GanttWidget::drawPrevWorkPU(QPainter *painter, starpu_top_task t)
 {
     int starty = HEIGHT_TIME_AXIS + MARGIN;
     int widthAllowed = size().width() - WIDTH_PROGRAM - MARGIN
@@ -623,7 +623,7 @@ void GanttWidget::drawPrevWorkPU(QPainter *painter, StarputopTask t)
  we haven't to test if they are displayable or not. We just have to calculate
  which part of time is displayable.
  The task t has its begin or its end between time Before and timePresent */
-void GanttWidget::drawWorkPU(QPainter *painter, StarputopTask t)
+void GanttWidget::drawWorkPU(QPainter *painter, starpu_top_task t)
 {
     int starty = HEIGHT_TIME_AXIS + MARGIN;
     int widthAllowed = size().width() - WIDTH_PROGRAM - MARGIN
@@ -783,12 +783,12 @@ void GanttWidget::countPUs()
     _numPUs = length;
     delete _PUsByDevice;
     delete _PUsByPos;
-    _PUsByDevice = new StarputopDevice[length];
-    _PUsByPos = new StarputopDevice[length];
+    _PUsByDevice = new starpu_top_device[length];
+    _PUsByPos = new starpu_top_device[length];
     int pos = 0;

     /* CPUs */
-    foreach(StarputopDevice sD,*_mainWindow->serverDevices())
+    foreach(starpu_top_device sD,*_mainWindow->serverDevices())
     {
 	if(sD.type == 0)
 	{
@@ -806,7 +806,7 @@ void GanttWidget::countPUs()
     }

     /* GPUs */
-    foreach (StarputopDevice sD , *_mainWindow->serverDevices())
+    foreach (starpu_top_device sD , *_mainWindow->serverDevices())
     {
 	if(sD.type == 1 || sD.type == 2)
 	{
@@ -855,7 +855,7 @@ void GanttWidget::paint(QPainter *painter, QPaintEvent *event)
             }

             _tasks = _taskManager->tasks(borneBefore, _timePresent);
-            foreach (StarputopTask t, _tasks)
+            foreach (starpu_top_task t, _tasks)
             {
                 drawWorkPU(painter,t);
             }
@@ -863,7 +863,7 @@
             /* Future past */
             qlonglong borneAfter = _timePresent + _timeAfter;
             _tasks = _taskManager->prevTasks(_timePresent, borneAfter);
-            foreach		(StarputopTask t, _tasks)
+            foreach		(starpu_top_task t, _tasks)
             {
                 drawPrevWorkPU(painter,t);
             }

+ 6 - 6
starpu-top/ganttwidget.h

@@ -31,7 +31,7 @@ class TaskManager;

 #include <QGLWidget>
 #include <QPainter>
-#include "starputoptypes.h"
+#include "starpu_top_types.h"

 class GanttWidget : public QGLWidget
 {
@@ -58,9 +58,9 @@ protected:
     void drawTime(QPainter *painter);
     void drawProgram(QPainter *painter);
     void resizeGL (int width,int height);
-    void drawWorkPU(QPainter *painter, StarputopTask t);
+    void drawWorkPU(QPainter *painter, starpu_top_task t);
     void drawIdlePU(QPainter *painter);
-    void drawPrevWorkPU(QPainter *painter, StarputopTask t);
+    void drawPrevWorkPU(QPainter *painter, starpu_top_task t);
     void defaultScreen(QPainter *painter);
     void drawPresentLine(QPainter *painter);
     int computeTimeInterval(int timeTotal);
@@ -84,7 +84,7 @@ private:
     qreal _coordxPresentLine;
     int _numPUs;
     bool _wasRunning;
-    QList<StarputopTask> _tasks;
+    QList<starpu_top_task> _tasks;
     int _timeTotal;
     int _timeAfter;
     int _timeBefore;
@@ -92,8 +92,8 @@
     QTimer *_timer;
     qlonglong _timePresent;
     qlonglong _timeToShow;
-    StarputopDevice *_PUsByDevice;
-    StarputopDevice *_PUsByPos;
+    starpu_top_device *_PUsByDevice;
+    starpu_top_device *_PUsByPos;
     int _numCPUs;
     int _numGPUs;
     bool _initCompleted;

starpu-top/images/starputop.png → starpu-top/images/starpu_top.png


+ 1 - 1
starpu-top/interactivewidget.h

@@ -30,7 +30,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include <QCloseEvent>
 #include <QLabel>
 #include <QHBoxLayout>
-#include "starputoptypes.h"
+#include "starpu_top_types.h"

 class MainWindow;


+ 11 - 11
starpu-top/mainwindow.cpp

@@ -61,7 +61,7 @@ MainWindow::MainWindow(QWidget *parent) :
     _dataAggregatorWidgets = new QList<QPointer<DataAggregatorWidget> > ();
     _dataDescriptions = new QList<DataDescription*> ();
     _paramDescriptions = new QList<ParamDescription*> ();
-    _serverDevices = new QList<StarputopDevice> ();
+    _serverDevices = new QList<starpu_top_device> ();
     _nbDataWidgets = _nbInteractiveWidgets = _nbDataAggregatorWidgets = 0;

     // Init managers
@@ -97,18 +97,18 @@ MainWindow::MainWindow(QWidget *parent) :
     QObject::connect(settingsAction, SIGNAL(triggered()), this,
                      SLOT(on_actionPreferences_triggered()));
     connectButton->addAction(settingsAction);
-    ui->menuStarputop->addAction(_actionConnect);
+    ui->menu_starpu_top->addAction(_actionConnect);
     // Action launch
     _actionLaunch = ui->mainToolBar->addAction(QIcon(":/images/play.png"),
                                                tr("Launch StarPU"));
     _actionLaunch->setIconText("Launch StarPU");
     _actionLaunch->setToolTip("Launch StarPU");
     _actionLaunch->setShortcut(QKeySequence("Ctrl+L"));
-    ui->menuStarputop->addAction(_actionLaunch);
+    ui->menu_starpu_top->addAction(_actionLaunch);
     QObject::connect(_actionLaunch, SIGNAL(triggered()), this,
                      SLOT(on_actionLaunch_StarPU_triggered()));
     ui->mainToolBar->addSeparator();
-    ui->menuStarputop->addSeparator();
+    ui->menu_starpu_top->addSeparator();
     // Action debug
     _actionDebug = ui->mainToolBar->addAction(QIcon(":/images/debugon.png"),
                                               tr("Enable debug"));
@@ -116,7 +116,7 @@ MainWindow::MainWindow(QWidget *parent) :
     _actionDebug->setToolTip("Enable debug");
     _actionDebug->setShortcut(QKeySequence("Ctrl+D"));
     _actionDebug->setCheckable(true);
-    ui->menuStarputop->addAction(_actionDebug);
+    ui->menu_starpu_top->addAction(_actionDebug);
     QObject::connect(_actionDebug, SIGNAL(toggled(bool)),
                      this, SLOT(on_actionDebug_triggered(bool)));
     // Action save session setup
@@ -125,7 +125,7 @@ MainWindow::MainWindow(QWidget *parent) :
     _actionSaveSessionSetup->setIconText("Save session setup");
     _actionSaveSessionSetup->setToolTip("Save session setup");
     _actionSaveSessionSetup->setShortcut(QKeySequence("Ctrl+S"));
-    ui->menuStarputop->addAction(_actionSaveSessionSetup);
+    ui->menu_starpu_top->addAction(_actionSaveSessionSetup);
     QObject::connect(_actionSaveSessionSetup, SIGNAL(triggered()), this,
                      SLOT(on_actionSaveSessionSetup_triggered()));
     // Action add data aggregator widget
@@ -135,13 +135,13 @@ MainWindow::MainWindow(QWidget *parent) :
     _actionAddDataAggregatorWidget->setIconText("Add data aggregator widget");
     _actionAddDataAggregatorWidget->setToolTip("Add data aggregator widget");
     _actionAddDataAggregatorWidget->setShortcut(QKeySequence("Ctrl+G"));
-    ui->menuStarputop->addAction(_actionAddDataAggregatorWidget);
+    ui->menu_starpu_top->addAction(_actionAddDataAggregatorWidget);
     QObject::connect(_actionAddDataAggregatorWidget, SIGNAL(triggered()), this,
                      SLOT(on_actionAddDataAggregatorWidget_triggered()));
     ui->mainToolBar->addSeparator();
-    ui->menuStarputop->addSeparator();
+    ui->menu_starpu_top->addSeparator();
     // Action quit
-    QAction *actionQuit = ui->menuStarputop->addAction(
+    QAction *actionQuit = ui->menu_starpu_top->addAction(
             QIcon(":/images/quit.png"), tr("Quit"));
     actionQuit->setIconText("Quit");
     actionQuit->setToolTip("Quit");
@@ -540,7 +540,7 @@ void MainWindow::synchronizeSessionTime(qlonglong serverTimestamp)
 void MainWindow::initClient(QString serverID,
                             QList<DataDescription*> *dataDescriptions,
                             QList<ParamDescription*> *paramDescriptions,
-                            QList<StarputopDevice> *serverDevices)
+                            QList<starpu_top_device> *serverDevices)
 {
     _serverID = serverID;
     _dataDescriptions = dataDescriptions;
@@ -1213,7 +1213,7 @@ ParamDescription *MainWindow::paramDescriptionFromId(int paramId)
     return 0;
 }

-const QList<StarputopDevice> *MainWindow::serverDevices() const
+const QList<starpu_top_device> *MainWindow::serverDevices() const
 {
     return _serverDevices;
 }

+ 4 - 4
starpu-top/mainwindow.h

@@ -49,7 +49,7 @@ class TaskManager;
 #include <QAbstractSocket>
 #include <QTime>
 #include <QSpinBox>
-#include "starputoptypes.h"
+#include "starpu_top_types.h"

 namespace Ui
 {
@@ -79,7 +79,7 @@ public:
     const QList<ParamDescription*> *paramDescriptions() const;
     DataDescription *dataDescriptionFromId(int dataId);
     ParamDescription *paramDescriptionFromId(int interactiveId);
-    const QList<StarputopDevice> *serverDevices() const;
+    const QList<starpu_top_device> *serverDevices() const;
     // Get different widgets metadata
     const QHash<DataWidgetType, QString> *dataWidgetNames() const;
     const QHash<DataType, QSet<DataWidgetType> >
@@ -166,7 +166,7 @@ private:
     // Different descriptions
     QList<DataDescription*> *_dataDescriptions;
     QList<ParamDescription*> *_paramDescriptions;
-    QList<StarputopDevice> *_serverDevices;
+    QList<starpu_top_device> *_serverDevices;
     int _nbDataWidgets;
     int _nbDataAggregatorWidgets;
     int _nbInteractiveWidgets;
@@ -233,7 +233,7 @@ public slots:
     void initClient(QString serverID,
                     QList<DataDescription*> *dataDescriptions,
                     QList<ParamDescription*> *paramDescriptions,
-                    QList<StarputopDevice> *serverDevices);
+                    QList<starpu_top_device> *serverDevices);
     // Connection events handlers
     void connectionSucceeded();
     void connectionAborted(QString message);

+ 3 - 3
starpu-top/mainwindow.ui

@@ -21,7 +21,7 @@
   </property>
   <property name="windowIcon">
    <iconset resource="resources.qrc">
-    <normaloff>:/images/starputop.png</normaloff>:/images/starputop.png</iconset>
+    <normaloff>:/images/starpu_top.png</normaloff>:/images/starpu_top.png</iconset>
   </property>
   <widget class="QWidget" name="centralWidget">
    <layout class="QGridLayout" name="gridLayout_2">
@@ -42,7 +42,7 @@
      <height>21</height>
     </rect>
    </property>
-   <widget class="QMenu" name="menuStarputop">
+   <widget class="QMenu" name="menu_starpu_top">
     <property name="title">
      <string>StarPU-Top</string>
     </property>
@@ -59,7 +59,7 @@
     </property>
     <addaction name="actionPreferences"/>
    </widget>
-   <addaction name="menuStarputop"/>
+   <addaction name="menu_starpu_top"/>
    <addaction name="menuDisplay"/>
    <addaction name="menuHelp"/>
   </widget>

+ 1 - 1
starpu-top/preferencesdialog.h

@@ -33,7 +33,7 @@ class SessionSetupManager;
 #include <QMetaType>
 #include <QDialog>
 #include <QComboBox>
-#include "starputoptypes.h"
+#include "starpu_top_types.h"

 namespace Ui
 {

+ 1 - 1
starpu-top/resources.qrc

@@ -13,7 +13,7 @@
         <file>images/add.png</file>
         <file>images/remove.png</file>
         <file>images/widget.png</file>
-        <file>images/starputop.png</file>
+        <file>images/starpu_top.png</file>
         <file>images/windows.png</file>
         <file>images/lock.png</file>
     </qresource>

+ 1 - 1
starpu-top/sessionsetupmanager.h

@@ -28,7 +28,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

 class MainWindow;

-#include "starputoptypes.h"
+#include "starpu_top_types.h"
 #include <QSettings>

 static const QString SESSION_SETUPS_DIR = "./sessionsetups";

+ 7 - 7
starpu-top/starputoptypes.h

@@ -23,8 +23,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */


-#ifndef STARPUTOPTYPES_H
-#define STARPUTOPTYPES_H
+#ifndef STARPU_TOP_TYPES_H
+#define STARPU_TOP_TYPES_H

 #include <QString>
 #include <QStringList>
@@ -112,7 +112,7 @@ enum ParamType
     PARAM_TYPE_ENUM = 4,
 };

-enum StarputopDeviceType
+enum starpu_top_device_type
 {
     SERVERDEVICE_CPU = 0,
     SERVERDEVICE_CUDA = 1,
@@ -124,9 +124,9 @@ enum StarputopDeviceType
 typedef struct
 {
     int id;
-    StarputopDeviceType type;
+    starpu_top_device_type type;
     QString name;
-} StarputopDevice;
+} starpu_top_device;

 // Server tasks
 typedef struct
@@ -135,7 +135,7 @@ typedef struct
     int deviceId;
     qlonglong timestampStart;
     qlonglong timestampEnd;
-} StarputopTask;
+} starpu_top_task;

 // Descriptions
 typedef struct
@@ -318,4 +318,4 @@ typedef struct
     QList<int> dataIds;
 } DataAggregatorWidgetSetup;

-#endif // STARPUTOPTYPES_H
+#endif // STARPU_TOP_TYPES_H

+ 7 - 7
starpu-top/taskmanager.cpp

@@ -171,10 +171,10 @@ void TaskManager::addTaskEnd(int taskId, qlonglong timestampEnd)
     }
 }

-QList<StarputopTask> TaskManager::tasks(qlonglong timestampStart,
-                                        qlonglong timestampEnd)
+QList<starpu_top_task> TaskManager::tasks(qlonglong timestampStart,
+					  qlonglong timestampEnd)
 {
-    QList < StarputopTask > tasks;
+    QList < starpu_top_task > tasks;

     _selectTasksQuery.addBindValue(timestampStart);
     _selectTasksQuery.addBindValue(timestampEnd);
@@ -206,7 +206,7 @@ QList<StarputopTask> TaskManager::tasks(qlonglong timestampStart,
             qlonglong timestampEnd =
                     _selectTasksQuery.value(endField).toLongLong();

-            StarputopTask task;
+            starpu_top_task task;
             task.taskId = taskId;
             task.deviceId = deviceId;
             task.timestampStart = timestampStart;
@@ -220,10 +220,10 @@ QList<StarputopTask> TaskManager::tasks(qlonglong timestampStart,
     return tasks;
 }

-QList<StarputopTask> TaskManager::prevTasks(qlonglong timestampStart,
+QList<starpu_top_task> TaskManager::prevTasks(qlonglong timestampStart,
                                             qlonglong timestampEnd)
 {
-    QList < StarputopTask > prevTasks;
+    QList < starpu_top_task > prevTasks;

     _selectPrevTasksQuery.addBindValue(timestampStart);
     _selectPrevTasksQuery.addBindValue(timestampEnd);
@@ -255,7 +255,7 @@ QList<StarputopTask> TaskManager::prevTasks(qlonglong timestampStart,
             qlonglong timestampEnd =
                     _selectPrevTasksQuery.value(endField).toLongLong();

-            StarputopTask prevTask;
+            starpu_top_task prevTask;
             prevTask.taskId = taskId;
             prevTask.deviceId = deviceId;
             prevTask.timestampStart = timestampStart;

+ 3 - 3
starpu-top/taskmanager.h

@@ -26,7 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #ifndef TASKMANAGER_H
 #define TASKMANAGER_H

-#include "starputoptypes.h"
+#include "starpu_top_types.h"
 #include <QDebug>
 #include <QtSql/QSqlDatabase>
 #include <QtSql/QSqlQuery>
@@ -46,9 +46,9 @@ public:
     void addTaskStart(int taskId, int deviceId, qlonglong timestampStart);
     void addTaskEnd(int taskId, qlonglong timestampEnd);
     // Getters
-    QList<StarputopTask> tasks(qlonglong timestampStart,
+    QList<starpu_top_task> tasks(qlonglong timestampStart,
                                qlonglong timestampEnd);
-    QList<StarputopTask> prevTasks(qlonglong timestampStart,
+    QList<starpu_top_task> prevTasks(qlonglong timestampStart,
                                    qlonglong timestampEnd);

 private:

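The starpu-top changes above are essentially one renaming pass: starputoptypes.h becomes starpu_top_types.h, StarputopDevice/StarputopDeviceType/StarputopTask become starpu_top_device/starpu_top_device_type/starpu_top_task, the starputop.png image, starputop.cfg file and menuStarputop widget move to the starpu_top naming, and the hard-coded qwt-qt4 flags are replaced by configure substitutions. A quick, purely illustrative way to look for leftover occurrences of the old names in a working copy:

    grep -rn 'Starputop\|starputoptypes\|starputop\.' starpu-top/ \
        || echo "no stale references"
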
+ 1 - 0
starpufft/.gitignore

@@ -0,0 +1 @@
+/.deps

+ 97 - 0
starpufft/Makefile.am

@@ -0,0 +1,97 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2009-2012  Université de Bordeaux 1
+# Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+#
+AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/include $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS)
+
+lib_LTLIBRARIES = libstarpufft-@STARPU_EFFECTIVE_VERSION@.la
+
+EXTRA_DIST =			\
+	float.h			\
+	double.h		\
+	cudax_kernels.h		\
+	starpufftx.c		\
+	starpufftx1d.c		\
+	starpufftx2d.c		\
+	cuda_kernels.cu		\
+	cudaf_kernels.cu	\
+	cudax_kernels.cu	\
+	examples/testx.c	\
+	examples/testx_threads.c\
+	examples/testf_threads.c\
+	examples/test_threads.c
+
+versincludedir = $(includedir)/starpu/$(STARPU_EFFECTIVE_VERSION)
+versinclude_HEADERS = 				\
+	starpufft.h
+
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = libstarpufft.pc starpufft-1.0.pc
+
+libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_SOURCES = starpufft.c starpufftf.c starpufft_common.c
+libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = $(top_builddir)/src/libstarpu-@STARPU_EFFECTIVE_VERSION@.la $(FFTW_LIBS) $(FFTWF_LIBS) $(STARPU_CUDA_LDFLAGS) $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUFFT_LDFLAGS)
+libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_CFLAGS = $(FFTWF_CFLAGS)
+libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) -no-undefined					\
+  -version-info $(LIBSTARPUFFT_INTERFACE_CURRENT):$(LIBSTARPUFFT_INTERFACE_REVISION):$(LIBSTARPUFFT_INTERFACE_AGE)
+
+if STARPU_USE_CUDA
+NVCCFLAGS += -Xcompiler -fPIC -Xlinker -fPIC
+
+cudaf_kernels.o: cudaf_kernels.cu
+	$(NVCC) $(AM_CPPFLAGS) $< -c -o $@ --compiler-options -fno-strict-aliasing  $(NVCCFLAGS) -I${includedir}
+
+libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += cudaf_kernels.cu
+am_libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS = cudaf_kernels.o starpufft.lo starpufftf.lo starpufft_common.lo
+
+if STARPU_HAVE_CUFFTDOUBLECOMPLEX
+cuda_kernels.o: cuda_kernels.cu
+	$(NVCC) $(AM_CPPFLAGS) $< -c -o $@ --compiler-options -fno-strict-aliasing  $(NVCCFLAGS) -I${includedir} -arch sm_13
+
+libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += cuda_kernels.cu
+am_libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_OBJECTS += cuda_kernels.o
+endif
+
+libstarpufft_@STARPU_EFFECTIVE_VERSION@_la_LIBADD +=  $(STARPU_CUDA_LDFLAGS)
+endif
+
+examplebindir = $(libdir)/starpu/examples/starpufft
+examplebin_PROGRAMS =				\
+	examples/testf \
+	examples/test
+
+check_PROGRAMS = examples/testf
+examples_testf_LDADD = libstarpufft-@STARPU_EFFECTIVE_VERSION@.la $(top_builddir)/src/libstarpu-@STARPU_EFFECTIVE_VERSION@.la $(FFTWF_LIBS)
+
+# If we don't have CUDA, we assume that we have fftw available in double
+# precision anyway, we just want to make sure that if CUFFT is used, it also
+# supports double precision.
+if !STARPU_USE_CUDA
+check_PROGRAMS += examples/test
+else
+if STARPU_HAVE_CUFFTDOUBLECOMPLEX
+check_PROGRAMS += examples/test
+endif
+endif
+examples_test_LDADD = libstarpufft-@STARPU_EFFECTIVE_VERSION@.la $(top_builddir)/src/libstarpu-@STARPU_EFFECTIVE_VERSION@.la $(FFTW_LIBS)
+
+TESTS = $(check_PROGRAMS)
+
+
+#check_PROGRAMS += examples/test_threads examples/testf_threads
+#examples_test_threads_LDADD = libstarpufft-@STARPU_EFFECTIVE_VERSION@.la $(top_builddir)/src/libstarpu.la -lfftw3_threads
+#examples_testf_threads_LDADD = libstarpufft-@STARPU_EFFECTIVE_VERSION@.la $(top_builddir)/src/libstarpu.la -lfftw3f_threads
+
+showcheck:
+	-cat $(TEST_LOGS) /dev/null

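The new starpufft Makefile builds a versioned libstarpufft and, when CUDA is available, compiles the .cu kernels through explicit nvcc rules (single precision always, double precision only when cufftDoubleComplex is supported, hence the -arch sm_13 flag). Roughly what the cudaf_kernels.o rule expands to, with a placeholder standing in for the AM_CPPFLAGS include paths:

    nvcc -I/path/to/starpu/include cudaf_kernels.cu -c -o cudaf_kernels.o \
         --compiler-options -fno-strict-aliasing -Xcompiler -fPIC -Xlinker -fPIC
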
+ 19 - 0
starpufft/cuda_kernels.cu

@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "double.h"
+#include "cudax_kernels.cu"

+ 19 - 0
starpufft/cudaf_kernels.cu

@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "float.h"
+#include "cudax_kernels.cu"

+ 156 - 0
starpufft/cudax_kernels.cu

@@ -0,0 +1,156 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#define _externC extern "C"
+#include "cudax_kernels.h"
+
+/* Note: these assume that the sizes are powers of two */
+
+#define VARS_1d \
+	unsigned start = threadIdx.x + blockIdx.x * blockDim.x; \
+	unsigned numthreads = blockDim.x * gridDim.x;
+
+#define DISTRIB_1d(n, func,args) \
+	unsigned threads_per_block = 128; \
+\
+	if (n < threads_per_block) \
+	{			   \
+		dim3 dimGrid(n); \
+		func <<<dimGrid, 1, 0, starpu_cuda_get_local_stream()>>> args; \
+	} 					\
+	else 					\
+	{				     \
+		dim3 dimGrid(n / threads_per_block); \
+		dim3 dimBlock(threads_per_block); \
+		func <<<dimGrid, dimBlock, 0, starpu_cuda_get_local_stream()>>> args; \
+	} \
+	cudaStreamSynchronize(starpu_cuda_get_local_stream()); \
+
+extern "C" __global__ void
+STARPUFFT(cuda_twist1_1d)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned n1, unsigned n2)
+{
+	unsigned j;
+	VARS_1d
+	unsigned end = n2;
+
+	for (j = start; j < end; j += numthreads)
+		twisted1[j] = in[i+j*n1];
+}
+
+extern "C" void
+STARPUFFT(cuda_twist1_1d_host)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned n1, unsigned n2)
+{
+	DISTRIB_1d(n2, STARPUFFT(cuda_twist1_1d), (in, twisted1, i, n1, n2));
+}
+
+extern "C" __global__ void
+STARPUFFT(cuda_twiddle_1d)(_cuComplex * out, const _cuComplex * roots, unsigned n, unsigned i)
+{
+	unsigned j;
+	VARS_1d
+	unsigned end = n;
+
+	for (j = start; j < end; j += numthreads)
+		out[j] = _cuCmul(out[j], roots[i*j]);
+	return;
+}
+
+extern "C" void
+STARPUFFT(cuda_twiddle_1d_host)(_cuComplex *out, const _cuComplex *roots, unsigned n, unsigned i)
+{
+	DISTRIB_1d(n, STARPUFFT(cuda_twiddle_1d), (out, roots, n, i));
+}
+
+#define VARS_2d \
+	unsigned startx = threadIdx.x + blockIdx.x * blockDim.x; \
+	unsigned starty = threadIdx.y + blockIdx.y * blockDim.y; \
+	unsigned numthreadsx = blockDim.x * gridDim.x; \
+	unsigned numthreadsy = blockDim.y * gridDim.y;
+
+/* FIXME: introduce threads_per_dim_n / m instead */
+#define DISTRIB_2d(n, m, func, args) \
+	unsigned threads_per_dim = 16; \
+	if (n < threads_per_dim) \
+	{				   \
+		if (m < threads_per_dim) \
+		{			    \
+			dim3 dimGrid(n, m); \
+			func <<<dimGrid, 1, 0, starpu_cuda_get_local_stream()>>> args; \
+		} \
+		else \
+		{					      \
+			dim3 dimGrid(1, m / threads_per_dim); \
+			dim3 dimBlock(n, threads_per_dim); \
+			func <<<dimGrid, dimBlock, 0, starpu_cuda_get_local_stream()>>> args; \
+		} \
+	} \
+	else \
+	{				   \
+		if (m < threads_per_dim) \
+		{					      \
+			dim3 dimGrid(n / threads_per_dim, 1); \
+			dim3 dimBlock(threads_per_dim, m); \
+			func <<<dimGrid, dimBlock, 0, starpu_cuda_get_local_stream()>>> args; \
+		} \
+		else \
+		{							\
+			dim3 dimGrid(n / threads_per_dim, m / threads_per_dim); \
+			dim3 dimBlock(threads_per_dim, threads_per_dim); \
+			func <<<dimGrid, dimBlock, 0, starpu_cuda_get_local_stream()>>> args; \
+		} \
+	} \
+	cudaStreamSynchronize(starpu_cuda_get_local_stream()); \
+
+extern "C" __global__ void
+STARPUFFT(cuda_twist1_2d)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned j, unsigned n1, unsigned n2, unsigned m1, unsigned m2)
+{
+	unsigned k, l;
+	VARS_2d
+	unsigned endx = n2;
+	unsigned endy = m2;
+	unsigned m = m1*m2;
+
+	for (k = startx; k < endx; k += numthreadsx)
+		for (l = starty; l < endy; l += numthreadsy)
+			twisted1[k*m2+l] = in[i*m+j+k*m*n1+l*m1];
+}
+
+extern "C" void
+STARPUFFT(cuda_twist1_2d_host)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned j, unsigned n1, unsigned n2, unsigned m1, unsigned m2)
+{
+	DISTRIB_2d(n2, m2, STARPUFFT(cuda_twist1_2d), (in, twisted1, i, j, n1, n2, m1, m2));
+}
+
+extern "C" __global__ void
+STARPUFFT(cuda_twiddle_2d)(_cuComplex * out, const _cuComplex * roots0, const _cuComplex * roots1, unsigned n2, unsigned m2, unsigned i, unsigned j)
+{
+	unsigned k, l;
+	VARS_2d
+	unsigned endx = n2;
+	unsigned endy = m2;
+
+	for (k = startx; k < endx ; k += numthreadsx)
+		for (l = starty; l < endy ; l += numthreadsy)
+			out[k*m2 + l] = _cuCmul(_cuCmul(out[k*m2 + l], roots0[i*k]), roots1[j*l]);
+	return;
+}
+
+extern "C" void
+STARPUFFT(cuda_twiddle_2d_host)(_cuComplex *out, const _cuComplex *roots0, const _cuComplex *roots1, unsigned n2, unsigned m2, unsigned i, unsigned j)
+{
+	DISTRIB_2d(n2, m2, STARPUFFT(cuda_twiddle_2d), (out, roots0, roots1, n2, m2, i, j));
+}
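The launch configuration chosen by DISTRIB_1d above is easiest to see on a concrete, purely illustrative size: with threads_per_block = 128 and n = 100000, the else branch launches 100000/128 = 781 blocks of 128 threads (99968 threads in total), and the grid-stride loop in the kernels (j += numthreads) picks up the 32 remaining elements, so nothing is lost when n is not a multiple of the block size; for n < 128 the first branch simply launches n single-thread blocks.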

+ 23 - 0
starpufft/cudax_kernels.h

@@ -0,0 +1,23 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <cuComplex.h>
+#include <starpu_cuda.h>
+_externC void STARPUFFT(cuda_twist1_1d_host)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned n1, unsigned n2);
+_externC void STARPUFFT(cuda_twiddle_1d_host)(_cuComplex *out, const _cuComplex *roots, unsigned n, unsigned i);
+_externC void STARPUFFT(cuda_twist1_2d_host)(const _cuComplex *in, _cuComplex *twisted1, unsigned i, unsigned j, unsigned n1, unsigned n2, unsigned m1, unsigned m2);
+_externC void STARPUFFT(cuda_twiddle_2d_host)(_cuComplex *out, const _cuComplex *roots0, const _cuComplex *roots1, unsigned n2, unsigned m2, unsigned i, unsigned j);

+ 51 - 0
starpufft/double.h

@@ -0,0 +1,51 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2012  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <complex.h>
+#include <starpu_config.h>
+
+#ifdef STARPU_HAVE_FFTW
+#include <fftw3.h>
+#endif
+
+#ifdef STARPU_USE_CUDA
+#include <cufft.h>
+#endif
+
+#undef  FLOAT
+#define DOUBLE
+
+typedef double real;
+#ifdef STARPU_HAVE_FFTW
+typedef fftw_complex _fftw_complex;
+typedef fftw_plan _fftw_plan;
+#endif
+#ifdef STARPU_USE_CUDA
+typedef cuDoubleComplex _cuComplex;
+typedef cufftDoubleComplex _cufftComplex;
+#define _cufftExecC2C cufftExecZ2Z
+#define _cufftExecR2C cufftExecD2Z
+#define _cufftExecC2R cufftExecZ2D
+#define _CUFFT_C2C CUFFT_Z2Z
+#define _CUFFT_R2C CUFFT_D2Z
+#define _CUFFT_C2R CUFFT_Z2D
+#define _cuCmul(x,y) cuCmul(x,y)
+#endif
+#define STARPUFFT(name) starpufft_##name
+#define _FFTW(name) fftw_##name
+
+#define TYPE ""

+ 19 - 0
starpufft/examples/test.c

@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "double.h"
+#include "testx.c"

+ 19 - 0
starpufft/examples/test_threads.c

@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "double.h"
+#include "testx_threads.c"

+ 19 - 0
starpufft/examples/testf.c

@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "float.h"
+#include "testx.c"

+ 19 - 0
starpufft/examples/testf_threads.c

@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "float.h"
+#include "testx_threads.c"

+ 283 - 0
starpufft/examples/testx.c

@@ -0,0 +1,283 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010-2012  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <complex.h>
+#include <math.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <sys/time.h>
+
+#include <starpu.h>
+
+#include <starpu_config.h>
+#include "starpufft.h"
+
+#undef STARPU_USE_CUDA
+
+#ifdef STARPU_HAVE_FFTW
+#include <fftw3.h>
+#endif
+#ifdef STARPU_USE_CUDA
+#include <cufft.h>
+#endif
+
+#define SIGN (-1)
+/* #define SIGN (1) */
+
+#ifdef STARPU_HAVE_FFTW
+static void check_fftw(STARPUFFT(complex) *out, STARPUFFT(complex) *out_fftw, int size)
+{
+	int i;
+	double max = 0., tot = 0., norm = 0., normdiff = 0.;
+	for (i = 0; i < size; i++)
+	{
+		double diff = cabs(out[i]-out_fftw[i]);
+		double diff2 = diff * diff;
+		double size = cabs(out_fftw[i]);
+		double size2 = size * size;
+		if (diff > max)
+			max = diff;
+		tot += diff;
+		normdiff += diff2;
+		norm += size2;
+	}
+	fprintf(stderr, "\nmaximum difference %g\n", max);
+	fprintf(stderr, "average difference %g\n", tot / size);
+	fprintf(stderr, "difference norm %g\n", sqrt(normdiff));
+	double relmaxdiff = max / sqrt(norm);
+	fprintf(stderr, "relative maximum difference %g\n", relmaxdiff);
+	double relavgdiff = (tot / size) / sqrt(norm);
+	fprintf(stderr, "relative average difference %g\n", relavgdiff);
+	if (!strcmp(TYPE, "f") && (relmaxdiff > 1e-7 || relavgdiff > 1e-7)) {
+		fprintf(stderr, "Failure: Difference too big (TYPE f)\n");
+		exit(EXIT_FAILURE);
+	}
+	if (!strcmp(TYPE, "") && (relmaxdiff > 1e-16 || relavgdiff > 1e-16))
+	{
+		fprintf(stderr, "Failure: Difference too big\n");
+		exit(EXIT_FAILURE);
+	}
+}
+#endif
+
+#ifdef STARPU_USE_CUDA
+static void check_cuda(STARPUFFT(complex) *out, STARPUFFT(complex) *out_fftw, int size)
+{
+	int i;
+	double max = 0., tot = 0., norm = 0., normdiff = 0.;
+	for (i = 0; i < size; i++)
+	{
+		double diff = cabs(out[i]-out_fftw[i]);
+		double diff2 = diff * diff;
+		double size = cabs(out_fftw[i]);
+		double size2 = size * size;
+		if (diff > max)
+			max = diff;
+		tot += diff;
+		normdiff += diff2;
+		norm += size2;
+	}
+	fprintf(stderr, "\nmaximum difference %g\n", max);
+	fprintf(stderr, "average difference %g\n", tot / size);
+	fprintf(stderr, "difference norm %g\n", sqrt(normdiff));
+	double relmaxdiff = max / sqrt(norm);
+	fprintf(stderr, "relative maximum difference %g\n", relmaxdiff);
+	double relavgdiff = (tot / size) / sqrt(norm);
+	fprintf(stderr, "relative average difference %g\n", relavgdiff);
+	if (!strcmp(TYPE, "f") && (relmaxdiff > 1e-8 || relavgdiff > 1e-8))
+		exit(EXIT_FAILURE);
+	if (!strcmp(TYPE, "") && (relmaxdiff > 1e-16 || relavgdiff > 1e-16))
+		exit(EXIT_FAILURE);
+}
+#endif
+
+int main(int argc, char *argv[])
+{
+	int i, ret;
+	int size;
+	int n = 0, m = 0;
+	STARPUFFT(plan) plan;
+	starpu_data_handle_t in_handle, out_handle;
+#ifdef STARPU_HAVE_FFTW
+	_FFTW(plan) fftw_plan;
+#endif
+#ifdef STARPU_USE_CUDA
+	cufftHandle cuda_plan;
+	cudaError_t cures;
+#endif
+#if defined(STARPU_HAVE_FFTW) || defined(STARPU_USE_CUDA)
+	struct timeval begin, end;
+	double timing;
+	size_t bytes;
+#endif
+
+	ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	if (argc == 1)
+	{
+		n = 42;
+		/* 1D */
+		size = n;
+	}
+	else if (argc == 2)
+	{
+		n = atoi(argv[1]);
+
+		/* 1D */
+		size = n;
+	}
+	else if (argc == 3)
+	{
+		n = atoi(argv[1]);
+		m = atoi(argv[2]);
+
+		/* 2D */
+		size = n * m;
+	}
+	else
+	{
+		assert(0);
+	}
+
+#if defined(STARPU_HAVE_FFTW) || defined(STARPU_USE_CUDA)
+	bytes = size * sizeof(STARPUFFT(complex));
+#endif
+
+	STARPUFFT(complex) *in = STARPUFFT(malloc)(size * sizeof(*in));
+	starpu_srand48(0);
+	for (i = 0; i < size; i++)
+		in[i] = starpu_drand48() + I * starpu_drand48();
+
+	STARPUFFT(complex) *out = STARPUFFT(malloc)(size * sizeof(*out));
+
+#ifdef STARPU_HAVE_FFTW
+	STARPUFFT(complex) *out_fftw = STARPUFFT(malloc)(size * sizeof(*out_fftw));
+#endif
+
+#ifdef STARPU_USE_CUDA
+	STARPUFFT(complex) *out_cuda = STARPUFFT(malloc)(size * sizeof(*out_cuda));
+#endif
+
+	if (argc <= 2)
+	{
+		plan = STARPUFFT(plan_dft_1d)(n, SIGN, 0);
+#ifdef STARPU_HAVE_FFTW
+		fftw_plan = _FFTW(plan_dft_1d)(n, NULL, (void*) 1, SIGN, FFTW_ESTIMATE);
+#endif
+#ifdef STARPU_USE_CUDA
+		if (cufftPlan1d(&cuda_plan, n, _CUFFT_C2C, 1) != CUFFT_SUCCESS)
+			fprintf(stderr, "cufftPlan1d failed\n");
+#endif
+
+	}
+	else if (argc == 3)
+	{
+		plan = STARPUFFT(plan_dft_2d)(n, m, SIGN, 0);
+#ifdef STARPU_HAVE_FFTW
+		fftw_plan = _FFTW(plan_dft_2d)(n, m, NULL, (void*) 1, SIGN, FFTW_ESTIMATE);
+#endif
+#ifdef STARPU_USE_CUDA
+		STARPU_ASSERT(cufftPlan2d(&cuda_plan, n, m, _CUFFT_C2C) == CUFFT_SUCCESS);
+#endif
+	}
+	else
+	{
+		assert(0);
+	}
+
+#ifdef STARPU_HAVE_FFTW
+	gettimeofday(&begin, NULL);
+	_FFTW(execute_dft)(fftw_plan, in, out_fftw);
+	gettimeofday(&end, NULL);
+	_FFTW(destroy_plan)(fftw_plan);
+	timing = (double)((end.tv_sec - begin.tv_sec)*1000000 + (end.tv_usec - begin.tv_usec));
+	printf("FFTW took %2.2f ms (%2.2f MB/s)\n\n", timing/1000, bytes/timing);
+#endif
+#ifdef STARPU_USE_CUDA
+	gettimeofday(&begin, NULL);
+	if (cufftExecC2C(cuda_plan, (cufftComplex*) in, (cufftComplex*) out_cuda, CUFFT_FORWARD) != CUFFT_SUCCESS)
+		fprintf(stderr, "cufftExecC2C failed\n");
+	if ((cures = cudaThreadSynchronize()) != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(cures);
+	gettimeofday(&end, NULL);
+	cufftDestroy(cuda_plan);
+	timing = (double)((end.tv_sec - begin.tv_sec)*1000000 + (end.tv_usec - begin.tv_usec));
+	printf("CUDA took %2.2f ms (%2.2f MB/s)\n\n", timing/1000, bytes/timing);
+#endif
+
+	STARPUFFT(execute)(plan, in, out);
+	STARPUFFT(showstats)(stdout);
+
+#ifdef STARPU_HAVE_FFTW
+	check_fftw(out, out_fftw, size);
+#endif
+#ifdef STARPU_USE_CUDA
+	check_cuda(out, out_cuda, size);
+#endif
+
+#if 1
+	starpu_vector_data_register(&in_handle, 0, (uintptr_t) in, size, sizeof(*in));
+	starpu_vector_data_register(&out_handle, 0, (uintptr_t) out, size, sizeof(*out));
+
+	STARPUFFT(execute_handle)(plan, in_handle, out_handle);
+
+	starpu_data_unregister(in_handle);
+	starpu_data_unregister(out_handle);
+
+#ifdef STARPU_HAVE_FFTW
+	check_fftw(out, out_fftw, size);
+#endif
+#ifdef STARPU_USE_CUDA
+	check_cuda(out, out_cuda, size);
+#endif
+#endif
+
+	STARPUFFT(showstats)(stdout);
+	STARPUFFT(destroy_plan)(plan);
+
+	printf("\n");
+#if 0
+	for (i = 0; i < 16; i++)
+		printf("(%f,%f) ", cimag(in[i]), creal(in[i]));
+	printf("\n\n");
+	for (i = 0; i < 16; i++)
+		printf("(%f,%f) ", cimag(out[i]), creal(out[i]));
+	printf("\n\n");
+#ifdef STARPU_HAVE_FFTW
+	for (i = 0; i < 16; i++)
+		printf("(%f,%f) ", cimag(out_fftw[i]), creal(out_fftw[i]));
+	printf("\n\n");
+#endif
+#endif
+
+	STARPUFFT(free)(in);
+	STARPUFFT(free)(out);
+
+#ifdef STARPU_HAVE_FFTW
+	STARPUFFT(free)(out_fftw);
+#endif
+
+#ifdef STARPU_USE_CUDA
+	free(out_cuda);
+#endif
+
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+}

+ 113 - 0
starpufft/examples/testx_threads.c

@@ -0,0 +1,113 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010-2011  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <complex.h>
+#include <math.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <sys/time.h>
+
+#include <starpu.h>
+
+#include <starpu_config.h>
+#include "starpufft.h"
+
+#include <fftw3.h>
+
+#define SIGN (-1)
+/* #define SIGN (1) */
+
+int main(int argc, char *argv[])
+{
+	int i;
+	struct timeval begin, end;
+	int size;
+	size_t bytes;
+	int n = 0, m = 0;
+	_FFTW(plan) fftw_plan;
+	double timing;
+	char *num;
+	int num_threads = 1;
+
+	_FFTW(init_threads)();
+
+	num = getenv("NUM_THREADS");
+	if (num)
+		num_threads = atoi(num);
+	_FFTW(plan_with_nthreads)(num_threads);
+
+	if (argc < 2 || argc > 3)
+	{
+		fprintf(stderr,"need one or two size of vector\n");
+		exit(EXIT_FAILURE);
+	}
+
+	if (argc == 2)
+	{
+		n = atoi(argv[1]);
+
+		/* 1D */
+		size = n;
+	}
+	else if (argc == 3)
+	{
+		n = atoi(argv[1]);
+		m = atoi(argv[2]);
+
+		/* 2D */
+		size = n * m;
+	}
+	else
+	{
+		assert(0);
+	}
+
+	bytes = size * sizeof(_FFTW(complex));
+
+	_FFTW(complex) *in = _FFTW(malloc)(size * sizeof(*in));
+	starpu_srand48(0);
+	for (i = 0; i < size; i++)
+		in[i] = starpu_drand48() + I * starpu_drand48();
+
+	_FFTW(complex) *out_fftw = _FFTW(malloc)(size * sizeof(*out_fftw));
+
+	if (argc == 2)
+	{
+		fftw_plan = _FFTW(plan_dft_1d)(n, in, out_fftw, SIGN, FFTW_ESTIMATE);
+
+	}
+	else if (argc == 3)
+	{
+		fftw_plan = _FFTW(plan_dft_2d)(n, m, in, out_fftw, SIGN, FFTW_ESTIMATE);
+	}
+	else
+	{
+		assert(0);
+	}
+
+	gettimeofday(&begin, NULL);
+	_FFTW(execute)(fftw_plan);
+	gettimeofday(&end, NULL);
+	_FFTW(destroy_plan)(fftw_plan);
+	timing = (double)((end.tv_sec - begin.tv_sec)*1000000 + (end.tv_usec - begin.tv_usec));
+	printf("FFTW with %d threads took %2.2f ms (%2.2f MB/s)\n\n", num_threads, timing/1000, bytes/(timing*num_threads));
+
+	printf("\n");
+
+	return EXIT_SUCCESS;
+}

+ 51 - 0
starpufft/float.h

@@ -0,0 +1,51 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2012  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <complex.h>
+#include <starpu_config.h>
+
+#ifdef STARPU_HAVE_FFTW
+#include <fftw3.h>
+#endif
+
+#ifdef STARPU_USE_CUDA
+#include <cufft.h>
+#endif
+
+#undef  DOUBLE
+#define FLOAT
+
+typedef float real;
+#ifdef STARPU_HAVE_FFTW
+typedef fftwf_complex _fftw_complex;
+typedef fftwf_plan _fftw_plan;
+#endif
+#ifdef STARPU_USE_CUDA
+typedef cuComplex _cuComplex;
+typedef cufftComplex _cufftComplex;
+#define _cufftExecC2C cufftExecC2C
+#define _cufftExecR2C cufftExecR2C
+#define _cufftExecC2R cufftExecC2R
+#define _CUFFT_C2C CUFFT_C2C
+#define _CUFFT_R2C CUFFT_R2C
+#define _CUFFT_C2R CUFFT_C2R
+#define _cuCmul(x,y) cuCmulf(x,y)
+#endif
+#define STARPUFFT(name) starpufftf_##name
+#define _FFTW(name) fftwf_##name
+
+#define TYPE "f"
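float.h and double.h are the two precision headers the shared starpufft sources are compiled against; every STARPUFFT()/_FFTW() occurrence expands to the precision-specific name. Expanding the defines above, for example:

    STARPUFFT(plan_dft_1d)  ->  starpufftf_plan_dft_1d  (float.h)   /  starpufft_plan_dft_1d  (double.h)
    _FFTW(execute_dft)      ->  fftwf_execute_dft       (float.h)   /  fftw_execute_dft       (double.h)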

+ 27 - 0
starpufft/libstarpufft.pc.in

@@ -0,0 +1,27 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2009-2012  Université de Bordeaux 1
+# Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: starpufft
+Description: offers support for heterogeneous multicore architectures
+Version: @PACKAGE_VERSION@
+Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_API
+Libs: -L${libdir} -lstarpufft-@STARPU_EFFECTIVE_VERSION@ 
+Libs.private: @LDFLAGS@ @LIBS@ @STARPU_CUFFT_LDFLAGS@ @FFTW_LIBS@ @FFTWF_LIBS@

+ 27 - 0
starpufft/starpufft-1.0.pc.in

@@ -0,0 +1,27 @@
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2009-2012  Université de Bordeaux 1
+# Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: starpufft
+Description: offers support for heterogeneous multicore architectures
+Version: @PACKAGE_VERSION@
+Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@
+Libs: -L${libdir} -lstarpufft-@STARPU_EFFECTIVE_VERSION@ 
+Libs.private: @LDFLAGS@ @LIBS@ @STARPU_CUFFT_LDFLAGS@ @FFTW_LIBS@ @FFTWF_LIBS@

+ 19 - 0
starpufft/starpufft.c

@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "double.h"
+#include "starpufftx.c"

+ 60 - 0
starpufft/starpufft.h

@@ -0,0 +1,60 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2011  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <stdio.h>
+#include <complex.h>
+#include <starpu.h>
+
+#define STARPUFFT_FORWARD -1
+#define STARPUFFT_INVERSE 1
+
+#define __STARPUFFT(name) starpufft_##name
+#define __STARPUFFTF(name) starpufftf_##name
+#define __STARPUFFTL(name) starpufftl_##name
+
+#define __STARPUFFT_INTERFACE(starpufft,real) \
+typedef real _Complex starpufft(complex); \
+\
+typedef struct starpufft(plan) *starpufft(plan); \
+\
+starpufft(plan) starpufft(plan_dft_1d)(int n, int sign, unsigned flags); \
+starpufft(plan) starpufft(plan_dft_2d)(int n, int m, int sign, unsigned flags); \
+starpufft(plan) starpufft(plan_dft_r2c_1d)(int n, unsigned flags); \
+starpufft(plan) starpufft(plan_dft_c2r_1d)(int n, unsigned flags); \
+\
+void *starpufft(malloc)(size_t n); \
+void starpufft(free)(void *p); \
+\
+void starpufft(execute)(starpufft(plan) p, void *in, void *out); \
+struct starpu_task *starpufft(start)(starpufft(plan) p, void *in, void *out); \
+\
+void starpufft(execute_handle)(starpufft(plan) p, starpu_data_handle_t in, starpu_data_handle_t out); \
+struct starpu_task *starpufft(start_handle)(starpufft(plan) p, starpu_data_handle_t in, starpu_data_handle_t out); \
+\
+void starpufft(cleanup)(starpufft(plan) p); \
+void starpufft(destroy_plan)(starpufft(plan) p); \
+\
+void starpufft(startstats)(void); \
+void starpufft(stopstats)(void); \
+void starpufft(showstats)(FILE *out);
+
+__STARPUFFT_INTERFACE(__STARPUFFT, double)
+__STARPUFFT_INTERFACE(__STARPUFFTF, float)
+__STARPUFFT_INTERFACE(__STARPUFFTL, long double)
+
+/* Internal use */
+extern int starpufft_last_plan_number;
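examples/testx.c above exercises this interface in full; the following is only a minimal sketch, assuming starpu_init() has already succeeded and using the single-precision names declared by __STARPUFFT_INTERFACE(__STARPUFFTF, float):

    #include "starpufft.h"

    /* sketch: forward 1D FFT of n complex floats through starpufft */
    static void fft_once(int n)
    {
    	starpufftf_complex *in  = starpufftf_malloc(n * sizeof(*in));
    	starpufftf_complex *out = starpufftf_malloc(n * sizeof(*out));
    	/* ... fill in[0..n-1] ... */
    	starpufftf_plan p = starpufftf_plan_dft_1d(n, STARPUFFT_FORWARD, 0);
    	starpufftf_execute(p, in, out);	/* submits the tasks and waits for them */
    	starpufftf_destroy_plan(p);
    	starpufftf_free(in);
    	starpufftf_free(out);
    }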

+ 21 - 0
starpufft/starpufft_common.c

@@ -0,0 +1,21 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010-2011  Université de Bordeaux 1
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "starpufft.h"
+
+/* Used as an identifier in starpu tags to let plans run concurrently */
+int starpufft_last_plan_number;

+ 19 - 0
starpufft/starpufftf.c

@@ -0,0 +1,19 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009  Université de Bordeaux 1
+ * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include "float.h"
+#include "starpufftx.c"

+ 454 - 0
starpufft/starpufftx.c

@@ -0,0 +1,454 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2012  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#define PARALLEL 0
+
+#include <math.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+#include <starpu.h>
+#include <config.h>
+
+#include "starpufft.h"
+#ifdef STARPU_USE_CUDA
+#define _externC extern
+#include "cudax_kernels.h"
+
+#if defined(FLOAT) || defined(STARPU_HAVE_CUFFTDOUBLECOMPLEX)
+#  define __STARPU_USE_CUDA
+#else
+#  undef __STARPU_USE_CUDA
+#endif
+
+#endif
+
+#define _FFTW_FLAGS FFTW_ESTIMATE
+
+/* Steps for the parallel variant */
+enum steps
+{
+	SPECIAL, TWIST1, FFT1, JOIN, TWIST2, FFT2, TWIST3, END
+};
+
+#define NUMBER_BITS 5
+#define NUMBER_SHIFT (64 - NUMBER_BITS)
+#define STEP_BITS 3
+#define STEP_SHIFT (NUMBER_SHIFT - STEP_BITS)
+
+/* Tags for the steps of the parallel variant */
+#define _STEP_TAG(plan, step, i) (((starpu_tag_t) plan->number << NUMBER_SHIFT) | ((starpu_tag_t)(step) << STEP_SHIFT) | (starpu_tag_t) (i))
+
+
+#define I_BITS STEP_SHIFT
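Reading the defines above (NUMBER_SHIFT = 59, STEP_SHIFT = 56), a tag produced by _STEP_TAG packs its three coordinates into a single 64-bit starpu_tag_t as:

    bits 63..59  plan->number  (NUMBER_BITS = 5, i.e. up to 32 plans alive at once)
    bits 58..56  step          (STEP_BITS = 3, enough for the 8 values of enum steps)
    bits 55..0   i             (I_BITS = 56 bits of task index)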
+
+enum type
+{
+	R2C,
+	C2R,
+	C2C
+};
+
+static unsigned task_per_worker[STARPU_NMAXWORKERS];
+static unsigned samples_per_worker[STARPU_NMAXWORKERS];
+static struct timeval start, submit_tasks, end;
+
+/*
+ *
+ *	The actual kernels
+ *
+ */
+
+struct STARPUFFT(plan)
+{
+	int number;	/* uniquely identifies the plan, for starpu tags */
+
+	int *n;
+	int *n1;
+	int *n2;
+	int totsize;
+	int totsize1;	/* Number of first-round tasks */
+	int totsize2;	/* Size of first-round tasks */
+	int totsize3;	/* Number of second-round tasks */
+	int totsize4;	/* Size of second-round tasks */
+	int dim;
+	enum type type;
+	int sign;
+
+	STARPUFFT(complex) *roots[2];
+	starpu_data_handle_t roots_handle[2];
+
+	/* For each worker, we need some data */
+	struct
+	{
+#ifdef STARPU_USE_CUDA
+		/* CUFFT plans */
+		cufftHandle plan1_cuda, plan2_cuda;
+		/* Sequential version */
+		cufftHandle plan_cuda;
+#endif
+#ifdef STARPU_HAVE_FFTW
+		/* FFTW plans */
+		_fftw_plan plan1_cpu, plan2_cpu;
+		/* Sequential version */
+		_fftw_plan plan_cpu;
+#endif
+	} plans[STARPU_NMAXWORKERS];
+
+	/* Buffers for codelets */
+	STARPUFFT(complex) *in, *twisted1, *fft1, *twisted2, *fft2, *out;
+
+	/* corresponding starpu DSM handles */
+	starpu_data_handle_t in_handle, *twisted1_handle, *fft1_handle, *twisted2_handle, *fft2_handle, out_handle;
+
+	/* Tasks */
+	struct starpu_task **twist1_tasks, **fft1_tasks, **twist2_tasks, **fft2_tasks, **twist3_tasks;
+	struct starpu_task *join_task, *end_task;
+
+	/* Arguments for tasks */
+	struct STARPUFFT(args) *fft1_args, *fft2_args;
+};
+
+struct STARPUFFT(args)
+{
+	struct STARPUFFT(plan) *plan;
+	int i, j, jj, kk, ll, *iv, *kkv;
+};
+
+static void
+check_dims(STARPUFFT(plan) plan)
+{
+	int dim;
+	for (dim = 0; dim < plan->dim; dim++)
+		if (plan->n[dim] & (plan->n[dim]-1))
+		{
+			fprintf(stderr,"can't cope with non-power-of-2\n");
+			STARPU_ABORT();
+		}
+}
+
+static void
+compute_roots(STARPUFFT(plan) plan)
+{
+	int dim, k;
+
+	/* Compute the n-roots and m-roots of unity for twiddling */
+	for (dim = 0; dim < plan->dim; dim++)
+	{
+		STARPUFFT(complex) exp = (plan->sign * 2. * 4.*atan(1.)) * _Complex_I / (STARPUFFT(complex)) plan->n[dim];
+		plan->roots[dim] = malloc(plan->n[dim] * sizeof(**plan->roots));
+		for (k = 0; k < plan->n[dim]; k++)
+			plan->roots[dim][k] = cexp(exp*k);
+		starpu_vector_data_register(&plan->roots_handle[dim], 0, (uintptr_t) plan->roots[dim], plan->n[dim], sizeof(**plan->roots));
+
+#ifdef STARPU_USE_CUDA
+		if (plan->n[dim] > 100000)
+		{
+			/* prefetch the big root array on GPUs */
+			unsigned worker;
+			unsigned nworkers = starpu_worker_get_count();
+			for (worker = 0; worker < nworkers; worker++)
+			{
+				unsigned node = starpu_worker_get_memory_node(worker);
+				if (starpu_worker_get_type(worker) == STARPU_CUDA_WORKER)
+					starpu_data_prefetch_on_node(plan->roots_handle[dim], node, 0);
+			}
+		}
+#endif
+	}
+}
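In other words, the loop above precomputes the usual twiddle factors, with pi spelled as 4.*atan(1.):

    roots[dim][k] = cexp(sign * 2*pi*I * k / n[dim]),   k = 0 .. n[dim]-1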
+
+/* Only CUDA capability >= 1.3 supports doubles, rule old cards out. */
+#ifdef DOUBLE
+static int can_execute(unsigned workerid, struct starpu_task *task, unsigned nimpl) {
+	if (starpu_worker_get_type(workerid) == STARPU_CPU_WORKER)
+		return 1;
+#ifdef STARPU_USE_CUDA
+	{
+		/* Cuda device */
+		const struct cudaDeviceProp *props;
+		props = starpu_cuda_get_device_properties(workerid);
+		if (props->major >= 2 || props->minor >= 3)
+			/* At least compute capability 1.3, supports doubles */
+			return 1;
+		/* Old card does not support doubles */
+		return 0;
+	}
+#endif
+	return 0;
+}
+#define CAN_EXECUTE .can_execute = can_execute,
+#else
+#define CAN_EXECUTE
+#endif
+
+#include "starpufftx1d.c"
+#include "starpufftx2d.c"
+
+struct starpu_task *
+STARPUFFT(start)(STARPUFFT(plan) plan, void *_in, void *_out)
+{
+	struct starpu_task *task;
+	int z;
+
+	plan->in = _in;
+	plan->out = _out;
+
+	switch (plan->dim)
+	{
+		case 1:
+		{
+			switch (plan->type)
+			{
+			case C2C:
+				starpu_vector_data_register(&plan->in_handle, 0, (uintptr_t) plan->in, plan->totsize, sizeof(STARPUFFT(complex)));
+				if (!PARALLEL)
+					starpu_vector_data_register(&plan->out_handle, 0, (uintptr_t) plan->out, plan->totsize, sizeof(STARPUFFT(complex)));
+				if (PARALLEL)
+				{
+					for (z = 0; z < plan->totsize1; z++)
+						plan->twist1_tasks[z]->handles[0] = plan->in_handle;
+				}
+				task = STARPUFFT(start1dC2C)(plan, plan->in_handle, plan->out_handle);
+				break;
+			default:
+				STARPU_ABORT();
+				break;
+			}
+			break;
+		}
+		case 2:
+			starpu_vector_data_register(&plan->in_handle, 0, (uintptr_t) plan->in, plan->totsize, sizeof(STARPUFFT(complex)));
+			if (!PARALLEL)
+				starpu_vector_data_register(&plan->out_handle, 0, (uintptr_t) plan->out, plan->totsize, sizeof(STARPUFFT(complex)));
+			if (PARALLEL)
+			{
+				for (z = 0; z < plan->totsize1; z++)
+					plan->twist1_tasks[z]->handles[0] = plan->in_handle;
+			}
+			task = STARPUFFT(start2dC2C)(plan, plan->in_handle, plan->out_handle);
+			break;
+		default:
+			STARPU_ABORT();
+			break;
+	}
+	return task;
+}
+
+void
+STARPUFFT(cleanup)(STARPUFFT(plan) plan)
+{
+	if (plan->in_handle)
+		starpu_data_unregister(plan->in_handle);
+	if (!PARALLEL)
+	{
+		if (plan->out_handle)
+			starpu_data_unregister(plan->out_handle);
+	}
+}
+
+struct starpu_task *
+STARPUFFT(start_handle)(STARPUFFT(plan) plan, starpu_data_handle_t in, starpu_data_handle_t out)
+{
+	return STARPUFFT(start1dC2C)(plan, in, out);
+}
+
+void
+STARPUFFT(execute)(STARPUFFT(plan) plan, void *in, void *out)
+{
+	memset(task_per_worker, 0, sizeof(task_per_worker));
+	memset(samples_per_worker, 0, sizeof(samples_per_worker));
+
+	gettimeofday(&start, NULL);
+
+	struct starpu_task *task = STARPUFFT(start)(plan, in, out);
+	gettimeofday(&submit_tasks, NULL);
+	starpu_task_wait(task);
+
+	STARPUFFT(cleanup)(plan);
+
+	gettimeofday(&end, NULL);
+}
+
+void
+STARPUFFT(execute_handle)(STARPUFFT(plan) plan, starpu_data_handle_t in, starpu_data_handle_t out)
+{
+	struct starpu_task *task = STARPUFFT(start_handle)(plan, in, out);
+	starpu_task_wait(task);
+}
+
+/* Destroy FFTW plans, unregister and free buffers, and free tags */
+void
+STARPUFFT(destroy_plan)(STARPUFFT(plan) plan)
+{
+	int workerid, dim, i;
+
+	for (workerid = 0; workerid < starpu_worker_get_count(); workerid++)
+	{
+		switch (starpu_worker_get_type(workerid))
+		{
+		case STARPU_CPU_WORKER:
+#ifdef STARPU_HAVE_FFTW
+			if (PARALLEL)
+			{
+				_FFTW(destroy_plan)(plan->plans[workerid].plan1_cpu);
+				_FFTW(destroy_plan)(plan->plans[workerid].plan2_cpu);
+			}
+			else
+			{
+				_FFTW(destroy_plan)(plan->plans[workerid].plan_cpu);
+			}
+#endif
+			break;
+		case STARPU_CUDA_WORKER:
+#ifdef STARPU_USE_CUDA
+			/* FIXME: Can't deallocate */
+#endif
+			break;
+		default:
+			/* Do not care, we won't be executing anything there. */
+			break;
+		}
+	}
+
+	if (PARALLEL)
+	{
+		for (i = 0; i < plan->totsize1; i++)
+		{
+			starpu_data_unregister(plan->twisted1_handle[i]);
+			free(plan->twist1_tasks[i]);
+			starpu_data_unregister(plan->fft1_handle[i]);
+			free(plan->fft1_tasks[i]);
+		}
+
+		free(plan->twisted1_handle);
+		free(plan->twist1_tasks);
+		free(plan->fft1_handle);
+		free(plan->fft1_tasks);
+		free(plan->fft1_args);
+
+		free(plan->join_task);
+
+		for (i = 0; i < plan->totsize3; i++)
+		{
+			starpu_data_unregister(plan->twisted2_handle[i]);
+			free(plan->twist2_tasks[i]);
+			starpu_data_unregister(plan->fft2_handle[i]);
+			free(plan->fft2_tasks[i]);
+			free(plan->twist3_tasks[i]);
+		}
+
+		free(plan->end_task);
+
+		free(plan->twisted2_handle);
+		free(plan->twist2_tasks);
+		free(plan->fft2_handle);
+		free(plan->fft2_tasks);
+		free(plan->twist3_tasks);
+		free(plan->fft2_args);
+
+		for (dim = 0; dim < plan->dim; dim++)
+		{
+			starpu_data_unregister(plan->roots_handle[dim]);
+			free(plan->roots[dim]);
+		}
+
+		switch (plan->dim)
+		{
+		case 1:
+			STARPUFFT(free_1d_tags)(plan);
+			break;
+		case 2:
+			STARPUFFT(free_2d_tags)(plan);
+			break;
+		default:
+			STARPU_ABORT();
+			break;
+		}
+
+		free(plan->n1);
+		free(plan->n2);
+		STARPUFFT(free)(plan->twisted1);
+		STARPUFFT(free)(plan->fft1);
+		STARPUFFT(free)(plan->twisted2);
+		STARPUFFT(free)(plan->fft2);
+	}
+	free(plan->n);
+	free(plan);
+}
+
+void *
+STARPUFFT(malloc)(size_t n)
+{
+#ifdef STARPU_USE_CUDA
+	void *res;
+	starpu_malloc(&res, n);
+	return res;
+#else
+#  ifdef STARPU_HAVE_FFTW
+	return _FFTW(malloc)(n);
+#  else
+	return malloc(n);
+#  endif
+#endif
+}
+
+void
+STARPUFFT(free)(void *p)
+{
+#ifdef STARPU_USE_CUDA
+	starpu_free(p);
+#else
+#  ifdef STARPU_HAVE_FFTW
+	_FFTW(free)(p);
+#  else
+	free(p);
+#  endif
+#endif
+}
+
+void
+STARPUFFT(showstats)(FILE *out)
+{
+	int worker;
+	unsigned total;
+
+#define TIMING(begin,end) (double)((end.tv_sec - begin.tv_sec)*1000000 + (end.tv_usec - begin.tv_usec))
+#define MSTIMING(begin,end) (TIMING(begin,end)/1000.)
+	double paratiming = TIMING(start,end);
+	fprintf(out, "Tasks submission took %2.2f ms\n", MSTIMING(start,submit_tasks));
+	fprintf(out, "Tasks termination took %2.2f ms\n", MSTIMING(submit_tasks,end));
+
+	fprintf(out, "Total %2.2f ms\n", MSTIMING(start,end));
+
+	for (worker = 0, total = 0; worker < starpu_worker_get_count(); worker++)
+		total += task_per_worker[worker];
+
+	for (worker = 0; worker < starpu_worker_get_count(); worker++)
+	{
+		if (task_per_worker[worker])
+		{
+			char name[32];
+			starpu_worker_get_name(worker, name, sizeof(name));
+
+			unsigned long bytes = sizeof(STARPUFFT(complex))*samples_per_worker[worker];
+
+			fprintf(stderr, "\t%s -> %2.2f MB\t%2.2f\tMB/s\t%u %2.2f %%\n", name, (1.0*bytes)/(1024*1024), bytes/paratiming, task_per_worker[worker], (100.0*task_per_worker[worker])/total);
+		}
+	}
+}

+ 847 - 0
starpufft/starpufftx1d.c

@@ -0,0 +1,847 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2012  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ *
+ * Dumb parallel version
+ *
+ */
+
+#define DIV_1D 64
+
+  /*
+   * Overall strategy for an fft of size n:
+   * - perform n1 ffts of size n2
+   * - twiddle
+   * - perform n2 ffts of size n1
+   *
+   * - n1 defaults to DIV_1D, thus n2 defaults to n / DIV_1D.
+   *
+   * Precise tasks:
+   *
+   * - twist1: twist the whole n-element input (called "in") into n1 chunks of
+   *           size n2, by using n1 tasks taking the whole n-element input as a
+   *           R parameter and one n2 output as a W parameter. The result is
+   *           called twisted1.
+   * - fft1:   perform n1 ffts of size n2, by using n1 tasks doing one fft each. Also
+   *           twiddle the result to prepare for the fft2. The result is called
+   *           fft1.
+   * - join:   depends on all the fft1s, to gather the n1 results of size n2 in
+   *           the fft1 vector.
+   * - twist2: twist the fft1 vector into n2 chunks of size n1, called twisted2.
+   *           Since n2 is typically very large, this step is divided into
+   *           DIV_1D tasks, each of them handling n2/DIV_1D of the chunks.
+   * - fft2:   perform n2 ffts of size n1. This is divided into DIV_1D tasks of
+   *           n2/DIV_1D ffts, to be performed in batches. The result is called
+   *           fft2.
+   * - twist3: twist back the result of the fft2s above into the output buffer.
+   *           Only implemented on CPUs for simplicity of the gathering.
+   *
+   * The tag space thus uses 3 dimensions:
+   * - the number of the plan.
+   * - the step (TWIST1, FFT1, JOIN, TWIST2, FFT2, TWIST3, END)
+   * - an index i between 0 and DIV_1D-1.
+   */
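As a concrete, purely illustrative instance of this scheme: for n = 65536 with DIV_1D = 64, the plan takes n1 = 64 and n2 = 1024, so twist1 and fft1 each consist of 64 tasks working on 1024-point chunks, while twist2, fft2 and twist3 each consist of 64 batches of n3 = n2/DIV_1D = 16 FFTs of size 64; twist1 task i gathers twisted1[j] = in[i + j*n1] for j in [0, n2), exactly as the CPU and CUDA twist1 kernels below implement it.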
+
+#define STEP_TAG_1D(plan, step, i) _STEP_TAG(plan, step, i)
+
+#ifdef __STARPU_USE_CUDA
+/* twist1:
+ *
+ * Twist the full input vector (first parameter) into one chunk of size n2
+ * (second parameter) */
+static void
+STARPUFFT(twist1_1d_kernel_gpu)(void *descr[], void *_args)
+{
+	struct STARPUFFT(args) *args = _args;
+	STARPUFFT(plan) plan = args->plan;
+	int i = args->i;
+	int n1 = plan->n1[0];
+	int n2 = plan->n2[0];
+
+	_cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]);
+	_cufftComplex * restrict twisted1 = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]);
+	
+	STARPUFFT(cuda_twist1_1d_host)(in, twisted1, i, n1, n2);
+
+	cudaStreamSynchronize(starpu_cuda_get_local_stream());
+}
+
+/* fft1:
+ *
+ * Perform one fft of size n2 */
+static void
+STARPUFFT(fft1_1d_plan_gpu)(void *args)
+{
+	STARPUFFT(plan) plan = args;
+	int n2 = plan->n2[0];
+	int workerid = starpu_worker_get_id();
+	cufftResult cures;
+
+	cures = cufftPlan1d(&plan->plans[workerid].plan1_cuda, n2, _CUFFT_C2C, 1);
+	STARPU_ASSERT(cures == CUFFT_SUCCESS);
+	cufftSetStream(plan->plans[workerid].plan1_cuda, starpu_cuda_get_local_stream());
+	STARPU_ASSERT(cures == CUFFT_SUCCESS);
+}
+
+static void
+STARPUFFT(fft1_1d_kernel_gpu)(void *descr[], void *_args)
+{
+	struct STARPUFFT(args) *args = _args;
+	STARPUFFT(plan) plan = args->plan;
+	int i = args->i;
+	int n2 = plan->n2[0];
+	cufftResult cures;
+
+	_cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]);
+	_cufftComplex * restrict out = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]);
+	const _cufftComplex * restrict roots = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[2]);
+
+	int workerid = starpu_worker_get_id();
+
+	task_per_worker[workerid]++;
+
+	cures = _cufftExecC2C(plan->plans[workerid].plan1_cuda, in, out, plan->sign == -1 ? CUFFT_FORWARD : CUFFT_INVERSE);
+	STARPU_ASSERT(cures == CUFFT_SUCCESS);
+
+	STARPUFFT(cuda_twiddle_1d_host)(out, roots, n2, i);
+
+	cudaStreamSynchronize(starpu_cuda_get_local_stream());
+}
+
+/* fft2:
+ *
+ * Perform n3 = n2/DIV_1D ffts of size n1 */
+static void
+STARPUFFT(fft2_1d_plan_gpu)(void *args)
+{
+	STARPUFFT(plan) plan = args;
+	int n1 = plan->n1[0];
+	int n2 = plan->n2[0];
+	int n3 = n2/DIV_1D;
+	cufftResult cures;
+	int workerid = starpu_worker_get_id();
+
+	cures = cufftPlan1d(&plan->plans[workerid].plan2_cuda, n1, _CUFFT_C2C, n3);
+	STARPU_ASSERT(cures == CUFFT_SUCCESS);
+	cufftSetStream(plan->plans[workerid].plan2_cuda, starpu_cuda_get_local_stream());
+	STARPU_ASSERT(cures == CUFFT_SUCCESS);
+}
+
+static void
+STARPUFFT(fft2_1d_kernel_gpu)(void *descr[], void *_args)
+{
+	struct STARPUFFT(args) *args = _args;
+	STARPUFFT(plan) plan = args->plan;
+	cufftResult cures;
+
+	_cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]);
+	_cufftComplex * restrict out = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]);
+
+	int workerid = starpu_worker_get_id();
+
+	task_per_worker[workerid]++;
+
+	/* NOTE using batch support */
+	cures = _cufftExecC2C(plan->plans[workerid].plan2_cuda, in, out, plan->sign == -1 ? CUFFT_FORWARD : CUFFT_INVERSE);
+	STARPU_ASSERT(cures == CUFFT_SUCCESS);
+
+	cudaStreamSynchronize(starpu_cuda_get_local_stream());
+}
+#endif
+
+/* twist1:
+ *
+ * Twist the full input vector (first parameter) into one chunk of size n2
+ * (second parameter) */
+static void
+STARPUFFT(twist1_1d_kernel_cpu)(void *descr[], void *_args)
+{
+	struct STARPUFFT(args) *args = _args;
+	STARPUFFT(plan) plan = args->plan;
+	int i = args->i;
+	int j;
+	int n1 = plan->n1[0];
+	int n2 = plan->n2[0];
+
+	STARPUFFT(complex) * restrict in = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]);
+	STARPUFFT(complex) * restrict twisted1 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]);
+
+	/* printf("twist1 %d %g\n", i, (double) cabs(plan->in[i])); */
+
+	for (j = 0; j < n2; j++)
+		twisted1[j] = in[i+j*n1];
+}
+
+#ifdef STARPU_HAVE_FFTW
+/* fft1:
+ *
+ * Perform one fft of size n2 */
+static void
+STARPUFFT(fft1_1d_kernel_cpu)(void *descr[], void *_args)
+{
+	struct STARPUFFT(args) *args = _args;
+	STARPUFFT(plan) plan = args->plan;
+	int i = args->i;
+	int j;
+	int n2 = plan->n2[0];
+	int workerid = starpu_worker_get_id();
+
+	task_per_worker[workerid]++;
+
+	STARPUFFT(complex) * restrict twisted1 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]);
+	STARPUFFT(complex) * restrict fft1 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]);
+
+	/* printf("fft1 %d %g\n", i, (double) cabs(twisted1[0])); */
+
+	_FFTW(execute_dft)(plan->plans[workerid].plan1_cpu, twisted1, fft1);
+
+	/* twiddle fft1 buffer */
+	for (j = 0; j < n2; j++)
+		fft1[j] = fft1[j] * plan->roots[0][i*j];
+}
+#endif
+
+/* twist2:
+ *
+ * Twist the full vector (results of the fft1s) into one package of n2/DIV_1D
+ * chunks of size n1 */
+static void
+STARPUFFT(twist2_1d_kernel_cpu)(void *descr[], void *_args)
+{
+	struct STARPUFFT(args) *args = _args;
+	STARPUFFT(plan) plan = args->plan;
+	int jj = args->jj;	/* between 0 and DIV_1D */
+	int jjj;		/* between 0 and n3 */
+	int i;
+	int n1 = plan->n1[0];
+	int n2 = plan->n2[0];
+	int n3 = n2/DIV_1D;
+
+	STARPUFFT(complex) * restrict twisted2 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]);
+
+	/* printf("twist2 %d %g\n", jj, (double) cabs(plan->fft1[jj])); */
+
+	for (jjj = 0; jjj < n3; jjj++) {
+		int j = jj * n3 + jjj;
+		for (i = 0; i < n1; i++)
+			twisted2[jjj*n1+i] = plan->fft1[i*n2+j];
+	}
+}
+
+#ifdef STARPU_HAVE_FFTW
+/* fft2:
+ *
+ * Perform n3 = n2/DIV_1D ffts of size n1 */
+static void
+STARPUFFT(fft2_1d_kernel_cpu)(void *descr[], void *_args)
+{
+	struct STARPUFFT(args) *args = _args;
+	STARPUFFT(plan) plan = args->plan;
+	/* int jj = args->jj; */
+	int workerid = starpu_worker_get_id();
+
+	task_per_worker[workerid]++;
+
+	STARPUFFT(complex) * restrict twisted2 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]);
+	STARPUFFT(complex) * restrict fft2 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]);
+
+	/* printf("fft2 %d %g\n", jj, (double) cabs(twisted2[plan->totsize4-1])); */
+
+	_FFTW(execute_dft)(plan->plans[workerid].plan2_cpu, twisted2, fft2);
+}
+#endif
+
+/* twist3:
+ *
+ * Spread the package of n2/DIV_1D chunks of size n1 into the output vector */
+static void
+STARPUFFT(twist3_1d_kernel_cpu)(void *descr[], void *_args)
+{
+	struct STARPUFFT(args) *args = _args;
+	STARPUFFT(plan) plan = args->plan;
+	int jj = args->jj;	/* between 0 and DIV_1D */
+	int jjj;		/* between 0 and n3 */
+	int i;
+	int n1 = plan->n1[0];
+	int n2 = plan->n2[0];
+	int n3 = n2/DIV_1D;
+
+	const STARPUFFT(complex) * restrict fft2 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]);
+
+	/* printf("twist3 %d %g\n", jj, (double) cabs(fft2[0])); */
+
+	for (jjj = 0; jjj < n3; jjj++) {
+		int j = jj * n3 + jjj;
+		for (i = 0; i < n1; i++)
+			plan->out[i*n2+j] = fft2[jjj*n1+i];
+	}
+}
+
+/* Performance models for the 5 kinds of tasks */
+static struct starpu_perfmodel STARPUFFT(twist1_1d_model) = {
+	.type = STARPU_HISTORY_BASED,
+	.symbol = TYPE"twist1_1d"
+};
+
+static struct starpu_perfmodel STARPUFFT(fft1_1d_model) = {
+	.type = STARPU_HISTORY_BASED,
+	.symbol = TYPE"fft1_1d"
+};
+
+static struct starpu_perfmodel STARPUFFT(twist2_1d_model) = {
+	.type = STARPU_HISTORY_BASED,
+	.symbol = TYPE"twist2_1d"
+};
+
+static struct starpu_perfmodel STARPUFFT(fft2_1d_model) = {
+	.type = STARPU_HISTORY_BASED,
+	.symbol = TYPE"fft2_1d"
+};
+
+static struct starpu_perfmodel STARPUFFT(twist3_1d_model) = {
+	.type = STARPU_HISTORY_BASED,
+	.symbol = TYPE"twist3_1d"
+};
+
+/* codelet pointers for the 5 kinds of tasks */
+static struct starpu_codelet STARPUFFT(twist1_1d_codelet) = {
+	.where =
+#ifdef __STARPU_USE_CUDA
+		STARPU_CUDA|
+#endif
+		STARPU_CPU,
+#ifdef __STARPU_USE_CUDA
+	.cuda_funcs = {STARPUFFT(twist1_1d_kernel_gpu), NULL},
+#endif
+	.cpu_funcs = {STARPUFFT(twist1_1d_kernel_cpu), NULL},
+	CAN_EXECUTE
+	.model = &STARPUFFT(twist1_1d_model),
+	.nbuffers = 2,
+	.modes = {STARPU_R, STARPU_W}
+};
+
+static struct starpu_codelet STARPUFFT(fft1_1d_codelet) = {
+	.where =
+#ifdef __STARPU_USE_CUDA
+		STARPU_CUDA|
+#endif
+#ifdef STARPU_HAVE_FFTW
+		STARPU_CPU|
+#endif
+		0,
+#ifdef __STARPU_USE_CUDA
+	.cuda_funcs = {STARPUFFT(fft1_1d_kernel_gpu), NULL},
+#endif
+#ifdef STARPU_HAVE_FFTW
+	.cpu_funcs = {STARPUFFT(fft1_1d_kernel_cpu), NULL},
+#endif
+	CAN_EXECUTE
+	.model = &STARPUFFT(fft1_1d_model),
+	.nbuffers = 3,
+	.modes = {STARPU_R, STARPU_W, STARPU_R}
+};
+
+static struct starpu_codelet STARPUFFT(twist2_1d_codelet) = {
+	.where = STARPU_CPU,
+	.cpu_funcs = {STARPUFFT(twist2_1d_kernel_cpu), NULL},
+	CAN_EXECUTE
+	.model = &STARPUFFT(twist2_1d_model),
+	.nbuffers = 1,
+	.modes = {STARPU_W}
+};
+
+static struct starpu_codelet STARPUFFT(fft2_1d_codelet) = {
+	.where =
+#ifdef __STARPU_USE_CUDA
+		STARPU_CUDA|
+#endif
+#ifdef STARPU_HAVE_FFTW
+		STARPU_CPU|
+#endif
+		0,
+#ifdef __STARPU_USE_CUDA
+	.cuda_funcs = {STARPUFFT(fft2_1d_kernel_gpu), NULL},
+#endif
+#ifdef STARPU_HAVE_FFTW
+	.cpu_funcs = {STARPUFFT(fft2_1d_kernel_cpu), NULL},
+#endif
+	CAN_EXECUTE
+	.model = &STARPUFFT(fft2_1d_model),
+	.nbuffers = 2,
+	.modes = {STARPU_R, STARPU_W}
+};
+
+static struct starpu_codelet STARPUFFT(twist3_1d_codelet) = {
+	.where = STARPU_CPU,
+	.cpu_funcs = {STARPUFFT(twist3_1d_kernel_cpu), NULL},
+	CAN_EXECUTE
+	.model = &STARPUFFT(twist3_1d_model),
+	.nbuffers = 1,
+	.modes = {STARPU_R}
+};
+
+/*
+ *
+ * Sequential version
+ *
+ */
+
+#ifdef __STARPU_USE_CUDA
+/* Perform one fft of size n */
+static void
+STARPUFFT(fft_1d_plan_gpu)(void *args)
+{
+	STARPUFFT(plan) plan = args;
+	cufftResult cures;
+	int n = plan->n[0];
+	int workerid = starpu_worker_get_id();
+
+	cures = cufftPlan1d(&plan->plans[workerid].plan_cuda, n, _CUFFT_C2C, 1);
+	STARPU_ASSERT(cures == CUFFT_SUCCESS);
+	cufftSetStream(plan->plans[workerid].plan_cuda, starpu_cuda_get_local_stream());
+	STARPU_ASSERT(cures == CUFFT_SUCCESS);
+}
+
+static void
+STARPUFFT(fft_1d_kernel_gpu)(void *descr[], void *args)
+{
+	STARPUFFT(plan) plan = args;
+	cufftResult cures;
+
+	_cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]);
+	_cufftComplex * restrict out = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]);
+
+	int workerid = starpu_worker_get_id();
+
+	task_per_worker[workerid]++;
+
+	cures = _cufftExecC2C(plan->plans[workerid].plan_cuda, in, out, plan->sign == -1 ? CUFFT_FORWARD : CUFFT_INVERSE);
+	STARPU_ASSERT(cures == CUFFT_SUCCESS);
+
+	cudaStreamSynchronize(starpu_cuda_get_local_stream());
+}
+#endif
+
+#ifdef STARPU_HAVE_FFTW
+/* Perform one fft of size n */
+static void
+STARPUFFT(fft_1d_kernel_cpu)(void *descr[], void *_args)
+{
+	STARPUFFT(plan) plan = _args;
+	int workerid = starpu_worker_get_id();
+
+	task_per_worker[workerid]++;
+
+	STARPUFFT(complex) * restrict in = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]);
+	STARPUFFT(complex) * restrict out = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]);
+
+	_FFTW(execute_dft)(plan->plans[workerid].plan_cpu, in, out);
+}
+#endif
+
+static struct starpu_perfmodel STARPUFFT(fft_1d_model) = {
+	.type = STARPU_HISTORY_BASED,
+	.symbol = TYPE"fft_1d"
+};
+
+static struct starpu_codelet STARPUFFT(fft_1d_codelet) = {
+	.where =
+#ifdef __STARPU_USE_CUDA
+		STARPU_CUDA|
+#endif
+#ifdef STARPU_HAVE_FFTW
+		STARPU_CPU|
+#endif
+		0,
+#ifdef __STARPU_USE_CUDA
+	.cuda_funcs = {STARPUFFT(fft_1d_kernel_gpu), NULL},
+#endif
+#ifdef STARPU_HAVE_FFTW
+	.cpu_funcs = {STARPUFFT(fft_1d_kernel_cpu), NULL},
+#endif
+	CAN_EXECUTE
+	.model = &STARPUFFT(fft_1d_model),
+	.nbuffers = 2,
+	.modes = {STARPU_R, STARPU_W}
+};
+
+/* Planning:
+ *
+ * - For each CPU worker, we need to plan the two fftw stages.
+ * - For GPU workers, we need to do the planning in the CUDA context, so we do
+ *   this lazily through the initialised1 and initialised2 flags; TODO: use
+ *   starpu_execute_on_each_worker instead (done in the omp branch).
+ * - We allocate all the temporary buffers and register them to starpu.
+ * - We create all the tasks, but do not submit them yet. It will be possible
+ *   to reuse them at will to perform several ffts with the same planning.
+ */
+STARPUFFT(plan)
+STARPUFFT(plan_dft_1d)(int n, int sign, unsigned flags)
+{
+	int workerid;
+	int n1 = DIV_1D;
+	int n2 = n / n1;
+	int n3;
+	int z;
+	struct starpu_task *task;
+
+if (PARALLEL) {
+#ifdef __STARPU_USE_CUDA
+	/* cufft 1D limited to 8M elements */
+	while (n2 > 8 << 20) {
+		n1 *= 2;
+		n2 /= 2;
+	}
+#endif
+	STARPU_ASSERT(n == n1*n2);
+	STARPU_ASSERT(n1 < (1ULL << I_BITS));
+
+	/* distribute the n2 second ffts into DIV_1D packages */
+	n3 = n2 / DIV_1D;
+	STARPU_ASSERT(n2 == n3*DIV_1D);
+}
+
+	/* TODO: flags? Automatically set FFTW_MEASURE on calibration? */
+	STARPU_ASSERT(flags == 0);
+
+	STARPUFFT(plan) plan = malloc(sizeof(*plan));
+	memset(plan, 0, sizeof(*plan));
+
+if (PARALLEL) {
+	plan->number = STARPU_ATOMIC_ADD(&starpufft_last_plan_number, 1) - 1;
+
+	/* The plan number has a limited size */
+	STARPU_ASSERT(plan->number < (1ULL << NUMBER_BITS));
+}
+
+	/* Just one dimension */
+	plan->dim = 1;
+	plan->n = malloc(plan->dim * sizeof(*plan->n));
+	plan->n[0] = n;
+
+if (PARALLEL) {
+	check_dims(plan);
+
+	plan->n1 = malloc(plan->dim * sizeof(*plan->n1));
+	plan->n1[0] = n1;
+	plan->n2 = malloc(plan->dim * sizeof(*plan->n2));
+	plan->n2[0] = n2;
+}
+
+	/* Note: this is for coherency with the 2D case */
+	plan->totsize = n;
+
+if (PARALLEL) {
+	plan->totsize1 = n1;
+	plan->totsize2 = n2;
+	plan->totsize3 = DIV_1D;
+	plan->totsize4 = plan->totsize / plan->totsize3;
+}
+	plan->type = C2C;
+	plan->sign = sign;
+
+if (PARALLEL) {
+	/* Compute the w^k just once. */
+	compute_roots(plan);
+}
+
+	/* Initialize per-worker working set */
+	for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) {
+		switch (starpu_worker_get_type(workerid)) {
+		case STARPU_CPU_WORKER:
+#ifdef STARPU_HAVE_FFTW
+if (PARALLEL) {
+			/* first fft plan: one fft of size n2.
+			 * FFTW imposes that buffer pointers are known at
+			 * planning time. */
+			plan->plans[workerid].plan1_cpu = _FFTW(plan_dft_1d)(n2, NULL, (void*) 1, sign, _FFTW_FLAGS);
+			STARPU_ASSERT(plan->plans[workerid].plan1_cpu);
+
+			/* second fft plan: n3 ffts of size n1 */
+			plan->plans[workerid].plan2_cpu = _FFTW(plan_many_dft)(plan->dim,
+					plan->n1, n3,
+					NULL, NULL, 1, plan->totsize1,
+					(void*) 1, NULL, 1, plan->totsize1,
+					sign, _FFTW_FLAGS);
+			STARPU_ASSERT(plan->plans[workerid].plan2_cpu);
+} else {
+			/* fft plan: one fft of size n. */
+			plan->plans[workerid].plan_cpu = _FFTW(plan_dft_1d)(n, NULL, (void*) 1, sign, _FFTW_FLAGS);
+			STARPU_ASSERT(plan->plans[workerid].plan_cpu);
+}
+#else
+/* #warning libstarpufft can not work correctly if libfftw3 is not installed */
+#endif
+			break;
+		case STARPU_CUDA_WORKER:
+			break;
+		default:
+			/* Do not care, we won't be executing anything there. */
+			break;
+		}
+	}
+#ifdef __STARPU_USE_CUDA
+if (PARALLEL) {
+	starpu_execute_on_each_worker(STARPUFFT(fft1_1d_plan_gpu), plan, STARPU_CUDA);
+	starpu_execute_on_each_worker(STARPUFFT(fft2_1d_plan_gpu), plan, STARPU_CUDA);
+} else {
+	starpu_execute_on_each_worker(STARPUFFT(fft_1d_plan_gpu), plan, STARPU_CUDA);
+}
+#endif
+
+if (PARALLEL) {
+	/* Allocate buffers. */
+	plan->twisted1 = STARPUFFT(malloc)(plan->totsize * sizeof(*plan->twisted1));
+	memset(plan->twisted1, 0, plan->totsize * sizeof(*plan->twisted1));
+	plan->fft1 = STARPUFFT(malloc)(plan->totsize * sizeof(*plan->fft1));
+	memset(plan->fft1, 0, plan->totsize * sizeof(*plan->fft1));
+	plan->twisted2 = STARPUFFT(malloc)(plan->totsize * sizeof(*plan->twisted2));
+	memset(plan->twisted2, 0, plan->totsize * sizeof(*plan->twisted2));
+	plan->fft2 = STARPUFFT(malloc)(plan->totsize * sizeof(*plan->fft2));
+	memset(plan->fft2, 0, plan->totsize * sizeof(*plan->fft2));
+
+	/* Allocate handle arrays */
+	plan->twisted1_handle = malloc(plan->totsize1 * sizeof(*plan->twisted1_handle));
+	plan->fft1_handle = malloc(plan->totsize1 * sizeof(*plan->fft1_handle));
+	plan->twisted2_handle = malloc(plan->totsize3 * sizeof(*plan->twisted2_handle));
+	plan->fft2_handle = malloc(plan->totsize3 * sizeof(*plan->fft2_handle));
+
+	/* Allocate task arrays */
+	plan->twist1_tasks = malloc(plan->totsize1 * sizeof(*plan->twist1_tasks));
+	plan->fft1_tasks = malloc(plan->totsize1 * sizeof(*plan->fft1_tasks));
+	plan->twist2_tasks = malloc(plan->totsize3 * sizeof(*plan->twist2_tasks));
+	plan->fft2_tasks = malloc(plan->totsize3 * sizeof(*plan->fft2_tasks));
+	plan->twist3_tasks = malloc(plan->totsize3 * sizeof(*plan->twist3_tasks));
+
+	/* Allocate codelet argument arrays */
+	plan->fft1_args = malloc(plan->totsize1 * sizeof(*plan->fft1_args));
+	plan->fft2_args = malloc(plan->totsize3 * sizeof(*plan->fft2_args));
+
+	/* Create first-round tasks: DIV_1D tasks of type twist1 and fft1 */
+	for (z = 0; z < plan->totsize1; z++) {
+		int i = z;
+#define STEP_TAG(step)	STEP_TAG_1D(plan, step, i)
+
+		/* TODO: get rid of tags */
+
+		plan->fft1_args[z].plan = plan;
+		plan->fft1_args[z].i = i;
+
+		/* Register the twisted1 buffer of size n2. */
+		starpu_vector_data_register(&plan->twisted1_handle[z], 0, (uintptr_t) &plan->twisted1[z*plan->totsize2], plan->totsize2, sizeof(*plan->twisted1));
+		/* Register the fft1 buffer of size n2. */
+		starpu_vector_data_register(&plan->fft1_handle[z], 0, (uintptr_t) &plan->fft1[z*plan->totsize2], plan->totsize2, sizeof(*plan->fft1));
+
+		/* We'll need the result of fft1 on the CPU for the second
+		 * twist anyway, so tell starpu to not keep the fft1 buffer in
+		 * the GPU. */
+		starpu_data_set_wt_mask(plan->fft1_handle[z], 1<<0);
+
+		/* Create twist1 task */
+		plan->twist1_tasks[z] = task = starpu_task_create();
+		task->cl = &STARPUFFT(twist1_1d_codelet);
+		/* task->handles[0] = to be filled at execution to point
+		   to the application input. */
+		task->handles[1] = plan->twisted1_handle[z];
+		task->cl_arg = &plan->fft1_args[z];
+		task->tag_id = STEP_TAG(TWIST1);
+		task->use_tag = 1;
+		task->destroy = 0;
+
+		/* Tell that fft1 depends on twisted1 */
+		starpu_tag_declare_deps(STEP_TAG(FFT1),
+				1, STEP_TAG(TWIST1));
+
+		/* Create FFT1 task */
+		plan->fft1_tasks[z] = task = starpu_task_create();
+		task->cl = &STARPUFFT(fft1_1d_codelet);
+		task->handles[0] = plan->twisted1_handle[z];
+		task->handles[1] = plan->fft1_handle[z];
+		task->handles[2] = plan->roots_handle[0];
+		task->cl_arg = &plan->fft1_args[z];
+		task->tag_id = STEP_TAG(FFT1);
+		task->use_tag = 1;
+		task->destroy = 0;
+
+		/* Tell that the join task will depend on the fft1 task. */
+		starpu_tag_declare_deps(STEP_TAG_1D(plan, JOIN, 0),
+				1, STEP_TAG(FFT1));
+#undef STEP_TAG
+	}
+
+	/* Create the join task, only serving as a dependency point between
+	 * fft1 and twist2 tasks */
+	plan->join_task = task = starpu_task_create();
+	task->cl = NULL;
+	task->tag_id = STEP_TAG_1D(plan, JOIN, 0);
+	task->use_tag = 1;
+	task->destroy = 0;
+
+	/* Create second-round tasks: DIV_1D batches of n2/DIV_1D twist2, fft2,
+	 * and twist3 */
+	for (z = 0; z < plan->totsize3; z++) {
+		int jj = z;
+#define STEP_TAG(step)	STEP_TAG_1D(plan, step, jj)
+
+		plan->fft2_args[z].plan = plan;
+		plan->fft2_args[z].jj = jj;
+
+		/* Register n3 twisted2 buffers of size n1 */
+		starpu_vector_data_register(&plan->twisted2_handle[z], 0, (uintptr_t) &plan->twisted2[z*plan->totsize4], plan->totsize4, sizeof(*plan->twisted2));
+		starpu_vector_data_register(&plan->fft2_handle[z], 0, (uintptr_t) &plan->fft2[z*plan->totsize4], plan->totsize4, sizeof(*plan->fft2));
+
+		/* We'll need the result of fft2 on the CPU for the third
+		 * twist anyway, so tell starpu to not keep the fft2 buffer in
+		 * the GPU. */
+		starpu_data_set_wt_mask(plan->fft2_handle[z], 1<<0);
+
+		/* Tell that twisted2 depends on the join task */
+		starpu_tag_declare_deps(STEP_TAG(TWIST2),
+				1, STEP_TAG_1D(plan, JOIN, 0));
+
+		/* Create twist2 task */
+		plan->twist2_tasks[z] = task = starpu_task_create();
+		task->cl = &STARPUFFT(twist2_1d_codelet);
+		task->handles[0] = plan->twisted2_handle[z];
+		task->cl_arg = &plan->fft2_args[z];
+		task->tag_id = STEP_TAG(TWIST2);
+		task->use_tag = 1;
+		task->destroy = 0;
+
+		/* Tell that fft2 depends on twisted2 */
+		starpu_tag_declare_deps(STEP_TAG(FFT2),
+				1, STEP_TAG(TWIST2));
+
+		/* Create FFT2 task */
+		plan->fft2_tasks[z] = task = starpu_task_create();
+		task->cl = &STARPUFFT(fft2_1d_codelet);
+		task->handles[0] = plan->twisted2_handle[z];
+		task->handles[1] = plan->fft2_handle[z];
+		task->cl_arg = &plan->fft2_args[z];
+		task->tag_id = STEP_TAG(FFT2);
+		task->use_tag = 1;
+		task->destroy = 0;
+
+		/* Tell that twist3 depends on fft2 */
+		starpu_tag_declare_deps(STEP_TAG(TWIST3),
+				1, STEP_TAG(FFT2));
+
+		/* Create twist3 tasks */
+		/* These run only on CPUs and thus write directly into the
+		 * application output buffer. */
+		plan->twist3_tasks[z] = task = starpu_task_create();
+		task->cl = &STARPUFFT(twist3_1d_codelet);
+		task->handles[0] = plan->fft2_handle[z];
+		task->cl_arg = &plan->fft2_args[z];
+		task->tag_id = STEP_TAG(TWIST3);
+		task->use_tag = 1;
+		task->destroy = 0;
+
+		/* Tell that to be completely finished we need to have finished
+		 * this twist3 task */
+		starpu_tag_declare_deps(STEP_TAG_1D(plan, END, 0),
+				1, STEP_TAG(TWIST3));
+#undef STEP_TAG
+	}
+
+	/* Create end task, only serving as a join point. */
+	plan->end_task = task = starpu_task_create();
+	task->cl = NULL;
+	task->tag_id = STEP_TAG_1D(plan, END, 0);
+	task->use_tag = 1;
+	task->destroy = 0;
+
+}
+
+	return plan;
+}
+
+/* Actually submit all the tasks. */
+static struct starpu_task *
+STARPUFFT(start1dC2C)(STARPUFFT(plan) plan, starpu_data_handle_t in, starpu_data_handle_t out)
+{
+	STARPU_ASSERT(plan->type == C2C);
+	int z;
+	int ret;
+
+if (PARALLEL) {
+	for (z=0; z < plan->totsize1; z++) {
+		ret = starpu_task_submit(plan->twist1_tasks[z]);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+		ret = starpu_task_submit(plan->fft1_tasks[z]);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	ret = starpu_task_submit(plan->join_task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	for (z=0; z < plan->totsize3; z++) {
+		ret = starpu_task_submit(plan->twist2_tasks[z]);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+		ret = starpu_task_submit(plan->fft2_tasks[z]);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+		ret = starpu_task_submit(plan->twist3_tasks[z]);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	ret = starpu_task_submit(plan->end_task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	return plan->end_task;
+} else /* !PARALLEL */ {
+	struct starpu_task *task;
+
+	/* Create FFT task */
+	task = starpu_task_create();
+	task->detach = 0;
+	task->cl = &STARPUFFT(fft_1d_codelet);
+	task->handles[0] = in;
+	task->handles[1] = out;
+	task->cl_arg = plan;
+
+	ret = starpu_task_submit(task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	return task;
+}
+}
+
+/* Free all the tags. The generic code handles freeing the buffers. */
+static void
+STARPUFFT(free_1d_tags)(STARPUFFT(plan) plan)
+{
+	unsigned i;
+	int n1 = plan->n1[0];
+
+	if (!PARALLEL)
+		return;
+
+	for (i = 0; i < n1; i++) {
+		starpu_tag_remove(STEP_TAG_1D(plan, TWIST1, i));
+		starpu_tag_remove(STEP_TAG_1D(plan, FFT1, i));
+	}
+
+	starpu_tag_remove(STEP_TAG_1D(plan, JOIN, 0));
+
+	for (i = 0; i < DIV_1D; i++) {
+		starpu_tag_remove(STEP_TAG_1D(plan, TWIST2, i));
+		starpu_tag_remove(STEP_TAG_1D(plan, FFT2, i));
+		starpu_tag_remove(STEP_TAG_1D(plan, TWIST3, i));
+	}
+
+	starpu_tag_remove(STEP_TAG_1D(plan, END, 0));
+}
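
Note: the task graph built above stays hidden behind the plan/execute interface declared in starpufft/starpufft.h. As a rough usage sketch for the double-precision 1D case (entry-point names assumed from that header's STARPUFFT() naming convention, i.e. starpufft_plan_dft_1d, starpufft_execute, starpufft_destroy_plan and starpufft_malloc/starpufft_free, as exercised by starpufft/examples/testx.c), an application would do something like:

#include <complex.h>
#include <starpu.h>
#include <starpufft.h>

int main(void)
{
	int i, n = 1 << 16;	/* divisible by DIV_1D, so the parallel path applies */

	if (starpu_init(NULL) != 0)
		return 1;

	/* Allocate through starpufft so the buffers can be pinned for GPU transfers. */
	double complex *in = starpufft_malloc(n * sizeof(*in));
	double complex *out = starpufft_malloc(n * sizeof(*out));
	for (i = 0; i < n; i++)
		in[i] = i;

	/* sign == -1 requests the forward transform, the same convention as FFTW/cuFFT above. */
	starpufft_plan plan = starpufft_plan_dft_1d(n, -1, 0);
	starpufft_execute(plan, in, out);	/* runs the twist/fft task graph built above */

	starpufft_destroy_plan(plan);
	starpufft_free(in);
	starpufft_free(out);
	starpu_shutdown();
	return 0;
}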

+ 850 - 0
starpufft/starpufftx2d.c

@@ -0,0 +1,850 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2012  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#define DIV_2D_N 8
+#define DIV_2D_M 8
+
+#define I_SHIFT (I_BITS/2)
+#define J_BITS I_SHIFT
+
+#define STEP_TAG_2D(plan, step, i, j) _STEP_TAG(plan, step, ((starpu_tag_t) i << I_SHIFT) | (starpu_tag_t) j)
+
+#ifdef __STARPU_USE_CUDA
+/* Twist the full vector into an (n2,m2) chunk */
+static void
+STARPUFFT(twist1_2d_kernel_gpu)(void *descr[], void *_args)
+{
+	struct STARPUFFT(args) *args = _args;
+	STARPUFFT(plan) plan = args->plan;
+	int i = args->i;
+	int j = args->j;
+	int n1 = plan->n1[0];
+	int n2 = plan->n2[0];
+	int m1 = plan->n1[1];
+	int m2 = plan->n2[1];
+
+	_cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]);
+	_cufftComplex * restrict twisted1 = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]);
+
+	STARPUFFT(cuda_twist1_2d_host)(in, twisted1, i, j, n1, n2, m1, m2);
+	cudaStreamSynchronize(starpu_cuda_get_local_stream());
+}
+
+/* fft1:
+ *
+ * Perform one fft of size n2,m2 */
+static void
+STARPUFFT(fft1_2d_plan_gpu)(void *args)
+{
+	STARPUFFT(plan) plan = args;
+	int n2 = plan->n2[0];
+	int m2 = plan->n2[1];
+	int workerid = starpu_worker_get_id();
+	cufftResult cures;
+
+	cures = cufftPlan2d(&plan->plans[workerid].plan1_cuda, n2, m2, _CUFFT_C2C);
+	STARPU_ASSERT(cures == CUFFT_SUCCESS);
+	cures = cufftSetStream(plan->plans[workerid].plan1_cuda, starpu_cuda_get_local_stream());
+	STARPU_ASSERT(cures == CUFFT_SUCCESS);
+}
+
+static void
+STARPUFFT(fft1_2d_kernel_gpu)(void *descr[], void *_args)
+{
+	struct STARPUFFT(args) *args = _args;
+	STARPUFFT(plan) plan = args->plan;
+	int i = args->i;
+	int j = args->j;
+	int n2 = plan->n2[0];
+	int m2 = plan->n2[1];
+	cufftResult cures;
+
+	_cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]);
+	_cufftComplex * restrict out = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]);
+	const _cufftComplex * restrict roots0 = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[2]);
+	const _cufftComplex * restrict roots1 = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[3]);
+
+	int workerid = starpu_worker_get_id();
+
+	task_per_worker[workerid]++;
+
+	cures = _cufftExecC2C(plan->plans[workerid].plan1_cuda, in, out, plan->sign == -1 ? CUFFT_FORWARD : CUFFT_INVERSE);
+	STARPU_ASSERT(cures == CUFFT_SUCCESS);
+
+	/* synchronization is done after the twiddling */
+	STARPUFFT(cuda_twiddle_2d_host)(out, roots0, roots1, n2, m2, i, j);
+
+	cudaStreamSynchronize(starpu_cuda_get_local_stream());
+}
+
+/* fft2:
+ *
+ * Perform n3*m3 ffts of size n1,m1 */
+static void
+STARPUFFT(fft2_2d_plan_gpu)(void *args)
+{
+	STARPUFFT(plan) plan = args;
+	int n1 = plan->n1[0];
+	int m1 = plan->n1[1];
+	cufftResult cures;
+	int workerid = starpu_worker_get_id();
+
+	cures = cufftPlan2d(&plan->plans[workerid].plan2_cuda, n1, m1, _CUFFT_C2C);
+	STARPU_ASSERT(cures == CUFFT_SUCCESS);
+	cures = cufftSetStream(plan->plans[workerid].plan2_cuda, starpu_cuda_get_local_stream());
+	STARPU_ASSERT(cures == CUFFT_SUCCESS);
+}
+
+static void
+STARPUFFT(fft2_2d_kernel_gpu)(void *descr[], void *_args)
+{
+	struct STARPUFFT(args) *args = _args;
+	STARPUFFT(plan) plan = args->plan;
+	int n1 = plan->n1[0];
+	int n2 = plan->n2[0];
+	int m1 = plan->n1[1];
+	int m2 = plan->n2[1];
+	int n3 = n2/DIV_2D_N;
+	int m3 = m2/DIV_2D_M;
+	int n;
+	cufftResult cures;
+
+	_cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]);
+	_cufftComplex * restrict out = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]);
+
+	int workerid = starpu_worker_get_id();
+
+	task_per_worker[workerid]++;
+
+	for (n = 0; n < n3*m3; n++) {
+		cures = _cufftExecC2C(plan->plans[workerid].plan2_cuda, in + n * n1*m1, out + n * n1*m1, plan->sign == -1 ? CUFFT_FORWARD : CUFFT_INVERSE);
+		STARPU_ASSERT(cures == CUFFT_SUCCESS);
+	}
+
+	cudaStreamSynchronize(starpu_cuda_get_local_stream());
+}
+#endif
+
+/* Twist the full vector into an (n2,m2) chunk */
+static void
+STARPUFFT(twist1_2d_kernel_cpu)(void *descr[], void *_args)
+{
+	struct STARPUFFT(args) *args = _args;
+	STARPUFFT(plan) plan = args->plan;
+	int i = args->i;
+	int j = args->j;
+	int k, l;
+	int n1 = plan->n1[0];
+	int n2 = plan->n2[0];
+	int m1 = plan->n1[1];
+	int m2 = plan->n2[1];
+	int m = plan->n[1];
+
+	STARPUFFT(complex) * restrict in = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]);
+	STARPUFFT(complex) * restrict twisted1 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]);
+
+	/* printf("twist1 %d %d %g\n", i, j, (double) cabs(plan->in[i+j])); */
+
+	for (k = 0; k < n2; k++)
+		for (l = 0; l < m2; l++)
+			twisted1[k*m2+l] = in[i*m+j+k*m*n1+l*m1];
+}
+
+#ifdef STARPU_HAVE_FFTW
+/* Perform an n2,m2 fft */
+static void
+STARPUFFT(fft1_2d_kernel_cpu)(void *descr[], void *_args)
+{
+	struct STARPUFFT(args) *args = _args;
+	STARPUFFT(plan) plan = args->plan;
+	int i = args->i;
+	int j = args->j;
+	int k, l;
+	int n2 = plan->n2[0];
+	int m2 = plan->n2[1];
+	int workerid = starpu_worker_get_id();
+
+	task_per_worker[workerid]++;
+
+	STARPUFFT(complex) *twisted1 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]);
+	STARPUFFT(complex) *fft1 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]);
+
+	/* printf("fft1 %d %d %g\n", i, j, (double) cabs(twisted1[0])); */
+
+	_FFTW(execute_dft)(plan->plans[workerid].plan1_cpu, twisted1, fft1);
+	for (k = 0; k < n2; k++)
+		for (l = 0; l < m2; l++)
+			fft1[k*m2 + l] = fft1[k*m2 + l] * plan->roots[0][i*k] * plan->roots[1][j*l];
+}
+#endif
+
+/* Twist the full vector into a package of n2/DIV_2D_N,m2/DIV_2D_M (n1,m1) chunks */
+static void
+STARPUFFT(twist2_2d_kernel_cpu)(void *descr[], void *_args)
+{
+	struct STARPUFFT(args) *args = _args;
+	STARPUFFT(plan) plan = args->plan;
+	int kk = args->kk;	/* between 0 and DIV_2D_N */
+	int ll = args->ll;	/* between 0 and DIV_2D_M */
+	int kkk, lll;		/* between 0,0 and n3,m3 */
+	int i, j;
+	int n1 = plan->n1[0];
+	int n2 = plan->n2[0];
+	int m1 = plan->n1[1];
+	int m2 = plan->n2[1];
+	int n3 = n2/DIV_2D_N;
+	int m3 = m2/DIV_2D_M;
+
+	STARPUFFT(complex) * restrict twisted2 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]);
+
+	/* printf("twist2 %d %d %g\n", kk, ll, (double) cabs(plan->fft1[kk+ll])); */
+
+	for (kkk = 0; kkk < n3; kkk++) {
+		int k = kk * n3 + kkk;
+		for (lll = 0; lll < m3; lll++) {
+			int l = ll * m3 + lll;
+			for (i = 0; i < n1; i++)
+				for (j = 0; j < m1; j++)
+					twisted2[kkk*m3*n1*m1+lll*n1*m1+i*m1+j] = plan->fft1[i*n1*n2*m2+j*n2*m2+k*m2+l];
+		}
+	}
+}
+
+#ifdef STARPU_HAVE_FFTW
+/* Perform (n2/DIV_2D_N)*(m2/DIV_2D_M) (n1,m1) ffts */
+static void
+STARPUFFT(fft2_2d_kernel_cpu)(void *descr[], void *_args)
+{
+	struct STARPUFFT(args) *args = _args;
+	STARPUFFT(plan) plan = args->plan;
+	/* int kk = args->kk; */
+	/* int ll = args->ll; */
+	int workerid = starpu_worker_get_id();
+
+	task_per_worker[workerid]++;
+
+	STARPUFFT(complex) *twisted2 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]);
+	STARPUFFT(complex) *fft2 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]);
+
+	/* printf("fft2 %d %d %g\n", kk, ll, (double) cabs(twisted2[plan->totsize4-1])); */
+
+	_FFTW(execute_dft)(plan->plans[workerid].plan2_cpu, twisted2, fft2);
+}
+#endif
+
+/* Spread the package of (n2/DIV_2D_N)*(m2/DIV_2D_M) (n1,m1) chunks into the full vector */
+static void
+STARPUFFT(twist3_2d_kernel_cpu)(void *descr[], void *_args)
+{
+	struct STARPUFFT(args) *args = _args;
+	STARPUFFT(plan) plan = args->plan;
+	int kk = args->kk;	/* between 0 and DIV_2D_N */
+	int ll = args->ll;	/* between 0 and DIV_2D_M */
+	int kkk, lll;		/* between 0,0 and n3,m3 */
+	int i, j;
+	int n1 = plan->n1[0];
+	int n2 = plan->n2[0];
+	int m1 = plan->n1[1];
+	int m2 = plan->n2[1];
+	int n3 = n2/DIV_2D_N;
+	int m3 = m2/DIV_2D_M;
+	int m = plan->n[1];
+
+	const STARPUFFT(complex) * restrict fft2 = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]);
+
+	/* printf("twist3 %d %d %g\n", kk, ll, (double) cabs(fft2[0])); */
+
+	for (kkk = 0; kkk < n3; kkk++) {
+		int k = kk * n3 + kkk;
+		for (lll = 0; lll < m3; lll++) {
+			int l = ll * m3 + lll;
+			for (i = 0; i < n1; i++)
+				for (j = 0; j < m1; j++)
+					plan->out[i*n2*m+j*m2+k*m+l] = fft2[kkk*m3*n1*m1+lll*n1*m1+i*m1+j];
+		}
+	}
+}
+
+struct starpu_perfmodel STARPUFFT(twist1_2d_model) = {
+	.type = STARPU_HISTORY_BASED,
+	.symbol = TYPE"twist1_2d"
+};
+
+struct starpu_perfmodel STARPUFFT(fft1_2d_model) = {
+	.type = STARPU_HISTORY_BASED,
+	.symbol = TYPE"fft1_2d"
+};
+
+struct starpu_perfmodel STARPUFFT(twist2_2d_model) = {
+	.type = STARPU_HISTORY_BASED,
+	.symbol = TYPE"twist2_2d"
+};
+
+struct starpu_perfmodel STARPUFFT(fft2_2d_model) = {
+	.type = STARPU_HISTORY_BASED,
+	.symbol = TYPE"fft2_2d"
+};
+
+struct starpu_perfmodel STARPUFFT(twist3_2d_model) = {
+	.type = STARPU_HISTORY_BASED,
+	.symbol = TYPE"twist3_2d"
+};
+
+static struct starpu_codelet STARPUFFT(twist1_2d_codelet) = {
+	.where =
+#ifdef __STARPU_USE_CUDA
+		STARPU_CUDA|
+#endif
+		STARPU_CPU,
+#ifdef __STARPU_USE_CUDA
+	.cuda_funcs = {STARPUFFT(twist1_2d_kernel_gpu), NULL},
+#endif
+	.cpu_funcs = {STARPUFFT(twist1_2d_kernel_cpu), NULL},
+	CAN_EXECUTE
+	.model = &STARPUFFT(twist1_2d_model),
+	.nbuffers = 2,
+	.modes = {STARPU_R, STARPU_W}
+};
+
+static struct starpu_codelet STARPUFFT(fft1_2d_codelet) = {
+	.where =
+#ifdef __STARPU_USE_CUDA
+		STARPU_CUDA|
+#endif
+#ifdef STARPU_HAVE_FFTW
+		STARPU_CPU|
+#endif
+		0,
+#ifdef __STARPU_USE_CUDA
+	.cuda_funcs = {STARPUFFT(fft1_2d_kernel_gpu), NULL},
+#endif
+#ifdef STARPU_HAVE_FFTW
+	.cpu_funcs = {STARPUFFT(fft1_2d_kernel_cpu), NULL},
+#endif
+	CAN_EXECUTE
+	.model = &STARPUFFT(fft1_2d_model),
+	.nbuffers = 4,
+	.modes = {STARPU_R, STARPU_W, STARPU_R, STARPU_R}
+};
+
+static struct starpu_codelet STARPUFFT(twist2_2d_codelet) = {
+	.where = STARPU_CPU,
+	.cpu_funcs = {STARPUFFT(twist2_2d_kernel_cpu), NULL},
+	CAN_EXECUTE
+	.model = &STARPUFFT(twist2_2d_model),
+	.nbuffers = 1,
+	.modes = {STARPU_W}
+};
+
+static struct starpu_codelet STARPUFFT(fft2_2d_codelet) = {
+	.where =
+#ifdef __STARPU_USE_CUDA
+		STARPU_CUDA|
+#endif
+#ifdef STARPU_HAVE_FFTW
+		STARPU_CPU|
+#endif
+		0,
+#ifdef __STARPU_USE_CUDA
+	.cuda_funcs = {STARPUFFT(fft2_2d_kernel_gpu), NULL},
+#endif
+#ifdef STARPU_HAVE_FFTW
+	.cpu_funcs = {STARPUFFT(fft2_2d_kernel_cpu), NULL},
+#endif
+	CAN_EXECUTE
+	.model = &STARPUFFT(fft2_2d_model),
+	.nbuffers = 2,
+	.modes = {STARPU_R, STARPU_W}
+};
+
+static struct starpu_codelet STARPUFFT(twist3_2d_codelet) = {
+	.where = STARPU_CPU,
+	.cpu_funcs = {STARPUFFT(twist3_2d_kernel_cpu), NULL},
+	CAN_EXECUTE
+	.model = &STARPUFFT(twist3_2d_model),
+	.nbuffers = 1,
+	.modes = {STARPU_R}
+};
+
+/*
+ *
+ * Sequential version
+ *
+ */
+
+#ifdef __STARPU_USE_CUDA
+/* Perform one fft of size n,m */
+static void
+STARPUFFT(fft_2d_plan_gpu)(void *args)
+{
+	STARPUFFT(plan) plan = args;
+	cufftResult cures;
+	int n = plan->n[0];
+	int m = plan->n[1];
+	int workerid = starpu_worker_get_id();
+
+	cures = cufftPlan2d(&plan->plans[workerid].plan_cuda, n, m, _CUFFT_C2C);
+	STARPU_ASSERT(cures == CUFFT_SUCCESS);
+	cures = cufftSetStream(plan->plans[workerid].plan_cuda, starpu_cuda_get_local_stream());
+	STARPU_ASSERT(cures == CUFFT_SUCCESS);
+}
+
+static void
+STARPUFFT(fft_2d_kernel_gpu)(void *descr[], void *args)
+{
+	STARPUFFT(plan) plan = args;
+	cufftResult cures;
+
+	_cufftComplex * restrict in = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[0]);
+	_cufftComplex * restrict out = (_cufftComplex *)STARPU_VECTOR_GET_PTR(descr[1]);
+
+	int workerid = starpu_worker_get_id();
+
+	task_per_worker[workerid]++;
+
+	cures = _cufftExecC2C(plan->plans[workerid].plan_cuda, in, out, plan->sign == -1 ? CUFFT_FORWARD : CUFFT_INVERSE);
+	STARPU_ASSERT(cures == CUFFT_SUCCESS);
+
+	cudaStreamSynchronize(starpu_cuda_get_local_stream());
+}
+#endif
+
+#ifdef STARPU_HAVE_FFTW
+/* Perform one fft of size n,m */
+static void
+STARPUFFT(fft_2d_kernel_cpu)(void *descr[], void *_args)
+{
+	STARPUFFT(plan) plan = _args;
+	int workerid = starpu_worker_get_id();
+
+	task_per_worker[workerid]++;
+
+	STARPUFFT(complex) * restrict in = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[0]);
+	STARPUFFT(complex) * restrict out = (STARPUFFT(complex) *)STARPU_VECTOR_GET_PTR(descr[1]);
+
+	_FFTW(execute_dft)(plan->plans[workerid].plan_cpu, in, out);
+}
+#endif
+
+static struct starpu_perfmodel STARPUFFT(fft_2d_model) = {
+	.type = STARPU_HISTORY_BASED,
+	.symbol = TYPE"fft_2d"
+};
+
+static struct starpu_codelet STARPUFFT(fft_2d_codelet) = {
+	.where =
+#ifdef __STARPU_USE_CUDA
+		STARPU_CUDA|
+#endif
+#ifdef STARPU_HAVE_FFTW
+		STARPU_CPU|
+#endif
+		0,
+#ifdef __STARPU_USE_CUDA
+	.cuda_funcs = {STARPUFFT(fft_2d_kernel_gpu), NULL},
+#endif
+#ifdef STARPU_HAVE_FFTW
+	.cpu_funcs = {STARPUFFT(fft_2d_kernel_cpu), NULL},
+#endif
+	CAN_EXECUTE
+	.model = &STARPUFFT(fft_2d_model),
+	.nbuffers = 2,
+	.modes = {STARPU_R, STARPU_W}
+};
+
+STARPUFFT(plan)
+STARPUFFT(plan_dft_2d)(int n, int m, int sign, unsigned flags)
+{
+	int workerid;
+	int n1 = DIV_2D_N;
+	int n2 = n / n1;
+	int n3;
+	int m1 = DIV_2D_M;
+	int m2 = m / m1;
+	int m3;
+	int z;
+	struct starpu_task *task;
+
+if (PARALLEL) {
+	/*
+	 * Simple strategy:
+	 *
+	 * - twist1: twist input in n1*m1 (n2,m2) chunks
+	 * - fft1:   perform n1*m1 (n2,m2) ffts
+	 * - twist2: twist into n2*m2 (n1,m1) chunks distributed in
+	 *           DIV_2D_N*DIV_2D_M groups
+	 * - fft2:   perform DIV_2D_N*DIV_2D_M times n3*m3 (n1,m1) ffts
+	 * - twist3: twist back into output
+	 */
+
+#ifdef __STARPU_USE_CUDA
+	/* cufft 2D-3D limited to [2,16384] */
+	while (n2 > 16384) {
+		n1 *= 2;
+		n2 /= 2;
+	}
+#endif
+	STARPU_ASSERT(n == n1*n2);
+	STARPU_ASSERT(n1 < (1ULL << J_BITS));
+
+
+#ifdef __STARPU_USE_CUDA
+	/* cufft 2D-3D limited to [2,16384] */
+	while (m2 > 16384) {
+		m1 *= 2;
+		m2 /= 2;
+	}
+#endif
+	STARPU_ASSERT(m == m1*m2);
+	STARPU_ASSERT(m1 < (1ULL << J_BITS));
+
+	/* distribute the n2*m2 second ffts into DIV_2D_N*DIV_2D_M packages */
+	n3 = n2 / DIV_2D_N;
+	STARPU_ASSERT(n2 == n3*DIV_2D_N);
+	m3 = m2 / DIV_2D_M;
+	STARPU_ASSERT(m2 == m3*DIV_2D_M);
+}
+
+	/* TODO: flags? Automatically set FFTW_MEASURE on calibration? */
+	STARPU_ASSERT(flags == 0);
+
+	STARPUFFT(plan) plan = malloc(sizeof(*plan));
+	memset(plan, 0, sizeof(*plan));
+
+if (PARALLEL) {
+	plan->number = STARPU_ATOMIC_ADD(&starpufft_last_plan_number, 1) - 1;
+
+	/* 4bit limitation in the tag space */
+	STARPU_ASSERT(plan->number < (1ULL << NUMBER_BITS));
+}
+
+	plan->dim = 2;
+	plan->n = malloc(plan->dim * sizeof(*plan->n));
+	plan->n[0] = n;
+	plan->n[1] = m;
+
+if (PARALLEL) {
+	check_dims(plan);
+
+	plan->n1 = malloc(plan->dim * sizeof(*plan->n1));
+	plan->n1[0] = n1;
+	plan->n1[1] = m1;
+	plan->n2 = malloc(plan->dim * sizeof(*plan->n2));
+	plan->n2[0] = n2;
+	plan->n2[1] = m2;
+}
+
+	plan->totsize = n * m;
+
+if (PARALLEL) {
+	plan->totsize1 = n1 * m1;
+	plan->totsize2 = n2 * m2;
+	plan->totsize3 = DIV_2D_N * DIV_2D_M;
+	plan->totsize4 = plan->totsize / plan->totsize3;
+}
+	plan->type = C2C;
+	plan->sign = sign;
+
+if (PARALLEL) {
+	/* Compute the w^k just once. */
+	compute_roots(plan);
+}
+
+	/* Initialize per-worker working set */
+	for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) {
+		switch (starpu_worker_get_type(workerid)) {
+		case STARPU_CPU_WORKER:
+#ifdef STARPU_HAVE_FFTW
+if (PARALLEL) {
+			/* first fft plan: one n2*m2 fft */
+			plan->plans[workerid].plan1_cpu = _FFTW(plan_dft_2d)(n2, m2, NULL, (void*) 1, sign, _FFTW_FLAGS);
+			STARPU_ASSERT(plan->plans[workerid].plan1_cpu);
+
+			/* second fft plan: n3*m3 n1*m1 ffts */
+			plan->plans[workerid].plan2_cpu = _FFTW(plan_many_dft)(plan->dim,
+					plan->n1, n3*m3,
+					NULL, NULL, 1, plan->totsize1,
+					(void*) 1, NULL, 1, plan->totsize1,
+					sign, _FFTW_FLAGS);
+			STARPU_ASSERT(plan->plans[workerid].plan2_cpu);
+} else {
+			/* fft plan: one fft of size n, m. */
+			plan->plans[workerid].plan_cpu = _FFTW(plan_dft_2d)(n, m, NULL, (void*) 1, sign, _FFTW_FLAGS);
+			STARPU_ASSERT(plan->plans[workerid].plan_cpu);
+}
+#else
+/* #warning libstarpufft cannot work correctly if libfftw3 is not installed */
+#endif
+			break;
+		case STARPU_CUDA_WORKER:
+			break;
+		default:
+			/* Do not care, we won't be executing anything there. */
+			break;
+		}
+	}
+#ifdef __STARPU_USE_CUDA
+if (PARALLEL) {
+	starpu_execute_on_each_worker(STARPUFFT(fft1_2d_plan_gpu), plan, STARPU_CUDA);
+	starpu_execute_on_each_worker(STARPUFFT(fft2_2d_plan_gpu), plan, STARPU_CUDA);
+} else {
+	starpu_execute_on_each_worker(STARPUFFT(fft_2d_plan_gpu), plan, STARPU_CUDA);
+}
+#endif
+
+if (PARALLEL) {
+	/* Allocate buffers. */
+	plan->twisted1 = STARPUFFT(malloc)(plan->totsize * sizeof(*plan->twisted1));
+	memset(plan->twisted1, 0, plan->totsize * sizeof(*plan->twisted1));
+	plan->fft1 = STARPUFFT(malloc)(plan->totsize * sizeof(*plan->fft1));
+	memset(plan->fft1, 0, plan->totsize * sizeof(*plan->fft1));
+	plan->twisted2 = STARPUFFT(malloc)(plan->totsize * sizeof(*plan->twisted2));
+	memset(plan->twisted2, 0, plan->totsize * sizeof(*plan->twisted2));
+	plan->fft2 = STARPUFFT(malloc)(plan->totsize * sizeof(*plan->fft2));
+	memset(plan->fft2, 0, plan->totsize * sizeof(*plan->fft2));
+
+	/* Allocate handle arrays */
+	plan->twisted1_handle = malloc(plan->totsize1 * sizeof(*plan->twisted1_handle));
+	plan->fft1_handle = malloc(plan->totsize1 * sizeof(*plan->fft1_handle));
+	plan->twisted2_handle = malloc(plan->totsize3 * sizeof(*plan->twisted2_handle));
+	plan->fft2_handle = malloc(plan->totsize3 * sizeof(*plan->fft2_handle));
+
+	/* Allocate task arrays */
+	plan->twist1_tasks = malloc(plan->totsize1 * sizeof(*plan->twist1_tasks));
+	plan->fft1_tasks = malloc(plan->totsize1 * sizeof(*plan->fft1_tasks));
+	plan->twist2_tasks = malloc(plan->totsize3 * sizeof(*plan->twist2_tasks));
+	plan->fft2_tasks = malloc(plan->totsize3 * sizeof(*plan->fft2_tasks));
+	plan->twist3_tasks = malloc(plan->totsize3 * sizeof(*plan->twist3_tasks));
+
+	/* Allocate codelet argument arrays */
+	plan->fft1_args = malloc(plan->totsize1 * sizeof(*plan->fft1_args));
+	plan->fft2_args = malloc(plan->totsize3 * sizeof(*plan->fft2_args));
+
+	/* Create first-round tasks */
+	for (z = 0; z < plan->totsize1; z++) {
+		int i = z / m1, j = z % m1;
+#define STEP_TAG(step)	STEP_TAG_2D(plan, step, i, j)
+
+		/* TODO: get rid of tags */
+
+		plan->fft1_args[z].plan = plan;
+		plan->fft1_args[z].i = i;
+		plan->fft1_args[z].j = j;
+
+		/* Register (n2,m2) chunks */
+		starpu_vector_data_register(&plan->twisted1_handle[z], 0, (uintptr_t) &plan->twisted1[z*plan->totsize2], plan->totsize2, sizeof(*plan->twisted1));
+		starpu_vector_data_register(&plan->fft1_handle[z], 0, (uintptr_t) &plan->fft1[z*plan->totsize2], plan->totsize2, sizeof(*plan->fft1));
+
+		/* We'll need it on the CPU for the second twist anyway */
+		starpu_data_set_wt_mask(plan->fft1_handle[z], 1<<0);
+
+		/* Create twist1 task */
+		plan->twist1_tasks[z] = task = starpu_task_create();
+		task->cl = &STARPUFFT(twist1_2d_codelet);
+		/* task->handles[0] = to be filled at execution */
+		task->handles[1] = plan->twisted1_handle[z];
+		task->cl_arg = &plan->fft1_args[z];
+		task->tag_id = STEP_TAG(TWIST1);
+		task->use_tag = 1;
+		task->destroy = 0;
+
+		/* Tell that fft1 depends on twisted1 */
+		starpu_tag_declare_deps(STEP_TAG(FFT1),
+				1, STEP_TAG(TWIST1));
+
+		/* Create FFT1 task */
+		plan->fft1_tasks[z] = task = starpu_task_create();
+		task->cl = &STARPUFFT(fft1_2d_codelet);
+		task->handles[0] = plan->twisted1_handle[z];
+		task->handles[1] = plan->fft1_handle[z];
+		task->handles[2] = plan->roots_handle[0];
+		task->handles[3] = plan->roots_handle[1];
+		task->cl_arg = &plan->fft1_args[z];
+		task->tag_id = STEP_TAG(FFT1);
+		task->use_tag = 1;
+		task->destroy = 0;
+
+		/* Tell that to be done with the first step we need to have
+		 * finished this fft1 */
+		starpu_tag_declare_deps(STEP_TAG_2D(plan, JOIN, 0, 0),
+				1, STEP_TAG(FFT1));
+#undef STEP_TAG
+	}
+
+	/* Create join task */
+	plan->join_task = task = starpu_task_create();
+	task->cl = NULL;
+	task->tag_id = STEP_TAG_2D(plan, JOIN, 0, 0);
+	task->use_tag = 1;
+	task->destroy = 0;
+
+	/* Create second-round tasks */
+	for (z = 0; z < plan->totsize3; z++) {
+		int kk = z / DIV_2D_M, ll = z % DIV_2D_M;
+#define STEP_TAG(step)	STEP_TAG_2D(plan, step, kk, ll)
+
+		plan->fft2_args[z].plan = plan;
+		plan->fft2_args[z].kk = kk;
+		plan->fft2_args[z].ll = ll;
+
+		/* Register n3*m3 (n1,m1) chunks */
+		starpu_vector_data_register(&plan->twisted2_handle[z], 0, (uintptr_t) &plan->twisted2[z*plan->totsize4], plan->totsize4, sizeof(*plan->twisted2));
+		starpu_vector_data_register(&plan->fft2_handle[z], 0, (uintptr_t) &plan->fft2[z*plan->totsize4], plan->totsize4, sizeof(*plan->fft2));
+
+		/* We'll need it on the CPU for the last twist anyway */
+		starpu_data_set_wt_mask(plan->fft2_handle[z], 1<<0);
+
+		/* Tell that twisted2 depends on the whole first step to be
+		 * done */
+		starpu_tag_declare_deps(STEP_TAG(TWIST2),
+				1, STEP_TAG_2D(plan, JOIN, 0, 0));
+
+		/* Create twist2 task */
+		plan->twist2_tasks[z] = task = starpu_task_create();
+		task->cl = &STARPUFFT(twist2_2d_codelet);
+		task->handles[0] = plan->twisted2_handle[z];
+		task->cl_arg = &plan->fft2_args[z];
+		task->tag_id = STEP_TAG(TWIST2);
+		task->use_tag = 1;
+		task->destroy = 0;
+
+		/* Tell that fft2 depends on twisted2 */
+		starpu_tag_declare_deps(STEP_TAG(FFT2),
+				1, STEP_TAG(TWIST2));
+
+		/* Create FFT2 task */
+		plan->fft2_tasks[z] = task = starpu_task_create();
+		task->cl = &STARPUFFT(fft2_2d_codelet);
+		task->handles[0] = plan->twisted2_handle[z];
+		task->handles[1] = plan->fft2_handle[z];
+		task->cl_arg = &plan->fft2_args[z];
+		task->tag_id = STEP_TAG(FFT2);
+		task->use_tag = 1;
+		task->destroy = 0;
+
+		/* Tell that twist3 depends on fft2 */
+		starpu_tag_declare_deps(STEP_TAG(TWIST3),
+				1, STEP_TAG(FFT2));
+
+		/* Create twist3 tasks */
+		/* These run only on CPUs and thus write directly into the
+		 * application output buffer. */
+		plan->twist3_tasks[z] = task = starpu_task_create();
+		task->cl = &STARPUFFT(twist3_2d_codelet);
+		task->handles[0] = plan->fft2_handle[z];
+		task->cl_arg = &plan->fft2_args[z];
+		task->tag_id = STEP_TAG(TWIST3);
+		task->use_tag = 1;
+		task->destroy = 0;
+
+		/* Tell that to be completely finished we need to have finished this twist3 task */
+		starpu_tag_declare_deps(STEP_TAG_2D(plan, END, 0, 0),
+				1, STEP_TAG(TWIST3));
+#undef STEP_TAG
+	}
+
+	/* Create end task */
+	plan->end_task = task = starpu_task_create();
+	task->cl = NULL;
+	task->tag_id = STEP_TAG_2D(plan, END, 0, 0);
+	task->use_tag = 1;
+	task->destroy = 0;
+
+}
+
+	return plan;
+}
+
+/* Actually submit all the tasks. */
+static struct starpu_task *
+STARPUFFT(start2dC2C)(STARPUFFT(plan) plan, starpu_data_handle_t in, starpu_data_handle_t out)
+{
+	STARPU_ASSERT(plan->type == C2C);
+	int z;
+	int ret;
+
+if (PARALLEL) {
+	for (z=0; z < plan->totsize1; z++) {
+		ret = starpu_task_submit(plan->twist1_tasks[z]);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+		ret = starpu_task_submit(plan->fft1_tasks[z]);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	ret = starpu_task_submit(plan->join_task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	for (z=0; z < plan->totsize3; z++) {
+		ret = starpu_task_submit(plan->twist2_tasks[z]);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+		ret = starpu_task_submit(plan->fft2_tasks[z]);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+		ret = starpu_task_submit(plan->twist3_tasks[z]);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	ret = starpu_task_submit(plan->end_task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
+	return plan->end_task;
+} else /* !PARALLEL */ {
+	struct starpu_task *task;
+
+	/* Create FFT task */
+	task = starpu_task_create();
+	task->detach = 0;
+	task->cl = &STARPUFFT(fft_2d_codelet);
+	task->handles[0] = in;
+	task->handles[1] = out;
+	task->cl_arg = plan;
+
+	ret = starpu_task_submit(task);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	return task;
+}
+}
+
+/* Free all the tags. The generic code handles freeing the buffers. */
+static void
+STARPUFFT(free_2d_tags)(STARPUFFT(plan) plan)
+{
+	unsigned i, j;
+	int n1 = plan->n1[0];
+	int m1 = plan->n1[1];
+
+	if (!PARALLEL)
+		return;
+
+	for (i = 0; i < n1; i++) {
+		for (j = 0; j < m1; j++) {
+			starpu_tag_remove(STEP_TAG_2D(plan, TWIST1, i, j));
+			starpu_tag_remove(STEP_TAG_2D(plan, FFT1, i, j));
+		}
+	}
+
+	starpu_tag_remove(STEP_TAG_2D(plan, JOIN, 0, 0));
+
+	for (i = 0; i < DIV_2D_N; i++) {
+		for (j = 0; j < DIV_2D_M; j++) {
+			starpu_tag_remove(STEP_TAG_2D(plan, TWIST2, i, j));
+			starpu_tag_remove(STEP_TAG_2D(plan, FFT2, i, j));
+			starpu_tag_remove(STEP_TAG_2D(plan, TWIST3, i, j));
+		}
+	}
+
+	starpu_tag_remove(STEP_TAG_2D(plan, END, 0, 0));
+}
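
To make the decomposition performed by plan_dft_2d above concrete, here is a worked example of the size arithmetic only, for a hypothetical 1024 x 1024 input with the default DIV_2D_N = DIV_2D_M = 8 (a standalone sketch, not code from the library):

#include <stdio.h>

/* Worked example of the sizes computed by plan_dft_2d for a hypothetical 1024 x 1024 input. */
int main(void)
{
	int n = 1024, m = 1024;
	int n1 = 8,  m1 = 8;		   /* DIV_2D_N, DIV_2D_M; n2 and m2 stay below the 16384 cufft limit */
	int n2 = n / n1, m2 = m / m1;	   /* 128 x 128: size of each first-round fft */
	int n3 = n2 / 8, m3 = m2 / 8;	   /* 16 x 16: (n1,m1) ffts per second-round batch */
	int totsize1 = n1 * m1;		   /* 64 twist1 + fft1 task pairs */
	int totsize3 = 8 * 8;		   /* 64 twist2/fft2/twist3 batches */
	int totsize4 = (n * m) / totsize3; /* 16384 points per batch, i.e. n3 * m3 = 256 ffts of 8 x 8 */

	printf("%d first-round ffts of %dx%d, %d batches of %dx%d ffts of %dx%d (%d points each)\n",
	       totsize1, n2, m2, totsize3, n3, m3, n1, m1, totsize4);
	return 0;
}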

+ 272 - 32
tests/Makefile.am

@@ -1,8 +1,8 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2009, 2010, 2011  Université de Bordeaux 1
-# Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
-# Copyright (C) 2010, 2011  Institut National de Recherche en Informatique et Automatique
+# Copyright (C) 2009, 2010, 2011-2012  Université de Bordeaux 1
+# Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+# Copyright (C) 2010, 2011, 2012  Institut National de Recherche en Informatique et Automatique
 #
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
@@ -16,15 +16,28 @@
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
 AM_CFLAGS = $(HWLOC_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS)
-LIBS = $(top_builddir)/src/libstarpu.la $(HWLOC_LIBS) @LIBS@
+LIBS = $(top_builddir)/src/libstarpu-@STARPU_EFFECTIVE_VERSION@.la $(HWLOC_LIBS) @LIBS@
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/
 AM_LDFLAGS = $(STARPU_CUDA_LDFLAGS) $(STARPU_OPENCL_LDFLAGS)
 
 EXTRA_DIST =					\
+	helper.h				\
+	datawizard/scal.h			\
 	microbenchs/null_kernel_gordon.c	\
 	datawizard/sync_and_notify_data_gordon_kernels.c \
 	datawizard/sync_and_notify_data_opencl_codelet.cl\
-	coverage/coverage.sh
+	coverage/coverage.sh			\
+	datawizard/interfaces/test_interfaces.h	\
+	datawizard/interfaces/bcsr/bcsr_opencl_kernel.cl \
+	datawizard/interfaces/matrix/matrix_opencl_kernel.cl \
+	datawizard/interfaces/variable/variable_opencl_kernel.cl \
+	datawizard/interfaces/vector/test_vector_opencl_kernel.cl \
+	datawizard/interfaces/multiformat/multiformat_types.h \
+	datawizard/interfaces/multiformat/multiformat_opencl_kernel.cl \
+	datawizard/interfaces/multiformat/multiformat_conversion_codelets_kernel.cl \
+	datawizard/interfaces/multiformat/advanced/generic.h \
+	datawizard/interfaces/csr/csr_opencl_kernel.cl \
+	datawizard/interfaces/block/block_opencl_kernel.cl
 
 CLEANFILES = 					\
 	*.gcno *.gcda *.linkinfo		\
@@ -43,7 +56,7 @@ if STARPU_USE_CUDA
 # TODO define NVCCFLAGS
 NVCC ?= nvcc
 
-NVCCFLAGS += -I$(top_srcdir)/include/ -I$(top_builddir)/include $(HWLOC_CFLAGS)
+NVCCFLAGS += -I$(top_srcdir)/include/ -I$(top_srcdir)/src -I$(top_builddir)/src -I$(top_builddir)/include $(HWLOC_CFLAGS)
 
 .cu.cubin:
 	$(MKDIR_P) `dirname $@`
@@ -83,7 +96,7 @@ if !STARPU_HAVE_WINDOWS
 ## test loader program
 LOADER			=	loader
 LOADER_BIN		=	$(abs_top_builddir)/tests/$(LOADER)
-TESTS_ENVIRONMENT	=	$(LOADER_BIN)
+TESTS_ENVIRONMENT	=	top_builddir="$(abs_top_builddir)" $(LOADER_BIN)
 endif
 
 TESTS = $(noinst_PROGRAMS)
@@ -92,31 +105,39 @@ if STARPU_COVERAGE_ENABLED
 TESTS	+=	coverage/coverage.sh
 endif
 
+starpu_machine_display_SOURCES	=	../tools/starpu_machine_display.c
+
 noinst_PROGRAMS =				\
-	core/restart				\
-	core/execute_on_a_specific_worker	\
-	core/insert_task			\
-	core/multithreaded			\
-	core/multithreaded_init			\
-	core/starpu_task_wait_for_all		\
-	core/starpu_task_wait			\
-	core/static_restartable			\
-	core/static_restartable_using_initializer\
-	core/static_restartable_tag		\
-	core/regenerate				\
-	core/wait_all_regenerable_tasks		\
-	core/subgraph_repeat			\
-	core/subgraph_repeat_regenerate		\
-	core/empty_task				\
-	core/empty_task_sync_point		\
-	core/empty_task_sync_point_tasks	\
-	core/empty_task_chain			\
-	core/tag_wait_api			\
-	core/task_wait_api			\
-	core/declare_deps_in_callback		\
-	core/declare_deps_after_submission	\
-	core/declare_deps_after_submission_synchronous	\
-	core/get_current_task			\
+	starpu_machine_display			\
+	main/deprecated_func			\
+	main/deprecated_buffer			\
+	main/restart				\
+	main/execute_on_a_specific_worker	\
+	main/insert_task			\
+	main/multithreaded			\
+	main/multithreaded_init			\
+	main/starpu_task_bundle			\
+	main/starpu_task_wait_for_all		\
+	main/starpu_task_wait			\
+	main/static_restartable			\
+	main/static_restartable_using_initializer\
+	main/static_restartable_tag		\
+	main/regenerate				\
+	main/wait_all_regenerable_tasks		\
+	main/subgraph_repeat			\
+	main/subgraph_repeat_regenerate		\
+	main/empty_task				\
+	main/empty_task_sync_point		\
+	main/empty_task_sync_point_tasks	\
+	main/empty_task_chain			\
+	main/tag_wait_api			\
+	main/task_wait_api			\
+	main/declare_deps_in_callback		\
+	main/declare_deps_after_submission	\
+	main/declare_deps_after_submission_synchronous	\
+	main/get_current_task			\
+	main/starpu_init			\
+	main/starpu_worker_exists               \
 	datawizard/acquire_cb			\
 	datawizard/acquire_cb_insert		\
 	datawizard/acquire_release		\
@@ -128,6 +149,7 @@ noinst_PROGRAMS =				\
 	datawizard/sync_and_notify_data		\
 	datawizard/sync_and_notify_data_implicit\
 	datawizard/dsm_stress			\
+	datawizard/double_parameter		\
 	datawizard/write_only_tmp_buffer	\
 	datawizard/data_invalidation		\
 	datawizard/dining_philosophers		\
@@ -144,8 +166,26 @@ noinst_PROGRAMS =				\
 	datawizard/critical_section_with_void_interface\
 	datawizard/increment_redux		\
 	datawizard/increment_redux_v2		\
+	datawizard/increment_redux_lazy		\
 	datawizard/handle_to_pointer		\
 	datawizard/lazy_allocation		\
+	datawizard/interfaces/copy_interfaces	\
+	datawizard/interfaces/block/block_interface \
+	datawizard/interfaces/bcsr/bcsr_interface \
+	datawizard/interfaces/csr/csr_interface \
+	datawizard/interfaces/matrix/matrix_interface \
+	datawizard/interfaces/multiformat/multiformat_interface \
+	datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl \
+	datawizard/interfaces/multiformat/advanced/multiformat_data_release \
+	datawizard/interfaces/multiformat/advanced/multiformat_worker \
+	datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion \
+	datawizard/interfaces/multiformat/advanced/same_handle \
+	datawizard/interfaces/variable/variable_interface    \
+	datawizard/interfaces/vector/test_vector_interface   \
+	datawizard/interfaces/void/void_interface \
+	datawizard/in_place_partition   	\
+	datawizard/partition_lazy		\
+	datawizard/gpu_register   		\
 	errorcheck/starpu_init_noworker		\
 	errorcheck/starpu_init_noworker		\
 	errorcheck/invalid_blocking_calls	\
 	errorcheck/invalid_tasks		\
@@ -165,7 +205,7 @@ noinst_PROGRAMS =				\
 	parallel_tasks/parallel_kernels		\
 	parallel_tasks/parallel_kernels_spmd	\
 	perfmodels/regression_based		\
-	perfmodels/non_linear_regression_based
+	perfmodels/non_linear_regression_based
 
 if STARPU_HAVE_WINDOWS
 check_PROGRAMS = $(noinst_PROGRAMS)
@@ -236,6 +276,42 @@ datawizard_sync_and_notify_data_implicit_SOURCES +=	\
 	datawizard/sync_and_notify_data_opencl.c
 endif
 
+	datawizard/in_place_partition.c	\
+	datawizard/scal.c
+if STARPU_USE_CUDA
+datawizard_in_place_partition_SOURCES +=	\
+	datawizard/scal_cuda.cu
+endif
+if STARPU_USE_OPENCL
+datawizard_in_place_partition_SOURCES +=	\
+	datawizard/scal_opencl.cl
+endif
+
+datawizard_partition_lazy_SOURCES =	\
+	datawizard/partition_lazy.c	\
+	datawizard/scal.c
+if STARPU_USE_CUDA
+datawizard_partition_lazy_SOURCES +=	\
+	datawizard/scal_cuda.cu
+endif
+if STARPU_USE_OPENCL
+datawizard_partition_lazy_SOURCES +=	\
+	datawizard/scal_opencl.cl
+endif
+
+datawizard_gpu_register_SOURCES =	\
+	datawizard/gpu_register.c	\
+	datawizard/scal.c
+if STARPU_USE_CUDA
+datawizard_gpu_register_SOURCES +=	\
+	datawizard/scal_cuda.cu
+endif
+if STARPU_USE_OPENCL
+datawizard_gpu_register_SOURCES +=	\
+	datawizard/scal_opencl.cl
+endif
+
 if STARPU_USE_GORDON
 datawizard_sync_and_notify_data_SOURCES +=	\
 	datawizard/sync_and_notify_data_gordon_kernels.c
@@ -245,3 +321,167 @@ BUILT_SOURCES += 						\
 	datawizard/sync_and_notify_data_gordon_kernels.spuelf	\
 	microbenchs/null_kernel_gordon.spuelf
 endif
+
+###################
+# Block interface #
+###################
+datawizard_interfaces_block_block_interface_SOURCES= \
+	datawizard/interfaces/test_interfaces.c  \
+	datawizard/interfaces/block/block_interface.c
+
+if STARPU_USE_CUDA
+datawizard_interfaces_block_block_interface_SOURCES+= \
+	datawizard/interfaces/block/block_cuda.cu
+endif
+
+if STARPU_USE_OPENCL
+datawizard_interfaces_block_block_interface_SOURCES+= \
+	datawizard/interfaces/block/block_opencl.c
+nobase_STARPU_OPENCL_DATA_DATA += \
+	datawizard/interfaces/block/block_opencl_kernel.cl
+endif
+
+##################
+# BSCR interface #
+##################
+datawizard_interfaces_bcsr_bcsr_interface_SOURCES= \
+	datawizard/interfaces/test_interfaces.c \
+	datawizard/interfaces/bcsr/bcsr_interface.c 
+
+if STARPU_USE_CUDA
+datawizard_interfaces_bcsr_bcsr_interface_SOURCES+= \
+	datawizard/interfaces/bcsr/bcsr_cuda.cu
+endif
+
+if STARPU_USE_OPENCL
+datawizard_interfaces_bcsr_bcsr_interface_SOURCES+= \
+	datawizard/interfaces/bcsr/bcsr_opencl.c
+nobase_STARPU_OPENCL_DATA_DATA += \
+	datawizard/interfaces/bcsr/bcsr_opencl_kernel.cl
+endif
+
+#################
+# CSR interface #
+#################
+datawizard_interfaces_csr_csr_interface_SOURCES= \
+	datawizard/interfaces/test_interfaces.c  \
+	datawizard/interfaces/csr/csr_interface.c
+
+if STARPU_USE_CUDA
+datawizard_interfaces_csr_csr_interface_SOURCES+= \
+	datawizard/interfaces/csr/csr_cuda.cu
+endif
+
+if STARPU_USE_OPENCL
+datawizard_interfaces_csr_csr_interface_SOURCES+= \
+	datawizard/interfaces/csr/csr_opencl.c
+nobase_STARPU_OPENCL_DATA_DATA += \
+	datawizard/interfaces/csr/csr_opencl_kernel.cl
+endif
+
+
+datawizard_interfaces_vector_test_vector_interface_SOURCES =               \
+	datawizard/interfaces/vector/test_vector_interface.c               \
+	datawizard/interfaces/test_interfaces.c
+
+if STARPU_USE_CUDA
+datawizard_interfaces_vector_test_vector_interface_SOURCES +=               \
+	datawizard/interfaces/vector/test_vector_cuda.cu
+endif
+
+if STARPU_USE_OPENCL
+datawizard_interfaces_vector_test_vector_interface_SOURCES +=               \
+	datawizard/interfaces/vector/test_vector_opencl.c 
+nobase_STARPU_OPENCL_DATA_DATA += \
+	datawizard/interfaces/vector/test_vector_opencl_kernel.cl
+endif
+
+####################
+# Matrix interface #
+####################
+datawizard_interfaces_matrix_matrix_interface_SOURCES= \
+	datawizard/interfaces/test_interfaces.c        \
+	datawizard/interfaces/matrix/matrix_interface.c
+
+if STARPU_USE_CUDA
+datawizard_interfaces_matrix_matrix_interface_SOURCES+= \
+	datawizard/interfaces/matrix/matrix_cuda.cu
+endif
+
+if STARPU_USE_OPENCL
+datawizard_interfaces_matrix_matrix_interface_SOURCES+= \
+	datawizard/interfaces/matrix/matrix_opencl.c
+nobase_STARPU_OPENCL_DATA_DATA+= \
+	datawizard/interfaces/matrix/matrix_opencl_kernel.cl
+endif
+
+
+#########################
+# Multiformat interface #
+#########################
+datawizard_interfaces_multiformat_multiformat_interface_SOURCES =           \
+	datawizard/interfaces/test_interfaces.c                             \
+	datawizard/interfaces/multiformat/multiformat_interface.c           \
+	datawizard/interfaces/multiformat/multiformat_conversion_codelets.c
+
+if STARPU_USE_CUDA
+datawizard_interfaces_multiformat_multiformat_interface_SOURCES+=                  \
+	datawizard/interfaces/multiformat/multiformat_cuda.cu                      \
+	datawizard/interfaces/multiformat/multiformat_conversion_codelets_cuda.cu
+endif
+
+if STARPU_USE_OPENCL
+datawizard_interfaces_multiformat_multiformat_interface_SOURCES+=                  \
+	datawizard/interfaces/multiformat/multiformat_opencl.c                     \
+	datawizard/interfaces/multiformat/multiformat_conversion_codelets_opencl.c
+nobase_STARPU_OPENCL_DATA_DATA +=                                                          \
+	datawizard/interfaces/multiformat/multiformat_opencl_kernel.cl                     \
+	datawizard/interfaces/multiformat/multiformat_conversion_codelets_kernel.cl
+endif
+
+datawizard_interfaces_multiformat_advanced_multiformat_cuda_opencl_SOURCES=\
+	datawizard/interfaces/multiformat/advanced/generic.c               \
+	datawizard/interfaces/multiformat/advanced/multiformat_cuda_opencl.c
+
+datawizard_interfaces_multiformat_advanced_multiformat_data_release_SOURCES = \
+	datawizard/interfaces/multiformat/advanced/generic.c                  \
+	datawizard/interfaces/multiformat/advanced/multiformat_data_release.c
+
+datawizard_interfaces_multiformat_advanced_multiformat_worker_SOURCES=\
+	datawizard/interfaces/multiformat/advanced/generic.c               \
+	datawizard/interfaces/multiformat/advanced/multiformat_worker.c
+
+datawizard_interfaces_multiformat_advanced_multiformat_handle_conversion_SOURCES = \
+	datawizard/interfaces/multiformat/advanced/generic.c \
+	datawizard/interfaces/multiformat/advanced/multiformat_handle_conversion.c
+
+datawizard_interfaces_multiformat_advanced_same_handle_SOURCES= \
+	datawizard/interfaces/multiformat/advanced/generic.c               \
+	datawizard/interfaces/multiformat/advanced/same_handle.c
+
+
+datawizard_interfaces_variable_variable_interface_SOURCES=   \
+	datawizard/interfaces/test_interfaces.c              \
+	datawizard/interfaces/variable/variable_interface.c
+
+if STARPU_USE_CUDA
+datawizard_interfaces_variable_variable_interface_SOURCES+= \
+	datawizard/interfaces/variable/variable_cuda.cu
+endif
+
+if STARPU_USE_OPENCL
+datawizard_interfaces_variable_variable_interface_SOURCES+= \
+	datawizard/interfaces/variable/variable_opencl.c
+nobase_STARPU_OPENCL_DATA_DATA += \
+	datawizard/interfaces/variable/variable_opencl_kernel.cl
+endif
+
+##################
+# Void interface #
+##################
+datawizard_interfaces_void_void_interface_SOURCES=\
+	datawizard/interfaces/test_interfaces.c        \
+	datawizard/interfaces/void/void_interface.c
+
+showcheck:
+	-cat $(TEST_LOGS) /dev/null

+ 23 - 3
tests/cholesky/prio.r

@@ -1,3 +1,20 @@
+
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2010  Université de Bordeaux 1
+# Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+
 sizelist <- seq(2048, 24576, 2048);
 schedlist <- c("greedy", "prio", "dm", "random");
 
@@ -15,7 +32,8 @@ parse <- function (size, sched)
 	filename = paste("timings_sched/sched", sched, size, sep=".");
 
 	if (file.exists(filename))
-	{	ret <- scan(paste("timings_sched/sched", sched, size, sep="."));
+	{
+		ret <- scan(paste("timings_sched/sched", sched, size, sep="."));
 		return(ret);
 	};
 
@@ -35,7 +53,8 @@ handle_sched <- function(sched)
 	gflopstab <- NULL;
 	sizetab <- NULL;
 
-	for (size in sizelist) {
+	for (size in sizelist)
+	{
 		list <- handle_size(size, sched);
 		gflopstab <- c(gflopstab, list);
 		sizetab <- c(sizetab, array(size, c(length(list))));
@@ -51,7 +70,8 @@ handle_sched_mean <- function(sched)
 	meantab <- NULL;
 	sizetab <- NULL;
 
-	for (size in sizelist) {
+	for (size in sizelist)
+	{
 		list <- mean(handle_size(size, sched));
 		meantab <- c(meantab, list);
 		sizetab <- c(sizetab, array(size, c(length(list))));

+ 23 - 3
tests/cholesky/sched.r

@@ -1,3 +1,20 @@
+
+# StarPU --- Runtime system for heterogeneous multicore architectures.
+#
+# Copyright (C) 2010  Université de Bordeaux 1
+# Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+#
+# StarPU is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or (at
+# your option) any later version.
+#
+# StarPU is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
+
 sizelist <- seq(2048, 24576, 2048);
 schedlist <- c("greedy", "prio", "dm", "random");
 
@@ -15,7 +32,8 @@ parse <- function (size, sched)
 	filename = paste("timings_sched/sched", sched, size, sep=".");
 
 	if (file.exists(filename))
-	{	ret <- scan(paste("timings_sched/sched", sched, size, sep="."));
+	{
+		ret <- scan(paste("timings_sched/sched", sched, size, sep="."));
 		return(ret);
 	};
 
@@ -35,7 +53,8 @@ handle_sched <- function(sched)
 	gflopstab <- NULL;
 	sizetab <- NULL;
 
-	for (size in sizelist) {
+	for (size in sizelist)
+	{
 		list <- handle_size(size, sched);
 		gflopstab <- c(gflopstab, list);
 		sizetab <- c(sizetab, array(size, c(length(list))));
@@ -51,7 +70,8 @@ handle_sched_mean <- function(sched)
 	meantab <- NULL;
 	sizetab <- NULL;
 
-	for (size in sizelist) {
+	for (size in sizelist)
+	{
 		list <- mean(handle_size(size, sched));
 		meantab <- c(meantab, list);
 		sizetab <- c(sizetab, array(size, c(length(list))));

+ 0 - 65
tests/core/multithreaded_init.c

@@ -1,65 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Institut National de Recherche en Informatique et Automatique
- * Copyright (C) 2010-2011  Université de Bordeaux 1
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-#include <sys/time.h>
-#include <stdio.h>
-#include <pthread.h>
-#include <starpu.h>
-
-#define NUM_THREADS 5
-#define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
-
-void *launch_starpu(void *id)
-{ 
-   starpu_init(NULL);
-   return NULL;
-}
-
-int main(int argc, char **argv)
-{ 
-  unsigned i;
-  double timing;
-  struct timeval start;
-  struct timeval end;
-
-  pthread_t threads[NUM_THREADS];
-  
-  gettimeofday(&start, NULL);
-
-  for (i = 0; i < NUM_THREADS; ++i)
-    {
-      int ret = pthread_create(&threads[i], NULL, launch_starpu, NULL);
-      STARPU_ASSERT(ret == 0);
-    }
-
-  for (i = 0; i < NUM_THREADS; ++i)
-    {
-      int ret = pthread_join(threads[i], NULL);
-      STARPU_ASSERT(ret == 0);
-    }
-
-  gettimeofday(&end, NULL);
-
-  timing = (double)((end.tv_sec - start.tv_sec)*1000000 + (end.tv_usec - start.tv_usec));
-
-  FPRINTF(stderr, "Success : %d threads launching simultaneously starpu_init\n", NUM_THREADS);
-  FPRINTF(stderr, "Total: %f secs\n", timing/1000000);
-  FPRINTF(stderr, "Per task: %f usecs\n", timing/NUM_THREADS);
-
-  starpu_shutdown();
-
-  return 0;
-}

+ 0 - 121
tests/core/task_wait_api.c

@@ -1,121 +0,0 @@
-/* StarPU --- Runtime system for heterogeneous multicore architectures.
- *
- * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
- *
- * StarPU is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- *
- * StarPU is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * See the GNU Lesser General Public License in COPYING.LGPL for more details.
- */
-
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
-
-#include <starpu.h>
-
-#define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
-
-static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attribute__ ((unused)))
-{
-}
-
-static starpu_codelet dummy_codelet =
-{
-	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
-	.cpu_func = dummy_func,
-	.cuda_func = dummy_func,
-	.opencl_func = dummy_func,
-        .model = NULL,
-	.nbuffers = 0
-};
-
-static struct starpu_task *create_dummy_task(void)
-{
-	struct starpu_task *task = starpu_task_create();
-
-	task->cl = &dummy_codelet;
-	task->cl_arg = NULL;
-	task->detach = 0;
-
-	return task;
-}
-
-int main(int argc, char **argv)
-{
-	starpu_init(NULL);
-
-	FPRINTF(stderr, "{ A } -> { B }\n");
-	fflush(stderr);
-
-	struct starpu_task *taskA, *taskB;
-
-	taskA = create_dummy_task();
-	taskB = create_dummy_task();
-
-	/* B depends on A */
-	starpu_task_declare_deps_array(taskB, 1, &taskA);
-
-	starpu_task_submit(taskB);
-	starpu_task_submit(taskA);
-
-	starpu_task_wait(taskB);
-
-	FPRINTF(stderr, "{ C, D, E, F } -> { G }\n");
-
-	struct starpu_task *taskC, *taskD, *taskE, *taskF, *taskG;
-
-	taskC = create_dummy_task();
-	taskD = create_dummy_task();
-	taskE = create_dummy_task();
-	taskF = create_dummy_task();
-	taskG = create_dummy_task();
-
-	struct starpu_task *tasksCDEF[4] = {taskC, taskD, taskE, taskF};
-	starpu_task_declare_deps_array(taskG, 4, tasksCDEF);
-
-	starpu_task_submit(taskC);
-	starpu_task_submit(taskD);
-	starpu_task_submit(taskG);
-	starpu_task_submit(taskE);
-	starpu_task_submit(taskF);
-
-	starpu_task_wait(taskG);
-
-	FPRINTF(stderr, "{ H, I } -> { J, K, L }\n");
-
-	struct starpu_task *taskH, *taskI, *taskJ, *taskK, *taskL;
-
-	taskH = create_dummy_task();
-	taskI = create_dummy_task();
-	taskJ = create_dummy_task();
-	taskK = create_dummy_task();
-	taskL = create_dummy_task();
-
-	struct starpu_task *tasksHI[2] = {taskH, taskI};
-
-	starpu_task_declare_deps_array(taskJ, 2, tasksHI);
-	starpu_task_declare_deps_array(taskK, 2, tasksHI);
-	starpu_task_declare_deps_array(taskL, 2, tasksHI);
-
-	starpu_task_submit(taskH);
-	starpu_task_submit(taskI);
-	starpu_task_submit(taskJ);
-	starpu_task_submit(taskK);
-	starpu_task_submit(taskL);
-
-	starpu_task_wait(taskJ);
-	starpu_task_wait(taskK);
-	starpu_task_wait(taskL);
-
-	starpu_shutdown();
-
-	return 0;
-}

+ 8 - 5
tests/datawizard/acquire_cb.c

@@ -15,11 +15,10 @@
  */
  */
 
 
 #include <starpu.h>
 #include <starpu.h>
-
-#define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
+#include "../helper.h"
 
 
 unsigned token = 0;
 unsigned token = 0;
-starpu_data_handle token_handle;
+starpu_data_handle_t token_handle;
 
 
 void callback(void *arg __attribute__ ((unused)))
 void callback(void *arg __attribute__ ((unused)))
 {
 {
@@ -29,7 +28,11 @@ void callback(void *arg __attribute__ ((unused)))
 
 
 int main(int argc, char **argv)
 int main(int argc, char **argv)
 {
 {
-        starpu_init(NULL);
+	int ret;
+
+        ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
 	starpu_variable_data_register(&token_handle, 0, (uintptr_t)&token, sizeof(unsigned));
 	starpu_variable_data_register(&token_handle, 0, (uintptr_t)&token, sizeof(unsigned));
         starpu_data_acquire_cb(token_handle, STARPU_RW, callback, NULL);
         starpu_data_acquire_cb(token_handle, STARPU_RW, callback, NULL);
@@ -41,5 +44,5 @@ int main(int argc, char **argv)
 
 
 	starpu_shutdown();
 	starpu_shutdown();
 
 
-	return 0;
+	return EXIT_SUCCESS;
 }
 }
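Aside: the hunk above is the smallest instance of the conversion pattern applied throughout this commit. A minimal sketch of the resulting test skeleton, assuming the STARPU_CHECK_RETURN_VALUE and STARPU_TEST_SKIPPED helpers come from tests/helper.h as included in these hunks:

#include <errno.h>
#include <stdlib.h>
#include <starpu.h>
#include "../helper.h"

int main(int argc, char **argv)
{
	int ret;

	/* Skip the test instead of failing when no worker can be initialized. */
	ret = starpu_init(NULL);
	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	/* ... register data, submit tasks, wait for them ... */

	starpu_shutdown();
	return EXIT_SUCCESS;
}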

+ 43 - 25
tests/datawizard/acquire_cb_insert.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011, 2012  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -14,30 +14,38 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
+#include <config.h>
 #include <starpu.h>
 #include <starpu.h>
+#include "../helper.h"
+
+#warning memory leak
 
 
 #define N 16
 #define N 16
 #define M 4
 #define M 4
 #define X 2
 #define X 2
 
 
-#define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
-
 void which_index_cpu(void *descr[], void *_args)
 void which_index_cpu(void *descr[], void *_args)
 {
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	int *x0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
 	int *x0 = (int *)STARPU_VARIABLE_GET_PTR(descr[0]);
 
 
 	/* A real case would actually compute something */
 	/* A real case would actually compute something */
 	*x0 = X;
 	*x0 = X;
 }
 }
 
 
-starpu_codelet which_index = {
+struct starpu_codelet which_index =
+{
 	.where = STARPU_CPU,
 	.where = STARPU_CPU,
-	.cpu_func = which_index_cpu,
-        .nbuffers = 1
+	.cpu_funcs = {which_index_cpu, NULL},
+        .nbuffers = 1,
+	.modes = {STARPU_W}
 };
 };
 
 
 void work_cpu(void *descr[], void *_args)
 void work_cpu(void *descr[], void *_args)
 {
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	int i, n = STARPU_VECTOR_GET_NX(descr[0]);
 	int i, n = STARPU_VECTOR_GET_NX(descr[0]);
 	float *x0 = (float *)STARPU_VECTOR_GET_PTR(descr[0]);
 	float *x0 = (float *)STARPU_VECTOR_GET_PTR(descr[0]);
 
 
@@ -45,16 +53,19 @@ void work_cpu(void *descr[], void *_args)
 		x0[i] = i + 1;
 		x0[i] = i + 1;
 }
 }
 
 
-starpu_codelet work = {
+struct starpu_codelet work =
+{
 	.where = STARPU_CPU,
 	.where = STARPU_CPU,
-	.cpu_func = work_cpu,
-        .nbuffers = 1
+	.cpu_funcs = {work_cpu, NULL},
+        .nbuffers = 1,
+	.modes = {STARPU_W}
 };
 };
 
 
 static int x;
 static int x;
-static starpu_data_handle x_handle, f_handle;
+static starpu_data_handle_t x_handle, f_handle;
 
 
-void callback(void *arg) {
+void callback(void *arg)
+{
 	starpu_insert_task(&work, STARPU_W, starpu_data_get_sub_data(f_handle, 1, x), 0);
 	starpu_insert_task(&work, STARPU_W, starpu_data_get_sub_data(f_handle, 1, x), 0);
 	starpu_data_release(x_handle);
 	starpu_data_release(x_handle);
 }
 }
@@ -64,18 +75,22 @@ int main(int argc, char **argv)
         int i, ret;
         int i, ret;
 	float *f;
 	float *f;
 
 
-	starpu_init(NULL);
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
 	/* Declare x */
 	/* Declare x */
 	starpu_variable_data_register(&x_handle, 0, (uintptr_t)&x, sizeof(x));
 	starpu_variable_data_register(&x_handle, 0, (uintptr_t)&x, sizeof(x));
 
 
 	/* Allocate and Declare f */
 	/* Allocate and Declare f */
-	starpu_malloc((void**)&f, N * sizeof(*f));
+	ret = starpu_malloc((void**)&f, N * sizeof(*f));
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc");
 	memset(f, 0, N * sizeof(*f));
 	memset(f, 0, N * sizeof(*f));
 	starpu_vector_data_register(&f_handle, 0, (uintptr_t)f, N, sizeof(*f));
 	starpu_vector_data_register(&f_handle, 0, (uintptr_t)f, N, sizeof(*f));
 
 
 	/* Partition f */
 	/* Partition f */
-	struct starpu_data_filter filter = {
+	struct starpu_data_filter filter =
+	{
 		.filter_func = starpu_block_filter_func_vector,
 		.filter_func = starpu_block_filter_func_vector,
 		.nchildren = M,
 		.nchildren = M,
 	};
 	};
@@ -84,6 +99,7 @@ int main(int argc, char **argv)
 	/* Compute which portion we will work on */
 	/* Compute which portion we will work on */
         ret = starpu_insert_task(&which_index, STARPU_W, x_handle, 0);
         ret = starpu_insert_task(&which_index, STARPU_W, x_handle, 0);
 	if (ret == -ENODEV) goto enodev;
 	if (ret == -ENODEV) goto enodev;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_insert_task");
 
 
 	/* And submit the corresponding task */
 	/* And submit the corresponding task */
 #ifdef __GCC__
 #ifdef __GCC__
@@ -96,30 +112,32 @@ int main(int argc, char **argv)
 	starpu_data_acquire_cb(x_handle, STARPU_W, callback, NULL);
 	starpu_data_acquire_cb(x_handle, STARPU_W, callback, NULL);
 #endif
 #endif
 
 
-	starpu_task_wait_for_all();
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
 	starpu_data_unpartition(f_handle, 0);
 	starpu_data_unpartition(f_handle, 0);
 	starpu_data_unregister(f_handle);
 	starpu_data_unregister(f_handle);
 	starpu_data_unregister(x_handle);
 	starpu_data_unregister(x_handle);
 
 
         FPRINTF(stderr, "VALUES: %d", x);
         FPRINTF(stderr, "VALUES: %d", x);
-
-        for(i=0 ; i<N ; i++) {
+        for(i=0 ; i<N ; i++)
+	{
 		FPRINTF(stderr, " %f", f[i]);
 		FPRINTF(stderr, " %f", f[i]);
         }
         }
-
-	STARPU_ASSERT(f[X*(N/M)] == 1);
-	STARPU_ASSERT(f[X*(N/M)+1] == 2);
-	STARPU_ASSERT(f[X*(N/M)+2] == 3);
-	STARPU_ASSERT(f[X*(N/M)+3] == 4);
-
 	FPRINTF(stderr, "\n");
 	FPRINTF(stderr, "\n");
 
 
+	ret = EXIT_SUCCESS;
+	if (f[X*(N/M)] != 1 || f[X*(N/M)+1] != 2 ||
+	    f[X*(N/M)+2] != 3 || f[X*(N/M)+3] != 4)
+		ret = EXIT_FAILURE;
+
+	starpu_free(f);
 	starpu_shutdown();
 	starpu_shutdown();
-	return 0;
+	STARPU_RETURN(ret);
 
 
 enodev:
 enodev:
 	fprintf(stderr, "WARNING: No one can execute this task\n");
 	fprintf(stderr, "WARNING: No one can execute this task\n");
 	/* yes, we do not perform the computation but we did detect that no one
 	/* yes, we do not perform the computation but we did detect that no one
  	 * could perform the kernel, so this is not an error from StarPU */
  	 * could perform the kernel, so this is not an error from StarPU */
-	return 77;
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
 }
 }
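For reference, a sketch of the codelet declaration style these hunks converge on: the old typedef'd starpu_codelet with a single cpu_func becomes a struct starpu_codelet whose cpu_funcs array is NULL-terminated, and the access modes move out of task->buffers[].mode into the codelet itself. The kernel body below is illustrative only:

#include <starpu.h>

static void work_cpu(void *descr[], void *arg __attribute__ ((unused)))
{
	/* Fill the vector passed as buffer 0. */
	unsigned i, n = STARPU_VECTOR_GET_NX(descr[0]);
	float *v = (float *) STARPU_VECTOR_GET_PTR(descr[0]);
	for (i = 0; i < n; i++)
		v[i] = i + 1;
}

static struct starpu_codelet work =
{
	.where = STARPU_CPU,
	.cpu_funcs = {work_cpu, NULL},	/* was: .cpu_func = work_cpu */
	.nbuffers = 1,
	.modes = {STARPU_W}		/* was: task->buffers[0].mode = STARPU_W */
};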

+ 46 - 18
tests/datawizard/acquire_release.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010  Université de Bordeaux 1
  * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -15,11 +15,15 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
+#include <config.h>
 #include <starpu.h>
 #include <starpu.h>
+#include "../helper.h"
 
 
-#define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
-
+#ifdef STARPU_SLOW_MACHINE
+static unsigned ntasks = 10;
+#else
 static unsigned ntasks = 10000;
 static unsigned ntasks = 10000;
+#endif
 
 
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
 extern void increment_cuda(void *descr[], __attribute__ ((unused)) void *_args);
 extern void increment_cuda(void *descr[], __attribute__ ((unused)) void *_args);
@@ -27,30 +31,35 @@ extern void increment_cuda(void *descr[], __attribute__ ((unused)) void *_args);
 
 
 void increment_cpu(void *descr[], __attribute__ ((unused)) void *_args)
 void increment_cpu(void *descr[], __attribute__ ((unused)) void *_args)
 {
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	unsigned *tokenptr = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
 	unsigned *tokenptr = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
 	(*tokenptr)++;
 	(*tokenptr)++;
 }
 }
 
 
-static starpu_codelet increment_cl = {
+static struct starpu_codelet increment_cl =
+{
+	.modes = { STARPU_RW },
         .where = STARPU_CPU|STARPU_CUDA,
         .where = STARPU_CPU|STARPU_CUDA,
-	.cpu_func = increment_cpu,
+	.cpu_funcs = {increment_cpu, NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
-	.cuda_func = increment_cuda,
+	.cuda_funcs = {increment_cuda, NULL},
 #endif
 #endif
 	.nbuffers = 1
 	.nbuffers = 1
 };
 };
 
 
 unsigned token = 0;
 unsigned token = 0;
-starpu_data_handle token_handle;
+starpu_data_handle_t token_handle;
 
 
-void increment_token()
+int increment_token()
 {
 {
+	int ret;
 	struct starpu_task *task = starpu_task_create();
 	struct starpu_task *task = starpu_task_create();
         task->synchronous = 1;
         task->synchronous = 1;
 	task->cl = &increment_cl;
 	task->cl = &increment_cl;
-	task->buffers[0].handle = token_handle;
-	task->buffers[0].mode = STARPU_RW;
-	starpu_task_submit(task);
+	task->handles[0] = token_handle;
+	ret = starpu_task_submit(task);
+	return ret;
 }
 }
 
 
 void callback(void *arg __attribute__ ((unused)))
 void callback(void *arg __attribute__ ((unused)))
@@ -61,8 +70,12 @@ void callback(void *arg __attribute__ ((unused)))
 int main(int argc, char **argv)
 int main(int argc, char **argv)
 {
 {
 	int i;
 	int i;
+	int ret;
+
+        ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
-        starpu_init(NULL);
 	starpu_variable_data_register(&token_handle, 0, (uintptr_t)&token, sizeof(unsigned));
 	starpu_variable_data_register(&token_handle, 0, (uintptr_t)&token, sizeof(unsigned));
 
 
         FPRINTF(stderr, "Token: %u\n", token);
         FPRINTF(stderr, "Token: %u\n", token);
@@ -70,21 +83,36 @@ int main(int argc, char **argv)
 	for(i=0; i<ntasks; i++)
 	for(i=0; i<ntasks; i++)
 	{
 	{
 		/* synchronize data in RAM */
 		/* synchronize data in RAM */
-                starpu_data_acquire(token_handle, STARPU_R);
+                ret = starpu_data_acquire(token_handle, STARPU_R);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire");
+
                 token ++;
                 token ++;
                 starpu_data_release(token_handle);
                 starpu_data_release(token_handle);
 
 
-                increment_token();
+                ret = increment_token();
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 
 
-                starpu_data_acquire_cb(token_handle, STARPU_RW, callback, NULL);
+                ret = starpu_data_acquire_cb(token_handle, STARPU_RW, callback, NULL);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire_cb");
 	}
 	}
 
 
 	starpu_data_unregister(token_handle);
 	starpu_data_unregister(token_handle);
 
 
+	starpu_shutdown();
+
         FPRINTF(stderr, "Token: %u\n", token);
         FPRINTF(stderr, "Token: %u\n", token);
-        STARPU_ASSERT(token==ntasks*2);
+	if (token == ntasks * 2)
+		ret = EXIT_SUCCESS;
+	else
+		ret = EXIT_FAILURE;
+	STARPU_RETURN(ret);
 
 
+enodev:
+	starpu_data_unregister(token_handle);
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+ 	 * could perform the kernel, so this is not an error from StarPU */
 	starpu_shutdown();
 	starpu_shutdown();
-
-	return 0;
+	return STARPU_TEST_SKIPPED;
 }
 }

+ 42 - 16
tests/datawizard/acquire_release2.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -14,9 +14,11 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
+#include <config.h>
 #include <starpu.h>
 #include <starpu.h>
+#include "../helper.h"
 
 
-#define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
+#warning memory leak
 
 
 static unsigned ntasks = 40000;
 static unsigned ntasks = 40000;
 
 
@@ -26,30 +28,33 @@ extern void increment_cuda(void *descr[], __attribute__ ((unused)) void *_args);
 
 
 void increment_cpu(void *descr[], __attribute__ ((unused)) void *_args)
 void increment_cpu(void *descr[], __attribute__ ((unused)) void *_args)
 {
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	unsigned *tokenptr = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
 	unsigned *tokenptr = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
 	(*tokenptr)++;
 	(*tokenptr)++;
 }
 }
 
 
-static starpu_codelet increment_cl = {
+static struct starpu_codelet increment_cl =
+{
+	.modes = { STARPU_RW },
         .where = STARPU_CPU|STARPU_CUDA,
         .where = STARPU_CPU|STARPU_CUDA,
-	.cpu_func = increment_cpu,
+	.cpu_funcs = {increment_cpu, NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
-	.cuda_func = increment_cuda,
+	.cuda_funcs = {increment_cuda, NULL},
 #endif
 #endif
 	.nbuffers = 1
 	.nbuffers = 1
 };
 };
 
 
 unsigned token = 0;
 unsigned token = 0;
-starpu_data_handle token_handle;
+starpu_data_handle_t token_handle;
 
 
-void increment_token(int synchronous)
+int increment_token(int synchronous)
 {
 {
 	struct starpu_task *task = starpu_task_create();
 	struct starpu_task *task = starpu_task_create();
         task->synchronous = synchronous;
         task->synchronous = synchronous;
 	task->cl = &increment_cl;
 	task->cl = &increment_cl;
-	task->buffers[0].handle = token_handle;
-	task->buffers[0].mode = STARPU_RW;
-	starpu_task_submit(task);
+	task->handles[0] = token_handle;
+	return starpu_task_submit(task);
 }
 }
 
 
 void callback(void *arg __attribute__ ((unused)))
 void callback(void *arg __attribute__ ((unused)))
@@ -62,8 +67,12 @@ void callback(void *arg __attribute__ ((unused)))
 int main(int argc, char **argv)
 int main(int argc, char **argv)
 {
 {
 	int i;
 	int i;
+	int ret;
+
+        ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
-        starpu_init(NULL);
 	starpu_variable_data_register(&token_handle, 0, (uintptr_t)&token, sizeof(unsigned));
 	starpu_variable_data_register(&token_handle, 0, (uintptr_t)&token, sizeof(unsigned));
 
 
         FPRINTF(stderr, "Token: %u\n", token);
         FPRINTF(stderr, "Token: %u\n", token);
@@ -74,16 +83,33 @@ int main(int argc, char **argv)
 
 
 	for(i=0; i<ntasks; i++)
 	for(i=0; i<ntasks; i++)
 	{
 	{
-                starpu_data_acquire_cb(token_handle, STARPU_W, callback, NULL);  // recv
-                increment_token(0);
+                ret = starpu_data_acquire_cb(token_handle, STARPU_W, callback, NULL);  // recv
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire_cb");
+
+                ret = increment_token(0);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
                 starpu_data_acquire_cb(token_handle, STARPU_R, callback, NULL);  // send
                 starpu_data_acquire_cb(token_handle, STARPU_R, callback, NULL);  // send
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire_cb");
 	}
 	}
 
 
 	starpu_data_unregister(token_handle);
 	starpu_data_unregister(token_handle);
-        FPRINTF(stderr, "Token: %u\n", token);
-        assert(token==ntasks);
 
 
 	starpu_shutdown();
 	starpu_shutdown();
 
 
-	return 0;
+        FPRINTF(stderr, "Token: %u\n", token);
+	if (token == ntasks)
+		ret = EXIT_SUCCESS;
+	else
+		ret = EXIT_FAILURE;
+	STARPU_RETURN(ret);
+
+enodev:
+	starpu_data_unregister(token_handle);
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+ 	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
 }
 }

+ 34 - 30
tests/datawizard/copy.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010-2011  Université de Bordeaux 1
  * Copyright (C) 2010-2011  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -16,8 +16,7 @@
  */
  */
 
 
 #include <starpu.h>
 #include <starpu.h>
-
-#define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
+#include "../helper.h"
 
 
 static unsigned nloops = 1000;
 static unsigned nloops = 1000;
 
 
@@ -25,37 +24,41 @@ static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attri
 {
 {
 }
 }
 
 
-static starpu_codelet cpu_codelet =
+static struct starpu_codelet cpu_codelet =
 {
 {
         .where = STARPU_CPU,
         .where = STARPU_CPU,
-        .cpu_func = dummy_func,
+        .cpu_funcs = {dummy_func, NULL},
         .model = NULL,
         .model = NULL,
-        .nbuffers = 1
+        .nbuffers = 1,
+	.modes = {STARPU_RW}
 };
 };
 
 
-static starpu_codelet gpu_codelet =
+static struct starpu_codelet gpu_codelet =
 {
 {
         .where = STARPU_CUDA|STARPU_OPENCL,
         .where = STARPU_CUDA|STARPU_OPENCL,
-        .cuda_func = dummy_func,
-        .opencl_func = dummy_func,
+        .cuda_funcs = {dummy_func, NULL},
+        .opencl_funcs = {dummy_func, NULL},
         .model = NULL,
         .model = NULL,
-        .nbuffers = 1
+        .nbuffers = 1,
+	.modes = {STARPU_RW}
 };
 };
 
 
 
 
 int main(int argc, char **argv)
 int main(int argc, char **argv)
 {
 {
         float foo;
         float foo;
-	starpu_data_handle float_array_handle;
-        int i;
+	starpu_data_handle_t float_array_handle;
+        int i, ret;
 
 
-        starpu_init(NULL);
+        ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
 	if (starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) == 0 && starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER) == 0)
 	if (starpu_worker_get_count_by_type(STARPU_CUDA_WORKER) == 0 && starpu_worker_get_count_by_type(STARPU_OPENCL_WORKER) == 0)
 	{
 	{
 		FPRINTF(stderr, "This application requires a CUDA or OpenCL Worker\n");
 		FPRINTF(stderr, "This application requires a CUDA or OpenCL Worker\n");
 		starpu_shutdown();
 		starpu_shutdown();
-		return 77;
+		return STARPU_TEST_SKIPPED;
 	}
 	}
 
 
         foo = 0.0f;
         foo = 0.0f;
@@ -71,32 +74,33 @@ int main(int argc, char **argv)
 
 
 		task_cpu->cl = &cpu_codelet;
 		task_cpu->cl = &cpu_codelet;
 		task_cpu->callback_func = NULL;
 		task_cpu->callback_func = NULL;
-		task_cpu->buffers[0].handle = float_array_handle;
-		task_cpu->buffers[0].mode = STARPU_RW;
+		task_cpu->handles[0] = float_array_handle;
 
 
 		task_gpu->cl = &gpu_codelet;
 		task_gpu->cl = &gpu_codelet;
 		task_gpu->callback_func = NULL;
 		task_gpu->callback_func = NULL;
-		task_gpu->buffers[0].handle = float_array_handle;
-		task_gpu->buffers[0].mode = STARPU_RW;
+		task_gpu->handles[0] = float_array_handle;
 
 
 		ret = starpu_task_submit(task_cpu);
 		ret = starpu_task_submit(task_cpu);
-		if (STARPU_UNLIKELY(ret == -ENODEV))
-		{
-			FPRINTF(stderr, "No worker may execute this task\n");
-			exit(0);
-		}
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 
 
 		ret = starpu_task_submit(task_gpu);
 		ret = starpu_task_submit(task_gpu);
-		if (STARPU_UNLIKELY(ret == -ENODEV))
-		{
-			FPRINTF(stderr, "No worker may execute this task\n");
-			exit(0);
-		}
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
         }
         }
 
 
-	starpu_task_wait_for_all();
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
 	starpu_data_unregister(float_array_handle);
 	starpu_data_unregister(float_array_handle);
         starpu_shutdown();
         starpu_shutdown();
 
 
-        return 0;
+        return EXIT_SUCCESS;
+
+enodev:
+	starpu_data_unregister(float_array_handle);
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+ 	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
 }
 }
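The task-side counterpart, sketched as a self-contained program: task->buffers[i].handle/.mode pairs become task->handles[i] (the mode now lives in the codelet), and -ENODEV from starpu_task_submit leads to unregistering, shutting down and skipping the test. The no-op kernel and the registered variable are placeholders:

#include <errno.h>
#include <stdlib.h>
#include <starpu.h>
#include "../helper.h"

static void noop(void *descr[] __attribute__ ((unused)), void *arg __attribute__ ((unused)))
{
}

static struct starpu_codelet cl =
{
	.where = STARPU_CPU,
	.cpu_funcs = {noop, NULL},
	.nbuffers = 1,
	.modes = {STARPU_RW}
};

int main(int argc, char **argv)
{
	float foo = 0.0f;
	starpu_data_handle_t handle;
	int ret;

	ret = starpu_init(NULL);
	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	starpu_variable_data_register(&handle, 0, (uintptr_t)&foo, sizeof(foo));

	struct starpu_task *task = starpu_task_create();
	task->cl = &cl;
	task->handles[0] = handle;	/* was: task->buffers[0].handle/.mode */

	ret = starpu_task_submit(task);
	if (ret == -ENODEV) goto enodev;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");

	ret = starpu_task_wait_for_all();
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
	starpu_data_unregister(handle);
	starpu_shutdown();
	return EXIT_SUCCESS;

enodev:
	/* No worker can run the kernel: not a StarPU error, skip the test. */
	starpu_data_unregister(handle);
	starpu_shutdown();
	return STARPU_TEST_SKIPPED;
}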

+ 24 - 15
tests/datawizard/critical_section_with_void_interface.c

@@ -1,6 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010  Université de Bordeaux 1
  * Copyright (C) 2010  Université de Bordeaux 1
+ * Copyright (C) 2012  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -14,29 +15,35 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
+#include <config.h>
 #include <stdio.h>
 #include <stdio.h>
 #include <unistd.h>
 #include <unistd.h>
 #include <errno.h>
 #include <errno.h>
 #include <starpu.h>
 #include <starpu.h>
 #include <stdlib.h>
 #include <stdlib.h>
+#include "../helper.h"
 
 
-starpu_data_handle void_handle;
+starpu_data_handle_t void_handle;
 
 
 int critical_var;
 int critical_var;
 
 
 static void critical_section(void *descr[], __attribute__ ((unused)) void *_args)
 static void critical_section(void *descr[], __attribute__ ((unused)) void *_args)
 {
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	/* We do not protect this variable because it is only accessed when the
 	/* We do not protect this variable because it is only accessed when the
 	 * "void_handle" piece of data is accessed. */
 	 * "void_handle" piece of data is accessed. */
 	critical_var++;
 	critical_var++;
 }
 }
 
 
-static starpu_codelet cl = {
+static struct starpu_codelet cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
 	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
-	.cpu_func = critical_section,
-	.cuda_func = critical_section,
-	.opencl_func = critical_section,
-	.nbuffers = 1
+	.cpu_funcs = {critical_section, NULL},
+	.cuda_funcs = {critical_section, NULL},
+	.opencl_funcs = {critical_section, NULL},
+	.nbuffers = 1,
+	.modes = {STARPU_RW}
 };
 };
 
 
 int main(int argc, char **argv)
 int main(int argc, char **argv)
@@ -48,7 +55,9 @@ int main(int argc, char **argv)
 	ntasks /= 10;
 	ntasks /= 10;
 #endif
 #endif
 
 
-	starpu_init(NULL);
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
 	critical_var = 0;
 	critical_var = 0;
 
 
@@ -59,13 +68,12 @@ int main(int argc, char **argv)
 	for (i = 0; i < ntasks; i++)
 	for (i = 0; i < ntasks; i++)
 	{
 	{
 		struct starpu_task *task = starpu_task_create();
 		struct starpu_task *task = starpu_task_create();
-			task->cl = &cl;
-			task->buffers[0].handle = void_handle;
-			task->buffers[0].mode = STARPU_RW;
-	
+		task->cl = &cl;
+		task->handles[0] = void_handle;
+
 		ret = starpu_task_submit(task);
 		ret = starpu_task_submit(task);
-		if (ret == -ENODEV)
-			goto enodev;
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	}
 	}
 
 
 	starpu_data_unregister(void_handle);
 	starpu_data_unregister(void_handle);
@@ -74,11 +82,12 @@ int main(int argc, char **argv)
 
 
 	starpu_shutdown();
 	starpu_shutdown();
 
 
-	return 0;
+	return EXIT_SUCCESS;
 
 
 enodev:
 enodev:
 	fprintf(stderr, "WARNING: No one can execute this task\n");
 	fprintf(stderr, "WARNING: No one can execute this task\n");
 	/* yes, we do not perform the computation but we did detect that no one
 	/* yes, we do not perform the computation but we did detect that no one
  	 * could perform the kernel, so this is not an error from StarPU */
  	 * could perform the kernel, so this is not an error from StarPU */
-	return 77;
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
 }
 }

+ 66 - 32
tests/datawizard/data_implicit_deps.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2010  Université de Bordeaux 1
  * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -15,61 +15,78 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
+#include <config.h>
 #include <stdio.h>
 #include <stdio.h>
 #include <unistd.h>
 #include <unistd.h>
 #include <errno.h>
 #include <errno.h>
 #include <starpu.h>
 #include <starpu.h>
 #include <stdlib.h>
 #include <stdlib.h>
+#include "../helper.h"
 
 
 #define VECTORSIZE	1024
 #define VECTORSIZE	1024
-#define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
 
 
 static unsigned *A, *B, *C, *D;
 static unsigned *A, *B, *C, *D;
-starpu_data_handle A_handle, B_handle, C_handle, D_handle;
+starpu_data_handle_t A_handle, B_handle, C_handle, D_handle;
 
 
 static unsigned var = 0;
 static unsigned var = 0;
 
 
 static void f(void *descr[], __attribute__ ((unused)) void *_args)
 static void f(void *descr[], __attribute__ ((unused)) void *_args)
 {
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	usleep(200000);
 	usleep(200000);
 }
 }
 
 
-static starpu_codelet cl_f = {
+static struct starpu_codelet cl_f =
+{
+	.modes = { STARPU_R, STARPU_RW },
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
-	.cpu_func = f,
-	.cuda_func = f,
+	.cpu_funcs = {f, NULL},
+	.cuda_funcs = {f, NULL},
 	.nbuffers = 2
 	.nbuffers = 2
 };
 };
 
 
 static void g(void *descr[], __attribute__ ((unused)) void *_args)
 static void g(void *descr[], __attribute__ ((unused)) void *_args)
 {
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	usleep(100000);
 	usleep(100000);
 	var = 42;
 	var = 42;
 }
 }
 
 
-static starpu_codelet cl_g = {
+static struct starpu_codelet cl_g =
+{
+	.modes = { STARPU_R, STARPU_RW },
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
-	.cpu_func = g,
-	.cuda_func = g,
+	.cpu_funcs = {g, NULL},
+	.cuda_funcs = {g, NULL},
 	.nbuffers = 2
 	.nbuffers = 2
 };
 };
 
 
 static void h(void *descr[], __attribute__ ((unused)) void *_args)
 static void h(void *descr[], __attribute__ ((unused)) void *_args)
 {
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	FPRINTF(stderr, "VAR %u (should be 42)\n", var);
 	FPRINTF(stderr, "VAR %u (should be 42)\n", var);
 	STARPU_ASSERT(var == 42);
 	STARPU_ASSERT(var == 42);
 }
 }
 
 
-static starpu_codelet cl_h = {
+static struct starpu_codelet cl_h =
+{
+	.modes = { STARPU_R, STARPU_RW },
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
-	.cpu_func = h,
-	.cuda_func = h,
+	.cpu_funcs = {h, NULL},
+	.cuda_funcs = {h, NULL},
 	.nbuffers = 2
 	.nbuffers = 2
 };
 };
 
 
 int main(int argc, char **argv)
 int main(int argc, char **argv)
 {
 {
-	starpu_init(NULL);
+	int ret;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
 	A = (unsigned *) malloc(VECTORSIZE*sizeof(unsigned));
 	A = (unsigned *) malloc(VECTORSIZE*sizeof(unsigned));
 	B = (unsigned *) malloc(VECTORSIZE*sizeof(unsigned));
 	B = (unsigned *) malloc(VECTORSIZE*sizeof(unsigned));
@@ -81,12 +98,12 @@ int main(int argc, char **argv)
 	starpu_vector_data_register(&C_handle, 0, (uintptr_t)C, VECTORSIZE, sizeof(unsigned));
 	starpu_vector_data_register(&C_handle, 0, (uintptr_t)C, VECTORSIZE, sizeof(unsigned));
 	starpu_vector_data_register(&D_handle, 0, (uintptr_t)D, VECTORSIZE, sizeof(unsigned));
 	starpu_vector_data_register(&D_handle, 0, (uintptr_t)D, VECTORSIZE, sizeof(unsigned));
 
 
-	#if 0
+#if 0
 	starpu_data_set_sequential_consistency_flag(A_handle, 0);
 	starpu_data_set_sequential_consistency_flag(A_handle, 0);
 	starpu_data_set_sequential_consistency_flag(B_handle, 0);
 	starpu_data_set_sequential_consistency_flag(B_handle, 0);
 	starpu_data_set_sequential_consistency_flag(C_handle, 0);
 	starpu_data_set_sequential_consistency_flag(C_handle, 0);
 	starpu_data_set_sequential_consistency_flag(D_handle, 0);
 	starpu_data_set_sequential_consistency_flag(D_handle, 0);
-	#endif
+#endif
 
 
 	/* 	f(Ar, Brw): sleep 
 	/* 	f(Ar, Brw): sleep 
 	 *	g(Br; Crw); sleep, var = 42
 	 *	g(Br; Crw); sleep, var = 42
@@ -94,29 +111,35 @@ int main(int argc, char **argv)
 	 */
 	 */
 	struct starpu_task *task_f = starpu_task_create();
 	struct starpu_task *task_f = starpu_task_create();
 	task_f->cl = &cl_f;
 	task_f->cl = &cl_f;
-	task_f->buffers[0].handle = A_handle;
-	task_f->buffers[0].mode = STARPU_R;
-	task_f->buffers[1].handle = B_handle;
-	task_f->buffers[1].mode = STARPU_RW;
-	starpu_task_submit(task_f);
+	task_f->handles[0] = A_handle;
+	task_f->handles[1] = B_handle;
+	ret = starpu_task_submit(task_f);
+	if (ret == -ENODEV) goto enodev;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 
 
 	struct starpu_task *task_g = starpu_task_create();
 	struct starpu_task *task_g = starpu_task_create();
 	task_g->cl = &cl_g;
 	task_g->cl = &cl_g;
-	task_g->buffers[0].handle = B_handle;
-	task_g->buffers[0].mode = STARPU_R;
-	task_g->buffers[1].handle = C_handle;
-	task_g->buffers[1].mode = STARPU_RW;
-	starpu_task_submit(task_g);
+	task_g->handles[0] = B_handle;
+	task_g->handles[1] = C_handle;
+	ret = starpu_task_submit(task_g);
+	if (ret == -ENODEV) goto enodev;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 
 
 	struct starpu_task *task_h = starpu_task_create();
 	struct starpu_task *task_h = starpu_task_create();
 	task_h->cl = &cl_h;
 	task_h->cl = &cl_h;
-	task_h->buffers[0].handle = C_handle;
-	task_h->buffers[0].mode = STARPU_R;
-	task_h->buffers[1].handle = D_handle;
-	task_h->buffers[1].mode = STARPU_RW;
-	starpu_task_submit(task_h);
+	task_h->handles[0] = C_handle;
+	task_h->handles[1] = D_handle;
+	ret = starpu_task_submit(task_h);
+	if (ret == -ENODEV) goto enodev;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 
 
-	starpu_task_wait_for_all();
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+
+	starpu_data_unregister(A_handle);
+	starpu_data_unregister(B_handle);
+	starpu_data_unregister(C_handle);
+	starpu_data_unregister(D_handle);
 
 
 	free(A);
 	free(A);
 	free(B);
 	free(B);
@@ -125,5 +148,16 @@ int main(int argc, char **argv)
 
 
 	starpu_shutdown();
 	starpu_shutdown();
 
 
-	return 0;
+	return EXIT_SUCCESS;
+
+enodev:
+	free(A);
+	free(B);
+	free(C);
+	free(D);
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+ 	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
 }
 }

+ 46 - 30
tests/datawizard/data_invalidation.c

@@ -1,6 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
- * Copyright (C) 2010  Université de Bordeaux 1
+ * Copyright (C) 2010, 2012  Université de Bordeaux 1
+ * Copyright (C) 2012  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -14,18 +15,23 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
  */
 
 
+#include <config.h>
 #include <stdio.h>
 #include <stdio.h>
 #include <unistd.h>
 #include <unistd.h>
 #include <errno.h>
 #include <errno.h>
 #include <starpu.h>
 #include <starpu.h>
 #include <starpu_cuda.h>
 #include <starpu_cuda.h>
 #include <stdlib.h>
 #include <stdlib.h>
+#include "../helper.h"
 
 
+#ifdef STARPU_SLOW_MACHINE
+#define NLOOPS		100
+#else
 #define NLOOPS		1000
 #define NLOOPS		1000
+#endif
 #define VECTORSIZE	1024
 #define VECTORSIZE	1024
-#define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
 
 
-static starpu_data_handle v_handle;
+static starpu_data_handle_t v_handle;
 
 
 /*
 /*
  *	Memset
  *	Memset
@@ -34,6 +40,8 @@ static starpu_data_handle v_handle;
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
 static void cuda_memset_codelet(void *descr[], __attribute__ ((unused)) void *_args)
 static void cuda_memset_codelet(void *descr[], __attribute__ ((unused)) void *_args)
 {
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]);
 	char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]);
 	unsigned length = STARPU_VECTOR_GET_NX(descr[0]);
 	unsigned length = STARPU_VECTOR_GET_NX(descr[0]);
 
 
@@ -44,19 +52,23 @@ static void cuda_memset_codelet(void *descr[], __attribute__ ((unused)) void *_a
 
 
 static void cpu_memset_codelet(void *descr[], __attribute__ ((unused)) void *_args)
 static void cpu_memset_codelet(void *descr[], __attribute__ ((unused)) void *_args)
 {
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]);
 	char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]);
 	unsigned length = STARPU_VECTOR_GET_NX(descr[0]);
 	unsigned length = STARPU_VECTOR_GET_NX(descr[0]);
 
 
 	memset(buf, 42, length);
 	memset(buf, 42, length);
 }
 }
 
 
-static starpu_codelet memset_cl = {
+static struct starpu_codelet memset_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 	.where = STARPU_CPU|STARPU_CUDA,
-	.cpu_func = cpu_memset_codelet,
+	.cpu_funcs = {cpu_memset_codelet, NULL},
 #ifdef STARPU_USE_CUDA
 #ifdef STARPU_USE_CUDA
-	.cuda_func = cuda_memset_codelet,
+	.cuda_funcs = {cuda_memset_codelet, NULL},
 #endif
 #endif
-	.nbuffers = 1
+	.nbuffers = 1,
+	.modes = {STARPU_W}
 };
 };
 
 
 /*
 /*
@@ -65,6 +77,8 @@ static starpu_codelet memset_cl = {
 
 
 static void cpu_check_content_codelet(void *descr[], __attribute__ ((unused)) void *_args)
 static void cpu_check_content_codelet(void *descr[], __attribute__ ((unused)) void *_args)
 {
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]);
 	char *buf = (char *)STARPU_VECTOR_GET_PTR(descr[0]);
 	unsigned length = STARPU_VECTOR_GET_NX(descr[0]);
 	unsigned length = STARPU_VECTOR_GET_NX(descr[0]);
 
 
@@ -79,10 +93,12 @@ static void cpu_check_content_codelet(void *descr[], __attribute__ ((unused)) vo
 	}
 	}
 }
 }
 
 
-static starpu_codelet check_content_cl = {
+static struct starpu_codelet check_content_cl =
+{
 	.where = STARPU_CPU,
 	.where = STARPU_CPU,
-	.cpu_func = cpu_check_content_codelet,
-	.nbuffers = 1
+	.cpu_funcs = {cpu_check_content_codelet, NULL},
+	.nbuffers = 1,
+	.modes = {STARPU_R}
 };
 };
 
 
 
 
@@ -90,7 +106,9 @@ int main(int argc, char **argv)
 {
 {
 	int ret;
 	int ret;
 
 
-	starpu_init(NULL);
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
 	/* The buffer should never be explicitely allocated */
 	/* The buffer should never be explicitely allocated */
 	starpu_vector_data_register(&v_handle, (uint32_t)-1, (uintptr_t)NULL, VECTORSIZE, sizeof(char));
 	starpu_vector_data_register(&v_handle, (uint32_t)-1, (uintptr_t)NULL, VECTORSIZE, sizeof(char));
@@ -103,31 +121,27 @@ int main(int argc, char **argv)
 
 
 		memset_task = starpu_task_create();
 		memset_task = starpu_task_create();
 		memset_task->cl = &memset_cl;
 		memset_task->cl = &memset_cl;
-		memset_task->buffers[0].handle = v_handle;
-		memset_task->buffers[0].mode = STARPU_W;
+		memset_task->handles[0] = v_handle;
 		memset_task->detach = 0;
 		memset_task->detach = 0;
-	
+
 		ret = starpu_task_submit(memset_task);
 		ret = starpu_task_submit(memset_task);
-		if (ret == -ENODEV)
-				goto enodev;
-	
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
 		ret = starpu_task_wait(memset_task);
 		ret = starpu_task_wait(memset_task);
-		if (ret)
-			exit(-1);
-		
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait");
+
 		check_content_task = starpu_task_create();
 		check_content_task = starpu_task_create();
 		check_content_task->cl = &check_content_cl;
 		check_content_task->cl = &check_content_cl;
-		check_content_task->buffers[0].handle = v_handle;
-		check_content_task->buffers[0].mode = STARPU_R;
+		check_content_task->handles[0] = v_handle;
 		check_content_task->detach = 0;
 		check_content_task->detach = 0;
-	
+
 		ret = starpu_task_submit(check_content_task);
 		ret = starpu_task_submit(check_content_task);
-		if (ret == -ENODEV)
-				goto enodev;
-	
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+
 		ret = starpu_task_wait(check_content_task);
 		ret = starpu_task_wait(check_content_task);
-		if (ret)
-			exit(-1);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait");
 
 
 		starpu_data_invalidate(v_handle);
 		starpu_data_invalidate(v_handle);
 	}
 	}
@@ -137,11 +151,13 @@ int main(int argc, char **argv)
 
 
 	starpu_shutdown();
 	starpu_shutdown();
 
 
-	return 0;
+	return EXIT_SUCCESS;
 
 
 enodev:
 enodev:
+	starpu_data_unregister(v_handle);
 	fprintf(stderr, "WARNING: No one can execute this task\n");
 	fprintf(stderr, "WARNING: No one can execute this task\n");
 	/* yes, we do not perform the computation but we did detect that no one
 	/* yes, we do not perform the computation but we did detect that no one
  	 * could perform the kernel, so this is not an error from StarPU */
  	 * could perform the kernel, so this is not an error from StarPU */
-	return 77;
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
 }
 }
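A brief sketch of the lazy-allocation idiom this test exercises, assuming only the calls visible in the hunks above: registering a vector with memory node (uint32_t)-1 and a NULL pointer lets StarPU allocate the buffer on first use, and starpu_data_invalidate() drops every cached copy so the data must be rewritten before the next read:

#include <errno.h>
#include <stdlib.h>
#include <starpu.h>
#include "../helper.h"

#define SIZE 1024

int main(int argc, char **argv)
{
	starpu_data_handle_t v_handle;
	int ret;

	ret = starpu_init(NULL);
	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	/* No initial pointer: node (uint32_t)-1 and a NULL address mean the
	 * buffer is allocated lazily, wherever it is first written. */
	starpu_vector_data_register(&v_handle, (uint32_t)-1, (uintptr_t)NULL,
				    SIZE, sizeof(char));

	/* ... submit a task writing the vector, then one reading it back ... */

	/* Drop every cached copy; the data must be written again before any
	 * further read. */
	starpu_data_invalidate(v_handle);

	starpu_data_unregister(v_handle);
	starpu_shutdown();
	return EXIT_SUCCESS;
}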

+ 48 - 39
tests/datawizard/data_lookup.c

@@ -20,6 +20,7 @@
 #include <starpu.h>
 #include <starpu.h>
 #include <stdlib.h>
 #include <stdlib.h>
 #include <sys/types.h>
 #include <sys/types.h>
+#include "../helper.h"
 
 
 static void task(void **buffers, void *args)
 static void task(void **buffers, void *args)
 {
 {
@@ -27,17 +28,19 @@ static void task(void **buffers, void *args)
 	size_t size, i;
 	size_t size, i;
 
 
 	numbers = (float *) STARPU_VECTOR_GET_PTR(buffers[0]);
 	numbers = (float *) STARPU_VECTOR_GET_PTR(buffers[0]);
-	starpu_unpack_cl_args (args, &size);
+	starpu_codelet_unpack_args (args, &size);
 	for(i = 0; i < size; i++)
 	for(i = 0; i < size; i++)
 	{
 	{
 		numbers[i] = i;
 		numbers[i] = i;
 	}
 	}
 }
 }
 
 
-static starpu_codelet cl = {
+static struct starpu_codelet cl =
+{
 	.where = STARPU_CPU,
 	.where = STARPU_CPU,
-	.cpu_func = task,
-	.nbuffers = 1
+	.cpu_funcs = {task, NULL},
+	.nbuffers = 1,
+	.modes = {STARPU_W}
 };
 };
 
 
 static int test_lazy_allocation()
 static int test_lazy_allocation()
@@ -46,7 +49,7 @@ static int test_lazy_allocation()
 
 
 	size_t i;
 	size_t i;
 	void *pointer;
 	void *pointer;
-	starpu_data_handle handle;
+	starpu_data_handle_t handle;
 	int ret;
 	int ret;
 
 
 	/* Lazily-allocated vector.  */
 	/* Lazily-allocated vector.  */
@@ -58,28 +61,31 @@ static int test_lazy_allocation()
 				 STARPU_VALUE, &count, sizeof(size_t),
 				 STARPU_VALUE, &count, sizeof(size_t),
 				 0);
 				 0);
 	if (ret == -ENODEV) return ret;
 	if (ret == -ENODEV) return ret;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_insert_task");
+
 	/* yes, we do not perform the computation but we did detect that no one
 	/* yes, we do not perform the computation but we did detect that no one
 	 * could perform the kernel, so this is not an error from StarPU */
 	 * could perform the kernel, so this is not an error from StarPU */
 
 
 	/* Acquire the handle, forcing a local allocation.  */
 	/* Acquire the handle, forcing a local allocation.  */
-	starpu_data_acquire(handle, STARPU_R);
+	ret = starpu_data_acquire(handle, STARPU_R);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire");
 
 
 	/* Make sure we have a local pointer to it.  */
 	/* Make sure we have a local pointer to it.  */
 	pointer = starpu_handle_get_local_ptr(handle);
 	pointer = starpu_handle_get_local_ptr(handle);
-	assert(pointer != NULL);
+	STARPU_ASSERT(pointer != NULL);
 	for(i = 0; i < count; i++)
 	for(i = 0; i < count; i++)
 	{
 	{
 		float *numbers = (float *)pointer;
 		float *numbers = (float *)pointer;
-		assert(numbers[i] == i);
+		STARPU_ASSERT(numbers[i] == i);
 	}
 	}
 
 
 	/* Make sure the pointer/handle mapping is up-to-date.  */
 	/* Make sure the pointer/handle mapping is up-to-date.  */
-	assert(starpu_data_lookup(pointer) == handle);
+	STARPU_ASSERT(starpu_data_lookup(pointer) == handle);
 
 
 	starpu_data_release(handle);
 	starpu_data_release(handle);
 	starpu_data_unregister(handle);
 	starpu_data_unregister(handle);
 
 
-	assert(starpu_data_lookup(pointer) == NULL);
+	STARPU_ASSERT(starpu_data_lookup(pointer) == NULL);
 	return 0;
 	return 0;
 }
 }
 
 
@@ -91,12 +97,12 @@ static int test_lazy_allocation()
 static void test_filters()
 static void test_filters()
 {
 {
 #define CHILDREN_COUNT 10
 #define CHILDREN_COUNT 10
-	int err, i;
+	int ret, i;
 	int *ptr, *children_pointers[CHILDREN_COUNT];
 	int *ptr, *children_pointers[CHILDREN_COUNT];
-	starpu_data_handle handle;
+	starpu_data_handle_t handle;
 
 
-	err = starpu_malloc((void**)&ptr, VECTOR_SIZE * sizeof(*ptr));
-	assert(err == 0);
+	ret = starpu_malloc((void**)&ptr, VECTOR_SIZE * sizeof(*ptr));
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc");
 
 
 	starpu_vector_data_register(&handle, 0, (uintptr_t)ptr,
 	starpu_vector_data_register(&handle, 0, (uintptr_t)ptr,
 				    VECTOR_SIZE, sizeof(*ptr));
 				    VECTOR_SIZE, sizeof(*ptr));
@@ -107,18 +113,18 @@ static void test_filters()
 		.nchildren = CHILDREN_COUNT
 		.nchildren = CHILDREN_COUNT
 	};
 	};
 	starpu_data_partition(handle, &f);
 	starpu_data_partition(handle, &f);
-	assert(starpu_data_get_nb_children(handle) == CHILDREN_COUNT);
+	STARPU_ASSERT(starpu_data_get_nb_children(handle) == CHILDREN_COUNT);
 
 
 	for (i = 0; i < CHILDREN_COUNT; i++)
 	for (i = 0; i < CHILDREN_COUNT; i++)
 	{
 	{
-                starpu_data_handle child;
+                starpu_data_handle_t child;
 
 
 		child = starpu_data_get_sub_data(handle, 1, i);
 		child = starpu_data_get_sub_data(handle, 1, i);
 		children_pointers[i] = (int *) starpu_handle_get_local_ptr(child);
 		children_pointers[i] = (int *) starpu_handle_get_local_ptr(child);
-		assert(children_pointers[i] != NULL);
+		STARPU_ASSERT(children_pointers[i] != NULL);
 
 
 		/* Make sure we have a pointer -> handle mapping for CHILD.  */
 		/* Make sure we have a pointer -> handle mapping for CHILD.  */
-		assert(starpu_data_lookup(children_pointers[i]) == child);
+		STARPU_ASSERT(starpu_data_lookup(children_pointers[i]) == child);
 	}
 	}
 
 
 	starpu_data_unpartition(handle, 0);
 	starpu_data_unpartition(handle, 0);
@@ -127,11 +133,11 @@ static void test_filters()
 	{
 	{
 		if (children_pointers[i] != ptr)
 		if (children_pointers[i] != ptr)
 			/* Make sure the pointer -> handle mapping is gone.  */
 			/* Make sure the pointer -> handle mapping is gone.  */
-			assert(starpu_data_lookup(children_pointers[i]) == NULL);
+			STARPU_ASSERT(starpu_data_lookup(children_pointers[i]) == NULL);
 	}
 	}
 
 
 	/* Make sure the parent's mapping is back.  */
 	/* Make sure the parent's mapping is back.  */
-	assert(starpu_data_lookup(ptr) == handle);
+	STARPU_ASSERT(starpu_data_lookup(ptr) == handle);
 
 
 	starpu_data_unregister(handle);
 	starpu_data_unregister(handle);
 	starpu_free(ptr);
 	starpu_free(ptr);
@@ -141,20 +147,22 @@ static void test_filters()
 
 
 int main(int argc, char *argv[])
 int main(int argc, char *argv[])
 {
 {
-	int err;
+	int ret;
 	size_t i;
 	size_t i;
 	void *vectors[VECTOR_COUNT], *variables[VARIABLE_COUNT];
 	void *vectors[VECTOR_COUNT], *variables[VARIABLE_COUNT];
-	starpu_data_handle vector_handles[VECTOR_COUNT];
-	starpu_data_handle variable_handles[VARIABLE_COUNT];
+	starpu_data_handle_t vector_handles[VECTOR_COUNT];
+	starpu_data_handle_t variable_handles[VARIABLE_COUNT];
 
 
-	starpu_init(NULL);
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
 	/* Register data regions.  */
 	/* Register data regions.  */
 
 
 	for(i = 0; i < VARIABLE_COUNT; i++)
 	for(i = 0; i < VARIABLE_COUNT; i++)
 	{
 	{
-		err = starpu_malloc(&variables[i], sizeof(float));
-		assert(err == 0);
+		ret = starpu_malloc(&variables[i], sizeof(float));
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc");
 		starpu_variable_data_register(&variable_handles[i], 0,
 		starpu_variable_data_register(&variable_handles[i], 0,
 					      (uintptr_t)variables[i],
 					      (uintptr_t)variables[i],
 					      sizeof(float));
 					      sizeof(float));
@@ -162,8 +170,8 @@ int main(int argc, char *argv[])
 
 
 	for(i = 0; i < VECTOR_COUNT; i++)
 	for(i = 0; i < VECTOR_COUNT; i++)
 	{
 	{
-		err = starpu_malloc(&vectors[i], VECTOR_SIZE * sizeof(float));
-		assert(err == 0);
+		ret = starpu_malloc(&vectors[i], VECTOR_SIZE * sizeof(float));
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc");
 		starpu_vector_data_register(&vector_handles[i], 0,
 		starpu_vector_data_register(&vector_handles[i], 0,
 					    (uintptr_t)vectors[i],
 					    (uintptr_t)vectors[i],
 					    VECTOR_SIZE, sizeof(float));
 					    VECTOR_SIZE, sizeof(float));
@@ -173,18 +181,18 @@ int main(int argc, char *argv[])
 
 
 	for(i = 0; i < VARIABLE_COUNT; i++)
 	for(i = 0; i < VARIABLE_COUNT; i++)
 	{
 	{
-		starpu_data_handle handle;
+		starpu_data_handle_t handle;
 
 
 		handle = starpu_data_lookup(variables[i]);
 		handle = starpu_data_lookup(variables[i]);
-		assert(handle == variable_handles[i]);
+		STARPU_ASSERT(handle == variable_handles[i]);
 	}
 	}
 
 
 	for(i = 0; i < VECTOR_COUNT; i++)
 	for(i = 0; i < VECTOR_COUNT; i++)
 	{
 	{
-		starpu_data_handle handle;
+		starpu_data_handle_t handle;
 
 
 		handle = starpu_data_lookup(vectors[i]);
 		handle = starpu_data_lookup(vectors[i]);
-		assert(handle == vector_handles[i]);
+		STARPU_ASSERT(handle == vector_handles[i]);
 	}
 	}
 
 
 	/* Unregister them.  */
 	/* Unregister them.  */
@@ -203,24 +211,24 @@ int main(int argc, char *argv[])
 
 
 	for(i = 0; i < VARIABLE_COUNT; i++)
 	for(i = 0; i < VARIABLE_COUNT; i++)
 	{
 	{
-		starpu_data_handle handle;
+		starpu_data_handle_t handle;
 
 
 		handle = starpu_data_lookup(variables[i]);
 		handle = starpu_data_lookup(variables[i]);
-		assert(handle == NULL);
+		STARPU_ASSERT(handle == NULL);
 		starpu_free(variables[i]);
 		starpu_free(variables[i]);
 	}
 	}
 
 
 	for(i = 0; i < VECTOR_COUNT; i++)
 	for(i = 0; i < VECTOR_COUNT; i++)
 	{
 	{
-		starpu_data_handle handle;
+		starpu_data_handle_t handle;
 
 
 		handle = starpu_data_lookup(vectors[i]);
 		handle = starpu_data_lookup(vectors[i]);
-		assert(handle == NULL);
+		STARPU_ASSERT(handle == NULL);
 		starpu_free(vectors[i]);
 		starpu_free(vectors[i]);
 	}
 	}
 
 
-	err = test_lazy_allocation();
-	if (err == -ENODEV) goto enodev;
+	ret = test_lazy_allocation();
+	if (ret == -ENODEV) goto enodev;
 	test_filters();
 	test_filters();
 
 
 	starpu_shutdown();
 	starpu_shutdown();
@@ -231,5 +239,6 @@ enodev:
 	fprintf(stderr, "WARNING: No one can execute this task\n");
 	fprintf(stderr, "WARNING: No one can execute this task\n");
 	/* yes, we do not perform the computation but we did detect that no one
 	/* yes, we do not perform the computation but we did detect that no one
  	 * could perform the kernel, so this is not an error from StarPU */
  	 * could perform the kernel, so this is not an error from StarPU */
-	return 77;
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
 }
 }
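A short sketch of the argument-packing interface as renamed here: scalars passed to starpu_insert_task() via STARPU_VALUE are retrieved in the kernel with starpu_codelet_unpack_args() (formerly starpu_unpack_cl_args). The vector size and kernel are illustrative:

#include <errno.h>
#include <stdlib.h>
#include <starpu.h>
#include "../helper.h"

static void fill(void *buffers[], void *args)
{
	size_t size, i;
	float *v = (float *) STARPU_VECTOR_GET_PTR(buffers[0]);

	/* Retrieve the scalar packed with STARPU_VALUE at submission time. */
	starpu_codelet_unpack_args(args, &size);
	for (i = 0; i < size; i++)
		v[i] = i;
}

static struct starpu_codelet cl =
{
	.where = STARPU_CPU,
	.cpu_funcs = {fill, NULL},
	.nbuffers = 1,
	.modes = {STARPU_W}
};

int main(int argc, char **argv)
{
	float *v;
	size_t count = 16;
	starpu_data_handle_t handle;
	int ret;

	ret = starpu_init(NULL);
	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");

	ret = starpu_malloc((void **)&v, count * sizeof(*v));
	STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc");
	starpu_vector_data_register(&handle, 0, (uintptr_t)v, count, sizeof(*v));

	/* The size travels with the task as a packed argument. */
	ret = starpu_insert_task(&cl, STARPU_W, handle,
				 STARPU_VALUE, &count, sizeof(count),
				 0);
	if (ret != -ENODEV)
		STARPU_CHECK_RETURN_VALUE(ret, "starpu_insert_task");

	starpu_task_wait_for_all();
	starpu_data_unregister(handle);
	starpu_free(v);
	starpu_shutdown();
	return ret == -ENODEV ? STARPU_TEST_SKIPPED : EXIT_SUCCESS;
}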

+ 35 - 18
tests/datawizard/dining_philosophers.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  *
  * Copyright (C) 2009, 2010  Université de Bordeaux 1
  * Copyright (C) 2009, 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  *
  * StarPU is free software; you can redistribute it and/or modify
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
  * it under the terms of the GNU Lesser General Public License as published by
@@ -16,28 +16,29 @@
  */
  */
 
 
 #include <starpu.h>
 #include <starpu.h>
+#include "../helper.h"
 
 
 /* number of philosophers */
 /* number of philosophers */
 #define N	16
 #define N	16
 
 
-starpu_data_handle fork_handles[N];
+starpu_data_handle_t fork_handles[N];
 unsigned forks[N];
 unsigned forks[N];
 
 
-#define FPRINTF(ofile, fmt, args ...) do { if (!getenv("STARPU_SSILENT")) {fprintf(ofile, fmt, ##args); }} while(0)
-
 static void eat_kernel(void *descr[], void *arg)
 static void eat_kernel(void *descr[], void *arg)
 {
 {
 }
 }
 
 
-static starpu_codelet eating_cl = {
+static struct starpu_codelet eating_cl =
+{
+	.modes = { STARPU_RW, STARPU_RW },
 	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
 	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
-	.cuda_func = eat_kernel,
-	.cpu_func = eat_kernel,
-        .opencl_func = eat_kernel,
+	.cuda_funcs = {eat_kernel, NULL},
+	.cpu_funcs = {eat_kernel, NULL},
+        .opencl_funcs = {eat_kernel, NULL},
 	.nbuffers = 2
 	.nbuffers = 2
 };
 };
 
 
-void submit_one_task(unsigned p)
+int submit_one_task(unsigned p)
 {
 {
 	struct starpu_task *task = starpu_task_create();
 	struct starpu_task *task = starpu_task_create();
 
 
@@ -46,18 +47,20 @@ void submit_one_task(unsigned p)
 	unsigned left = p;
 	unsigned left = p;
 	unsigned right = (p+1)%N;
 	unsigned right = (p+1)%N;
 
 
-	task->buffers[0].handle = fork_handles[left];
-	task->buffers[0].mode = STARPU_RW;
-	task->buffers[1].handle = fork_handles[right];
-	task->buffers[1].mode = STARPU_RW;
+	task->handles[0] = fork_handles[left];
+	task->handles[1] = fork_handles[right];
 
 
 	int ret = starpu_task_submit(task);
 	int ret = starpu_task_submit(task);
-	STARPU_ASSERT(!ret);
+	return ret;
 }
 }
 
 
 int main(int argc, char **argv)
 int main(int argc, char **argv)
 {
 {
-	starpu_init(NULL);
+	int ret;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 
 	/* initialize the forks */
 	/* initialize the forks */
 	unsigned f;
 	unsigned f;
@@ -75,10 +78,13 @@ int main(int argc, char **argv)
 	{
 	{
 		/* select one philosopher randomly */
 		/* select one philosopher randomly */
 		unsigned philosopher = rand() % N;
 		unsigned philosopher = rand() % N;
-		submit_one_task(philosopher);
+		ret = submit_one_task(philosopher);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	}
 	}
 
 
-	starpu_task_wait_for_all();
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
 
 
 	FPRINTF(stderr, "waiting done\n");
 	FPRINTF(stderr, "waiting done\n");
 	for (f = 0; f < N; f++)
 	for (f = 0; f < N; f++)
@@ -88,5 +94,16 @@ int main(int argc, char **argv)
 
 
 	starpu_shutdown();
 	starpu_shutdown();
 
 
-	return 0;
+	return EXIT_SUCCESS;
+
+enodev:
+	for (f = 0; f < N; f++)
+	{
+		starpu_data_unregister(fork_handles[f]);
+	}
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+ 	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
 }
 }

+ 174 - 0
tests/datawizard/double_parameter.c

@@ -0,0 +1,174 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Université de Bordeaux 1
+ * Copyright (C) 2012  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include "../helper.h"
+
+static void dummy_func(void *descr[] __attribute__ ((unused)), void *arg __attribute__ ((unused)))
+{
+}
+
+static struct starpu_codelet codelet_R_R =
+{
+        .where = STARPU_CPU,
+        .cpu_funcs = { dummy_func, NULL },
+        .model = NULL,
+        .nbuffers = 2,
+	.modes = {STARPU_R, STARPU_R}
+};
+
+static struct starpu_codelet codelet_R_W =
+{
+        .where = STARPU_CPU,
+        .cpu_funcs = { dummy_func, NULL },
+        .model = NULL,
+        .nbuffers = 2,
+	.modes = {STARPU_R, STARPU_W}
+};
+
+static struct starpu_codelet codelet_R_RW =
+{
+        .where = STARPU_CPU,
+        .cpu_funcs = { dummy_func, NULL },
+        .model = NULL,
+        .nbuffers = 2,
+	.modes = {STARPU_R, STARPU_RW}
+};
+
+static struct starpu_codelet codelet_W_R =
+{
+        .where = STARPU_CPU,
+        .cpu_funcs = { dummy_func, NULL },
+        .model = NULL,
+        .nbuffers = 2,
+	.modes = {STARPU_W, STARPU_R}
+};
+
+static struct starpu_codelet codelet_W_W =
+{
+        .where = STARPU_CPU,
+        .cpu_funcs = { dummy_func, NULL },
+        .model = NULL,
+        .nbuffers = 2,
+	.modes = {STARPU_W, STARPU_W}
+};
+
+static struct starpu_codelet codelet_W_RW =
+{
+        .where = STARPU_CPU,
+        .cpu_funcs = { dummy_func, NULL },
+        .model = NULL,
+        .nbuffers = 2,
+	.modes = {STARPU_W, STARPU_RW}
+};
+
+static struct starpu_codelet codelet_RW_R =
+{
+        .where = STARPU_CPU,
+        .cpu_funcs = { dummy_func, NULL },
+        .model = NULL,
+        .nbuffers = 2,
+	.modes = {STARPU_RW, STARPU_R}
+};
+
+static struct starpu_codelet codelet_RW_W =
+{
+        .where = STARPU_CPU,
+        .cpu_funcs = { dummy_func, NULL },
+        .model = NULL,
+        .nbuffers = 2,
+	.modes = {STARPU_RW, STARPU_W}
+};
+
+static struct starpu_codelet codelet_RW_RW =
+{
+        .where = STARPU_CPU,
+        .cpu_funcs = { dummy_func, NULL },
+        .model = NULL,
+        .nbuffers = 2,
+	.modes = {STARPU_RW, STARPU_RW}
+};
+
+int main(int argc, char **argv)
+{
+	float foo = 0.0f;
+	starpu_data_handle_t handle;
+	int ret;
+	struct starpu_task *task;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	starpu_variable_data_register(&handle, 0, (uintptr_t)&foo, sizeof(foo));
+
+#define SUBMIT(mode0, mode1) \
+	{ \
+		task = starpu_task_create();	\
+		task->handles[0] = handle;	\
+		task->handles[1] = handle;		 \
+		enum starpu_access_mode smode0 = STARPU_##mode0;	\
+		enum starpu_access_mode smode1 = STARPU_##mode1;	\
+		if      (smode0 == STARPU_R && smode1 == STARPU_R)	\
+			task->cl = &codelet_R_R;			\
+		else if (smode0 == STARPU_R && smode1 == STARPU_W)	\
+			task->cl = &codelet_R_W;			\
+		else if (smode0 == STARPU_R && smode1 == STARPU_RW)	\
+			task->cl = &codelet_R_RW;			\
+		else if (smode0 == STARPU_W && smode1 == STARPU_R)	\
+			task->cl = &codelet_W_R;			\
+		else if (smode0 == STARPU_W && smode1 == STARPU_W)	\
+			task->cl = &codelet_W_W;			\
+		else if (smode0 == STARPU_W && smode1 == STARPU_RW)	\
+			task->cl = &codelet_W_RW;			\
+		else if (smode0 == STARPU_RW && smode1 == STARPU_R)	\
+			task->cl = &codelet_RW_R;			\
+		else if (smode0 == STARPU_RW && smode1 == STARPU_W)	\
+			task->cl = &codelet_RW_W;			\
+		else if (smode0 == STARPU_RW && smode1 == STARPU_RW)	\
+			task->cl = &codelet_RW_RW;			\
+									\
+		ret = starpu_task_submit(task);				\
+		if (ret == -ENODEV) goto enodev;			\
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");   \
+	}
+
+	SUBMIT(R,R);
+	SUBMIT(R,W);
+	SUBMIT(R,RW);
+	SUBMIT(W,R);
+	SUBMIT(W,W);
+	SUBMIT(W,RW);
+	SUBMIT(RW,R);
+	SUBMIT(RW,W);
+	SUBMIT(RW,RW);
+
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+	starpu_data_unregister(handle);
+	starpu_shutdown();
+
+        return EXIT_SUCCESS;
+
+enodev:
+	starpu_data_unregister(handle);
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+ 	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}

+ 142 - 28
tests/datawizard/dsm_stress.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -21,6 +21,7 @@
 #include <starpu.h>
 #include <stdlib.h>
 #include <pthread.h>
+#include "../helper.h"
 
 #define N	10000
 
@@ -33,7 +34,7 @@ static unsigned finished = 0;
 
 static unsigned cnt = N;
 
-starpu_data_handle v_handle, v_handle2;
+starpu_data_handle_t v_handle, v_handle2;
 static unsigned *v;
 static unsigned *v2;
 
@@ -43,10 +44,10 @@ static void callback(void *arg)
 
 	if (res == 0)
 	{
-		pthread_mutex_lock(&mutex);
+		_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
 		finished = 1;
-		pthread_cond_signal(&cond);
-		pthread_mutex_unlock(&mutex);
+		_STARPU_PTHREAD_COND_SIGNAL(&cond);
+		_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
 	}
 }
 
@@ -64,11 +65,12 @@ static void cpu_codelet_null(void *descr[], __attribute__ ((unused)) void *_args
 {
 }
 
-static starpu_access_mode select_random_mode(void)
+static enum starpu_access_mode select_random_mode(void)
 {
 	int r = rand();
 
-	switch (r % 3) {
+	switch (r % 3)
+	{
 		case 0:
 			return STARPU_R;
 		case 1:
@@ -79,22 +81,109 @@ static starpu_access_mode select_random_mode(void)
 	return STARPU_RW;
 }
 
+static struct starpu_codelet cl_r_r =
+{
+	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
+	.cpu_funcs = {cpu_codelet_null, NULL},
+	.cuda_funcs = {cuda_codelet_null, NULL},
+        .opencl_funcs = {opencl_codelet_null, NULL},
+	.nbuffers = 2,
+	.modes = {STARPU_R, STARPU_R}
+};
+
+static struct starpu_codelet cl_r_w =
+{
+	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
+	.cpu_funcs = {cpu_codelet_null, NULL},
+	.cuda_funcs = {cuda_codelet_null, NULL},
+        .opencl_funcs = {opencl_codelet_null, NULL},
+	.nbuffers = 2,
+	.modes = {STARPU_R, STARPU_W}
+};
+
+static struct starpu_codelet cl_r_rw =
+{
+	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
+	.cpu_funcs = {cpu_codelet_null, NULL},
+	.cuda_funcs = {cuda_codelet_null, NULL},
+        .opencl_funcs = {opencl_codelet_null, NULL},
+	.nbuffers = 2,
+	.modes = {STARPU_R, STARPU_RW}
+};
+
+static struct starpu_codelet cl_w_r =
+{
+	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
+	.cpu_funcs = {cpu_codelet_null, NULL},
+	.cuda_funcs = {cuda_codelet_null, NULL},
+        .opencl_funcs = {opencl_codelet_null, NULL},
+	.nbuffers = 2,
+	.modes = {STARPU_W, STARPU_R}
+};
+
+static struct starpu_codelet cl_w_w =
+{
+	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
+	.cpu_funcs = {cpu_codelet_null, NULL},
+	.cuda_funcs = {cuda_codelet_null, NULL},
+        .opencl_funcs = {opencl_codelet_null, NULL},
+	.nbuffers = 2,
+	.modes = {STARPU_W, STARPU_W}
+};
+
+static struct starpu_codelet cl_w_rw =
+{
+	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
+	.cpu_funcs = {cpu_codelet_null, NULL},
+	.cuda_funcs = {cuda_codelet_null, NULL},
+        .opencl_funcs = {opencl_codelet_null, NULL},
+	.nbuffers = 2,
+	.modes = {STARPU_W, STARPU_RW}
+};
 

-static starpu_codelet cl = {
+static struct starpu_codelet cl_rw_r =
+{
 	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
-	.cpu_func = cpu_codelet_null,
-	.cuda_func = cuda_codelet_null,
-        .opencl_func = opencl_codelet_null,
-	.nbuffers = 2
+	.cuda_funcs = {cuda_codelet_null, NULL},
+        .opencl_funcs = {opencl_codelet_null, NULL},
+	.nbuffers = 2,
+	.modes = {STARPU_RW, STARPU_R}
+};
+
+static struct starpu_codelet cl_rw_w =
+{
+	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
+	.cpu_funcs = {cpu_codelet_null, NULL},
+	.cuda_funcs = {cuda_codelet_null, NULL},
+        .opencl_funcs = {opencl_codelet_null, NULL},
+	.nbuffers = 2,
+	.modes = {STARPU_RW, STARPU_W}
+};
+
+static struct starpu_codelet cl_rw_rw =
+{
+	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
+	.cpu_funcs = {cpu_codelet_null, NULL},
+	.cuda_funcs = {cuda_codelet_null, NULL},
+        .opencl_funcs = {opencl_codelet_null, NULL},
+	.nbuffers = 2,
+	.modes = {STARPU_RW, STARPU_RW}
 };
 };
 
 
 int main(int argc, char **argv)
 {
-	starpu_init(NULL);
+	int ret;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
-	starpu_malloc((void **)&v, VECTORSIZE*sizeof(unsigned));
-	starpu_malloc((void **)&v2, VECTORSIZE*sizeof(unsigned));
+	ret = starpu_malloc((void **)&v, VECTORSIZE*sizeof(unsigned));
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc");
+	ret = starpu_malloc((void **)&v2, VECTORSIZE*sizeof(unsigned));
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_malloc");
 
 	starpu_vector_data_register(&v_handle, 0, (uintptr_t)v, VECTORSIZE, sizeof(unsigned));
 	starpu_vector_data_register(&v_handle2, 0, (uintptr_t)v2, VECTORSIZE, sizeof(unsigned));
@@ -103,36 +192,61 @@ int main(int argc, char **argv)
 	for (iter = 0; iter < N; iter++)
 	{
 		struct starpu_task *task = starpu_task_create();
-		task->cl = &cl;
 
-		task->buffers[0].mode = select_random_mode();
-
-		task->buffers[1].handle = v_handle2;
-		task->buffers[1].mode = select_random_mode();
+		task->handles[0] = v_handle;
+		task->handles[1] = v_handle2;
+
+		enum starpu_access_mode mode0 = select_random_mode();
+		enum starpu_access_mode mode1 = select_random_mode();
+
+		if (mode0 == STARPU_R && mode1 == STARPU_R)
+			task->cl = &cl_r_r;
+		else if (mode0 == STARPU_R && mode1 == STARPU_W)
+			task->cl = &cl_r_w;
+		else if (mode0 == STARPU_R && mode1 == STARPU_RW)
+			task->cl = &cl_r_rw;
+		else if (mode0 == STARPU_W && mode1 == STARPU_R)
+			task->cl = &cl_w_r;
+		else if (mode0 == STARPU_W && mode1 == STARPU_W)
+			task->cl = &cl_w_w;
+		else if (mode0 == STARPU_W && mode1 == STARPU_RW)
+			task->cl = &cl_w_rw;
+		else if (mode0 == STARPU_RW && mode1 == STARPU_R)
+			task->cl = &cl_rw_r;
+		else if (mode0 == STARPU_RW && mode1 == STARPU_W)
+			task->cl = &cl_rw_w;
+		else if (mode0 == STARPU_RW && mode1 == STARPU_RW)
+			task->cl = &cl_rw_rw;

 		task->callback_func = callback;
 		task->callback_arg = NULL;
 
 		int ret = starpu_task_submit(task);
-		if (ret == -ENODEV)
-			goto enodev;
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 	}
 
-	pthread_mutex_lock(&mutex);
+	_STARPU_PTHREAD_MUTEX_LOCK(&mutex);
 	if (!finished)
-		pthread_cond_wait(&cond, &mutex);
-	pthread_mutex_unlock(&mutex);
+		_STARPU_PTHREAD_COND_WAIT(&cond, &mutex);
+	_STARPU_PTHREAD_MUTEX_UNLOCK(&mutex);
 
+	starpu_data_unregister(v_handle);
+	starpu_data_unregister(v_handle2);
 	starpu_free(v);
 	starpu_free(v2);
 	starpu_shutdown();
 
-	return 0;
+	return EXIT_SUCCESS;
 
 enodev:
+	starpu_data_unregister(v_handle);
+	starpu_data_unregister(v_handle2);
+	starpu_free(v);
+	starpu_free(v2);
+	starpu_shutdown();
 	fprintf(stderr, "WARNING: No one can execute this task\n");
 	/* yes, we do not perform the computation but we did detect that no one
  	 * could perform the kernel, so this is not an error from StarPU */
-	return 77;
+	return STARPU_TEST_SKIPPED;
 }

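In dsm_stress.c the single generic codelet disappears: since access modes are now a static property of the codelet, the test declares one codelet per (mode0, mode1) combination and picks the right one after drawing the modes at random. Completion is still detected from a callback that signals a condition variable; below is a hedged sketch of that pattern using plain pthread calls and an assumed atomic decrement (the test itself uses StarPU's internal _STARPU_PTHREAD_* wrappers and its own counter handling):

static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static unsigned remaining = 10000;	/* one per submitted task */
static unsigned finished = 0;

static void callback(void *arg)
{
	(void)arg;
	/* assumption: the real test decrements its counter atomically */
	if (__sync_sub_and_fetch(&remaining, 1) == 0)
	{
		pthread_mutex_lock(&mutex);
		finished = 1;
		pthread_cond_signal(&cond);
		pthread_mutex_unlock(&mutex);
	}
}

static void wait_for_callbacks(void)
{
	pthread_mutex_lock(&mutex);
	while (!finished)
		pthread_cond_wait(&cond, &mutex);
	pthread_mutex_unlock(&mutex);
}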
+ 139 - 0
tests/datawizard/gpu_register.c

@@ -0,0 +1,139 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Université de Bordeaux 1
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <starpu_opencl.h>
+#include <starpu_cuda.h>
+#include "../helper.h"
+#include "scal.h"
+
+int main(int argc, char **argv)
+{
+	int ret;
+#ifdef STARPU_USE_CUDA
+#if CUDART_VERSION >= 4000
+	unsigned *foo_gpu;
+	unsigned *foo;
+	starpu_data_handle_t handle;
+	int n, i, size, pieces;
+	int devid;
+	unsigned workerid;
+	int chosen = -1;
+	cudaError_t cures;
+#endif
+#endif
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+#ifdef STARPU_USE_CUDA
+#if CUDART_VERSION >= 4000 /* We need thread-safety of CUDA */
+	/* TODO OpenCL, too */
+	for (workerid = 0; workerid < starpu_worker_get_count(); workerid++) {
+		if (starpu_worker_get_type(workerid) == STARPU_CUDA_WORKER) {
+			chosen = workerid;
+			break;
+		}
+	}
+
+	if (chosen == -1)
+		return STARPU_TEST_SKIPPED;
+
+#ifdef STARPU_USE_OPENCL
+	ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+#endif
+
+	n = starpu_worker_get_count();
+	size = 10 * n;
+
+	devid = starpu_worker_get_devid(chosen);
+	cudaSetDevice(devid);
+	cudaMalloc((void**)&foo_gpu, size * sizeof(*foo_gpu));
+
+	foo = calloc(size, sizeof(*foo));
+	for (i = 0; i < size; i++)
+		foo[i] = i;
+
+	cures = cudaMemcpy(foo_gpu, foo, size * sizeof(*foo_gpu), cudaMemcpyHostToDevice);
+	if (STARPU_UNLIKELY(cures))
+		STARPU_CUDA_REPORT_ERROR(cures);
+
+	starpu_vector_data_register(&handle, starpu_worker_get_memory_node(chosen), (uintptr_t)foo_gpu, size, sizeof(*foo_gpu));
+
+	/* Broadcast the data to force in-place partitioning */
+	for (i = 0; i < n; i++)
+		starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(i), 0);
+
+	/* Even with just one worker, split in at least two */
+	if (n == 1)
+		pieces = 2;
+	else
+		pieces = n;
+
+	struct starpu_data_filter f =
+	{
+		.filter_func = starpu_block_filter_func_vector,
+		.nchildren = pieces,
+	};
+
+	starpu_data_partition(handle, &f);
+
+	for (i = 0; i < pieces; i++) {
+		struct starpu_task *task = starpu_task_create();
+
+		task->handles[0] = starpu_data_get_sub_data(handle, 1, i);
+		task->cl = &scal_codelet;
+		task->execute_on_a_specific_worker = 1;
+		task->workerid = i%n;
+
+		ret = starpu_task_submit(task);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+
+	starpu_data_unpartition(handle, starpu_worker_get_memory_node(chosen));
+	starpu_data_unregister(handle);
+
+	cudaSetDevice(devid);
+	cures = cudaMemcpy(foo, foo_gpu, size * sizeof(*foo_gpu), cudaMemcpyDeviceToHost);
+	if (STARPU_UNLIKELY(cures))
+		STARPU_CUDA_REPORT_ERROR(cures);
+
+	starpu_shutdown();
+
+	for (i = 0; i < size; i++) {
+		if (foo[i] != i*2) {
+			fprintf(stderr,"value %d is %d instead of %d\n", i, foo[i], 2*i);
+			return EXIT_FAILURE;
+		}
+	}
+
+        return EXIT_SUCCESS;
+
+enodev:
+	starpu_data_unregister(handle);
+#endif
+#endif
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+ 	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}

+ 23 - 17
tests/datawizard/handle_to_pointer.c

@@ -19,6 +19,7 @@
 
 #include <starpu.h>
 #include <stdlib.h>
+#include "../helper.h"
 
 static void cpu_task(void **buffers, void *args)
 {
@@ -27,7 +28,7 @@ static void cpu_task(void **buffers, void *args)
 	size_t size;
 
 	numbers = (int *) STARPU_VECTOR_GET_PTR(buffers[0]);
-	starpu_unpack_cl_args (args, &size);
+	starpu_codelet_unpack_args (args, &size);
 
 	for(i = 0; i < size; i++)
 	{
@@ -43,7 +44,7 @@ static void cuda_task(void **buffers, void *args)
 	size_t size;
 
 	numbers = (int *)STARPU_VECTOR_GET_PTR(buffers[0]);
-	starpu_unpack_cl_args (args, &size);
+	starpu_codelet_unpack_args (args, &size);
 
 	for(i = 0; i < size; i++)
 	{
@@ -52,41 +53,44 @@ static void cuda_task(void **buffers, void *args)
 }
 #endif
 
-static starpu_codelet cl = {
+static struct starpu_codelet cl =
+{
 	.where = STARPU_CPU | STARPU_CUDA,
-	.cpu_func = cpu_task,
+	.cpu_funcs = {cpu_task, NULL},
 #ifdef STARPU_USE_CUDA
-	.cuda_func = cuda_task,
+	.cuda_funcs = {cuda_task, NULL},
 #endif
-	.nbuffers = 1
+	.nbuffers = 1,
+	.modes = {STARPU_W}
 };
 
 int main(int argc, char *argv[])
 {
-	int err;
+	int err, ret;
 	size_t i;
 	int *pointer;
-	starpu_data_handle handle;
+	starpu_data_handle_t handle;
 	static const size_t count = 123;
 
-	starpu_init(NULL);
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
 
 	err = starpu_malloc((void **)&pointer, count * sizeof(int));
-	assert((err == 0) && (pointer != NULL));
+	STARPU_ASSERT((err == 0) && (pointer != NULL));
 
 	starpu_variable_data_register(&handle, 0, (uintptr_t)pointer,
 				      sizeof(int));
-	assert(starpu_handle_to_pointer(handle, 0) == pointer);
+	STARPU_ASSERT(starpu_handle_to_pointer(handle, 0) == pointer);
 	starpu_data_unregister(handle);
 
 	starpu_vector_data_register(&handle, 0, (uintptr_t)pointer,
 				    count, sizeof(int));
-	assert(starpu_handle_to_pointer(handle, 0) == pointer);
+	STARPU_ASSERT(starpu_handle_to_pointer(handle, 0) == pointer);
 	starpu_data_unregister(handle);
 
 	starpu_matrix_data_register(&handle, 0, (uintptr_t)pointer, 0,
 				    count, 1, sizeof(int));
-	assert(starpu_handle_to_pointer(handle, 0) == pointer);
+	STARPU_ASSERT(starpu_handle_to_pointer(handle, 0) == pointer);
 	starpu_data_unregister(handle);
 
 	starpu_free(pointer);
@@ -95,24 +99,26 @@ int main(int argc, char *argv[])
 	/* Lazy allocation.  */
 	starpu_vector_data_register(&handle, -1, 0 /* NULL */,
 				    count, sizeof(int));
-	assert(starpu_handle_to_pointer(handle, 0) == NULL);
+	STARPU_ASSERT(starpu_handle_to_pointer(handle, 0) == NULL);
 
 	/* Pass the handle to a task.  */
-	starpu_insert_task(&cl,
+	err = starpu_insert_task(&cl,
 			   STARPU_W, handle,
 			   STARPU_VALUE, &count, sizeof(count),
 			   0);
+	if (err == -ENODEV)
+		return STARPU_TEST_SKIPPED;
 
 	/* Acquire the handle, forcing a local allocation.  */
 	starpu_data_acquire(handle, STARPU_R);
 
 	/* Make sure we have a local pointer to it.  */
 	pointer = (int *) starpu_handle_to_pointer(handle, 0);
-	assert(pointer != NULL);
+	STARPU_ASSERT(pointer != NULL);
 	for(i = 0; i < count; i++)
 	{
 		int *numbers = (int *)pointer;
-		assert(numbers[i] == i);
+		STARPU_ASSERT(numbers[i] == i);
 	}
 	starpu_data_release(handle);
 

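handle_to_pointer.c also picks up the renamed argument helpers: starpu_unpack_cl_args() becomes starpu_codelet_unpack_args(), the counterpart of starpu_insert_task()'s STARPU_VALUE arguments, which are copied at submission time. A short sketch of that round trip; the kernel, codelet and handle names here are illustrative, not part of the commit:

static void fill_task(void *buffers[], void *cl_arg)
{
	size_t size, i;
	int *numbers = (int *) STARPU_VECTOR_GET_PTR(buffers[0]);

	/* retrieve the value packed with STARPU_VALUE at submission */
	starpu_codelet_unpack_args(cl_arg, &size);

	for (i = 0; i < size; i++)
		numbers[i] = (int) i;
}

static struct starpu_codelet fill_cl =
{
	.where = STARPU_CPU,
	.cpu_funcs = {fill_task, NULL},
	.nbuffers = 1,
	.modes = {STARPU_W}
};

static int fill(starpu_data_handle_t handle, size_t count)
{
	/* the value is copied when the task is submitted */
	return starpu_insert_task(&fill_cl,
				  STARPU_W, handle,
				  STARPU_VALUE, &count, sizeof(count),
				  0);
}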
+ 102 - 0
tests/datawizard/in_place_partition.c

@@ -0,0 +1,102 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Université de Bordeaux 1
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <starpu_opencl.h>
+#include "../helper.h"
+#include "scal.h"
+
+int main(int argc, char **argv)
+{
+	unsigned *foo;
+	starpu_data_handle_t handle;
+	int ret;
+	int n, i, size;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+#ifdef STARPU_USE_OPENCL
+	ret = starpu_opencl_load_opencl_from_file("tests/datawizard/scal_opencl.cl", &opencl_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+#endif
+
+	n = starpu_worker_get_count();
+	if (n == 1)
+	{
+		starpu_shutdown();
+		return STARPU_TEST_SKIPPED;
+	}
+
+	size = 10 * n;
+
+	foo = (unsigned *) calloc(size, sizeof(*foo));
+	for (i = 0; i < size; i++)
+		foo[i] = i;
+
+	starpu_vector_data_register(&handle, 0, (uintptr_t)foo, size, sizeof(*foo));
+
+	/* Broadcast the data to force in-place partitioning */
+	for (i = 0; i < n; i++)
+		starpu_data_prefetch_on_node(handle, starpu_worker_get_memory_node(i), 0);
+
+	struct starpu_data_filter f =
+	{
+		.filter_func = starpu_block_filter_func_vector,
+		.nchildren = n,
+	};
+
+	starpu_data_partition(handle, &f);
+
+	for (i = 0; i < f.nchildren; i++) {
+		struct starpu_task *task = starpu_task_create();
+
+		task->handles[0] = starpu_data_get_sub_data(handle, 1, i);
+		task->cl = &scal_codelet;
+		task->execute_on_a_specific_worker = 1;
+		task->workerid = i;
+
+		ret = starpu_task_submit(task);
+		if (ret == -ENODEV) goto enodev;
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+	}
+
+	ret = starpu_task_wait_for_all();
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_wait_for_all");
+
+	starpu_data_unpartition(handle, 0);
+	starpu_data_unregister(handle);
+	starpu_shutdown();
+
+	ret = EXIT_SUCCESS;
+	for (i = 0; i < size; i++) {
+		if (foo[i] != i*2) {
+			FPRINTF(stderr,"value %d is %d instead of %d\n", i, foo[i], 2*i);
+			ret = EXIT_FAILURE;
+		}
+	}
+
+        return ret;
+
+enodev:
+	starpu_data_unregister(handle);
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+ 	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}

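in_place_partition.c and gpu_register.c above both rely on the vector block filter to split one registered vector into sub-handles that independent tasks can modify in place. A condensed sketch of that partition/unpartition cycle, reusing the scal_codelet declared in tests/datawizard/scal.h (error handling and worker pinning omitted):

static void scale_in_pieces(starpu_data_handle_t handle, unsigned pieces)
{
	struct starpu_data_filter f =
	{
		.filter_func = starpu_block_filter_func_vector,	/* contiguous blocks */
		.nchildren = pieces,
	};
	unsigned i;

	starpu_data_partition(handle, &f);

	for (i = 0; i < pieces; i++)
	{
		struct starpu_task *task = starpu_task_create();
		task->cl = &scal_codelet;
		task->handles[0] = starpu_data_get_sub_data(handle, 1, i);
		starpu_task_submit(task);
	}

	starpu_task_wait_for_all();
	starpu_data_unpartition(handle, 0);	/* gather the children back on node 0 */
}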
+ 78 - 31
tests/datawizard/increment_redux.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2010  Université de Bordeaux 1
- * Copyright (C) 2010  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -15,7 +15,9 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+#include <config.h>
 #include <starpu.h>
+#include "../helper.h"
 
 #ifdef STARPU_USE_CUDA
 #include <starpu_cuda.h>
@@ -24,9 +26,10 @@
 #include <starpu_opencl.h>
 #endif
 
+#warning memory leak
 
 static unsigned var = 0;
-static starpu_data_handle handle;
+static starpu_data_handle_t handle;
 
 /*
  *	Reduction methods
@@ -35,6 +38,8 @@ static starpu_data_handle handle;
 #ifdef STARPU_USE_CUDA
 static void redux_cuda_kernel(void *descr[], void *arg)
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
 	unsigned *src = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]);
 
@@ -53,6 +58,8 @@ static void redux_cuda_kernel(void *descr[], void *arg)
 
 static void neutral_cuda_kernel(void *descr[], void *arg)
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
 
 	/* This is a dummy technique of course */
@@ -65,6 +72,8 @@ static void neutral_cuda_kernel(void *descr[], void *arg)
 #ifdef STARPU_USE_OPENCL
 static void redux_opencl_kernel(void *descr[], void *arg)
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	unsigned h_dst, h_src;
 
 	cl_mem d_dst = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]);
@@ -79,18 +88,20 @@ static void redux_opencl_kernel(void *descr[], void *arg)
 
 	h_dst += h_src;
 
-	clEnqueueWriteBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL); 
+	clEnqueueWriteBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL);
 }
 
 static void neutral_opencl_kernel(void *descr[], void *arg)
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	unsigned h_dst = 0;
 	cl_mem d_dst = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]);
 
 	cl_command_queue queue;
 	starpu_opencl_get_current_queue(&queue);
 
-	clEnqueueWriteBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL); 
+	clEnqueueWriteBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL);
 }
 #endif
 
@@ -98,6 +109,8 @@ static void neutral_opencl_kernel(void *descr[], void *arg)
 
 static void redux_cpu_kernel(void *descr[], void *arg)
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
 	unsigned *src = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]);
 	*dst = *dst + *src;
@@ -105,31 +118,35 @@ static void redux_cpu_kernel(void *descr[], void *arg)
 
 static void neutral_cpu_kernel(void *descr[], void *arg)
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
 	*dst = 0;
 }
 
-static starpu_codelet redux_cl = {
+static struct starpu_codelet redux_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
 #ifdef STARPU_USE_CUDA
-	.cuda_func = redux_cuda_kernel,
+	.cuda_funcs = {redux_cuda_kernel, NULL},
 #endif
 #ifdef STARPU_USE_OPENCL
-	.opencl_func = redux_opencl_kernel,
+	.opencl_funcs = {redux_opencl_kernel, NULL},
 #endif
-	.cpu_func = redux_cpu_kernel,
+	.cpu_funcs = {redux_cpu_kernel, NULL},
 	.nbuffers = 2
 };
 
-static starpu_codelet neutral_cl = {
+static struct starpu_codelet neutral_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 #ifdef STARPU_USE_CUDA
-	.cuda_func = neutral_cuda_kernel,
+	.cuda_funcs = {neutral_cuda_kernel, NULL},
 #endif
 #ifdef STARPU_USE_OPENCL
-	.opencl_func = neutral_opencl_kernel,
+	.opencl_funcs = {neutral_opencl_kernel, NULL},
 #endif
-	.cpu_func = neutral_cpu_kernel,
+	.cpu_funcs = {neutral_cpu_kernel, NULL},
 	.nbuffers = 1
 };
 
@@ -141,6 +158,8 @@ static starpu_codelet neutral_cl = {
 /* dummy OpenCL implementation */
 static void increment_opencl_kernel(void *descr[], void *cl_arg __attribute__((unused)))
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	cl_mem d_token = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]);
 	unsigned h_token;
 
@@ -149,7 +168,7 @@ static void increment_opencl_kernel(void *descr[], void *cl_arg __attribute__((u
 
 	clEnqueueReadBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
 	h_token++;
-	clEnqueueWriteBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL); 
+	clEnqueueWriteBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
 }
 #endif
 
@@ -157,6 +176,8 @@ static void increment_opencl_kernel(void *descr[], void *cl_arg __attribute__((u
 #ifdef STARPU_USE_CUDA
 static void increment_cuda_kernel(void *descr[], void *arg)
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	unsigned *tokenptr = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
 	unsigned host_token;
 
@@ -173,25 +194,33 @@ static void increment_cuda_kernel(void *descr[], void *arg)
 
 static void increment_cpu_kernel(void *descr[], void *arg)
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	unsigned *tokenptr = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
 	*tokenptr = *tokenptr + 1;
 }
 
-static starpu_codelet increment_cl = {
+static struct starpu_codelet increment_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
 #ifdef STARPU_USE_CUDA
-	.cuda_func = increment_cuda_kernel,
+	.cuda_funcs = {increment_cuda_kernel, NULL},
 #endif
 #ifdef STARPU_USE_OPENCL
-	.opencl_func = increment_opencl_kernel,
+	.opencl_funcs = {increment_opencl_kernel, NULL},
 #endif
-	.cpu_func = increment_cpu_kernel,
-	.nbuffers = 1
+	.cpu_funcs = {increment_cpu_kernel, NULL},
+	.nbuffers = 1,
+	.modes = {STARPU_REDUX}
 };
 
 int main(int argc, char **argv)
 {
-	starpu_init(NULL);
+	int ret;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 	starpu_variable_data_register(&handle, 0, (uintptr_t)&var, sizeof(unsigned));
 
@@ -208,26 +237,44 @@ int main(int argc, char **argv)
 		for (t = 0; t < ntasks; t++)
 		{
 			struct starpu_task *task = starpu_task_create();
-	
+
 			task->cl = &increment_cl;
-	
-			task->buffers[0].mode = STARPU_REDUX;
-			task->buffers[0].handle = handle;
-	
-			int ret = starpu_task_submit(task);
-			STARPU_ASSERT(!ret);
+			task->handles[0] = handle;
 
+			int ret = starpu_task_submit(task);
+			if (ret == -ENODEV) goto enodev;
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 		}
 
-		starpu_data_acquire(handle, STARPU_R);
-		STARPU_ASSERT(var == ntasks*(loop + 1));
+		ret = starpu_data_acquire(handle, STARPU_R);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire");
+		if (var != ntasks * (loop+1))
+		{
+			starpu_data_release(handle);
+			starpu_data_unregister(handle);
+			goto err;
+		}
 		starpu_data_release(handle);
 	}
 
 	starpu_data_unregister(handle);
-	STARPU_ASSERT(var == ntasks*nloops);
-	
+	if (var != ntasks * nloops)
+		goto err;
+
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+
+enodev:
+	starpu_data_unregister(handle);
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+ 	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+
+err:
 	starpu_shutdown();
+	STARPU_RETURN(EXIT_FAILURE);
 
-	return 0;
 }

+ 255 - 0
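increment_redux.c now marks its single buffer STARPU_REDUX, so concurrent increments go to per-worker copies that StarPU combines with the reduction codelets attached to the handle (redux_cl to accumulate, neutral_cl to initialise); the lazy variant added next shows the starpu_data_set_reduction_methods() call explicitly. A condensed, CPU-only sketch of that setup, with only the wiring illustrative:

static void redux_cpu(void *descr[], void *arg)
{	/* dst += src */
	unsigned *dst = (unsigned *) STARPU_VARIABLE_GET_PTR(descr[0]);
	unsigned *src = (unsigned *) STARPU_VARIABLE_GET_PTR(descr[1]);
	(void)arg;
	*dst += *src;
}

static void init_cpu(void *descr[], void *arg)
{	/* neutral element of the reduction */
	unsigned *dst = (unsigned *) STARPU_VARIABLE_GET_PTR(descr[0]);
	(void)arg;
	*dst = 0;
}

static struct starpu_codelet redux_cl = { .where = STARPU_CPU, .cpu_funcs = {redux_cpu, NULL}, .nbuffers = 2 };
static struct starpu_codelet init_cl  = { .where = STARPU_CPU, .cpu_funcs = {init_cpu, NULL},  .nbuffers = 1 };

/* after registering the variable:
 *   starpu_data_set_reduction_methods(handle, &redux_cl, &init_cl);
 * tasks then access it through a codelet whose .modes = {STARPU_REDUX}. */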
tests/datawizard/increment_redux_lazy.c

@@ -0,0 +1,255 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010, 2012  Université de Bordeaux 1
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include "../helper.h"
+
+#ifdef STARPU_USE_CUDA
+#include <starpu_cuda.h>
+#endif
+#ifdef STARPU_USE_OPENCL
+#include <starpu_opencl.h>
+#endif
+
+#warning memory leak
+
+static starpu_data_handle_t handle;
+
+/*
+ *	Reduction methods
+ */
+
+#ifdef STARPU_USE_CUDA
+static void redux_cuda_kernel(void *descr[], void *arg)
+{
+	unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
+	unsigned *src = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]);
+
+	unsigned host_dst, host_src;
+
+	/* This is a dummy technique of course */
+	cudaMemcpy(&host_src, src, sizeof(unsigned), cudaMemcpyDeviceToHost);
+	cudaMemcpy(&host_dst, dst, sizeof(unsigned), cudaMemcpyDeviceToHost);
+	cudaThreadSynchronize();
+
+	host_dst += host_src;
+
+	cudaMemcpy(dst, &host_dst, sizeof(unsigned), cudaMemcpyHostToDevice);
+	cudaThreadSynchronize();
+}
+
+static void neutral_cuda_kernel(void *descr[], void *arg)
+{
+	unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
+
+	/* This is a dummy technique of course */
+	unsigned host_dst = 0;
+	cudaMemcpy(dst, &host_dst, sizeof(unsigned), cudaMemcpyHostToDevice);
+	cudaThreadSynchronize();
+}
+#endif
+
+#ifdef STARPU_USE_OPENCL
+static void redux_opencl_kernel(void *descr[], void *arg)
+{
+	unsigned h_dst, h_src;
+
+	cl_mem d_dst = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]);
+	cl_mem d_src = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[1]);
+
+	cl_command_queue queue;
+	starpu_opencl_get_current_queue(&queue);
+
+	/* This is a dummy technique of course */
+	clEnqueueReadBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL);
+	clEnqueueReadBuffer(queue, d_src, CL_TRUE, 0, sizeof(unsigned), (void *)&h_src, 0, NULL, NULL);
+
+	h_dst += h_src;
+
+	clEnqueueWriteBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL);
+}
+
+static void neutral_opencl_kernel(void *descr[], void *arg)
+{
+	unsigned h_dst = 0;
+	cl_mem d_dst = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]);
+
+	cl_command_queue queue;
+	starpu_opencl_get_current_queue(&queue);
+
+	clEnqueueWriteBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL);
+}
+#endif
+
+
+
+static void redux_cpu_kernel(void *descr[], void *arg)
+{
+	unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
+	unsigned *src = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]);
+	*dst = *dst + *src;
+}
+
+static void neutral_cpu_kernel(void *descr[], void *arg)
+{
+	unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
+	*dst = 0;
+}
+
+static struct starpu_codelet redux_cl =
+{
+	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = {redux_cuda_kernel, NULL},
+#endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = {redux_opencl_kernel, NULL},
+#endif
+	.cpu_funcs = {redux_cpu_kernel, NULL},
+	.nbuffers = 2
+};
+
+static struct starpu_codelet neutral_cl =
+{
+	.where = STARPU_CPU|STARPU_CUDA,
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = {neutral_cuda_kernel, NULL},
+#endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = {neutral_opencl_kernel, NULL},
+#endif
+	.cpu_funcs = {neutral_cpu_kernel, NULL},
+	.nbuffers = 1
+};
+
+/*
+ *	Increment codelet
+ */
+
+#ifdef STARPU_USE_OPENCL
+/* dummy OpenCL implementation */
+static void increment_opencl_kernel(void *descr[], void *cl_arg __attribute__((unused)))
+{
+	cl_mem d_token = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]);
+	unsigned h_token;
+
+	cl_command_queue queue;
+	starpu_opencl_get_current_queue(&queue);
+
+	clEnqueueReadBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
+	h_token++;
+	clEnqueueWriteBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
+}
+#endif
+
+
+#ifdef STARPU_USE_CUDA
+static void increment_cuda_kernel(void *descr[], void *arg)
+{
+	unsigned *tokenptr = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
+	unsigned host_token;
+
+	/* This is a dummy technique of course */
+	cudaMemcpy(&host_token, tokenptr, sizeof(unsigned), cudaMemcpyDeviceToHost);
+	cudaThreadSynchronize();
+
+	host_token++;
+
+	cudaMemcpy(tokenptr, &host_token, sizeof(unsigned), cudaMemcpyHostToDevice);
+	cudaThreadSynchronize();
+}
+#endif
+
+static void increment_cpu_kernel(void *descr[], void *arg)
+{
+	unsigned *tokenptr = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
+	*tokenptr = *tokenptr + 1;
+}
+
+static struct starpu_codelet increment_cl =
+{
+	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = {increment_cuda_kernel, NULL},
+#endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = {increment_opencl_kernel, NULL},
+#endif
+	.cpu_funcs = {increment_cpu_kernel, NULL},
+	.nbuffers = 1,
+	.modes = {STARPU_REDUX}
+};
+
+int main(int argc, char **argv)
+{
+	int ret;
+	unsigned *var;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	starpu_variable_data_register(&handle, -1, (uintptr_t)NULL, sizeof(unsigned));
+
+	starpu_data_set_reduction_methods(handle, &redux_cl, &neutral_cl);
+
+	unsigned ntasks = 1024;
+	unsigned nloops = 16;
+
+	unsigned loop;
+	unsigned t;
+
+	for (loop = 0; loop < nloops; loop++)
+	{
+		for (t = 0; t < ntasks; t++)
+		{
+			struct starpu_task *task = starpu_task_create();
+
+			task->cl = &increment_cl;
+			task->handles[0] = handle;
+
+			int ret = starpu_task_submit(task);
+			if (ret == -ENODEV) goto enodev;
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
+		}
+
+		ret = starpu_data_acquire(handle, STARPU_R);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire");
+		var = (unsigned*) starpu_variable_get_local_ptr(handle);
+		STARPU_ASSERT(*var == ntasks*(loop + 1));
+		starpu_data_release(handle);
+	}
+
+	ret = starpu_data_acquire(handle, STARPU_R);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire");
+	var = (unsigned*) starpu_variable_get_local_ptr(handle);
+	STARPU_ASSERT(*var == ntasks*nloops);
+	starpu_data_release(handle);
+	starpu_data_unregister(handle);
+
+	starpu_shutdown();
+
+	STARPU_RETURN(EXIT_SUCCESS);
+
+enodev:
+	starpu_data_unregister(handle);
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+ 	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	STARPU_RETURN(STARPU_TEST_SKIPPED);
+}

+ 102 - 30
tests/datawizard/increment_redux_v2.c

@@ -14,7 +14,9 @@
  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  */
 
+#include <config.h>
 #include <starpu.h>
+#include "../helper.h"
 
 #ifdef STARPU_USE_CUDA
 #include <starpu_cuda.h>
@@ -23,9 +25,10 @@
 #include <starpu_opencl.h>
 #endif
 
+#warning memory leak
 
 static unsigned var = 0;
-static starpu_data_handle handle;
+static starpu_data_handle_t handle;
 
 /*
  *	Reduction methods
@@ -34,6 +37,8 @@ static starpu_data_handle handle;
 #ifdef STARPU_USE_CUDA
 static void redux_cuda_kernel(void *descr[], void *arg)
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
 	unsigned *src = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]);
 
@@ -52,6 +57,8 @@ static void redux_cuda_kernel(void *descr[], void *arg)
 
 static void neutral_cuda_kernel(void *descr[], void *arg)
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
 
 	/* This is a dummy technique of course */
@@ -64,6 +71,8 @@ static void neutral_cuda_kernel(void *descr[], void *arg)
 #ifdef STARPU_USE_OPENCL
 static void redux_opencl_kernel(void *descr[], void *arg)
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	unsigned h_dst, h_src;
 
 	cl_mem d_dst = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]);
@@ -78,18 +87,20 @@ static void redux_opencl_kernel(void *descr[], void *arg)
 
 	h_dst += h_src;
 
-	clEnqueueWriteBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL); 
+	clEnqueueWriteBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL);
 }
 
 static void neutral_opencl_kernel(void *descr[], void *arg)
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	unsigned h_dst = 0;
 	cl_mem d_dst = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]);
 
 	cl_command_queue queue;
 	starpu_opencl_get_current_queue(&queue);
 
-	clEnqueueWriteBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL); 
+	clEnqueueWriteBuffer(queue, d_dst, CL_TRUE, 0, sizeof(unsigned), (void *)&h_dst, 0, NULL, NULL);
 }
 #endif
 
@@ -97,6 +108,8 @@ static void neutral_opencl_kernel(void *descr[], void *arg)
 
 static void redux_cpu_kernel(void *descr[], void *arg)
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
 	unsigned *src = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[1]);
 	*dst = *dst + *src;
@@ -104,31 +117,35 @@ static void redux_cpu_kernel(void *descr[], void *arg)
 
 static void neutral_cpu_kernel(void *descr[], void *arg)
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	unsigned *dst = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
 	*dst = 0;
 }
 
-static starpu_codelet redux_cl = {
+static struct starpu_codelet redux_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
 #ifdef STARPU_USE_CUDA
-	.cuda_func = redux_cuda_kernel,
+	.cuda_funcs = {redux_cuda_kernel, NULL},
 #endif
 #ifdef STARPU_USE_OPENCL
-	.opencl_func = redux_opencl_kernel,
+	.opencl_funcs = {redux_opencl_kernel, NULL},
 #endif
-	.cpu_func = redux_cpu_kernel,
+	.cpu_funcs = {redux_cpu_kernel, NULL},
 	.nbuffers = 2
 };
 
-static starpu_codelet neutral_cl = {
+static struct starpu_codelet neutral_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA,
 #ifdef STARPU_USE_CUDA
-	.cuda_func = neutral_cuda_kernel,
+	.cuda_funcs = {neutral_cuda_kernel, NULL},
 #endif
 #ifdef STARPU_USE_OPENCL
-	.opencl_func = neutral_opencl_kernel,
+	.opencl_funcs = {neutral_opencl_kernel, NULL},
 #endif
-	.cpu_func = neutral_cpu_kernel,
+	.cpu_funcs = {neutral_cpu_kernel, NULL},
 	.nbuffers = 1
 };
 
@@ -140,6 +157,8 @@ static starpu_codelet neutral_cl = {
 /* dummy OpenCL implementation */
 static void increment_opencl_kernel(void *descr[], void *cl_arg __attribute__((unused)))
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	cl_mem d_token = (cl_mem)STARPU_VARIABLE_GET_PTR(descr[0]);
 	unsigned h_token;
 
@@ -148,7 +167,7 @@ static void increment_opencl_kernel(void *descr[], void *cl_arg __attribute__((u
 
 	clEnqueueReadBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
 	h_token++;
-	clEnqueueWriteBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL); 
+	clEnqueueWriteBuffer(queue, d_token, CL_TRUE, 0, sizeof(unsigned), (void *)&h_token, 0, NULL, NULL);
 }
 #endif
 
@@ -156,6 +175,8 @@ static void increment_opencl_kernel(void *descr[], void *cl_arg __attribute__((u
 #ifdef STARPU_USE_CUDA
 static void increment_cuda_kernel(void *descr[], void *arg)
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	unsigned *tokenptr = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
 	unsigned host_token;
 
@@ -172,25 +193,47 @@ static void increment_cuda_kernel(void *descr[], void *arg)
 
 static void increment_cpu_kernel(void *descr[], void *arg)
 {
+	STARPU_SKIP_IF_VALGRIND;
+
 	unsigned *tokenptr = (unsigned *)STARPU_VARIABLE_GET_PTR(descr[0]);
 	*tokenptr = *tokenptr + 1;
 }
 
-static starpu_codelet increment_cl = {
+static struct starpu_codelet increment_cl =
+{
 	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
 #ifdef STARPU_USE_CUDA
-	.cuda_func = increment_cuda_kernel,
+	.cuda_funcs = {increment_cuda_kernel, NULL},
 #endif
 #ifdef STARPU_USE_OPENCL
-	.opencl_func = increment_opencl_kernel,
+	.opencl_funcs = {increment_opencl_kernel, NULL},
 #endif
-	.cpu_func = increment_cpu_kernel,
-	.nbuffers = 1
+	.cpu_funcs = {increment_cpu_kernel, NULL},
+	.nbuffers = 1,
+	.modes = {STARPU_RW}
+};
+
+static struct starpu_codelet increment_cl_redux =
+{
+	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
+#ifdef STARPU_USE_CUDA
+	.cuda_funcs = {increment_cuda_kernel, NULL},
+#endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_funcs = {increment_opencl_kernel, NULL},
+#endif
+	.cpu_funcs = {increment_cpu_kernel, NULL},
+	.nbuffers = 1,
+	.modes = {STARPU_REDUX}
 };
 
 int main(int argc, char **argv)
 {
-	starpu_init(NULL);
+	int ret;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
 
 	starpu_variable_data_register(&handle, 0, (uintptr_t)&var, sizeof(unsigned));
 
@@ -207,26 +250,55 @@ int main(int argc, char **argv)
 		for (t = 0; t < ntasks; t++)
 		{
 			struct starpu_task *task = starpu_task_create();
-	
-			task->cl = &increment_cl;
-	
-			task->buffers[0].mode = (t % 10 == 0)?STARPU_RW:STARPU_REDUX;
-			task->buffers[0].handle = handle;
-	
-			int ret = starpu_task_submit(task);
-			STARPU_ASSERT(!ret);
 
+			if (t % 10 == 0)
+			{
+				task->cl = &increment_cl;
+			}
+			else
+			{
+				task->cl = &increment_cl_redux;
+			}
+			task->handles[0] = handle;
+
+			int ret = starpu_task_submit(task);
+			if (ret == -ENODEV) goto enodev;
+			STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_submit");
 		}
 
-		starpu_data_acquire(handle, STARPU_R);
-		STARPU_ASSERT(var == ntasks*(loop + 1));
+		ret = starpu_data_acquire(handle, STARPU_R);
+		STARPU_CHECK_RETURN_VALUE(ret, "starpu_data_acquire");
+		if (var != ntasks *(loop+1))
+		{
+			_STARPU_DEBUG("%d != %d\n", var, ntasks*(loop+1));
+			starpu_data_release(handle);
+			starpu_data_unregister(handle);
+			goto err;
+		}
 		starpu_data_release(handle);
 	}
 
 	starpu_data_unregister(handle);
-	STARPU_ASSERT(var == ntasks*nloops);
+	if (var != ntasks *nloops)
+	{
+		_STARPU_DEBUG("%d != %d\n", var, ntasks*nloops);
+		goto err;
+	}
 	
+
 	starpu_shutdown();
 
-	return 0;
+	return EXIT_SUCCESS;
+
+enodev:
+	starpu_data_unregister(handle);
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+ 	 * could perform the kernel, so this is not an error from StarPU */
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+
+err:
+	starpu_shutdown();
+	STARPU_RETURN(EXIT_FAILURE);
 }

+ 70 - 0
tests/datawizard/interfaces/bcsr/bcsr_cuda.cu

@@ -0,0 +1,70 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <starpu.h>
+#include <starpu_cuda.h>
+#include "../test_interfaces.h"
+
+extern struct test_config bcsr_config;
+
+__global__ void bcsr_cuda(int *nzval, uint32_t nnz, int *err, int factor)
+{
+        unsigned i =  blockIdx.x*blockDim.x + threadIdx.x;
+
+	if (i >= nnz)
+		return;
+
+	if (nzval[i] != i*factor)
+		*err = 1;
+	else
+		nzval[i] = -nzval[i];
+}
+
+extern "C" void test_bcsr_cuda_func(void *buffers[], void *args)
+{
+	int factor;
+	int *ret;
+	int *val;
+	cudaError_t error;
+	uint32_t nnz = STARPU_BCSR_GET_NNZ(buffers[0]);
+	unsigned threads_per_block = 64;
+	unsigned nblocks = (nnz + threads_per_block-1) / threads_per_block;
+
+	factor = *(int *) args;
+	//val = (int *) starpu_bcsr_get_local_nzval((starpu_data_handle_t)buffers[0]);
+	val = (int *) STARPU_BCSR_GET_NZVAL(buffers[0]);
+
+	error = cudaMalloc(&ret, sizeof(int));
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+	error = cudaMemcpy(ret,
+			   &bcsr_config.copy_failed,
+			   sizeof(int),
+			   cudaMemcpyHostToDevice);
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+        bcsr_cuda<<<nblocks,threads_per_block,2,starpu_cuda_get_local_stream()>>>
+		(val, nnz, ret, factor);
+
+	error = cudaMemcpy(&bcsr_config.copy_failed,
+			   ret,
+			   sizeof(int),
+			   cudaMemcpyDeviceToHost);
+	
+	cudaFree(ret);
+	cudaStreamSynchronize(starpu_cuda_get_local_stream());
+}

+ 198 - 0
tests/datawizard/interfaces/bcsr/bcsr_interface.c

@@ -0,0 +1,198 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <config.h>
+#include <starpu.h>
+#include "../test_interfaces.h"
+#include "../../../helper.h"
+
+/*
+ * XXX : These values should not be changed. If you really understand all that
+ * BCSR stuff, feel free to write a better example :)
+ */
+
+/* Size of the matrix */
+#define WIDTH          4
+#define HEIGHT         4
+#define SIZE           (WIDTH * HEIGHT)
+
+/* Size of the blocks */
+#define R              2
+#define C              2
+#define BLOCK_SIZE     (R*C)
+
+/* The matrix is simply 0 1 2... There are SIZE-1 non zero values... */
+#define NNZ            (SIZE-1)
+
+/* ... and SIZE/BLOCK_SIZE non zero blocks */
+#define NNZ_BLOCKS     (SIZE/BLOCK_SIZE)
+
+
+#ifdef STARPU_USE_CPU
+static void test_bcsr_cpu_func(void *buffers[], void *args);
+#endif /* !STARPU_USE_CPU */
+#ifdef STARPU_USE_CUDA
+extern void test_bcsr_cuda_func(void *buffers[], void *_args);
+#endif
+#ifdef STARPU_USE_OPENCL
+extern void test_bcsr_opencl_func(void *buffers[], void *args);
+#endif
+
+
+static int nzval[NNZ];
+static int nzval2[NNZ];
+
+static uint32_t colind[NNZ_BLOCKS];
+static uint32_t colind2[NNZ_BLOCKS];
+
+static uint32_t rowptr[1+WIDTH/R];
+static uint32_t rowptr2[1+WIDTH/R];
+
+static starpu_data_handle_t bcsr_handle;
+static starpu_data_handle_t bcsr2_handle;
+
+
+struct test_config bcsr_config =
+{
+#ifdef STARPU_USE_CPU
+	.cpu_func      = test_bcsr_cpu_func,
+#endif /* !STARPU_USE_CPU */
+#ifdef STARPU_USE_CUDA
+	.cuda_func     = test_bcsr_cuda_func,
+#endif /* !STARPU_USE_CUDA */
+#ifdef STARPU_USE_OPENCL
+	.opencl_func   = test_bcsr_opencl_func,
+#endif /* !STARPU_USE_OPENCL */
+	.handle        = &bcsr_handle,
+	.dummy_handle  = &bcsr2_handle,
+	.copy_failed   = 0,
+	.name          = "bcsr_interface"
+};
+
+static void
+register_data(void)
+{
+	int i;
+
+	for (i = 0; i < NNZ; i++)
+		nzval[i] = i;
+
+	colind[0] = 0;
+	colind[1] = 2;
+	colind[2] = 0;
+	colind[3] = 2;
+
+	rowptr[0] = 0;
+	rowptr[1] = 2;
+	rowptr[2] = 4;
+	
+	starpu_bcsr_data_register(&bcsr_handle,
+				  0,
+				  NNZ_BLOCKS,
+				  HEIGHT/R,
+				  (uintptr_t) nzval,
+				  colind,
+				  rowptr,
+				  0,
+				  R,
+				  C,
+				  sizeof(nzval[0]));
+
+	starpu_bcsr_data_register(&bcsr2_handle,
+				  0,
+				  NNZ_BLOCKS,
+				  HEIGHT/R,
+				  (uintptr_t) nzval2,
+				  colind2,
+				  rowptr2,
+				  0,
+				  R,
+				  C,
+				  sizeof(nzval2[0]));
+}
+
+static void
+unregister_data(void)
+{
+	starpu_data_unregister(bcsr_handle);
+	starpu_data_unregister(bcsr2_handle);
+}
+
+static void
+test_bcsr_cpu_func(void *buffers[], void *args)
+{
+	STARPU_SKIP_IF_VALGRIND;
+
+	int *val;
+	int factor;
+	int i;
+
+	uint32_t nnz = STARPU_BCSR_GET_NNZ(buffers[0]);
+	val = (int *) STARPU_BCSR_GET_NZVAL(buffers[0]);
+	factor = *(int *) args;
+
+	for (i = 0; i < nnz; i++)
+	{
+		if (val[i] != i * factor)
+		{
+			bcsr_config.copy_failed = 1;
+			return;
+		}
+		val[i] *= -1;
+	}
+
+	/* Check colind */
+	uint32_t *col = STARPU_BCSR_GET_COLIND(buffers[0]);
+	for (i = 0; i < NNZ_BLOCKS; i++)
+		if (col[i] != colind[i])
+			bcsr_config.copy_failed = 1;
+
+	/* Check rowptr */
+	uint32_t *row = STARPU_BCSR_GET_ROWPTR(buffers[0]);
+	for (i = 0; i < 1 + WIDTH/R; i++)
+		if (row[i] != rowptr[i])
+			bcsr_config.copy_failed = 1;
+}
+
+int
+main(void)
+{
+	data_interface_test_summary *summary;
+	struct starpu_conf conf =
+	{
+		.ncpus   = -1,
+		.ncuda   = 2,
+		.nopencl = 1
+	};
+
+	if (starpu_init(&conf) == -ENODEV)
+		return STARPU_TEST_SKIPPED;
+
+	register_data();
+
+	summary = run_tests(&bcsr_config);
+	if (!summary)
+		exit(EXIT_FAILURE);
+
+	unregister_data();
+
+	starpu_shutdown();
+
+	data_interface_test_summary_print(stderr, summary);
+
+	return data_interface_test_summary_success(summary);
+}
+

+ 130 - 0
tests/datawizard/interfaces/bcsr/bcsr_opencl.c

@@ -0,0 +1,130 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <config.h>
+#include <starpu.h>
+#include <starpu_opencl.h>
+#include "../test_interfaces.h"
+
+#define KERNEL_LOCATION "tests/datawizard/interfaces/bcsr/bcsr_opencl_kernel.cl"
+extern struct test_config bcsr_config;
+static struct starpu_opencl_program opencl_program;
+
+void
+test_bcsr_opencl_func(void *buffers[], void *args)
+{
+	STARPU_SKIP_IF_VALGRIND;
+
+	int id, devid, ret;
+	int factor = *(int *) args;
+
+        cl_int             err;
+	cl_kernel          kernel;
+	cl_command_queue   queue;
+	cl_event           event;
+
+	ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+
+	uint32_t nnz = STARPU_BCSR_GET_NNZ(buffers[0]);
+	cl_mem nzval = (cl_mem)STARPU_BCSR_GET_NZVAL(buffers[0]);
+
+	cl_context context;
+	id = starpu_worker_get_id();
+	devid = starpu_worker_get_devid(id);
+	starpu_opencl_get_context(devid, &context);
+
+	cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+		sizeof(int), &bcsr_config.copy_failed, &err);
+
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+
+	err = starpu_opencl_load_kernel(&kernel,
+					&queue,
+					&opencl_program,
+					"test_bcsr_opencl",
+					devid);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	int nargs;
+	nargs = starpu_opencl_set_kernel_args(&err, &kernel,
+					      sizeof(nzval), &nzval,
+					      sizeof(nnz), &nnz,
+					      sizeof(fail), &fail,
+					      sizeof(factor), &factor,
+					      0);
+
+	if (nargs != 4)
+	{
+		fprintf(stderr, "Failed to set argument #%d\n", nargs);
+		STARPU_OPENCL_REPORT_ERROR(err);
+	}
+			
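+	/* Pick a work-group size: query the kernel's maximum work-group
+	 * size for this device and clamp it to the global size so that
+	 * small problems can still be enqueued. */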
+	{
+		size_t global = nnz;
+		size_t local;
+                size_t s;
+                cl_device_id device;
+
+                starpu_opencl_get_device(devid, &device);
+
+                err = clGetKernelWorkGroupInfo (kernel,
+						device,
+						CL_KERNEL_WORK_GROUP_SIZE,
+						sizeof(local),
+						&local,
+						&s);
+                if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+
+                if (local > global)
+			local = global;
+
+		err = clEnqueueNDRangeKernel(queue,
+					kernel,
+					1,
+					NULL,
+					&global,
+					&local,
+					0,
+					NULL,
+					&event);
+
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+	}
+
+	err = clEnqueueReadBuffer(queue,
+				  fail,
+				  CL_TRUE,
+				  0, 
+				  sizeof(int),
+				  &bcsr_config.copy_failed,
+				  0,
+				  NULL,
+				  NULL);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	clFinish(queue);
+	starpu_opencl_collect_stats(event);
+	clReleaseEvent(event);
+
+	starpu_opencl_release_kernel(kernel);
+        starpu_opencl_unload_opencl(&opencl_program);
+}

+ 29 - 0
tests/datawizard/interfaces/bcsr/bcsr_opencl_kernel.cl

@@ -0,0 +1,29 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+__kernel void test_bcsr_opencl(__global int *val,
+			       unsigned int nx,
+			       __global int *err,
+			       int factor)
+{
+        const int i = get_global_id(0);
+        if (i >=  nx)
+		return;
+
+	if (val[i] != i * factor)
+		*err = 1;
+	else
+		val[i] = - val[i];
+}

+ 80 - 0
tests/datawizard/interfaces/block/block_cuda.cu

@@ -0,0 +1,80 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <starpu.h>
+#include <starpu_cuda.h>
+#include "../test_interfaces.h"
+
+extern struct test_config block_config;
+
+static __global__ void block_cuda(int *block,
+				  int nx, int ny, int nz,
+				  unsigned ldy, unsigned ldz,
+				  float factor, int *err)
+{
+        int i, j, k;
+	int val = 0;
+
+        for (k = 0; k < nz ;k++)
+	{
+                for (j = 0; j < ny ;j++)
+		{
+                        for(i = 0; i < nx ;i++)
+			{
+				if (block[(k*ldz)+(j*ldy)+i] != factor * val)
+				{
+					*err = 1;
+					return;
+				}
+				else
+				{
+					block[(k*ldz)+(j*ldy)+i] *= -1;
+					val++;
+				}
+			}
+                }
+        }
+}
+
+extern "C" void test_block_cuda_func(void *buffers[], void *args)
+{
+	cudaError_t error;
+	int *ret;
+
+	error = cudaMalloc(&ret, sizeof(int));
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+	error = cudaMemcpy(ret, &block_config.copy_failed, sizeof(int), cudaMemcpyHostToDevice);
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+	int nx = STARPU_BLOCK_GET_NX(buffers[0]);
+	int ny = STARPU_BLOCK_GET_NY(buffers[0]);
+	int nz = STARPU_BLOCK_GET_NZ(buffers[0]);
+        unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]);
+        unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]);
+	int *block = (int *) STARPU_BLOCK_GET_PTR(buffers[0]);
+	int factor = *(int*) args;
+
+        block_cuda<<<1,1, 0, starpu_cuda_get_local_stream()>>>
+		(block, nx, ny, nz, ldy, ldz, factor, ret);
+	error = cudaMemcpy(&block_config.copy_failed, ret, sizeof(int), cudaMemcpyDeviceToHost);
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+	cudaFree(ret);
+	cudaStreamSynchronize(starpu_cuda_get_local_stream());
+}

+ 163 - 0
tests/datawizard/interfaces/block/block_interface.c

@@ -0,0 +1,163 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <config.h>
+#include <starpu.h>
+#include "../test_interfaces.h"
+#include "../../../helper.h"
+
+#define NX 16
+#define NY NX
+#define NZ NX
+
+/* Prototypes */
+static void register_data(void);
+static void unregister_data(void);
+static void test_block_cpu_func(void *buffers[], void *args);
+#ifdef STARPU_USE_CUDA
+extern void test_block_cuda_func(void *buffers[], void *_args);
+#endif
+#ifdef STARPU_USE_OPENCL
+extern void test_block_opencl_func(void *buffers[], void *args);
+#endif
+
+
+static starpu_data_handle_t block_handle;
+static starpu_data_handle_t block2_handle;
+
+struct test_config block_config =
+{
+	.cpu_func      = test_block_cpu_func,
+#ifdef STARPU_USE_CUDA
+	.cuda_func     = test_block_cuda_func,
+#endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_func   = test_block_opencl_func,
+#endif
+	.handle        = &block_handle,
+	.dummy_handle  = &block2_handle,
+	.copy_failed   = 0,
+	.name          = "block_interface"
+};
+
+static int block[NX*NY*NZ];
+static int block2[NX*NY*NZ];
+
+static void
+register_data(void)
+{
+	/* Initializing data */
+	int val = 0;
+	int i, j, k;
+	for (k = 0; k < NZ; k++)
+		for (j = 0; j < NY; j++)
+			for (i = 0; i < NX; i++)
+                                block[(k*NX*NY)+(j*NX)+i] = val++;
+
+	/* Registering data */
+	starpu_block_data_register(&block_handle,
+                                    0,
+                                    (uintptr_t)block,
+				    NX,
+				    NX * NY,
+				    NX,
+				    NY,
+				    NZ,
+				    sizeof(block[0]));
+	starpu_block_data_register(&block2_handle,
+                                    0,
+                                    (uintptr_t)block2,
+				    NX,
+				    NX * NY,
+				    NX,
+				    NY,
+				    NZ,
+				    sizeof(block2[0]));
+}
+
+static void
+unregister_data(void)
+{
+	starpu_data_unregister(block_handle);
+	starpu_data_unregister(block2_handle);
+}
+
+static void test_block_cpu_func(void *buffers[], void *args)
+{
+	STARPU_SKIP_IF_VALGRIND;
+
+	int factor = *(int*)args;
+	int nx = STARPU_BLOCK_GET_NX(buffers[0]);
+	int ny = STARPU_BLOCK_GET_NY(buffers[0]);
+	int nz = STARPU_BLOCK_GET_NZ(buffers[0]);
+        unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]);
+        unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]);
+	int *block = (int *) STARPU_BLOCK_GET_PTR(buffers[0]);
+	unsigned int i, j, k;
+	int val = 0;
+	block_config.copy_failed = 0;
+	for (k = 0; k < nz; k++)
+	{
+		for (j = 0; j < ny; j++)
+		{
+			for (i = 0; i < nx; i++)
+			{
+                                if (block[(k*ldz)+(j*ldy)+i] != factor * val)
+				{
+					block_config.copy_failed = 1;
+					return;
+				}
+				else
+				{
+					block[(k*ldz)+(j*ldy)+i] *= -1;
+					val++;
+				}
+			}
+		}
+	}
+}
+
+int
+main(void)
+{
+	data_interface_test_summary *summary;
+	struct starpu_conf conf =
+	{
+		.ncpus   = -1,
+		.ncuda   = 2,
+		.nopencl = 1
+	};
+
+	if (starpu_init(&conf) == -ENODEV)
+		goto enodev;
+
+	register_data();
+
+	summary = run_tests(&block_config);
+	if (!summary)
+		exit(EXIT_FAILURE);
+
+	unregister_data();
+
+	starpu_shutdown();
+
+	data_interface_test_summary_print(stderr, summary);
+
+	return data_interface_test_summary_success(summary);
+
+enodev:
+	return STARPU_TEST_SKIPPED;
+}
+

+ 120 - 0
tests/datawizard/interfaces/block/block_opencl.c

@@ -0,0 +1,120 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <config.h>
+#include <starpu.h>
+#include <starpu_opencl.h>
+#include "../test_interfaces.h"
+
+#define KERNEL_LOCATION "tests/datawizard/interfaces/block/block_opencl_kernel.cl"
+extern struct test_config block_config;
+static struct starpu_opencl_program opencl_program;
+
+void
+test_block_opencl_func(void *buffers[], void *args)
+{
+	STARPU_SKIP_IF_VALGRIND;
+
+	int id, devid, ret;
+	int factor = *(int *) args;
+
+        cl_int             err;
+	cl_kernel          kernel;
+	cl_command_queue   queue;
+	cl_event           event;
+
+	ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+
+	int nx = STARPU_BLOCK_GET_NX(buffers[0]);
+	int ny = STARPU_BLOCK_GET_NY(buffers[0]);
+	int nz = STARPU_BLOCK_GET_NZ(buffers[0]);
+        unsigned ldy = STARPU_BLOCK_GET_LDY(buffers[0]);
+        unsigned ldz = STARPU_BLOCK_GET_LDZ(buffers[0]);
+	cl_mem block = (cl_mem) STARPU_BLOCK_GET_DEV_HANDLE(buffers[0]);
+
+	cl_context context;
+	id = starpu_worker_get_id();
+	devid = starpu_worker_get_devid(id);
+	starpu_opencl_get_context(devid, &context);
+
+	cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+		sizeof(int), &block_config.copy_failed, &err);
+
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+
+	err = starpu_opencl_load_kernel(&kernel,
+					&queue,
+					&opencl_program,
+					"block_opencl",
+					devid);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	int nargs;
+	nargs = starpu_opencl_set_kernel_args(&err, &kernel,
+					      sizeof(block), &block,
+					      sizeof(nx), &nx,
+					      sizeof(ny), &ny,
+					      sizeof(nz), &nz,
+					      sizeof(ldy), &ldy,
+					      sizeof(ldz), &ldz,
+					      sizeof(factor), &factor,
+					      sizeof(fail), &fail,
+					      0);
+
+	if (nargs != 8)
+	{
+		fprintf(stderr, "Failed to set argument #%d\n", nargs);
+		STARPU_OPENCL_REPORT_ERROR(err);
+	}
+			
+	{
+		size_t global = nx * ny * nz;
+		err = clEnqueueNDRangeKernel(queue,
+					     kernel,
+					     1,
+					     NULL,
+					     &global,
+					     NULL,
+					     0,
+					     NULL,
+					     &event);
+
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+	}
+
+	err = clEnqueueReadBuffer(queue,
+				  fail,
+				  CL_TRUE,
+				  0, 
+				  sizeof(int),
+				  &block_config.copy_failed,
+				  0,
+				  NULL,
+				  NULL);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	clFinish(queue);
+	starpu_opencl_collect_stats(event);
+	clReleaseEvent(event);
+
+	starpu_opencl_release_kernel(kernel);
+        starpu_opencl_unload_opencl(&opencl_program);
+}

+ 46 - 0
tests/datawizard/interfaces/block/block_opencl_kernel.cl

@@ -0,0 +1,46 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+__kernel void block_opencl(__global int *block,
+			   int nx, int ny, int nz,
+			   int ldy, int ldz,
+			   int factor, __global int *err)
+{
+        const int id = get_global_id(0);
+	if (id > 0)
+		return;
+
+	unsigned int i, j, k;
+	int val = 0;
+	for (k = 0; k < nz; k++)
+	{
+		for (j = 0; j < ny; j++)
+		{
+			for (i = 0; i < nx; i++)
+			{
+                                if (block[(k*ldz)+(j*ldy)+i] != factor * val)
+				{
+					*err = 1;
+					return;
+				}
+				else
+				{
+					block[(k*ldz)+(j*ldy)+i] *= -1;
+					val++;
+				}
+			}
+		}
+	}
+}

+ 106 - 0
tests/datawizard/interfaces/copy_interfaces.c

@@ -0,0 +1,106 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2012  Centre National de la Recherche Scientifique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include "../../helper.h"
+#include <datawizard/coherency.h>
+
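+/* Register a second handle describing the same data as 'handle' with
+ * starpu_data_register_same(), optionally display it, then compare the two
+ * interfaces with the interface's compare() method.  Returns 0 when they
+ * match, 1 otherwise. */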
+static int check_copy(starpu_data_handle_t handle, char *header)
+{
+	void *old_interface, *new_interface;
+	starpu_data_handle_t new_handle;
+	int ret=0;
+
+	starpu_data_register_same(&new_handle, handle);
+
+	if (!getenv("STARPU_SSILENT") && new_handle->ops->display)
+	{
+		fprintf(stderr, "%s: ", header);
+		new_handle->ops->display(new_handle, stderr);
+		fprintf(stderr, "\n");
+	}
+
+	old_interface = starpu_data_get_interface_on_node(handle, 0);
+	new_interface = starpu_data_get_interface_on_node(new_handle, 0);
+
+	if (new_handle->ops->compare(old_interface, new_interface) == 0)
+	{
+		FPRINTF(stderr, "Error when copying %s data\n", header);
+		assert(0);
+		ret = 1;
+	}
+	starpu_data_unregister(handle);
+	starpu_data_unregister(new_handle);
+	return ret;
+}
+
+int main(int argc, char **argv)
+{
+	int ret;
+	starpu_data_handle_t handle;
+
+	ret = starpu_init(NULL);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	{
+		int x=42;
+		starpu_variable_data_register(&handle, 0, (uintptr_t)&x, sizeof(x));
+		ret = check_copy(handle, "variable");
+	}
+
+	if (ret == 0)
+	{
+		int xx[] = {12, 23, 45};
+		starpu_vector_data_register(&handle, 0, (uintptr_t)xx, 3, sizeof(xx[0]));
+		ret = check_copy(handle, "vector");
+	}
+
+	if (ret == 0)
+	{
+		int NX=3;
+		int NY=2;
+		int matrix[NX][NY];
+		starpu_matrix_data_register(&handle, 0, (uintptr_t)matrix, NX, NX, NY, sizeof(matrix[0][0]));
+		ret = check_copy(handle, "matrix");
+	}
+
+	if (ret == 0)
+	{
+		int NX=3;
+		int NY=2;
+		int NZ=4;
+		int block[NX*NY*NZ];
+		starpu_block_data_register(&handle, 0, (uintptr_t)block, NX, NX*NY, NX, NY, NZ, sizeof(block[0]));
+		ret = check_copy(handle, "block");
+	}
+
+	if (ret == 0)
+	{
+		uint32_t nnz = 2;
+		unsigned nrow = 5;
+		float nzvalA[20];
+		uint32_t colind[1];
+		uint32_t rowptr[2];
+		starpu_csr_data_register(&handle, 0, nnz, nrow, (uintptr_t)nzvalA, colind, rowptr, 0, sizeof(float));
+		ret = check_copy(handle, "csr");
+	}
+
+	starpu_shutdown();
+	return ret;
+}
+
+

+ 68 - 0
tests/datawizard/interfaces/csr/csr_cuda.cu

@@ -0,0 +1,68 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <starpu.h>
+#include <starpu_cuda.h>
+#include "../test_interfaces.h"
+
+extern struct test_config csr_config;
+
+__global__ void csr_cuda(int *nzval, uint32_t nnz, int *err, int factor)
+{
+        unsigned i =  blockIdx.x*blockDim.x + threadIdx.x;
+
+	if (i >= nnz)
+		return;
+
+	if (nzval[i] != (i+1)*factor)
+		*err = 1;
+	else
+		nzval[i] = -nzval[i];
+}
+
+extern "C" void test_csr_cuda_func(void *buffers[], void *args)
+{
+	int factor;
+	int *ret;
+	int *val;
+	cudaError_t error;
+	uint32_t nnz = STARPU_CSR_GET_NNZ(buffers[0]);
+	unsigned threads_per_block = 64;
+	unsigned nblocks = (nnz + threads_per_block-1) / threads_per_block;
+
+	factor = *(int *) args;
+	val = (int *) STARPU_CSR_GET_NZVAL(buffers[0]);
+
+	error = cudaMalloc(&ret, sizeof(int));
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+	error = cudaMemcpy(ret,
+			   &csr_config.copy_failed,
+			   sizeof(int),
+			   cudaMemcpyHostToDevice);
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+        csr_cuda<<<nblocks,threads_per_block,2,starpu_cuda_get_local_stream()>>> (val, nnz, ret, factor);
+
+	error = cudaMemcpy(&csr_config.copy_failed,
+			   ret,
+			   sizeof(int),
+			   cudaMemcpyDeviceToHost);
+	
+	cudaFree(ret);
+	cudaStreamSynchronize(starpu_cuda_get_local_stream());
+}

+ 170 - 0
tests/datawizard/interfaces/csr/csr_interface.c

@@ -0,0 +1,170 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <config.h>
+#include <starpu.h>
+#include "../test_interfaces.h"
+#include "../../../helper.h"
+
+#define WIDTH  8
+#define HEIGHT 4
+#define SIZE   (WIDTH * HEIGHT)
+#define NNZ    (SIZE-1)
+
+#ifdef STARPU_USE_CPU
+static void test_csr_cpu_func(void *buffers[], void *args);
+#endif /* !STARPU_USE_CPU */
+#ifdef STARPU_USE_CUDA
+extern void test_csr_cuda_func(void *buffers[], void *_args);
+#endif
+#ifdef STARPU_USE_OPENCL
+extern void test_csr_opencl_func(void *buffers[], void *args);
+#endif
+
+
+static int nzval[NNZ];
+static int nzval2[NNZ];
+
+static uint32_t colind[NNZ];
+static uint32_t colind2[NNZ];
+
+static uint32_t rowptr[HEIGHT+1];
+static uint32_t rowptr2[HEIGHT+1];
+
+static starpu_data_handle_t csr_handle;
+static starpu_data_handle_t csr2_handle;
+
+struct test_config csr_config =
+{
+#ifdef STARPU_USE_CPU
+	.cpu_func      = test_csr_cpu_func,
+#endif /* ! STARPU_USE_CPU */
+#ifdef STARPU_USE_CUDA
+	.cuda_func     = test_csr_cuda_func,
+#endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_func   = test_csr_opencl_func,
+#endif
+	.handle        = &csr_handle,
+	.dummy_handle  = &csr2_handle,
+	.copy_failed   = 0,
+	.name          = "csr_interface"
+};
+
+static void
+register_data(void)
+{
+	int i;
+	for (i = 1; i < SIZE; i++)
+	{
+		nzval[i-1] = i;
+		nzval2[i-1] = 42;
+
+		colind[i-1] = i % WIDTH;
+		colind2[i-1] = colind[i-1];
+	}
+
+	rowptr[0] = 1;
+	rowptr2[0] = 1;
+	for (i = 1; i < HEIGHT; i++)
+	{
+		rowptr[i] = i * WIDTH;
+		rowptr2[i] = rowptr[i];
+	}
+	rowptr[HEIGHT] = NNZ + 1;
+	rowptr2[HEIGHT] = rowptr[HEIGHT];
+
+	starpu_csr_data_register(&csr_handle,
+				 0,
+				 NNZ,
+				 HEIGHT,
+				 (uintptr_t) nzval,
+				 colind,
+				 rowptr,
+				 0,
+				 sizeof(nzval[0]));
+	starpu_csr_data_register(&csr2_handle,
+				 0,
+				 NNZ,
+				 HEIGHT,
+				 (uintptr_t) nzval2,
+				 colind2,
+				 rowptr2,
+				 0,
+				 sizeof(nzval2[0]));
+}
+
+static void
+unregister_data(void)
+{
+	starpu_data_unregister(csr_handle);
+	starpu_data_unregister(csr2_handle);
+}
+
+static void
+test_csr_cpu_func(void *buffers[], void *args)
+{
+	STARPU_SKIP_IF_VALGRIND;
+
+	int *val;
+	int factor;
+	int i;
+
+	uint32_t nnz = STARPU_CSR_GET_NNZ(buffers[0]);
+	val = (int *) STARPU_CSR_GET_NZVAL(buffers[0]);
+	factor = *(int *) args;
+
+	for (i = 0; i < nnz; i++)
+	{
+		if (val[i] != (i+1) * factor)
+		{
+			csr_config.copy_failed = 1;
+			return;
+		}
+		val[i] *= -1;
+	}
+}
+
+int
+main(void)
+{
+	data_interface_test_summary *summary;
+	struct starpu_conf conf =
+	{
+		.ncpus   = -1,
+		.ncuda   = 2,
+		.nopencl = 1
+	};
+
+	if (starpu_init(&conf) == -ENODEV)
+		goto enodev;
+
+	register_data();
+
+	summary = run_tests(&csr_config);
+	if (!summary)
+		exit(EXIT_FAILURE);
+
+	unregister_data();
+
+	starpu_shutdown();
+
+	data_interface_test_summary_print(stderr, summary);
+
+	return data_interface_test_summary_success(summary);
+
+enodev:
+	return STARPU_TEST_SKIPPED;
+}

+ 130 - 0
tests/datawizard/interfaces/csr/csr_opencl.c

@@ -0,0 +1,130 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <config.h>
+#include <starpu.h>
+#include <starpu_opencl.h>
+#include "../test_interfaces.h"
+
+#define KERNEL_LOCATION "tests/datawizard/interfaces/csr/csr_opencl_kernel.cl"
+extern struct test_config csr_config;
+static struct starpu_opencl_program opencl_program;
+
+void
+test_csr_opencl_func(void *buffers[], void *args)
+{
+	STARPU_SKIP_IF_VALGRIND;
+
+	int id, devid, ret;
+	int factor = *(int *) args;
+
+        cl_int             err;
+	cl_kernel          kernel;
+	cl_command_queue   queue;
+	cl_event           event;
+
+	ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION, &opencl_program, NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+
+	uint32_t nnz = STARPU_CSR_GET_NNZ(buffers[0]);
+	cl_mem nzval = (cl_mem)STARPU_CSR_GET_NZVAL(buffers[0]);
+
+	cl_context context;
+	id = starpu_worker_get_id();
+	devid = starpu_worker_get_devid(id);
+	starpu_opencl_get_context(devid, &context);
+
+	cl_mem fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+		sizeof(int), &csr_config.copy_failed, &err);
+
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+
+	err = starpu_opencl_load_kernel(&kernel,
+					&queue,
+					&opencl_program,
+					"test_csr_opencl",
+					devid);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	int nargs;
+	nargs = starpu_opencl_set_kernel_args(&err, &kernel,
+					      sizeof(nzval), &nzval,
+					      sizeof(nnz), &nnz,
+					      sizeof(fail), &fail,
+					      sizeof(factor), &factor,
+					      0);
+
+	if (nargs != 4)
+	{
+		fprintf(stderr, "Failed to set argument #%d\n", nargs);
+		STARPU_OPENCL_REPORT_ERROR(err);
+	}
+			
+	{
+		size_t global = nnz;
+		size_t local;
+                size_t s;
+                cl_device_id device;
+
+                starpu_opencl_get_device(devid, &device);
+
+                err = clGetKernelWorkGroupInfo (kernel,
+						device,
+						CL_KERNEL_WORK_GROUP_SIZE,
+						sizeof(local),
+						&local,
+						&s);
+                if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+
+                if (local > global)
+			local = global;
+
+		err = clEnqueueNDRangeKernel(queue,
+					kernel,
+					1,
+					NULL,
+					&global,
+					&local,
+					0,
+					NULL,
+					&event);
+
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+	}
+
+	err = clEnqueueReadBuffer(queue,
+				  fail,
+				  CL_TRUE,
+				  0, 
+				  sizeof(int),
+				  &csr_config.copy_failed,
+				  0,
+				  NULL,
+				  NULL);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	clFinish(queue);
+	starpu_opencl_collect_stats(event);
+	clReleaseEvent(event);
+
+	starpu_opencl_release_kernel(kernel);
+        starpu_opencl_unload_opencl(&opencl_program);
+}

+ 29 - 0
tests/datawizard/interfaces/csr/csr_opencl_kernel.cl

@@ -0,0 +1,29 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+__kernel void test_csr_opencl(__global int *val,
+			      unsigned int nx,
+			      __global int *err,
+			      int factor)
+{
+        const int i = get_global_id(0);
+        if (i >=  nx)
+		return;
+
+	if (val[i] != (i+1) * factor)
+		*err = 1;
+	else
+		val[i] = - val[i];
+}

+ 71 - 0
tests/datawizard/interfaces/matrix/matrix_cuda.cu

@@ -0,0 +1,71 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <starpu.h>
+#include <starpu_cuda.h>
+#include "../test_interfaces.h"
+
+extern struct test_config matrix_config;
+
+__global__ void matrix_cuda(int *val, unsigned n, int *err, int factor)
+{
+        unsigned i =  blockIdx.x*blockDim.x + threadIdx.x;
+
+	if (i >= n)
+		return;
+
+	if (val[i] != i*factor)
+		*err = 1;
+	else
+		val[i] = -val[i];
+}
+
+extern "C" void test_matrix_cuda_func(void *buffers[], void *args)
+{
+	int factor;
+	int *ret;
+	int *val;
+	cudaError_t error;
+	unsigned int nx, ny, n;
+
+	nx = STARPU_MATRIX_GET_NX(buffers[0]);
+	ny = STARPU_MATRIX_GET_NY(buffers[0]);
+	n = nx * ny;
+	unsigned threads_per_block = 64;
+	unsigned nblocks = (n + threads_per_block-1) / threads_per_block;
+	factor = *(int *) args;
+	val = (int *) STARPU_MATRIX_GET_PTR(buffers[0]);
+
+	error = cudaMalloc(&ret, sizeof(int));
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+	error = cudaMemcpy(ret,
+			   &matrix_config.copy_failed,
+			   sizeof(int),
+			   cudaMemcpyHostToDevice);
+	if (error != cudaSuccess)
+		STARPU_CUDA_REPORT_ERROR(error);
+
+        matrix_cuda<<<nblocks,threads_per_block,2,starpu_cuda_get_local_stream()>>>(val, n, ret, factor);
+
+	error = cudaMemcpy(&matrix_config.copy_failed,
+			   ret,
+			   sizeof(int),
+			   cudaMemcpyDeviceToHost);
+	
+	cudaFree(ret);
+	cudaStreamSynchronize(starpu_cuda_get_local_stream());
+}

+ 145 - 0
tests/datawizard/interfaces/matrix/matrix_interface.c

@@ -0,0 +1,145 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <config.h>
+#include <starpu.h>
+#include "../test_interfaces.h"
+#include "../../../helper.h"
+
+#define WIDTH  16
+#define HEIGHT 16
+
+#ifdef STARPU_USE_CPU
+static void test_matrix_cpu_func(void *buffers[], void *args);
+#endif /* !STARPU_USE_CPU */
+#ifdef STARPU_USE_CUDA
+extern void test_matrix_cuda_func(void *buffers[], void *_args);
+#endif
+#ifdef STARPU_USE_OPENCL
+extern void test_matrix_opencl_func(void *buffers[], void *args);
+#endif
+
+
+static starpu_data_handle_t matrix_handle;
+static starpu_data_handle_t matrix2_handle;
+
+struct test_config matrix_config =
+{
+#ifdef STARPU_USE_CPU
+	.cpu_func      = test_matrix_cpu_func,
+#endif /* ! STARPU_USE_CPU */
+#ifdef STARPU_USE_CUDA
+	.cuda_func     = test_matrix_cuda_func,
+#endif
+#ifdef STARPU_USE_OPENCL
+	.opencl_func   = test_matrix_opencl_func,
+#endif
+	.handle        = &matrix_handle,
+	.dummy_handle  = &matrix2_handle,
+	.copy_failed   = 0,
+	.name          = "matrix_interface"
+};
+
+static int matrix[WIDTH * HEIGHT];
+static int matrix2[WIDTH * HEIGHT];
+
+static void
+register_data(void)
+{
+	int i;
+	int size = WIDTH * HEIGHT;
+	for (i = 0; i < size; i++)
+		matrix[i] = i;
+
+	starpu_matrix_data_register(&matrix_handle,
+				    0,
+				    (uintptr_t) matrix,
+				    WIDTH, /* ld */
+				    WIDTH,
+				    HEIGHT,
+				    sizeof(matrix[0]));
+	starpu_matrix_data_register(&matrix2_handle,
+				    0,
+				    (uintptr_t) matrix2,
+				    WIDTH, /* ld */
+				    WIDTH,
+				    HEIGHT,
+				    sizeof(matrix[0]));
+}
+
+static void
+unregister_data(void)
+{
+	starpu_data_unregister(matrix_handle);
+	starpu_data_unregister(matrix2_handle);
+}
+
+static void
+test_matrix_cpu_func(void *buffers[], void *args)
+{
+	STARPU_SKIP_IF_VALGRIND;
+
+	int *val;
+	int factor;
+	int i;
+	unsigned int nx, ny;
+
+	nx = STARPU_MATRIX_GET_NX(buffers[0]);
+	ny = STARPU_MATRIX_GET_NY(buffers[0]);
+	val = (int *) STARPU_MATRIX_GET_PTR(buffers[0]);
+	factor = *(int *) args;
+
+	for (i = 0; i < nx*ny; i++)
+	{
+		if (val[i] != i * factor)
+		{
+			matrix_config.copy_failed = 1;
+			return;
+		}
+		val[i] *= -1;
+	}
+}
+
+int
+main(void)
+{
+	data_interface_test_summary *summary;
+	struct starpu_conf conf =
+	{
+		.ncpus   = -1,
+		.ncuda   = 2,
+		.nopencl = 1
+	};
+
+	if (starpu_init(&conf) == -ENODEV)
+		goto enodev;
+
+	register_data();
+
+	summary = run_tests(&matrix_config);
+	if (!summary)
+		exit(EXIT_FAILURE);
+
+	unregister_data();
+
+	starpu_shutdown();
+
+	data_interface_test_summary_print(stderr, summary);
+
+	return data_interface_test_summary_success(summary);
+
+enodev:
+	return STARPU_TEST_SKIPPED;
+}

+ 129 - 0
tests/datawizard/interfaces/matrix/matrix_opencl.c

@@ -0,0 +1,129 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+#include <config.h>
+#include <starpu.h>
+#include <starpu_opencl.h>
+#include "../test_interfaces.h"
+
+#define KERNEL_LOCATION "tests/datawizard/interfaces/matrix/matrix_opencl_kernel.cl"
+
+extern struct test_config matrix_config;
+static struct starpu_opencl_program matrix_program;
+
+void test_matrix_opencl_func(void *buffers[], void *args)
+{
+	STARPU_SKIP_IF_VALGRIND;
+
+	int id, devid, factor, ret;
+	unsigned int n;
+
+        cl_int             err;
+	cl_kernel          kernel;
+	cl_command_queue   queue;
+	cl_event           event;
+	cl_context         context;
+	cl_mem             val, fail;
+
+	ret = starpu_opencl_load_opencl_from_file(KERNEL_LOCATION,
+						  &matrix_program,
+						  NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_opencl_load_opencl_from_file");
+
+	factor = *(int *)args;
+	n = STARPU_MATRIX_GET_NX(buffers[0]);
+	n*= STARPU_MATRIX_GET_NY(buffers[0]);
+	val = (cl_mem)STARPU_MATRIX_GET_DEV_HANDLE(buffers[0]);
+
+	id = starpu_worker_get_id();
+	devid = starpu_worker_get_devid(id);
+	starpu_opencl_get_context(devid, &context);
+
+	err = starpu_opencl_load_kernel(&kernel,
+					&queue,
+					&matrix_program,
+					"matrix_opencl",
+					devid);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	fail = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+		sizeof(int), &matrix_config.copy_failed, &err);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	/* Setting args */
+	int nargs;
+	nargs = starpu_opencl_set_kernel_args(&err, &kernel,
+					sizeof(val), &val,
+					sizeof(n), &n,
+					sizeof(fail), &fail,
+					sizeof(factor), &factor,
+					0);
+	if (nargs != 4)
+		STARPU_OPENCL_REPORT_ERROR(err);
+	{
+		size_t global=n;
+		size_t local;
+                size_t s;
+                cl_device_id device;
+
+                starpu_opencl_get_device(devid, &device);
+
+                err = clGetKernelWorkGroupInfo (kernel,
+						device,
+						CL_KERNEL_WORK_GROUP_SIZE,
+						sizeof(local),
+						&local,
+						&s);
+                if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+
+                if (local > global)
+			local = global;
+
+		err = clEnqueueNDRangeKernel(queue,
+					kernel,
+					1,
+					NULL,
+					&global,
+					&local,
+					0,
+					NULL,
+					&event);
+
+		if (err != CL_SUCCESS)
+			STARPU_OPENCL_REPORT_ERROR(err);
+	}
+
+	err = clEnqueueReadBuffer(queue,
+				  fail,
+				  CL_TRUE,
+				  0, 
+				  sizeof(int),
+				  &matrix_config.copy_failed,
+				  0,
+				  NULL,
+				  NULL);
+	if (err != CL_SUCCESS)
+		STARPU_OPENCL_REPORT_ERROR(err);
+
+	clFinish(queue);
+	starpu_opencl_collect_stats(event);
+	clReleaseEvent(event);
+
+	starpu_opencl_release_kernel(kernel);
+        starpu_opencl_unload_opencl(&matrix_program);
+}
+

+ 31 - 0
tests/datawizard/interfaces/matrix/matrix_opencl_kernel.cl

@@ -0,0 +1,31 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2011  Institut National de Recherche en Informatique et Automatique
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+__kernel void matrix_opencl(__global int *val,
+				 unsigned int nx,
+				 __global int *err,
+				 int factor)
+{
+        const int i = get_global_id(0);
+	if (i >= nx)
+		return;
+
+	if (val[i] != i * factor)
+		*err = 1;
+	else
+		val[i] *= -1;
+}
+

+ 0 - 0
tests/datawizard/interfaces/multiformat/multiformat_conversion_codelets.c


Some files were not shown because too many files changed in this diff