12 years ago · f01e362c0a
--- a/AUTHORS
+++ b/AUTHORS
@@ -10,6 +10,8 @@ David Gómez <david_gomez1380@yahoo.com.mx>
 
																 Sylvain Henry <sylvain.henry@inria.fr>
															
 
																 Mehdi Juhoor <mjuhoor@gmail.com>
															
 
																 Antoine Lucas <antoine.lucas.33@gmail.com>
															
 
																+Brice Mortier <brice.mortier@etu.u-bordeaux1.fr>
															
 
																+Damien Pasqualinotto <dam.pasqualinotto@wanadoo.fr>
															
 
																 Nguyen Quôc-Dinh <nguyen.quocdinh@gmail.com>
															
 
																 Cyril Roelandt <cyril.roelandt@inria.fr>
															
 
																 Anthony Roy <theanthony33@gmail.com>
															
--- a/ChangeLog
+++ b/ChangeLog
@@ -18,6 +18,8 @@ StarPU 1.2.0 (svn revision xxxx)
 
																 ==============================================
															
 
																 New features:
															
 
																+  * Xeon Phi support
															
 
																+  * SCC support
															
 
																   * New function starpu_sched_ctx_exec_parallel_code to execute a
															
 
																     parallel code on the workers of the given scheduler context
															
 
																   * MPI:
															
@@ -28,6 +30,8 @@ New features:
 
																 	  before the corresponding data, which allows the receiver to
															
 
																 	  allocate data correctly, and to submit the matching receive of
															
 
																 	  the envelope.
															
 
																+  * New STARPU_COMMUTE flag which can be passed along STARPU_W or STARPU_RW to
															
 
																+    let starpu commute write accesses.
															
 
																 Small features:
															
 
																   * Add cl_arg_free field to enable automatic free(cl_arg) on task
															
--- a/Makefile.am
+++ b/Makefile.am
@@ -67,6 +67,9 @@ versinclude_HEADERS = 				\
 
																 	include/starpu_fxt.h			\
															
 
																 	include/starpu_cuda.h			\
															
 
																 	include/starpu_opencl.h			\
															
 
																+	include/starpu_sink.h			\
															
 
																+	include/starpu_mic.h			\
															
 
																+	include/starpu_scc.h			\
															
 
																 	include/starpu_expert.h			\
															
 
																 	include/starpu_profiling.h		\
															
 
																 	include/starpu_bound.h			\
															
--- a/configure.ac
+++ b/configure.ac
@@ -100,6 +100,31 @@ else
 
																    LC_ALL=C svnversion $srcdir > ./STARPU-REVISION
															
 
																 fi
															
 
																+AM_CONDITIONAL([STARPU_CROSS_COMPILING], [test "x$cross_compiling" = "xyes"])
															
 
																+
															
 
																+###############################################################################
															
 
																+#                                                                             #
															
 
																+#                           MIC device compilation                            #
															
 
																+#   (Must be done in beginning to change prefix in the whole configuration)   #
															
 
																+#                                                                             #
															
 
																+###############################################################################
															
 
																+
															
 
																+AC_ARG_ENABLE(mic, [AS_HELP_STRING([--enable-mic],
															
 
																+	      [use MIC device(s)])], [enable_mic=$enableval], [enable_mic=no])
															
 
																+AC_ARG_ENABLE(mic-rma, [AS_HELP_STRING([--disable-mic-rma],
															
 
																+	      [use MIC RMA transfer])], [], [enable_mic_rma=yes])
															
 
																+
															
 
																+if test x$enable_mic = xyes ; then
															
 
																+	AC_DEFINE(STARPU_USE_MIC, [1], [MIC workers support is enabled])
															
 
																+fi
															
 
																+if test x$enable_mic_rma = xyes ; then
															
 
																+	AC_DEFINE([STARPU_MIC_USE_RMA], [1], [MIC RMA transfer is enabled])
															
 
																+fi
															
 
																+
															
 
																+AM_CONDITIONAL([STARPU_USE_MIC], [test "x$enable_mic" = "xyes"])
															
 
																+
															
 
																+###############################################################################
															
 
																+
															
 
																 AC_PATH_PROGS([STARPU_MS_LIB], [lib])
															
 
																 AC_ARG_VAR([STARPU_MS_LIB], [Path to Microsoft's Visual Studio `lib' tool])
															
 
																 AM_CONDITIONAL([STARPU_HAVE_MS_LIB], [test "x$STARPU_MS_LIB" != "x"])
															
@@ -826,6 +851,19 @@ if test x$disable_asynchronous_opencl_copy = xyes ; then
 
																    AC_DEFINE([STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY], [1], [Define to 1 to disable asynchronous copy between CPU and OpenCL devices])
															
 
																 fi
															
 
																+AC_MSG_CHECKING(whether asynchronous MIC copy should be disabled)
															
 
																+AC_ARG_ENABLE(asynchronous-mic-copy, [AS_HELP_STRING([--disable-asynchronous-mic-copy],
															
 
																+			[disable asynchronous copy between CPU and MIC devices])],
															
 
																+			enable_asynchronous_mic_copy=$enableval, enable_asynchronous_mic_copy=yes)
															
 
																+disable_asynchronous_mic_copy=no
															
 
																+if test x$enable_asynchronous_mic_copy = xno ; then
															
 
																+   disable_asynchronous_mic_copy=yes
															
 
																+fi
															
 
																+AC_MSG_RESULT($disable_asynchronous_mic_copy)
															
 
																+if test x$disable_asynchronous_mic_copy = xyes ; then
															
 
																+   AC_DEFINE([STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY], [1], [Define to 1 to disable asynchronous copy between CPU and MIC devices])
															
 
																+fi
															
 
																+
															
 
																 ###############################################################################
															
 
																 #                                                                             #
															
 
																 #                                 Drivers                                     #
															
@@ -880,6 +918,252 @@ if test x$enable_blocking = xno -a x$enable_simgrid != xyes ; then
 
																 	AC_DEFINE(STARPU_NON_BLOCKING_DRIVERS, [1], [drivers must progress])
															
 
																 fi
															
 
																+###############################################################################
															
 
																+#                                                                             #
															
 
																+#                                 MIC settings                                #
															
 
																+#                                                                             #
															
 
																+###############################################################################
															
 
																+
															
 
																+AC_MSG_CHECKING(maximum number of MIC devices)
															
 
																+AC_ARG_ENABLE(maxmicdev, [AS_HELP_STRING([--enable-maxmicdev=<number>],
															
 
																+			[maximum number of MIC devices])],
															
 
																+			nmaxmicdev=$enableval, nmaxmicdev=4)
															
 
																+AC_MSG_RESULT($nmaxmicdev)
															
 
																+
															
 
																+AC_DEFINE_UNQUOTED(STARPU_MAXMICDEVS, [$nmaxmicdev],
															
 
																+	[maximum number of MIC devices])
															
 
																+
															
 
																+AC_MSG_CHECKING(maximum number of MIC cores)
															
 
																+AC_ARG_ENABLE(maxmiccore, [AS_HELP_STRING([--enable-maxmiccore=<number>],
															
 
																+			[maximum number of MIC cores])],
															
 
																+			nmaxmiccore=$enableval, nmaxmiccore=128)
															
 
																+AC_MSG_RESULT($nmaxmiccore)
															
 
																+
															
 
																+AC_DEFINE_UNQUOTED(STARPU_MAXMICCORES, [$nmaxmiccore],
															
 
																+	[maximum number of MIC cores])
															
 
																+
															
 
																+AC_ARG_WITH(coi-dir,
															
 
																+	[AS_HELP_STRING([--with-coi-dir=<path>],
															
 
																+	[specify the MIC's COI installation directory])],
															
 
																+	[coi_dir="$withval"],
															
 
																+	[coi_dir=no])
															
 
																+
															
 
																+AC_ARG_WITH(coi-include-dir,
															
 
																+	[AS_HELP_STRING([--with-coi-include-dir=<path>],
															
 
																+	[specify where the MIC's COI headers are installed])],
															
 
																+	[coi_include_dir="$withval"],
															
 
																+	[coi_include_dir=no])
															
 
																+
															
 
																+AC_ARG_WITH(coi-lib-dir,
															
 
																+	[AS_HELP_STRING([--with-coi-lib-dir=<path>],
															
 
																+	[specify where the MIC's COI libraries are installed])],
															
 
																+	[coi_lib_dir="$withval"],
															
 
																+	[coi_lib_dir=no])
															
 
																+
															
 
																+AC_DEFUN([STARPU_CHECK_COI_RUNTIME],
															
 
																+[
															
 
																+    __coi_dir=$1
															
 
																+    __coi_include_dir=$2
															
 
																+    __coi_lib_dir=$3
															
 
																+    __coi_lib_name=$4
															
 
																+
															
 
																+    if test "$__coi_dir" != "no" -a "$__coi_dir" != "" ; then
															
 
																+	AC_MSG_CHECKING(whether MIC's COI runtime is available in $__coi_dir)
															
 
																+    else
															
 
																+	AC_MSG_CHECKING(whether MIC's COI runtime is available)
															
 
																+    fi
															
 
																+    AC_MSG_RESULT()
															
 
																+
															
 
																+    if test "$__coi_include_dir" = "no" -a "$__coi_dir" != "no" ; then
															
 
																+        __coi_include_dir="${__coi_dir}/include"
															
 
																+    fi
															
 
																+    if test "$__coi_lib_dir" = "no" -a "$__coi_dir" != "no" ; then
															
 
																+        __coi_lib_dir="${__coi_dir}/lib"
															
 
																+    fi
															
 
																+
															
 
																+    SAVED_CPPFLAGS="$CPPFLAGS"
															
 
																+    SAVED_LDFLAGS="$LDFLAGS"
															
 
																+
															
 
																+    if test "$__coi_include_dir" != "no" ; then
															
 
																+        CPPFLAGS="${CPPFLAGS} -I$__coi_include_dir"
															
 
																+    fi
															
 
																+    if test "$__coi_lib_dir" != "no" ; then
															
 
																+	LDFLAGS="${LDFLAGS} -L$__coi_lib_dir"
															
 
																+    fi
															
 
																+
															
 
																+    AC_CHECK_HEADER([source/COIEngine_source.h],[have_valid_coi=yes],[have_valid_coi=no])
															
 
																+
															
 
																+    if test "$have_valid_coi" = "yes" ; then
															
 
																+	AC_HAVE_LIBRARY([$__coi_lib_name],[have_valid_coi=yes],[have_valid_coi=no])
															
 
																+
															
 
																+        if test "$have_valid_coi" = "no" ; then
															
 
																+            if test "$3" = "no" -a "$__coi_dir" != "no" ; then
															
 
																+		# ${__coi_dir}/lib didn't work, let's try with lib64
															
 
																+                __coi_lib_dir="$__coi_dir/lib64"
															
 
																+		LDFLAGS="${SAVED_LDFLAGS} -L$__coi_lib_dir"
															
 
																+	        AC_HAVE_LIBRARY([$__coi_lib_name],[have_valid_coi=yes],[have_valid_coi=no])
															
 
																+            fi
															
 
																+        fi
															
 
																+    fi
															
 
																+
															
 
																+    if test "$have_valid_coi" = "yes" -a "$__coi_include_dir" != "no"; then
															
 
																+        STARPU_COI_CPPFLAGS="-I$__coi_include_dir"
															
 
																+    fi
															
 
																+
															
 
																+    if test "$have_valid_coi" = "yes" -a "$__coi_lib_dir" != "no"; then
															
 
																+        STARPU_COI_LDFLAGS="-L$__coi_lib_dir -l$__coi_lib_name"
															
 
																+    fi
															
 
																+
															
 
																+    CPPFLAGS="${SAVED_CPPFLAGS}"
															
 
																+    LDFLAGS="${SAVED_LDFLAGS}"
															
 
																+])
															
 
																+
															
 
																+if test x$enable_mic = xyes ; then
															
 
																+
															
 
																+    STARPU_CHECK_COI_RUNTIME($coi_dir, $coi_include_dir, $coi_lib_dir, "coi_host")
															
 
																+
															
 
																+    # Host runtime is not compatible, we are probably cross-compiling
															
 
																+    # Let's have a look for the device runtime which lib has a different name
															
 
																+    if test "$have_valid_coi" = "no" ; then
															
 
																+	    STARPU_CHECK_COI_RUNTIME($coi_dir, $coi_include_dir, $coi_lib_dir, "coi_device")
															
 
																+    fi
															
 
																+
															
 
																+    if test "$have_valid_coi" = "no" ; then
															
 
																+	AC_MSG_ERROR([cannot find MIC's COI runtime])
															
 
																+    fi
															
 
																+
															
 
																+    AC_SUBST(STARPU_COI_CPPFLAGS)
															
 
																+    AC_SUBST(STARPU_COI_LDFLAGS)
															
 
																+fi
															
 
																+
															
 
																+###############################################################################
															
 
																+#                                                                             #
															
 
																+#                                 SCC settings                                #
															
 
																+#                                                                             #
															
 
																+###############################################################################
															
 
																+
															
 
																+AC_ARG_ENABLE([rcce], [AS_HELP_STRING([--disable-rcce],
															
 
																+			  [do not use SCC device(s)])], [], enable_rcce=maybe)
															
 
																+
															
 
																+nmaxsccdev=47
															
 
																+AC_DEFINE_UNQUOTED(STARPU_MAXSCCDEVS, [$nmaxsccdev], [maximum number of SCC devices])
															
 
																+
															
 
																+AC_ARG_WITH(rcce-dir,
															
 
																+			[AS_HELP_STRING([--with-rcce-dir=<path>],
															
 
																+							[specify RCCE installation directory])],
															
 
																+			[
															
 
																+				rcce_dir="$withval"
															
 
																+				enable_rcce=yes
															
 
																+			],
															
 
																+			rcce_dir=no)
															
 
																+
															
 
																+AC_ARG_WITH(rcce-include-dir,
															
 
																+			[AS_HELP_STRING([--with-rcce-include-dir=<path>],
															
 
																+							[specify where RCCE headers are installed])],
															
 
																+			[
															
 
																+				rcce_include_dir="$withval"
															
 
																+				enable_rcce=yes
															
 
																+			],
															
 
																+			rcce_include_dir=no)
															
 
																+
															
 
																+AC_ARG_WITH(rcce-lib-dir,
															
 
																+			[AS_HELP_STRING([--with-rcce-lib-dir=<path>],
															
 
																+							[specify where RCCE libraries are installed])],
															
 
																+			[
															
 
																+			 	rcce_lib_dir="$withval"
															
 
																+			 	enable_rcce=yes
															
 
																+			],
															
 
																+			rcce_lib_dir=no)
															
 
																+
															
 
																+if test x$enable_rcce = xyes -o x$enable_rcce = xmaybe ; then
															
 
																+	have_valid_rcce=yes
															
 
																+
															
 
																+	SAVED_LDFLAGS="${LDFLAGS}"
															
 
																+	SAVED_CPPFLAGS="${CPPFLAGS}"
															
 
																+	SAVED_LIBS="${LIBS}"
															
 
																+
															
 
																+	if test x$rcce_include_dir != xno ; then
															
 
																+		STARPU_RCCE_CPPFLAGS="-I${rcce_include_dir}"
															
 
																+	elif test x$rcce_dir != xno ; then
															
 
																+		STARPU_RCCE_CPPFLAGS="-I${rcce_dir}/include"
															
 
																+	fi
															
 
																+
															
 
																+	CPPFLAGS="${CPPFLAGS} ${STARPU_RCCE_CPPFLAGS}"
															
 
																+	AC_CHECK_HEADER([RCCE.h], [], [have_valid_rcce=no])
															
 
																+
															
 
																+	if test x$rcce_lib_dir != xno ; then
															
 
																+		STARPU_RCCE_LDFLAGS="-L${rcce_lib_dir}"
															
 
																+	elif test x$rcce_dir != xno ; then
															
 
																+		STARPU_RCCE_LDFLAGS="-L${rcce_dir}/lib"
															
 
																+	fi
															
 
																+
															
 
																+	LDFLAGS="${LDFLAGS} ${STARPU_RCCE_LDFLAGS}"
															
 
																+	AC_CHECK_LIB([RCCE_bigflags_nongory_nopwrmgmt], [RCCE_init], [], [have_valid_rcce=no])
															
 
																+
															
 
																+	# in case RCCE was explicitly required, but is not available, this is an error
															
 
																+	if test x$enable_rcce = xyes -a x$have_valid_rcce = xno ; then
															
 
																+		AC_MSG_ERROR([cannot find RCCE library])
															
 
																+	fi
															
 
																+
															
 
																+	if test x$have_valid_rcce = xyes ; then
															
 
																+		STARPU_RCCE_CPPFLAGS="${STARPU_RCCE_CPPFLAGS} -DSCC"
															
 
																+		STARPU_RCCE_LDFLAGS="${STARPU_RCCE_LDFLAGS} -lRCCE_bigflags_nongory_nopwrmgmt -ldl"
															
 
																+
															
 
																+		AC_DEFINE(STARPU_USE_SCC, [1], [SCC support is enabled])
															
 
																+
															
 
																+		AC_SUBST(STARPU_RCCE_CFLAGS)
															
 
																+		AC_SUBST(STARPU_RCCE_CPPFLAGS)
															
 
																+		AC_SUBST(STARPU_RCCE_LDFLAGS)
															
 
																+	fi
															
 
																+
															
 
																+	LDFLAGS="${SAVED_LDFLAGS}"
															
 
																+	CPPFLAGS="${SAVED_CPPFLAGS}"
															
 
																+	LIBS="${SAVED_LIBS}"
															
 
																+
															
 
																+	enable_rcce=$have_valid_rcce
															
 
																+fi
															
 
																+
															
 
																+AM_CONDITIONAL(STARPU_USE_SCC, test x$enable_rcce = xyes)
															
 
																+
															
 
																+AC_MSG_CHECKING(whether RCCE should be used)
															
 
																+AC_MSG_RESULT($enable_rcce)
															
 
																+
															
 
																+
															
 
																+###############################################################################
															
 
																+#                                                                             #
															
 
																+#                             MP Common settings                              #
															
 
																+#                                                                             #
															
 
																+###############################################################################
															
 
																+
															
 
																+AM_CONDITIONAL([STARPU_USE_MP], [test "x$enable_mic" = "xyes" -o "x$enable_mpi" = "xyes" -o "x$enable_rcce" = "xyes"])
															
 
																+
															
 
																+AC_ARG_ENABLE([export-dynamic], [AS_HELP_STRING([--disable-export-dynamic],
															
 
																+			  [Prevent the linker from adding all symbols to the dynamic symbol table])], [], [])
															
 
																+
															
 
																+if test x$enable_mic = xyes -o x$enable_mpi = xyes -o x$enable_rcce = xyes ; then
															
 
																+	AC_DEFINE(STARPU_USE_MP, [1], [Message-passing SINKs support
															
 
																+		  is enabled])
															
 
																+
															
 
																+	if test x$enable_export_dynamic != xno ; then
															
 
																+		STARPU_EXPORT_DYNAMIC="-rdynamic"
															
 
																+	fi
															
 
																+fi
															
 
																+
															
 
																+AC_SUBST(STARPU_EXPORT_DYNAMIC)
															
 
																+
															
 
																+# Computes the maximum number of different kernels a message-passing sink
															
 
																+# can look up and launch.
															
 
																+AC_MSG_CHECKING(Maximum number of message-passing kernels)
															
 
																+AC_ARG_ENABLE(maxmpkernels, [AS_HELP_STRING([
															
 
																+	      --enable-maxmpkernels=<number>],
															
 
																+	      [maximum number of kernels a message-passing sink can lookup
															
 
																+	      for and execute])],
															
 
																+	      maxmpkernels=$enableval, maxmpkernels=10)
															
 
																+AC_MSG_RESULT($maxmpkernels)
															
 
																+AC_DEFINE_UNQUOTED(STARPU_MAXMPKERNELS, [$maxmpkernels],
															
 
																+		[maximum number of message-passing kernels])
															
 
																+
															
 
																+###############################################################################
															
 
																 ###############################################################################
															
 
																 #                                                                             #
															
@@ -1075,6 +1359,7 @@ if test x$enable_simgrid = xyes ; then
 
																 	maxnodes=16
															
 
																 else
															
 
																 	# We have one memory node shared by all CPU workers, one node per GPU
															
 
																+	# and per MIC device
															
 
																 	nodes=1
															
 
																 	if test x$enable_cuda = xyes ; then
															
 
																 		# we could have used nmaxcudadev + 1, but this would certainly give an
															
@@ -1086,6 +1371,14 @@ else
 
																 		# odd number.
															
 
																 		nodes=`expr $nodes + $nmaxopencldev`
															
 
																 	fi
															
 
																+	if test x$enable_mic = xyes ; then
															
 
																+		nodes=`expr $nodes + $nmaxmicdev`
															
 
																+	fi
															
 
																+	if test x$enable_rcce = xyes ; then
															
 
																+		# Only 1 memory node for the shared memory.
															
 
																+		nodes=`expr $nodes + 1`
															
 
																+	fi
															
 
																+
															
 
																 	# set maxnodes to the next power of 2 greater than nodes
															
 
																 	maxnodes=1
															
 
																 	while test "$maxnodes" -lt "$nodes"
															
@@ -1137,7 +1430,7 @@ AC_CHECK_FUNCS([clock_gettime])
 
																 # Compute the maximum number of workers (we round it to 16 for alignment
															
 
																 # purposes).
															
 
																-nmaxworkers=`expr 16 \* \( \( $maxcpus + $nmaxcudadev + $nmaxopencldev + 15 \) / 16 \) `
															
 
																+nmaxworkers=`expr 16 \* \( \( $maxcpus + $nmaxcudadev + $nmaxopencldev + $nmaxmiccore + $nmaxsccdev + 15 \) / 16 \) `
															
 
																 AC_MSG_CHECKING(Maximum number of workers)
															
 
																 AC_MSG_RESULT($nmaxworkers)
															
 
																 AC_DEFINE_UNQUOTED(STARPU_NMAXWORKERS, [$nmaxworkers], [Maximum number of workers])
															
@@ -1868,6 +2161,7 @@ AC_SUBST([LIBSTARPU_LINK])
 
																 # File configuration
															
 
																 AC_CONFIG_COMMANDS([executable-scripts], [
															
 
																   chmod +x tests/regression/regression.sh
															
 
																+  chmod +x tests/loader-cross.sh
															
 
																   chmod +x gcc-plugin/tests/run-test
															
 
																   chmod +x tools/starpu_codelet_profile
															
 
																   chmod +x tools/starpu_codelet_histo_profile
															
@@ -1914,6 +2208,7 @@ AC_OUTPUT([
 
																 	examples/Makefile
															
 
																 	examples/stencil/Makefile
															
 
																 	tests/Makefile
															
 
																+	tests/loader-cross.sh
															
 
																 	doc/Makefile
															
 
																 	mpi/Makefile
															
 
																 	mpi/src/Makefile
															
@@ -1937,6 +2232,8 @@ AC_MSG_NOTICE([
 
																 	CPUs   enabled: $enable_cpu
															
 
																 	CUDA   enabled: $enable_cuda
															
 
																 	OpenCL enabled: $enable_opencl
															
 
																+	SCC    enabled: $enable_rcce
															
 
																+	MIC    enabled: $enable_mic
															
 
																 	Compile-time limits
															
 
																 	(change these with --enable-maxcpus, --enable-maxcudadev,
															
--- a/doc/chapters/advanced-examples.texi
+++ b/doc/chapters/advanced-examples.texi
@@ -61,6 +61,7 @@ void scal_sse_func(void *buffers[], void *cl_arg)
 
																 struct starpu_codelet cl = @{
															
 
																     .where = STARPU_CPU,
															
 
																     .cpu_funcs = @{ scal_cpu_func, scal_sse_func, NULL @},
															
 
																+    .cpu_funcs_name = @{ "scal_cpu_func", "scal_sse_func", NULL @},
															
 
																     .nbuffers = 1,
															
 
																     .modes = @{ STARPU_RW @}
															
 
																 @};
															
@@ -100,6 +101,7 @@ struct starpu_codelet cl = @{
 
																     .where = STARPU_CPU|STARPU_CUDA,
															
 
																     .can_execute = can_execute,
															
 
																     .cpu_funcs = @{ cpu_func, NULL @},
															
 
																+    .cpu_funcs_name = @{ "cpu_func", NULL @},
															
 
																     .cuda_funcs = @{ gpu_func, NULL @}
															
 
																     .nbuffers = 1,
															
 
																     .modes = @{ STARPU_RW @}
															
@@ -146,6 +148,7 @@ struct starpu_codelet cl = @{
 
																     .where = STARPU_CPU|STARPU_CUDA,
															
 
																     .can_execute = can_execute,
															
 
																     .cpu_funcs = @{ cpu_func, NULL @},
															
 
																+    .cpu_funcs_name = @{ "cpu_func", NULL @},
															
 
																     .cuda_funcs = @{ scal_gpu_13, scal_gpu_20, NULL @},
															
 
																     .nbuffers = 1,
															
 
																     .modes = @{ STARPU_RW @}
															
@@ -359,6 +362,7 @@ static struct starpu_perfmodel mult_perf_model = @{
 
																 struct starpu_codelet cl = @{
															
 
																     .where = STARPU_CPU,
															
 
																     .cpu_funcs = @{ cpu_mult, NULL @},
															
 
																+    .cpu_funcs_name = @{ "cpu_mult", NULL @},
															
 
																     .nbuffers = 3,
															
 
																     .modes = @{ STARPU_R, STARPU_R, STARPU_W @},
															
 
																     /* for the scheduling policy to be able to use performance models */
															
@@ -520,6 +524,7 @@ void func_cpu(void *descr[], void *_args)
 
																 struct starpu_codelet mycodelet = @{
															
 
																         .where = STARPU_CPU,
															
 
																         .cpu_funcs = @{ func_cpu, NULL @},
															
 
																+        .cpu_funcs_name = @{ "func_cpu", NULL @},
															
 
																         .nbuffers = 2,
															
 
																         .modes = @{ STARPU_RW, STARPU_RW @}
															
 
																 @};
															
@@ -623,6 +628,7 @@ the codelets for initialization and reduction:
 
																 struct starpu_codelet bzero_variable_cl =
															
 
																 @{
															
 
																         .cpu_funcs = @{ bzero_variable_cpu, NULL @},
															
 
																+        .cpu_funcs_name = @{ "bzero_variable_cpu", NULL @},
															
 
																         .cuda_funcs = @{ bzero_variable_cuda, NULL @},
															
 
																         .nbuffers = 1,
															
 
																 @}
															
@@ -645,6 +651,7 @@ static void accumulate_variable_cuda(void *descr[], void *cl_arg)
 
																 struct starpu_codelet accumulate_variable_cl =
															
 
																 @{
															
 
																         .cpu_funcs = @{ accumulate_variable_cpu, NULL @},
															
 
																+        .cpu_funcs_name = @{ "accumulate_variable_cpu", NULL @},
															
 
																         .cuda_funcs = @{ accumulate_variable_cuda, NULL @},
															
 
																         .nbuffers = 1,
															
 
																 @}
															
@@ -827,6 +834,7 @@ static struct starpu_codelet cl =
 
																     .type = STARPU_FORKJOIN,
															
 
																     .max_parallelism = INT_MAX,
															
 
																     .cpu_funcs = @{scal_cpu_func, NULL@},
															
 
																+    .cpu_funcs_name = @{"scal_cpu_func", NULL@},
															
 
																     .nbuffers = 1,
															
 
																 @};
															
 
																 @end smallexample
															
@@ -870,6 +878,7 @@ static struct starpu_codelet cl =
 
																     .type = STARPU_SPMD,
															
 
																     .max_parallelism = INT_MAX,
															
 
																     .cpu_funcs = @{ func, NULL @},
															
 
																+    .cpu_funcs_name = @{ "func", NULL @},
															
 
																     .nbuffers = 1,
															
 
																 @}
															
 
																 @end smallexample
															
@@ -977,6 +986,7 @@ void opencl_to_cpu_func(void *buffers[], void *args);
 
																 struct starpu_codelet opencl_to_cpu_cl = @{
															
 
																     .where = STARPU_CPU,
															
 
																     .cpu_funcs = @{ opencl_to_cpu_func, NULL @},
															
 
																+    .cpu_funcs_name = @{ "opencl_to_cpu_func", NULL @},
															
 
																     .nbuffers = 1,
															
 
																     .modes = @{ STARPU_RW @}
															
 
																 @};
															
@@ -1287,6 +1297,7 @@ struct starpu_codelet dummy_big_cl =
 
																 	.cuda_funcs = @{dummy_big_kernel, NULL@},
															
 
																 	.opencl_funcs = @{dummy_big_kernel, NULL@},
															
 
																 	.cpu_funcs = @{dummy_big_kernel, NULL@},
															
 
																+	.cpu_funcs_name = @{"dummy_big_kernel", NULL@},
															
 
																 	.nbuffers = STARPU_NMAXBUFS+1,
															
 
																 	.dyn_modes = modes
															
 
																 @};
															
--- a/doc/chapters/api.texi
+++ b/doc/chapters/api.texi
@@ -24,6 +24,8 @@
 
																 * Theoretical lower bound on execution time API::
															
 
																 * CUDA extensions::
															
 
																 * OpenCL extensions::
															
 
																+* MIC extensions::
															
 
																+* SCC extensions::
															
 
																 * Miscellaneous helpers::
															
 
																 * FXT Support::
															
 
																 * FFT Support::
															
@@ -104,6 +106,14 @@ be specified with the @code{STARPU_NCUDA} environment variable.
 
																 This is the number of OpenCL devices that StarPU can use. This can
															
 
																 also be specified with the @code{STARPU_NOPENCL} environment variable.
															
 
																+@item @code{int nmic} (default = -1)
															
 
																+This is the number of MIC devices that StarPU can use. This can
															
 
																+also be specified with the @code{STARPU_NMIC} environment variable.
															
 
																+
															
 
																+@item @code{int nscc} (default = -1)
															
 
																+This is the number of SCC devices that StarPU can use. This can
															
 
																+also be specified with the @code{STARPU_NSCC} environment variable.
															
 
																+
															
 
																 @item @code{unsigned use_explicit_workers_bindid} (default = 0)
															
 
																 If this flag is set, the @code{workers_bindid} array indicates where the
															
 
																 different workers are bound, otherwise StarPU automatically selects where to
															
@@ -139,6 +149,26 @@ the @code{STARPU_WORKERS_OPENCLID} environment variable.
 
																 If the @code{use_explicit_workers_opencl_gpuid} flag is set, this array
															
 
																 contains the logical identifiers of the OpenCL devices to be used.
															
 
																+@item @code{unsigned use_explicit_workers_mic_gpuid} (default = 0)
															
 
																+If this flag is set, the MIC workers will be attached to the MIC devices
															
 
																+specified in the @code{workers_mic_gpuid} array. Otherwise, StarPU assigns
															
 
																+the MIC devices in a round-robin fashion. This can also be specified with
															
 
																+the @code{STARPU_WORKERS_MICID} environment variable.
															
 
																+
															
 
																+@item @code{unsigned workers_mic_gpuid[STARPU_NMAXWORKERS]}
															
 
																+If the @code{use_explicit_workers_mic_gpuid} flag is set, this array
															
 
																+contains the logical identifiers of the MIC devices to be used.
															
 
																+
															
 
																+@item @code{unsigned use_explicit_workers_scc_gpuid} (default = 0)
															
 
																+If this flag is set, the SCC workers will be attached to the SCC devices
															
 
																+specified in the @code{workers_scc_gpuid} array. Otherwise, StarPU assigns
															
 
																+the SCC devices in a round-robin fashion. This can also be specified with
															
 
																+the @code{STARPU_WORKERS_SCCID} environment variable.
															
 
																+
															
 
																+@item @code{unsigned workers_scc_gpuid[STARPU_NMAXWORKERS]}
															
 
																+If the @code{use_explicit_workers_scc_gpuid} flag is set, this array
															
 
																+contains the logical identifiers of the SCC devices to be used.
															
 
																+
															
 
																 @item @code{int calibrate} (default = 0)
															
 
																 If this flag is set, StarPU will calibrate the performance models when
															
 
																 executing tasks. If this value is equal to @code{-1}, the default value is
															
@@ -162,6 +192,11 @@ task scheduler will however still however still try varying combined worker
 
																 sizes to look for the most efficient ones.
															
 
																 This can also be specified with the @code{STARPU_SINGLE_COMBINED_WORKER} environment variable.
															
 
																+@item @code{mic_sink_program_path} (default = NULL)
															
 
																+Path to the program to execute on the MIC device, compiled for MIC
															
 
																+architecture. When set to NULL, StarPU automatically looks next to the host
															
 
																+program location.
															
 
																+
															
 
																 @item @code{int disable_asynchronous_copy} (default = 0)
															
 
																 This flag should be set to 1 to disable asynchronous copies between
															
 
																 CPUs and all accelerators. This can also be specified with the
															
@@ -189,6 +224,13 @@ it is therefore necessary to disable asynchronous data transfers.
 
																 This can also be specified at compilation time by giving to the
															
 
																 configure script the option @code{--disable-asynchronous-opencl-copy}.
															
 
																+@item @code{int disable_asynchronous_mic_copy} (default = 0)
															
 
																+This flag should be set to 1 to disable asynchronous copies between
															
 
																+CPUs and MIC accelerators. This can also be specified with the
															
 
																+@code{STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY} environment variable.
															
 
																+This can also be specified at compilation time by giving to the
															
 
																+configure script the option @code{--disable-asynchronous-mic-copy}.
															
 
																+
															
 
																 @item @code{int *cuda_opengl_interoperability} (default = NULL)
															
 
																 This can be set to an array of CUDA device identifiers for which
															
 
																 @code{cudaGLSetGLDevice} should be called instead of @code{cudaSetDevice}. Its
															
@@ -223,6 +265,12 @@ Upon successful completion, this function returns 0. Otherwise, @code{-ENODEV}
 
																 indicates that no worker was available (so that StarPU was not initialized).
															
 
																 @end deftypefun
															
 
																+@deftypefun int starpu_initialize ({struct starpu_conf *}@var{conf}, int @var{argc}, {char ***}@var{argv})
															
 
																+This is the same as @code{starpu_init}, but also takes the @code{argc} and
															
 
																+@code{argv} as received by the application. This is needed for SCC
															
 
																+execution to initialize the communication library.
															
 
																+@end deftypefun
															
 
																+
															
 
																 @deftypefun int starpu_conf_init ({struct starpu_conf *}@var{conf})
															
 
																 This function initializes the @var{conf} structure passed as argument
															
 
																 with the default values. In case some configuration parameters are already
															
@@ -319,6 +367,8 @@ The different values are:
 
																 @item @code{STARPU_CPU_WORKER}
															
 
																 @item @code{STARPU_CUDA_WORKER}
															
 
																 @item @code{STARPU_OPENCL_WORKER}
															
 
																+@item @code{STARPU_MIC_WORKER}
															
 
																+@item @code{STARPU_SCC_WORKER}
															
 
																 @end table
															
 
																 @end deftp
															
@@ -348,6 +398,20 @@ This function returns the number of OpenCL devices controlled by StarPU. The ret
 
																 value should be at most @code{STARPU_MAXOPENCLDEVS}.
															
 
																 @end deftypefun
															
 
																+@deftypefun unsigned starpu_mic_worker_get_count (void)
															
 
																+This function returns the number of MIC workers controlled by StarPU.
															
 
																+@end deftypefun
															
 
																+
															
 
																+@deftypefun unsigned starpu_mic_device_get_count (void)
															
 
																+This function returns the number of MIC devices controlled by StarPU. The returned
															
 
																+value should be at most @code{STARPU_MAXMICDEVS}.
															
 
																+@end deftypefun
															
 
																+
															
 
																+@deftypefun unsigned starpu_scc_worker_get_count (void)
															
 
																+This function returns the number of SCC devices controlled by StarPU. The returned
															
 
																+value should be at most @code{STARPU_MAXSCCDEVS}.
															
 
																+@end deftypefun
															
 
																+
															
 
																 @deftypefun int starpu_worker_get_id (void)
															
 
																 This function returns the identifier of the current worker, i.e the one associated to the calling
															
 
																 thread. The returned value is either -1 if the current context is not a StarPU
															
@@ -420,6 +484,9 @@ todo
 
																 @item @code{STARPU_CPU_RAM}
															
 
																 @item @code{STARPU_CUDA_RAM}
															
 
																 @item @code{STARPU_OPENCL_RAM}
															
 
																+@item @code{STARPU_MIC_RAM}
															
 
																+@item @code{STARPU_SCC_RAM}
															
 
																+@item @code{STARPU_SCC_SHM}
															
 
																 @end table
															
 
																 @end deftp
															
@@ -501,6 +568,12 @@ ignored for now.
 
																 @end table
															
 
																 @end deftp
															
 
																+In addition to that, @code{STARPU_COMMUTE} can be passed along @code{STARPU_W}
															
 
																+or @code{STARPU_RW} to express that StarPU can let tasks commute, which is
															
 
																+useful e.g. when bringing a contribution into some data, which can be done
															
 
																+in any order (but still requires sequential consistency against reads or
															
 
																+non-commutative writes).
															
 
																+
															
 
																 @deftp {Data Type} {starpu_data_handle_t}
															
 
																 StarPU uses @code{starpu_data_handle_t} as an opaque handle to manage a piece of
															
 
																 data. Once a piece of data has been registered to StarPU, it is associated to a
															
@@ -1364,8 +1437,8 @@ be useful to provide more specific method in case of e.g. available particular
 
																 CUDA or OpenCL support.
															
 
																 @table @asis
															
 
																-@item @code{int (*@{ram,cuda,opencl@}_to_@{ram,cuda,opencl@})(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)}
															
 
																-These 12 functions define how to copy data from the @var{src_interface}
															
 
																+@item @code{int (*@{ram,cuda,opencl,mic@}_to_@{ram,cuda,opencl,mic@})(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)}
															
 
																+These 14 functions define how to copy data from the @var{src_interface}
															
 
																 interface on the @var{src_node} node to the @var{dst_interface} interface
															
 
																 on the @var{dst_node} node. They return 0 on success.
															
@@ -1386,6 +1459,22 @@ Must return 0 if the transfer was actually completed completely synchronously,
 
																 or -EAGAIN if at least some transfers are still ongoing and should be awaited
															
 
																 for by the core.
															
 
																+@item @code{int (*@{ram,mic@}_to_@{ram,mic@}_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)}
															
 
																+These 2 functions (@code{ram_to_ram} and @code{mic_to_mic} are not among them) define how to copy
															
 
																+data from the @var{src_interface} interface on the @var{src_node} node to the
															
 
																+@var{dst_interface} interface on the @var{dst_node} node.
															
 
																+Must return 0 if the transfer was actually completed completely synchronously,
															
 
																+or -EAGAIN if at least some transfers are still ongoing and should be awaited
															
 
																+for by the core.
															
 
																+
															
 
																+@item @code{int (*@{src,sink@}_to_@{src,sink@}_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)}
															
 
																+These 3 functions (@code{src_to_src} is not among them) define how to copy
															
 
																+data from the @var{src_interface} interface on the @var{src_node} node to the
															
 
																+@var{dst_interface} interface on the @var{dst_node} node.
															
 
																+Must return 0 if the transfer was actually completed completely synchronously,
															
 
																+or -EAGAIN if at least some transfers are still ongoing and should be awaited
															
 
																+for by the core.
															
 
																+
															
 
																 @item @code{int (*any_to_any)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data)}
															
 
																 Define how to copy data from the @var{src_interface} interface on the
															
 
																 @var{src_node} node to the @var{dst_interface} interface on the @var{dst_node}
															
@@ -1729,6 +1818,24 @@ pointer to a codelet which converts from CPU to CUDA
 
																 @item @code{struct starpu_codelet *cuda_to_cpu_cl}
															
 
																 pointer to a codelet which converts from CUDA to CPU
															
 
																+
															
 
																+@item @code{size_t mic_elemsize}
															
 
																+the size of each element on MIC devices,
															
 
																+
															
 
																+@item @code{struct starpu_codelet *cpu_to_mic_cl}
															
 
																+pointer to a codelet which converts from CPU to MIC
															
 
																+
															
 
																+@item @code{struct starpu_codelet *mic_to_cpu_cl}
															
 
																+pointer to a codelet which converts from MIC to CPU
															
 
																+
															
 
																+@item @code{size_t scc_elemsize}
															
 
																+the size of each element on SCC devices,
															
 
																+
															
 
																+@item @code{struct starpu_codelet *cpu_to_scc_cl}
															
 
																+pointer to a codelet which converts from CPU to SCC
															
 
																+
															
 
																+@item @code{struct starpu_codelet *scc_to_cpu_cl}
															
 
																+pointer to a codelet which converts from SCC to CPU
															
 
																 @end table
															
 
																 @end deftp
															
@@ -1791,7 +1898,19 @@ processing unit.
 
																 @defmac STARPU_OPENCL
															
 
																 This macro is used when setting the field @code{where} of a @code{struct
															
 
																-starpu_codelet} to specify the codelet may be executed on a OpenCL
															
 
																+starpu_codelet} to specify the codelet may be executed on an OpenCL
															
 
																+processing unit.
															
 
																+@end defmac
															
 
																+
															
 
																+@defmac STARPU_MIC
															
 
																+This macro is used when setting the field @code{where} of a @code{struct
															
 
																+starpu_codelet} to specify the codelet may be executed on a MIC
															
 
																+processing unit.
															
 
																+@end defmac
															
 
																+
															
 
																+@defmac STARPU_SCC
															
 
																+This macro is used when setting the field @code{where} of a @code{struct
															
 
																+starpu_codelet} to specify the codelet may be executed on an SCC
															
 
																 processing unit.
															
 
																 @end defmac
															
@@ -1864,6 +1983,12 @@ If the @code{where} field is set, then the @code{cpu_funcs} field is
 
																 ignored if @code{STARPU_CPU} does not appear in the @code{where}
															
 
																 field, it must be non-null otherwise.
															
 
																+@item @code{char * cpu_funcs_name[STARPU_MAXIMPLEMENTATIONS]} (optional)
															
 
																+Is an array of strings which provide the name of the CPU functions referenced in
															
 
																+the @code{cpu_funcs} array. This can be used when running on MIC devices or the
															
 
																+SCC platform, for StarPU to simply look up the MIC or SCC function implementation
															
 
																+through its name.
															
 
																+
															
 
																 @item @code{starpu_cuda_func_t cuda_func} (optional)
															
 
																 This field has been made deprecated. One should use instead the
															
 
																 @code{cuda_funcs} field.
															
@@ -1891,6 +2016,28 @@ If the @code{where} field is set, then the @code{opencl_funcs} field
 
																 is ignored if @code{STARPU_OPENCL} does not appear in the @code{where}
															
 
																 field, it must be non-null otherwise.
															
 
																+@item @code{starpu_mic_func_t mic_funcs[STARPU_MAXIMPLEMENTATIONS]} (optional)
															
 
																+Is an array of function pointers to a function which returns the MIC
															
 
																+implementation of the codelet.
															
 
																+It must be terminated by a NULL value.
															
 
																+The function prototype must be:
															
 
																+@code{starpu_mic_kernel_t mic_func(struct starpu_codelet *cl, unsigned nimpl);}.
															
 
																+If the @code{where} field is set, then the @code{mic_funcs} field
															
 
																+is ignored if @code{STARPU_MIC} does not appear in the @code{where}
															
 
																+field. It can be null if @code{cpu_funcs_name} is non-NULL, in which case StarPU
															
 
																+will simply make a symbol lookup to get the implementation.
															
 
																+
															
 
																+@item @code{starpu_scc_func_t scc_funcs[STARPU_MAXIMPLEMENTATIONS]} (optional)
															
 
																+Is an array of function pointers to a function which returns the SCC
															
 
																+implementation of the codelet.
															
 
																+It must be terminated by a NULL value.
															
 
																+The function prototype must be:
															
 
																+@code{starpu_scc_kernel_t scc_func(struct starpu_codelet *cl, unsigned nimpl);}.
															
 
																+If the @code{where} field is set, then the @code{scc_funcs} field
															
 
																+is ignored if @code{STARPU_SCC} does not appear in the @code{where}
															
 
																+field. It can be null if @code{cpu_funcs_name} is non-NULL, in which case StarPU
															
 
																+will simply make a symbol lookup to get the implementation.
															
 
																+
															
 
																 @item @code{unsigned nbuffers}
															
 
																 Specifies the number of arguments taken by the codelet. These arguments are
															
 
																 managed by the DSM and are accessed from the @code{void *buffers[]}
															
@@ -2295,6 +2442,7 @@ executing. It thus does not include tasks waiting for dependencies.
 
																 This function returns the task currently executed by the worker, or
															
 
																 NULL if it is called either from a thread that is not a task or simply
															
 
																 because there is no task being executed at the moment.
															
 
																+This function must be called from the callback (not from the codelet).
															
 
																 @end deftypefun
															
 
																 @deftypefun void starpu_codelet_display_stats ({struct starpu_codelet} *@var{cl})
															
@@ -3271,6 +3419,48 @@ successfull. It returns 0 if the synchronous copy was successful, or
 
																 fails otherwise.
															
 
																 @end deftypefun
															
 
																+@node MIC extensions
															
 
																+@section MIC extensions
															
 
																+
															
 
																+@defmac STARPU_USE_MIC
															
 
																+This macro is defined when StarPU has been installed with MIC
															
 
																+support. It should be used in your code to detect the availability of
															
 
																+MIC.
															
 
																+@end defmac
															
 
																+
															
 
																+@deftypefun int starpu_mic_register_kernel({starpu_mic_func_symbol_t *}@var{symbol}, {const char *}@var{func_name})
															
 
																+Initiate a lookup on each MIC device to find the address of the function
															
 
																+named FUNC_NAME, store them in the global array kernels and return
															
 
																+the index in the array through SYMBOL.
															
 
																+@end deftypefun
															
 
																+
															
 
																+@deftypefun starpu_mic_kernel_t starpu_mic_get_kernel(starpu_mic_func_symbol_t @var{symbol})
															
 
																+On success, return the pointer to the function defined by SYMBOL on the
															
 
																+device linked to the called device. This can for instance be used in a
															
 
																+@code{starpu_mic_func_t} implementation.
															
 
																+@end deftypefun
															
 
																+
															
 
																+@node SCC extensions
															
 
																+@section SCC extensions
															
 
																+
															
 
																+@defmac STARPU_USE_SCC
															
 
																+This macro is defined when StarPU has been installed with SCC
															
 
																+support. It should be used in your code to detect the availability of
															
 
																+SCC.
															
 
																+@end defmac
															
 
																+
															
 
																+@deftypefun int starpu_scc_register_kernel({starpu_scc_func_symbol_t *}@var{symbol}, {const char *}@var{func_name})
															
 
																+Initiate a lookup on each SCC device to find the address of the function
															
 
																+named FUNC_NAME, store them in the global array kernels and return
															
 
																+the index in the array through SYMBOL.
															
 
																+@end deftypefun
															
 
																+
															
 
																+@deftypefun starpu_scc_kernel_t starpu_scc_get_kernel(starpu_scc_func_symbol_t @var{symbol})
															
 
																+On success, return the pointer to the function defined by SYMBOL on the
															
 
																+device linked to the called device. This can for instance be used in a
															
 
																+@code{starpu_scc_func_t} implementation.
															
 
																+@end deftypefun
															
 
																+
															
 
																 @node Miscellaneous helpers
															
 
																 @section Miscellaneous helpers
															
@@ -3824,7 +4014,11 @@ number of CUDA workers.
 
																 @item @code{unsigned nhwopenclgpus}
															
 
																 Total number of OpenCL devices, as detected. May be different from the actual
															
 
																-number of CUDA workers.
															
 
																+number of OpenCL workers.
															
 
																+
															
 
																+@item @code{unsigned nhscc}
															
 
																+Total number of SCC cores, as detected. May be different from the actual
															
 
																+number of SCC workers.
															
 
																 @item @code{unsigned ncpus}
															
 
																 Actual number of CPU workers used by StarPU.
															
@@ -3835,6 +4029,9 @@ Actual number of CUDA workers used by StarPU.
 
																 @item @code{unsigned nopenclgpus}
															
 
																 Actual number of OpenCL workers used by StarPU.
															
 
																+@item @code{unsigned nsccdevices}
															
 
																+Actual number of SCC workers used by StarPU.
															
 
																+
															
 
																 @item @code{unsigned workers_bindid[STARPU_NMAXWORKERS]}
															
 
																 Indicates the successive cpu identifier that should be used to bind the
															
 
																 workers. It is either filled according to the user's explicit
															
@@ -3843,17 +4040,29 @@ variable. Otherwise, a round-robin policy is used to distributed the workers
 
																 over the cpus.
															
 
																 @item @code{unsigned workers_cuda_gpuid[STARPU_NMAXWORKERS]}
															
 
																-Indicates the successive cpu identifier that should be used by the CUDA
															
 
																+Indicates the successive CUDA identifier that should be used by the CUDA
															
 
																 driver.  It is either filled according to the user's explicit parameters (from
															
 
																 starpu_conf) or according to the STARPU_WORKERS_CUDAID env. variable. Otherwise,
															
 
																 they are taken in ID order.
															
 
																 @item @code{unsigned workers_opencl_gpuid[STARPU_NMAXWORKERS]}
															
 
																-Indicates the successive cpu identifier that should be used by the OpenCL
															
 
																+Indicates the successive OpenCL identifier that should be used by the OpenCL
															
 
																 driver.  It is either filled according to the user's explicit parameters (from
															
 
																 starpu_conf) or according to the STARPU_WORKERS_OPENCLID env. variable. Otherwise,
															
 
																 they are taken in ID order.
															
 
																+@item @code{unsigned workers_mic_deviceid[STARPU_NMAXWORKERS]}
															
 
																+Indicates the successive MIC devices that should be used by the MIC
															
 
																+driver.  It is either filled according to the user's explicit parameters (from
															
 
																+starpu_conf) or according to the STARPU_WORKERS_MICID env. variable. Otherwise,
															
 
																+they are taken in ID order.
															
 
																+
															
 
																+@item @code{unsigned workers_scc_deviceid[STARPU_NMAXWORKERS]}
															
 
																+Indicates the successive SCC devices that should be used by the SCC
															
 
																+driver.  It is either filled according to the user's explicit parameters (from
															
 
																+starpu_conf) or according to the STARPU_WORKERS_SCCID env. variable. Otherwise,
															
 
																+they are taken in ID order.
															
 
																+
															
 
																 @end table
															
 
																 @end deftp
															
@@ -3892,7 +4101,7 @@ The workerids managed by the collection
 
																 The number of workerids
															
 
																 @item @code{pthread_key_t cursor_key} (optional)
															
 
																 The cursor needed to iterate the collection (depending on the data structure)
															
 
																-@item @code{int type}
															
 
																+@item @code{enum starpu_worker_collection_type type}
															
 
																 The type of structure (currently STARPU_WORKER_LIST is the only one available)
															
 
																 @item @code{unsigned (*has_next)(struct starpu_worker_collection *workers)}
															
 
																 Checks if there is a next worker
															
@@ -3914,7 +4123,7 @@ Deinitialize the cursor if there is one
 
																 @end table
															
 
																 @end deftp
															
 
																-@deftypefun struct starpu_worker_collection* starpu_sched_ctx_create_worker_collection (unsigned @var{sched_ctx_id}, int @var{type})
															
 
																+@deftypefun struct starpu_worker_collection* starpu_sched_ctx_create_worker_collection (unsigned @var{sched_ctx_id}, enum starpu_worker_collection_type @var{type})
															
 
																 Create a worker collection of the type indicated by the last parameter for the context specified through the first parameter.
															
 
																 @end deftypefun
															
--- a/doc/chapters/basic-examples.texi
+++ b/doc/chapters/basic-examples.texi
@@ -132,6 +132,7 @@ struct starpu_codelet cl =
 
																 @{
															
 
																     .where = STARPU_CPU,
															
 
																     .cpu_funcs = @{ cpu_func, NULL @},
															
 
																+    .cpu_funcs_name = @{ "cpu_func", NULL @},
															
 
																     .nbuffers = 0
															
 
																 @};
															
 
																 @end smallexample
															
@@ -642,6 +643,7 @@ void scal_cpu_func(void *buffers[], void *cl_arg)
 
																 struct starpu_codelet cl =
															
 
																 @{
															
 
																     .cpu_funcs = @{ scal_cpu_func, NULL @},
															
 
																+    .cpu_funcs_name = @{ "scal_cpu_func", NULL @},
															
 
																     .nbuffers = 1,
															
 
																     .modes = @{ STARPU_RW @}
															
 
																 @};
															
@@ -822,6 +824,7 @@ static struct starpu_codelet cl =
 
																 @{
															
 
																     .cuda_funcs = @{ scal_cuda_func, NULL @},
															
 
																     .cpu_funcs = @{ scal_cpu_func, NULL @},
															
 
																+    .cpu_funcs_name = @{ "scal_cpu_func", NULL @},
															
 
																     .opencl_funcs = @{ scal_opencl_func, NULL @},
															
 
																     .nbuffers = 1,
															
 
																     .modes = @{ STARPU_RW @}
															
--- a/doc/chapters/configuration.texi
+++ b/doc/chapters/configuration.texi
@@ -473,6 +473,10 @@ fail when copying data asynchronously. When using this implementation,
 
																 it is therefore necessary to disable asynchronous data transfers.
															
 
																 @end defvr
															
 
																+@defvr {Environment variable} STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY
															
 
																+Disable asynchronous copies between CPU and MIC devices.
															
 
																+@end defvr
															
 
																+
															
 
																 @defvr {Environment variable} STARPU_DISABLE_CUDA_GPU_GPU_DIRECT
															
 
																 Disable direct CUDA transfers from GPU to GPU, and let CUDA copy through RAM
															
 
																 instead. This permits to test the performance effect of GPU-Direct.
															
--- a/doc/chapters/mic-scc-support.texi
+++ b/doc/chapters/mic-scc-support.texi
@@ -0,0 +1,55 @@
 
																+@c -*-texinfo-*-
															
 
																+
															
 
																+@c This file is part of the StarPU Handbook.
															
 
																+@c Copyright (C) 2013  Universit@'e de Bordeaux 1
															
 
																+@c See the file starpu.texi for copying conditions.
															
 
																+
															
 
																+@section Compilation
															
 
																+
															
 
																+SCC support just needs the presence of the RCCE library.
															
 
																+
															
 
																+MIC support actually needs two compilations of StarPU, one for the host and one for
															
 
																+the device. The @code{mic-configure} script can be used to achieve this: it basically
															
 
																+calls @code{configure} as appropriate from two new directories: @code{build_mic} and
															
 
																+@code{build_host}. @code{make} and @code{make install} can then be used as usual and will
															
 
																+recurse into both directories.
															
 
																+
															
 
																+@c TODO: move to configuration section ?
															
 
																+
															
 
																+It can be parameterized with the following environment variables:
															
 
																+
															
 
																+@table @asis
															
 
																+@item @code{MIC_HOST}
															
 
																+Defines the value of the @code{--host} parameter passed to @code{configure} for the
															
 
																+cross-compilation. The current default is @code{x86_64-k1om-linux}.
															
 
																+
															
 
																+@item @code{MIC_CC_PATH}
															
 
																+Defines the path to the MIC cross-compiler. The current default is @code{/usr/linux-k1om-4.7/bin/}.
															
 
																+
															
 
																+@item @code{COI_DIR}
															
 
																+Defines the path to the COI library. The current default is @code{/opt/intel/mic/coi}.
															
 
																+@end table
															
 
																+
															
 
																+@section Porting applications to MIC/SCC
															
 
																+
															
 
																+The simplest way to port an application to MIC/SCC is to add the
															
 
																+@code{cpu_funcs_name} field in the codelet, to provide StarPU with the function
															
 
																+name of the CPU implementation. StarPU will thus simply use the existing CPU
															
 
																+implementation (cross-rebuilt in the MIC case). The functions have to be
															
 
																+globally-visible (i.e. not @code{static}) for StarPU to be able to look them up.
															
 
																+
															
 
																+For SCC execution, @code{starpu_initialize} also has to be used instead of @code{starpu_init}, so
															
 
																+as to pass @code{argc} and @code{argv}.
															
 
																+
															
 
																+@section Launching programs
															
 
																+
															
 
																+SCC programs are started through RCCE.
															
 
																+
															
 
																+MIC programs are started from the host. StarPU automatically
															
 
																+starts the same program on MIC devices. It however needs to get
															
 
																+the MIC-cross-built binary. It will look for the file given by the
															
 
																+@code{STARPU_MIC_SINK_PROGRAM_NAME} environment variable or in the directory
															
 
																+given by the @code{STARPU_MIC_SINK_PROGRAM_PATH} environment variable, or in
															
 
																+the @code{mic_sink_program_path} field of the @code{starpu_conf} structure.
															
 
																+It will also look in the current directory for the same binary name plus a
															
 
																+@code{-mic} or @code{_mic} suffix.
															
--- a/doc/chapters/perf-optimization.texi
+++ b/doc/chapters/perf-optimization.texi
@@ -79,7 +79,9 @@ dependencies on that data.
 
																 In the same vein, accumulation of results in the same data can become a
															
 
																 bottleneck. The use of the @code{STARPU_REDUX} mode permits to optimize such
															
 
																-accumulation (@pxref{Data reduction}).
															
 
																+accumulation (@pxref{Data reduction}). To a lesser extent, the use of the
															
 
																+@code{STARPU_COMMUTE} flag keeps the bottleneck, but at least permits the
															
 
																+accumulation to happen in any order.
															
 
																 Applications often need a data just for temporary results.  In such a case,
															
 
																 registration can be made without an initial value, for instance this produces a vector data:
															
@@ -554,6 +556,7 @@ CUDA or OpenCL execution:
 
																 static struct starpu_codelet cl11 =
															
 
																 @{
															
 
																 	.cpu_funcs = @{chol_cpu_codelet_update_u11, NULL@},
															
 
																+	.cpu_funcs_name = @{"chol_cpu_codelet_update_u11", NULL@},
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																 	.cuda_funcs = @{chol_cublas_codelet_update_u11, NULL@},
															
 
																 #elif defined(STARPU_SIMGRID)
															
--- a/doc/chapters/sc_hypervisor.texi
+++ b/doc/chapters/sc_hypervisor.texi
@@ -106,7 +106,9 @@ parallel kernels and the number of instruction to be executed by each task.
 
																 The number of flops to be executed by a context are passed as parameter when they are registered to the hypervisor,
															
 
																  (@code{sc_hypervisor_register_ctx(sched_ctx_id, flops)}) and the one to be executed by each task are passed when the task is submitted.
															
 
																 The corresponding field in the @code{starpu_task} data structure is @code{flops} and
															
 
																-the corresponding macro in @code{starpu_insert_task} function is @code{STARPU_FLOPS}. When the task is executed
															
 
																+the corresponding macro in @code{starpu_insert_task} function is
															
 
																+@code{STARPU_FLOPS} (but take care to pass a double, not an integer, otherwise
															
 
																+parameter passing will be bogus). When the task is executed
															
 
																 the resizing process is triggered.
															
 
																 @cartouche
															
 
																 @smallexample
															
--- a/doc/chapters/vector_scal_c.texi
+++ b/doc/chapters/vector_scal_c.texi
@@ -1,7 +1,7 @@
 
																 @c -*-texinfo-*-
															
 
																 @c This file is part of the StarPU Handbook.
															
 
																-@c Copyright (C) 2009-2011  Université de Bordeaux 1
															
 
																+@c Copyright (C) 2009-2011, 2013  Université de Bordeaux 1
															
 
																 @c Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
															
 
																 @c See the file starpu.texi for copying conditions.
															
@@ -26,6 +26,7 @@ static struct starpu_codelet cl = @{
 
																     .where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL,
															
 
																     /* CPU implementation of the codelet */
															
 
																     .cpu_funcs = @{ scal_cpu_func, scal_sse_func, NULL @},
															
 
																+    .cpu_funcs_name = @{ "scal_cpu_func", "scal_sse_func", NULL @},
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																     /* CUDA implementation of the codelet */
															
 
																     .cuda_funcs = @{ scal_cuda_func, NULL @},
															
--- a/doc/starpu.texi
+++ b/doc/starpu.texi
@@ -74,6 +74,7 @@ was last updated on @value{UPDATED}.
 
																 * Tips and Tricks::             Tips and tricks to know about
															
 
																 * StarPU MPI support::          How to combine StarPU with MPI
															
 
																 * StarPU FFT support::          How to perform FFT computations with StarPU
															
 
																+* StarPU MIC/SCC support::      How to build and run StarPU applications on MIC and SCC
															
 
																 * C Extensions::                Easier StarPU programming with GCC
															
 
																 * SOCL OpenCL Extensions::      How to use OpenCL on top of StarPU
															
 
																 * Scheduling Contexts in StarPU::         How to use Scheduling Context of StarPU
															
@@ -163,6 +164,14 @@ was last updated on @value{UPDATED}.
 
																 @include chapters/fft-support.texi
															
 
																 @c ---------------------------------------------------------------------
															
 
																+@c MIC/SCC support
															
 
																+@c ---------------------------------------------------------------------
															
 
																+
															
 
																+@node StarPU MIC/SCC support
															
 
																+@chapter StarPU MIC/SCC support
															
 
																+@include chapters/mic-scc-support.texi
															
 
																+
															
 
																+@c ---------------------------------------------------------------------
															
 
																 @c C Extensions
															
 
																 @c ---------------------------------------------------------------------
															
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -20,7 +20,7 @@ AM_CFLAGS = $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STAR
 
																 AM_CXXFLAGS = $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS)
															
 
																 LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ $(MAGMA_LIBS) $(HWLOC_LIBS) @LIBS@
															
 
																 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include
															
 
																-AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) 
															
 
																+AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(STARPU_COI_LDFLAGS)
															
 
																 SUBDIRS = stencil
															
@@ -138,10 +138,15 @@ endif
 
																 if !STARPU_HAVE_WINDOWS
															
 
																 ## test loader program
															
 
																+if !STARPU_CROSS_COMPILING
															
 
																 LOADER			=	loader
															
 
																 loader_CPPFLAGS =  $(AM_CFLAGS) $(AM_CPPFLAGS) -I$(top_builddir)/src/
															
 
																 LOADER_BIN		=	$(abs_top_builddir)/examples/$(LOADER)
															
 
																 loader_SOURCES		=	../tests/loader.c
															
 
																+else
															
 
																+LOADER			=
															
 
																+LOADER_BIN		=	$(top_builddir)/tests/loader-cross.sh
															
 
																+endif
															
 
																 if STARPU_HAVE_AM111
															
 
																 TESTS_ENVIRONMENT	=	top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)"
															
@@ -305,9 +310,12 @@ basic_examples_vector_scal_SOURCES =		\
 
																 	basic_examples/vector_scal_cpu.c
															
 
																 if STARPU_HAVE_ICC
															
 
																+if STARPU_CROSS_COMPILING
															
 
																+else
															
 
																 basic_examples_vector_scal_SOURCES +=		\
															
 
																 	basic_examples/vector_scal_cpu_icc.icc
															
 
																 endif
															
 
																+endif
															
 
																 if STARPU_USE_CUDA
															
 
																 basic_examples_vector_scal_SOURCES +=		\
															
--- a/examples/basic_examples/mult.c
+++ b/examples/basic_examples/mult.c
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2010-2011  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010-2011, 2013  Université de Bordeaux 1
															
 
																  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
															
 
																  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
															
 
																  *
															
@@ -77,7 +77,7 @@ static unsigned zdim = 512;
 
																  * registered data with the "matrix" data interface, we use the matrix macros.
															
 
																  */
															
 
																-static void cpu_mult(void *descr[], __attribute__((unused))  void *arg)
															
 
																+void cpu_mult(void *descr[], __attribute__((unused))  void *arg)
															
 
																 {
															
 
																 	float *subA, *subB, *subC;
															
 
																 	uint32_t nxC, nyC, nyA;
															
@@ -264,6 +264,7 @@ static struct starpu_codelet cl =
 
																         /* we can only execute that kernel on a CPU yet */
															
 
																         /* CPU implementation of the codelet */
															
 
																         .cpu_funcs = {cpu_mult, NULL},
															
 
																+        .cpu_funcs_name = {"cpu_mult", NULL},
															
 
																         /* the codelet manipulates 3 buffers that are managed by the
															
 
																          * DSM */
															
 
																         .nbuffers = 3,
															
--- a/examples/basic_examples/vector_scal.c
+++ b/examples/basic_examples/vector_scal.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
															
 
																- * Copyright (C) 2010-2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010-2013  Université de Bordeaux 1
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -52,21 +52,34 @@ static struct starpu_perfmodel vector_scal_power_model =
 
																 static struct starpu_codelet cl =
															
 
																 {
															
 
																-	.where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL,
															
 
																+	.where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL | STARPU_MIC,
															
 
																 	/* CPU implementation of the codelet */
															
 
																 	.cpu_funcs = {
															
 
																 		scal_cpu_func
															
 
																-#ifdef STARPU_HAVE_ICC
															
 
																+#if defined(STARPU_HAVE_ICC) && !defined(__KNC__) && !defined(__KNF__)
															
 
																 		, scal_cpu_func_icc
															
 
																 #endif
															
 
																 #ifdef __SSE__
															
 
																 		, scal_sse_func
															
 
																-#ifdef STARPU_HAVE_ICC
															
 
																+#if defined(STARPU_HAVE_ICC) && !defined(__KNC__) && !defined(__KNF__)
															
 
																 		, scal_sse_func_icc
															
 
																 #endif
															
 
																 #endif
															
 
																 		, NULL
															
 
																 	},
															
 
																+	.cpu_funcs_name = {
															
 
																+		"scal_cpu_func",
															
 
																+#if defined(STARPU_HAVE_ICC) && !defined(__KNC__) && !defined(__KNF__)
															
 
																+		"scal_cpu_func_icc",
															
 
																+#endif
															
 
																+#ifdef __SSE__
															
 
																+		"scal_sse_func",
															
 
																+#if defined(STARPU_HAVE_ICC) && !defined(__KNC__) && !defined(__KNF__)
															
 
																+		"scal_sse_func_icc"
															
 
																+#endif
															
 
																+#endif
															
 
																+	},
															
 
																+
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																 	/* CUDA implementation of the codelet */
															
 
																 	.cuda_funcs = {scal_cuda_func, NULL},
															
--- a/examples/basic_examples/vector_scal_c.c
+++ b/examples/basic_examples/vector_scal_c.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
															
 
																- * Copyright (C) 2011  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2011, 2013  Université de Bordeaux 1
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -43,6 +43,7 @@ static struct starpu_codelet cl =
 
																 	.modes = { STARPU_RW },
															
 
																 	/* CPU implementation of the codelet */
															
 
																 	.cpu_funcs = {scal_cpu_func, NULL},
															
 
																+	.cpu_funcs_name = {"scal_cpu_func", NULL},
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																 	/* CUDA implementation of the codelet */
															
 
																 	.cuda_funcs = {scal_cuda_func, NULL},
															
--- a/examples/filters/fblock.c
+++ b/examples/filters/fblock.c
@@ -92,6 +92,7 @@ int main(int argc, char **argv)
 
																 	struct starpu_codelet cl =
															
 
																 	{
															
 
																                 .cpu_funcs = {cpu_func, NULL},
															
 
																+                .cpu_funcs_name = {"cpu_func", NULL},
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																                 .cuda_funcs = {cuda_func, NULL},
															
 
																 #endif
															
@@ -147,6 +148,7 @@ int main(int argc, char **argv)
 
																                 task->callback_func = NULL;
															
 
																                 task->handles[0] = starpu_data_get_sub_data(handle, 1, i);
															
 
																                 task->cl_arg = &multiplier;
															
 
																+                task->cl_arg_size = sizeof(multiplier);
															
 
																                 ret = starpu_task_submit(task);
															
 
																                 if (ret)
															
--- a/examples/filters/fmatrix.c
+++ b/examples/filters/fmatrix.c
@@ -63,6 +63,7 @@ int main(int argc, char **argv)
 
																         struct starpu_codelet cl =
															
 
																 	{
															
 
																                 .cpu_funcs = {cpu_func, NULL},
															
 
																+                .cpu_funcs_name = {"cpu_func", NULL},
															
 
																                 .nbuffers = 1,
															
 
																 		.modes = {STARPU_RW},
															
 
																 		.name = "matrix_scal"
															
--- a/examples/filters/fvector.c
+++ b/examples/filters/fvector.c
@@ -46,6 +46,7 @@ int main(int argc, char **argv)
 
																         struct starpu_codelet cl =
															
 
																 	{
															
 
																                 .cpu_funcs = {cpu_func, NULL},
															
 
																+                .cpu_funcs_name = {"cpu_func", NULL},
															
 
																                 .nbuffers = 1,
															
 
																 		.modes = {STARPU_RW},
															
 
																 		.name = "vector_scal"
															
--- a/examples/filters/shadow.c
+++ b/examples/filters/shadow.c
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2012-2013  Université de Bordeaux 1
															
 
																  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -100,6 +100,7 @@ int main(int argc, char **argv)
 
																         struct starpu_codelet cl =
															
 
																 	{
															
 
																                 .cpu_funcs = {cpu_func, NULL},
															
 
																+                .cpu_funcs_name = {"cpu_func", NULL},
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																                 .cuda_funcs = {cuda_func, NULL},
															
 
																 #endif
															
--- a/examples/filters/shadow2d.c
+++ b/examples/filters/shadow2d.c
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2012-2013  Université de Bordeaux 1
															
 
																  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -154,6 +154,7 @@ int main(int argc, char **argv)
 
																         struct starpu_codelet cl =
															
 
																 	{
															
 
																                 .cpu_funcs = {cpu_func, NULL},
															
 
																+                .cpu_funcs_name = {"cpu_func", NULL},
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																                 .cuda_funcs = {cuda_func, NULL},
															
 
																 #endif
															
--- a/examples/filters/shadow3d.c
+++ b/examples/filters/shadow3d.c
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2012-2013  Université de Bordeaux 1
															
 
																  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -121,6 +121,7 @@ int main(int argc, char **argv)
 
																         struct starpu_codelet cl =
															
 
																 	{
															
 
																                 .cpu_funcs = {cpu_func, NULL},
															
 
																+                .cpu_funcs_name = {"cpu_func", NULL},
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																                 .cuda_funcs = {cuda_func, NULL},
															
 
																 #endif
															
--- a/examples/incrementer/incrementer.c
+++ b/examples/incrementer/incrementer.c
@@ -66,6 +66,7 @@ int main(int argc, char **argv)
 
																 	struct starpu_codelet cl =
															
 
																 	{
															
 
																 		.cpu_funcs = {cpu_codelet, NULL},
															
 
																+		.cpu_funcs_name = {"cpu_codelet", NULL},
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																 		.cuda_funcs = {cuda_codelet, NULL},
															
 
																 #endif
															
--- a/examples/interface/complex_codelet.h
+++ b/examples/interface/complex_codelet.h
@@ -51,6 +51,7 @@ void compare_complex_codelet(void *descr[], void *_args)
 
																 struct starpu_codelet cl_compare =
															
 
																 {
															
 
																 	.cpu_funcs = {compare_complex_codelet, NULL},
															
 
																+	.cpu_funcs_name = {"compare_complex_codelet", NULL},
															
 
																 	.nbuffers = 2,
															
 
																 	.modes = {STARPU_R, STARPU_R},
															
 
																 	.name = "cl_compare"
															
@@ -76,6 +77,7 @@ void display_complex_codelet(void *descr[], void *_args)
 
																 struct starpu_codelet cl_display =
															
 
																 {
															
 
																 	.cpu_funcs = {display_complex_codelet, NULL},
															
 
																+	.cpu_funcs_name = {"display_complex_codelet", NULL},
															
 
																 	.nbuffers = 1,
															
 
																 	.modes = {STARPU_R},
															
 
																 	.name = "cl_display"
															
--- a/examples/openmp/vector_scal_omp.c
+++ b/examples/openmp/vector_scal_omp.c
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
															
 
																- * Copyright (C) 2010-2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010-2013  Université de Bordeaux 1
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -65,6 +65,7 @@ static struct starpu_codelet cl =
 
																 	.type = STARPU_FORKJOIN,
															
 
																 	.max_parallelism = INT_MAX,
															
 
																 	.cpu_funcs = {scal_cpu_func, NULL},
															
 
																+	.cpu_funcs_name = {"scal_cpu_func", NULL},
															
 
																 	.nbuffers = 1,
															
 
																 	.model = &vector_scal_model,
															
 
																 };
															
--- a/examples/pi/pi.c
+++ b/examples/pi/pi.c
@@ -32,7 +32,7 @@ static unsigned ntasks = 1024;
 
																 static unsigned long long nshot_per_task = 16*1024*1024ULL;
															
 
																-static void cpu_kernel(void *descr[], void *cl_arg)
															
 
																+void cpu_kernel(void *descr[], void *cl_arg)
															
 
																 {
															
 
																 	unsigned *directions = (unsigned *)STARPU_VECTOR_GET_PTR(descr[0]);
															
 
																 	unsigned nx = nshot_per_task;
															
@@ -107,6 +107,7 @@ static struct starpu_perfmodel model =
 
																 static struct starpu_codelet pi_cl =
															
 
																 {
															
 
																 	.cpu_funcs = {cpu_kernel, NULL},
															
 
																+	.cpu_funcs_name = {"cpu_kernel", NULL},
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																 	.cuda_funcs = {cuda_kernel, NULL},
															
 
																 #endif
															
--- a/examples/pi/pi_redux.c
+++ b/examples/pi/pi_redux.c
@@ -138,7 +138,7 @@ static void parse_args(int argc, char **argv)
 
																  *	Monte-carlo kernel
															
 
																  */
															
 
																-static void pi_func_cpu(void *descr[], void *cl_arg __attribute__ ((unused)))
															
 
																+void pi_func_cpu(void *descr[], void *cl_arg __attribute__ ((unused)))
															
 
																 {
															
 
																 	int workerid = starpu_worker_get_id();
															
@@ -209,6 +209,7 @@ static struct starpu_perfmodel pi_model =
 
																 static struct starpu_codelet pi_cl =
															
 
																 {
															
 
																 	.cpu_funcs = {pi_func_cpu, NULL},
															
 
																+	.cpu_funcs_name = {"pi_func_cpu", NULL},
															
 
																 #ifdef STARPU_HAVE_CURAND
															
 
																 	.cuda_funcs = {pi_func_cuda, NULL},
															
 
																 #endif
															
@@ -227,6 +228,7 @@ static struct starpu_perfmodel pi_model_redux =
 
																 static struct starpu_codelet pi_cl_redux =
															
 
																 {
															
 
																 	.cpu_funcs = {pi_func_cpu, NULL},
															
 
																+	.cpu_funcs_name = {"pi_func_cpu", NULL},
															
 
																 #ifdef STARPU_HAVE_CURAND
															
 
																 	.cuda_funcs = {pi_func_cuda, NULL},
															
 
																 #endif
															
@@ -239,7 +241,7 @@ static struct starpu_codelet pi_cl_redux =
 
																  *	Codelets to implement reduction
															
 
																  */
															
 
																-static void init_cpu_func(void *descr[], void *cl_arg)
															
 
																+void init_cpu_func(void *descr[], void *cl_arg)
															
 
																 {
															
 
																         unsigned long *val = (unsigned long *)STARPU_VARIABLE_GET_PTR(descr[0]);
															
 
																         *val = 0;
															
@@ -257,6 +259,7 @@ static void init_cuda_func(void *descr[], void *cl_arg)
 
																 static struct starpu_codelet init_codelet =
															
 
																 {
															
 
																         .cpu_funcs = {init_cpu_func, NULL},
															
 
																+        .cpu_funcs_name = {"init_cpu_func", NULL},
															
 
																 #ifdef STARPU_HAVE_CURAND
															
 
																         .cuda_funcs = {init_cuda_func, NULL},
															
 
																 #endif
															
@@ -284,7 +287,7 @@ static void redux_cuda_func(void *descr[], void *cl_arg)
 
																 }
															
 
																 #endif
															
 
																-static void redux_cpu_func(void *descr[], void *cl_arg)
															
 
																+void redux_cpu_func(void *descr[], void *cl_arg)
															
 
																 {
															
 
																 	unsigned long *a = (unsigned long *)STARPU_VARIABLE_GET_PTR(descr[0]);
															
 
																 	unsigned long *b = (unsigned long *)STARPU_VARIABLE_GET_PTR(descr[1]);
															
@@ -295,6 +298,7 @@ static void redux_cpu_func(void *descr[], void *cl_arg)
 
																 static struct starpu_codelet redux_codelet =
															
 
																 {
															
 
																 	.cpu_funcs = {redux_cpu_func, NULL},
															
 
																+	.cpu_funcs_name = {"redux_cpu_func", NULL},
															
 
																 #ifdef STARPU_HAVE_CURAND
															
 
																 	.cuda_funcs = {redux_cuda_func, NULL},
															
 
																 #endif
															
--- a/examples/ppm_downscaler/yuv_downscaler.c
+++ b/examples/ppm_downscaler/yuv_downscaler.c
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2010-2011  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010-2011, 2013  Université de Bordeaux 1
															
 
																  * Copyright (C) 2010  Mehdi Juhoor <mjuhoor@gmail.com>
															
 
																  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
															
 
																  *
															
@@ -52,7 +52,7 @@ void parse_args(int argc, char **argv)
 
																 #define FRAMESIZE	sizeof(struct yuv_frame)
															
 
																 #define NEW_FRAMESIZE	sizeof(struct yuv_new_frame)
															
 
																-static void ds_kernel_cpu(void *descr[], __attribute__((unused)) void *arg)
															
 
																+void ds_kernel_cpu(void *descr[], __attribute__((unused)) void *arg)
															
 
																 {
															
 
																 	uint8_t *input = (uint8_t *)STARPU_MATRIX_GET_PTR(descr[0]);
															
 
																 	const unsigned input_ld = STARPU_MATRIX_GET_LD(descr[0]);
															
@@ -86,6 +86,7 @@ static void ds_kernel_cpu(void *descr[], __attribute__((unused)) void *arg)
 
																 static struct starpu_codelet ds_codelet =
															
 
																 {
															
 
																 	.cpu_funcs = {ds_kernel_cpu, NULL},
															
 
																+	.cpu_funcs_name = {"ds_kernel_cpu", NULL},
															
 
																 	.nbuffers = 2, /* input -> output */
															
 
																 	.modes = {STARPU_R, STARPU_W},
															
 
																 	.model = NULL
															
--- a/examples/reductions/dot_product.c
+++ b/examples/reductions/dot_product.c
@@ -108,6 +108,7 @@ static struct starpu_codelet init_codelet =
 
																 {
															
 
																 	.can_execute = can_execute,
															
 
																 	.cpu_funcs = {init_cpu_func, NULL},
															
 
																+	.cpu_funcs_name = {"init_cpu_func", NULL},
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																 	.cuda_funcs = {init_cuda_func, NULL},
															
 
																 #endif
															
@@ -190,6 +191,7 @@ static struct starpu_codelet redux_codelet =
 
																 {
															
 
																 	.can_execute = can_execute,
															
 
																 	.cpu_funcs = {redux_cpu_func, NULL},
															
 
																+	.cpu_funcs_name = {"redux_cpu_func", NULL},
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																 	.cuda_funcs = {redux_cuda_func, NULL},
															
 
																 #endif
															
@@ -308,6 +310,7 @@ static struct starpu_codelet dot_codelet =
 
																 {
															
 
																 	.can_execute = can_execute,
															
 
																 	.cpu_funcs = {dot_cpu_func, NULL},
															
 
																+	.cpu_funcs_name = {"dot_cpu_func", NULL},
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																 	.cuda_funcs = {dot_cuda_func, NULL},
															
 
																 #endif
															
--- a/examples/reductions/minmax_reduction.c
+++ b/examples/reductions/minmax_reduction.c
@@ -44,7 +44,7 @@ static starpu_data_handle_t _minmax_handle;
 
																  *	Codelet to create a neutral element
															
 
																  */
															
 
																-static void minmax_neutral_cpu_func(void *descr[], void *cl_arg)
															
 
																+void minmax_neutral_cpu_func(void *descr[], void *cl_arg)
															
 
																 {
															
 
																 	TYPE *array = (TYPE *)STARPU_VARIABLE_GET_PTR(descr[0]);
															
@@ -58,6 +58,7 @@ static void minmax_neutral_cpu_func(void *descr[], void *cl_arg)
 
																 static struct starpu_codelet minmax_init_codelet =
															
 
																 {
															
 
																 	.cpu_funcs = {minmax_neutral_cpu_func, NULL},
															
 
																+	.cpu_funcs_name = {"minmax_neutral_cpu_func", NULL},
															
 
																 	.modes = {STARPU_W},
															
 
																 	.nbuffers = 1,
															
 
																 	.name = "init"
															
@@ -86,6 +87,7 @@ void minmax_redux_cpu_func(void *descr[], void *cl_arg)
 
																 static struct starpu_codelet minmax_redux_codelet =
															
 
																 {
															
 
																 	.cpu_funcs = {minmax_redux_cpu_func, NULL},
															
 
																+	.cpu_funcs_name = {"minmax_redux_cpu_func", NULL},
															
 
																 	.modes = {STARPU_RW, STARPU_R},
															
 
																 	.nbuffers = 2,
															
 
																 	.name = "redux"
															
@@ -122,6 +124,7 @@ void minmax_cpu_func(void *descr[], void *cl_arg)
 
																 static struct starpu_codelet minmax_codelet =
															
 
																 {
															
 
																 	.cpu_funcs = {minmax_cpu_func, NULL},
															
 
																+	.cpu_funcs_name = {"minmax_cpu_func", NULL},
															
 
																 	.nbuffers = 2,
															
 
																 	.modes = {STARPU_R, STARPU_REDUX},
															
 
																 	.name = "minmax"
															
--- a/examples/spmd/vector_scal_spmd.c
+++ b/examples/spmd/vector_scal_spmd.c
@@ -84,6 +84,7 @@ static struct starpu_codelet cl =
 
																 	.type = STARPU_SPMD,
															
 
																 	.max_parallelism = INT_MAX,
															
 
																 	.cpu_funcs = {scal_cpu_func, NULL},
															
 
																+	.cpu_funcs_name = {"scal_cpu_func", NULL},
															
 
																 	.nbuffers = 1,
															
 
																 	.model = &vector_scal_model,
															
 
																 };
															
--- a/examples/stencil/Makefile.am
+++ b/examples/stencil/Makefile.am
@@ -16,7 +16,7 @@
 
																 AM_CFLAGS = $(HWLOC_CFLAGS) $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
															
 
																 LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ $(HWLOC_LIBS) @LIBS@
															
 
																 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include
															
 
																-AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS)
															
 
																+AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(STARPU_COI_LDFLAGS)
															
 
																 if USE_MPI
															
 
																 LIBS += $(top_builddir)/mpi/src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
															
--- a/examples/stencil/stencil-kernels.c
+++ b/examples/stencil/stencil-kernels.c
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2010-2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010-2013  Université de Bordeaux 1
															
 
																  * Copyright (C) 2012  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -376,7 +376,7 @@ fprintf(stderr,"!!! DO update_func_opencl z %d OPENCL%d !!!\n", block->bz, worke
 
																 /*
															
 
																  * cl_update (CPU version)
															
 
																  */
															
 
																-static void update_func_cpu(void *descr[], void *arg)
															
 
																+void update_func_cpu(void *descr[], void *arg)
															
 
																 {
															
 
																 	struct block_description *block = (struct block_description *) arg;
															
 
																 	int workerid = starpu_worker_get_id();
															
@@ -457,6 +457,7 @@ static struct starpu_perfmodel cl_update_model =
 
																 struct starpu_codelet cl_update =
															
 
																 {
															
 
																 	.cpu_funcs = {update_func_cpu, NULL},
															
 
																+	.cpu_funcs_name = {"update_func_cpu", NULL},
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																 	.cuda_funcs = {update_func_cuda, NULL},
															
 
																 #endif
															
@@ -541,7 +542,7 @@ unsigned top_per_worker[STARPU_NMAXWORKERS];
 
																 unsigned bottom_per_worker[STARPU_NMAXWORKERS];
															
 
																 /* top save, CPU version */
															
 
																-static void dummy_func_top_cpu(void *descr[] __attribute__((unused)), void *arg)
															
 
																+void dummy_func_top_cpu(void *descr[] __attribute__((unused)), void *arg)
															
 
																 {
															
 
																 	struct block_description *block = (struct block_description *) arg;
															
 
																 	int workerid = starpu_worker_get_id();
															
@@ -557,7 +558,7 @@ static void dummy_func_top_cpu(void *descr[] __attribute__((unused)), void *arg)
 
																 }
															
 
																 /* bottom save, CPU version */
															
 
																-static void dummy_func_bottom_cpu(void *descr[] __attribute__((unused)), void *arg)
															
 
																+void dummy_func_bottom_cpu(void *descr[] __attribute__((unused)), void *arg)
															
 
																 {
															
 
																 	struct block_description *block = (struct block_description *) arg;
															
 
																 	int workerid = starpu_worker_get_id();
															
@@ -657,6 +658,7 @@ static struct starpu_perfmodel save_cl_top_model =
 
																 struct starpu_codelet save_cl_bottom =
															
 
																 {
															
 
																 	.cpu_funcs = {dummy_func_bottom_cpu, NULL},
															
 
																+	.cpu_funcs_name = {"dummy_func_bottom_cpu", NULL},
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																 	.cuda_funcs = {dummy_func_bottom_cuda, NULL},
															
 
																 #endif
															
@@ -671,6 +673,7 @@ struct starpu_codelet save_cl_bottom =
 
																 struct starpu_codelet save_cl_top =
															
 
																 {
															
 
																 	.cpu_funcs = {dummy_func_top_cpu, NULL},
															
 
																+	.cpu_funcs_name = {"dummy_func_top_cpu", NULL},
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																 	.cuda_funcs = {dummy_func_top_cuda, NULL},
															
 
																 #endif
															
--- a/examples/stencil/stencil-tasks.c
+++ b/examples/stencil/stencil-tasks.c
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2010  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010, 2013  Université de Bordeaux 1
															
 
																  * Copyright (C) 2012, 2013  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -213,11 +213,12 @@ void create_task_update(unsigned iter, unsigned z, int local_rank)
 
																 }
															
 
																 /* Dummy empty codelet taking one buffer */
															
 
																-static void null_func(void *descr[] __attribute__((unused)), void *arg __attribute__((unused))) { }
															
 
																+void null_func(void *descr[] __attribute__((unused)), void *arg __attribute__((unused))) { }
															
 
																 static struct starpu_codelet null =
															
 
																 {
															
 
																 	.modes = { STARPU_W, STARPU_W },
															
 
																 	.cpu_funcs = {null_func, NULL},
															
 
																+	.cpu_funcs_name = {"null_func", NULL},
															
 
																 	.cuda_funcs = {null_func, NULL},
															
 
																 	.opencl_funcs = {null_func, NULL},
															
 
																 	.nbuffers = 2
															
--- a/include/starpu.h
+++ b/include/starpu.h
@@ -92,6 +92,10 @@ struct starpu_conf
 
																 	int ncuda;
															
 
																 	/* number of GPU OpenCL device workers (-1 for default) */
															
 
																 	int nopencl;
															
 
																+	/* number of MIC device workers (-1 for default) */
															
 
																+	int nmic;
															
 
																+	/* number of SCC device workers (-1 for default) */
															
 
																+	int nscc;
															
 
																 	unsigned use_explicit_workers_bindid;
															
 
																 	unsigned workers_bindid[STARPU_NMAXWORKERS];
															
@@ -102,6 +106,12 @@ struct starpu_conf
 
																 	unsigned use_explicit_workers_opencl_gpuid;
															
 
																 	unsigned workers_opencl_gpuid[STARPU_NMAXWORKERS];
															
 
																+	unsigned use_explicit_workers_mic_deviceid;
															
 
																+	unsigned workers_mic_deviceid[STARPU_NMAXWORKERS];
															
 
																+
															
 
																+	unsigned use_explicit_workers_scc_deviceid;
															
 
																+	unsigned workers_scc_deviceid[STARPU_NMAXWORKERS];
															
 
																+
															
 
																 	/* calibrate bus (-1 for default) */
															
 
																 	int bus_calibrate;
															
@@ -111,6 +121,10 @@ struct starpu_conf
 
																 	/* Create only one combined worker, containing all CPU workers */
															
 
																 	int single_combined_worker;
															
 
																+	/* Path to the kernel to execute on the MIC device, compiled
															
 
																+	 * for MIC architecture. */
															
 
																+	char *mic_sink_program_path;
															
 
																+
															
 
																 	/* indicate if all asynchronous copies should be disabled */
															
 
																 	int disable_asynchronous_copy;
															
@@ -120,6 +134,9 @@ struct starpu_conf
 
																 	/* indicate if asynchronous copies to OpenCL devices should be disabled */
															
 
																 	int disable_asynchronous_opencl_copy;
															
 
																+	/* indicate if asynchronous copies to MIC devices should be disabled */
															
 
																+	int disable_asynchronous_mic_copy;
															
 
																+
															
 
																 	/* Enable CUDA/OpenGL interoperation on these CUDA devices */
															
 
																 	unsigned *cuda_opengl_interoperability;
															
 
																 	unsigned n_cuda_opengl_interoperability;
															
@@ -140,6 +157,12 @@ int starpu_conf_init(struct starpu_conf *conf);
 
																  */
															
 
																 int starpu_init(struct starpu_conf *conf) STARPU_WARN_UNUSED_RESULT;
															
 
																+/* Alternative initialization method with argc and argv. This is used by
															
 
																+ * MIC, MPI, and SCC implementations.
															
 
																+ * Don't call starpu_init and starpu_initialize in the same program.
															
 
																+ */
															
 
																+int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv);
															
 
																+
															
 
																 /* Shutdown method: note that statistics are only generated once StarPU is
															
 
																  * shutdown */
															
 
																 void starpu_shutdown(void);
															
@@ -156,6 +179,8 @@ void starpu_display_stats();
 
																 void starpu_get_version(int *major, int *minor, int *release);
															
 
																+int starpu_worker_get_mp_nodeid(int id);
															
 
																+
															
 
																 #ifdef __cplusplus
															
 
																 }
															
 
																 #endif
															
--- a/include/starpu_config.h.in
+++ b/include/starpu_config.h.in
@@ -25,6 +25,8 @@
 
																 #undef STARPU_USE_CPU
															
 
																 #undef STARPU_USE_CUDA
															
 
																 #undef STARPU_USE_OPENCL
															
 
																+#undef STARPU_USE_MIC
															
 
																+#undef STARPU_USE_SCC
															
 
																 #undef STARPU_SIMGRID
															
@@ -70,9 +72,12 @@
 
																 #undef STARPU_MAXCPUS
															
 
																 #undef STARPU_MAXCUDADEVS
															
 
																 #undef STARPU_MAXOPENCLDEVS
															
 
																+#undef STARPU_MAXMICDEVS
															
 
																+#undef STARPU_MAXSCCDEVS
															
 
																 #undef STARPU_NMAXWORKERS
															
 
																 #undef STARPU_NMAX_SCHED_CTXS
															
 
																 #undef STARPU_MAXIMPLEMENTATIONS
															
 
																+#undef STARPU_MAXMPKERNELS
															
 
																 #undef STARPU_USE_SC_HYPERVISOR
															
 
																 #undef STARPU_HAVE_GLPK_H
															
--- a/include/starpu_data.h
+++ b/include/starpu_data.h
@@ -35,7 +35,9 @@ enum starpu_data_access_mode
 
																 	STARPU_W=(1<<1),
															
 
																 	STARPU_RW=(STARPU_R|STARPU_W),
															
 
																 	STARPU_SCRATCH=(1<<2),
															
 
																-	STARPU_REDUX=(1<<3)
															
 
																+	STARPU_REDUX=(1<<3),
															
 
																+	STARPU_COMMUTE=(1<<4)
															
 
																+	/* Note: other STARPU_* values in include/starpu_task_util.h */
															
 
																 };
															
 
																 struct starpu_data_descr
															
@@ -102,7 +104,14 @@ enum starpu_node_kind
 
																 	STARPU_UNUSED     = 0x00,
															
 
																 	STARPU_CPU_RAM    = 0x01,
															
 
																 	STARPU_CUDA_RAM   = 0x02,
															
 
																-	STARPU_OPENCL_RAM = 0x03
															
 
																+	STARPU_OPENCL_RAM = 0x03,
															
 
																+	STARPU_MIC_RAM    = 0x05,
															
 
																+
															
 
																+	/* This node kind is not used anymore, but implementations in interfaces
															
 
																+	 * will be useful for MPI. */
															
 
																+	STARPU_SCC_RAM    = 0x06,
															
 
																+
															
 
																+	STARPU_SCC_SHM    = 0x07
															
 
																 };
															
 
																 unsigned starpu_worker_get_memory_node(unsigned workerid);
															
--- a/include/starpu_data_interfaces.h
+++ b/include/starpu_data_interfaces.h
@@ -45,6 +45,7 @@ struct starpu_data_copy_methods
 
																 	int (*ram_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																 	int (*ram_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																 	int (*ram_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																+	int (*ram_to_mic)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																 	/* src type is cuda */
															
 
																 	int (*cuda_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
@@ -56,6 +57,14 @@ struct starpu_data_copy_methods
 
																 	int (*opencl_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																 	int (*opencl_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																+	/* src type is mic */
															
 
																+	int (*mic_to_ram)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node);
															
 
																+
															
 
																+	/* scc case */
															
 
																+	int (*scc_src_to_sink)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																+	int (*scc_sink_to_src)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																+	int (*scc_sink_to_sink)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																+
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																 	/* for asynchronous CUDA transfers */
															
 
																 	int (*ram_to_cuda_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream);
															
@@ -74,6 +83,12 @@ struct starpu_data_copy_methods
 
																 	int (*opencl_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event);
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+	/* Asynchronous MIC transfers */
															
 
																+	int (*ram_to_mic_async)(void *src_intreface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																+	int (*mic_to_ram_async)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node);
															
 
																+#endif
															
 
																+
															
 
																 	int (*any_to_any)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data);
															
 
																 };
															
@@ -162,6 +177,8 @@ extern struct starpu_data_interface_ops starpu_interface_matrix_ops;
 
																 /* Matrix interface for dense matrices */
															
 
																 struct starpu_matrix_interface
															
 
																 {
															
 
																+	enum starpu_data_interface_id id;
															
 
																+
															
 
																 	uintptr_t ptr;
															
 
																 	uintptr_t dev_handle;
															
 
																 	size_t offset;
															
@@ -192,6 +209,8 @@ size_t starpu_matrix_get_elemsize(starpu_data_handle_t handle);
 
																  */
															
 
																 struct starpu_coo_interface
															
 
																 {
															
 
																+	enum starpu_data_interface_id id;
															
 
																+
															
 
																 	uint32_t  *columns;
															
 
																 	uint32_t  *rows;
															
 
																 	uintptr_t values;
															
@@ -229,6 +248,8 @@ void starpu_coo_data_register(starpu_data_handle_t *handleptr, unsigned home_nod
 
																 /* TODO: rename to 3dmatrix? */
															
 
																 struct starpu_block_interface
															
 
																 {
															
 
																+	enum starpu_data_interface_id id;
															
 
																+
															
 
																 	uintptr_t ptr;
															
 
																 	uintptr_t dev_handle;
															
 
																 	size_t offset;
															
@@ -263,6 +284,8 @@ size_t starpu_block_get_elemsize(starpu_data_handle_t handle);
 
																 /* vector interface for contiguous (non-strided) buffers */
															
 
																 struct starpu_vector_interface
															
 
																 {
															
 
																+	enum starpu_data_interface_id id;
															
 
																+
															
 
																 	uintptr_t ptr;
															
 
																 	uintptr_t dev_handle;
															
 
																 	size_t offset;
															
@@ -285,9 +308,12 @@ uintptr_t starpu_vector_get_local_ptr(starpu_data_handle_t handle);
 
																 /* variable interface for a single data (not a vector, a matrix, a list, ...) */
															
 
																 struct starpu_variable_interface
															
 
																 {
															
 
																+	enum starpu_data_interface_id id;
															
 
																+
															
 
																 	uintptr_t ptr;
															
 
																+	uintptr_t dev_handle;
															
 
																+	size_t offset;
															
 
																 	size_t elemsize;
															
 
																-	/* No dev_handle, since it can not be filtered, offset will always be zero */
															
 
																 };
															
 
																 void starpu_variable_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, size_t size);
															
@@ -296,10 +322,10 @@ uintptr_t starpu_variable_get_local_ptr(starpu_data_handle_t handle);
 
																 /* helper methods */
															
 
																 #define STARPU_VARIABLE_GET_PTR(interface)	(((struct starpu_variable_interface *)(interface))->ptr)
															
 
																+#define STARPU_VARIABLE_GET_OFFSET(interface)	(((struct starpu_variable_interface *)(interface))->offset)
															
 
																 #define STARPU_VARIABLE_GET_ELEMSIZE(interface)	(((struct starpu_variable_interface *)(interface))->elemsize)
															
 
																 #define STARPU_VARIABLE_GET_DEV_HANDLE(interface) \
															
 
																 	(((struct starpu_variable_interface *)(interface))->ptr)
															
 
																-#define STARPU_VARIABLE_GET_OFFSET 0
															
 
																 /* void interface. There is no data really associated to that interface, but it
															
 
																  * may be used as a synchronization mechanism. It also permits to express an
															
@@ -311,6 +337,8 @@ void starpu_void_data_register(starpu_data_handle_t *handle);
 
																 /* CSR interface for sparse matrices (compressed sparse row representation) */
															
 
																 struct starpu_csr_interface
															
 
																 {
															
 
																+	enum starpu_data_interface_id id;
															
 
																+
															
 
																 	uint32_t nnz; /* number of non-zero entries */
															
 
																 	uint32_t nrow; /* number of rows */
															
 
																 	uintptr_t nzval; /* non-zero values */
															
@@ -352,6 +380,8 @@ size_t starpu_csr_get_elemsize(starpu_data_handle_t handle);
 
																  * representation) */
															
 
																 struct starpu_bcsr_interface
															
 
																 {
															
 
																+	enum starpu_data_interface_id id;
															
 
																+
															
 
																 	uint32_t nnz; /* number of non-zero BLOCKS */
															
 
																 	uint32_t nrow; /* number of rows (in terms of BLOCKS) */
															
@@ -406,13 +436,19 @@ struct starpu_multiformat_data_interface_ops
 
																 	size_t cuda_elemsize;
															
 
																 	struct starpu_codelet *cpu_to_cuda_cl;
															
 
																 	struct starpu_codelet *cuda_to_cpu_cl;
															
 
																+	size_t mic_elemsize;
															
 
																+	struct starpu_codelet *cpu_to_mic_cl;
															
 
																+	struct starpu_codelet *mic_to_cpu_cl;
															
 
																 };
															
 
																 struct starpu_multiformat_interface
															
 
																 {
															
 
																+	enum starpu_data_interface_id id;
															
 
																+
															
 
																 	void *cpu_ptr;
															
 
																 	void *cuda_ptr;
															
 
																 	void *opencl_ptr;
															
 
																+	void *mic_ptr;
															
 
																 	uint32_t nx;
															
 
																 	struct starpu_multiformat_data_interface_ops *ops;
															
 
																 };
															
@@ -422,6 +458,7 @@ void starpu_multiformat_data_register(starpu_data_handle_t *handle, unsigned hom
 
																 #define STARPU_MULTIFORMAT_GET_CPU_PTR(interface)  (((struct starpu_multiformat_interface *)(interface))->cpu_ptr)
															
 
																 #define STARPU_MULTIFORMAT_GET_CUDA_PTR(interface) (((struct starpu_multiformat_interface *)(interface))->cuda_ptr)
															
 
																 #define STARPU_MULTIFORMAT_GET_OPENCL_PTR(interface) (((struct starpu_multiformat_interface *)(interface))->opencl_ptr)
															
 
																+#define STARPU_MULTIFORMAT_GET_MIC_PTR(interface) (((struct starpu_multiformat_interface *)(interface))->mic_ptr)
															
 
																 #define STARPU_MULTIFORMAT_GET_NX(interface)  (((struct starpu_multiformat_interface *)(interface))->nx)
															
 
																 enum starpu_data_interface_id starpu_data_get_interface_id(starpu_data_handle_t handle);
															
--- a/include/starpu_mic.h
+++ b/include/starpu_mic.h
@@ -0,0 +1,35 @@
 
																+/* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																+ *
															
 
																+ * Copyright (C) 2012  Inria
															
 
																+ *
															
 
																+ * StarPU is free software; you can redistribute it and/or modify
															
 
																+ * it under the terms of the GNU Lesser General Public License as published by
															
 
																+ * the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																+ * your option) any later version.
															
 
																+ *
															
 
																+ * StarPU is distributed in the hope that it will be useful, but
															
 
																+ * WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																+ *
															
 
																+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																+ */
															
 
																+
															
 
																+
															
 
																+#ifndef __STARPU_MIC_H__
															
 
																+#define __STARPU_MIC_H__
															
 
																+
															
 
																+#include <starpu_config.h>
															
 
																+
															
 
																+
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+
															
 
																+typedef void *starpu_mic_func_symbol_t;
															
 
																+
															
 
																+int starpu_mic_register_kernel(starpu_mic_func_symbol_t *symbol, const char *func_name);
															
 
																+
															
 
																+starpu_mic_kernel_t starpu_mic_get_kernel(starpu_mic_func_symbol_t symbol);
															
 
																+
															
 
																+#endif /* STARPU_USE_MIC */
															
 
																+
															
 
																+
															
 
																+#endif /* __STARPU_MIC_H__ */
															
--- a/include/starpu_perfmodel.h
+++ b/include/starpu_perfmodel.h
@@ -43,8 +43,10 @@ enum starpu_perfmodel_archtype
 
																 	STARPU_CPU_DEFAULT = 0,
															
 
																 	/* CPU combined workers between 0 and STARPU_MAXCPUS-1 */
															
 
																 	STARPU_CUDA_DEFAULT = STARPU_MAXCPUS,
															
 
																-	STARPU_OPENCL_DEFAULT = STARPU_CUDA_DEFAULT + STARPU_MAXCUDADEVS
															
 
																+	STARPU_OPENCL_DEFAULT = STARPU_CUDA_DEFAULT + STARPU_MAXCUDADEVS,
															
 
																 	/* STARPU_OPENCL_DEFAULT + devid */
															
 
																+	STARPU_MIC_DEFAULT = STARPU_OPENCL_DEFAULT + STARPU_MAXOPENCLDEVS,
															
 
																+	STARPU_SCC_DEFAULT = STARPU_MIC_DEFAULT + STARPU_MAXMICDEVS
															
 
																 };
															
 
																 #ifdef __STDC_VERSION__
															
@@ -54,15 +56,19 @@ enum starpu_perfmodel_archtype
 
																 _Static_assert(STARPU_CPU_DEFAULT == 0,
															
 
																 	       "invalid STARPU_CPU_DEFAULT value");
															
 
																-_Static_assert(STARPU_CUDA_DEFAULT > STARPU_CPU_DEFAULT,
															
 
																-	       "invalid STARPU_CPU_DEFAULT value");
															
 
																+_Static_assert(STARPU_CPU_DEFAULT < STARPU_CUDA_DEFAULT,
															
 
																+	       "invalid STARPU_{CPU,CUDA}_DEFAULT values");
															
 
																 _Static_assert(STARPU_CUDA_DEFAULT < STARPU_OPENCL_DEFAULT,
															
 
																 	       "invalid STARPU_{CUDA,OPENCL}_DEFAULT values");
															
 
																+_Static_assert(STARPU_OPENCL_DEFAULT < STARPU_MIC_DEFAULT,
															
 
																+	       "invalid STARPU_{OPENCL,MIC}_DEFAULT values");
															
 
																+_Static_assert(STARPU_MIC_DEFAULT < STARPU_SCC_DEFAULT,
															
 
																+	       "invalid STARPU_{MIC,SCC}_DEFAULT values");
															
 
																 #  endif
															
 
																 #endif
															
 
																-#define STARPU_NARCH_VARIATIONS	(STARPU_OPENCL_DEFAULT + STARPU_MAXOPENCLDEVS)
															
 
																+#define STARPU_NARCH_VARIATIONS	(STARPU_MIC_DEFAULT + STARPU_MAXMICDEVS) /* NOTE(review): same value as STARPU_SCC_DEFAULT, so no index is counted for SCC -- confirm this is intended */
															
 
																 struct starpu_perfmodel_history_entry
															
 
																 {
															
--- a/include/starpu_scc.h
+++ b/include/starpu_scc.h
@@ -0,0 +1,35 @@
 
																+/* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																+ *
															
 
																+ * Copyright (C) 2012  Inria
															
 
																+ *
															
 
																+ * StarPU is free software; you can redistribute it and/or modify
															
 
																+ * it under the terms of the GNU Lesser General Public License as published by
															
 
																+ * the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																+ * your option) any later version.
															
 
																+ *
															
 
																+ * StarPU is distributed in the hope that it will be useful, but
															
 
																+ * WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																+ *
															
 
																+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																+ */
															
 
																+
															
 
																+
															
 
																+#ifndef __STARPU_SCC_H__
															
 
																+#define __STARPU_SCC_H__
															
 
																+
															
 
																+#include <starpu_config.h>
															
 
																+
															
 
																+
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+
															
 
																+typedef void *starpu_scc_func_symbol_t;
															
 
																+
															
 
																+int starpu_scc_register_kernel(starpu_scc_func_symbol_t *symbol, const char *func_name);
															
 
																+
															
 
																+starpu_scc_kernel_t starpu_scc_get_kernel(starpu_scc_func_symbol_t symbol);
															
 
																+
															
 
																+#endif /* STARPU_USE_SCC */
															
 
																+
															
 
																+
															
 
																+#endif /* __STARPU_SCC_H__ */
															
--- a/include/starpu_sched_ctx.h
+++ b/include/starpu_sched_ctx.h
@@ -114,7 +114,7 @@ void* starpu_sched_ctx_get_policy_data(unsigned sched_ctx_id);
 
																  * WORKERS IN CONTEXT 
															
 
																 */
															
 
																 /* create a worker collection for a context, the type can be only STARPU_WORKER_LIST for now, which corresponds to a simple list */
															
 
																-struct starpu_worker_collection* starpu_sched_ctx_create_worker_collection(unsigned sched_ctx_id, int type);
															
 
																+struct starpu_worker_collection* starpu_sched_ctx_create_worker_collection(unsigned sched_ctx_id, enum starpu_worker_collection_type type);
															
 
																 /* free the worker collection when removing the context */
															
 
																 void starpu_sched_ctx_delete_worker_collection(unsigned sched_ctx_id);
															
--- a/include/starpu_sink.h
+++ b/include/starpu_sink.h
@@ -0,0 +1,23 @@
 
																+/* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																+ *
															
 
																+ * Copyright (C) 2012  Inria
															
 
																+ *
															
 
																+ * StarPU is free software; you can redistribute it and/or modify
															
 
																+ * it under the terms of the GNU Lesser General Public License as published by
															
 
																+ * the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																+ * your option) any later version.
															
 
																+ *
															
 
																+ * StarPU is distributed in the hope that it will be useful, but
															
 
																+ * WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																+ *
															
 
																+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																+ */
															
 
																+
															
 
																+
															
 
																+#ifndef __STARPU_SINK_H__
															
 
																+#define __STARPU_SINK_H__
															
 
																+
															
 
																+void starpu_sink_common_worker(int argc, char **argv);
															
 
																+
															
 
																+#endif /* __STARPU_SINK_H__ */
															
--- a/include/starpu_task.h
+++ b/include/starpu_task.h
@@ -37,6 +37,8 @@ extern "C"
 
																 #define STARPU_CPU	((1ULL)<<1)
															
 
																 #define STARPU_CUDA	((1ULL)<<3)
															
 
																 #define STARPU_OPENCL	((1ULL)<<6)
															
 
																+#define STARPU_MIC	((1ULL)<<7)
															
 
																+#define STARPU_SCC	((1ULL)<<8)
															
 
																 /* Codelet types */
															
 
																 enum starpu_codelet_type
															
@@ -65,6 +67,11 @@ typedef uint64_t starpu_tag_t;
 
																 typedef void (*starpu_cpu_func_t)(void **, void*);    /* CPU core */
															
 
																 typedef void (*starpu_cuda_func_t)(void **, void*);   /* NVIDIA CUDA device */
															
 
																 typedef void (*starpu_opencl_func_t)(void **, void*); /* OpenCL CUDA device */
															
 
																+typedef void (*starpu_mic_kernel_t)(void **, void*); /* MIC device */
															
 
																+typedef void (*starpu_scc_kernel_t)(void **, void*); /* SCC device */
															
 
																+
															
 
																+typedef starpu_mic_kernel_t (*starpu_mic_func_t)(void);
															
 
																+typedef starpu_scc_kernel_t (*starpu_scc_func_t)(void);
															
 
																 #define STARPU_MULTIPLE_CPU_IMPLEMENTATIONS    ((starpu_cpu_func_t) -1)
															
 
																 #define STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS   ((starpu_cuda_func_t) -1)
															
@@ -91,6 +98,10 @@ struct starpu_codelet
 
																 	starpu_cpu_func_t cpu_funcs[STARPU_MAXIMPLEMENTATIONS];
															
 
																 	starpu_cuda_func_t cuda_funcs[STARPU_MAXIMPLEMENTATIONS];
															
 
																 	starpu_opencl_func_t opencl_funcs[STARPU_MAXIMPLEMENTATIONS];
															
 
																+	starpu_mic_func_t mic_funcs[STARPU_MAXIMPLEMENTATIONS];
															
 
																+	starpu_scc_func_t scc_funcs[STARPU_MAXIMPLEMENTATIONS];
															
 
																+
															
 
																+	char *cpu_funcs_name[STARPU_MAXIMPLEMENTATIONS];
															
 
																 	/* how many buffers do the codelet takes as argument ? */
															
 
																 	unsigned nbuffers;
															
--- a/include/starpu_task_util.h
+++ b/include/starpu_task_util.h
@@ -35,18 +35,18 @@ void starpu_create_sync_task(starpu_tag_t sync_tag, unsigned ndeps, starpu_tag_t
 
																 				void (*callback)(void *), void *callback_arg);
															
 
																 /* Constants used by the starpu_insert_task helper to determine the different types of argument */
															
 
																-#define STARPU_VALUE		(1<<4)	/* Pointer to a constant value */
															
 
																-#define STARPU_CALLBACK		(1<<5)	/* Callback function */
															
 
																-#define STARPU_CALLBACK_WITH_ARG	(1<<6)	/* Callback function */
															
 
																-#define STARPU_CALLBACK_ARG	(1<<7)	/* Argument of the callback function (of type void *) */
															
 
																-#define STARPU_PRIORITY		(1<<8)	/* Priority associated to the task */
															
 
																-#define STARPU_EXECUTE_ON_NODE	(1<<9)	/* Used by MPI to define which task is going to execute the codelet */
															
 
																-#define STARPU_EXECUTE_ON_DATA	(1<<10)	/* Used by MPI to define which task is going to execute the codelet */
															
 
																-#define STARPU_DATA_ARRAY       (1<<11) /* Array of data handles */
															
 
																-#define STARPU_TAG              (1<<12) /* Tag */
															
 
																-#define STARPU_HYPERVISOR_TAG	(1<<13)	/* Used to tag a task after whose execution we'll execute  a code */
															
 
																-#define STARPU_FLOPS	        (1<<14)	/* Used to specify the number of flops needed to be executed by a task */
															
 
																-#define STARPU_SCHED_CTX	(1<<15)	/* Used to specify the sched_ctx to which the task will be submitted */
															
 
																+#define STARPU_VALUE		(1<<19)	/* Pointer to a constant value */
															
 
																+#define STARPU_CALLBACK		(1<<20)	/* Callback function */
															
 
																+#define STARPU_CALLBACK_WITH_ARG	(1<<21)	/* Callback function */
															
 
																+#define STARPU_CALLBACK_ARG	(1<<22)	/* Argument of the callback function (of type void *) */
															
 
																+#define STARPU_PRIORITY		(1<<23)	/* Priority associated to the task */
															
 
																+#define STARPU_EXECUTE_ON_NODE	(1<<24)	/* Used by MPI to define which task is going to execute the codelet */
															
 
																+#define STARPU_EXECUTE_ON_DATA	(1<<25)	/* Used by MPI to define which task is going to execute the codelet */
															
 
																+#define STARPU_DATA_ARRAY       (1<<26) /* Array of data handles */
															
 
																+#define STARPU_TAG              (1<<27) /* Tag */
															
 
																+#define STARPU_HYPERVISOR_TAG	(1<<28)	/* Used to tag a task after whose execution we'll execute  a code */
															
 
																+#define STARPU_FLOPS	        (1<<29)	/* Used to specify the number of flops needed to be executed by a task */
															
 
																+#define STARPU_SCHED_CTX	(1<<30)	/* Used to specify the sched_ctx to which the task will be submitted */
															
 
																 /* Wrapper to create a task. */
															
 
																 int starpu_insert_task(struct starpu_codelet *cl, ...);
															
--- a/include/starpu_util.h
+++ b/include/starpu_util.h
@@ -177,7 +177,7 @@ STARPU_ATOMIC_SOMETHING(or, old | value)
 
																 #define STARPU_SYNCHRONIZE() __asm__ __volatile__("sync" ::: "memory")
															
 
																 #endif
															
 
																-#if defined(__i386__)
															
 
																+#if defined(__i386__) || defined(__KNC__) || defined(__KNF__) /* NOTE(review): the barriers below address the stack through %esp; confirm this is valid for KNC/KNF builds (mic-configure targets x86_64-k1om) */
															
 
																 #define STARPU_RMB() __asm__ __volatile__("lock; addl $0,0(%%esp)" ::: "memory")
															
 
																 #define STARPU_WMB() __asm__ __volatile__("lock; addl $0,0(%%esp)" ::: "memory")
															
 
																 #elif defined(__x86_64__)
															
--- a/include/starpu_worker.h
+++ b/include/starpu_worker.h
@@ -36,7 +36,17 @@ enum starpu_worker_archtype
 
																 	STARPU_ANY_WORKER,    /* any worker, used in the hypervisor */
															
 
																 	STARPU_CPU_WORKER,    /* CPU core */
															
 
																 	STARPU_CUDA_WORKER,   /* NVIDIA CUDA device */
															
 
																-	STARPU_OPENCL_WORKER  /* OpenCL device */
															
 
																+	STARPU_OPENCL_WORKER, /* OpenCL device */
															
 
																+	STARPU_MIC_WORKER,    /* Intel MIC device */
															
 
																+	STARPU_SCC_WORKER     /* Intel SCC device */
															
 
																+};
															
 
																+
															
 
																+/* Represent the topology of sink devices, contains useful information about
															
 
																+ * their capabilities */
															
 
																+// XXX: unused.
															
 
																+struct starpu_sink_topology
															
 
																+{
															
 
																+	unsigned nb_cpus;
															
 
																 };
															
 
																 struct starpu_sched_ctx_iterator
															
@@ -61,10 +71,20 @@ struct starpu_machine_topology
 
																 	unsigned nhwcpus;
															
 
																 	unsigned nhwcudagpus;
															
 
																 	unsigned nhwopenclgpus;
															
 
																+	unsigned nhwscc;
															
 
																 	unsigned ncpus;
															
 
																 	unsigned ncudagpus;
															
 
																 	unsigned nopenclgpus;
															
 
																+	unsigned nsccdevices;
															
 
																+
															
 
																+	/* Topology of MP nodes (mainly MIC and SCC) as well as necessary
															
 
																+	 * objects to communicate with them. */
															
 
																+	unsigned nhwmicdevices;
															
 
																+	unsigned nmicdevices;
															
 
																+
															
 
																+	unsigned nhwmiccores[STARPU_MAXMICDEVS]; // Each MIC node has its set of cores.
															
 
																+	unsigned nmiccores[STARPU_MAXMICDEVS];
															
 
																 	/* Where to bind workers ? */
															
 
																 	unsigned workers_bindid[STARPU_NMAXWORKERS];
															
@@ -74,6 +94,18 @@ struct starpu_machine_topology
 
																 	/* Which GPU(s) do we use for OpenCL ? */
															
 
																 	unsigned workers_opencl_gpuid[STARPU_NMAXWORKERS];
															
 
																+
															
 
																+	/* Which MIC core(s) do we use ? */
															
 
																+	/* unsigned workers_mic_deviceid[STARPU_NMAXWORKERS]; */
															
 
																+
															
 
																+	/* Which SCC(s) do we use ? */
															
 
																+	unsigned workers_scc_deviceid[STARPU_NMAXWORKERS];
															
 
																+};
															
 
																+
															
 
																+/* types of structures the worker collection can implement */
															
 
																+enum starpu_worker_collection_type
															
 
																+{
															
 
																+	STARPU_WORKER_LIST
															
 
																 };
															
 
																 /* generic structure used by the scheduling contexts to iterate the workers */
															
@@ -83,8 +115,8 @@ struct starpu_worker_collection
 
																 	void *workerids;
															
 
																 	/* the number of workers in the collection */
															
 
																 	unsigned nworkers;
															
 
																-	/* the type of structure (STARPU_WORKER_LIST,...) */
															
 
																-	int type;
															
 
																+	/* the type of structure */
															
 
																+	enum starpu_worker_collection_type type;
															
 
																 	/* checks if there is another element in collection */
															
 
																 	unsigned (*has_next)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it);
															
 
																 	/* return the next element in the collection */
															
@@ -101,9 +133,6 @@ struct starpu_worker_collection
 
																 	void (*init_iterator)(struct starpu_worker_collection *workers, struct starpu_sched_ctx_iterator *it);
															
 
																 };
															
 
																-/* types of structures the worker collection can implement */
															
 
																-#define STARPU_WORKER_LIST 0
															
 
																-
															
 
																 /* This function returns the number of workers (ie. processing units executing
															
 
																  * StarPU tasks). The returned value should be at most STARPU_NMAXWORKERS. */
															
 
																 unsigned starpu_worker_get_count(void);
															
@@ -113,6 +142,10 @@ unsigned starpu_worker_is_combined_worker(int id);
 
																 unsigned starpu_cpu_worker_get_count(void);
															
 
																 unsigned starpu_cuda_worker_get_count(void);
															
 
																 unsigned starpu_opencl_worker_get_count(void);
															
 
																+unsigned starpu_mic_worker_get_count(void);
															
 
																+unsigned starpu_scc_worker_get_count(void);
															
 
																+
															
 
																+unsigned starpu_mic_device_get_count(void);
															
 
																 /* Return the identifier of the thread in case this is associated to a worker.
															
 
																  * This will return -1 if this function is called directly from the application
															
@@ -166,6 +199,8 @@ void starpu_worker_get_name(int id, char *dst, size_t maxlen);
 
																  */
															
 
																 int starpu_worker_get_devid(int id);
															
 
																+int starpu_worker_get_mp_nodeid(int id);
															
 
																+
															
 
																 #ifdef __cplusplus
															
 
																 }
															
 
																 #endif
															
--- a/libstarpu-mic.pc.in
+++ b/libstarpu-mic.pc.in
@@ -0,0 +1,28 @@
 
																+# StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																+#
															
 
																+# Copyright (C) 2009-2011, 2013  Université de Bordeaux 1
															
 
																+# Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
															
 
																+#
															
 
																+# StarPU is free software; you can redistribute it and/or modify
															
 
																+# it under the terms of the GNU Lesser General Public License as published by
															
 
																+# the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																+# your option) any later version.
															
 
																+#
															
 
																+# StarPU is distributed in the hope that it will be useful, but
															
 
																+# WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																+#
															
 
																+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																+
															
 
																+prefix=@prefix@
															
 
																+exec_prefix=@exec_prefix@
															
 
																+libdir=@libdir@
															
 
																+includedir=@includedir@
															
 
																+
															
 
																+Name: starpu
															
 
																+Description: offers support for heterogeneous multicore architecture
															
 
																+Version: @PACKAGE_VERSION@
															
 
																+Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_API
															
 
																+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_LDFLAGS@ @STARPU_OPENCL_LDFLAGS@ @STARPU_SC_HYPERVISOR@
															
 
																+Libs.private: @LDFLAGS@ @LIBS@
															
 
																+Requires: @HWLOC_REQUIRES@
															
--- a/libstarpu.pc.in
+++ b/libstarpu.pc.in
@@ -23,6 +23,6 @@ Name: starpu
 
																 Description: offers support for heterogeneous multicore architecture
															
 
																 Version: @PACKAGE_VERSION@
															
 
																 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_API
															
 
																-Libs: -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_LDFLAGS@ @STARPU_OPENCL_LDFLAGS@ @STARPU_SC_HYPERVISOR@
															
 
																+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_LDFLAGS@ @STARPU_OPENCL_LDFLAGS@ @STARPU_SC_HYPERVISOR@
															
 
																 Libs.private: @LDFLAGS@ @LIBS@
															
 
																 Requires: @HWLOC_REQUIRES@
															
--- a/mic-configure
+++ b/mic-configure
@@ -0,0 +1,90 @@
 
																+#!/bin/bash
															
 
																+
															
 
																+ROOT_DIR=$PWD
															
 
																+[ -n "$MIC_HOST" ] || MIC_HOST=x86_64-k1om-linux
															
 
																+[ -n "$MIC_CC_PATH" ] || MIC_CC_PATH=/usr/linux-k1om-4.7/bin/
															
 
																+[ -n "$COI_DIR" ] || COI_DIR=/opt/intel/mic/coi
															
 
																+DEFAULT_PREFIX=/usr/local
															
 
																+
															
 
																+export PATH=${MIC_CC_PATH}${PATH:+:${PATH}}
															
 
																+
															
 
																+cat > ./mic-config.log << EOF
															
 
																+This file was created by StarPU mic-configure
															
 
																+
															
 
																+ $ $0 $*
															
 
																+EOF
															
 
																+
															
 
																+for arch in mic host
															
 
																+do
															
 
																+
															
 
																+	# We call the configure script from a per-architecture build
															
 
																+	# directory (build_mic or build_host) located under the source root
															
 
																+	command="${ROOT_DIR}/configure --enable-mic --with-coi-dir=$COI_DIR"
															
 
																+	prefix_found=no
															
 
																+
															
 
																+	if test x$arch = xmic ; then
															
 
																+		command="$command --without-hwloc --with-coi-lib-dir=$COI_DIR/device-linux-release/lib --host=$MIC_HOST"
															
 
																+	else
															
 
																+		command="$command --with-coi-lib-dir=$COI_DIR/host-linux-release/lib"
															
 
																+	fi
															
 
																+
															
 
																+	for arg in $*
															
 
																+	do
															
 
																+		if [ ${arg:0:9} = '--prefix=' ]
															
 
																+		then
															
 
																+			prefix_found=yes
															
 
																+			prefix="${arg:9}"
															
 
																+			command="$command ${arg}/${arch}"
															
 
																+		else
															
 
																+			command="$command $arg"
															
 
																+		fi
															
 
																+
															
 
																+	done
															
 
																+
															
 
																+	# If the user didn't specify a directory where to install the library
															
 
																+	# we apply the default one
															
 
																+	if test x$prefix_found = xno ; then
															
 
																+		command="$command --prefix=${DEFAULT_PREFIX}/$arch"
															
 
																+		prefix=${DEFAULT_PREFIX}
															
 
																+	fi
															
 
																+
															
 
																+	# If the build directory doesn't exist yet, create it
															
 
																+	if [ ! -d "${ROOT_DIR}/build_${arch}" ] ; then
															
 
																+		mkdir "build_${arch}"
															
 
																+	fi
															
 
																+
															
 
																+	cd "build_${arch}"
															
 
																+
															
 
																+	if test x$arch = xmic ; then
															
 
																+		LDFLAGS=-export-dynamic $command
															
 
																+	else
															
 
																+		$command
															
 
																+	fi
															
 
																+	status=$?	# save configure's status: the [ test below overwrites $?
															
 
																+	if [ "$status" != 0 ] ; then
															
 
																+		exit "$status"
															
 
																+	fi
															
 
																+	cd "${ROOT_DIR}"
															
 
																+done
															
 
																+
															
 
																+cat > Makefile << EOF
															
 
																+all:
															
 
																+	\$(MAKE) -C build_host
															
 
																+	\$(MAKE) -C build_mic
															
 
																+
															
 
																+clean:
															
 
																+	\$(MAKE) -C build_host clean
															
 
																+	\$(MAKE) -C build_mic clean
															
 
																+
															
 
																+distclean: clean
															
 
																+	rm -f Makefile
															
 
																+
															
 
																+check:
															
 
																+	\$(MAKE) -C build_host check
															
 
																+	\$(MAKE) -C build_mic check
															
 
																+
															
 
																+install:
															
 
																+	\$(MAKE) -C build_host install
															
 
																+	\$(MAKE) -C build_mic install
															
 
																+	ln -sf "${prefix}/mic/lib/pkgconfig/starpu-1.2.pc" "${prefix}/mic/lib/pkgconfig/starpu-1.2-mic.pc"
															
 
																+EOF
															
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -49,10 +49,10 @@ endif STARPU_HAVE_WINDOWS
 
																 lib_LTLIBRARIES = libstarpu-@STARPU_EFFECTIVE_VERSION@.la
															
 
																-libstarpu_@STARPU_EFFECTIVE_VERSION@_la_CPPFLAGS = -I$(top_srcdir)/include/ -DBUILDING_STARPU
															
 
																+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_CPPFLAGS = -I$(top_srcdir)/include/ $(STARPU_RCCE_CPPFLAGS) -DBUILDING_STARPU
															
 
																-libstarpu_@STARPU_EFFECTIVE_VERSION@_la_CFLAGS = $(GLOBAL_AM_CFLAGS) $(HWLOC_CFLAGS) $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS)
															
 
																-libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = -lm $(HWLOC_LIBS) $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(FXT_LIBS) $(STARPU_GLPK_LDFLAGS)
															
 
																+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_CFLAGS = $(GLOBAL_AM_CFLAGS) $(HWLOC_CFLAGS) $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(STARPU_COI_CPPFLAGS) $(STARPU_RCCE_CFLAGS) $(FXT_CFLAGS)
															
 
																+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = -lm $(HWLOC_LIBS) $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(STARPU_COI_LDFLAGS) $(STARPU_RCCE_LDFLAGS) $(FXT_LIBS) $(STARPU_GLPK_LDFLAGS)
															
 
																 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) $(FXT_LDFLAGS) -no-undefined									\
															
 
																   -version-info $(libstarpu_so_version)
															
@@ -105,11 +105,20 @@ noinst_HEADERS = 						\
 
																 	common/uthash.h						\
															
 
																 	common/barrier_counter.h				\
															
 
																 	drivers/driver_common/driver_common.h			\
															
 
																+	drivers/mp_common/mp_common.h				\
															
 
																+	drivers/mp_common/source_common.h			\
															
 
																+	drivers/mp_common/sink_common.h				\
															
 
																 	drivers/cpu/driver_cpu.h				\
															
 
																 	drivers/cuda/driver_cuda.h				\
															
 
																 	drivers/opencl/driver_opencl.h				\
															
 
																 	drivers/opencl/driver_opencl_utils.h			\
															
 
																 	debug/starpu_debug_helpers.h				\
															
 
																+	drivers/mic/driver_mic_common.h				\
															
 
																+	drivers/mic/driver_mic_source.h				\
															
 
																+	drivers/mic/driver_mic_sink.h				\
															
 
																+	drivers/scc/driver_scc_common.h				\
															
 
																+	drivers/scc/driver_scc_source.h				\
															
 
																+	drivers/scc/driver_scc_sink.h				\
															
 
																 	debug/traces/starpu_fxt.h				\
															
 
																 	profiling/bound.h					\
															
 
																 	profiling/profiling.h					\
															
@@ -256,5 +265,40 @@ libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/opencl/driver_opencl.
 
																 endif
															
 
																 endif
															
 
																+if STARPU_USE_SCC
															
 
																+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/scc/driver_scc_common.c
															
 
																+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/scc/driver_scc_source.c
															
 
																+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/scc/driver_scc_sink.c
															
 
																+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/scc/driver_scc_utils.c
															
 
																+endif
															
 
																+
															
 
																+
															
 
																+#########################################
															
 
																+#										#
															
 
																+#        Generic MP compilation			#
															
 
																+#										#
															
 
																+#########################################
															
 
																+
															
 
																+if STARPU_USE_MP
															
 
																+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mp_common/mp_common.c
															
 
																+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mp_common/source_common.c
															
 
																+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mp_common/sink_common.c
															
 
																+endif
															
 
																+
															
 
																+#########################################
															
 
																+#										#
															
 
																+#	     MIC compilation				#
															
 
																+#										#
															
 
																+#########################################
															
 
																+
															
 
																+if STARPU_USE_MIC
															
 
																+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mic/driver_mic_common.c
															
 
																+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mic/driver_mic_source.c
															
 
																+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mic/driver_mic_sink.c
															
 
																+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mic/driver_mic_utils.c
															
 
																+endif
															
 
																+
															
 
																+#########################################
															
 
																+
															
 
																 showcheck:
															
 
																 	-cat /dev/null
															
--- a/src/common/fxt.h
+++ b/src/common/fxt.h
@@ -37,6 +37,8 @@
 
																 #define _STARPU_FUT_CPU_KEY	0x101
															
 
																 #define _STARPU_FUT_CUDA_KEY	0x102
															
 
																 #define _STARPU_FUT_OPENCL_KEY	0x103
															
 
																+#define _STARPU_FUT_MIC_KEY	0x104
															
 
																+#define _STARPU_FUT_SCC_KEY	0x105
															
 
																 #define _STARPU_FUT_WORKER_INIT_START	0x5100
															
 
																 #define _STARPU_FUT_WORKER_INIT_END	0x5101
															
--- a/src/common/utils.c
+++ b/src/common/utils.c
@@ -130,8 +130,14 @@ char *_starpu_get_home_path(void)
 
																 		path = getenv("HOME");
															
 
																 	if (!path)
															
 
																 		path = getenv("USERPROFILE");
															
 
																-	if (!path)
															
 
																-		_STARPU_ERROR("couldn't find a home place to put starpu data\n");
															
 
																+	if (!path) {
															
 
																+		static int warn;
															
 
																+		if (!warn) {
															
 
																+			warn = 1;
															
 
																+			_STARPU_DISP("couldn't find a home place to put starpu data, using /tmp\n");
															
 
																+		}
															
 
																+		path = "/tmp";
															
 
																+	}
															
 
																 	return path;
															
 
																 }
															
--- a/src/common/utils.h
+++ b/src/common/utils.h
@@ -74,23 +74,29 @@
 
																 	DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_SPIN_INIT_OR_UNLOCK_POST, \
															
 
																 			struct _starpu_spinlock *, lock)
															
 
																+#if defined(__KNC__) || defined(__KNF__)
															
 
																+#define STARPU_DEBUG_PREFIX "[starpu-mic]"
															
 
																+#else
															
 
																+#define STARPU_DEBUG_PREFIX "[starpu]"
															
 
																+#endif
															
 
																+
															
 
																 #ifdef STARPU_VERBOSE
															
 
																-#  define _STARPU_DEBUG(fmt, args ...) do { if (!getenv("STARPU_SILENT")) {fprintf(stderr, "[starpu][%s] " fmt ,__starpu_func__ ,##args); fflush(stderr); }} while(0)
															
 
																+#  define _STARPU_DEBUG(fmt, args ...) do { if (!getenv("STARPU_SILENT")) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%s] " fmt ,__starpu_func__ ,##args); fflush(stderr); }} while(0)
															
 
																 #else
															
 
																 #  define _STARPU_DEBUG(fmt, args ...) do { } while (0)
															
 
																 #endif
															
 
																 #ifdef STARPU_VERBOSE0
															
 
																-#  define _STARPU_LOG_IN()             do { if (!getenv("STARPU_SILENT")) {fprintf(stderr, "[starpu][%ld][%s] -->\n", pthread_self(), __starpu_func__ ); }} while(0)
															
 
																-#  define _STARPU_LOG_OUT()            do { if (!getenv("STARPU_SILENT")) {fprintf(stderr, "[starpu][%ld][%s] <--\n", pthread_self(), __starpu_func__ ); }} while(0)
															
 
																-#  define _STARPU_LOG_OUT_TAG(outtag)  do { if (!getenv("STARPU_SILENT")) {fprintf(stderr, "[starpu][%ld][%s] <-- (%s)\n", pthread_self(), __starpu_func__, outtag); }} while(0)
															
 
																+#  define _STARPU_LOG_IN()             do { if (!getenv("STARPU_SILENT")) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%ld][%s] -->\n", pthread_self(), __starpu_func__ ); }} while(0)
															
 
																+#  define _STARPU_LOG_OUT()            do { if (!getenv("STARPU_SILENT")) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%ld][%s] <--\n", pthread_self(), __starpu_func__ ); }} while(0)
															
 
																+#  define _STARPU_LOG_OUT_TAG(outtag)  do { if (!getenv("STARPU_SILENT")) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%ld][%s] <-- (%s)\n", pthread_self(), __starpu_func__, outtag); }} while(0)
															
 
																 #else
															
 
																 #  define _STARPU_LOG_IN()
															
 
																 #  define _STARPU_LOG_OUT()
															
 
																 #  define _STARPU_LOG_OUT_TAG(outtag)
															
 
																 #endif
															
 
																-#define _STARPU_DISP(fmt, args ...) do { if (!getenv("STARPU_SILENT")) {fprintf(stderr, "[starpu][%s] " fmt ,__starpu_func__ ,##args); }} while(0)
															
 
																+#define _STARPU_DISP(fmt, args ...) do { if (!getenv("STARPU_SILENT")) {fprintf(stderr, STARPU_DEBUG_PREFIX"[%s] " fmt ,__starpu_func__ ,##args); }} while(0)
															
 
																 #define _STARPU_ERROR(fmt, args ...)                                                  \
															
 
																 	do {                                                                          \
															
 
																                 fprintf(stderr, "\n\n[starpu][%s] Error: " fmt ,__starpu_func__ ,##args);    \
															
--- a/src/core/dependencies/data_concurrency.c
+++ b/src/core/dependencies/data_concurrency.c
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2010-2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2010-2013  Université de Bordeaux 1
															
 
																  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -55,6 +55,7 @@ static struct _starpu_data_requester *may_unlock_data_req_list_head(starpu_data_
 
																 	if (handle->refcnt == 0)
															
 
																 		return _starpu_data_requester_list_pop_front(req_list);
															
 
																+	/* Already writing to it, do not let another write access through */
															
 
																 	if (handle->current_mode == STARPU_W)
															
 
																 		return NULL;
															
@@ -193,7 +194,7 @@ static unsigned attempt_to_submit_data_request_from_job(struct _starpu_job *j, u
 
																 	/* Note that we do not access j->task->handles, but j->ordered_buffers
															
 
																 	 * which is a sorted copy of it. */
															
 
																 	starpu_data_handle_t handle = _STARPU_JOB_GET_ORDERED_BUFFER_HANDLE(j, buffer_index);
															
 
																-	enum starpu_data_access_mode mode = _STARPU_JOB_GET_ORDERED_BUFFER_MODE(j, buffer_index);
															
 
																+	enum starpu_data_access_mode mode = _STARPU_JOB_GET_ORDERED_BUFFER_MODE(j, buffer_index) & ~STARPU_COMMUTE;
															
 
																 	return _starpu_attempt_to_submit_data_request(1, handle, mode, NULL, NULL, j, buffer_index);
															
 
																 }
															
--- a/src/core/dependencies/implicit_data_deps.c
+++ b/src/core/dependencies/implicit_data_deps.c
@@ -46,23 +46,22 @@ static void _starpu_add_dependency(starpu_data_handle_t handle STARPU_ATTRIBUTE_
 
																 	_starpu_add_ghost_dependency(handle, _starpu_get_job_associated_to_task(previous)->job_id, next);
															
 
																 }
															
 
																-/* Read after Write (RAW) or Read after Read (RAR) */
															
 
																-static void _starpu_add_reader_after_writer(starpu_data_handle_t handle, struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task)
															
 
																+/* Add pre_sync_task as new accessor among the existing ones, making it depend on the last synchronization task if any.  */
															
 
																+static void _starpu_add_accessor(starpu_data_handle_t handle, struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task)
															
 
																 {
															
 
																 	/* Add this task to the list of readers */
															
 
																 	struct _starpu_task_wrapper_list *link = (struct _starpu_task_wrapper_list *) malloc(sizeof(struct _starpu_task_wrapper_list));
															
 
																 	link->task = post_sync_task;
															
 
																-	link->next = handle->last_submitted_readers;
															
 
																-	handle->last_submitted_readers = link;
															
 
																+	link->next = handle->last_submitted_accessors;
															
 
																+	handle->last_submitted_accessors = link;
															
 
																-	/* This task depends on the previous writer if any */
															
 
																-	if (handle->last_submitted_writer && handle->last_submitted_writer != post_sync_task)
															
 
																+	/* This task depends on the previous synchronization task if any */
															
 
																+	if (handle->last_sync_task && handle->last_sync_task != post_sync_task)
															
 
																 	{
															
 
																-		_STARPU_DEP_DEBUG("RAW %p\n", handle);
															
 
																-		struct starpu_task *task_array[1] = {handle->last_submitted_writer};
															
 
																+		struct starpu_task *task_array[1] = {handle->last_sync_task};
															
 
																 		_starpu_task_declare_deps_array(pre_sync_task, 1, task_array, 0);
															
 
																-		_starpu_add_dependency(handle, handle->last_submitted_writer, pre_sync_task);
															
 
																-		_STARPU_DEP_DEBUG("dep %p -> %p\n", handle->last_submitted_writer, pre_sync_task);
															
 
																+		_starpu_add_dependency(handle, handle->last_sync_task, pre_sync_task);
															
 
																+		_STARPU_DEP_DEBUG("dep %p -> %p\n", handle->last_sync_task, pre_sync_task);
															
 
																 	}
															
 
																         else
															
 
																         {
															
@@ -82,12 +81,12 @@ static void _starpu_add_reader_after_writer(starpu_data_handle_t handle, struct
 
																 #ifdef HAVE_AYUDAME_H
															
 
																 		|| AYU_event
															
 
																 #endif
															
 
																-		) && handle->last_submitted_ghost_writer_id_is_valid)
															
 
																+		) && handle->last_submitted_ghost_sync_id_is_valid)
															
 
																 	{
															
 
																-		_STARPU_TRACE_GHOST_TASK_DEPS(handle->last_submitted_ghost_writer_id,
															
 
																+		_STARPU_TRACE_GHOST_TASK_DEPS(handle->last_submitted_ghost_sync_id,
															
 
																 			_starpu_get_job_associated_to_task(pre_sync_task)->job_id);
															
 
																-		_starpu_add_ghost_dependency(handle, handle->last_submitted_ghost_writer_id, pre_sync_task);
															
 
																-		_STARPU_DEP_DEBUG("dep ID%lu -> %p\n", handle->last_submitted_ghost_writer_id, pre_sync_task);
															
 
																+		_starpu_add_ghost_dependency(handle, handle->last_submitted_ghost_sync_id, pre_sync_task);
															
 
																+		_STARPU_DEP_DEBUG("dep ID%lu -> %p\n", handle->last_submitted_ghost_sync_id, pre_sync_task);
															
 
																 	}
															
 
																 	if (!pre_sync_task->cl) {
															
@@ -99,27 +98,27 @@ static void _starpu_add_reader_after_writer(starpu_data_handle_t handle, struct
 
																 	}
															
 
																 }
															
 
																-/* Write after Read (WAR) */
															
 
																-static void _starpu_add_writer_after_readers(starpu_data_handle_t handle, struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task)
															
 
																+/* This adds a new synchronization task which depends on all the previous accessors */
															
 
																+static void _starpu_add_sync_task(starpu_data_handle_t handle, struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task)
															
 
																 {
															
 
																-	/* Count the readers */
															
 
																-	unsigned nreaders = 0;
															
 
																+	/* Count the existing accessors */
															
 
																+	unsigned naccessors = 0;
															
 
																 	struct _starpu_task_wrapper_list *l;
															
 
																-	l = handle->last_submitted_readers;
															
 
																+	l = handle->last_submitted_accessors;
															
 
																 	while (l)
															
 
																 	{
															
 
																 		if (l->task != post_sync_task)
															
 
																-			nreaders++;
															
 
																+			naccessors++;
															
 
																 		l = l->next;
															
 
																 	}
															
 
																-	_STARPU_DEP_DEBUG("%d readers\n", nreaders);
															
 
																+	_STARPU_DEP_DEBUG("%d accessors\n", naccessors);
															
 
																-	if (nreaders > 0)
															
 
																+	if (naccessors > 0)
															
 
																 	{
															
 
																 		/* Put all tasks in the list into task_array */
															
 
																-		struct starpu_task *task_array[nreaders];
															
 
																+		struct starpu_task *task_array[naccessors];
															
 
																 		unsigned i = 0;
															
 
																-		l = handle->last_submitted_readers;
															
 
																+		l = handle->last_submitted_accessors;
															
 
																 		while (l)
															
 
																 		{
															
 
																 			STARPU_ASSERT(l->task);
															
@@ -134,80 +133,31 @@ static void _starpu_add_writer_after_readers(starpu_data_handle_t handle, struct
 
																 			l = l->next;
															
 
																 			free(prev);
															
 
																 		}
															
 
																-		_starpu_task_declare_deps_array(pre_sync_task, nreaders, task_array, 0);
															
 
																+		_starpu_task_declare_deps_array(pre_sync_task, naccessors, task_array, 0);
															
 
																 	}
															
 
																 #ifndef STARPU_USE_FXT
															
 
																 	if (_starpu_bound_recording)
															
 
																 #endif
															
 
																 	{
															
 
																-		/* Declare all dependencies with ghost readers */
															
 
																-		struct _starpu_jobid_list *ghost_readers_id = handle->last_submitted_ghost_readers_id;
															
 
																-		while (ghost_readers_id)
															
 
																+		/* Declare all dependencies with ghost accessors */
															
 
																+		struct _starpu_jobid_list *ghost_accessors_id = handle->last_submitted_ghost_accessors_id;
															
 
																+		while (ghost_accessors_id)
															
 
																 		{
															
 
																-			unsigned long id = ghost_readers_id->id;
															
 
																+			unsigned long id = ghost_accessors_id->id;
															
 
																 			_STARPU_TRACE_GHOST_TASK_DEPS(id,
															
 
																 				_starpu_get_job_associated_to_task(pre_sync_task)->job_id);
															
 
																 			_starpu_add_ghost_dependency(handle, id, pre_sync_task);
															
 
																 			_STARPU_DEP_DEBUG("dep ID%lu -> %p\n", id, pre_sync_task);
															
 
																-			struct _starpu_jobid_list *prev = ghost_readers_id;
															
 
																-			ghost_readers_id = ghost_readers_id->next;
															
 
																+			struct _starpu_jobid_list *prev = ghost_accessors_id;
															
 
																+			ghost_accessors_id = ghost_accessors_id->next;
															
 
																 			free(prev);
															
 
																 		}
															
 
																-		handle->last_submitted_ghost_readers_id = NULL;
															
 
																+		handle->last_submitted_ghost_accessors_id = NULL;
															
 
																 	}
															
 
																-	handle->last_submitted_readers = NULL;
															
 
																-	handle->last_submitted_writer = post_sync_task;
															
 
																-
															
 
																-	if (!post_sync_task->cl) {
															
 
																-		/* Add a reference to be released in _starpu_handle_job_termination */
															
 
																-		_starpu_spin_lock(&handle->header_lock);
															
 
																-		handle->busy_count++;
															
 
																-		_starpu_spin_unlock(&handle->header_lock);
															
 
																-		_starpu_get_job_associated_to_task(post_sync_task)->implicit_dep_handle = handle;
															
 
																-	}
															
 
																-}
															
 
																-
															
 
																-/* Write after Write (WAW) */
															
 
																-static void _starpu_add_writer_after_writer(starpu_data_handle_t handle, struct starpu_task *pre_sync_task, struct starpu_task *post_sync_task)
															
 
																-{
															
 
																-	/* (Read) Write */
															
 
																-	/* This task depends on the previous writer */
															
 
																-	if (handle->last_submitted_writer && handle->last_submitted_writer != post_sync_task)
															
 
																-	{
															
 
																-		struct starpu_task *task_array[1] = {handle->last_submitted_writer};
															
 
																-		_starpu_task_declare_deps_array(pre_sync_task, 1, task_array, 0);
															
 
																-		_starpu_add_dependency(handle, handle->last_submitted_writer, pre_sync_task);
															
 
																-		_STARPU_DEP_DEBUG("dep %p -> %p\n", handle->last_submitted_writer, pre_sync_task);
															
 
																-	}
															
 
																-        else
															
 
																-        {
															
 
																-		_STARPU_DEP_DEBUG("No dep\n");
															
 
																-        }
															
 
																-
															
 
																-	/* If there is a ghost writer instead, we
															
 
																-	 * should declare a ghost dependency here, and
															
 
																-	 * invalidate the ghost value. */
															
 
																-#ifndef STARPU_USE_FXT
															
 
																-	if (_starpu_bound_recording)
															
 
																-#endif
															
 
																-	{
															
 
																-		if (handle->last_submitted_ghost_writer_id_is_valid)
															
 
																-		{
															
 
																-			_STARPU_TRACE_GHOST_TASK_DEPS(handle->last_submitted_ghost_writer_id, 
															
 
																-				_starpu_get_job_associated_to_task(pre_sync_task)->job_id);
															
 
																-			_starpu_add_ghost_dependency(handle, handle->last_submitted_ghost_writer_id, pre_sync_task);
															
 
																-			_STARPU_DEP_DEBUG("dep ID%lu -> %p\n", handle->last_submitted_ghost_writer_id, pre_sync_task);
															
 
																-			handle->last_submitted_ghost_writer_id_is_valid = 0;
															
 
																-		}
															
 
																-                else
															
 
																-                {
															
 
																-			_STARPU_DEP_DEBUG("No dep ID\n");
															
 
																-                }
															
 
																-	}
															
 
																-
															
 
																-	handle->last_submitted_writer = post_sync_task;
															
 
																+	handle->last_submitted_accessors = NULL;
															
 
																+	handle->last_sync_task = post_sync_task;
															
 
																 	if (!post_sync_task->cl) {
															
 
																 		/* Add a reference to be released in _starpu_handle_job_termination */
															
@@ -245,7 +195,6 @@ struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_
 
																 		if (pre_sync_job->reduction_task || post_sync_job->reduction_task)
															
 
																 			return NULL;
															
 
																-		_STARPU_DEP_DEBUG("Tasks %p %p\n", pre_sync_task, post_sync_task);
															
 
																 		/* In case we are generating the DAG, we add an implicit
															
 
																 		 * dependency between the pre and the post sync tasks in case
															
 
																 		 * they are not the same. */
															
@@ -261,54 +210,75 @@ struct starpu_task *_starpu_detect_implicit_data_deps_with_handle(struct starpu_
 
																 		enum starpu_data_access_mode previous_mode = handle->last_submitted_mode;
															
 
																-		if (mode & STARPU_W)
															
 
																+		_STARPU_DEP_DEBUG("Handle %p Tasks %p %p %x->%x\n", handle, pre_sync_task, post_sync_task, previous_mode, mode);
															
 
																+
															
 
																+		/*
															
 
																+		 * Tasks can access the data concurrently only if they have the
															
 
																+		 * same access mode, which can only be either:
															
 
																+		 * - write with STARPU_COMMUTE
															
 
																+		 * - read
															
 
																+		 * - redux
															
 
																+		 *
															
 
																+		 * In other cases, the tasks have to depend on each other.
															
 
																+		 */
															
 
																+
															
 
																+		if (((mode & STARPU_W) && (mode & STARPU_COMMUTE) && (previous_mode & STARPU_W) && (previous_mode & STARPU_COMMUTE))
															
 
																+		  || (mode == STARPU_R && previous_mode == STARPU_R)
															
 
																+		  || (mode == STARPU_REDUX && previous_mode == STARPU_REDUX))
															
 
																 		{
															
 
																-			_STARPU_DEP_DEBUG("W %p\n", handle);
															
 
																-			if (previous_mode & STARPU_W)
															
 
																-			{
															
 
																-				_STARPU_DEP_DEBUG("WAW %p\n", handle);
															
 
																-				_starpu_add_writer_after_writer(handle, pre_sync_task, post_sync_task);
															
 
																-			}
															
 
																-			else
															
 
																-			{
															
 
																-				/* The task submitted previously were in read-only
															
 
																-				 * mode: this task must depend on all those read-only
															
 
																-				 * tasks and we get rid of the list of readers */
															
 
																-				_STARPU_DEP_DEBUG("WAR %p\n", handle);
															
 
																-				_starpu_add_writer_after_readers(handle, pre_sync_task, post_sync_task);
															
 
																-			}
															
 
																+			_STARPU_DEP_DEBUG("concurrently\n");
															
 
																+			/* Can access concurrently with current tasks */
															
 
																+			_starpu_add_accessor(handle, pre_sync_task, post_sync_task);
															
 
																 		}
															
 
																 		else
															
 
																 		{
															
 
																-			_STARPU_DEP_DEBUG("R %p %d -> %d\n", handle, previous_mode, mode);
															
 
																-			/* Add a reader, after a writer or a reader. */
															
 
																-			STARPU_ASSERT(pre_sync_task);
															
 
																-			STARPU_ASSERT(post_sync_task);
															
 
																+			/* Can not access concurrently, have to wait for existing accessors */
															
 
																+			struct _starpu_task_wrapper_list *l = handle->last_submitted_accessors;
															
 
																+			_STARPU_DEP_DEBUG("dependency\n");
															
 
																-			STARPU_ASSERT(mode & (STARPU_R|STARPU_REDUX));
															
 
																-
															
 
																-			if (!(previous_mode & STARPU_W) && (mode != previous_mode))
															
 
																+			if (l && l->next)
															
 
																 			{
															
 
																-				/* Read after Redux or Redux after Read: we
															
 
																-				 * insert a dummy synchronization task so that
															
 
																-				 * we don't need to have a gigantic number of
															
 
																-				 * dependencies between all readers and all
															
 
																-				 * redux tasks. */
															
 
																-
															
 
																-				/* Create an empty task */
															
 
																-				struct starpu_task *new_sync_task;
															
 
																-				new_sync_task = starpu_task_create();
															
 
																-				STARPU_ASSERT(new_sync_task);
															
 
																-				new_sync_task->cl = NULL;
															
 
																+				/* Several previous accessors */
															
 
																+
															
 
																+				if (mode == STARPU_W)
															
 
																+				{
															
 
																+					/* Optimization: this task can not
															
 
																+					 * combine with others anyway, use it
															
 
																+					 * as synchronization task by making it
															
 
																+					 * wait for the previous ones. */
															
 
																+					_starpu_add_sync_task(handle, pre_sync_task, post_sync_task);
															
 
																+				} else {
															
 
																+					_STARPU_DEP_DEBUG("several predecessors, adding sync task\n");
															
 
																+					/* insert an empty synchronization task
															
 
																+					 * which waits for the whole set,
															
 
																+					 * instead of creating a quadratic
															
 
																+					 * number of dependencies. */
															
 
																+					struct starpu_task *sync_task = starpu_task_create();
															
 
																+					STARPU_ASSERT(sync_task);
															
 
																+					sync_task->cl = NULL;
															
 
																 #ifdef STARPU_USE_FXT
															
 
																-				_starpu_get_job_associated_to_task(new_sync_task)->model_name = "sync_task_redux";
															
 
																+					_starpu_get_job_associated_to_task(sync_task)->model_name = "sync_task_redux";
															
 
																 #endif
															
 
																+					/* Make this task wait for the previous ones */
															
 
																+					_starpu_add_sync_task(handle, sync_task, sync_task);
															
 
																+					/* And the requested task wait for this one */
															
 
																+					_starpu_add_accessor(handle, pre_sync_task, post_sync_task);
															
 
																-				_starpu_add_writer_after_readers(handle, new_sync_task, new_sync_task);
															
 
																-
															
 
																-				task = new_sync_task;
															
 
																+					task = sync_task;
															
 
																+				}
															
 
																+			}
															
 
																+			else
															
 
																+			{
															
 
																+				if (l)
															
 
																+				{
															
 
																+					/* One previous accessor, make it the sync
															
 
																+					 * task, and start depending on it. */
															
 
																+					handle->last_sync_task = l->task;
															
 
																+					handle->last_submitted_accessors = NULL;
															
 
																+					free(l);
															
 
																+				}
															
 
																+				_starpu_add_accessor(handle, pre_sync_task, post_sync_task);
															
 
																 			}
															
 
																-			_starpu_add_reader_after_writer(handle, pre_sync_task, post_sync_task);
															
 
																 		}
															
 
																 		handle->last_submitted_mode = mode;
															
 
																 	}
															
@@ -374,18 +344,18 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 
																 		/* If this is the last writer, there is no point in adding
															
 
																 		 * extra deps to that tasks that does not exists anymore */
															
 
																-		if (task == handle->last_submitted_writer)
															
 
																+		if (task == handle->last_sync_task)
															
 
																 		{
															
 
																-			handle->last_submitted_writer = NULL;
															
 
																+			handle->last_sync_task = NULL;
															
 
																 #ifndef STARPU_USE_FXT
															
 
																 			if (_starpu_bound_recording)
															
 
																 #endif
															
 
																 			{
															
 
																 				/* Save the previous writer as the ghost last writer */
															
 
																-				handle->last_submitted_ghost_writer_id_is_valid = 1;
															
 
																+				handle->last_submitted_ghost_sync_id_is_valid = 1;
															
 
																 				struct _starpu_job *ghost_job = _starpu_get_job_associated_to_task(task);
															
 
																-				handle->last_submitted_ghost_writer_id = ghost_job->job_id;
															
 
																+				handle->last_submitted_ghost_sync_id = ghost_job->job_id;
															
 
																 			}
															
 
																 		}
															
@@ -397,7 +367,7 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 
																 		/* Same if this is one of the readers: we go through the list
															
 
																 		 * of readers and remove the task if it is found. */
															
 
																 		struct _starpu_task_wrapper_list *l;
															
 
																-		l = handle->last_submitted_readers;
															
 
																+		l = handle->last_submitted_accessors;
															
 
																 		struct _starpu_task_wrapper_list *prev = NULL;
															
 
																 #ifdef STARPU_DEVEL
															
 
																 #warning TODO: use double-linked list to make finding ourself fast
															
@@ -419,9 +389,9 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 
																 					struct _starpu_job *ghost_reader_job = _starpu_get_job_associated_to_task(task);
															
 
																 					struct _starpu_jobid_list *link = (struct _starpu_jobid_list *) malloc(sizeof(struct _starpu_jobid_list));
															
 
																 					STARPU_ASSERT(link);
															
 
																-					link->next = handle->last_submitted_ghost_readers_id;
															
 
																+					link->next = handle->last_submitted_ghost_accessors_id;
															
 
																 					link->id = ghost_reader_job->job_id;
															
 
																-					handle->last_submitted_ghost_readers_id = link;
															
 
																+					handle->last_submitted_ghost_accessors_id = link;
															
 
																 				}
															
 
																 				if (prev)
															
@@ -431,7 +401,7 @@ void _starpu_release_data_enforce_sequential_consistency(struct starpu_task *tas
 
																 				else
															
 
																 				{
															
 
																 					/* This is the first element of the list */
															
 
																-					handle->last_submitted_readers = next;
															
 
																+					handle->last_submitted_accessors = next;
															
 
																 				}
															
 
																 				/* XXX can we really find the same task again
															
--- a/src/core/jobs.h
+++ b/src/core/jobs.h
@@ -51,6 +51,8 @@ typedef void (*_starpu_cl_func_t)(void **, void *);
 
																 #define _STARPU_CPU_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_CPU)
															
 
																 #define _STARPU_CUDA_MAY_PERFORM(j)      ((j)->task->cl->where & STARPU_CUDA)
															
 
																 #define _STARPU_OPENCL_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_OPENCL)
															
 
																+#define _STARPU_MIC_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_MIC)
															
 
																+#define _STARPU_SCC_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_SCC)
															
 
																 /* A job is the internal representation of a task. */
															
 
																 LIST_TYPE(_starpu_job,
															
@@ -116,6 +118,10 @@ LIST_TYPE(_starpu_job,
 
																 	 * so we need a flag to differentiate them from "normal" tasks. */
															
 
																 	unsigned reduction_task;
															
 
																+	/* Used by MIC driver to record codelet start time instead of using a
															
 
																+	 * local variable */
															
 
																+	struct timespec cl_start;
															
 
																+
															
 
																 #ifdef STARPU_USE_FXT
															
 
																 	/* A symbol name may be associated to the job directly for debug
															
 
																 	 * purposes (for instance if the codelet is NULL). */
															
--- a/src/core/perfmodel/perfmodel_bus.c
+++ b/src/core/perfmodel/perfmodel_bus.c
@@ -67,6 +67,7 @@ static unsigned was_benchmarked = 0;
 
																 static unsigned ncpus = 0;
															
 
																 static unsigned ncuda = 0;
															
 
																 static unsigned nopencl = 0;
															
 
																+static unsigned nmic = 0;
															
 
																 /* Benchmarking the performance of the bus */
															
@@ -91,6 +92,11 @@ static double opencldev_latency_dtoh[STARPU_MAXNODES] = {0.0};
 
																 static struct dev_timing opencldev_timing_per_cpu[STARPU_MAXNODES*STARPU_MAXCPUS];
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+static double mic_time_host_to_device[STARPU_MAXNODES] = {0.0};
															
 
																+static double mic_time_device_to_host[STARPU_MAXNODES] = {0.0};
															
 
																+#endif /* STARPU_USE_MIC */
															
 
																+
															
 
																 #ifdef STARPU_HAVE_HWLOC
															
 
																 static hwloc_topology_t hwtopology;
															
 
																 #endif
															
@@ -632,7 +638,7 @@ static void benchmark_all_gpu_devices(void)
 
																 	_STARPU_DISP("can not measure bus in simgrid mode, please run starpu_calibrate_bus in non-simgrid mode to make sure the bus performance model was calibrated\n");
															
 
																 	STARPU_ABORT();
															
 
																 #else /* !SIMGRID */
															
 
																-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
															
 
																+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_MIC)
															
 
																 	unsigned i;
															
 
																 #endif
															
 
																 #ifdef HAVE_CUDA_MEMCPY_PEER
															
@@ -695,6 +701,19 @@ static void benchmark_all_gpu_devices(void)
 
																 	}
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+	/* TODO: implement real calibration ! For now we only put an arbitrary
															
 
																+	 * value for each device here, as a workaround, else
															
 
																+	 * we get problems on heft scheduler */
															
 
																+        nmic = _starpu_mic_src_get_device_count();
															
 
																+
															
 
																+	for (i = 0; i < STARPU_MAXNODES; i++)
															
 
																+	{
															
 
																+		mic_time_host_to_device[i] = 0.1;
															
 
																+		mic_time_device_to_host[i] = 0.1;
															
 
																+	}
															
 
																+#endif /* STARPU_USE_MIC */
															
 
																+
															
 
																 #ifdef STARPU_HAVE_HWLOC
															
 
																 	hwloc_set_cpubind(hwtopology, former_cpuset, HWLOC_CPUBIND_THREAD);
															
 
																 #elif __linux__
															
@@ -1082,6 +1101,9 @@ static void write_bus_latency_file_content(void)
 
																 #ifdef STARPU_USE_OPENCL
															
 
																         maxnode += nopencl;
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+        maxnode += nmic;
															
 
																+#endif
															
 
																         for (src = 0; src < STARPU_MAXNODES; src++)
															
 
																 	{
															
 
																 		for (dst = 0; dst < STARPU_MAXNODES; dst++)
															
@@ -1290,6 +1312,9 @@ static void write_bus_bandwidth_file_content(void)
 
																 #ifdef STARPU_USE_OPENCL
															
 
																         maxnode += nopencl;
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+        maxnode += nmic;
															
 
																+#endif
															
 
																 	for (src = 0; src < STARPU_MAXNODES; src++)
															
 
																 	{
															
 
																 		for (dst = 0; dst < STARPU_MAXNODES; dst++)
															
@@ -1300,7 +1325,7 @@ static void write_bus_bandwidth_file_content(void)
 
																 			{
															
 
																 				bandwidth = NAN;
															
 
																 			}
															
 
																-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
															
 
																+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_MIC)
															
 
																 			else if (src != dst)
															
 
																 			{
															
 
																 				double slowness = 0.0;
															
@@ -1319,12 +1344,19 @@ static void write_bus_bandwidth_file_content(void)
 
																 						slowness += cudadev_timing_htod[dst];
															
 
																 				}
															
 
																 #endif
															
 
																+				/* TODO: generalize computation */
															
 
																 #ifdef STARPU_USE_OPENCL
															
 
																-				if (src > ncuda)
															
 
																+				if (src > ncuda && src <= ncuda + nopencl)
															
 
																 					slowness += opencldev_timing_dtoh[src-ncuda];
															
 
																-				if (dst > ncuda)
															
 
																+				if (dst > ncuda && dst <= ncuda + nopencl)
															
 
																 					slowness += opencldev_timing_htod[dst-ncuda];
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+				if (src > ncuda + nopencl)
															
 
																+					slowness += mic_time_device_to_host[src - (ncuda + nopencl)];
															
 
																+				if (dst > ncuda + nopencl)
															
 
																+					slowness += mic_time_host_to_device[dst - (ncuda + nopencl)];
															
 
																+#endif
															
 
																 				bandwidth = 1.0/slowness;
															
 
																 			}
															
 
																 #endif
															
@@ -1364,6 +1396,9 @@ void starpu_bus_print_bandwidth(FILE *f)
 
																 #ifdef STARPU_USE_OPENCL
															
 
																         maxnode += nopencl;
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+        maxnode += nmic;
															
 
																+#endif
															
 
																 	fprintf(f, "from/to\t");
															
 
																 	fprintf(f, "RAM\t");
															
@@ -1501,7 +1536,7 @@ static void check_bus_config_file(void)
 
																 	{
															
 
																                 FILE *f;
															
 
																                 int ret;
															
 
																-		unsigned read_cuda = -1, read_opencl = -1;
															
 
																+		unsigned read_cuda = -1, read_opencl = -1, read_mic = -1;
															
 
																                 unsigned read_cpus = -1;
															
 
																                 // Loading configuration from file
															
@@ -1517,6 +1552,10 @@ static void check_bus_config_file(void)
 
																 		ret = fscanf(f, "%d\t", &read_opencl);
															
 
																 		STARPU_ASSERT(ret == 1);
															
 
																                 _starpu_drop_comments(f);
															
 
																+		ret = fscanf(f, "%d\t", &read_mic);
															
 
																+		if (ret == 0)
															
 
																+			read_mic = 0;
															
 
																+                _starpu_drop_comments(f);
															
 
																                 fclose(f);
															
 
																                 // Loading current configuration
															
@@ -1527,6 +1566,9 @@ static void check_bus_config_file(void)
 
																 #ifdef STARPU_USE_OPENCL
															
 
																                 nopencl = _starpu_opencl_get_device_count();
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+                nmic = _starpu_mic_src_get_device_count();
															
 
																+#endif /* STARPU_USE_MIC */
															
 
																                 // Checking if both configurations match
															
 
																                 if (read_cpus != ncpus)
															
@@ -1547,6 +1589,12 @@ static void check_bus_config_file(void)
 
																                         _starpu_bus_force_sampling();
															
 
																 			_STARPU_DISP("... done\n");
															
 
																                 }
															
 
																+                else if (read_mic != nmic)
															
 
																+		{
															
 
																+                        _STARPU_DISP("Current configuration does not match the bus performance model (MIC: (stored) %d != (current) %d), recalibrating...\n", read_mic, nmic);
															
 
																+                        _starpu_bus_force_sampling();
															
 
																+			_STARPU_DISP("... done\n");
															
 
																+                }
															
 
																         }
															
 
																 }
															
@@ -1567,6 +1615,7 @@ static void write_bus_config_file_content(void)
 
																         fprintf(f, "%u # Number of CPUs\n", ncpus);
															
 
																         fprintf(f, "%d # Number of CUDA devices\n", ncuda);
															
 
																         fprintf(f, "%d # Number of OpenCL devices\n", nopencl);
															
 
																+        fprintf(f, "%d # Number of MIC devices\n", nmic);
															
 
																         fclose(f);
															
 
																 }
															
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -366,6 +366,22 @@ static void parse_model_file(FILE *f, struct starpu_perfmodel *model, unsigned s
 
																 			   archmin + STARPU_MIN(narchs, STARPU_MAXOPENCLDEVS),
															
 
																 			   narchs > STARPU_MAXOPENCLDEVS ? narchs - STARPU_MAXOPENCLDEVS : 0);
															
 
																 	}
															
 
																+
															
 
																+	/* Parsing MIC devs */
															
 
																+	_starpu_drop_comments(f);
															
 
																+	ret = fscanf(f, "%u\n", &narchs);
															
 
																+	if (ret == 0)
															
 
																+		narchs = 0;
															
 
																+
															
 
																+	archmin += STARPU_MAXOPENCLDEVS;
															
 
																+	_STARPU_DEBUG("Parsing %u MIC devices\n", narchs);
															
 
																+	if (narchs > 0)
															
 
																+	{
															
 
																+		parse_arch(f, model, scan_history,
															
 
																+			   archmin,
															
 
																+			   archmin + STARPU_MIN(narchs, STARPU_MAXMICDEVS),
															
 
																+			   narchs > STARPU_MAXMICDEVS ? narchs - STARPU_MAXMICDEVS : 0);
															
 
																+	}
															
 
																 }
															
@@ -447,6 +463,7 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 
																 		{
															
 
																 			case STARPU_CUDA_DEFAULT:
															
 
																 			case STARPU_OPENCL_DEFAULT:
															
 
																+			case STARPU_MIC_DEFAULT:
															
 
																 				arch_base = arch;
															
 
																 				idx++;
															
 
																 				break;
															
@@ -479,42 +496,48 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 
																 	}
															
 
																 	/* Writing stuff */
															
 
																+
															
 
																 	char *name = "unknown";
															
 
																 	unsigned substract_to_arch = 0;
															
 
																 	for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++)
															
 
																 	{
															
 
																+		unsigned char arch_already_visited = 0;
															
 
																+
															
 
																 		switch (arch)
															
 
																 		{
															
 
																 			case STARPU_CPU_DEFAULT:
															
 
																-				arch_base = arch;
															
 
																 				name = "CPU";
															
 
																-				fprintf(f, "##################\n");
															
 
																-				fprintf(f, "# %ss\n", name);
															
 
																-				fprintf(f, "# maximum number of %ss\n", name);
															
 
																-				fprintf(f, "%u\n", my_narch = narch[0]);
															
 
																+				my_narch = narch[0];
															
 
																 				break;
															
 
																 			case STARPU_CUDA_DEFAULT:
															
 
																-				arch_base = arch;
															
 
																 				name = "CUDA";
															
 
																 				substract_to_arch = STARPU_MAXCPUS;
															
 
																-				fprintf(f, "##################\n");
															
 
																-				fprintf(f, "# %ss\n", name);
															
 
																-				fprintf(f, "# number of %s architectures\n", name);
															
 
																-				fprintf(f, "%u\n", my_narch = narch[1]);
															
 
																+				my_narch = narch[1];
															
 
																 				break;
															
 
																 			case STARPU_OPENCL_DEFAULT:
															
 
																-				arch_base = arch;
															
 
																 				name = "OPENCL";
															
 
																-				substract_to_arch += STARPU_MAXCUDADEVS;
															
 
																-				fprintf(f, "##################\n");
															
 
																-				fprintf(f, "# %ss\n", name);
															
 
																-				fprintf(f, "# number of %s architectures\n", name);
															
 
																-				fprintf(f, "%u\n", my_narch = narch[2]);
															
 
																+				my_narch = narch[2];
															
 
																+				break;
															
 
																+			case STARPU_MIC_DEFAULT:
															
 
																+				name = "MIC";
															
 
																+				my_narch = narch[3];
															
 
																 				break;
															
 
																 			default:
															
 
																+				/* The current worker arch was already written,
															
 
																+				 * we don't need to write it again */
															
 
																+				arch_already_visited = 1;
															
 
																 				break;
															
 
																 		}
															
 
																+		if (!arch_already_visited)
															
 
																+		{
															
 
																+			arch_base = arch;
															
 
																+			fprintf(f, "##################\n");
															
 
																+			fprintf(f, "# %ss\n", name);
															
 
																+			fprintf(f, "# number of %s architectures\n", name);
															
 
																+			fprintf(f, "%u\n", my_narch);
															
 
																+		}
															
 
																+
															
 
																 		unsigned max_impl = 0;
															
 
																 		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
															
 
																 		{
															
@@ -1024,6 +1047,12 @@ void starpu_perfmodel_get_arch_name(enum starpu_perfmodel_archtype arch, char *a
 
																 		int devid = arch - STARPU_OPENCL_DEFAULT;
															
 
																 		snprintf(archname, maxlen, "opencl_%d_impl_%u", devid,nimpl);
															
 
																 	}
															
 
																+	else if ((STARPU_MIC_DEFAULT <= arch)
															
 
																+		&& (arch < STARPU_MIC_DEFAULT + STARPU_MAXMICDEVS))
															
 
																+	{
															
 
																+		int devid = arch - STARPU_MIC_DEFAULT;
															
 
																+		snprintf(archname, maxlen, "mic_%d_impl_%u", devid, nimpl);
															
 
																+	}
															
 
																 	else
															
 
																 	{
															
 
																 		STARPU_ABORT();
															
--- a/src/core/sched_ctx.c
+++ b/src/core/sched_ctx.c
@@ -842,7 +842,7 @@ void* starpu_sched_ctx_get_policy_data(unsigned sched_ctx_id)
 
																 	return sched_ctx->policy_data;
															
 
																 }
															
 
																-struct starpu_worker_collection* starpu_sched_ctx_create_worker_collection(unsigned sched_ctx_id, int worker_collection_type)
															
 
																+struct starpu_worker_collection* starpu_sched_ctx_create_worker_collection(unsigned sched_ctx_id, enum starpu_worker_collection_type  worker_collection_type)
															
 
																 {
															
 
																 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
															
 
																 	sched_ctx->workers = (struct starpu_worker_collection*)malloc(sizeof(struct starpu_worker_collection));
															
@@ -881,6 +881,7 @@ static unsigned _get_workers_list(struct _starpu_sched_ctx *sched_ctx, int **wor
 
																 	}
															
 
																 	return nworkers;
															
 
																 }
															
 
																+
															
 
																 void starpu_sched_ctx_delete_worker_collection(unsigned sched_ctx_id)
															
 
																 {
															
 
																 	struct _starpu_sched_ctx *sched_ctx = _starpu_get_sched_ctx_struct(sched_ctx_id);
															
--- a/src/core/sched_policy.c
+++ b/src/core/sched_policy.c
@@ -447,7 +447,7 @@ struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t
 
																 {
															
 
																 	struct starpu_task *conversion_task;
															
 
																-#if defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
															
 
																+#if defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) || defined(STARPU_USE_MIC) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID)
															
 
																 	struct starpu_multiformat_interface *format_interface;
															
 
																 #endif
															
@@ -455,7 +455,7 @@ struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t
 
																 	conversion_task->synchronous = 0;
															
 
																 	STARPU_TASK_SET_HANDLE(conversion_task, handle, 0);
															
 
																-#if defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
															
 
																+#if defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) || defined(STARPU_USE_MIC) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID)
															
 
																 	/* The node does not really matter here */
															
 
																 	format_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, 0);
															
 
																 #endif
															
@@ -468,9 +468,13 @@ struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t
 
																 	switch(node_kind)
															
 
																 	{
															
 
																 	case STARPU_CPU_RAM:
															
 
																+	case STARPU_SCC_RAM:
															
 
																+	case STARPU_SCC_SHM:
															
 
																 		switch (starpu_node_get_kind(handle->mf_node))
															
 
																 		{
															
 
																 		case STARPU_CPU_RAM:
															
 
																+		case STARPU_SCC_RAM:
															
 
																+		case STARPU_SCC_SHM:
															
 
																 			STARPU_ABORT();
															
 
																 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
															
 
																 		case STARPU_CUDA_RAM:
															
@@ -490,6 +494,15 @@ struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t
 
																 			break;
															
 
																 		}
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+		case STARPU_MIC_RAM:
															
 
																+		{
															
 
																+			struct starpu_multiformat_data_interface_ops *mf_ops;
															
 
																+			mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface);
															
 
																+			conversion_task->cl = mf_ops->mic_to_cpu_cl;
															
 
																+			break;
															
 
																+		}
															
 
																+#endif
															
 
																 		default:
															
 
																 			_STARPU_ERROR("Oops : %u\n", handle->mf_node);
															
 
																 		}
															
@@ -512,6 +525,15 @@ struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t
 
																 		break;
															
 
																 	}
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+	case STARPU_MIC_RAM:
															
 
																+	{
															
 
																+		struct starpu_multiformat_data_interface_ops *mf_ops;
															
 
																+		mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface);
															
 
																+		conversion_task->cl = mf_ops->cpu_to_mic_cl;
															
 
																+		break;
															
 
																+	}
															
 
																+#endif
															
 
																 	default:
															
 
																 		STARPU_ABORT();
															
 
																 	}
															
--- a/src/core/task.c
+++ b/src/core/task.c
@@ -341,6 +341,21 @@ void _starpu_codelet_check_deprecated_fields(struct starpu_codelet *cl)
 
																 	{
															
 
																 		cl->where |= STARPU_OPENCL;
															
 
																 	}
															
 
																+
															
 
																+	if (cl->mic_funcs[0] && is_where_unset)
															
 
																+	{
															
 
																+		cl->where |= STARPU_MIC;
															
 
																+	}
															
 
																+
															
 
																+	if (cl->scc_funcs[0] && is_where_unset)
															
 
																+	{
															
 
																+		cl->where |= STARPU_SCC;
															
 
																+	}
															
 
																+
															
 
																+	if (cl->cpu_funcs_name[0] && is_where_unset)
															
 
																+	{
															
 
																+		cl->where |= STARPU_MIC|STARPU_SCC;
															
 
																+	}
															
 
																 }
															
 
																 void _starpu_task_check_deprecated_fields(struct starpu_task *task)
															
@@ -876,6 +891,8 @@ _starpu_handle_needs_conversion_task_for_arch(starpu_data_handle_t handle,
 
																 					return 0;
															
 
																 				case STARPU_CUDA_RAM:      /* Fall through */
															
 
																 				case STARPU_OPENCL_RAM:
															
 
																+				case STARPU_MIC_RAM:
															
 
																+				case STARPU_SCC_RAM:
															
 
																 					return 1;
															
 
																 				default:
															
 
																 					STARPU_ABORT();
															
@@ -883,12 +900,16 @@ _starpu_handle_needs_conversion_task_for_arch(starpu_data_handle_t handle,
 
																 			break;
															
 
																 		case STARPU_CUDA_RAM:    /* Fall through */
															
 
																 		case STARPU_OPENCL_RAM:
															
 
																+		case STARPU_MIC_RAM:
															
 
																+		case STARPU_SCC_RAM:
															
 
																 			switch(starpu_node_get_kind(handle->mf_node))
															
 
																 			{
															
 
																 				case STARPU_CPU_RAM:
															
 
																 					return 1;
															
 
																 				case STARPU_CUDA_RAM:
															
 
																 				case STARPU_OPENCL_RAM:
															
 
																+				case STARPU_MIC_RAM:
															
 
																+				case STARPU_SCC_RAM:
															
 
																 					return 0;
															
 
																 				default:
															
 
																 					STARPU_ABORT();
															
@@ -925,3 +946,18 @@ unsigned starpu_task_get_implementation(struct starpu_task *task)
 
																 {
															
 
																 	return _starpu_get_job_associated_to_task(task)->nimpl;
															
 
																 }
															
 
																+
															
 
																+starpu_mic_func_t _starpu_task_get_mic_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
															
 
																+{
															
 
																+	return cl->mic_funcs[nimpl];
															
 
																+}
															
 
																+
															
 
																+starpu_scc_func_t _starpu_task_get_scc_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
															
 
																+{
															
 
																+	return cl->scc_funcs[nimpl];
															
 
																+}
															
 
																+
															
 
																+char *_starpu_task_get_cpu_name_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
															
 
																+{
															
 
																+	return cl->cpu_funcs_name[nimpl];
															
 
																+}
															
--- a/src/core/task.h
+++ b/src/core/task.h
@@ -72,6 +72,10 @@ void _starpu_codelet_check_deprecated_fields(struct starpu_codelet *cl);
 
																 starpu_cpu_func_t _starpu_task_get_cpu_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
															
 
																 starpu_cuda_func_t _starpu_task_get_cuda_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
															
 
																 starpu_opencl_func_t _starpu_task_get_opencl_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
															
 
																+starpu_mic_func_t _starpu_task_get_mic_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
															
 
																+starpu_scc_func_t _starpu_task_get_scc_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
															
 
																+
															
 
																+char *_starpu_task_get_cpu_name_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
															
 
																 #define _STARPU_TASK_SET_INTERFACE(task, interface, i) do { if (task->dyn_handles) task->dyn_interfaces[i] = interface; else task->interfaces[i] = interface;} while(0)
															
 
																 #define _STARPU_TASK_GET_INTERFACES(task) ((task->dyn_handles) ? task->dyn_interfaces : task->interfaces)
															
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -23,6 +23,9 @@
 
																 #include <core/debug.h>
															
 
																 #include <core/topology.h>
															
 
																 #include <drivers/cuda/driver_cuda.h>
															
 
																+#include <drivers/mic/driver_mic_source.h>
															
 
																+#include <drivers/scc/driver_scc_source.h>
															
 
																+#include <drivers/mp_common/source_common.h>
															
 
																 #include <drivers/opencl/driver_opencl.h>
															
 
																 #include <profiling/profiling.h>
															
 
																 #include <common/uthash.h>
															
@@ -45,7 +48,7 @@
 
																 static unsigned topology_is_initialized = 0;
															
 
																-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
															
 
																+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID)
															
 
																 struct handle_entry
															
 
																 {
															
@@ -67,9 +70,9 @@ static unsigned may_bind_automatically = 0;
 
																  * Discover the topology of the machine
															
 
																  */
															
 
																-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
															
 
																+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC)  || defined(STARPU_SIMGRID)
															
 
																 static void
															
 
																-_starpu_initialize_workers_gpuid (int *explicit_workers_gpuid,
															
 
																+_starpu_initialize_workers_deviceid (int *explicit_workers_gpuid,
															
 
																 				  int *current, int *workers_gpuid,
															
 
																 				  const char *varname, unsigned nhwgpus)
															
 
																 {
															
@@ -144,7 +147,8 @@ _starpu_initialize_workers_gpuid (int *explicit_workers_gpuid,
 
																 			  workers_gpuid[i] = (unsigned)(i % nhwgpus);
															
 
																 		/* StarPU can use sampling techniques to bind threads
															
 
																-		 * correctly */
															
 
																+		 * correctly
															
 
																+		 * TODO: use a private value for each kind of device */
															
 
																 		may_bind_automatically = 1;
															
 
																 	}
															
 
																 }
															
@@ -157,7 +161,7 @@ _starpu_initialize_workers_cuda_gpuid (struct _starpu_machine_config *config)
 
																 	struct starpu_machine_topology *topology = &config->topology;
															
 
																 	struct starpu_conf *uconf = config->conf;
															
 
																-        _starpu_initialize_workers_gpuid (
															
 
																+        _starpu_initialize_workers_deviceid (
															
 
																 		uconf->use_explicit_workers_cuda_gpuid == 0
															
 
																 		? NULL
															
 
																 		: (int *)uconf->workers_cuda_gpuid,
															
@@ -184,7 +188,7 @@ _starpu_initialize_workers_opencl_gpuid (struct _starpu_machine_config*config)
 
																 	struct starpu_machine_topology *topology = &config->topology;
															
 
																 	struct starpu_conf *uconf = config->conf;
															
 
																-        _starpu_initialize_workers_gpuid(
															
 
																+        _starpu_initialize_workers_deviceid(
															
 
																 		uconf->use_explicit_workers_opencl_gpuid == 0
															
 
																 		? NULL
															
 
																 		: (int *)uconf->workers_opencl_gpuid,
															
@@ -258,6 +262,147 @@ _starpu_get_next_opencl_gpuid (struct _starpu_machine_config *config)
 
																 }
															
 
																 #endif
															
 
																+#if 0
															
 
																+#if defined(STARPU_USE_MIC) || defined(STARPU_SIMGRID)
															
 
																+/* Fill topology->workers_mic_deviceid through the generic
																+ * _starpu_initialize_workers_deviceid() helper. The explicit device list
																+ * of the user configuration is forwarded when
																+ * use_explicit_workers_mic_deviceid is set, NULL otherwise.
																+ *
																+ * The user configuration is reached through config->conf only, like in
																+ * the CUDA, OpenCL and SCC variants of this function.
																+ *
																+ * NOTE(review): this function sits in an `#if 0' region. nhwmiccores is
																+ * indexed per MIC device elsewhere in this file, so passing it as the
																+ * device count looks wrong -- confirm before enabling this code. */
																+static void _starpu_initialize_workers_mic_deviceid(struct _starpu_machine_config *config)
																+{
																+	struct starpu_machine_topology *topology = &config->topology;
																+	struct starpu_conf *uconf = config->conf;
																+
																+	_starpu_initialize_workers_deviceid(
																+		uconf->use_explicit_workers_mic_deviceid == 0
																+		? NULL
																+		: (int *)uconf->workers_mic_deviceid,
																+		&(config->current_mic_deviceid),
																+		(int *)topology->workers_mic_deviceid,
																+		"STARPU_WORKERS_MICID",
																+		topology->nhwmiccores);
																+}
															
 
																+#endif
															
 
																+#endif
															
 
																+
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+/* Fill topology->workers_scc_deviceid through the generic
																+ * _starpu_initialize_workers_deviceid() helper. The explicit device list
																+ * of the user configuration is only forwarded when
																+ * use_explicit_workers_scc_deviceid is non-zero; otherwise NULL is passed
																+ * and the helper decides on its own. */
																+static void _starpu_initialize_workers_scc_deviceid(struct _starpu_machine_config *config)
																+{
																+	struct starpu_conf *conf = config->conf;
																+	struct starpu_machine_topology *topo = &config->topology;
																+	int *explicit_ids = NULL;
																+
																+	if (conf->use_explicit_workers_scc_deviceid != 0)
																+		explicit_ids = (int *) conf->workers_scc_deviceid;
																+
																+	_starpu_initialize_workers_deviceid(explicit_ids,
																+					    &(config->current_scc_deviceid),
																+					    (int *) topo->workers_scc_deviceid,
																+					    "STARPU_WORKERS_SCCID",
																+					    topo->nhwscc);
																+}
															
 
																+#endif /* STARPU_USE_SCC */
															
 
																+
															
 
																+#if 0
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+/* Hand out the MIC device ids stored in topology.workers_mic_deviceid in
																+ * round-robin order: the entry at (current_mic_deviceid modulo
																+ * nmicdevices) is returned and the cursor is advanced by one.
																+ * nmicdevices must not be 0 (it is used as the modulo divisor). */
																+static inline int _starpu_get_next_mic_deviceid(struct _starpu_machine_config *config)
																+{
																+	struct starpu_machine_topology *topo = &config->topology;
																+	unsigned slot = config->current_mic_deviceid % topo->nmicdevices;
																+
																+	config->current_mic_deviceid++;
																+
																+	return (int) topo->workers_mic_deviceid[slot];
																+}
															
 
																+#endif
															
 
																+#endif
															
 
																+
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+/* Hand out the SCC device ids stored in topology.workers_scc_deviceid in
																+ * round-robin order: the entry at (current_scc_deviceid modulo
																+ * nsccdevices) is returned and the cursor is advanced by one.
																+ * nsccdevices must not be 0 (it is used as the modulo divisor). */
																+static inline int _starpu_get_next_scc_deviceid(struct _starpu_machine_config *config)
																+{
																+	struct starpu_machine_topology *topo = &config->topology;
																+	unsigned slot = config->current_scc_deviceid % topo->nsccdevices;
																+
																+	config->current_scc_deviceid++;
																+
																+	return (int) topo->workers_scc_deviceid[slot];
																+}
															
 
																+#endif
															
 
																+
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+/* Discover the topology of the MIC node identified by MIC_IDX: ask the
																+ * node, through _starpu_src_common_sink_nbcores(), how many cores it has
																+ * and record the answer in topology.nhwmiccores[MIC_IDX]. */
																+static void
																+_starpu_init_mic_topology (struct _starpu_machine_config *config, long mic_idx)
																+{
																+	int sink_cores;
																+
																+	/* The helper reports the core count through its second argument. */
																+	_starpu_src_common_sink_nbcores (mic_nodes[mic_idx], &sink_cores);
																+
																+	config->topology.nhwmiccores[mic_idx] = sink_cores;
																+}
															
 
																+
															
 
																+
															
 
																+/* Initialize the MIC node of index MIC_IDX:
																+ * - locate the sink program to run on the device;
																+ * - get the COI engine handle of the device into *COI_HANDLE;
																+ * - start the sink program on the device, its process going to
																+ *   *COI_PROCESS;
																+ * - create the mp_common node stored in mic_nodes[MIC_IDX].
																+ *
																+ * Returns 0 on success and -1 when no sink program could be located, in
																+ * which case nothing has been started on the device. COI failures are
																+ * handed to STARPU_MIC_SRC_REPORT_COI_ERROR(). */
																+static int
																+_starpu_init_mic_node (struct _starpu_machine_config *config, int mic_idx,
																+		       COIENGINE *coi_handle, COIPROCESS *coi_process)
																+{
																+	struct starpu_conf *user_conf = config->conf;
																+
																+	char ***argv = _starpu_get_argv();
																+	const char *suffixes[] = {"-mic", "_mic", NULL};
																+
																+	/* Environment variables to send to the Sink, it informs it what kind
																+	 * of node it is (architecture and type) as there is no way to discover
																+	 * it itself */
																+	char mic_idx_env[32];
																+	snprintf(mic_idx_env, sizeof(mic_idx_env), "DEVID=%d", mic_idx);
																+
																+	/* XXX: this is currently necessary so that the remote process does not
																+	 * segfault. */
																+	char nb_mic_env[32];
																+	snprintf(nb_mic_env, sizeof(nb_mic_env), "NB_MIC=%d", 2);
																+
																+	const char *mic_sink_env[] = {"STARPU_SINK=STARPU_MIC", mic_idx_env, nb_mic_env, NULL};
																+
																+	char mic_sink_program_path[1024];
																+	/* Let's get the helper program to run on the MIC device */
																+	int mic_file_found =
																+	    _starpu_src_common_locate_file (mic_sink_program_path,
																+					    getenv("STARPU_MIC_SINK_PROGRAM_NAME"),
																+					    getenv("STARPU_MIC_SINK_PROGRAM_PATH"),
																+					    user_conf->mic_sink_program_path,
																+					    (argv ? (*argv)[0] : NULL),
																+					    suffixes);
																+
																+	if (0 != mic_file_found) {
																+		/* Adjacent string literals are concatenated as-is, so each
																+		 * piece carries its own separating space. */
																+		fprintf(stderr, "No MIC program specified, use the environment "
																+			"variable STARPU_MIC_SINK_PROGRAM_NAME, the environment "
																+			"variable STARPU_MIC_SINK_PROGRAM_PATH "
																+			"or the field 'starpu_conf.mic_sink_program_path' "
																+			"to define it.\n");
																+
																+		return -1;
																+	}
																+
																+	COIRESULT res;
																+	/* Let's get the handle which let us manage the remote MIC device */
																+	res = COIEngineGetHandle(COI_ISA_MIC, mic_idx, coi_handle);
																+	if (STARPU_UNLIKELY(res != COI_SUCCESS))
																+		STARPU_MIC_SRC_REPORT_COI_ERROR(res);
																+
																+	/* We launch the helper on the MIC device, which will wait for us
																+	 * to give it work to do.
																+	 * As we will communicate further with the device through scif we
																+	 * don't need to keep the process pointer */
																+	res = COIProcessCreateFromFile(*coi_handle, mic_sink_program_path, 0, NULL, 0,
																+				       mic_sink_env, 1, NULL, 0, NULL,
																+				       coi_process);
																+	if (STARPU_UNLIKELY(res != COI_SUCCESS))
																+		STARPU_MIC_SRC_REPORT_COI_ERROR(res);
																+
																+	/* Let's create the node structure, we'll communicate with the peer
																+	 * through scif thanks to it */
																+	mic_nodes[mic_idx] =
																+		_starpu_mp_common_node_create(STARPU_MIC_SOURCE, mic_idx);
																+
																+	return 0;
																+}
															
 
																+#endif
															
 
																+
															
 
																 static void
															
 
																 _starpu_init_topology (struct _starpu_machine_config *config)
															
@@ -284,6 +429,9 @@ _starpu_init_topology (struct _starpu_machine_config *config)
 
																 	_starpu_cpu_discover_devices(config);
															
 
																 	_starpu_cuda_discover_devices(config);
															
 
																 	_starpu_opencl_discover_devices(config);
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+	config->topology.nhwscc = _starpu_scc_src_get_device_count();
															
 
																+#endif
															
 
																 	topology_is_initialized = 1;
															
 
																 }
															
@@ -434,8 +582,137 @@ _starpu_topology_get_nhwcpu (struct _starpu_machine_config *config)
 
																 	return config->topology.nhwcpus;
															
 
																 }
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+/* Configure the MIC device of index MIC_IDX.
																+ *
																+ * The number of cores to use on the device comes from the STARPU_NMIC
																+ * environment variable, else from USER_CONF->nmic (when USER_CONF is not
																+ * NULL), else from the number of cores detected on the device. It is
																+ * capped to the detected number of cores. One worker is then set up per
																+ * core, after the workers already counted in topology->nworkers, and
																+ * topology->nworkers is advanced accordingly. */
																+static void
																+_starpu_init_mic_config (struct _starpu_machine_config *config,
																+			 struct starpu_conf *user_conf,
																+			 unsigned mic_idx)
																+{
																+	struct starpu_machine_topology *topology = &config->topology;
																+
																+	topology->nhwmiccores[mic_idx] = 0;
																+
																+	_starpu_init_mic_topology (config, mic_idx);
																+
																+	int nmiccores;
																+	nmiccores = starpu_get_env_number("STARPU_NMIC");
																+
																+	/* STARPU_NMIC is not set. Did the user specify anything ? */
																+	if (nmiccores == -1 && user_conf)
																+		nmiccores = user_conf->nmic;
																+
																+	if (nmiccores != 0)
																+	{
																+		if (nmiccores == -1)
																+		{
																+			/* Nothing was specified, so let's use the number of
																+			 * detected mic cores. ! */
																+			nmiccores = topology->nhwmiccores[mic_idx];
																+		}
																+		else
																+		{
																+			if ((unsigned) nmiccores > topology->nhwmiccores[mic_idx])
																+			{
																+				/* The user requires more MIC cores than there is available */
																+				fprintf(stderr,
																+					"# Warning: %d MIC cores requested. Only %d available.\n",
																+					nmiccores, (int) topology->nhwmiccores[mic_idx]);
																+				nmiccores = topology->nhwmiccores[mic_idx];
																+			}
																+		}
																+	}
																+
																+	topology->nmiccores[mic_idx] = nmiccores;
																+	STARPU_ASSERT(topology->nmiccores[mic_idx] + topology->nworkers <= STARPU_NMAXWORKERS);
																+
																+	/* _starpu_initialize_workers_mic_deviceid (config); */
																+
																+	unsigned miccore_id;
																+	for (miccore_id = 0; miccore_id < topology->nmiccores[mic_idx]; miccore_id++)
																+	{
																+		int worker_idx = topology->nworkers + miccore_id;
																+		/* The performance model architecture is selected by the MIC
																+		 * device index; no `devid' variable exists in this function. */
																+		enum starpu_perfmodel_archtype arch =
																+			(enum starpu_perfmodel_archtype)((int)STARPU_MIC_DEFAULT + (int)mic_idx);
																+		config->workers[worker_idx].arch = STARPU_MIC_WORKER;
																+		config->workers[worker_idx].perf_arch = arch;
																+		config->workers[worker_idx].mp_nodeid = mic_idx;
																+		config->workers[worker_idx].devid = miccore_id;
																+		config->workers[worker_idx].worker_mask = STARPU_MIC;
																+		config->worker_mask |= STARPU_MIC;
																+		_starpu_init_sched_ctx_for_worker(config->workers[worker_idx].workerid);
																+	}
																+
																+	topology->nworkers += topology->nmiccores[mic_idx];
																+}
															
 
																+
															
 
																+
															
 
																+#ifdef STARPU_USE_MIC
																+/* COI engine handle and sink process of each MIC node, indexed by the
																+ * MIC node index. Sized with STARPU_MAXMICDEVS rather than a literal so
																+ * that they follow the configured maximum number of MIC devices. */
																+static COIENGINE handles[STARPU_MAXMICDEVS];
																+static COIPROCESS process[STARPU_MAXMICDEVS];
																+#endif
																+
																+/* Discover and configure the mp topology. That means:
																+ * - discover the number of mp nodes;
																+ * - initialize each discovered node;
																+ * - discover the local topology (number of PUs/devices) of each node;
																+ * - configure the workers accordingly.
																+ *
																+ * The number of MIC nodes is the detected count, lowered by the
																+ * STARPU_NMICDEVS environment variable when set, and never more than the
																+ * `handles'/`process' arrays can hold. topology->nmicdevices ends up as
																+ * the number of nodes that were successfully initialized. */
																+static void
																+_starpu_init_mp_config (struct _starpu_machine_config *config,
																+			struct starpu_conf *user_conf)
																+{
																+	struct starpu_machine_topology *topology = &config->topology;
																+
																+	// We currently only support MIC at this level.
																+#ifdef STARPU_USE_MIC
																+
																+	/* Discover and initialize the number of MIC nodes through the mp
																+	 * infrastructure. */
																+	unsigned nhwmicdevices = _starpu_mic_src_get_device_count();
																+
																+	int reqmicdevices = starpu_get_env_number("STARPU_NMICDEVS");
																+	if (-1 == reqmicdevices)
																+		reqmicdevices = nhwmicdevices;
																+
																+	unsigned nmicdevices = STARPU_MIN (nhwmicdevices, (unsigned) reqmicdevices);
																+
																+	/* Never index `handles' and `process' past their end. */
																+	const unsigned maxmicdevices = sizeof(handles) / sizeof(handles[0]);
																+	if (nmicdevices > maxmicdevices)
																+	{
																+		fprintf(stderr,
																+			"# Warning: %u MIC devices requested. Only %u supported.\n",
																+			nmicdevices, maxmicdevices);
																+		nmicdevices = maxmicdevices;
																+	}
																+
																+	topology->nmicdevices = 0;
																+	unsigned i;
																+	for (i = 0; i < nmicdevices; i++)
																+	{
																+		/* Nodes are later addressed by the indices
																+		 * 0..nmicdevices-1, so the initialized nodes must form a
																+		 * contiguous prefix: stop at the first failure. */
																+		if (0 != _starpu_init_mic_node (config, i, &handles[i], &process[i]))
																+			break;
																+		topology->nmicdevices++;
																+	}
																+
																+	for (i = 0; i < topology->nmicdevices; i++)
																+		_starpu_init_mic_config (config, user_conf, i);
																+#endif
																+}
															
 
																+
															
 
																+/* Shut down the MIC node of index MIC_IDX, in this order: send it the
																+ * STARPU_EXIT command, destroy its COI process, then destroy the
																+ * mp_common node kept in mic_nodes[MIC_IDX]. */
																+static void
																+_starpu_deinit_mic_node (unsigned mic_idx)
																+{
																+	/* Tell the sink to leave before its process is destroyed. */
																+	_starpu_mp_common_send_command(mic_nodes[mic_idx], STARPU_EXIT, NULL, 0);
																+
																+	/* The result of the destruction is deliberately not checked. */
																+	(void) COIProcessDestroy(process[mic_idx], -1, 0, NULL, NULL);
																+
																+	_starpu_mp_common_node_destroy(mic_nodes[mic_idx]);
																+}
															
 
																+
															
 
																+/* Undo _starpu_init_mp_config(): shut down every MIC node counted in
																+ * topology.nmicdevices, in increasing index order, then call
																+ * _starpu_mic_clear_kernels(). */
																+static void
																+_starpu_deinit_mp_config (struct _starpu_machine_config *config)
																+{
																+	unsigned mic_idx = 0;
																+
																+	while (mic_idx < config->topology.nmicdevices)
																+	{
																+		_starpu_deinit_mic_node (mic_idx);
																+		mic_idx++;
																+	}
																+
																+	_starpu_mic_clear_kernels();
																+}
															
 
																+#endif
															
 
																+
															
 
																 static int
															
 
																-_starpu_init_machine_config (struct _starpu_machine_config *config)
															
 
																+_starpu_init_machine_config (struct _starpu_machine_config *config, int no_mp_config)
															
 
																 {
															
 
																 	int i;
															
 
																 	for (i = 0; i < STARPU_NMAXWORKERS; i++)
															
@@ -498,6 +775,7 @@ _starpu_init_machine_config (struct _starpu_machine_config *config)
 
																 		int devid = _starpu_get_next_cuda_gpuid(config);
															
 
																 		enum starpu_perfmodel_archtype arch =
															
 
																 			(enum starpu_perfmodel_archtype)((int)STARPU_CUDA_DEFAULT + devid);
															
 
																+		config->workers[worker_idx].mp_nodeid = -1;
															
 
																 		config->workers[worker_idx].devid = devid;
															
 
																 		config->workers[worker_idx].perf_arch = arch;
															
 
																 		config->workers[worker_idx].worker_mask = STARPU_CUDA;
															
@@ -572,6 +850,7 @@ _starpu_init_machine_config (struct _starpu_machine_config *config)
 
																 		config->workers[worker_idx].arch = STARPU_OPENCL_WORKER;
															
 
																 		enum starpu_perfmodel_archtype arch =
															
 
																 			(enum starpu_perfmodel_archtype)((int)STARPU_OPENCL_DEFAULT + devid);
															
 
																+		config->workers[worker_idx].mp_nodeid = -1;
															
 
																 		config->workers[worker_idx].devid = devid;
															
 
																 		config->workers[worker_idx].perf_arch = arch;
															
 
																 		config->workers[worker_idx].worker_mask = STARPU_OPENCL;
															
@@ -582,6 +861,78 @@ _starpu_init_machine_config (struct _starpu_machine_config *config)
 
																 	topology->nworkers += topology->nopenclgpus;
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+	int nscc = config->conf->nscc;
															
 
																+
															
 
																+	unsigned nb_scc_nodes = _starpu_scc_src_get_device_count();
															
 
																+
															
 
																+	if (nscc != 0)
															
 
																+	{
															
 
																+		/* The user did not disable SCC. We need to count
															
 
																+		 * the number of devices */
															
 
																+		int nb_devices = nb_scc_nodes;
															
 
																+
															
 
																+		if (nscc == -1)
															
 
																+		{
															
 
																+			/* Nothing was specified, so let's choose ! */
															
 
																+			nscc = nb_devices;
															
 
																+			if (nscc > STARPU_MAXSCCDEVS)
															
 
																+			{
															
 
																+				_STARPU_DISP("Warning: %d SCC devices available. Only %d enabled. Use configuration option --enable-maxsccdev=xxx to update the maximum value of supported SCC devices.\n", nb_devices, STARPU_MAXSCCDEVS);
															
 
																+				nscc = STARPU_MAXSCCDEVS;
															
 
																+			}
															
 
																+		}
															
 
																+		else
															
 
																+		{
															
 
																+			/* Let's make sure this value is OK. */
															
 
																+			if (nscc > nb_devices)
															
 
																+			{
															
 
																+				/* The user requires more SCC devices than there is available */
															
 
																+				_STARPU_DISP("Warning: %d SCC devices requested. Only %d available.\n", nscc, nb_devices);
															
 
																+				nscc = nb_devices;
															
 
																+			}
															
 
																+			/* Let's make sure this value is OK. */
															
 
																+			if (nscc > STARPU_MAXSCCDEVS)
															
 
																+			{
															
 
																+				_STARPU_DISP("Warning: %d SCC devices requested. Only %d enabled. Use configure option --enable-maxsccdev=xxx to update the maximum value of supported SCC devices.\n", nscc, STARPU_MAXSCCDEVS);
															
 
																+				nscc = STARPU_MAXSCCDEVS;
															
 
																+			}
															
 
																+		}
															
 
																+	}
															
 
																+
															
 
																+	/* Now we know how many SCC devices will be used */
															
 
																+	topology->nsccdevices = nscc;
															
 
																+	STARPU_ASSERT(topology->nsccdevices + topology->nworkers <= STARPU_NMAXWORKERS);
															
 
																+
															
 
																+	_starpu_initialize_workers_scc_deviceid(config);
															
 
																+
															
 
																+	unsigned sccdev;
															
 
																+	for (sccdev = 0; sccdev < topology->nsccdevices; sccdev++)
															
 
																+	{
															
 
																+		config->workers[topology->nworkers + sccdev].arch = STARPU_SCC_WORKER;
															
 
																+		int devid = _starpu_get_next_scc_deviceid(config);
															
 
																+		enum starpu_perfmodel_archtype arch = (enum starpu_perfmodel_archtype)((int)STARPU_SCC_DEFAULT + devid);
															
 
																+		config->workers[topology->nworkers + sccdev].mp_nodeid = -1;
															
 
																+		config->workers[topology->nworkers + sccdev].devid = devid;
															
 
																+		config->workers[topology->nworkers + sccdev].perf_arch = arch;
															
 
																+		config->workers[topology->nworkers + sccdev].worker_mask = STARPU_SCC;
															
 
																+		config->worker_mask |= STARPU_SCC;
															
 
																+	}
															
 
																+
															
 
																+	for (; sccdev < nb_scc_nodes; ++sccdev)
															
 
																+		_starpu_scc_exit_useless_node(sccdev);
															
 
																+
															
 
																+	topology->nworkers += topology->nsccdevices;
															
 
																+#endif /* STARPU_USE_SCC */
															
 
																+
															
 
																+
															
 
																+	/* Unless not requested, we need to complete configuration with the
															
 
																+	 * ones of the mp nodes. */
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+	if (! no_mp_config)
															
 
																+	    _starpu_init_mp_config (config, config->conf);
															
 
																+#endif
															
 
																+
															
 
																 /* we put the CPU section after the accelerator : in case there was an
															
 
																  * accelerator found, we devote one cpu */
															
 
																 #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
															
@@ -591,8 +942,15 @@ _starpu_init_machine_config (struct _starpu_machine_config *config)
 
																 	{
															
 
																 		if (ncpu == -1)
															
 
																 		{
															
 
																-			unsigned already_busy_cpus = topology->ncudagpus + topology->nopenclgpus;
															
 
																-			long avail_cpus = topology->nhwcpus - already_busy_cpus;
															
 
																+			unsigned mic_busy_cpus = 0;
															
 
																+			unsigned i = 0;
															
 
																+			for (i = 0; i < STARPU_MAXMICDEVS; i++)
															
 
																+				mic_busy_cpus += (topology->nmiccores[i] ? 1 : 0);
															
 
																+
															
 
																+			unsigned already_busy_cpus = mic_busy_cpus + topology->ncudagpus
															
 
																+				+ topology->nopenclgpus + topology->nsccdevices;
															
 
																+
															
 
																+			long avail_cpus = (long) topology->nhwcpus - (long) already_busy_cpus;
															
 
																 			if (avail_cpus < 0)
															
 
																 				avail_cpus = 0;
															
 
																 			ncpu = STARPU_MIN(avail_cpus, STARPU_MAXCPUS);
															
@@ -617,6 +975,7 @@ _starpu_init_machine_config (struct _starpu_machine_config *config)
 
																 		int worker_idx = topology->nworkers + cpu;
															
 
																 		config->workers[worker_idx].arch = STARPU_CPU_WORKER;
															
 
																 		config->workers[worker_idx].perf_arch = STARPU_CPU_DEFAULT;
															
 
																+		config->workers[worker_idx].mp_nodeid = -1;
															
 
																 		config->workers[worker_idx].devid = cpu;
															
 
																 		config->workers[worker_idx].worker_mask = STARPU_CPU;
															
 
																 		config->worker_mask |= STARPU_CPU;
															
@@ -745,7 +1104,7 @@ _starpu_bind_thread_on_cpus (
 
																 static void
															
 
																-_starpu_init_workers_binding (struct _starpu_machine_config *config)
															
 
																+_starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_config)
															
 
																 {
															
 
																 	/* launch one thread per CPU */
															
 
																 	unsigned ram_memory_node;
															
@@ -770,6 +1129,21 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config)
 
																 	 * combinations in a matrix which we initialize here. */
															
 
																 	_starpu_initialize_busid_matrix();
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+	/* Each MIC device has its own memory node. */
															
 
																+	unsigned mic_memory_nodes[STARPU_MAXMICDEVS];
															
 
																+
															
 
																+	// Register the memory nodes for the MIC devices.
															
 
																+	if (! no_mp_config) {
															
 
																+	    unsigned i = 0;
															
 
																+	    for (i = 0; i < config->topology.nmicdevices; i++) {
															
 
																+		mic_memory_nodes[i] = _starpu_memory_node_register (STARPU_MIC_RAM, i);
															
 
																+		_starpu_register_bus(0, mic_memory_nodes[i]);
															
 
																+		_starpu_register_bus(mic_memory_nodes[i], 0);
															
 
																+	    }
															
 
																+	}
															
 
																+#endif
															
 
																+
															
 
																 	unsigned worker;
															
 
																 	for (worker = 0; worker < config->topology.nworkers; worker++)
															
 
																 	{
															
@@ -852,6 +1226,38 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config)
 
																 				break;
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+		        case STARPU_MIC_WORKER:
															
 
																+				//if (may_bind_automatically)
															
 
																+				//{
															
 
																+				//	/* StarPU is allowed to bind threads automatically */
															
 
																+				//	preferred_binding = _starpu_get_mic_affinity_vector(workerarg->devid);
															
 
																+				//	npreferred = config->topology.nhwcpus;
															
 
																+				//}
															
 
																+				is_a_set_of_accelerators = 1;
															
 
																+				memory_node = mic_memory_nodes[workerarg->mp_nodeid];
															
 
																+				_starpu_memory_node_add_nworkers(memory_node);
															
 
																+				/* memory_node = _starpu_memory_node_register(STARPU_MIC_RAM, workerarg->devid);*/
															
 
																+
															
 
																+				/* _starpu_register_bus(0, memory_node);
															
 
																+				 * _starpu_register_bus(memory_node, 0); */
															
 
																+				break;
															
 
																+#endif /* STARPU_USE_MIC */
															
 
																+
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+			case STARPU_SCC_WORKER:
															
 
																+			{
															
 
																+				/* Node 0 represents the SCC shared memory when we're on SCC. */
															
 
																+				struct _starpu_memory_node_descr *descr = _starpu_memory_node_get_description();
															
 
																+				descr->nodes[ram_memory_node] = STARPU_SCC_SHM;
															
 
																+
															
 
																+				is_a_set_of_accelerators = 0;
															
 
																+				memory_node = ram_memory_node;
															
 
																+				_starpu_memory_node_add_nworkers(memory_node);
															
 
																+			}
															
 
																+				break;
															
 
																+#endif
															
 
																+
															
 
																 			default:
															
 
																 				STARPU_ABORT();
															
 
																 		}
															
@@ -902,18 +1308,18 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config)
 
																 int
															
 
																-_starpu_build_topology (struct _starpu_machine_config *config)
															
 
																+_starpu_build_topology (struct _starpu_machine_config *config, int no_mp_config)
															
 
																 {
															
 
																 	int ret;
															
 
																-	ret = _starpu_init_machine_config(config);
															
 
																+	ret = _starpu_init_machine_config(config, no_mp_config);
															
 
																 	if (ret)
															
 
																 		return ret;
															
 
																 	/* for the data management library */
															
 
																 	_starpu_memory_nodes_init();
															
 
																-	_starpu_init_workers_binding(config);
															
 
																+	_starpu_init_workers_binding(config, no_mp_config);
															
 
																 	return 0;
															
 
																 }
															
@@ -922,6 +1328,10 @@ void
 
																 _starpu_destroy_topology (
															
 
																 	struct _starpu_machine_config *config __attribute__ ((unused)))
															
 
																 {
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+	_starpu_deinit_mp_config(config);
															
 
																+#endif
															
 
																+
															
 
																 	/* cleanup StarPU internal data structures */
															
 
																 	_starpu_memory_nodes_deinit();
															
--- a/src/core/topology.h
+++ b/src/core/topology.h
@@ -27,7 +27,7 @@
 
																 struct _starpu_machine_config;
															
 
																 /* Detect the number of memory nodes and where to bind the different workers. */
															
 
																-int _starpu_build_topology(struct _starpu_machine_config *config);
															
 
																+int _starpu_build_topology(struct _starpu_machine_config *config, int no_mp_config);
															
 
																 /* Destroy all resources used to store the topology of the machine. */
															
 
																 void _starpu_destroy_topology(struct _starpu_machine_config *config);
															
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -28,6 +28,8 @@
 
																 #include <core/task.h>
															
 
																 #include <profiling/profiling.h>
															
 
																 #include <starpu_task_list.h>
															
 
																+#include <drivers/mp_common/sink_common.h>
															
 
																+#include <drivers/scc/driver_scc_common.h>
															
 
																 #include <drivers/cpu/driver_cpu.h>
															
 
																 #include <drivers/cuda/driver_cuda.h>
															
@@ -51,6 +53,29 @@ static starpu_pthread_key_t worker_key;
 
																 static struct _starpu_machine_config config;
															
 
																+/* Pointers to argc and argv, as stored by _starpu_set_argc_argv();
															
 
																+ * both are null until that function stores something. */
															
 
																+static int *my_argc = NULL;
															
 
																+static char ***my_argv = NULL;
															
 
																+
															
 
																+/* Record the caller's argc/argv pointers in the file-level my_argc/my_argv;
															
 
																+ * the pointers are stored as-is. Called from starpu_initialize() when STARPU_USE_MP is defined. */
															
 
																+void _starpu_set_argc_argv(int *argc_param, char ***argv_param)
															
 
																+{
															
 
																+	my_argc = argc_param;
															
 
																+	my_argv = argv_param;
															
 
																+}
															
 
																+
															
 
																+int *_starpu_get_argc(void)
															
 
																+{
															
 
																+	return my_argc; /* null until _starpu_set_argc_argv() stores a pointer */
															
 
																+}
															
 
																+
															
 
																+char ***_starpu_get_argv(void)
															
 
																+{
															
 
																+	return my_argv; /* null until _starpu_set_argc_argv() stores a pointer */
															
 
																+}
															
 
																+
															
 
																 int _starpu_is_initialized(void)
															
 
																 {
															
 
																 	return initialized == INITIALIZED;
															
@@ -98,6 +123,14 @@ static uint32_t _starpu_worker_exists_and_can_execute(struct starpu_task *task,
 
																 				if (task->cl->opencl_funcs[impl] != NULL)
															
 
																 					test_implementation = 1;
															
 
																 				break;
															
 
																+			case STARPU_MIC_WORKER:
															
 
																+				if (task->cl->cpu_funcs_name[impl] != NULL || task->cl->mic_funcs[impl] != NULL)
															
 
																+					test_implementation = 1;
															
 
																+				break;
															
 
																+			case STARPU_SCC_WORKER:
															
 
																+				if (task->cl->cpu_funcs_name[impl] != NULL || task->cl->scc_funcs[impl] != NULL)
															
 
																+					test_implementation = 1;
															
 
																+				break;
															
 
																 			default:
															
 
																 				STARPU_ABORT();
															
 
																 			}
															
@@ -140,6 +173,16 @@ uint32_t _starpu_worker_exists(struct starpu_task *task)
 
																 	    _starpu_worker_exists_and_can_execute(task, STARPU_OPENCL_WORKER))
															
 
																 		return 1;
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+	if ((task->cl->where & STARPU_MIC) &&
															
 
																+	    _starpu_worker_exists_and_can_execute(task, STARPU_MIC_WORKER))
															
 
																+		return 1;
															
 
																+#endif
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+	if ((task->cl->where & STARPU_SCC) &&
															
 
																+	    _starpu_worker_exists_and_can_execute(task, STARPU_SCC_WORKER))
															
 
																+		return 1;
															
 
																+#endif
															
 
																 	return 0;
															
 
																 }
															
@@ -158,6 +201,11 @@ uint32_t _starpu_can_submit_opencl_task(void)
 
																 	return (STARPU_OPENCL & config.worker_mask);
															
 
																 }
															
 
																+uint32_t _starpu_can_submit_scc_task(void)
															
 
																+{
															
 
																+	return (STARPU_SCC & config.worker_mask); /* non-zero iff the STARPU_SCC bit is set in config.worker_mask */
															
 
																+}
															
 
																+
															
 
																 static int _starpu_can_use_nth_implementation(enum starpu_worker_archtype arch, struct starpu_codelet *cl, unsigned nimpl)
															
 
																 {
															
 
																 	switch(arch)
															
@@ -196,13 +244,26 @@ static int _starpu_can_use_nth_implementation(enum starpu_worker_archtype arch,
 
																 		starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, nimpl);
															
 
																 		return func != NULL;
															
 
																 	}
															
 
																+	case STARPU_MIC_WORKER:
															
 
																+	{
															
 
																+		starpu_mic_func_t func = _starpu_task_get_mic_nth_implementation(cl, nimpl);
															
 
																+		char *func_name = _starpu_task_get_cpu_name_nth_implementation(cl, nimpl);
															
 
																+
															
 
																+		return func != NULL || func_name != NULL;
															
 
																+	}
															
 
																+	case STARPU_SCC_WORKER:
															
 
																+	{
															
 
																+		starpu_scc_func_t func = _starpu_task_get_scc_nth_implementation(cl, nimpl);
															
 
																+		char *func_name = _starpu_task_get_cpu_name_nth_implementation(cl, nimpl);
															
 
																+
															
 
																+		return func != NULL || func_name != NULL;
															
 
																+	}
															
 
																 	default:
															
 
																 		STARPU_ASSERT_MSG(0, "Unknown arch type %d", arch);
															
 
																 	}
															
 
																 	return 0;
															
 
																 }
															
 
																-
															
 
																 int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl)
															
 
																 {
															
 
																 	/* TODO: check that the task operand sizes will fit on that device */
															
@@ -255,6 +316,10 @@ int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_tas
 
																  * Runtime initialization methods
															
 
																  */
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+static struct _starpu_worker_set mic_worker_set[STARPU_MAXMICDEVS];
															
 
																+#endif
															
 
																+
															
 
																 static void _starpu_init_worker_queue(struct _starpu_worker *workerarg)
															
 
																 {
															
 
																 	starpu_pthread_cond_t *cond = &workerarg->sched_cond;
															
@@ -374,6 +439,9 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
																 	for (worker = 0; worker < nworkers; worker++)
															
 
																 	{
															
 
																 		struct _starpu_worker *workerarg = &pconfig->workers[worker];
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+		unsigned mp_nodeid = workerarg->mp_nodeid;
															
 
																+#endif
															
 
																 		workerarg->config = pconfig;
															
@@ -393,6 +461,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
																 		workerarg->run_by_starpu = 1;
															
 
																 		workerarg->worker_is_running = 0;
															
 
																 		workerarg->worker_is_initialized = 0;
															
 
																+		workerarg->set = NULL;
															
 
																 		int ctx;
															
 
																 		for(ctx = 0; ctx < STARPU_NMAX_SCHED_CTXS; ctx++)
															
@@ -415,7 +484,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
																 		workerarg->status = STATUS_INITIALIZING;
															
 
																-		_STARPU_DEBUG("initialising worker %u\n", worker);
															
 
																+		_STARPU_DEBUG("initialising worker %u/%u\n", worker, nworkers);
															
 
																 		_starpu_init_worker_queue(workerarg);
															
@@ -425,7 +494,6 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
																 		{
															
 
																 #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
															
 
																 			case STARPU_CPU_WORKER:
															
 
																-				workerarg->set = NULL;
															
 
																 				driver.id.cpu_id = cpu;
															
 
																 				if (_starpu_may_launch_driver(pconfig->conf, &driver))
															
 
																 				{
															
@@ -437,6 +505,11 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
																 						workerarg,
															
 
																 						worker+1);
															
 
																 #ifdef STARPU_USE_FXT
															
 
																+					/* In tracing mode, make sure the
															
 
																+					 * thread is really started before
															
 
																+					 * starting another one, to make sure
															
 
																+					 * they appear in order in the trace.
															
 
																+					 */
															
 
																 					STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
															
 
																 					while (!workerarg->worker_is_running)
															
 
																 						STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
															
@@ -452,7 +525,6 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
																 #endif
															
 
																 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
															
 
																 			case STARPU_CUDA_WORKER:
															
 
																-				workerarg->set = NULL;
															
 
																 				driver.id.cuda_id = cuda;
															
 
																 				if (_starpu_may_launch_driver(pconfig->conf, &driver))
															
 
																 				{
															
@@ -487,7 +559,6 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
																 					break;
															
 
																 				}
															
 
																 #endif
															
 
																-				workerarg->set = NULL;
															
 
																 				STARPU_PTHREAD_CREATE_ON(
															
 
																 					workerarg->name,
															
 
																 					&workerarg->worker_thread,
															
@@ -503,6 +574,77 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
																 #endif
															
 
																 				break;
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+			case STARPU_MIC_WORKER:
															
 
																+				/* We use the Gordon approach for the MIC,
															
 
																+				 * which consists in spawning only one thread
															
 
																+				 * per MIC device, which will control all MIC
															
 
																+				 * workers of this device. (by using a worker set). */
															
 
																+				if (mic_worker_set[mp_nodeid].started)
															
 
																+					goto worker_set_initialized;
															
 
																+
															
 
																+				mic_worker_set[mp_nodeid].nworkers = pconfig->topology.nmiccores[mp_nodeid];
															
 
																+
															
 
																+				/* We assume all MIC workers of a given MIC
															
 
																+				 * device are contiguous so that we can
															
 
																+				 * address them with the first one only. */
															
 
																+				mic_worker_set[mp_nodeid].workers = workerarg;
															
 
																+				mic_worker_set[mp_nodeid].set_is_initialized = 0;
															
 
																+
															
 
																+				STARPU_PTHREAD_CREATE_ON(
															
 
																+						workerarg->name,
															
 
																+						&mic_worker_set[mp_nodeid].worker_thread,
															
 
																+						NULL,
															
 
																+						_starpu_mic_src_worker,
															
 
																+						&mic_worker_set[mp_nodeid],
															
 
																+						worker+1);
															
 
																+
															
 
																+#ifdef STARPU_USE_FXT
															
 
																+				STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
															
 
																+				while (!workerarg->worker_is_running)
															
 
																+					STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
															
 
																+				STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
															
 
																+#endif
															
 
																+
															
 
																+				STARPU_PTHREAD_MUTEX_LOCK(&mic_worker_set[mp_nodeid].mutex);
															
 
																+				while (!mic_worker_set[mp_nodeid].set_is_initialized)
															
 
																+					STARPU_PTHREAD_COND_WAIT(&mic_worker_set[mp_nodeid].ready_cond,
															
 
																+								  &mic_worker_set[mp_nodeid].mutex);
															
 
																+				STARPU_PTHREAD_MUTEX_UNLOCK(&mic_worker_set[mp_nodeid].mutex);
															
 
																+
															
 
																+		worker_set_initialized:
															
 
																+				workerarg->set = &mic_worker_set[mp_nodeid];
															
 
																+				mic_worker_set[mp_nodeid].started = 1;
															
 
																+
															
 
																+#ifdef STARPU_USE_FXT
															
 
																+				STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
															
 
																+				while (!workerarg->worker_is_running)
															
 
																+					STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
															
 
																+				STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
															
 
																+#endif
															
 
																+
															
 
																+				break;
															
 
																+#endif /* STARPU_USE_MIC */
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+			case STARPU_SCC_WORKER:
															
 
																+				workerarg->worker_is_initialized = 0;
															
 
																+				STARPU_PTHREAD_CREATE_ON(
															
 
																+						workerarg->name,
															
 
																+						&workerarg->worker_thread,
															
 
																+						NULL,
															
 
																+						_starpu_scc_src_worker,
															
 
																+						workerarg,
															
 
																+						worker+1);
															
 
																+
															
 
																+#ifdef STARPU_USE_FXT
															
 
																+				STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
															
 
																+				while (!workerarg->worker_is_running)
															
 
																+					STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
															
 
																+				STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
															
 
																+#endif
															
 
																+				break;
															
 
																+#endif
															
 
																+
															
 
																 			default:
															
 
																 				STARPU_ABORT();
															
 
																 		}
															
@@ -560,6 +702,17 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
																 				STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
															
 
																 				break;
															
 
																 #endif
															
 
																+			case STARPU_MIC_WORKER:
															
 
																+				/* Already waited above */
															
 
																+				break;
															
 
																+			case STARPU_SCC_WORKER:
															
 
																+				/* TODO: implement may_launch? */
															
 
																+				_STARPU_DEBUG("waiting for worker %u initialization\n", worker);
															
 
																+				STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
															
 
																+				while (!workerarg->worker_is_initialized)
															
 
																+					STARPU_PTHREAD_COND_WAIT(&workerarg->ready_cond, &workerarg->mutex);
															
 
																+				STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
															
 
																+				break;
															
 
																 			default:
															
 
																 				STARPU_ABORT();
															
 
																 		}
															
@@ -598,8 +751,11 @@ int starpu_conf_init(struct starpu_conf *conf)
 
																 		conf->ncpus = starpu_get_env_number("STARPU_NCPUS");
															
 
																 	conf->ncuda = starpu_get_env_number("STARPU_NCUDA");
															
 
																 	conf->nopencl = starpu_get_env_number("STARPU_NOPENCL");
															
 
																+	conf->nmic = starpu_get_env_number("STARPU_NMIC");
															
 
																+	conf->nscc = starpu_get_env_number("STARPU_NSCC");
															
 
																 	conf->calibrate = starpu_get_env_number("STARPU_CALIBRATE");
															
 
																 	conf->bus_calibrate = starpu_get_env_number("STARPU_BUS_CALIBRATE");
															
 
																+	conf->mic_sink_program_path = getenv("STARPU_MIC_PROGRAM_PATH");
															
 
																 	if (conf->calibrate == -1)
															
 
																 	     conf->calibrate = 0;
															
@@ -610,6 +766,8 @@ int starpu_conf_init(struct starpu_conf *conf)
 
																 	conf->use_explicit_workers_bindid = 0; /* TODO */
															
 
																 	conf->use_explicit_workers_cuda_gpuid = 0; /* TODO */
															
 
																 	conf->use_explicit_workers_opencl_gpuid = 0; /* TODO */
															
 
																+	conf->use_explicit_workers_mic_deviceid = 0; /* TODO */
															
 
																+	conf->use_explicit_workers_scc_deviceid = 0; /* TODO */
															
 
																 	conf->single_combined_worker = starpu_get_env_number("STARPU_SINGLE_COMBINED_WORKER");
															
 
																 	if (conf->single_combined_worker == -1)
															
@@ -639,6 +797,14 @@ int starpu_conf_init(struct starpu_conf *conf)
 
																 		conf->disable_asynchronous_opencl_copy = 0;
															
 
																 #endif
															
 
																+#if defined(STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY)
															
 
																+	conf->disable_asynchronous_mic_copy = 1;
															
 
																+#else
															
 
																+	conf->disable_asynchronous_mic_copy = starpu_get_env_number("STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY");
															
 
																+	if (conf->disable_asynchronous_mic_copy == -1)
															
 
																+		conf->disable_asynchronous_mic_copy = 0;
															
 
																+#endif
															
 
																+
															
 
																 	/* 64MiB by default */
															
 
																 	conf->trace_buffer_size = 64<<20;
															
 
																 	return 0;
															
@@ -672,10 +838,37 @@ void _starpu_conf_check_environment(struct starpu_conf *conf)
 
																 	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_COPY", &conf->disable_asynchronous_copy);
															
 
																 	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_CUDA_COPY", &conf->disable_asynchronous_cuda_copy);
															
 
																 	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_OPENCL_COPY", &conf->disable_asynchronous_opencl_copy);
															
 
																+	_starpu_conf_set_value_against_environment("STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY", &conf->disable_asynchronous_mic_copy);
															
 
																 }
															
 
																 int starpu_init(struct starpu_conf *user_conf)
															
 
																 {
															
 
																+	return starpu_initialize(user_conf, NULL, NULL);
															
 
																+}
															
 
																+
															
 
																+int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
															
 
																+{
															
 
																+	int is_a_sink = 0; /* Always defined. If the MP infrastructure is not
															
 
																+			    * used, we cannot be a sink. */
															
 
																+#ifdef STARPU_USE_MP
															
 
																+	_starpu_set_argc_argv(argc, argv);
															
 
																+
															
 
																+#	ifdef STARPU_USE_SCC
															
 
																+	/* In SCC case we look at the rank to know if we are a sink */
															
 
																+	if (_starpu_scc_common_mp_init() && !_starpu_scc_common_is_src_node())
															
 
																+		setenv("STARPU_SINK", "STARPU_SCC", 1);
															
 
																+#	endif
															
 
																+
															
 
																+	/* If StarPU was configured to use MP sinks, we have to control the
															
 
																+	 * kind on node we are running on : host or sink ? */
															
 
																+	if (getenv("STARPU_SINK"))
															
 
																+		is_a_sink = 1;
															
 
																+#else
															
 
																+	(void)argc;
															
 
																+	(void)argv;
															
 
																+
															
 
																+#endif /* STARPU_USE_MP */
															
 
																+
															
 
																 	int ret;
															
 
																 #ifndef STARPU_SIMGRID
															
@@ -783,11 +976,17 @@ int starpu_init(struct starpu_conf *user_conf)
 
																 	_starpu_load_bus_performance_files();
															
 
																-	ret = _starpu_build_topology(&config);
															
 
																+	/* Depending on whether we are a MP sink or not, we must build the
															
 
																+	 * topology with MP nodes or not. */
															
 
																+	ret = _starpu_build_topology(&config, is_a_sink ? 1 : 0);
															
 
																 	if (ret)
															
 
																 	{
															
 
																 		STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
															
 
																 		init_count--;
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+		if (_starpu_scc_common_is_mp_initialized())
															
 
																+			_starpu_scc_src_mp_deinit();
															
 
																+#endif
															
 
																 		initialized = UNINITIALIZED;
															
 
																 		/* Let somebody else try to do it */
															
 
																 		STARPU_PTHREAD_COND_SIGNAL(&init_cond);
															
@@ -799,12 +998,14 @@ int starpu_init(struct starpu_conf *user_conf)
 
																 	 * threads */
															
 
																 	_starpu_initialize_current_task_key();
															
 
																-	_starpu_create_sched_ctx(config.conf->sched_policy_name, NULL, -1, 1, "init");
															
 
																+	if (!is_a_sink)
															
 
																+		_starpu_create_sched_ctx(config.conf->sched_policy_name, NULL, -1, 1, "init");
															
 
																 	_starpu_initialize_registered_performance_models();
															
 
																 	/* Launch "basic" workers (ie. non-combined workers) */
															
 
																-	_starpu_launch_drivers(&config);
															
 
																+	if (!is_a_sink)
															
 
																+		_starpu_launch_drivers(&config);
															
 
																 	STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
															
 
																 	initialized = INITIALIZED;
															
@@ -813,6 +1014,20 @@ int starpu_init(struct starpu_conf *user_conf)
 
																 	STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
															
 
																 	_STARPU_DEBUG("Initialisation finished\n");
															
 
																+
															
 
																+#ifdef STARPU_USE_MP
															
 
																+	/* Finally, if we are an MP sink, we never leave this function. Instead,
															
 
																+	 * we enter an infinite event loop which listens for MP commands from
															
 
																+	 * the source. */
															
 
																+	if (is_a_sink) {
															
 
																+		_starpu_sink_common_worker();
															
 
																+
															
 
																+		/* We should normally never leave the loop as we don't want to
															
 
																+		 * really initialize STARPU */
															
 
																+		STARPU_ASSERT(0);
															
 
																+	}
															
 
																+#endif
															
 
																+
															
 
																 	return 0;
															
 
																 }
															
@@ -843,7 +1058,7 @@ static void _starpu_terminate_workers(struct _starpu_machine_config *pconfig)
 
																  		 * we have to check if pthread_self() is the worker itself */
															
 
																 		if (set)
															
 
																 		{
															
 
																-			if (!set->joined)
															
 
																+			if (set->started)
															
 
																 			{
															
 
																 #ifdef STARPU_SIMGRID
															
 
																 				status = starpu_pthread_join(set->worker_thread, NULL);
															
@@ -857,7 +1072,7 @@ static void _starpu_terminate_workers(struct _starpu_machine_config *pconfig)
 
																 					_STARPU_DEBUG("starpu_pthread_join -> %d\n", status);
															
 
																 				}
															
 
																 #endif
															
 
																-				set->joined = 1;
															
 
																+				set->started = 0;
															
 
																 			}
															
 
																 		}
															
 
																 		else
															
@@ -1012,6 +1227,11 @@ void starpu_shutdown(void)
 
																 	if (AYU_event) AYU_event(AYU_FINISH, 0, NULL);
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+	if (_starpu_scc_common_is_mp_initialized())
															
 
																+		_starpu_scc_src_mp_deinit();
															
 
																+#endif
															
 
																+
															
 
																 	_STARPU_DEBUG("Shutdown finished\n");
															
 
																 }
															
@@ -1033,6 +1253,12 @@ int starpu_worker_get_count_by_type(enum starpu_worker_archtype type)
 
																 		case STARPU_OPENCL_WORKER:
															
 
																 			return config.topology.nopenclgpus;
															
 
																+		case STARPU_MIC_WORKER:
															
 
																+			return config.topology.nmicdevices;
															
 
																+
															
 
																+		case STARPU_SCC_WORKER:
															
 
																+			return config.topology.nsccdevices;
															
 
																+
															
 
																 		default:
															
 
																 			return -EINVAL;
															
 
																 	}
															
@@ -1073,6 +1299,26 @@ int starpu_asynchronous_opencl_copy_disabled(void)
 
																 	return config.conf->disable_asynchronous_opencl_copy;
															
 
																 }
															
 
																+int starpu_asynchronous_mic_copy_disabled(void)
															
 
																+{
															
 
																+	return config.conf->disable_asynchronous_mic_copy;
															
 
																+}
															
 
																+
															
 
																+unsigned starpu_mic_worker_get_count(void)
															
 
																+{
															
 
																+	int i = 0, count = 0;
															
 
																+	
															
 
																+	for (i = 0; i < STARPU_MAXMICDEVS; i++)
															
 
																+		count += config.topology.nmiccores[i];
															
 
																+	
															
 
																+	return count;
															
 
																+}
															
 
																+
															
 
																+unsigned starpu_scc_worker_get_count(void)
															
 
																+{
															
 
																+	return config.topology.nsccdevices;
															
 
																+}
															
 
																+
															
 
																 /* When analyzing performance, it is useful to see what is the processing unit
															
 
																  * that actually performed the task. This function returns the id of the
															
 
																  * processing unit actually executing it, therefore it makes no sense to use it
															
@@ -1146,6 +1392,11 @@ int starpu_combined_worker_get_rank(void)
 
																 	}
															
 
																 }
															
 
																+int starpu_worker_get_mp_nodeid(int id)
															
 
																+{
															
 
																+	return config.workers[id].mp_nodeid;
															
 
																+}
															
 
																+
															
 
																 int starpu_worker_get_devid(int id)
															
 
																 {
															
 
																 	return config.workers[id].devid;
															
--- a/src/core/workers.h
+++ b/src/core/workers.h
@@ -37,6 +37,15 @@
 
																 #include <drivers/cuda/driver_cuda.h>
															
 
																 #include <drivers/opencl/driver_opencl.h>
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+#include <drivers/mic/driver_mic_source.h>
															
 
																+#endif /* STARPU_USE_MIC */
															
 
																+
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+#include <drivers/scc/driver_scc_source.h>
															
 
																+#endif
															
 
																+
															
 
																+
															
 
																 #include <drivers/cpu/driver_cpu.h>
															
 
																 #include <datawizard/datawizard.h>
															
@@ -51,6 +60,8 @@ struct _starpu_worker
 
																 	uint32_t worker_mask; /* what is the type of worker ? */
															
 
																 	enum starpu_perfmodel_archtype perf_arch; /* in case there are different models of the same arch */
															
 
																 	starpu_pthread_t worker_thread; /* the thread which runs the worker */
															
 
																+	int mp_nodeid; /* which mp node holds the cpu/gpu/etc (-1 for this
															
 
																+			* node) */
															
 
																 	unsigned devid; /* which cpu/gpu/etc is controlled by the worker ? */
															
 
																 	int bindid; /* which cpu is the driver bound to ? (logical index) */
															
 
																 	int workerid; /* uniquely identify the worker among all processing units types */
															
@@ -127,7 +138,7 @@ struct _starpu_worker_set
 
																         starpu_pthread_mutex_t mutex;
															
 
																 	starpu_pthread_t worker_thread; /* the thread which runs the worker */
															
 
																 	unsigned nworkers;
															
 
																-	unsigned joined; /* only one thread may call pthread_join*/
															
 
																+	unsigned started; /* Only one thread for the whole set */
															
 
																 	void *retval;
															
 
																 	struct _starpu_worker *workers;
															
 
																         starpu_pthread_cond_t ready_cond; /* indicate when the set is ready */
															
@@ -151,6 +162,12 @@ struct _starpu_machine_config
 
																 	/* Which GPU(s) do we use for OpenCL ? */
															
 
																 	int current_opencl_gpuid;
															
 
																+	/* Which MIC do we use? */
															
 
																+	int current_mic_deviceid;
															
 
																+
															
 
																+	/* Which SCC do we use? */
															
 
																+	int current_scc_deviceid;
															
 
																+
															
 
																 	/* Basic workers : each of this worker is running its own driver and
															
 
																 	 * can be combined with other basic workers. */
															
 
																 	struct _starpu_worker workers[STARPU_NMAXWORKERS];
															
@@ -181,6 +198,11 @@ struct _starpu_machine_config
 
																 	unsigned submitting;
															
 
																 };
															
 
																+/* Three functions to manage argv, argc */
															
 
																+void _starpu_set_argc_argv(int *argc, char ***argv);
															
 
																+int *_starpu_get_argc();
															
 
																+char ***_starpu_get_argv();
															
 
																+
															
 
																 /* Fill conf with environment variables */
															
 
																 void _starpu_conf_check_environment(struct starpu_conf *conf);
															
@@ -199,6 +221,9 @@ uint32_t _starpu_can_submit_cpu_task(void);
 
																 /* Is there a worker that can execute OpenCL code ? */
															
 
																 uint32_t _starpu_can_submit_opencl_task(void);
															
 
																+/* Is there a worker that can execute SCC code ? */
															
 
																+uint32_t _starpu_can_submit_scc_task(void);
															
 
																+
															
 
																 /* Check whether there is anything that the worker should do instead of
															
 
																  * sleeping (waiting on something to happen). */
															
 
																 unsigned _starpu_worker_can_block(unsigned memnode);
															
--- a/src/datawizard/coherency.c
+++ b/src/datawizard/coherency.c
@@ -180,6 +180,11 @@ static int worker_supports_direct_access(unsigned node, unsigned handling_node)
 
																 #endif
															
 
																 		case STARPU_OPENCL_RAM:
															
 
																 			return 0;
															
 
																+		case STARPU_MIC_RAM:
															
 
																+			/* We don't handle direct MIC-MIC transfers yet */
															
 
																+			return 0;
															
 
																+		case STARPU_SCC_RAM:
															
 
																+			return 1;
															
 
																 		default:
															
 
																 			return 1;
															
 
																 	}
															
@@ -346,6 +351,9 @@ struct _starpu_data_request *_starpu_create_request_to_fetch_data(starpu_data_ha
 
																 								  unsigned async,
															
 
																 								  void (*callback_func)(void *), void *callback_arg)
															
 
																 {
															
 
																+	/* We don't care about commuting for data requests, that was handled before. */
															
 
																+	mode &= ~STARPU_COMMUTE;
															
 
																+
															
 
																 	/* This function is called with handle's header lock taken */
															
 
																 	_starpu_spin_checklocked(&handle->header_lock);
															
--- a/src/datawizard/coherency.h
+++ b/src/datawizard/coherency.h
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2009-2012  Université de Bordeaux 1
															
 
																+ * Copyright (C) 2009-2013  Université de Bordeaux 1
															
 
																  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -106,6 +106,9 @@ struct _starpu_data_state
 
																 	 * the req_list anymore), i.e. the number of holders of the
															
 
																 	 * current_mode rwlock */
															
 
																 	unsigned refcnt;
															
 
																+	/* Current access mode. Is always either STARPU_R, STARPU_W,
															
 
																+	 * STARPU_SCRATCH or STARPU_REDUX, but never a combination such as
															
 
																+	 * STARPU_RW. */
															
 
																 	enum starpu_data_access_mode current_mode;
															
 
																 	/* protect meta data */
															
 
																 	struct _starpu_spinlock header_lock;
															
@@ -138,7 +141,7 @@ struct _starpu_data_state
 
																 	/* Footprint which identifies data layout */
															
 
																 	uint32_t footprint;
															
 
																-	/* where is the data home ? -1 if none yet */
															
 
																+	/* where is the data home, i.e. which node it was registered from ? -1 if none yet */
															
 
																 	int home_node;
															
 
																 	/* what is the default write-through mask for that data ? */
															
@@ -163,8 +166,8 @@ struct _starpu_data_state
 
																 	 * read-only mode should depend on that task implicitely if the
															
 
																 	 * sequential_consistency flag is enabled. */
															
 
																 	enum starpu_data_access_mode last_submitted_mode;
															
 
																-	struct starpu_task *last_submitted_writer;
															
 
																-	struct _starpu_task_wrapper_list *last_submitted_readers;
															
 
																+	struct starpu_task *last_sync_task;
															
 
																+	struct _starpu_task_wrapper_list *last_submitted_accessors;
															
 
																 	/* If FxT is enabled, we keep track of "ghost dependencies": that is to
															
 
																 	 * say the dependencies that are not needed anymore, but that should
															
@@ -172,9 +175,9 @@ struct _starpu_data_state
 
																 	 * f(Aw) g(Aw), and that g is submitted after the termination of f, we
															
 
																 	 * want to have f->g appear in the DAG even if StarPU does not need to
															
 
																 	 * enforce this dependency anymore.*/
															
 
																-	unsigned last_submitted_ghost_writer_id_is_valid;
															
 
																-	unsigned long last_submitted_ghost_writer_id;
															
 
																-	struct _starpu_jobid_list *last_submitted_ghost_readers_id;
															
 
																+	unsigned last_submitted_ghost_sync_id_is_valid;
															
 
																+	unsigned long last_submitted_ghost_sync_id;
															
 
																+	struct _starpu_jobid_list *last_submitted_ghost_accessors_id;
															
 
																 	struct _starpu_task_wrapper_list *post_sync_tasks;
															
 
																 	unsigned post_sync_tasks_cnt;
															
--- a/src/datawizard/copy_driver.c
+++ b/src/datawizard/copy_driver.c
@@ -320,6 +320,83 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 
																 		}
															
 
																 		break;
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_MIC_RAM):
															
 
																+		/* RAM -> MIC */
															
 
																+		if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_mic_copy_disabled() ||
															
 
																+				!(copy_methods->ram_to_mic_async || copy_methods->any_to_any))
															
 
																+		{
															
 
																+			/* this is not associated to a request so it's synchronous */
															
 
																+			STARPU_ASSERT(copy_methods->ram_to_mic || copy_methods->any_to_any);
															
 
																+			if (copy_methods->ram_to_mic)
															
 
																+				copy_methods->ram_to_mic(src_interface, src_node, dst_interface, dst_node);
															
 
																+			else
															
 
																+				copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
															
 
																+		}
															
 
																+		else
															
 
																+		{
															
 
																+			req->async_channel.type = STARPU_MIC_RAM;
															
 
																+			if (copy_methods->ram_to_mic_async)
															
 
																+				ret = copy_methods->ram_to_mic_async(src_interface, src_node, dst_interface, dst_node);
															
 
																+			else
															
 
																+			{
															
 
																+				STARPU_ASSERT(copy_methods->any_to_any);
															
 
																+				ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
															
 
																+			}
															
 
																+			_starpu_mic_init_event(&(req->async_channel.event.mic_event), dst_node);
															
 
																+		}
															
 
																+		break;
															
 
																+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_MIC_RAM,STARPU_CPU_RAM):
															
 
																+		/* MIC -> RAM */
															
 
																+		if (!req || starpu_asynchronous_copy_disabled() || starpu_asynchronous_mic_copy_disabled() ||
															
 
																+				!(copy_methods->mic_to_ram_async || copy_methods->any_to_any))
															
 
																+		{
															
 
																+			/* this is not associated to a request so it's synchronous */
															
 
																+			STARPU_ASSERT(copy_methods->mic_to_ram || copy_methods->any_to_any);
															
 
																+			if (copy_methods->mic_to_ram)
															
 
																+				copy_methods->mic_to_ram(src_interface, src_node, dst_interface, dst_node);
															
 
																+			else
															
 
																+				copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
															
 
																+		}
															
 
																+		else
															
 
																+		{
															
 
																+			req->async_channel.type = STARPU_MIC_RAM;
															
 
																+			if (copy_methods->mic_to_ram_async)
															
 
																+				ret = copy_methods->mic_to_ram_async(src_interface, src_node, dst_interface, dst_node);
															
 
																+			else
															
 
																+			{
															
 
																+				STARPU_ASSERT(copy_methods->any_to_any);
															
 
																+				ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
															
 
																+			}
															
 
																+			_starpu_mic_init_event(&(req->async_channel.event.mic_event), src_node);
															
 
																+		}
															
 
																+		break;
															
 
																+#endif
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+		/* SCC RAM associated to the master process is considered as
															
 
																+		 * the main memory node. */
															
 
																+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_SCC_RAM):
															
 
																+		/* master private SCC RAM -> slave private SCC RAM */
															
 
																+		if (copy_methods->scc_src_to_sink)
															
 
																+			copy_methods->scc_src_to_sink(src_interface, src_node, dst_interface, dst_node);
															
 
																+		else
															
 
																+			copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
															
 
																+		break;
															
 
																+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_SCC_RAM,STARPU_CPU_RAM):
															
 
																+		/* slave private SCC RAM -> master private SCC RAM */
															
 
																+		if (copy_methods->scc_sink_to_src)
															
 
																+			copy_methods->scc_sink_to_src(src_interface, src_node, dst_interface, dst_node);
															
 
																+		else
															
 
																+			copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
															
 
																+		break;
															
 
																+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_SCC_RAM,STARPU_SCC_RAM):
															
 
																+		/* slave private SCC RAM -> slave private SCC RAM */
															
 
																+		if (copy_methods->scc_sink_to_sink)
															
 
																+			copy_methods->scc_sink_to_sink(src_interface, src_node, dst_interface, dst_node);
															
 
																+		else
															
 
																+			copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
															
 
																+		break;
															
 
																+#endif
															
 
																 	default:
															
 
																 		STARPU_ABORT();
															
 
																 		break;
															
@@ -438,6 +515,47 @@ int starpu_interface_copy(uintptr_t src, size_t src_offset, unsigned src_node, u
 
																 				size,
															
 
																 				&async_channel->event.opencl_event);
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_MIC_RAM,STARPU_CPU_RAM):
															
 
																+		if (async_data)
															
 
																+			return _starpu_mic_copy_mic_to_ram_async(
															
 
																+					(void*) src + src_offset, src_node,
															
 
																+					(void*) dst + dst_offset, dst_node,
															
 
																+					size);
															
 
																+		else
															
 
																+			return _starpu_mic_copy_mic_to_ram(
															
 
																+					(void*) src + src_offset, src_node,
															
 
																+					(void*) dst + dst_offset, dst_node,
															
 
																+					size);
															
 
																+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_MIC_RAM):
															
 
																+		if (async_data)
															
 
																+			return _starpu_mic_copy_ram_to_mic_async(
															
 
																+					(void*) src + src_offset, src_node,
															
 
																+					(void*) dst + dst_offset, dst_node,
															
 
																+					size);
															
 
																+		else
															
 
																+			return _starpu_mic_copy_ram_to_mic(
															
 
																+					(void*) src + src_offset, src_node,
															
 
																+					(void*) dst + dst_offset, dst_node,
															
 
																+					size);
															
 
																+#endif
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_SCC_RAM,STARPU_CPU_RAM): /* sink -> source */
															
 
																+		return _starpu_scc_copy_sink_to_src( /* return: do not fall through to the next case */
															
 
																+				(void*) src + src_offset, src_node,
															
 
																+				(void*) dst + dst_offset, dst_node,
															
 
																+				size);
															
 
																+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_SCC_RAM): /* source -> sink */
															
 
																+		return _starpu_scc_copy_src_to_sink(
															
 
																+				(void*) src + src_offset, src_node,
															
 
																+				(void*) dst + dst_offset, dst_node,
															
 
																+				size);
															
 
																+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_SCC_RAM,STARPU_SCC_RAM): /* sink -> sink */
															
 
																+		return _starpu_scc_copy_sink_to_sink( /* previously fell into default: STARPU_ABORT() */
															
 
																+				(void*) src + src_offset, src_node,
															
 
																+				(void*) dst + dst_offset, dst_node,
															
 
																+				size);
															
 
																+#endif
															
 
																 	default:
															
 
																 		STARPU_ABORT();
															
 
																 		return -1;
															
@@ -490,6 +608,11 @@ void _starpu_driver_wait_request_completion(struct _starpu_async_channel *async_
 
																 	      break;
															
 
																 	}
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+	case STARPU_MIC_RAM:
															
 
																+		_starpu_mic_wait_request_completion(&(async_channel->event.mic_event));
															
 
																+		break;
															
 
																+#endif
															
 
																 	case STARPU_CPU_RAM:
															
 
																 	default:
															
 
																 		STARPU_ABORT();
															
@@ -541,6 +664,11 @@ unsigned _starpu_driver_test_request_completion(struct _starpu_async_channel *as
 
																 		break;
															
 
																 	}
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+	case STARPU_MIC_RAM:
															
 
																+		success = _starpu_mic_request_is_complete(&(async_channel->event.mic_event));
															
 
																+		break;
															
 
																+#endif
															
 
																 	case STARPU_CPU_RAM:
															
 
																 	default:
															
 
																 		STARPU_ABORT();
															
--- a/src/datawizard/copy_driver.h
+++ b/src/datawizard/copy_driver.h
@@ -36,6 +36,18 @@
 
																 struct _starpu_data_request;
															
 
																 struct _starpu_data_replicate;
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+/* MIC needs memory_node to know which MIC is concerned.
															
 
																+ * mark is used to wait asynchronous request.
															
 
																+ * signal is used to test asynchronous request. */
															
 
																+struct _starpu_mic_async_event
															
 
																+{
															
 
																+	unsigned memory_node;
															
 
																+	int mark;
															
 
																+	uint64_t *signal;
															
 
																+};
															
 
																+#endif
															
 
																+
															
 
																 /* this is a structure that can be queried to see whether an asynchronous
															
 
																  * transfer has terminated or not */
															
 
																 union _starpu_async_channel_event
															
@@ -54,6 +66,9 @@ union _starpu_async_channel_event
 
																 #ifdef STARPU_USE_OPENCL
															
 
																         cl_event opencl_event;
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+	struct _starpu_mic_async_event mic_event;
															
 
																+#endif
															
 
																 };
															
 
																 struct _starpu_async_channel
															
--- a/src/datawizard/data_request.h
+++ b/src/datawizard/data_request.h
@@ -43,7 +43,10 @@ LIST_TYPE(_starpu_data_request,
 
																 	struct _starpu_data_replicate *src_replicate;
															
 
																 	struct _starpu_data_replicate *dst_replicate;
															
 
																-	/* Which memory node will actually perform the transfer */
															
 
																+	/* Which memory node will actually perform the transfer.
															
 
																+	 * This is important in the CUDA/OpenCL case, where only the worker for
															
 
																+	 * the node can make the CUDA/OpenCL calls.
															
 
																+	 */
															
 
																 	unsigned handling_node;
															
 
																 	/*
															
--- a/src/datawizard/filters.c
+++ b/src/datawizard/filters.c
@@ -184,8 +184,8 @@ void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_da
 
																 		STARPU_PTHREAD_MUTEX_INIT(&child->sequential_consistency_mutex, NULL);
															
 
																 		child->last_submitted_mode = STARPU_R;
															
 
																-		child->last_submitted_writer = NULL;
															
 
																-		child->last_submitted_readers = NULL;
															
 
																+		child->last_sync_task = NULL;
															
 
																+		child->last_submitted_accessors = NULL;
															
 
																 		child->post_sync_tasks = NULL;
															
 
																 		child->post_sync_tasks_cnt = 0;
															
@@ -195,9 +195,9 @@ void starpu_data_partition(starpu_data_handle_t initial_handle, struct starpu_da
 
																 		child->init_cl = initial_handle->init_cl;
															
 
																 #ifdef STARPU_USE_FXT
															
 
																-		child->last_submitted_ghost_writer_id_is_valid = 0;
															
 
																-		child->last_submitted_ghost_writer_id = 0;
															
 
																-		child->last_submitted_ghost_readers_id = NULL;
															
 
																+		child->last_submitted_ghost_sync_id_is_valid = 0;
															
 
																+		child->last_submitted_ghost_sync_id = 0;
															
 
																+		child->last_submitted_ghost_accessors_id = NULL;
															
 
																 #endif
															
 
																 		for (node = 0; node < STARPU_MAXNODES; node++)
															
--- a/src/datawizard/interfaces/bcsr_filters.c
+++ b/src/datawizard/interfaces/bcsr_filters.c
@@ -35,6 +35,7 @@ void starpu_bcsr_filter_canonical_block(void *father_interface, void *child_inte
 
																 	uint32_t ptr_offset = c*r*id*elemsize;
															
 
																+	matrix_child->id = STARPU_MATRIX_INTERFACE_ID;
															
 
																 	matrix_child->nx = c;
															
 
																 	matrix_child->ny = r;
															
 
																 	matrix_child->ld = c;
															
--- a/src/datawizard/interfaces/bcsr_interface.c
+++ b/src/datawizard/interfaces/bcsr_interface.c
@@ -46,7 +46,7 @@ static int bcsr_compare(void *data_interface_a, void *data_interface_b);
 
																 static uint32_t footprint_bcsr_interface_crc32(starpu_data_handle_t handle);
															
 
																-static struct starpu_data_interface_ops interface_bcsr_ops =
															
 
																+struct starpu_data_interface_ops starpu_interface_bcsr_ops =
															
 
																 {
															
 
																 	.register_data_handle = register_bcsr_handle,
															
 
																 	.allocate_data_on_node = allocate_bcsr_buffer_on_node,
															
@@ -82,6 +82,7 @@ static void register_bcsr_handle(starpu_data_handle_t handle, unsigned home_node
 
																 			local_interface->rowptr = NULL;
															
 
																 		}
															
 
																+		local_interface->id = bcsr_interface->id;
															
 
																 		local_interface->nnz = bcsr_interface->nnz;
															
 
																 		local_interface->nrow = bcsr_interface->nrow;
															
 
																 		local_interface->firstentry = bcsr_interface->firstentry;
															
@@ -98,6 +99,7 @@ void starpu_bcsr_data_register(starpu_data_handle_t *handleptr, unsigned home_no
 
																 {
															
 
																 	struct starpu_bcsr_interface bcsr_interface =
															
 
																 	{
															
 
																+		.id = STARPU_BCSR_INTERFACE_ID,
															
 
																 		.nzval = nzval,
															
 
																 		.colind = colind,
															
 
																 		.rowptr = rowptr,
															
@@ -109,7 +111,7 @@ void starpu_bcsr_data_register(starpu_data_handle_t *handleptr, unsigned home_no
 
																 		.elemsize = elemsize
															
 
																 	};
															
 
																-	starpu_data_register(handleptr, home_node, &bcsr_interface, &interface_bcsr_ops);
															
 
																+	starpu_data_register(handleptr, home_node, &bcsr_interface, &starpu_interface_bcsr_ops);
															
 
																 }
															
 
																 static uint32_t footprint_bcsr_interface_crc32(starpu_data_handle_t handle)
															
--- a/src/datawizard/interfaces/block_filters.c
+++ b/src/datawizard/interfaces/block_filters.c
@@ -37,6 +37,7 @@ void starpu_block_filter_block(void *father_interface, void *child_interface, ST
 
																 	_starpu_filter_nparts_compute_chunk_size_and_offset(nx, nparts, elemsize, id, 1,
															
 
																 				       &chunk_size, &offset);
															
 
																+	block_child->id = block_father->id;
															
 
																 	block_child->nx = chunk_size;
															
 
																 	block_child->ny = ny;
															
 
																 	block_child->nz = nz;
															
--- a/src/datawizard/interfaces/block_interface.c
+++ b/src/datawizard/interfaces/block_interface.c
@@ -26,6 +26,8 @@
 
																 #include <starpu_cuda.h>
															
 
																 #include <starpu_opencl.h>
															
 
																 #include <drivers/opencl/driver_opencl.h>
															
 
																+#include <drivers/scc/driver_scc_source.h>
															
 
																+#include <drivers/mic/driver_mic_source.h>
															
 
																 static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
															
 
																 #ifdef STARPU_USE_CUDA
															
@@ -43,6 +45,17 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 
																 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cl_event *event);
															
 
																 static int copy_opencl_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cl_event *event);
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+static int copy_scc_src_to_sink(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																+static int copy_scc_sink_to_src(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																+static int copy_scc_sink_to_sink(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																+#endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+static int copy_ram_to_mic(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																+static int copy_mic_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																+static int copy_ram_to_mic_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																+static int copy_mic_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																+#endif
															
 
																 static const struct starpu_data_copy_methods block_copy_data_methods_s =
															
 
																 {
															
@@ -62,6 +75,17 @@ static const struct starpu_data_copy_methods block_copy_data_methods_s =
 
																 	.opencl_to_ram_async = copy_opencl_to_ram_async,
															
 
																 	.opencl_to_opencl_async = copy_opencl_to_opencl_async,
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+	.scc_src_to_sink = copy_scc_src_to_sink,
															
 
																+	.scc_sink_to_src = copy_scc_sink_to_src,
															
 
																+	.scc_sink_to_sink = copy_scc_sink_to_sink,
															
 
																+#endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+	.ram_to_mic = copy_ram_to_mic,
															
 
																+	.mic_to_ram = copy_mic_to_ram,
															
 
																+	.ram_to_mic_async = copy_ram_to_mic_async,
															
 
																+	.mic_to_ram_async = copy_mic_to_ram_async,
															
 
																+#endif
															
 
																 };
															
@@ -74,7 +98,7 @@ static uint32_t footprint_block_interface_crc32(starpu_data_handle_t handle);
 
																 static int block_compare(void *data_interface_a, void *data_interface_b);
															
 
																 static void display_block_interface(starpu_data_handle_t handle, FILE *f);
															
 
																-static struct starpu_data_interface_ops interface_block_ops =
															
 
																+struct starpu_data_interface_ops starpu_interface_block_ops =
															
 
																 {
															
 
																 	.register_data_handle = register_block_handle,
															
 
																 	.allocate_data_on_node = allocate_block_buffer_on_node,
															
@@ -126,6 +150,7 @@ static void register_block_handle(starpu_data_handle_t handle, unsigned home_nod
 
																 			local_interface->ldz  = 0;
															
 
																 		}
															
 
																+		local_interface->id = block_interface->id;
															
 
																 		local_interface->nx = block_interface->nx;
															
 
																 		local_interface->ny = block_interface->ny;
															
 
																 		local_interface->nz = block_interface->nz;
															
@@ -140,6 +165,7 @@ void starpu_block_data_register(starpu_data_handle_t *handleptr, unsigned home_n
 
																 {
															
 
																 	struct starpu_block_interface block_interface =
															
 
																 	{
															
 
																+		.id = STARPU_BLOCK_INTERFACE_ID,
															
 
																 		.ptr = ptr,
															
 
																                 .dev_handle = ptr,
															
 
																                 .offset = 0,
															
@@ -151,7 +177,12 @@ void starpu_block_data_register(starpu_data_handle_t *handleptr, unsigned home_n
 
																 		.elemsize = elemsize
															
 
																 	};
															
 
																-	starpu_data_register(handleptr, home_node, &block_interface, &interface_block_ops);
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+	_starpu_scc_set_offset_in_shared_memory((void*)block_interface.ptr,
															
 
																+			(void**)&(block_interface.dev_handle), &(block_interface.offset));
															
 
																+#endif
															
 
																+
															
 
																+	starpu_data_register(handleptr, home_node, &block_interface, &starpu_interface_block_ops);
															
 
																 }
															
 
																 static uint32_t footprint_block_interface_crc32(starpu_data_handle_t handle)
															
@@ -584,6 +615,170 @@ static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_A
 
																 #endif
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+/* Copy a 3D block row by row through _starpu_scc_copy_src_to_sink().
																+ *
																+ * nx, ny, nz and elemsize are read from the destination interface; each
																+ * side's own ldy/ldz strides locate the start of every (y, z) row, and
																+ * nx*elemsize contiguous bytes are transferred per row.
																+ *
																+ * Always returns 0. */
																+static int copy_scc_src_to_sink(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
																+{
																+	uint32_t nx = STARPU_BLOCK_GET_NX(dst_interface);
																+	uint32_t ny = STARPU_BLOCK_GET_NY(dst_interface);
																+	uint32_t nz = STARPU_BLOCK_GET_NZ(dst_interface);
																+
																+	size_t elemsize = STARPU_BLOCK_GET_ELEMSIZE(dst_interface);
																+
																+	uint32_t src_ldy = STARPU_BLOCK_GET_LDY(src_interface);
																+	uint32_t src_ldz = STARPU_BLOCK_GET_LDZ(src_interface);
																+	uint32_t dst_ldy = STARPU_BLOCK_GET_LDY(dst_interface);
																+	uint32_t dst_ldz = STARPU_BLOCK_GET_LDZ(dst_interface);
																+
																+	/* Byte pointers: arithmetic on void * is only a GNU extension. */
																+	char *src_ptr = (char *)STARPU_BLOCK_GET_PTR(src_interface);
																+	char *dst_ptr = (char *)STARPU_BLOCK_GET_PTR(dst_interface);
																+
																+	unsigned y, z;
																+	for (z = 0; z < nz; ++z)
																+	{
																+		for (y = 0; y < ny; ++y)
																+		{
																+			/* Offsets are computed in size_t: a 32-bit value would
																+			 * wrap once a row starts 4 GiB or more into the block. */
																+			size_t src_offset = ((size_t)y*src_ldy + (size_t)z*src_ldz) * elemsize;
																+			size_t dst_offset = ((size_t)y*dst_ldy + (size_t)z*dst_ldz) * elemsize;
																+
																+			_starpu_scc_copy_src_to_sink(src_ptr + src_offset, src_node,
																+							dst_ptr + dst_offset, dst_node, nx*elemsize);
																+		}
																+	}
																+
																+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)nx*ny*nz*elemsize);
																+
																+	return 0;
																+}
															
 
																+
															
 
																+/* Copy a 3D block row by row through _starpu_scc_copy_sink_to_src().
																+ *
																+ * nx, ny, nz and elemsize are read from the destination interface; each
																+ * side's own ldy/ldz strides locate the start of every (y, z) row, and
																+ * nx*elemsize contiguous bytes are transferred per row.
																+ *
																+ * Always returns 0. */
																+static int copy_scc_sink_to_src(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
																+{
																+	uint32_t nx = STARPU_BLOCK_GET_NX(dst_interface);
																+	uint32_t ny = STARPU_BLOCK_GET_NY(dst_interface);
																+	uint32_t nz = STARPU_BLOCK_GET_NZ(dst_interface);
																+
																+	size_t elemsize = STARPU_BLOCK_GET_ELEMSIZE(dst_interface);
																+
																+	uint32_t src_ldy = STARPU_BLOCK_GET_LDY(src_interface);
																+	uint32_t src_ldz = STARPU_BLOCK_GET_LDZ(src_interface);
																+	uint32_t dst_ldy = STARPU_BLOCK_GET_LDY(dst_interface);
																+	uint32_t dst_ldz = STARPU_BLOCK_GET_LDZ(dst_interface);
																+
																+	/* Byte pointers: arithmetic on void * is only a GNU extension. */
																+	char *src_ptr = (char *)STARPU_BLOCK_GET_PTR(src_interface);
																+	char *dst_ptr = (char *)STARPU_BLOCK_GET_PTR(dst_interface);
																+
																+	unsigned y, z;
																+	for (z = 0; z < nz; ++z)
																+	{
																+		for (y = 0; y < ny; ++y)
																+		{
																+			/* Offsets are computed in size_t: a 32-bit value would
																+			 * wrap once a row starts 4 GiB or more into the block. */
																+			size_t src_offset = ((size_t)y*src_ldy + (size_t)z*src_ldz) * elemsize;
																+			size_t dst_offset = ((size_t)y*dst_ldy + (size_t)z*dst_ldz) * elemsize;
																+
																+			_starpu_scc_copy_sink_to_src(src_ptr + src_offset, src_node,
																+							dst_ptr + dst_offset, dst_node, nx*elemsize);
																+		}
																+	}
																+
																+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)nx*ny*nz*elemsize);
																+
																+	return 0;
																+}
															
 
																+
															
 
																+/* Copy a 3D block row by row through _starpu_scc_copy_sink_to_sink().
																+ *
																+ * nx, ny, nz and elemsize are read from the destination interface; each
																+ * side's own ldy/ldz strides locate the start of every (y, z) row, and
																+ * nx*elemsize contiguous bytes are transferred per row.
																+ *
																+ * Always returns 0. */
																+static int copy_scc_sink_to_sink(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
																+{
																+	uint32_t nx = STARPU_BLOCK_GET_NX(dst_interface);
																+	uint32_t ny = STARPU_BLOCK_GET_NY(dst_interface);
																+	uint32_t nz = STARPU_BLOCK_GET_NZ(dst_interface);
																+
																+	size_t elemsize = STARPU_BLOCK_GET_ELEMSIZE(dst_interface);
																+
																+	uint32_t src_ldy = STARPU_BLOCK_GET_LDY(src_interface);
																+	uint32_t src_ldz = STARPU_BLOCK_GET_LDZ(src_interface);
																+	uint32_t dst_ldy = STARPU_BLOCK_GET_LDY(dst_interface);
																+	uint32_t dst_ldz = STARPU_BLOCK_GET_LDZ(dst_interface);
																+
																+	/* Byte pointers: arithmetic on void * is only a GNU extension. */
																+	char *src_ptr = (char *)STARPU_BLOCK_GET_PTR(src_interface);
																+	char *dst_ptr = (char *)STARPU_BLOCK_GET_PTR(dst_interface);
																+
																+	unsigned y, z;
																+	for (z = 0; z < nz; ++z)
																+	{
																+		for (y = 0; y < ny; ++y)
																+		{
																+			/* Offsets are computed in size_t: a 32-bit value would
																+			 * wrap once a row starts 4 GiB or more into the block. */
																+			size_t src_offset = ((size_t)y*src_ldy + (size_t)z*src_ldz) * elemsize;
																+			size_t dst_offset = ((size_t)y*dst_ldy + (size_t)z*dst_ldz) * elemsize;
																+
																+			_starpu_scc_copy_sink_to_sink(src_ptr + src_offset, src_node,
																+					dst_ptr + dst_offset, dst_node, nx*elemsize);
																+		}
																+	}
																+
																+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)nx*ny*nz*elemsize);
																+
																+	return 0;
																+}
															
 
																+#endif /* STARPU_USE_SCC */
															
 
																+
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+/* Row-by-row block copy shared by the MIC copy methods of this file.
																+ *
																+ * nx, ny, nz and elemsize are read from the destination block; each side's
																+ * own ldy/ldz strides locate the start of every (y, z) row.  copy_func is
																+ * invoked once per row with the row addresses, both node numbers and a
																+ * length of nx*elemsize bytes.
																+ *
																+ * NOTE(review): the value returned by copy_func is discarded, so this
																+ * function returns 0 whatever the per-row calls report -- confirm that the
																+ * copy routines handle their own failures. */
																+static int copy_mic_common(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node,
																+						   int (*copy_func)(void *, unsigned, void *, unsigned, size_t))
																+{
																+	struct starpu_block_interface *src_block = src_interface;
																+	struct starpu_block_interface *dst_block = dst_interface;
																+
																+	uint32_t nx = dst_block->nx;
																+	uint32_t ny = dst_block->ny;
																+	uint32_t nz = dst_block->nz;
																+	size_t elemsize = dst_block->elemsize;
																+
																+	uint32_t ldy_src = src_block->ldy;
																+	uint32_t ldz_src = src_block->ldz;
																+	uint32_t ldy_dst = dst_block->ldy;
																+	uint32_t ldz_dst = dst_block->ldz;
																+
																+	uintptr_t ptr_src = src_block->ptr;
																+	uintptr_t ptr_dst = dst_block->ptr;
																+
																+	unsigned y, z;
																+	for (z = 0; z < nz; z++)
																+	{
																+		for (y = 0; y < ny; y++)
																+		{
																+			/* Offsets are computed in size_t: a 32-bit value would
																+			 * wrap once a row starts 4 GiB or more into the block. */
																+			size_t src_offset = ((size_t)y*ldy_src + (size_t)z*ldz_src)*elemsize;
																+			size_t dst_offset = ((size_t)y*ldy_dst + (size_t)z*ldz_dst)*elemsize;
																+
																+			copy_func((void *)(ptr_src + src_offset), src_node, (void *)(ptr_dst + dst_offset), dst_node, nx*elemsize);
																+		}
																+	}
																+
																+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)nx*ny*nz*elemsize);
																+
																+	return 0;
																+}
															
 
																+/* Block copy method that hands the work to copy_mic_common(), using
																+ * _starpu_mic_copy_ram_to_mic as the per-row copy routine, and returns
																+ * whatever copy_mic_common() returns. */
																+static int copy_ram_to_mic(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
																+{
																+	int status = copy_mic_common(src_interface, src_node,
																+				     dst_interface, dst_node,
																+				     _starpu_mic_copy_ram_to_mic);
																+
																+	return status;
																+}
															
 
																+
															
 
																+/* Block copy method that hands the work to copy_mic_common(), using
																+ * _starpu_mic_copy_mic_to_ram as the per-row copy routine, and returns
																+ * whatever copy_mic_common() returns. */
																+static int copy_mic_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
																+{
																+	int status = copy_mic_common(src_interface, src_node,
																+				     dst_interface, dst_node,
																+				     _starpu_mic_copy_mic_to_ram);
																+
																+	return status;
																+}
															
 
																+
															
 
																+/* Block copy method that runs copy_mic_common() with
																+ * _starpu_mic_copy_ram_to_mic_async as the per-row copy routine.
																+ *
																+ * The result of copy_mic_common() is dropped and -EAGAIN is returned
																+ * unconditionally (presumably to signal that the transfer is still in
																+ * flight -- TODO confirm against the caller). */
																+static int copy_ram_to_mic_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
																+{
																+	(void) copy_mic_common(src_interface, src_node,
																+			       dst_interface, dst_node,
																+			       _starpu_mic_copy_ram_to_mic_async);
																+
																+	return -EAGAIN;
																+}
															
 
																+
															
 
																+/* Block copy method that runs copy_mic_common() with
																+ * _starpu_mic_copy_mic_to_ram_async as the per-row copy routine.
																+ *
																+ * The result of copy_mic_common() is dropped and -EAGAIN is returned
																+ * unconditionally (presumably to signal that the transfer is still in
																+ * flight -- TODO confirm against the caller). */
																+static int copy_mic_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
																+{
																+	(void) copy_mic_common(src_interface, src_node,
																+			       dst_interface, dst_node,
																+			       _starpu_mic_copy_mic_to_ram_async);
																+
																+	return -EAGAIN;
																+}
															
 
																+#endif
															
 
																+
															
 
																 /* as not all platform easily have a BLAS lib installed ... */
															
 
																 static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
															
 
																 {
															
--- a/src/datawizard/interfaces/coo_interface.c
+++ b/src/datawizard/interfaces/coo_interface.c
@@ -89,6 +89,7 @@ register_coo_handle(starpu_data_handle_t handle, unsigned home_node,
 
																 			local_interface->rows = 0;
															
 
																 		}
															
 
																+		local_interface->id = coo_interface->id;
															
 
																 		local_interface->nx = coo_interface->nx;
															
 
																 		local_interface->ny = coo_interface->ny;
															
 
																 		local_interface->n_values = coo_interface->n_values;
															
@@ -189,7 +190,7 @@ display_coo_interface(starpu_data_handle_t handle, FILE *f)
 
																 	fprintf(f, "%u\t%u", coo_interface->nx, coo_interface->ny);
															
 
																 }
															
 
																-struct starpu_data_interface_ops _starpu_interface_coo_ops =
															
 
																+struct starpu_data_interface_ops starpu_interface_coo_ops =
															
 
																 {
															
 
																 	.register_data_handle  = register_coo_handle,
															
 
																 	.allocate_data_on_node = allocate_coo_buffer_on_node,
															
@@ -212,6 +213,7 @@ starpu_coo_data_register(starpu_data_handle_t *handleptr, unsigned home_node,
 
																 {
															
 
																 	struct starpu_coo_interface coo_interface =
															
 
																 	{
															
 
																+		.id = STARPU_COO_INTERFACE_ID,
															
 
																 		.values = values,
															
 
																 		.columns = columns,
															
 
																 		.rows = rows,
															
@@ -222,5 +224,5 @@ starpu_coo_data_register(starpu_data_handle_t *handleptr, unsigned home_node,
 
																 	};
															
 
																 	starpu_data_register(handleptr, home_node, &coo_interface,
															
 
																-			     &_starpu_interface_coo_ops);
															
 
																+			     &starpu_interface_coo_ops);
															
 
																 }
															
--- a/src/datawizard/interfaces/csr_filters.c
+++ b/src/datawizard/interfaces/csr_filters.c
@@ -46,6 +46,7 @@ void starpu_csr_filter_vertical_block(void *father_interface, void *child_interf
 
																 	uint32_t local_nnz = rowptr[first_index + child_nrow] - rowptr[first_index];
															
 
																+	csr_child->id = csr_father->id;
															
 
																 	csr_child->nnz = local_nnz;
															
 
																 	csr_child->nrow = child_nrow;
															
 
																 	csr_child->firstentry = local_firstentry;
															
--- a/src/datawizard/interfaces/csr_interface.c
+++ b/src/datawizard/interfaces/csr_interface.c
@@ -27,6 +27,8 @@
 
																 #include <starpu_cuda.h>
															
 
																 #include <starpu_opencl.h>
															
 
																 #include <drivers/opencl/driver_opencl.h>
															
 
																+#include <drivers/scc/driver_scc_source.h>
															
 
																+#include <drivers/mic/driver_mic_source.h>
															
 
																 static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data);
															
@@ -42,7 +44,7 @@ static size_t csr_interface_get_size(starpu_data_handle_t handle);
 
																 static int csr_compare(void *data_interface_a, void *data_interface_b);
															
 
																 static uint32_t footprint_csr_interface_crc32(starpu_data_handle_t handle);
															
 
																-static struct starpu_data_interface_ops interface_csr_ops =
															
 
																+struct starpu_data_interface_ops starpu_interface_csr_ops =
															
 
																 {
															
 
																 	.register_data_handle = register_csr_handle,
															
 
																 	.allocate_data_on_node = allocate_csr_buffer_on_node,
															
@@ -76,6 +78,7 @@ static void register_csr_handle(starpu_data_handle_t handle, unsigned home_node,
 
																 			local_interface->colind = NULL;
															
 
																 		}
															
 
																+		local_interface->id = csr_interface->id;
															
 
																 		local_interface->rowptr = csr_interface->rowptr;
															
 
																 		local_interface->nnz = csr_interface->nnz;
															
 
																 		local_interface->nrow = csr_interface->nrow;
															
@@ -91,6 +94,7 @@ void starpu_csr_data_register(starpu_data_handle_t *handleptr, unsigned home_nod
 
																 {
															
 
																 	struct starpu_csr_interface csr_interface =
															
 
																 	{
															
 
																+		.id = STARPU_CSR_INTERFACE_ID,
															
 
																 		.nnz = nnz,
															
 
																 		.nrow = nrow,
															
 
																 		.nzval = nzval,
															
@@ -100,7 +104,7 @@ void starpu_csr_data_register(starpu_data_handle_t *handleptr, unsigned home_nod
 
																 		.elemsize = elemsize
															
 
																 	};
															
 
																-	starpu_data_register(handleptr, home_node, &csr_interface, &interface_csr_ops);
															
 
																+	starpu_data_register(handleptr, home_node, &csr_interface, &starpu_interface_csr_ops);
															
 
																 }
															
 
																 static uint32_t footprint_csr_interface_crc32(starpu_data_handle_t handle)
															
--- a/src/datawizard/interfaces/data_interface.c
+++ b/src/datawizard/interfaces/data_interface.c
@@ -83,6 +83,40 @@ void _starpu_data_interface_shutdown()
 
																 	registered_tag_handles = NULL;
															
 
																 }
															
 
																+/* Return the interface operations table that corresponds to interface_id.
																+ *
																+ * Matrix, block, vector, CSR, BCSR, variable, void and multiformat
																+ * identifiers are handled; any other value reaches STARPU_ABORT().
																+ *
																+ * NOTE(review): STARPU_COO_INTERFACE_ID has no entry below and therefore
																+ * takes the default branch -- confirm whether that is intended. */
																+struct starpu_data_interface_ops *_starpu_data_interface_get_ops(unsigned interface_id)
																+{
																+	struct starpu_data_interface_ops *ops = NULL;
																+
																+	switch (interface_id)
																+	{
																+		case STARPU_MATRIX_INTERFACE_ID:
																+			ops = &starpu_interface_matrix_ops;
																+			break;
																+
																+		case STARPU_BLOCK_INTERFACE_ID:
																+			ops = &starpu_interface_block_ops;
																+			break;
																+
																+		case STARPU_VECTOR_INTERFACE_ID:
																+			ops = &starpu_interface_vector_ops;
																+			break;
																+
																+		case STARPU_CSR_INTERFACE_ID:
																+			ops = &starpu_interface_csr_ops;
																+			break;
																+
																+		case STARPU_BCSR_INTERFACE_ID:
																+			ops = &starpu_interface_bcsr_ops;
																+			break;
																+
																+		case STARPU_VARIABLE_INTERFACE_ID:
																+			ops = &starpu_interface_variable_ops;
																+			break;
																+
																+		case STARPU_VOID_INTERFACE_ID:
																+			ops = &starpu_interface_void_ops;
																+			break;
																+
																+		case STARPU_MULTIFORMAT_INTERFACE_ID:
																+			ops = &starpu_interface_multiformat_ops;
																+			break;
																+
																+		default:
																+			/* Unknown identifier: ops is still NULL if the abort returns. */
																+			STARPU_ABORT();
																+			break;
																+	}
																+
																+	return ops;
																+}
															
 
																+
															
 
																 /* Register the mapping from PTR to HANDLE.  If PTR is already mapped to
															
 
																  * some handle, the new mapping shadows the previous one.   */
															
 
																 void _starpu_data_register_ram_pointer(starpu_data_handle_t handle, void *ptr)
															
@@ -163,8 +197,8 @@ static void _starpu_register_new_data(starpu_data_handle_t handle,
 
																 	STARPU_PTHREAD_MUTEX_INIT(&handle->sequential_consistency_mutex, NULL);
															
 
																 	handle->last_submitted_mode = STARPU_R;
															
 
																-	handle->last_submitted_writer = NULL;
															
 
																-	handle->last_submitted_readers = NULL;
															
 
																+	handle->last_sync_task = NULL;
															
 
																+	handle->last_submitted_accessors = NULL;
															
 
																 	handle->post_sync_tasks = NULL;
															
 
																 	handle->post_sync_tasks_cnt = 0;
															
@@ -176,9 +210,9 @@ static void _starpu_register_new_data(starpu_data_handle_t handle,
 
																 	handle->reduction_req_list = _starpu_data_requester_list_new();
															
 
																 #ifdef STARPU_USE_FXT
															
 
																-	handle->last_submitted_ghost_writer_id_is_valid = 0;
															
 
																-	handle->last_submitted_ghost_writer_id = 0;
															
 
																-	handle->last_submitted_ghost_readers_id = NULL;
															
 
																+	handle->last_submitted_ghost_sync_id_is_valid = 0;
															
 
																+	handle->last_submitted_ghost_sync_id = 0;
															
 
																+	handle->last_submitted_ghost_accessors_id = NULL;
															
 
																 #endif
															
 
																 	handle->wt_mask = wt_mask;
															
@@ -569,7 +603,10 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 
																 		 * XXX : This is quite hacky, could we submit a task instead ?
															
 
																 		 */
															
 
																 		if (_starpu_data_is_multiformat_handle(handle) &&
															
 
																-			starpu_node_get_kind(handle->mf_node) != STARPU_CPU_RAM)
															
 
																+			(  starpu_node_get_kind(handle->mf_node) != STARPU_CPU_RAM
															
 
																+			&& starpu_node_get_kind(handle->mf_node) != STARPU_SCC_RAM
															
 
																+			&& starpu_node_get_kind(handle->mf_node) != STARPU_SCC_SHM
															
 
																+			 ))
															
 
																 		{
															
 
																 			_STARPU_DEBUG("Conversion needed\n");
															
 
																 			void *buffers[1];
															
@@ -598,7 +635,18 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 
																 					break;
															
 
																 				}
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+				case STARPU_MIC_RAM:
															
 
																+				{
															
 
																+					struct starpu_multiformat_data_interface_ops *mf_ops;
															
 
																+					mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface);
															
 
																+					cl = mf_ops->mic_to_cpu_cl;
															
 
																+					break;
															
 
																+				}
															
 
																+#endif
															
 
																 				case STARPU_CPU_RAM:      /* Impossible ! */
															
 
																+				case STARPU_SCC_RAM:      /* Impossible ! */
															
 
																+				case STARPU_SCC_SHM:      /* Impossible ! */
															
 
																 				default:
															
 
																 					STARPU_ABORT();
															
 
																 			}
															
--- a/src/datawizard/interfaces/data_interface.h
+++ b/src/datawizard/interfaces/data_interface.h
@@ -1,7 +1,7 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																  * Copyright (C) 2009-2012  Université de Bordeaux 1
															
 
																- * Copyright (C) 2010, 2012  Centre National de la Recherche Scientifique
															
 
																+ * Copyright (C) 2010, 2012, 2013  Centre National de la Recherche Scientifique
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
 
																  * it under the terms of the GNU Lesser General Public License as published by
															
@@ -21,8 +21,31 @@
 
																 #include <starpu.h>
															
 
																 #include <common/config.h>
															
 
																+/* Generic type representing an interface, for now it's only used before
															
 
																+ * execution on message-passing devices but it can be useful in other cases.
															
 
																+ */
															
 
																+union _starpu_interface
															
 
																+{
															
 
																+	struct starpu_matrix_interface matrix;
															
 
																+	struct starpu_block_interface block;
															
 
																+	struct starpu_vector_interface vector;
															
 
																+	struct starpu_csr_interface csr;
															
 
																+	struct starpu_coo_interface coo;
															
 
																+	struct starpu_bcsr_interface bcsr;
															
 
																+	struct starpu_variable_interface variable;
															
 
																+	struct starpu_multiformat_interface multiformat;
															
 
																+};
															
 
																+
															
 
																 /* Some data interfaces or filters use this interface internally */
															
 
																 extern struct starpu_data_interface_ops starpu_interface_matrix_ops;
															
 
																+extern struct starpu_data_interface_ops starpu_interface_block_ops;
															
 
																+extern struct starpu_data_interface_ops starpu_interface_vector_ops;
															
 
																+extern struct starpu_data_interface_ops starpu_interface_csr_ops;
															
 
																+extern struct starpu_data_interface_ops starpu_interface_bcsr_ops;
															
 
																+extern struct starpu_data_interface_ops starpu_interface_variable_ops;
															
 
																+extern struct starpu_data_interface_ops starpu_interface_void_ops;
															
 
																+extern struct starpu_data_interface_ops starpu_interface_multiformat_ops;
															
 
																+
															
 
																 void _starpu_data_free_interfaces(starpu_data_handle_t handle)
															
 
																 	STARPU_ATTRIBUTE_INTERNAL;
															
@@ -33,6 +56,8 @@ extern void _starpu_data_interface_init(void) STARPU_ATTRIBUTE_INTERNAL;
 
																 extern int _starpu_data_check_not_busy(starpu_data_handle_t handle) STARPU_ATTRIBUTE_INTERNAL;
															
 
																 extern void _starpu_data_interface_shutdown(void) STARPU_ATTRIBUTE_INTERNAL;
															
 
																+struct starpu_data_interface_ops *_starpu_data_interface_get_ops(unsigned interface_id);
															
 
																+
															
 
																 extern void _starpu_data_register_ram_pointer(starpu_data_handle_t handle,
															
 
																 						void *ptr)
															
 
																 	STARPU_ATTRIBUTE_INTERNAL;
															
--- a/src/datawizard/interfaces/matrix_filters.c
+++ b/src/datawizard/interfaces/matrix_filters.c
@@ -41,6 +41,7 @@ void starpu_matrix_filter_block(void *father_interface, void *child_interface, S
 
																 						     &child_nx, &offset);
															
 
																 	/* update the child's interface */
															
 
																+	matrix_child->id = matrix_father->id;
															
 
																 	matrix_child->nx = child_nx;
															
 
																 	matrix_child->ny = ny;
															
 
																 	matrix_child->elemsize = elemsize;
															
@@ -115,6 +116,7 @@ void starpu_matrix_filter_vertical_block(void *father_interface, void *child_int
 
																 						     matrix_father->ld,
															
 
																 						     &child_ny, &offset);
															
 
																+	matrix_child->id = matrix_father->id;
															
 
																 	matrix_child->nx = nx;
															
 
																 	matrix_child->ny = child_ny;
															
 
																 	matrix_child->elemsize = elemsize;
															
--- a/src/datawizard/interfaces/matrix_interface.c
+++ b/src/datawizard/interfaces/matrix_interface.c
@@ -24,6 +24,8 @@
 
																 #include <starpu_cuda.h>
															
 
																 #include <starpu_opencl.h>
															
 
																 #include <drivers/opencl/driver_opencl.h>
															
 
																+#include <drivers/scc/driver_scc_source.h>
															
 
																+#include <drivers/mic/driver_mic_source.h>
															
 
																 /* If you can promise that there is no stride in your matrices, you can define this */
															
 
																 // #define NO_STRIDE
															
@@ -47,6 +49,17 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 
																 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cl_event *event);
															
 
																 static int copy_opencl_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cl_event *event);
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+static int copy_scc_src_to_sink(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																+static int copy_scc_sink_to_src(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																+static int copy_scc_sink_to_sink(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																+#endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+static int copy_ram_to_mic(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																+static int copy_mic_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																+static int copy_ram_to_mic_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																+static int copy_mic_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																+#endif
															
 
																 static const struct starpu_data_copy_methods matrix_copy_data_methods_s =
															
 
																 {
															
@@ -76,6 +89,17 @@ static const struct starpu_data_copy_methods matrix_copy_data_methods_s =
 
																 	.opencl_to_ram_async = copy_opencl_to_ram_async,
															
 
																 	.opencl_to_opencl_async = copy_opencl_to_opencl_async,
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+	.scc_src_to_sink = copy_scc_src_to_sink,
															
 
																+	.scc_sink_to_src = copy_scc_sink_to_src,
															
 
																+	.scc_sink_to_sink = copy_scc_sink_to_sink,
															
 
																+#endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+	.ram_to_mic = copy_ram_to_mic,
															
 
																+	.mic_to_ram = copy_mic_to_ram,
															
 
																+	.ram_to_mic_async = copy_ram_to_mic_async,
															
 
																+	.mic_to_ram_async = copy_mic_to_ram_async,
															
 
																+#endif
															
 
																 };
															
 
																 static void register_matrix_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface);
															
@@ -127,6 +151,7 @@ static void register_matrix_handle(starpu_data_handle_t handle, unsigned home_no
 
																 			local_interface->ld  = 0;
															
 
																 		}
															
 
																+		local_interface->id = matrix_interface->id;
															
 
																 		local_interface->nx = matrix_interface->nx;
															
 
																 		local_interface->ny = matrix_interface->ny;
															
 
																 		local_interface->elemsize = matrix_interface->elemsize;
															
@@ -151,6 +176,7 @@ void starpu_matrix_data_register(starpu_data_handle_t *handleptr, unsigned home_
 
																 {
															
 
																 	struct starpu_matrix_interface matrix_interface =
															
 
																 	{
															
 
																+		.id = STARPU_MATRIX_INTERFACE_ID,
															
 
																 		.ptr = ptr,
															
 
																 		.ld = ld,
															
 
																 		.nx = nx,
															
@@ -160,6 +186,11 @@ void starpu_matrix_data_register(starpu_data_handle_t *handleptr, unsigned home_
 
																                 .offset = 0
															
 
																 	};
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+	_starpu_scc_set_offset_in_shared_memory((void*)matrix_interface.ptr,
															
 
																+			(void**)&(matrix_interface.dev_handle), &(matrix_interface.offset));
															
 
																+#endif
															
 
																+
															
 
																 	starpu_data_register(handleptr, home_node, &matrix_interface, &starpu_interface_matrix_ops);
															
 
																 }
															
@@ -558,6 +589,147 @@ static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_A
 
																 #endif
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+static int copy_scc_src_to_sink(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																+{
															
 
																+	uint32_t nx = STARPU_MATRIX_GET_NX(dst_interface);
															
 
																+	uint32_t ny = STARPU_MATRIX_GET_NY(dst_interface);
															
 
																+
															
 
																+	size_t elemsize = STARPU_MATRIX_GET_ELEMSIZE(dst_interface);
															
 
																+
															
 
																+	uint32_t src_ld = STARPU_MATRIX_GET_LD(src_interface);
															
 
																+	uint32_t dst_ld = STARPU_MATRIX_GET_LD(dst_interface);
															
 
																+
															
 
																+	void *src_ptr = (void *)STARPU_MATRIX_GET_PTR(src_interface);
															
 
																+	void *dst_ptr = (void *)STARPU_MATRIX_GET_PTR(dst_interface);
															
 
																+
															
 
																+	unsigned y;
															
 
																+	for (y = 0; y < ny; ++y)
															
 
																+	{
															
 
																+		uint32_t src_offset = y*src_ld*elemsize;
															
 
																+		uint32_t dst_offset = y*dst_ld*elemsize;
															
 
																+
															
 
																+		_starpu_scc_copy_src_to_sink(src_ptr + src_offset, src_node,
															
 
																+						dst_ptr + dst_offset, dst_node, nx*elemsize);
															
 
																+	}
															
 
																+
															
 
																+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, nx*ny*elemsize);
															
 
																+
															
 
																+	return 0;
															
 
																+}
															
 
																+
															
 
																+static int copy_scc_sink_to_src(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																+{
															
 
																+	uint32_t nx = STARPU_MATRIX_GET_NX(dst_interface);
															
 
																+	uint32_t ny = STARPU_MATRIX_GET_NY(dst_interface);
															
 
																+
															
 
																+	size_t elemsize = STARPU_MATRIX_GET_ELEMSIZE(dst_interface);
															
 
																+
															
 
																+	uint32_t src_ld = STARPU_MATRIX_GET_LD(src_interface);
															
 
																+	uint32_t dst_ld = STARPU_MATRIX_GET_LD(dst_interface);
															
 
																+
															
 
																+	void *src_ptr = (void *)STARPU_MATRIX_GET_PTR(src_interface);
															
 
																+	void *dst_ptr = (void *)STARPU_MATRIX_GET_PTR(dst_interface);
															
 
																+
															
 
																+	unsigned y;
															
 
																+	for (y = 0; y < ny; ++y)
															
 
																+	{
															
 
																+		uint32_t src_offset = y*src_ld*elemsize;
															
 
																+		uint32_t dst_offset = y*dst_ld*elemsize;
															
 
																+
															
 
																+		_starpu_scc_copy_sink_to_src(src_ptr + src_offset, src_node,
															
 
																+						dst_ptr + dst_offset, dst_node, nx*elemsize);
															
 
																+	}
															
 
																+
															
 
																+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, nx*ny*elemsize);
															
 
																+
															
 
																+	return 0;
															
 
																+}
															
 
																+
															
 
																+static int copy_scc_sink_to_sink(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																+{
															
 
																+	uint32_t nx = STARPU_MATRIX_GET_NX(dst_interface);
															
 
																+	uint32_t ny = STARPU_MATRIX_GET_NY(dst_interface);
															
 
																+
															
 
																+	size_t elemsize = STARPU_MATRIX_GET_ELEMSIZE(dst_interface);
															
 
																+
															
 
																+	uint32_t src_ld = STARPU_MATRIX_GET_LD(src_interface);
															
 
																+	uint32_t dst_ld = STARPU_MATRIX_GET_LD(dst_interface);
															
 
																+
															
 
																+	void *src_ptr = (void *)STARPU_MATRIX_GET_PTR(src_interface);
															
 
																+	void *dst_ptr = (void *)STARPU_MATRIX_GET_PTR(dst_interface);
															
 
																+
															
 
																+	unsigned y;
															
 
																+	for (y = 0; y < ny; ++y)
															
 
																+	{
															
 
																+		uint32_t src_offset = y*src_ld*elemsize;
															
 
																+		uint32_t dst_offset = y*dst_ld*elemsize;
															
 
																+
															
 
																+		_starpu_scc_copy_sink_to_sink(src_ptr + src_offset, src_node,
															
 
																+						dst_ptr + dst_offset, dst_node, nx*elemsize);
															
 
																+	}
															
 
																+
															
 
																+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, nx*ny*elemsize);
															
 
																+
															
 
																+	return 0;
															
 
																+}
															
 
																+#endif /* STARPU_USE_SCC */
															
 
																+
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+static int copy_mic_common(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node,
															
 
																+						   int (*copy_func)(void *, unsigned, void *, unsigned, size_t))
															
 
																+{
															
 
																+	struct starpu_matrix_interface *src_matrix = src_interface;
															
 
																+	struct starpu_matrix_interface *dst_matrix = dst_interface;
															
 
																+
															
 
																+	unsigned y;
															
 
																+	uint32_t nx = dst_matrix->nx;
															
 
																+	uint32_t ny = dst_matrix->ny;
															
 
																+	size_t elemsize = dst_matrix->elemsize;
															
 
																+
															
 
																+	uint32_t ld_src = src_matrix->ld;
															
 
																+	uint32_t ld_dst = dst_matrix->ld;
															
 
																+
															
 
																+	uintptr_t ptr_src = src_matrix->ptr;
															
 
																+	uintptr_t ptr_dst = dst_matrix->ptr;
															
 
																+
															
 
																+
															
 
																+	for (y = 0; y < ny; y++)
															
 
																+	{
															
 
																+		uint32_t src_offset = y*ld_src*elemsize;
															
 
																+		uint32_t dst_offset = y*ld_dst*elemsize;
															
 
																+
															
 
																+		copy_func((void *)(ptr_src + src_offset), src_node, (void *)(ptr_dst + dst_offset), dst_node, nx*elemsize);
															
 
																+	}
															
 
																+
															
 
																+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)nx*ny*elemsize);
															
 
																+
															
 
																+	return 0;
															
 
																+}
															
 
																+
															
 
																+static int copy_ram_to_mic(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																+{
															
 
																+	return copy_mic_common(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_ram_to_mic);
															
 
																+}
															
 
																+
															
 
																+static int copy_mic_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																+{
															
 
																+	return copy_mic_common(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_mic_to_ram);
															
 
																+}
															
 
																+
															
 
																+static int copy_ram_to_mic_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																+{
															
 
																+	copy_mic_common(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_ram_to_mic_async);
															
 
																+	return -EAGAIN;
															
 
																+}
															
 
																+
															
 
																+static int copy_mic_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																+{
															
 
																+	copy_mic_common(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_mic_to_ram_async);
															
 
																+	return -EAGAIN;
															
 
																+}
															
 
																+#endif
															
 
																+
															
 
																 /* as not all platform easily have a  lib installed ... */
															
 
																 static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
															
 
																 {
															
--- a/src/datawizard/interfaces/multiformat_interface.c
+++ b/src/datawizard/interfaces/multiformat_interface.c
@@ -23,6 +23,7 @@
 
																 #include <starpu_cuda.h>
															
 
																 #include <starpu_opencl.h>
															
 
																 #include <drivers/opencl/driver_opencl.h>
															
 
																+#include <drivers/mic/driver_mic_source.h>
															
 
																 #include <core/task.h>
															
 
																 static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
															
@@ -41,6 +42,12 @@ static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_A
 
																 static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cl_event *event);
															
 
																 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cl_event *event);
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+static int copy_ram_to_mic(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																+static int copy_mic_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																+static int copy_ram_to_mic_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																+static int copy_mic_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
															
 
																+#endif
															
 
																 static const struct starpu_data_copy_methods multiformat_copy_data_methods_s =
															
 
																 {
															
@@ -65,6 +72,12 @@ static const struct starpu_data_copy_methods multiformat_copy_data_methods_s =
 
																         .ram_to_opencl_async = copy_ram_to_opencl_async,
															
 
																 	.opencl_to_ram_async = copy_opencl_to_ram_async,
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+	.ram_to_mic = copy_ram_to_mic,
															
 
																+	.mic_to_ram = copy_mic_to_ram,
															
 
																+	.ram_to_mic_async = copy_ram_to_mic_async,
															
 
																+	.mic_to_ram_async = copy_mic_to_ram_async,
															
 
																+#endif
															
 
																 };
															
 
																 static void register_multiformat_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface);
															
@@ -86,7 +99,7 @@ get_mf_ops(void *data_interface)
 
																 	return mf->ops;
															
 
																 }
															
 
																-static struct starpu_data_interface_ops interface_multiformat_ops =
															
 
																+struct starpu_data_interface_ops starpu_interface_multiformat_ops =
															
 
																 {
															
 
																 	.register_data_handle  = register_multiformat_handle,
															
 
																 	.allocate_data_on_node = allocate_multiformat_buffer_on_node,
															
@@ -121,6 +134,10 @@ static void *multiformat_handle_to_pointer(starpu_data_handle_t handle, unsigned
 
																 		case STARPU_OPENCL_RAM:
															
 
																 			return multiformat_interface->opencl_ptr;
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+		case STARPU_MIC_RAM:
															
 
																+			return multiformat_interface->mic_ptr;
															
 
																+#endif
															
 
																 		default:
															
 
																 			STARPU_ABORT();
															
 
																 	}
															
@@ -147,6 +164,9 @@ static void register_multiformat_handle(starpu_data_handle_t handle, unsigned ho
 
																 #ifdef STARPU_USE_OPENCL
															
 
																 			local_interface->opencl_ptr = multiformat_interface->opencl_ptr;
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+			local_interface->mic_ptr    = multiformat_interface->mic_ptr;
															
 
																+#endif
															
 
																 		}
															
 
																 		else
															
 
																 		{
															
@@ -157,7 +177,11 @@ static void register_multiformat_handle(starpu_data_handle_t handle, unsigned ho
 
																 #ifdef STARPU_USE_OPENCL
															
 
																 			local_interface->opencl_ptr = NULL;
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+			local_interface->mic_ptr    = NULL;
															
 
																+#endif
															
 
																 		}
															
 
																+		local_interface->id = multiformat_interface->id;
															
 
																 		local_interface->nx = multiformat_interface->nx;
															
 
																 		local_interface->ops = multiformat_interface->ops;
															
 
																 	}
															
@@ -173,17 +197,21 @@ void starpu_multiformat_data_register(starpu_data_handle_t *handleptr,
 
																 	_starpu_codelet_check_deprecated_fields(format_ops->opencl_to_cpu_cl);
															
 
																 	_starpu_codelet_check_deprecated_fields(format_ops->cpu_to_cuda_cl);
															
 
																 	_starpu_codelet_check_deprecated_fields(format_ops->cuda_to_cpu_cl);
															
 
																+	_starpu_codelet_check_deprecated_fields(format_ops->cpu_to_mic_cl);
															
 
																+	_starpu_codelet_check_deprecated_fields(format_ops->mic_to_cpu_cl);
															
 
																 	struct starpu_multiformat_interface multiformat =
															
 
																 	{
															
 
																+		.id         = STARPU_MULTIFORMAT_INTERFACE_ID,
															
 
																 		.cpu_ptr    = ptr,
															
 
																 		.cuda_ptr   = NULL,
															
 
																 		.opencl_ptr = NULL,
															
 
																+		.mic_ptr    = NULL,
															
 
																 		.nx         = nobjects,
															
 
																 		.ops        = format_ops
															
 
																 	};
															
 
																-	starpu_data_register(handleptr, home_node, &multiformat, &interface_multiformat_ops);
															
 
																+	starpu_data_register(handleptr, home_node, &multiformat, &starpu_interface_multiformat_ops);
															
 
																 }
															
 
																 static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle)
															
@@ -204,6 +232,9 @@ static int multiformat_compare(void *data_interface_a, void *data_interface_b)
 
																 #ifdef STARPU_USE_OPENCL
															
 
																 			&& (multiformat_a->ops->opencl_elemsize == multiformat_b->ops->opencl_elemsize)
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+		    && (multiformat_a->ops->mic_elemsize == multiformat_b->ops->mic_elemsize)
															
 
																+#endif
															
 
																 		);
															
 
																 }
															
@@ -263,16 +294,26 @@ static starpu_ssize_t allocate_multiformat_buffer_on_node(void *data_interface_,
 
																 		goto fail_opencl;
															
 
																 	multiformat_interface->opencl_ptr = (void *) addr;
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+	size = multiformat_interface->nx * multiformat_interface->ops->mic_elemsize;
															
 
																+	allocated_memory += size;
															
 
																+	addr = starpu_malloc_on_node(dst_node, size);
															
 
																+	if (!addr)
															
 
																+		goto fail_mic;
															
 
																+	multiformat_interface->mic_ptr = (void *) addr;
															
 
																+#endif
															
 
																 	return allocated_memory;
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+fail_mic:
															
 
																+#endif
															
 
																 #ifdef STARPU_USE_OPENCL
															
 
																+	starpu_free_on_node(dst_node, (uintptr_t) multiformat_interface->opencl_ptr, multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize);
															
 
																 fail_opencl:
															
 
																-#ifdef STARPU_USE_CUDA
															
 
																-	starpu_free_on_node(dst_node, (uintptr_t) multiformat_interface->cuda_ptr, multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize);
															
 
																-#endif
															
 
																 #endif
															
 
																 #ifdef STARPU_USE_CUDA
															
 
																+	starpu_free_on_node(dst_node, (uintptr_t) multiformat_interface->cuda_ptr, multiformat_interface->nx * multiformat_interface->ops->cuda_elemsize);
															
 
																 fail_cuda:
															
 
																 #endif
															
 
																 	starpu_free_on_node(dst_node, (uintptr_t) multiformat_interface->cpu_ptr, multiformat_interface->nx * multiformat_interface->ops->cpu_elemsize);
															
@@ -298,6 +339,11 @@ static void free_multiformat_buffer_on_node(void *data_interface, unsigned node)
 
																 				   multiformat_interface->nx * multiformat_interface->ops->opencl_elemsize);
															
 
																 	multiformat_interface->opencl_ptr = NULL;
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+	starpu_free_on_node(node, (uintptr_t) multiformat_interface->mic_ptr,
															
 
																+				   multiformat_interface->nx * multiformat_interface->ops->mic_elemsize);
															
 
																+	multiformat_interface->mic_ptr = NULL;
															
 
																+#endif
															
 
																 }
															
@@ -645,3 +691,70 @@ static int copy_opencl_to_opencl(void *src_interface, unsigned src_node,
 
																 	return 0;
															
 
																 }
															
 
																 #endif
															
 
																+
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+static int copy_mic_common_ram_to_mic(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node,
															
 
																+						   int (*copy_func)(void *, unsigned, void *, unsigned, size_t))
															
 
																+{
															
 
																+	struct starpu_multiformat_interface *src_multiformat = src_interface;
															
 
																+	struct starpu_multiformat_interface *dst_multiformat = dst_interface;
															
 
																+	/* Common body of the RAM -> MIC copies: copy_func performs the transfer; returns 0, or -ENOMEM if the buffer allocation below fails. */
															
 
																+	STARPU_ASSERT(src_multiformat != NULL);
															
 
																+	STARPU_ASSERT(dst_multiformat != NULL);
															
 
																+	STARPU_ASSERT(dst_multiformat->ops != NULL);
															
 
																+	/* Transfer size in bytes: element count times MIC element size, both read from the destination interface. */
															
 
																+	size_t size = dst_multiformat->nx * dst_multiformat->ops->mic_elemsize;
															
 
																+	if (src_multiformat->mic_ptr == NULL)
															
 
																+	{
															
 
																+		src_multiformat->mic_ptr = malloc(size);
															
 
																+		if (src_multiformat->mic_ptr == NULL)
															
 
																+			return -ENOMEM;
															
 
																+	}
															
 
																+	/* NOTE(review): mic_ptr is allocated on the source above, yet the transfer below uses cpu_ptr on both sides -- confirm this is intended. */
															
 
																+	copy_func(src_multiformat->cpu_ptr, src_node, dst_multiformat->cpu_ptr, dst_node, size);
															
 
																+
															
 
																+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
															
 
																+
															
 
																+	return 0;
															
 
																+}
															
 
																+
															
 
																+static int copy_mic_common_mic_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node,
															
 
																+						   int (*copy_func)(void *, unsigned, void *, unsigned, size_t))
															
 
																+{
															
 
																+	struct starpu_multiformat_interface *src_multiformat = src_interface;
															
 
																+	struct starpu_multiformat_interface *dst_multiformat = dst_interface;
															
 
																+	/* Common body of the MIC -> RAM copies: hands the mic_ptr of both interfaces to copy_func; always returns 0. */
															
 
																+	STARPU_ASSERT(src_multiformat != NULL);
															
 
																+	STARPU_ASSERT(dst_multiformat != NULL);
															
 
																+	STARPU_ASSERT(src_multiformat->ops != NULL);
															
 
																+	/* The size is computed from the source interface, which is why the source ops pointer is the one checked above. */
															
 
																+	size_t size = src_multiformat->nx * src_multiformat->ops->mic_elemsize;
															
 
																+	copy_func(src_multiformat->mic_ptr, src_node, dst_multiformat->mic_ptr, dst_node, size);
															
 
																+
															
 
																+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
															
 
																+
															
 
																+	return 0;
															
 
																+}
															
 
																+
															
 
																+static int copy_ram_to_mic(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																+{ /* Forwards to the common RAM -> MIC helper with _starpu_mic_copy_ram_to_mic as copy function and returns its result. */
															
 
																+	return copy_mic_common_ram_to_mic(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_ram_to_mic);
															
 
																+}
															
 
																+
															
 
																+static int copy_mic_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																+{ /* Forwards to the common MIC -> RAM helper with _starpu_mic_copy_mic_to_ram as copy function and returns its result. */
															
 
																+	return copy_mic_common_mic_to_ram(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_mic_to_ram);
															
 
																+}
															
 
																+
															
 
																+static int copy_ram_to_mic_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																+{ /* Runs the common RAM -> MIC helper with _starpu_mic_copy_ram_to_mic_async as copy function. */
															
 
																+	copy_mic_common_ram_to_mic(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_ram_to_mic_async);
															
 
																+	return -EAGAIN; /* NOTE(review): returned unconditionally; the helper's result (-ENOMEM on allocation failure) is dropped -- confirm. */
															
 
																+}
															
 
																+
															
 
																+static int copy_mic_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
															
 
																+{ /* Runs the common MIC -> RAM helper with _starpu_mic_copy_mic_to_ram_async as copy function. */
															
 
																+	copy_mic_common_mic_to_ram(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_mic_to_ram_async);
															
 
																+	return -EAGAIN; /* Always -EAGAIN; the helper's return value is not inspected. */
															
 
																+}
															
 
																+#endif
															
--- a/src/datawizard/interfaces/variable_interface.c
+++ b/src/datawizard/interfaces/variable_interface.c
@@ -24,6 +24,8 @@
 
																 #include <starpu_cuda.h>
															
 
																 #include <starpu_opencl.h>
															
 
																 #include <drivers/opencl/driver_opencl.h>
															
 
																+#include <drivers/scc/driver_scc_source.h>
															
 
																+#include <drivers/mic/driver_mic_source.h>
															
 
																 static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data);
															
@@ -41,7 +43,7 @@ static uint32_t footprint_variable_interface_crc32(starpu_data_handle_t handle);
 
																 static int variable_compare(void *data_interface_a, void *data_interface_b);
															
 
																 static void display_variable_interface(starpu_data_handle_t handle, FILE *f);
															
 
																-static struct starpu_data_interface_ops interface_variable_ops =
															
 
																+struct starpu_data_interface_ops starpu_interface_variable_ops =
															
 
																 {
															
 
																 	.register_data_handle = register_variable_handle,
															
 
																 	.allocate_data_on_node = allocate_variable_buffer_on_node,
															
@@ -65,6 +67,7 @@ static void *variable_handle_to_pointer(starpu_data_handle_t handle, unsigned no
 
																 static void register_variable_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface)
															
 
																 {
															
 
																+	struct starpu_variable_interface *variable_interface = (struct starpu_variable_interface *)data_interface;
															
 
																 	unsigned node;
															
 
																 	for (node = 0; node < STARPU_MAXNODES; node++)
															
 
																 	{
															
@@ -73,14 +76,19 @@ static void register_variable_handle(starpu_data_handle_t handle, unsigned home_
 
																 		if (node == home_node)
															
 
																 		{
															
 
																-			local_interface->ptr = STARPU_VARIABLE_GET_PTR(data_interface);
															
 
																+			local_interface->ptr = variable_interface->ptr;
															
 
																+			local_interface->dev_handle = variable_interface->dev_handle;
															
 
																+			local_interface->offset = variable_interface->offset;
															
 
																 		}
															
 
																 		else
															
 
																 		{
															
 
																 			local_interface->ptr = 0;
															
 
																+			local_interface->dev_handle = 0;
															
 
																+			local_interface->offset = 0;
															
 
																 		}
															
 
																-		local_interface->elemsize = STARPU_VARIABLE_GET_ELEMSIZE(data_interface);
															
 
																+		local_interface->id = variable_interface->id;
															
 
																+		local_interface->elemsize = variable_interface->elemsize;
															
 
																 	}
															
 
																 }
															
@@ -90,11 +98,19 @@ void starpu_variable_data_register(starpu_data_handle_t *handleptr, unsigned hom
 
																 {
															
 
																 	struct starpu_variable_interface variable =
															
 
																 	{
															
 
																+		.id = STARPU_VARIABLE_INTERFACE_ID,
															
 
																 		.ptr = ptr,
															
 
																+		.dev_handle = ptr,
															
 
																+		.offset = 0,
															
 
																 		.elemsize = elemsize
															
 
																 	};
															
 
																-	starpu_data_register(handleptr, home_node, &variable, &interface_variable_ops);
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+	_starpu_scc_set_offset_in_shared_memory((void*)variable.ptr, (void**)&(variable.dev_handle),
															
 
																+			&(variable.offset));
															
 
																+#endif
															
 
																+
															
 
																+	starpu_data_register(handleptr, home_node, &variable, &starpu_interface_variable_ops);
															
 
																 }
															
--- a/src/datawizard/interfaces/vector_filters.c
+++ b/src/datawizard/interfaces/vector_filters.c
@@ -35,6 +35,7 @@ void starpu_vector_filter_block(void *father_interface, void *child_interface, S
 
																 	_starpu_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1,
															
 
																 						     &child_nx, &offset);
															
 
																+	vector_child->id = vector_father->id;
															
 
																 	vector_child->nx = child_nx;
															
 
																 	vector_child->elemsize = elemsize;
															
@@ -95,6 +96,8 @@ void starpu_vector_filter_divide_in_2(void *father_interface, void *child_interf
 
																 	STARPU_ASSERT_MSG(length_first < nx, "First part is too long: %u vs %u", length_first, nx);
															
 
																+	vector_child->id = vector_father->id;
															
 
																+
															
 
																 	/* this is the first child */
															
 
																 	if (id == 0)
															
 
																 	{
															
@@ -138,6 +141,7 @@ void starpu_vector_filter_list(void *father_interface, void *child_interface, st
 
																 	uint32_t chunk_size = length_tab[id];
															
 
																+	vector_child->id = vector_father->id;
															
 
																 	vector_child->nx = chunk_size;
															
 
																 	vector_child->elemsize = elemsize;
															
--- a/src/datawizard/interfaces/vector_interface.c
+++ b/src/datawizard/interfaces/vector_interface.c
@@ -24,6 +24,8 @@
 
																 #include <starpu_cuda.h>
															
 
																 #include <starpu_opencl.h>
															
 
																 #include <drivers/opencl/driver_opencl.h>
															
 
																+#include <drivers/mic/driver_mic_source.h>
															
 
																+#include <drivers/scc/driver_scc_source.h>
															
 
																 static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data);
															
@@ -41,7 +43,7 @@ static uint32_t footprint_vector_interface_crc32(starpu_data_handle_t handle);
 
																 static int vector_compare(void *data_interface_a, void *data_interface_b);
															
 
																 static void display_vector_interface(starpu_data_handle_t handle, FILE *f);
															
 
																-static struct starpu_data_interface_ops interface_vector_ops =
															
 
																+struct starpu_data_interface_ops starpu_interface_vector_ops =
															
 
																 {
															
 
																 	.register_data_handle = register_vector_handle,
															
 
																 	.allocate_data_on_node = allocate_vector_buffer_on_node,
															
@@ -89,6 +91,7 @@ static void register_vector_handle(starpu_data_handle_t handle, unsigned home_no
 
																                         local_interface->offset = 0;
															
 
																 		}
															
 
																+		local_interface->id = vector_interface->id;
															
 
																 		local_interface->nx = vector_interface->nx;
															
 
																 		local_interface->elemsize = vector_interface->elemsize;
															
 
																 	}
															
@@ -100,6 +103,7 @@ void starpu_vector_data_register(starpu_data_handle_t *handleptr, unsigned home_
 
																 {
															
 
																 	struct starpu_vector_interface vector =
															
 
																 	{
															
 
																+		.id = STARPU_VECTOR_INTERFACE_ID,
															
 
																 		.ptr = ptr,
															
 
																 		.nx = nx,
															
 
																 		.elemsize = elemsize,
															
@@ -107,7 +111,11 @@ void starpu_vector_data_register(starpu_data_handle_t *handleptr, unsigned home_
 
																                 .offset = 0
															
 
																 	};
															
 
																-	starpu_data_register(handleptr, home_node, &vector, &interface_vector_ops);
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+	_starpu_scc_set_offset_in_shared_memory((void*)vector.ptr, (void**)&(vector.dev_handle), &(vector.offset));
															
 
																+#endif
															
 
																+
															
 
																+	starpu_data_register(handleptr, home_node, &vector, &starpu_interface_vector_ops);
															
 
																 }
															
--- a/src/datawizard/interfaces/void_interface.c
+++ b/src/datawizard/interfaces/void_interface.c
@@ -40,7 +40,7 @@ static uint32_t footprint_void_interface_crc32(starpu_data_handle_t handle);
 
																 static int void_compare(void *data_interface_a, void *data_interface_b);
															
 
																 static void display_void_interface(starpu_data_handle_t handle, FILE *f);
															
 
																-static struct starpu_data_interface_ops interface_void_ops =
															
 
																+struct starpu_data_interface_ops starpu_interface_void_ops =
															
 
																 {
															
 
																 	.register_data_handle = register_void_handle,
															
 
																 	.allocate_data_on_node = allocate_void_buffer_on_node,
															
@@ -64,7 +64,7 @@ static void register_void_handle(starpu_data_handle_t handle STARPU_ATTRIBUTE_UN
 
																 /* declare a new data with the void interface */
															
 
																 void starpu_void_data_register(starpu_data_handle_t *handleptr)
															
 
																 {
															
 
																-	starpu_data_register(handleptr, 0, NULL, &interface_void_ops);
															
 
																+	starpu_data_register(handleptr, 0, NULL, &starpu_interface_void_ops);
															
 
																 }
															
--- a/src/datawizard/malloc.c
+++ b/src/datawizard/malloc.c
@@ -177,6 +177,13 @@ int starpu_malloc_flags(void **A, size_t dim, int flags)
 
																 	}
															
 
																 #endif /* STARPU_SIMGRID */
															
 
																+	if (_starpu_can_submit_scc_task())
															
 
																+	{
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+		_starpu_scc_allocate_shared_memory(A, dim);
															
 
																+#endif
															
 
																+	}
															
 
																+	else
															
 
																 #ifdef STARPU_HAVE_POSIX_MEMALIGN
															
 
																 	if (_malloc_align != sizeof(void*))
															
 
																 	{
															
@@ -318,6 +325,12 @@ int starpu_free_flags(void *A, size_t dim, int flags)
 
																 	}
															
 
																 #endif /* STARPU_SIMGRID */
															
 
																+	if (_starpu_can_submit_scc_task())
															
 
																+	{
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+		_starpu_scc_free_shared_memory(A);
															
 
																+#endif
															
 
																+	} else
															
 
																 	free(A);
															
 
																 out:
															
@@ -406,6 +419,18 @@ starpu_malloc_on_node(unsigned dst_node, size_t size)
 
																 #endif
															
 
																 			}
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+		case STARPU_MIC_RAM:
															
 
																+			if (_starpu_mic_allocate_memory((void **)(&addr), size, dst_node))
															
 
																+				addr = 0;
															
 
																+			break;
															
 
																+#endif
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+		case STARPU_SCC_RAM:
															
 
																+			if (_starpu_scc_allocate_memory((void **)(&addr), size, dst_node))
															
 
																+				addr = 0;
															
 
																+			break;
															
 
																+#endif
															
 
																 		default:
															
 
																 			STARPU_ABORT();
															
 
																 	}
															
@@ -461,6 +486,16 @@ starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size)
 
																                         break;
															
 
																 		}
															
 
																 #endif
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+		case STARPU_MIC_RAM:
															
 
																+			_starpu_mic_free_memory((void*) addr, size, dst_node);
															
 
																+			break;
															
 
																+#endif
															
 
																+#ifdef STARPU_USE_SCC
															
 
																+		case STARPU_SCC_RAM:
															
 
																+			_starpu_scc_free_memory((void *) addr, dst_node);
															
 
																+			break;
															
 
																+#endif
															
 
																 		default:
															
 
																 			STARPU_ABORT();
															
 
																 	}
															
--- a/src/datawizard/memalloc.c
+++ b/src/datawizard/memalloc.c
@@ -73,44 +73,34 @@ void _starpu_deinit_mem_chunk_lists(void)
 
																 static void lock_all_subtree(starpu_data_handle_t handle)
															
 
																 {
															
 
																-	if (handle->nchildren == 0)
															
 
																-	{
															
 
																-		/* this is a leaf */
															
 
																-		while (_starpu_spin_trylock(&handle->header_lock))
															
 
																-			_starpu_datawizard_progress(_starpu_memory_node_get_local_key(), 0);
															
 
																-	}
															
 
																-	else
															
 
																+	unsigned child;
															
 
																+
															
 
																+	/* lock parent */
															
 
																+	while (_starpu_spin_trylock(&handle->header_lock))
															
 
																+		_starpu_datawizard_progress(_starpu_memory_node_get_local_key(), 0);
															
 
																+
															
 
																+	/* lock all sub-subtrees children */
															
 
																+	for (child = 0; child < handle->nchildren; child++)
															
 
																 	{
															
 
																-		/* lock all sub-subtrees children */
															
 
																-		unsigned child;
															
 
																-		for (child = 0; child < handle->nchildren; child++)
															
 
																-		{
															
 
																-			starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
															
 
																-			lock_all_subtree(child_handle);
															
 
																-		}
															
 
																+		starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
															
 
																+		lock_all_subtree(child_handle);
															
 
																 	}
															
 
																 }
															
 
																 static void unlock_all_subtree(starpu_data_handle_t handle)
															
 
																 {
															
 
																-	if (handle->nchildren == 0)
															
 
																-	{
															
 
																-		/* this is a leaf */
															
 
																-		_starpu_spin_unlock(&handle->header_lock);
															
 
																-	}
															
 
																-	else
															
 
																+	/* unlock all sub-subtrees children
															
 
																+	 * Note that this is done in the reverse order of the
															
 
																+	 * lock_all_subtree so that we avoid deadlock */
															
 
																+	unsigned i;
															
 
																+	for (i =0; i < handle->nchildren; i++)
															
 
																 	{
															
 
																-		/* lock all sub-subtrees children
															
 
																-		 * Note that this is done in the reverse order of the
															
 
																-		 * lock_all_subtree so that we avoid deadlock */
															
 
																-		unsigned i;
															
 
																-		for (i =0; i < handle->nchildren; i++)
															
 
																-		{
															
 
																-			unsigned child = handle->nchildren - 1 - i;
															
 
																-			starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
															
 
																-			unlock_all_subtree(child_handle);
															
 
																-		}
															
 
																+		unsigned child = handle->nchildren - 1 - i;
															
 
																+		starpu_data_handle_t child_handle = starpu_data_get_child(handle, child);
															
 
																+		unlock_all_subtree(child_handle);
															
 
																 	}
															
 
																+
															
 
																+	_starpu_spin_unlock(&handle->header_lock);
															
 
																 }
															
 
																 static unsigned may_free_subtree(starpu_data_handle_t handle, unsigned node)
															
@@ -336,7 +326,7 @@ static size_t try_to_free_mem_chunk(struct _starpu_mem_chunk *mc, unsigned node)
 
																 	}
															
 
																 	else
															
 
																 	{
															
 
																-		/* try to lock all the leafs of the subtree */
															
 
																+		/* try to lock all the subtree */
															
 
																 		lock_all_subtree(handle);
															
 
																 		/* check if they are all "free" */
															
@@ -418,7 +408,7 @@ static unsigned try_to_reuse_mem_chunk(struct _starpu_mem_chunk *mc, unsigned no
 
																 	STARPU_ASSERT(old_data);
															
 
																-	/* try to lock all the leafs of the subtree */
															
 
																+	/* try to lock all the subtree */
															
 
																 	lock_all_subtree(old_data);
															
 
																 	/* check if they are all "free" */
															
--- a/src/datawizard/memory_manager.c
+++ b/src/datawizard/memory_manager.c
@@ -59,7 +59,7 @@ int _starpu_memory_manager_can_allocate_size(size_t size, unsigned node)
 
																 		used_size[node] += size;
															
 
																 		ret = 1;
															
 
																 	}
															
 
																-	else if (used_size[node] + size < global_size[node])
															
 
																+	else if (used_size[node] + size <= global_size[node])
															
 
																 	{
															
 
																 		used_size[node] += size;
															
 
																 		ret = 1;
															
--- a/src/datawizard/reduction.c
+++ b/src/datawizard/reduction.c
@@ -20,6 +20,8 @@
 
																 #include <util/starpu_data_cpy.h>
															
 
																 #include <core/task.h>
															
 
																 #include <datawizard/datawizard.h>
															
 
																+#include <drivers/mic/driver_mic_source.h>
															
 
																+#include <drivers/mp_common/source_common.h>
															
 
																 void starpu_data_set_reduction_methods(starpu_data_handle_t handle,
															
 
																 				       struct starpu_codelet *redux_cl,
															
@@ -68,6 +70,12 @@ void _starpu_redux_init_data_replicate(starpu_data_handle_t handle, struct _star
 
																 		case STARPU_OPENCL_WORKER:
															
 
																 			init_func = _starpu_task_get_opencl_nth_implementation(init_cl, 0);
															
 
																 			break;
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+		case STARPU_MIC_WORKER:
															
 
																+			init_func = _starpu_mic_src_get_kernel_from_codelet(init_cl, 0);
															
 
																+			break;
															
 
																+#endif
															
 
																+			/* TODO: SCC */
															
 
																 		default:
															
 
																 			STARPU_ABORT();
															
 
																 			break;
															
@@ -75,7 +83,27 @@ void _starpu_redux_init_data_replicate(starpu_data_handle_t handle, struct _star
 
																 	STARPU_ASSERT(init_func);
															
 
																-	init_func(&replicate->data_interface, NULL);
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+	if (starpu_worker_get_type(workerid) == STARPU_MIC_WORKER)
															
 
																+	{
															
 
																+		const struct _starpu_mp_node *node = _starpu_mic_src_get_actual_thread_mp_node();
															
 
																+		enum _starpu_mp_command answer;
															
 
																+		void *arg = NULL;
															
 
																+		int arg_size = 0;
															
 
																+
															
 
																+		// XXX: give the correct coreid.
															
 
																+		_starpu_src_common_execute_kernel(node,
															
 
																+						  (void(*)(void))init_func, 0,
															
 
																+						  &handle, &(replicate->data_interface), 1,
															
 
																+						  NULL, 0);
															
 
																+		answer = _starpu_mp_common_recv_command (node, &arg, &arg_size);
															
 
																+		STARPU_ASSERT (answer == STARPU_EXECUTION_COMPLETED);
															
 
																+	}
															
 
																+	else
															
 
																+#endif
															
 
																+	{
															
 
																+		init_func(&replicate->data_interface, NULL);
															
 
																+	}
															
 
																 	replicate->initialized = 1;
															
 
																 }
															
@@ -305,13 +333,13 @@ void _starpu_data_end_reduction_mode(starpu_data_handle_t handle)
 
																 			redux_task->cl = handle->redux_cl;
															
 
																 			STARPU_ASSERT(redux_task->cl);
															
 
																-			if (!redux_task->cl->modes[0])
															
 
																-				redux_task->cl->modes[0] = STARPU_RW;
															
 
																-			if (!redux_task->cl->modes[1])
															
 
																-				redux_task->cl->modes[1] = STARPU_R;
															
 
																+			if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 0))
															
 
																+				STARPU_CODELET_SET_MODE(redux_task->cl, STARPU_RW, 0);
															
 
																+			if (!(STARPU_CODELET_GET_MODE(redux_task->cl, 1))
															
 
																+				STARPU_CODELET_SET_MODE(redux_task->cl, STARPU_R, 1);
															
 
																-			STARPU_ASSERT_MSG(redux_task->cl->modes[0] == STARPU_RW, "First parameter of reduction codelet has to be RW");
															
 
																-			STARPU_ASSERT_MSG(redux_task->cl->modes[1] == STARPU_R, "Second parameter of reduction codelet has to be R");
															
 
																+			STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(redux_task->cl, 0) == STARPU_RW, "First parameter of reduction codelet has to be RW");
															
 
																+			STARPU_ASSERT_MSG(STARPU_CODELET_GET_MODE(redux_task->cl, 1) == STARPU_R, "Second parameter of reduction codelet has to be R");
															
 
																 			STARPU_TASK_SET_HANDLE(redux_task, handle, 0);
															
 
																 			STARPU_TASK_SET_HANDLE(redux_task, replicate_array[replicate], 1);
															
--- a/src/debug/traces/starpu_fxt.c
+++ b/src/debug/traces/starpu_fxt.c
@@ -31,12 +31,16 @@
 
																 static char *cpus_worker_colors[STARPU_NMAXWORKERS] = {"/greens9/7", "/greens9/6", "/greens9/5", "/greens9/4",  "/greens9/9", "/greens9/3",  "/greens9/2",  "/greens9/1"  };
															
 
																 static char *cuda_worker_colors[STARPU_NMAXWORKERS] = {"/ylorrd9/9", "/ylorrd9/6", "/ylorrd9/3", "/ylorrd9/1", "/ylorrd9/8", "/ylorrd9/7", "/ylorrd9/4", "/ylorrd9/2",  "/ylorrd9/1"};
															
 
																 static char *opencl_worker_colors[STARPU_NMAXWORKERS] = {"/blues9/9", "/blues9/6", "/blues9/3", "/blues9/1", "/blues9/8", "/blues9/7", "/blues9/4", "/blues9/2",  "/blues9/1"};
															
 
																+static char *mic_worker_colors[STARPU_NMAXWORKERS] = {"/reds9/9", "/reds9/6", "/reds9/3", "/reds9/1", "/reds9/8", "/reds9/7", "/reds9/4", "/reds9/2",  "/reds9/1"};
															
 
																+static char *scc_worker_colors[STARPU_NMAXWORKERS] = {"/reds9/9", "/reds9/6", "/reds9/3", "/reds9/1", "/reds9/8", "/reds9/7", "/reds9/4", "/reds9/2",  "/reds9/1"};
															
 
																 static char *other_worker_colors[STARPU_NMAXWORKERS] = {"/greys9/9", "/greys9/8", "/greys9/7", "/greys9/6"};
															
 
																 static char *worker_colors[STARPU_NMAXWORKERS];
															
 
																 static unsigned opencl_index = 0;
															
 
																 static unsigned cuda_index = 0;
															
 
																 static unsigned cpus_index = 0;
															
 
																+static unsigned mic_index = 0;
															
 
																+static unsigned scc_index = 0;
															
 
																 static unsigned other_index = 0;
															
 
																 static void set_next_other_worker_color(int workerid)
															
@@ -59,6 +63,16 @@ static void set_next_opencl_worker_color(int workerid)
 
																 	worker_colors[workerid] = opencl_worker_colors[opencl_index++];
															
 
																 }
															
 
																+static void set_next_mic_worker_color(int workerid)
															
 
																+{ /* Assigns the next entry of mic_worker_colors to workerid and advances mic_index. NOTE(review): mic_index is not bounds-checked here. */
															
 
																+	worker_colors[workerid] = mic_worker_colors[mic_index++];
															
 
																+}
															
 
																+
															
 
																+static void set_next_scc_worker_color(int workerid)
															
 
																+{ /* Assigns the next entry of scc_worker_colors to workerid and advances scc_index. NOTE(review): scc_index is not bounds-checked here. */
															
 
																+	worker_colors[workerid] = scc_worker_colors[scc_index++];
															
 
																+}
															
 
																+
															
 
																 static const char *get_worker_color(int workerid)
															
 
																 {
															
 
																 	return worker_colors[workerid];
															
@@ -345,6 +359,16 @@ static void handle_worker_init_start(struct fxt_ev_64 *ev, struct starpu_fxt_opt
 
																 			kindstr = "OPENCL";
															
 
																 			archtype = STARPU_OPENCL_DEFAULT + devid;
															
 
																 			break;
															
 
																+		case _STARPU_FUT_MIC_KEY:
															
 
																+			set_next_mic_worker_color(workerid);
															
 
																+			kindstr = "mic";
															
 
																+			archtype = STARPU_MIC_DEFAULT + devid;
															
 
																+			break;
															
 
																+		case _STARPU_FUT_SCC_KEY:
															
 
																+			set_next_scc_worker_color(workerid);
															
 
																+			kindstr = "scc";
															
 
																+			archtype = STARPU_SCC_DEFAULT + devid;
															
 
																+			break;
															
 
																 		default:
															
 
																 			STARPU_ABORT();
															
 
																 	}
															
--- a/src/drivers/gordon/driver_gordon.c
+++ b/src/drivers/gordon/driver_gordon.c
@@ -374,7 +374,8 @@ void *gordon_worker_inject(struct _starpu_worker_set *arg)
 
																 					struct _starpu_job_list *chunk_list;
															
 
																 					if (chunk != (nchunks -1))
															
 
																 					{
															
 
																-						/* split the list in 2 parts : list = chunk_list | tail */
															
 
																+						/* split the list in 2 parts :
															
 
																+						 * list = chunk_list | tail */
															
 
																 						chunk_list = _starpu_job_list_new();
															
 
																 						/* find the end */
															
--- a/src/drivers/mic/driver_mic_common.c
+++ b/src/drivers/mic/driver_mic_common.c
@@ -0,0 +1,120 @@
 
																+/* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																+ *
															
 
																+ * Copyright (C) 2012  Inria
															
 
																+ *
															
 
																+ * StarPU is free software; you can redistribute it and/or modify
															
 
																+ * it under the terms of the GNU Lesser General Public License as published by
															
 
																+ * the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																+ * your option) any later version.
															
 
																+ *
															
 
																+ * StarPU is distributed in the hope that it will be useful, but
															
 
																+ * WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																+ *
															
 
																+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																+ */
															
 
																+
															
 
																+
															
 
																+#include <starpu.h>
															
 
																+#include <drivers/mp_common/mp_common.h>
															
 
																+#include <drivers/mic/driver_mic_common.h>
															
 
																+
															
 
																+
															
 
																+/* Print a diagnostic for a failed SCIF call, then trip STARPU_ASSERT(0).
																+ *
																+ * FUNC, FILE and LINE identify the call site. STATUS is the error code; it
																+ * is printed both as a number and as the text returned by strerror().
																+ */
																+void _starpu_mic_common_report_scif_error(const char *func, const char *file, const int line, const int status)
																+{
																+	const char *errormsg = strerror(status);
																+	/* LINE is a signed int, so it is printed with %d (not %u). */
																+	printf("Common: oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
																+	STARPU_ASSERT(0);
																+}
															
 
																+
															
 
																+/* Send LEN bytes starting at MSG over the mp_connection endpoint of NODE,
																+ * using SCIF_SEND_BLOCK.
																+ * A failure is reported right here through STARPU_MP_COMMON_REPORT_ERROR,
																+ * so the (generic) caller does not have to deal with it.
																+ */
																+void _starpu_mic_common_send(const struct _starpu_mp_node *node, void *msg, int len)
																+{
																+	const int sent = scif_send(node->mp_connection.mic_endpoint, msg, len, SCIF_SEND_BLOCK);
																+
																+	if (sent >= 0)
																+		return;
																+
																+	STARPU_MP_COMMON_REPORT_ERROR(node, errno);
																+}
															
 
																+
															
 
																+/* Receive LEN bytes into MSG from the mp_connection endpoint of NODE,
																+ * using SCIF_RECV_BLOCK.
																+ * A failure is reported right here through STARPU_MP_COMMON_REPORT_ERROR,
																+ * so the (generic) caller does not have to deal with it.
																+ */
																+void _starpu_mic_common_recv(const struct _starpu_mp_node *node, void *msg, int len)
																+{
																+	const int received = scif_recv(node->mp_connection.mic_endpoint, msg, len, SCIF_RECV_BLOCK);
																+
																+	if (received >= 0)
																+		return;
																+
																+	STARPU_MP_COMMON_REPORT_ERROR(node, errno);
																+}
															
 
																+
															
 
																+/* Send LEN bytes starting at MSG over the host_sink_dt_connection endpoint
																+ * of MP_NODE, using SCIF_SEND_BLOCK.
																+ * A failure is reported right here through STARPU_MP_COMMON_REPORT_ERROR,
																+ * so the (generic) caller does not have to deal with it.
																+ */
																+void _starpu_mic_common_dt_send(const struct _starpu_mp_node *mp_node, void *msg, int len)
																+{
																+	const int sent = scif_send(mp_node->host_sink_dt_connection.mic_endpoint, msg, len, SCIF_SEND_BLOCK);
																+
																+	if (sent >= 0)
																+		return;
																+
																+	STARPU_MP_COMMON_REPORT_ERROR(mp_node, errno);
																+}
															
 
																+
															
 
																+/* Receive LEN bytes into MSG from the host_sink_dt_connection endpoint of
																+ * MP_NODE.
																+ * A failure is reported right here through STARPU_MP_COMMON_REPORT_ERROR,
																+ * so the (generic) caller does not have to deal with it.
																+ */
																+void _starpu_mic_common_dt_recv(const struct _starpu_mp_node *mp_node, void *msg, int len)
																+{
																+	/* scif_recv takes the receive-side flag, as in _starpu_mic_common_recv;
																+	 * SCIF_SEND_BLOCK belongs to scif_send. */
																+	if ((scif_recv(mp_node->host_sink_dt_connection.mic_endpoint, msg, len, SCIF_RECV_BLOCK)) < 0)
																+		STARPU_MP_COMMON_REPORT_ERROR(mp_node, errno);
																+}
															
 
																+
															
 
																+/* Open a SCIF endpoint into *ENDPOINT, bind it to LOCAL_PORT_NUMBER and
																+ * connect it to REMOTE_PORT_NUMBER on REMOTE_NODE.
																+ * The connection attempt is repeated for as long as it fails with
																+ * ECONNREFUSED; every other failure is handed to
																+ * STARPU_MIC_COMMON_REPORT_SCIF_ERROR.
																+ */
																+void _starpu_mic_common_connect(scif_epd_t *endpoint, uint16_t remote_node,
																+				uint16_t local_port_number, uint16_t remote_port_number)
																+{
																+	/* Address of the peer we connect to */
																+	struct scif_portID peer;
																+
																+	peer.node = remote_node;
																+	peer.port = remote_port_number;
																+
																+	*endpoint = scif_open();
																+	if (*endpoint < 0)
																+		STARPU_MIC_COMMON_REPORT_SCIF_ERROR(errno);
																+
																+	if (scif_bind(*endpoint, local_port_number) < 0)
																+		STARPU_MIC_COMMON_REPORT_SCIF_ERROR(errno);
																+
																+	_STARPU_DEBUG("Connecting to MIC %d on %d:%d...\n", remote_node, local_port_number, remote_port_number);
																+	for (;;)
																+	{
																+		if (scif_connect(*endpoint, &peer) != -1)
																+			break;
																+
																+		/* A refused connection is simply tried again */
																+		if (errno != ECONNREFUSED)
																+			STARPU_MIC_COMMON_REPORT_SCIF_ERROR(errno);
																+	}
																+	_STARPU_DEBUG("done\n");
																+}
															
 
																+
															
 
																+/* Wait for and accept one connection on port PORT_NUMBER; the resulting
																+ * endpoint is stored in *ENDPOINT.
																+ * A temporary listening endpoint is opened, bound and closed again here.
																+ * Any failing SCIF call is handed to STARPU_MIC_COMMON_REPORT_SCIF_ERROR.
																+ */
																+void _starpu_mic_common_accept(scif_epd_t *endpoint, uint16_t port_number)
																+{
																+	/* Never read afterwards: only there so that scif_accept has a valid
																+	 * PEER parameter to write to instead of causing a seg fault */
																+	struct scif_portID portID;
																+
																+	/* Endpoint only useful for the initialization of the connection */
																+	int init_epd;
																+
																+	if ((init_epd = scif_open()) < 0)
																+		STARPU_MIC_COMMON_REPORT_SCIF_ERROR(errno);
																+
																+	if ((scif_bind(init_epd, port_number)) < 0)
																+		STARPU_MIC_COMMON_REPORT_SCIF_ERROR(errno);
																+
																+	/* We fix the maximum number of requests to 1 as we
																+	 * only need one connection, more would be an error */
																+	if ((scif_listen(init_epd, 1)) < 0)
																+		STARPU_MIC_COMMON_REPORT_SCIF_ERROR(errno);
																+
																+	_STARPU_DEBUG("MIC accepting connection on %u...\n", port_number);
																+	if ((scif_accept(init_epd, &portID, endpoint, SCIF_ACCEPT_SYNC)) < 0)
																+		STARPU_MIC_COMMON_REPORT_SCIF_ERROR(errno);
																+	/* The format has no conversion, so no argument is passed */
																+	_STARPU_DEBUG("done\n");
																+
																+	scif_close(init_epd);
																+}
															
--- a/src/drivers/mic/driver_mic_common.h
+++ b/src/drivers/mic/driver_mic_common.h
@@ -0,0 +1,70 @@
 
																+/* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																+ *
															
 
																+ * Copyright (C) 2012  Inria
															
 
																+ *
															
 
																+ * StarPU is free software; you can redistribute it and/or modify
															
 
																+ * it under the terms of the GNU Lesser General Public License as published by
															
 
																+ * the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																+ * your option) any later version.
															
 
																+ *
															
 
																+ * StarPU is distributed in the hope that it will be useful, but
															
 
																+ * WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																+ *
															
 
																+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																+ */
															
 
																+
															
 
																+
															
 
																+#ifndef __DRIVER_MIC_COMMON_H__
															
 
																+#define __DRIVER_MIC_COMMON_H__
															
 
																+
															
 
																+
															
 
																+#include <common/config.h>
															
 
																+
															
 
																+
															
 
																+#ifdef STARPU_USE_MIC
															
 
																+
															
 
																+#define STARPU_TO_MIC_ID(id) ((id) + 1)
															
 
																+
															
 
																+/* TODO: rather allocate ports on the host and pass them as parameters to the device process */
															
 
																+#define STARPU_MIC_PORTS_BEGIN 1099
															
 
																+
															
 
																+#define STARPU_MIC_SOURCE_PORT_NUMBER STARPU_MIC_PORTS_BEGIN
															
 
																+#define STARPU_MIC_SINK_PORT_NUMBER(id) ((id) + STARPU_MIC_PORTS_BEGIN + 1)
															
 
																+
															
 
																+#define STARPU_MIC_SOURCE_DT_PORT_NUMBER (STARPU_MAXMICDEVS + STARPU_MIC_PORTS_BEGIN + 1)
															
 
																+#define STARPU_MIC_SINK_DT_PORT_NUMBER(id) ((id) + STARPU_MAXMICDEVS + STARPU_MIC_PORTS_BEGIN + 2)
															
 
																+
															
 
																+#define STARPU_MIC_SINK_SINK_DT_PORT_NUMBER(me, peer_id) \
															
 
																+((me) * STARPU_MAXMICDEVS + (peer_id) +  2 * STARPU_MAXMICDEVS + STARPU_MIC_PORTS_BEGIN + 2)
															
 
																+
															
 
																+#define STARPU_MIC_PAGE_SIZE 0x1000
															
 
																+#define STARPU_MIC_GET_PAGE_SIZE_MULTIPLE(size) \
															
 
																+(((size) % STARPU_MIC_PAGE_SIZE == 0) ? (size) : (((size) / STARPU_MIC_PAGE_SIZE + 1) * STARPU_MIC_PAGE_SIZE))
															
 
																+
															
 
																+#define STARPU_MIC_COMMON_REPORT_SCIF_ERROR(status) \
															
 
																+	_starpu_mic_common_report_scif_error(__starpu_func__, __FILE__, __LINE__, status)
															
 
																+
															
 
																+/* Argument of the free command, as decoded by _starpu_mic_sink_free(). */
																+struct _starpu_mic_free_command
															
 
																+{
															
 
																+	/* Buffer that the sink passes to free() */
																+	void *addr;
															
 
																+	/* Size from which the sink recomputes the page-multiple window
																+	 * length given to scif_unregister() */
																+	size_t size;
															
 
																+};
															
 
																+
															
 
																+void _starpu_mic_common_report_scif_error(const char *func, const char *file, int line, const int status);
															
 
																+
															
 
																+void _starpu_mic_common_send(const struct _starpu_mp_node *node, void *msg, int len);
															
 
																+
															
 
																+void _starpu_mic_common_recv(const struct _starpu_mp_node *node, void *msg, int len);
															
 
																+
															
 
																+void _starpu_mic_common_dt_send(const struct _starpu_mp_node *node, void *msg, int len);
															
 
																+
															
 
																+void _starpu_mic_common_dt_recv(const struct _starpu_mp_node *node, void *msg, int len);
															
 
																+
															
 
																+void _starpu_mic_common_connect(scif_epd_t *endpoint, uint16_t remote_node, 
															
 
																+				uint16_t local_port_number, uint16_t remote_port_number);
															
 
																+void _starpu_mic_common_accept(scif_epd_t *endpoint, uint16_t port_number);
															
 
																+
															
 
																+#endif /* STARPU_USE_MIC */
															
 
																+
															
 
																+#endif /* __DRIVER_MIC_COMMON_H__ */
															
--- a/src/drivers/mic/driver_mic_sink.c
+++ b/src/drivers/mic/driver_mic_sink.c
@@ -0,0 +1,135 @@
 
																+/* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																+ *
															
 
																+ * Copyright (C) 2012  Inria
															
 
																+ *
															
 
																+ * StarPU is free software; you can redistribute it and/or modify
															
 
																+ * it under the terms of the GNU Lesser General Public License as published by
															
 
																+ * the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																+ * your option) any later version.
															
 
																+ *
															
 
																+ * StarPU is distributed in the hope that it will be useful, but
															
 
																+ * WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																+ *
															
 
																+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																+ */
															
 
																+
															
 
																+
															
 
																+#include <errno.h>
															
 
																+
															
 
																+#include <common/COISysInfo_common.h>
															
 
																+
															
 
																+#include <starpu.h>
															
 
																+#include <drivers/mp_common/mp_common.h>
															
 
																+#include <drivers/mp_common/sink_common.h>
															
 
																+
															
 
																+#include "driver_mic_common.h"
															
 
																+#include "driver_mic_sink.h"
															
 
																+
															
 
																+/* Initialize the MIC sink side of NODE by accepting two connections:
																+ * one on STARPU_MIC_SOURCE_PORT_NUMBER, stored in mp_connection, then one
																+ * on STARPU_MIC_SOURCE_DT_PORT_NUMBER, stored in host_sink_dt_connection.
																+ * Connections to the other devices are not implemented yet.
																+ */
																+void _starpu_mic_sink_init(struct _starpu_mp_node *node)
																+{
																+	scif_epd_t *const source_epd = &node->mp_connection.mic_endpoint;
																+	scif_epd_t *const source_dt_epd = &node->host_sink_dt_connection.mic_endpoint;
																+
																+	/* Initialize connection with the source */
																+	_starpu_mic_common_accept(source_epd, STARPU_MIC_SOURCE_PORT_NUMBER);
																+	_starpu_mic_common_accept(source_dt_epd, STARPU_MIC_SOURCE_DT_PORT_NUMBER);
																+
																+	/* Not implemented yet: connections between sinks.
																+	 *
																+	 * unsigned int i;
																+	 *
																+	 * node->sink_sink_dt_connections = malloc(node->nb_mp_sinks * sizeof(union _starpu_mp_connection));
																+	 *
																+	 * for (i = 0; i < (unsigned int)node->devid; ++i)
																+	 *	_starpu_mic_common_connect(&node->sink_sink_dt_connections[i].mic_endpoint,
																+	 *					STARPU_TO_MIC_ID(i),
																+	 *					STARPU_MIC_SINK_SINK_DT_PORT_NUMBER(node->devid, i),
																+	 *					STARPU_MIC_SINK_SINK_DT_PORT_NUMBER(i, node->devid));
																+	 *
																+	 * for (i = node->devid + 1; i < node->nb_mp_sinks; ++i)
																+	 *	_starpu_mic_common_accept(&node->sink_sink_dt_connections[i].mic_endpoint,
																+	 *					STARPU_MIC_SINK_SINK_DT_PORT_NUMBER(node->devid, i));
																+	 */
																+}
															
 
																+
															
 
																+/* Deinitialize the MIC sink: close the host_sink_dt_connection endpoint of
																+ * NODE, then its mp_connection endpoint.
																+ */
																+void _starpu_mic_sink_deinit(struct _starpu_mp_node *node)
																+{
																+	const scif_epd_t dt_epd = node->host_sink_dt_connection.mic_endpoint;
																+	const scif_epd_t mp_epd = node->mp_connection.mic_endpoint;
																+
																+	/* Not implemented yet: connections between sinks.
																+	 *
																+	 * unsigned int i;
																+	 *
																+	 * for (i = 0; i < node->nb_mp_sinks; ++i)
																+	 * {
																+	 *	if (i != (unsigned int)node->devid)
																+	 *		scif_close(node->sink_sink_dt_connections[i].mic_endpoint);
																+	 * }
																+	 *
																+	 * free(node->sink_sink_dt_connections);
																+	 */
																+
																+	scif_close(dt_epd);
																+	scif_close(mp_epd);
																+}
															
 
																+
															
 
																+/* Report an error which occurred when using a MIC device: print it in a
																+ * human-readable style, then trip STARPU_ASSERT(0).
																+ *
																+ * FUNC, FILE and LINE identify the call site. STATUS is the error code; it
																+ * is printed both as a number and as the text returned by strerror().
																+ */
																+void _starpu_mic_sink_report_error(const char *func, const char *file, const int line, const int status)
																+{
																+	const char *errormsg = strerror(status);
																+	/* LINE is a signed int, so it is printed with %d (not %u). */
																+	printf("SINK: oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
																+	STARPU_ASSERT(0);
																+}
															
 
																+
															
 
																+/* Return the value of COISysGetCoreCount(), converted to unsigned int:
																+ * the number of cores of the callee (a MIC device or a Xeon processor).
																+ */
																+unsigned int _starpu_mic_sink_get_nb_core(void)
																+{
																+	const unsigned int nb_core = (unsigned int) COISysGetCoreCount();
																+
																+	return nb_core;
																+}
															
 
																+
															
 
																+/* Allocate memory on the MIC and answer the requester.
																+ *
																+ * ARG points to the requested size, a size_t (ARG_SIZE must be
																+ * sizeof(size_t)). The buffer is aligned on STARPU_MIC_PAGE_SIZE. Unless
																+ * STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY is defined, it is also registered on
																+ * the host_sink_dt_connection endpoint, at an offset equal to its address,
																+ * for remote direct access.
																+ *
																+ * Exactly one command is sent back through MP_NODE:
																+ * STARPU_ANSWER_ALLOCATE carrying the address on success, or
																+ * STARPU_ERROR_ALLOCATE when the allocation or the registration failed.
																+ */
																+void _starpu_mic_sink_allocate(const struct _starpu_mp_node *mp_node, void *arg, int arg_size)
																+{
																+	STARPU_ASSERT(arg_size == sizeof(size_t));
																+
																+	void *addr = NULL;
																+	size_t size = *(size_t *)(arg);
																+
																+	if (posix_memalign(&addr, STARPU_MIC_PAGE_SIZE, size) != 0)
																+	{
																+		_starpu_mp_common_send_command(mp_node, STARPU_ERROR_ALLOCATE, NULL, 0);
																+		/* Stop here: nothing to register, and the error must not be
																+		 * followed by a second, successful-looking answer */
																+		return;
																+	}
																+
																+#ifndef STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY
																+	scif_epd_t epd = mp_node->host_sink_dt_connection.mic_endpoint;
																+	/* NOTE(review): the window is rounded up to a page multiple while only
																+	 * SIZE bytes were allocated -- confirm this is intended */
																+	size_t window_size = STARPU_MIC_GET_PAGE_SIZE_MULTIPLE(size);
																+
																+	if (scif_register(epd, addr, window_size, (off_t)addr, SCIF_PROT_READ | SCIF_PROT_WRITE, SCIF_MAP_FIXED) < 0)
																+	{
																+		free(addr);
																+		_starpu_mp_common_send_command(mp_node, STARPU_ERROR_ALLOCATE, NULL, 0);
																+		/* Stop here: ADDR has just been freed and must not be sent */
																+		return;
																+	}
																+#endif
																+
																+	_starpu_mp_common_send_command(mp_node, STARPU_ANSWER_ALLOCATE, &addr, sizeof(addr));
																+}
															
 
																+
															
 
																+/* Free the buffer described by ARG, a struct _starpu_mic_free_command
																+ * (ARG_SIZE must be its size).
																+ * Unless STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY is defined, the page-multiple
																+ * window starting at the buffer address is first unregistered from the
																+ * host_sink_dt_connection endpoint of MP_NODE.
																+ */
																+void _starpu_mic_sink_free(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUTE_UNUSED, void *arg, int arg_size)
																+{
																+	STARPU_ASSERT(arg_size == sizeof(struct _starpu_mic_free_command));
																+
																+	const struct _starpu_mic_free_command *cmd = arg;
																+	void *addr = cmd->addr;
																+
																+#ifndef STARPU_DISABLE_ASYNCHRONOUS_MIC_COPY
																+	size_t window_size = STARPU_MIC_GET_PAGE_SIZE_MULTIPLE(cmd->size);
																+
																+	scif_unregister(mp_node->host_sink_dt_connection.mic_endpoint, (off_t)addr, window_size);
																+#endif
																+	free(addr);
																+}
															
--- a/src/drivers/mic/driver_mic_sink.h
+++ b/src/drivers/mic/driver_mic_sink.h