12 yıl önce · 57ac315b6d
--- a/AUTHORS
+++ b/AUTHORS
@@ -10,6 +10,8 @@ David Gómez <david_gomez1380@yahoo.com.mx>
 
				 Sylvain Henry <sylvain.henry@inria.fr>
			
 
				 Mehdi Juhoor <mjuhoor@gmail.com>
			
 
				 Antoine Lucas <antoine.lucas.33@gmail.com>
			
 
				+Brice Mortier <brice.mortier@etu.u-bordeaux1.fr>
			
 
				+Damien Pasqualinotto <dam.pasqualinotto@wanadoo.fr>
			
 
				 Nguyen Quôc-Dinh <nguyen.quocdinh@gmail.com>
			
 
				 Cyril Roelandt <cyril.roelandt@inria.fr>
			
 
				 Anthony Roy <theanthony33@gmail.com>
			
@@ -17,4 +19,4 @@ Ludovic Stordeur <ludovic.stordeur@inria.fr>
 
				 François Tessier <francois.tessier@inria.fr>
			
 
				 Samuel Thibault <samuel.thibault@labri.fr>
			
 
				 Pierre-André Wacrenier <wacrenier@labri.fr>
			
 
				-Andra Hugo <andra.hugo@inria.fr>
			
 
				+Andra Hugo <andra.hugo@inria.fr>
			
--- a/Makefile.am
+++ b/Makefile.am
@@ -67,6 +67,9 @@ versinclude_HEADERS = 				\
 
				 	include/starpu_fxt.h			\
			
 
				 	include/starpu_cuda.h			\
			
 
				 	include/starpu_opencl.h			\
			
 
				+	include/starpu_sink.h			\
			
 
				+	include/starpu_mic.h			\
			
 
				+	include/starpu_scc.h			\
			
 
				 	include/starpu_expert.h			\
			
 
				 	include/starpu_profiling.h		\
			
 
				 	include/starpu_bound.h			\
			
--- a/configure.ac
+++ b/configure.ac
@@ -100,6 +100,31 @@ else
 
				    LC_ALL=C svnversion $srcdir > ./STARPU-REVISION
			
 
				 fi
			
 
				 
			
 
				+AM_CONDITIONAL([STARPU_CROSS_COMPILING], [test "x$cross_compiling" = "xyes"])
			
 
				+
			
 
				+###############################################################################
			
 
				+#                                                                             #
			
 
				+#                           MIC device compilation                            #
			
 
				+#   (Must be done in beginning to change prefix in the whole configuration)   #
			
 
				+#                                                                             #
			
 
				+###############################################################################
			
 
				+
			
 
				+AC_ARG_ENABLE(mic, [AS_HELP_STRING([--enable-mic],
			
 
				+	      [use MIC device(s)])], [enable_mic=$enableval], [enable_mic=no])
			
 
				+AC_ARG_ENABLE(mic-rma, [AS_HELP_STRING([--disable-mic-rma],
			
 
				+	      [use MIC RMA transfer])], [], [enable_mic_rma=yes])
			
 
				+
			
 
				+if test x$enable_mic = xyes ; then
			
 
				+	AC_DEFINE(STARPU_USE_MIC, [1], [MIC workers support is enabled])
			
 
				+fi
			
 
				+if test x$enable_mic_rma = xyes ; then
			
 
				+	AC_DEFINE([STARPU_MIC_USE_RMA], [1], [MIC RMA transfer is enabled])
			
 
				+fi
			
 
				+
			
 
				+AM_CONDITIONAL([STARPU_USE_MIC], [test "x$enable_mic" = "xyes"])
			
 
				+
			
 
				+###############################################################################
			
 
				+
			
 
				 AC_PATH_PROGS([STARPU_MS_LIB], [lib])
			
 
				 AC_ARG_VAR([STARPU_MS_LIB], [Path to Microsoft's Visual Studio `lib' tool])
			
 
				 AM_CONDITIONAL([STARPU_HAVE_MS_LIB], [test "x$STARPU_MS_LIB" != "x"])
			
@@ -880,6 +905,252 @@ if test x$enable_blocking = xno -a x$enable_simgrid != xyes ; then
 
				 	AC_DEFINE(STARPU_NON_BLOCKING_DRIVERS, [1], [drivers must progress])
			
 
				 fi
			
 
				 
			
 
				+###############################################################################
			
 
				+#                                                                             #
			
 
				+#                                 MIC settings                                #
			
 
				+#                                                                             #
			
 
				+###############################################################################
			
 
				+
			
 
				+AC_MSG_CHECKING(maximum number of MIC devices)
			
 
				+AC_ARG_ENABLE(maxmicdev, [AS_HELP_STRING([--enable-maxmicdev=<number>],
			
 
				+			[maximum number of MIC devices])],
			
 
				+			nmaxmicdev=$enableval, nmaxmicdev=4)
			
 
				+AC_MSG_RESULT($nmaxmicdev)
			
 
				+
			
 
				+AC_DEFINE_UNQUOTED(STARPU_MAXMICDEVS, [$nmaxmicdev],
			
 
				+	[maximum number of MIC devices])
			
 
				+
			
 
				+AC_MSG_CHECKING(maximum number of MIC cores)
			
 
				+AC_ARG_ENABLE(maxmiccore, [AS_HELP_STRING([--enable-maxmiccore=<number>],
			
 
				+			[maximum number of MIC cores])],
			
 
				+			nmaxmiccore=$enableval, nmaxmiccore=128)
			
 
				+AC_MSG_RESULT($nmaxmiccore)
			
 
				+
			
 
				+AC_DEFINE_UNQUOTED(STARPU_MAXMICCORES, [$nmaxmiccore],
			
 
				+	[maximum number of MIC cores])
			
 
				+
			
 
				+AC_ARG_WITH(coi-dir,
			
 
				+	[AS_HELP_STRING([--with-coi-dir=<path>],
			
 
				+	[specify the MIC's COI installation directory])],
			
 
				+	[coi_dir="$withval"],
			
 
				+	[coi_dir=no])
			
 
				+
			
 
				+AC_ARG_WITH(coi-include-dir,
			
 
				+	[AS_HELP_STRING([--with-coi-include-dir=<path>],
			
 
				+	[specify where the MIC's COI headers are installed])],
			
 
				+	[coi_include_dir="$withval"],
			
 
				+	[coi_include_dir=no])
			
 
				+
			
 
				+AC_ARG_WITH(coi-lib-dir,
			
 
				+	[AS_HELP_STRING([--with-coi-lib-dir=<path>],
			
 
				+	[specify where the MIC's COI libraries are installed])],
			
 
				+	[coi_lib_dir="$withval"],
			
 
				+	[coi_lib_dir=no])
			
 
				+
			
 
				+AC_DEFUN([STARPU_CHECK_COI_RUNTIME],
			
 
				+[
			
 
				+    __coi_dir=$1
			
 
				+    __coi_include_dir=$2
			
 
				+    __coi_lib_dir=$3
			
 
				+    __coi_lib_name=$4
			
 
				+
			
 
				+    if test "$__coi_dir" != "no" -a "$__coi_dir" != "" ; then
			
 
				+	AC_MSG_CHECKING(whether MIC's COI runtime is available in $__coi_dir)
			
 
				+    else
			
 
				+	AC_MSG_CHECKING(whether MIC's COI runtime is available)
			
 
				+    fi
			
 
				+    AC_MSG_RESULT()
			
 
				+
			
 
				+    if test "$__coi_include_dir" = "no" -a "$__coi_dir" != "no" ; then
			
 
				+        __coi_include_dir="${__coi_dir}/include"
			
 
				+    fi
			
 
				+    if test "$__coi_lib_dir" = "no" -a "$__coi_dir" != "no" ; then
			
 
				+        __coi_lib_dir="${__coi_dir}/lib"
			
 
				+    fi
			
 
				+
			
 
				+    SAVED_CPPFLAGS="$CPPFLAGS"
			
 
				+    SAVED_LDFLAGS="$LDFLAGS"
			
 
				+
			
 
				+    if test "$__coi_include_dir" != "no" ; then
			
 
				+        CPPFLAGS="${CPPFLAGS} -I$__coi_include_dir"
			
 
				+    fi
			
 
				+    if test "$__coi_lib_dir" != "no" ; then
			
 
				+	LDFLAGS="${LDFLAGS} -L$__coi_lib_dir"
			
 
				+    fi
			
 
				+
			
 
				+    AC_CHECK_HEADER([source/COIEngine_source.h],[have_valid_coi=yes],[have_valid_coi=no])
			
 
				+
			
 
				+    if test "$have_valid_coi" = "yes" ; then
			
 
				+	AC_HAVE_LIBRARY([$__coi_lib_name],[have_valid_coi=yes],[have_valid_coi=no])
			
 
				+
			
 
				+        if test "$have_valid_coi" = "no" ; then
			
 
				+            if test "$3" = "no" -a "$__coi_dir" != "no" ; then
			
 
				+		# ${__coi_dir}/lib didn't work, let's try with lib64
			
 
				+                __coi_lib_dir="$__coi_dir/lib64"
			
 
				+		LDFLAGS="${SAVED_LDFLAGS} -L$__coi_lib_dir"
			
 
				+	        AC_HAVE_LIBRARY([$__coi_lib_name],[have_valid_coi=yes],[have_valid_coi=no])
			
 
				+            fi
			
 
				+        fi
			
 
				+    fi
			
 
				+
			
 
				+    if test "$have_valid_coi" = "yes" -a "$__coi_include_dir" != "no"; then
			
 
				+        STARPU_COI_CPPFLAGS="-I$__coi_include_dir"
			
 
				+    fi
			
 
				+
			
 
				+    if test "$have_valid_coi" = "yes" -a "$__coi_lib_dir" != "no"; then
			
 
				+        STARPU_COI_LDFLAGS="-L$__coi_lib_dir -l$__coi_lib_name"
			
 
				+    fi
			
 
				+
			
 
				+    CPPFLAGS="${SAVED_CPPFLAGS}"
			
 
				+    LDFLAGS="${SAVED_LDFLAGS}"
			
 
				+])
			
 
				+
			
 
				+if test x$enable_mic = xyes ; then
			
 
				+
			
 
				+    STARPU_CHECK_COI_RUNTIME($coi_dir, $coi_include_dir, $coi_lib_dir, "coi_host")
			
 
				+
			
 
				+    # Host runtime is not compatible, we are probably cross-compiling
			
 
				+    # Let's have a look for the device runtime which lib has a different name
			
 
				+    if test "$have_valid_coi" = "no" ; then
			
 
				+	    STARPU_CHECK_COI_RUNTIME($coi_dir, $coi_include_dir, $coi_lib_dir, "coi_device")
			
 
				+    fi
			
 
				+
			
 
				+    if test "$have_valid_coi" = "no" ; then
			
 
				+	AC_MSG_ERROR([cannot find MIC's COI runtime])
			
 
				+    fi
			
 
				+
			
 
				+    AC_SUBST(STARPU_COI_CPPFLAGS)
			
 
				+    AC_SUBST(STARPU_COI_LDFLAGS)
			
 
				+fi
			
 
				+
			
 
				+###############################################################################
			
 
				+#                                                                             #
			
 
				+#                                 SCC settings                                #
			
 
				+#                                                                             #
			
 
				+###############################################################################
			
 
				+
			
 
				+AC_ARG_ENABLE([rcce], [AS_HELP_STRING([--disable-rcce],
			
 
				+			  [do not use SCC device(s)])], [], enable_rcce=maybe)
			
 
				+
			
 
				+nmaxsccdev=47
			
 
				+AC_DEFINE_UNQUOTED(STARPU_MAXSCCDEVS, [$nmaxsccdev], [maximum number of SCC devices])
			
 
				+
			
 
				+AC_ARG_WITH(rcce-dir,
			
 
				+			[AS_HELP_STRING([--with-rcce-dir=<path>],
			
 
				+							[specify RCCE installation directory])],
			
 
				+			[
			
 
				+				rcce_dir="$withval"
			
 
				+				enable_rcce=yes
			
 
				+			],
			
 
				+			rcce_dir=no)
			
 
				+
			
 
				+AC_ARG_WITH(rcce-include-dir,
			
 
				+			[AS_HELP_STRING([--with-rcce-include-dir=<path>],
			
 
				+							[specify where RCCE headers are installed])],
			
 
				+			[
			
 
				+				rcce_include_dir="$withval"
			
 
				+				enable_rcce=yes
			
 
				+			],
			
 
				+			rcce_include_dir=no)
			
 
				+
			
 
				+AC_ARG_WITH(rcce-lib-dir,
			
 
				+			[AS_HELP_STRING([--with-rcce-lib-dir=<path>],
			
 
				+							[specify where RCCE libraries are installed])],
			
 
				+			[
			
 
				+			 	rcce_lib_dir="$withval"
			
 
				+			 	enable_rcce=yes
			
 
				+			],
			
 
				+			rcce_lib_dir=no)
			
 
				+
			
 
				+if test x$enable_rcce = xyes -o x$enable_rcce = xmaybe ; then
			
 
				+	have_valid_rcce=yes
			
 
				+
			
 
				+	SAVED_LDFLAGS="${LDFLAGS}"
			
 
				+	SAVED_CPPFLAGS="${CPPFLAGS}"
			
 
				+	SAVED_LIBS="${LIBS}"
			
 
				+
			
 
				+	if test x$rcce_include_dir != xno ; then
			
 
				+		STARPU_RCCE_CPPFLAGS="-I${rcce_include_dir}"
			
 
				+	elif test x$rcce_dir != xno ; then
			
 
				+		STARPU_RCCE_CPPFLAGS="-I${rcce_dir}/include"
			
 
				+	fi
			
 
				+
			
 
				+	CPPFLAGS="${CPPFLAGS} ${STARPU_RCCE_CPPFLAGS}"
			
 
				+	AC_CHECK_HEADER([RCCE.h], [], [have_valid_rcce=no])
			
 
				+
			
 
				+	if test x$rcce_lib_dir != xno ; then
			
 
				+		STARPU_RCCE_LDFLAGS="-L${rcce_lib_dir}"
			
 
				+	elif test x$rcce_dir != xno ; then
			
 
				+		STARPU_RCCE_LDFLAGS="-L${rcce_dir}/lib"
			
 
				+	fi
			
 
				+
			
 
				+	LDFLAGS="${LDFLAGS} ${STARPU_RCCE_LDFLAGS}"
			
 
				+	AC_CHECK_LIB([RCCE_bigflags_nongory_nopwrmgmt], [RCCE_init], [], [have_valid_rcce=no])
			
 
				+
			
 
				+	# in case RCCE was explicitly required, but is not available, this is an error
			
 
				+	if test x$enable_rcce = xyes -a x$have_valid_rcce = xno ; then
			
 
				+		AC_MSG_ERROR([cannot find RCCE library])
			
 
				+	fi
			
 
				+
			
 
				+	if test x$have_valid_rcce = xyes ; then
			
 
				+		STARPU_RCCE_CPPFLAGS="${STARPU_RCCE_CPPFLAGS} -DSCC"
			
 
				+		STARPU_RCCE_LDFLAGS="${STARPU_RCCE_LDFLAGS} -lRCCE_bigflags_nongory_nopwrmgmt -ldl"
			
 
				+
			
 
				+		AC_DEFINE(STARPU_USE_SCC, [1], [SCC support is enabled])
			
 
				+
			
 
				+		AC_SUBST(STARPU_RCCE_CFLAGS)
			
 
				+		AC_SUBST(STARPU_RCCE_CPPFLAGS)
			
 
				+		AC_SUBST(STARPU_RCCE_LDFLAGS)
			
 
				+	fi
			
 
				+
			
 
				+	LDFLAGS="${SAVED_LDFLAGS}"
			
 
				+	CPPFLAGS="${SAVED_CPPFLAGS}"
			
 
				+	LIBS="${SAVED_LIBS}"
			
 
				+
			
 
				+	enable_rcce=$have_valid_rcce
			
 
				+fi
			
 
				+
			
 
				+AM_CONDITIONAL(STARPU_USE_SCC, test x$enable_rcce = xyes)
			
 
				+
			
 
				+AC_MSG_CHECKING(whether RCCE should be used)
			
 
				+AC_MSG_RESULT($enable_rcce)
			
 
				+
			
 
				+
			
 
				+###############################################################################
			
 
				+#                                                                             #
			
 
				+#                             MP Common settings                              #
			
 
				+#                                                                             #
			
 
				+###############################################################################
			
 
				+
			
 
				+AM_CONDITIONAL([STARPU_USE_MP], [test "x$enable_mic" = "xyes" -o "x$enable_mpi" = "xyes" -o "x$enable_rcce" = "xyes"])
			
 
				+
			
 
				+AC_ARG_ENABLE([export-dynamic], [AS_HELP_STRING([--disable-export-dynamic],
			
 
				+			  [Prevent the linker from adding all symbols to the dynamic symbol table])], [], [])
			
 
				+
			
 
				+if test x$enable_mic = xyes -o x$enable_mpi = xyes -o x$enable_rcce = xyes ; then
			
 
				+	AC_DEFINE(STARPU_USE_MP, [1], [Message-passing SINKs support
			
 
				+		  is enabled])
			
 
				+
			
 
				+	if test x$enable_export_dynamic != xno ; then
			
 
				+		STARPU_EXPORT_DYNAMIC="-rdynamic"
			
 
				+	fi
			
 
				+fi
			
 
				+
			
 
				+AC_SUBST(STARPU_EXPORT_DYNAMIC)
			
 
				+
			
 
				+# Computes the maximum number of different kernels a message-passing sink
			
 
				+# can look up and launch.
			
 
				+AC_MSG_CHECKING(Maximum number of message-passing kernels)
			
 
				+AC_ARG_ENABLE(maxmpkernels, [AS_HELP_STRING([
			
 
				+	      --enable-maxmpkernels=<number>],
			
 
				+	      [maximum number of kernels a message-passing sink can lookup
			
 
				+	      for and execute])],
			
 
				+	      maxmpkernels=$enableval, maxmpkernels=10)
			
 
				+AC_MSG_RESULT($maxmpkernels)
			
 
				+AC_DEFINE_UNQUOTED(STARPU_MAXMPKERNELS, [$maxmpkernels],
			
 
				+		[maximum number of message-passing kernels])
			
 
				+
			
 
				+###############################################################################
			
 
				 
			
 
				 ###############################################################################
			
 
				 #                                                                             #
			
@@ -1074,6 +1345,7 @@ if test x$enable_simgrid = xyes ; then
 
				 	maxnodes=16
			
 
				 else
			
 
				 	# We have one memory node shared by all CPU workers, one node per GPU
			
 
				+	# and per MIC device
			
 
				 	nodes=1
			
 
				 	if test x$enable_cuda = xyes ; then
			
 
				 		# we could have used nmaxcudadev + 1, but this would certainly give an
			
@@ -1085,6 +1357,14 @@ else
 
				 		# odd number.
			
 
				 		nodes=`expr $nodes + $nmaxopencldev`
			
 
				 	fi
			
 
				+	if test x$enable_mic = xyes ; then
			
 
				+		nodes=`expr $nodes + $nmaxmicdev`
			
 
				+	fi
			
 
				+	if test x$enable_rcce = xyes ; then
			
 
				+		# Only 1 memory node for the shared memory.
			
 
				+		nodes=`expr $nodes + 1`
			
 
				+	fi
			
 
				+
			
 
				 	# set maxnodes to the next power of 2 greater than nodes
			
 
				 	maxnodes=1
			
 
				 	while test "$maxnodes" -lt "$nodes"
			
@@ -1136,7 +1416,7 @@ AC_CHECK_FUNCS([clock_gettime])
 
				 
			
 
				 # Compute the maximum number of workers (we round it to 16 for alignment
			
 
				 # purposes).
			
 
				-nmaxworkers=`expr 16 \* \( \( $maxcpus + $nmaxcudadev + $nmaxopencldev + 15 \) / 16 \) `
			
 
				+nmaxworkers=`expr 16 \* \( \( $maxcpus + $nmaxcudadev + $nmaxopencldev + $nmaxmiccore + $nmaxsccdev + 15 \) / 16 \) `
			
 
				 AC_MSG_CHECKING(Maximum number of workers)
			
 
				 AC_MSG_RESULT($nmaxworkers)
			
 
				 AC_DEFINE_UNQUOTED(STARPU_NMAXWORKERS, [$nmaxworkers], [Maximum number of workers])
			
@@ -1936,6 +2216,8 @@ AC_MSG_NOTICE([
 
				 	CPUs   enabled: $enable_cpu
			
 
				 	CUDA   enabled: $enable_cuda
			
 
				 	OpenCL enabled: $enable_opencl
			
 
				+	SCC    enabled: $enable_rcce
			
 
				+	MIC    enabled: $enable_mic
			
 
				 
			
 
				 	Compile-time limits
			
 
				 	(change these with --enable-maxcpus, --enable-maxcudadev,
			
--- a/doc/chapters/advanced-examples.texi
+++ b/doc/chapters/advanced-examples.texi
@@ -61,6 +61,7 @@ void scal_sse_func(void *buffers[], void *cl_arg)
 
				 struct starpu_codelet cl = @{
			
 
				     .where = STARPU_CPU,
			
 
				     .cpu_funcs = @{ scal_cpu_func, scal_sse_func, NULL @},
			
 
				+    .cpu_funcs_name = @{ "scal_cpu_func", "scal_sse_func", NULL @},
			
 
				     .nbuffers = 1,
			
 
				     .modes = @{ STARPU_RW @}
			
 
				 @};
			
@@ -100,6 +101,7 @@ struct starpu_codelet cl = @{
 
				     .where = STARPU_CPU|STARPU_CUDA,
			
 
				     .can_execute = can_execute,
			
 
				     .cpu_funcs = @{ cpu_func, NULL @},
			
 
				+    .cpu_funcs_name = @{ "cpu_func", NULL @},
			
 
				     .cuda_funcs = @{ gpu_func, NULL @}
			
 
				     .nbuffers = 1,
			
 
				     .modes = @{ STARPU_RW @}
			
@@ -146,6 +148,7 @@ struct starpu_codelet cl = @{
 
				     .where = STARPU_CPU|STARPU_CUDA,
			
 
				     .can_execute = can_execute,
			
 
				     .cpu_funcs = @{ cpu_func, NULL @},
			
 
				+    .cpu_funcs_name = @{ "cpu_func", NULL @},
			
 
				     .cuda_funcs = @{ scal_gpu_13, scal_gpu_20, NULL @},
			
 
				     .nbuffers = 1,
			
 
				     .modes = @{ STARPU_RW @}
			
@@ -359,6 +362,7 @@ static struct starpu_perfmodel mult_perf_model = @{
 
				 struct starpu_codelet cl = @{
			
 
				     .where = STARPU_CPU,
			
 
				     .cpu_funcs = @{ cpu_mult, NULL @},
			
 
				+    .cpu_funcs_name = @{ "cpu_mult", NULL @},
			
 
				     .nbuffers = 3,
			
 
				     .modes = @{ STARPU_R, STARPU_R, STARPU_W @},
			
 
				     /* for the scheduling policy to be able to use performance models */
			
@@ -520,6 +524,7 @@ void func_cpu(void *descr[], void *_args)
 
				 struct starpu_codelet mycodelet = @{
			
 
				         .where = STARPU_CPU,
			
 
				         .cpu_funcs = @{ func_cpu, NULL @},
			
 
				+        .cpu_funcs_name = @{ "func_cpu", NULL @},
			
 
				         .nbuffers = 2,
			
 
				         .modes = @{ STARPU_RW, STARPU_RW @}
			
 
				 @};
			
@@ -623,6 +628,7 @@ the codelets for initialization and reduction:
 
				 struct starpu_codelet bzero_variable_cl =
			
 
				 @{
			
 
				         .cpu_funcs = @{ bzero_variable_cpu, NULL @},
			
 
				+        .cpu_funcs_name = @{ "bzero_variable_cpu", NULL @},
			
 
				         .cuda_funcs = @{ bzero_variable_cuda, NULL @},
			
 
				         .nbuffers = 1,
			
 
				 @}
			
@@ -645,6 +651,7 @@ static void accumulate_variable_cuda(void *descr[], void *cl_arg)
 
				 struct starpu_codelet accumulate_variable_cl =
			
 
				 @{
			
 
				         .cpu_funcs = @{ accumulate_variable_cpu, NULL @},
			
 
				+        .cpu_funcs_name = @{ "accumulate_variable_cpu", NULL @},
			
 
				         .cuda_funcs = @{ accumulate_variable_cuda, NULL @},
			
 
				         .nbuffers = 1,
			
 
				 @}
			
@@ -827,6 +834,7 @@ static struct starpu_codelet cl =
 
				     .type = STARPU_FORKJOIN,
			
 
				     .max_parallelism = INT_MAX,
			
 
				     .cpu_funcs = @{scal_cpu_func, NULL@},
			
 
				+    .cpu_funcs_name = @{"scal_cpu_func", NULL@},
			
 
				     .nbuffers = 1,
			
 
				 @};
			
 
				 @end smallexample
			
@@ -870,6 +878,7 @@ static struct starpu_codelet cl =
 
				     .type = STARPU_SPMD,
			
 
				     .max_parallelism = INT_MAX,
			
 
				     .cpu_funcs = @{ func, NULL @},
			
 
				+    .cpu_funcs_name = @{ "func", NULL @},
			
 
				     .nbuffers = 1,
			
 
				 @}
			
 
				 @end smallexample
			
@@ -977,6 +986,7 @@ void opencl_to_cpu_func(void *buffers[], void *args);
 
				 struct starpu_codelet opencl_to_cpu_cl = @{
			
 
				     .where = STARPU_CPU,
			
 
				     .cpu_funcs = @{ opencl_to_cpu_func, NULL @},
			
 
				+    .cpu_funcs_name = @{ "opencl_to_cpu_func", NULL @},
			
 
				     .nbuffers = 1,
			
 
				     .modes = @{ STARPU_RW @}
			
 
				 @};
			
@@ -1287,6 +1297,7 @@ struct starpu_codelet dummy_big_cl =
 
				 	.cuda_funcs = @{dummy_big_kernel, NULL@},
			
 
				 	.opencl_funcs = @{dummy_big_kernel, NULL@},
			
 
				 	.cpu_funcs = @{dummy_big_kernel, NULL@},
			
 
				+	.cpu_funcs_name = @{"dummy_big_kernel", NULL@},
			
 
				 	.nbuffers = STARPU_NMAXBUFS+1,
			
 
				 	.dyn_modes = modes
			
 
				 @};
			
--- a/doc/chapters/api.texi
+++ b/doc/chapters/api.texi
@@ -24,6 +24,8 @@
 
				 * Theoretical lower bound on execution time API::
			
 
				 * CUDA extensions::
			
 
				 * OpenCL extensions::
			
 
				+* MIC extensions::
			
 
				+* SCC extensions::
			
 
				 * Miscellaneous helpers::
			
 
				 * FXT Support::
			
 
				 * FFT Support::
			
@@ -104,6 +106,14 @@ be specified with the @code{STARPU_NCUDA} environment variable.
 
				 This is the number of OpenCL devices that StarPU can use. This can
			
 
				 also be specified with the @code{STARPU_NOPENCL} environment variable.
			
 
				 
			
 
				+@item @code{int nmic} (default = -1)
			
 
				+This is the number of MIC devices that StarPU can use. This can
			
 
				+also be specified with the @code{STARPU_NMIC} environment variable.
			
 
				+
			
 
				+@item @code{int nscc} (default = -1)
			
 
				+This is the number of SCC devices that StarPU can use. This can
			
 
				+also be specified with the @code{STARPU_NSCC} environment variable.
			
 
				+
			
 
				 @item @code{unsigned use_explicit_workers_bindid} (default = 0)
			
 
				 If this flag is set, the @code{workers_bindid} array indicates where the
			
 
				 different workers are bound, otherwise StarPU automatically selects where to
			
@@ -139,6 +149,26 @@ the @code{STARPU_WORKERS_OPENCLID} environment variable.
 
				 If the @code{use_explicit_workers_opencl_gpuid} flag is set, this array
			
 
				 contains the logical identifiers of the OpenCL devices to be used.
			
 
				 
			
 
				+@item @code{unsigned use_explicit_workers_mic_gpuid} (default = 0)
			
 
				+If this flag is set, the MIC workers will be attached to the MIC devices
			
 
				+specified in the @code{workers_mic_gpuid} array. Otherwise, StarPU assigns
			
 
				+the MIC devices in a round-robin fashion. This can also be specified with
			
 
				+the @code{STARPU_WORKERS_MICID} environment variable.
			
 
				+
			
 
				+@item @code{unsigned workers_mic_gpuid[STARPU_NMAXWORKERS]}
			
 
				+If the @code{use_explicit_workers_mic_gpuid} flag is set, this array
			
 
				+contains the logical identifiers of the MIC devices to be used.
			
 
				+
			
 
				+@item @code{unsigned use_explicit_workers_scc_gpuid} (default = 0)
			
 
				+If this flag is set, the SCC workers will be attached to the SCC devices
			
 
				+specified in the @code{workers_scc_gpuid} array. Otherwise, StarPU assigns
			
 
				+the SCC devices in a round-robin fashion. This can also be specified with
			
 
				+the @code{STARPU_WORKERS_SCCID} environment variable.
			
 
				+
			
 
				+@item @code{unsigned workers_scc_gpuid[STARPU_NMAXWORKERS]}
			
 
				+If the @code{use_explicit_workers_scc_gpuid} flag is set, this array
			
 
				+contains the logical identifiers of the SCC devices to be used.
			
 
				+
			
 
				 @item @code{int calibrate} (default = 0)
			
 
				 If this flag is set, StarPU will calibrate the performance models when
			
 
				 executing tasks. If this value is equal to @code{-1}, the default value is
			
@@ -162,6 +192,11 @@ task scheduler will however still however still try varying combined worker
 
				 sizes to look for the most efficient ones.
			
 
				 This can also be specified with the @code{STARPU_SINGLE_COMBINED_WORKER} environment variable.
			
 
				 
			
 
				+@item @code{mic_sink_program_path} (default = NULL)
			
 
				+Path to the program to execute on the MIC device, compiled for MIC
			
 
				+architecture. When set to NULL, StarPU automatically looks next to the host
			
 
				+program location.
			
 
				+
			
 
				 @item @code{int disable_asynchronous_copy} (default = 0)
			
 
				 This flag should be set to 1 to disable asynchronous copies between
			
 
				 CPUs and all accelerators. This can also be specified with the
			
@@ -223,6 +258,12 @@ Upon successful completion, this function returns 0. Otherwise, @code{-ENODEV}
 
				 indicates that no worker was available (so that StarPU was not initialized).
			
 
				 @end deftypefun
			
 
				 
			
 
				+@deftypefun int starpu_initialize ({struct starpu_conf *}@var{conf}, int @var{argc}, {char ***}@var{argv})
			
 
				+This is the same as @code{starpu_init}, but also takes the @code{argc} and
			
 
				+@code{argv} as received by the application. This is needed for MIC and SCC
			
 
				+execution so that instances of StarPU can know whether they are slaves or masters.
			
 
				+@end deftypefun
			
 
				+
			
 
				 @deftypefun int starpu_conf_init ({struct starpu_conf *}@var{conf})
			
 
				 This function initializes the @var{conf} structure passed as argument
			
 
				 with the default values. In case some configuration parameters are already
			
@@ -319,6 +360,8 @@ The different values are:
 
				 @item @code{STARPU_CPU_WORKER}
			
 
				 @item @code{STARPU_CUDA_WORKER}
			
 
				 @item @code{STARPU_OPENCL_WORKER}
			
 
				+@item @code{STARPU_MIC_WORKER}
			
 
				+@item @code{STARPU_SCC_WORKER}
			
 
				 @end table
			
 
				 @end deftp
			
 
				 
			
@@ -348,6 +391,20 @@ This function returns the number of OpenCL devices controlled by StarPU. The ret
 
				 value should be at most @code{STARPU_MAXOPENCLDEVS}.
			
 
				 @end deftypefun
			
 
				 
			
 
				+@deftypefun unsigned starpu_mic_worker_get_count (void)
			
 
				+This function returns the number of MIC workers controlled by StarPU.
			
 
				+@end deftypefun
			
 
				+
			
 
				+@deftypefun unsigned starpu_mic_device_get_count (void)
			
 
				+This function returns the number of MIC devices controlled by StarPU. The returned
			
 
				+value should be at most @code{STARPU_MAXMICDEVS}.
			
 
				+@end deftypefun
			
 
				+
			
 
				+@deftypefun unsigned starpu_scc_worker_get_count (void)
			
 
				+This function returns the number of SCC devices controlled by StarPU. The returned
			
 
				+value should be at most @code{STARPU_MAXSCCDEVS}.
			
 
				+@end deftypefun
			
 
				+
			
 
				 @deftypefun int starpu_worker_get_id (void)
			
 
				 This function returns the identifier of the current worker, i.e the one associated to the calling
			
 
				 thread. The returned value is either -1 if the current context is not a StarPU
			
@@ -420,6 +477,9 @@ todo
 
				 @item @code{STARPU_CPU_RAM}
			
 
				 @item @code{STARPU_CUDA_RAM}
			
 
				 @item @code{STARPU_OPENCL_RAM}
			
 
				+@item @code{STARPU_MIC_RAM}
			
 
				+@item @code{STARPU_SCC_RAM}
			
 
				+@item @code{STARPU_SCC_SHM}
			
 
				 @end table
			
 
				 @end deftp
			
 
				 
			
@@ -1364,8 +1424,8 @@ be useful to provide more specific method in case of e.g. available particular
 
				 CUDA or OpenCL support.
			
 
				 
			
 
				 @table @asis
			
 
				-@item @code{int (*@{ram,cuda,opencl@}_to_@{ram,cuda,opencl@})(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)}
			
 
				-These 12 functions define how to copy data from the @var{src_interface}
			
 
				+@item @code{int (*@{ram,cuda,opencl,mic@}_to_@{ram,cuda,opencl,mic@})(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)}
			
 
				+These 14 functions define how to copy data from the @var{src_interface}
			
 
				 interface on the @var{src_node} node to the @var{dst_interface} interface
			
 
				 on the @var{dst_node} node. They return 0 on success.
			
 
				 
			
@@ -1386,6 +1446,22 @@ Must return 0 if the transfer was actually completed completely synchronously,
 
				 or -EAGAIN if at least some transfers are still ongoing and should be awaited
			
 
				 for by the core.
			
 
				 
			
 
				+@item @code{int (*@{ram,mic@}_to_@{ram,mic@}_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)}
			
 
				+These 2 functions (@code{ram_to_ram} and @code{mic_to_mic} are not among them) define how to copy
			
 
				+data from the @var{src_interface} interface on the @var{src_node} node to the
			
 
				+@var{dst_interface} interface on the @var{dst_node} node.
			
 
				+Must return 0 if the transfer was actually completed completely synchronously,
			
 
				+or -EAGAIN if at least some transfers are still ongoing and should be awaited
			
 
				+for by the core.
			
 
				+
			
 
				+@item @code{int (*@{src,sink@}_to_@{src,sink@}_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)}
			
 
				+These 3 functions (@code{src_to_src} is not among them) define how to copy
			
 
				+data from the @var{src_interface} interface on the @var{src_node} node to the
			
 
				+@var{dst_interface} interface on the @var{dst_node} node.
			
 
				+Must return 0 if the transfer was actually completed completely synchronously,
			
 
				+or -EAGAIN if at least some transfers are still ongoing and should be awaited
			
 
				+for by the core.
			
 
				+
			
 
				 @item @code{int (*any_to_any)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data)}
			
 
				 Define how to copy data from the @var{src_interface} interface on the
			
 
				 @var{src_node} node to the @var{dst_interface} interface on the @var{dst_node}
			
@@ -1729,6 +1805,24 @@ pointer to a codelet which converts from CPU to CUDA
 
				 
			
 
				 @item @code{struct starpu_codelet *cuda_to_cpu_cl}
			
 
				 pointer to a codelet which converts from CUDA to CPU
			
 
				+
			
 
				+@item @code{size_t mic_elemsize}
			
 
				+the size of each element on MIC devices,
			
 
				+
			
 
				+@item @code{struct starpu_codelet *cpu_to_mic_cl}
			
 
				+pointer to a codelet which converts from CPU to MIC
			
 
				+
			
 
				+@item @code{struct starpu_codelet *mic_to_cpu_cl}
			
 
				+pointer to a codelet which converts from MIC to CPU
			
 
				+
			
 
				+@item @code{size_t scc_elemsize}
			
 
				+the size of each element on SCC devices,
			
 
				+
			
 
				+@item @code{struct starpu_codelet *cpu_to_scc_cl}
			
 
				+pointer to a codelet which converts from CPU to SCC
			
 
				+
			
 
				+@item @code{struct starpu_codelet *scc_to_cpu_cl}
			
 
				+pointer to a codelet which converts from SCC to CPU
			
 
				 @end table
			
 
				 @end deftp
			
 
				 
			
@@ -1791,7 +1885,19 @@ processing unit.
 
				 
			
 
				 @defmac STARPU_OPENCL
			
 
				 This macro is used when setting the field @code{where} of a @code{struct
			
 
				-starpu_codelet} to specify the codelet may be executed on a OpenCL
			
 
				+starpu_codelet} to specify the codelet may be executed on an OpenCL
			
 
				+processing unit.
			
 
				+@end defmac
			
 
				+
			
 
				+@defmac STARPU_MIC
			
 
				+This macro is used when setting the field @code{where} of a @code{struct
			
 
				+starpu_codelet} to specify the codelet may be executed on a MIC
			
 
				+processing unit.
			
 
				+@end defmac
			
 
				+
			
 
				+@defmac STARPU_SCC
			
 
				+This macro is used when setting the field @code{where} of a @code{struct
			
 
				+starpu_codelet} to specify the codelet may be executed on an SCC
			
 
				 processing unit.
			
 
				 @end defmac
			
 
				 
			
@@ -1864,6 +1970,12 @@ If the @code{where} field is set, then the @code{cpu_funcs} field is
 
				 ignored if @code{STARPU_CPU} does not appear in the @code{where}
			
 
				 field, it must be non-null otherwise.
			
 
				 
			
 
				+@item @code{char * cpu_funcs_name[STARPU_MAXIMPLEMENTATIONS]} (optional)
			
 
				+Is an array of strings which provide the name of the CPU functions referenced in
			
 
				+the @code{cpu_funcs} array. This can be used when running on MIC devices or the
			
 
				+SCC platform, for StarPU to simply look up the MIC or SCC function implementation
			
 
				+through its name.
			
 
				+
			
 
				 @item @code{starpu_cuda_func_t cuda_func} (optional)
			
 
				 This field has been made deprecated. One should use instead the
			
 
				 @code{cuda_funcs} field.
			
@@ -1891,6 +2003,28 @@ If the @code{where} field is set, then the @code{opencl_funcs} field
 
				 is ignored if @code{STARPU_OPENCL} does not appear in the @code{where}
			
 
				 field, it must be non-null otherwise.
			
 
				 
			
 
				+@item @code{starpu_mic_func_t mic_funcs[STARPU_MAXIMPLEMENTATIONS]} (optional)
			
 
				+Is an array of function pointers to a function which returns the MIC
			
 
				+implementation of the codelet.
			
 
				+It must be terminated by a NULL value.
			
 
				+The functions prototype must be:
			
 
				+@code{starpu_mic_kernel_t mic_func(struct starpu_codelet *cl, unsigned nimpl);}.
			
 
				+If the @code{where} field is set, then the @code{mic_funcs} field
			
 
				+is ignored if @code{STARPU_MIC} does not appear in the @code{where}
			
 
				+field. It can be null if @code{cpu_funcs_name} is non-NULL, in which case StarPU
			
 
				+will simply make a symbol lookup to get the implementation.
			
 
				+
			
 
				+@item @code{starpu_scc_func_t scc_funcs[STARPU_MAXIMPLEMENTATIONS]} (optional)
			
 
				+Is an array of function pointers to a function which returns the SCC
			
 
				+implementation of the codelet.
			
 
				+It must be terminated by a NULL value.
			
 
				+The functions prototype must be:
			
 
				+@code{starpu_scc_kernel_t scc_func(struct starpu_codelet *cl, unsigned nimpl);}.
			
 
				+If the @code{where} field is set, then the @code{scc_funcs} field
			
 
				+is ignored if @code{STARPU_SCC} does not appear in the @code{where}
			
 
				+field. It can be null if @code{cpu_funcs_name} is non-NULL, in which case StarPU
			
 
				+will simply make a symbol lookup to get the implementation.
			
 
				+
			
 
				 @item @code{unsigned nbuffers}
			
 
				 Specifies the number of arguments taken by the codelet. These arguments are
			
 
				 managed by the DSM and are accessed from the @code{void *buffers[]}
			
@@ -2287,6 +2421,7 @@ executing. It thus does not include tasks waiting for dependencies.
 
				 This function returns the task currently executed by the worker, or
			
 
				 NULL if it is called either from a thread that is not a task or simply
			
 
				 because there is no task being executed at the moment.
			
 
				+This function must be called from the callback (not from the codelet).
			
 
				 @end deftypefun
			
 
				 
			
 
				 @deftypefun void starpu_codelet_display_stats ({struct starpu_codelet} *@var{cl})
			
@@ -3263,6 +3398,48 @@ successfull. It returns 0 if the synchronous copy was successful, or
 
				 fails otherwise.
			
 
				 @end deftypefun
			
 
				 
			
 
				+@node MIC extensions
			
 
				+@section MIC extensions
			
 
				+
			
 
				+@defmac STARPU_USE_MIC
			
 
				+This macro is defined when StarPU has been installed with MIC
			
 
				+support. It should be used in your code to detect the availability of
			
 
				+MIC.
			
 
				+@end defmac
			
 
				+
			
 
				+@deftypefun int starpu_mic_register_kernel({starpu_mic_func_symbol_t *}@var{symbol}, {const char *}@var{func_name})
			
 
				+Initiate a lookup on each MIC device to find the address of the function
			
 
				+named FUNC_NAME, store them in the global array kernels and return
			
 
				+the index in the array through SYMBOL.
			
 
				+@end deftypefun
			
 
				+
			
 
				+@deftypefun starpu_mic_kernel_t starpu_mic_get_kernel(starpu_mic_func_symbol_t @var{symbol})
			
 
				+On success, return the pointer to the function defined by SYMBOL on the
			
 
				+device linked to the called device. This can for instance be used in a
			
 
				+@code{starpu_mic_func_t} implementation.
			
 
				+@end deftypefun
			
 
				+
			
 
				+@node SCC extensions
			
 
				+@section SCC extensions
			
 
				+
			
 
				+@defmac STARPU_USE_SCC
			
 
				+This macro is defined when StarPU has been installed with SCC
			
 
				+support. It should be used in your code to detect the availability of
			
 
				+SCC.
			
 
				+@end defmac
			
 
				+
			
 
				+@deftypefun int starpu_scc_register_kernel({starpu_scc_func_symbol_t *}@var{symbol}, {const char *}@var{func_name})
			
 
				+Initiate a lookup on each SCC device to find the address of the function
			
 
				+named FUNC_NAME, store them in the global array kernels and return
			
 
				+the index in the array through SYMBOL.
			
 
				+@end deftypefun
			
 
				+
			
 
				+@deftypefun starpu_scc_kernel_t starpu_scc_get_kernel(starpu_scc_func_symbol_t @var{symbol})
			
 
				+On success, return the pointer to the function defined by SYMBOL on the
			
 
				+device linked to the called device. This can for instance be used in a
			
 
				+@code{starpu_scc_func_t} implementation.
			
 
				+@end deftypefun
			
 
				+
			
 
				 @node Miscellaneous helpers
			
 
				 @section Miscellaneous helpers
			
 
				 
			
@@ -3816,7 +3993,11 @@ number of CUDA workers.
 
				 
			
 
				 @item @code{unsigned nhwopenclgpus}
			
 
				 Total number of OpenCL devices, as detected. May be different from the actual
			
 
				-number of CUDA workers.
			
 
				+number of OpenCL workers.
			
 
				+
			
 
				+@item @code{unsigned nhscc}
			
 
				+Total number of SCC cores, as detected. May be different from the actual
			
 
				+number of SCC workers.
			
 
				 
			
 
				 @item @code{unsigned ncpus}
			
 
				 Actual number of CPU workers used by StarPU.
			
@@ -3827,6 +4008,9 @@ Actual number of CUDA workers used by StarPU.
 
				 @item @code{unsigned nopenclgpus}
			
 
				 Actual number of OpenCL workers used by StarPU.
			
 
				 
			
 
				+@item @code{unsigned nsccdevices}
			
 
				+Actual number of SCC workers used by StarPU.
			
 
				+
			
 
				 @item @code{unsigned workers_bindid[STARPU_NMAXWORKERS]}
			
 
				 Indicates the successive cpu identifier that should be used to bind the
			
 
				 workers. It is either filled according to the user's explicit
			
@@ -3835,17 +4019,29 @@ variable. Otherwise, a round-robin policy is used to distributed the workers
 
				 over the cpus.
			
 
				 
			
 
				 @item @code{unsigned workers_cuda_gpuid[STARPU_NMAXWORKERS]}
			
 
				-Indicates the successive cpu identifier that should be used by the CUDA
			
 
				+Indicates the successive CUDA identifier that should be used by the CUDA
			
 
				 driver.  It is either filled according to the user's explicit parameters (from
			
 
				 starpu_conf) or according to the STARPU_WORKERS_CUDAID env. variable. Otherwise,
			
 
				 they are taken in ID order.
			
 
				 
			
 
				 @item @code{unsigned workers_opencl_gpuid[STARPU_NMAXWORKERS]}
			
 
				-Indicates the successive cpu identifier that should be used by the OpenCL
			
 
				+Indicates the successive OpenCL identifier that should be used by the OpenCL
			
 
				 driver.  It is either filled according to the user's explicit parameters (from
			
 
				 starpu_conf) or according to the STARPU_WORKERS_OPENCLID env. variable. Otherwise,
			
 
				 they are taken in ID order.
			
 
				 
			
 
				+@item @code{unsigned workers_mic_deviceid[STARPU_NMAXWORKERS]}
			
 
				+Indicates the successive MIC devices that should be used by the MIC
			
 
				+driver.  It is either filled according to the user's explicit parameters (from
			
 
				+starpu_conf) or according to the STARPU_WORKERS_MICID env. variable. Otherwise,
			
 
				+they are taken in ID order.
			
 
				+
			
 
				+@item @code{unsigned workers_scc_deviceid[STARPU_NMAXWORKERS]}
			
 
				+Indicates the successive SCC devices that should be used by the SCC
			
 
				+driver.  It is either filled according to the user's explicit parameters (from
			
 
				+starpu_conf) or according to the STARPU_WORKERS_SCCID env. variable. Otherwise,
			
 
				+they are taken in ID order.
			
 
				+
			
 
				 @end table
			
 
				 @end deftp
			
 
				 
			
--- a/doc/chapters/basic-examples.texi
+++ b/doc/chapters/basic-examples.texi
@@ -132,6 +132,7 @@ struct starpu_codelet cl =
 
				 @{
			
 
				     .where = STARPU_CPU,
			
 
				     .cpu_funcs = @{ cpu_func, NULL @},
			
 
				+    .cpu_funcs_name = @{ "cpu_func", NULL @},
			
 
				     .nbuffers = 0
			
 
				 @};
			
 
				 @end smallexample
			
@@ -642,6 +643,7 @@ void scal_cpu_func(void *buffers[], void *cl_arg)
 
				 struct starpu_codelet cl =
			
 
				 @{
			
 
				     .cpu_funcs = @{ scal_cpu_func, NULL @},
			
 
				+    .cpu_funcs_name = @{ "scal_cpu_func", NULL @},
			
 
				     .nbuffers = 1,
			
 
				     .modes = @{ STARPU_RW @}
			
 
				 @};
			
@@ -822,6 +824,7 @@ static struct starpu_codelet cl =
 
				 @{
			
 
				     .cuda_funcs = @{ scal_cuda_func, NULL @},
			
 
				     .cpu_funcs = @{ scal_cpu_func, NULL @},
			
 
				+    .cpu_funcs_name = @{ "scal_cpu_func", NULL @},
			
 
				     .opencl_funcs = @{ scal_opencl_func, NULL @},
			
 
				     .nbuffers = 1,
			
 
				     .modes = @{ STARPU_RW @}
			
--- a/doc/chapters/perf-optimization.texi
+++ b/doc/chapters/perf-optimization.texi
@@ -554,6 +554,7 @@ CUDA or OpenCL execution:
 
				 static struct starpu_codelet cl11 =
			
 
				 @{
			
 
				 	.cpu_funcs = @{chol_cpu_codelet_update_u11, NULL@},
			
 
				+	.cpu_funcs_name = @{"chol_cpu_codelet_update_u11", NULL@},
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 	.cuda_funcs = @{chol_cublas_codelet_update_u11, NULL@},
			
 
				 #elif defined(STARPU_SIMGRID)
			
--- a/doc/chapters/vector_scal_c.texi
+++ b/doc/chapters/vector_scal_c.texi
@@ -1,7 +1,7 @@
 
				 @c -*-texinfo-*-
			
 
				 
			
 
				 @c This file is part of the StarPU Handbook.
			
 
				-@c Copyright (C) 2009-2011  Université de Bordeaux 1
			
 
				+@c Copyright (C) 2009-2011, 2013  Université de Bordeaux 1
			
 
				 @c Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
			
 
				 @c See the file starpu.texi for copying conditions.
			
 
				 
			
@@ -26,6 +26,7 @@ static struct starpu_codelet cl = @{
 
				     .where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL,
			
 
				     /* CPU implementation of the codelet */
			
 
				     .cpu_funcs = @{ scal_cpu_func, scal_sse_func, NULL @},
			
 
				+    .cpu_funcs_name = @{ "scal_cpu_func", "scal_sse_func", NULL @},
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				     /* CUDA implementation of the codelet */
			
 
				     .cuda_funcs = @{ scal_cuda_func, NULL @},
			
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -20,7 +20,7 @@ AM_CFLAGS = $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STAR
 
				 AM_CXXFLAGS = $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS)
			
 
				 LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ $(MAGMA_LIBS) $(HWLOC_LIBS) @LIBS@
			
 
				 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include
			
 
				-AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) 
			
 
				+AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(STARPU_COI_LDFLAGS)
			
 
				 
			
 
				 SUBDIRS = stencil
			
 
				 
			
--- a/examples/basic_examples/vector_scal.c
+++ b/examples/basic_examples/vector_scal.c
@@ -1,7 +1,7 @@
 
				 /* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				  *
			
 
				  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
			
 
				- * Copyright (C) 2010-2012  Université de Bordeaux 1
			
 
				+ * Copyright (C) 2010-2013  Université de Bordeaux 1
			
 
				  *
			
 
				  * StarPU is free software; you can redistribute it and/or modify
			
 
				  * it under the terms of the GNU Lesser General Public License as published by
			
@@ -52,7 +52,7 @@ static struct starpu_perfmodel vector_scal_power_model =
 
				 
			
 
				 static struct starpu_codelet cl =
			
 
				 {
			
 
				-	.where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL,
			
 
				+	.where = STARPU_CPU | STARPU_CUDA | STARPU_OPENCL | STARPU_MIC,
			
 
				 	/* CPU implementation of the codelet */
			
 
				 	.cpu_funcs = {
			
 
				 		scal_cpu_func
			
@@ -67,6 +67,19 @@ static struct starpu_codelet cl =
 
				 #endif
			
 
				 		, NULL
			
 
				 	},
			
 
				+	.cpu_funcs_name = {
			
 
				+		"scal_cpu_func",
			
 
				+#ifdef STARPU_HAVE_ICC
			
 
				+		"scal_cpu_func_icc",
			
 
				+#endif
			
 
				+#ifdef __SSE__
			
 
				+		"scal_sse_func",
			
 
				+#ifdef STARPU_HAVE_ICC
			
 
				+		"scal_sse_func_icc"
			
 
				+#endif
			
 
				+#endif
			
 
				+	},
			
 
				+
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 	/* CUDA implementation of the codelet */
			
 
				 	.cuda_funcs = {scal_cuda_func, NULL},
			
--- a/examples/stencil/Makefile.am
+++ b/examples/stencil/Makefile.am
@@ -16,7 +16,7 @@
 
				 AM_CFLAGS = $(HWLOC_CFLAGS) $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
			
 
				 LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ $(HWLOC_LIBS) @LIBS@
			
 
				 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include
			
 
				-AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS)
			
 
				+AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(STARPU_COI_LDFLAGS)
			
 
				 
			
 
				 if USE_MPI
			
 
				 LIBS += $(top_builddir)/mpi/src/libstarpumpi-@STARPU_EFFECTIVE_VERSION@.la
			
--- a/include/starpu.h
+++ b/include/starpu.h
@@ -92,6 +92,10 @@ struct starpu_conf
 
				 	int ncuda;
			
 
				 	/* number of GPU OpenCL device workers (-1 for default) */
			
 
				 	int nopencl;
			
 
				+	/* number of MIC device workers (-1 for default) */
			
 
				+	int nmic;
			
 
				+	/* number of SCC device workers (-1 for default) */
			
 
				+	int nscc;
			
 
				 
			
 
				 	unsigned use_explicit_workers_bindid;
			
 
				 	unsigned workers_bindid[STARPU_NMAXWORKERS];
			
@@ -102,6 +106,12 @@ struct starpu_conf
 
				 	unsigned use_explicit_workers_opencl_gpuid;
			
 
				 	unsigned workers_opencl_gpuid[STARPU_NMAXWORKERS];
			
 
				 
			
 
				+	unsigned use_explicit_workers_mic_deviceid;
			
 
				+	unsigned workers_mic_deviceid[STARPU_NMAXWORKERS];
			
 
				+
			
 
				+	unsigned use_explicit_workers_scc_deviceid;
			
 
				+	unsigned workers_scc_deviceid[STARPU_NMAXWORKERS];
			
 
				+
			
 
				 	/* calibrate bus (-1 for default) */
			
 
				 	int bus_calibrate;
			
 
				 
			
@@ -111,6 +121,10 @@ struct starpu_conf
 
				 	/* Create only one combined worker, containing all CPU workers */
			
 
				 	int single_combined_worker;
			
 
				 
			
 
				+	/* Path to the kernel to execute on the MIC device, compiled
			
 
				+	 * for MIC architecture. */
			
 
				+	char *mic_sink_program_path;
			
 
				+
			
 
				 	/* indicate if all asynchronous copies should be disabled */
			
 
				 	int disable_asynchronous_copy;
			
 
				 
			
@@ -140,6 +154,12 @@ int starpu_conf_init(struct starpu_conf *conf);
 
				  */
			
 
				 int starpu_init(struct starpu_conf *conf) STARPU_WARN_UNUSED_RESULT;
			
 
				 
			
 
				+/* Alternative initialization method with argc and argv. This is used by
			
 
				+ * MIC, MPI, and SCC implementation.
			
 
				+ * Don't call starpu_init and starpu_initialize in the same program.
			
 
				+ */
			
 
				+int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv);
			
 
				+
			
 
				 /* Shutdown method: note that statistics are only generated once StarPU is
			
 
				  * shutdown */
			
 
				 void starpu_shutdown(void);
			
@@ -156,6 +176,8 @@ void starpu_display_stats();
 
				 
			
 
				 void starpu_get_version(int *major, int *minor, int *release);
			
 
				 
			
 
				+int starpu_worker_get_mp_nodeid(int id);
			
 
				+
			
 
				 #ifdef __cplusplus
			
 
				 }
			
 
				 #endif
			
--- a/include/starpu_config.h.in
+++ b/include/starpu_config.h.in
@@ -25,6 +25,8 @@
 
				 #undef STARPU_USE_CPU
			
 
				 #undef STARPU_USE_CUDA
			
 
				 #undef STARPU_USE_OPENCL
			
 
				+#undef STARPU_USE_MIC
			
 
				+#undef STARPU_USE_SCC
			
 
				 
			
 
				 #undef STARPU_SIMGRID
			
 
				 
			
@@ -70,9 +72,12 @@
 
				 #undef STARPU_MAXCPUS
			
 
				 #undef STARPU_MAXCUDADEVS
			
 
				 #undef STARPU_MAXOPENCLDEVS
			
 
				+#undef STARPU_MAXMICDEVS
			
 
				+#undef STARPU_MAXSCCDEVS
			
 
				 #undef STARPU_NMAXWORKERS
			
 
				 #undef STARPU_NMAX_SCHED_CTXS
			
 
				 #undef STARPU_MAXIMPLEMENTATIONS
			
 
				+#undef STARPU_MAXMPKERNELS
			
 
				 #undef STARPU_USE_SC_HYPERVISOR
			
 
				 #undef STARPU_HAVE_GLPK_H
			
 
				 
			
--- a/include/starpu_data.h
+++ b/include/starpu_data.h
@@ -102,7 +102,14 @@ enum starpu_node_kind
 
				 	STARPU_UNUSED     = 0x00,
			
 
				 	STARPU_CPU_RAM    = 0x01,
			
 
				 	STARPU_CUDA_RAM   = 0x02,
			
 
				-	STARPU_OPENCL_RAM = 0x03
			
 
				+	STARPU_OPENCL_RAM = 0x03,
			
 
				+	STARPU_MIC_RAM    = 0x05,
			
 
				+
			
 
				+	/* This node kind is not used anymore, but implementations in interfaces
			
 
				+	 * will be useful for MPI. */
			
 
				+	STARPU_SCC_RAM    = 0x06,
			
 
				+
			
 
				+	STARPU_SCC_SHM    = 0x07
			
 
				 };
			
 
				 
			
 
				 unsigned starpu_worker_get_memory_node(unsigned workerid);
			
--- a/include/starpu_data_interfaces.h
+++ b/include/starpu_data_interfaces.h
@@ -45,6 +45,7 @@ struct starpu_data_copy_methods
 
				 	int (*ram_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				 	int (*ram_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				 	int (*ram_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+	int (*ram_to_mic)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				 
			
 
				 	/* src type is cuda */
			
 
				 	int (*cuda_to_ram)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
@@ -56,6 +57,14 @@ struct starpu_data_copy_methods
 
				 	int (*opencl_to_cuda)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				 	int (*opencl_to_opencl)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				 
			
 
				+	/* src type is mic */
			
 
				+	int (*mic_to_ram)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node);
			
 
				+
			
 
				+	/* scc case */
			
 
				+	int (*scc_src_to_sink)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+	int (*scc_sink_to_src)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+	int (*scc_sink_to_sink)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+
			
 
				 #ifdef STARPU_USE_CUDA
			
 
				 	/* for asynchronous CUDA transfers */
			
 
				 	int (*ram_to_cuda_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cudaStream_t stream);
			
@@ -74,6 +83,12 @@ struct starpu_data_copy_methods
 
				 	int (*opencl_to_opencl_async)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, cl_event *event);
			
 
				 #endif
			
 
				 
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+	/* Asynchronous MIC transfers */
			
 
				+	int (*ram_to_mic_async)(void *src_intreface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+	int (*mic_to_ram_async)(void *src_interface, unsigned srd_node, void *dst_interface, unsigned dst_node);
			
 
				+#endif
			
 
				+
			
 
				 	int (*any_to_any)(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data);
			
 
				 };
			
 
				 
			
@@ -162,6 +177,8 @@ extern struct starpu_data_interface_ops starpu_interface_matrix_ops;
 
				 /* Matrix interface for dense matrices */
			
 
				 struct starpu_matrix_interface
			
 
				 {
			
 
				+	enum starpu_data_interface_id id;
			
 
				+
			
 
				 	uintptr_t ptr;
			
 
				 	uintptr_t dev_handle;
			
 
				 	size_t offset;
			
@@ -192,6 +209,8 @@ size_t starpu_matrix_get_elemsize(starpu_data_handle_t handle);
 
				  */
			
 
				 struct starpu_coo_interface
			
 
				 {
			
 
				+	enum starpu_data_interface_id id;
			
 
				+
			
 
				 	uint32_t  *columns;
			
 
				 	uint32_t  *rows;
			
 
				 	uintptr_t values;
			
@@ -229,6 +248,8 @@ void starpu_coo_data_register(starpu_data_handle_t *handleptr, unsigned home_nod
 
				 /* TODO: rename to 3dmatrix? */
			
 
				 struct starpu_block_interface
			
 
				 {
			
 
				+	enum starpu_data_interface_id id;
			
 
				+
			
 
				 	uintptr_t ptr;
			
 
				 	uintptr_t dev_handle;
			
 
				 	size_t offset;
			
@@ -263,6 +284,8 @@ size_t starpu_block_get_elemsize(starpu_data_handle_t handle);
 
				 /* vector interface for contiguous (non-strided) buffers */
			
 
				 struct starpu_vector_interface
			
 
				 {
			
 
				+	enum starpu_data_interface_id id;
			
 
				+
			
 
				 	uintptr_t ptr;
			
 
				 	uintptr_t dev_handle;
			
 
				 	size_t offset;
			
@@ -285,9 +308,12 @@ uintptr_t starpu_vector_get_local_ptr(starpu_data_handle_t handle);
 
				 /* variable interface for a single data (not a vector, a matrix, a list, ...) */
			
 
				 struct starpu_variable_interface
			
 
				 {
			
 
				+	enum starpu_data_interface_id id;
			
 
				+
			
 
				 	uintptr_t ptr;
			
 
				+	uintptr_t dev_handle;
			
 
				+	size_t offset;
			
 
				 	size_t elemsize;
			
 
				-	/* No dev_handle, since it can not be filtered, offset will always be zero */
			
 
				 };
			
 
				 
			
 
				 void starpu_variable_data_register(starpu_data_handle_t *handle, unsigned home_node, uintptr_t ptr, size_t size);
			
@@ -296,10 +322,10 @@ uintptr_t starpu_variable_get_local_ptr(starpu_data_handle_t handle);
 
				 
			
 
				 /* helper methods */
			
 
				 #define STARPU_VARIABLE_GET_PTR(interface)	(((struct starpu_variable_interface *)(interface))->ptr)
			
 
				+#define STARPU_VARIABLE_GET_OFFSET(interface)	(((struct starpu_variable_interface *)(interface))->offset)
			
 
				 #define STARPU_VARIABLE_GET_ELEMSIZE(interface)	(((struct starpu_variable_interface *)(interface))->elemsize)
			
 
				 #define STARPU_VARIABLE_GET_DEV_HANDLE(interface) \
			
 
				 	(((struct starpu_variable_interface *)(interface))->ptr)
			
 
				-#define STARPU_VARIABLE_GET_OFFSET 0
			
 
				 
			
 
				 /* void interface. There is no data really associated to that interface, but it
			
 
				  * may be used as a synchronization mechanism. It also permits to express an
			
@@ -311,6 +337,8 @@ void starpu_void_data_register(starpu_data_handle_t *handle);
 
				 /* CSR interface for sparse matrices (compressed sparse row representation) */
			
 
				 struct starpu_csr_interface
			
 
				 {
			
 
				+	enum starpu_data_interface_id id;
			
 
				+
			
 
				 	uint32_t nnz; /* number of non-zero entries */
			
 
				 	uint32_t nrow; /* number of rows */
			
 
				 	uintptr_t nzval; /* non-zero values */
			
@@ -352,6 +380,8 @@ size_t starpu_csr_get_elemsize(starpu_data_handle_t handle);
 
				  * representation) */
			
 
				 struct starpu_bcsr_interface
			
 
				 {
			
 
				+	enum starpu_data_interface_id id;
			
 
				+
			
 
				 	uint32_t nnz; /* number of non-zero BLOCKS */
			
 
				 	uint32_t nrow; /* number of rows (in terms of BLOCKS) */
			
 
				 
			
@@ -406,13 +436,22 @@ struct starpu_multiformat_data_interface_ops
 
				 	size_t cuda_elemsize;
			
 
				 	struct starpu_codelet *cpu_to_cuda_cl;
			
 
				 	struct starpu_codelet *cuda_to_cpu_cl;
			
 
				+	size_t mic_elemsize;
			
 
				+	struct starpu_codelet *cpu_to_mic_cl;
			
 
				+	struct starpu_codelet *mic_to_cpu_cl;
			
 
				+	size_t scc_elemsize;
			
 
				+	struct starpu_codelet *cpu_to_scc_cl;
			
 
				+	struct starpu_codelet *scc_to_cpu_cl;
			
 
				 };
			
 
				 
			
 
				 struct starpu_multiformat_interface
			
 
				 {
			
 
				+	enum starpu_data_interface_id id;
			
 
				+
			
 
				 	void *cpu_ptr;
			
 
				 	void *cuda_ptr;
			
 
				 	void *opencl_ptr;
			
 
				+	void *mic_ptr;
			
 
				 	uint32_t nx;
			
 
				 	struct starpu_multiformat_data_interface_ops *ops;
			
 
				 };
			
@@ -422,8 +461,24 @@ void starpu_multiformat_data_register(starpu_data_handle_t *handle, unsigned hom
 
				 #define STARPU_MULTIFORMAT_GET_CPU_PTR(interface)  (((struct starpu_multiformat_interface *)(interface))->cpu_ptr)
			
 
				 #define STARPU_MULTIFORMAT_GET_CUDA_PTR(interface) (((struct starpu_multiformat_interface *)(interface))->cuda_ptr)
			
 
				 #define STARPU_MULTIFORMAT_GET_OPENCL_PTR(interface) (((struct starpu_multiformat_interface *)(interface))->opencl_ptr)
			
 
				+#define STARPU_MULTIFORMAT_GET_MIC_PTR(interface) (((struct starpu_multiformat_interface *)(interface))->mic_ptr)
			
 
				 #define STARPU_MULTIFORMAT_GET_NX(interface)  (((struct starpu_multiformat_interface *)(interface))->nx)
			
 
				 
			
 
				+/* Generic type representing an interface, for now it's only used before
			
 
				+ * execution on message-passing devices but it can be useful in other cases.
			
 
				+ */
			
 
				+union _starpu_interface
			
 
				+{
			
 
				+	struct starpu_matrix_interface matrix;
			
 
				+	struct starpu_block_interface block;
			
 
				+	struct starpu_vector_interface vector;
			
 
				+	struct starpu_csr_interface csr;
			
 
				+	struct starpu_coo_interface coo;
			
 
				+	struct starpu_bcsr_interface bcsr;
			
 
				+	struct starpu_variable_interface variable;
			
 
				+	struct starpu_multiformat_interface multiformat;
			
 
				+};
			
 
				+
			
 
				 enum starpu_data_interface_id starpu_data_get_interface_id(starpu_data_handle_t handle);
			
 
				 
			
 
				 int starpu_data_pack(starpu_data_handle_t handle, void **ptr, starpu_ssize_t *count);
			
--- a/include/starpu_mic.h
+++ b/include/starpu_mic.h
@@ -0,0 +1,35 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+
			
 
				+#ifndef __STARPU_MIC_H__
			
 
				+#define __STARPU_MIC_H__
			
 
				+
			
 
				+#include <starpu_config.h>
			
 
				+
			
 
				+
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+
			
 
				+typedef void *starpu_mic_func_symbol_t;
			
 
				+
			
 
				+int starpu_mic_register_kernel(starpu_mic_func_symbol_t *symbol, const char *func_name);
			
 
				+
			
 
				+starpu_mic_kernel_t starpu_mic_get_kernel(starpu_mic_func_symbol_t symbol);
			
 
				+
			
 
				+#endif /* STARPU_USE_MIC */
			
 
				+
			
 
				+
			
 
				+#endif /* __STARPU_MIC_H__ */
			
--- a/include/starpu_perfmodel.h
+++ b/include/starpu_perfmodel.h
@@ -43,8 +43,10 @@ enum starpu_perfmodel_archtype
 
				 	STARPU_CPU_DEFAULT = 0,
			
 
				 	/* CPU combined workers between 0 and STARPU_MAXCPUS-1 */
			
 
				 	STARPU_CUDA_DEFAULT = STARPU_MAXCPUS,
			
 
				-	STARPU_OPENCL_DEFAULT = STARPU_CUDA_DEFAULT + STARPU_MAXCUDADEVS
			
 
				+	STARPU_OPENCL_DEFAULT = STARPU_CUDA_DEFAULT + STARPU_MAXCUDADEVS,
			
 
				 	/* STARPU_OPENCL_DEFAULT + devid */
			
 
				+	STARPU_MIC_DEFAULT = STARPU_OPENCL_DEFAULT + STARPU_MAXOPENCLDEVS,
			
 
				+	STARPU_SCC_DEFAULT = STARPU_MIC_DEFAULT + STARPU_MAXMICDEVS
			
 
				 };
			
 
				 
			
 
				 #ifdef __STDC_VERSION__
			
@@ -54,15 +56,19 @@ enum starpu_perfmodel_archtype
 
				 
			
 
				 _Static_assert(STARPU_CPU_DEFAULT == 0,
			
 
				 	       "invalid STARPU_CPU_DEFAULT value");
			
 
				-_Static_assert(STARPU_CUDA_DEFAULT > STARPU_CPU_DEFAULT,
			
 
				-	       "invalid STARPU_CPU_DEFAULT value");
			
 
				+_Static_assert(STARPU_CPU_DEFAULT < STARPU_CUDA_DEFAULT,
			
 
				+	       "invalid STARPU_{CPU,CUDA}_DEFAULT values");
			
 
				 _Static_assert(STARPU_CUDA_DEFAULT < STARPU_OPENCL_DEFAULT,
			
 
				 	       "invalid STARPU_{CUDA,OPENCL}_DEFAULT values");
			
 
				+_Static_assert(STARPU_OPENCL_DEFAULT < STARPU_MIC_DEFAULT,
			
 
				+	       "invalid STARPU_{OPENCL,MIC}_DEFAULT values");
			
 
				+_Static_assert(STARPU_MIC_DEFAULT < STARPU_SCC_DEFAULT,
			
 
				+	       "invalid STARPU_{MIC,SCC}_DEFAULT values");
			
 
				 
			
 
				 #  endif
			
 
				 #endif
			
 
				 
			
 
				-#define STARPU_NARCH_VARIATIONS	(STARPU_OPENCL_DEFAULT + STARPU_MAXOPENCLDEVS)
			
 
				+#define STARPU_NARCH_VARIATIONS	(STARPU_MIC_DEFAULT + STARPU_MAXMICDEVS)
			
 
				 
			
 
				 struct starpu_perfmodel_history_entry
			
 
				 {
			
--- a/include/starpu_scc.h
+++ b/include/starpu_scc.h
@@ -0,0 +1,35 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+
			
 
				+#ifndef __STARPU_SCC_H__
			
 
				+#define __STARPU_SCC_H__
			
 
				+
			
 
				+#include <starpu_config.h>
			
 
				+
			
 
				+
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+
			
 
				+typedef void *starpu_scc_func_symbol_t;
			
 
				+
			
 
				+int starpu_scc_register_kernel(starpu_scc_func_symbol_t *symbol, const char *func_name);
			
 
				+
			
 
				+starpu_scc_kernel_t starpu_scc_get_kernel(starpu_scc_func_symbol_t symbol);
			
 
				+
			
 
				+#endif /* STARPU_USE_SCC */
			
 
				+
			
 
				+
			
 
				+#endif /* __STARPU_SCC_H__ */
			
--- a/include/starpu_sink.h
+++ b/include/starpu_sink.h
@@ -0,0 +1,23 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+
			
 
				+#ifndef __STARPU_SINK_H__
			
 
				+#define __STARPU_SINK_H__
			
 
				+
			
 
				+void starpu_sink_common_worker(int argc, char **argv);
			
 
				+
			
 
				+#endif /* __STARPU_SINK_H__ */
			
--- a/include/starpu_task.h
+++ b/include/starpu_task.h
@@ -37,6 +37,8 @@ extern "C"
 
				 #define STARPU_CPU	((1ULL)<<1)
			
 
				 #define STARPU_CUDA	((1ULL)<<3)
			
 
				 #define STARPU_OPENCL	((1ULL)<<6)
			
 
				+#define STARPU_MIC	((1ULL)<<7)
			
 
				+#define STARPU_SCC	((1ULL)<<8)
			
 
				 
			
 
				 /* Codelet types */
			
 
				 enum starpu_codelet_type
			
@@ -65,6 +67,11 @@ typedef uint64_t starpu_tag_t;
 
				 typedef void (*starpu_cpu_func_t)(void **, void*);    /* CPU core */
			
 
				 typedef void (*starpu_cuda_func_t)(void **, void*);   /* NVIDIA CUDA device */
			
 
				 typedef void (*starpu_opencl_func_t)(void **, void*); /* OpenCL CUDA device */
			
 
				+typedef void (*starpu_mic_kernel_t)(void **, void*); /* MIC device */
			
 
				+typedef void (*starpu_scc_kernel_t)(void **, void*); /* SCC device */
			
 
				+
			
 
				+typedef starpu_mic_kernel_t (*starpu_mic_func_t)(void);
			
 
				+typedef starpu_scc_kernel_t (*starpu_scc_func_t)(void);
			
 
				 
			
 
				 #define STARPU_MULTIPLE_CPU_IMPLEMENTATIONS    ((starpu_cpu_func_t) -1)
			
 
				 #define STARPU_MULTIPLE_CUDA_IMPLEMENTATIONS   ((starpu_cuda_func_t) -1)
			
@@ -91,6 +98,10 @@ struct starpu_codelet
 
				 	starpu_cpu_func_t cpu_funcs[STARPU_MAXIMPLEMENTATIONS];
			
 
				 	starpu_cuda_func_t cuda_funcs[STARPU_MAXIMPLEMENTATIONS];
			
 
				 	starpu_opencl_func_t opencl_funcs[STARPU_MAXIMPLEMENTATIONS];
			
 
				+	starpu_mic_func_t mic_funcs[STARPU_MAXIMPLEMENTATIONS];
			
 
				+	starpu_scc_func_t scc_funcs[STARPU_MAXIMPLEMENTATIONS];
			
 
				+
			
 
				+	char *cpu_funcs_name[STARPU_MAXIMPLEMENTATIONS];
			
 
				 
			
 
				 	/* how many buffers do the codelet takes as argument ? */
			
 
				 	unsigned nbuffers;
			
@@ -128,6 +139,8 @@ struct starpu_task
 
				 	void *cl_arg;
			
 
				 	/* in case the argument buffer has to be uploaded explicitely */
			
 
				 	size_t cl_arg_size;
			
 
				+	/* Must StarPU free cl_arg when the task is destroyed? 0 (no) by default. */
			
 
				+	unsigned cl_arg_free;
			
 
				 
			
 
				 	/* when the task is done, callback_func(callback_arg) is called */
			
 
				 	void (*callback_func)(void *);
			
--- a/include/starpu_worker.h
+++ b/include/starpu_worker.h
@@ -36,7 +36,17 @@ enum starpu_worker_archtype
 
				 	STARPU_ANY_WORKER,    /* any worker, used in the hypervisor */
			
 
				 	STARPU_CPU_WORKER,    /* CPU core */
			
 
				 	STARPU_CUDA_WORKER,   /* NVIDIA CUDA device */
			
 
				-	STARPU_OPENCL_WORKER  /* OpenCL device */
			
 
				+	STARPU_OPENCL_WORKER, /* OpenCL device */
			
 
				+	STARPU_MIC_WORKER,    /* Intel MIC device */
			
 
				+	STARPU_SCC_WORKER     /* Intel SCC device */
			
 
				+};
			
 
				+
			
 
				+/* Represents the topology of sink devices; contains useful information about
			
 
				+ * their capabilities */
			
 
				+// XXX: unused.
			
 
				+struct starpu_sink_topology
			
 
				+{
			
 
				+	unsigned nb_cpus;
			
 
				 };
			
 
				 
			
 
				 struct starpu_sched_ctx_iterator
			
@@ -61,10 +71,20 @@ struct starpu_machine_topology
 
				 	unsigned nhwcpus;
			
 
				 	unsigned nhwcudagpus;
			
 
				 	unsigned nhwopenclgpus;
			
 
				+	unsigned nhwscc;
			
 
				 
			
 
				 	unsigned ncpus;
			
 
				 	unsigned ncudagpus;
			
 
				 	unsigned nopenclgpus;
			
 
				+	unsigned nsccdevices;
			
 
				+
			
 
				+	/* Topology of MP nodes (mainly MIC and SCC) as well as necessary
			
 
				+	 * objects to communicate with them. */
			
 
				+	unsigned nhwmicdevices;
			
 
				+	unsigned nmicdevices;
			
 
				+
			
 
				+	unsigned nhwmiccores[STARPU_MAXMICDEVS]; // Each MIC node has its set of cores.
			
 
				+	unsigned nmiccores[STARPU_MAXMICDEVS];
			
 
				 
			
 
				 	/* Where to bind workers ? */
			
 
				 	unsigned workers_bindid[STARPU_NMAXWORKERS];
			
@@ -74,6 +94,12 @@ struct starpu_machine_topology
 
				 
			
 
				 	/* Which GPU(s) do we use for OpenCL ? */
			
 
				 	unsigned workers_opencl_gpuid[STARPU_NMAXWORKERS];
			
 
				+
			
 
				+	/* Which MIC core(s) do we use ? */
			
 
				+	/* unsigned workers_mic_deviceid[STARPU_NMAXWORKERS]; */
			
 
				+
			
 
				+	/* Which SCC(s) do we use ? */
			
 
				+	unsigned workers_scc_deviceid[STARPU_NMAXWORKERS];
			
 
				 };
			
 
				 
			
 
				 /* generic structure used by the scheduling contexts to iterate the workers */
			
@@ -113,6 +139,10 @@ unsigned starpu_worker_is_combined_worker(int id);
 
				 unsigned starpu_cpu_worker_get_count(void);
			
 
				 unsigned starpu_cuda_worker_get_count(void);
			
 
				 unsigned starpu_opencl_worker_get_count(void);
			
 
				+unsigned starpu_mic_worker_get_count(void);
			
 
				+unsigned starpu_scc_worker_get_count(void);
			
 
				+
			
 
				+unsigned starpu_mic_device_get_count(void);
			
 
				 
			
 
				 /* Return the identifier of the thread in case this is associated to a worker.
			
 
				  * This will return -1 if this function is called directly from the application
			
@@ -166,6 +196,8 @@ void starpu_worker_get_name(int id, char *dst, size_t maxlen);
 
				  */
			
 
				 int starpu_worker_get_devid(int id);
			
 
				 
			
 
				+int starpu_worker_get_mp_nodeid(int id);
			
 
				+
			
 
				 #ifdef __cplusplus
			
 
				 }
			
 
				 #endif
			
--- a/libstarpu-mic.pc.in
+++ b/libstarpu-mic.pc.in
@@ -0,0 +1,28 @@
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2009-2011, 2013  Université de Bordeaux 1
			
 
				+# Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+
			
 
				+prefix=@prefix@
			
 
				+exec_prefix=@exec_prefix@
			
 
				+libdir=@libdir@
			
 
				+includedir=@includedir@
			
 
				+
			
 
				+Name: starpu
			
 
				+Description: offers support for heterogeneous multicore architecture
			
 
				+Version: @PACKAGE_VERSION@
			
 
				+Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_API
			
 
				+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_LDFLAGS@ @STARPU_OPENCL_LDFLAGS@ @STARPU_SC_HYPERVISOR@
			
 
				+Libs.private: @LDFLAGS@ @LIBS@
			
 
				+Requires: @HWLOC_REQUIRES@
			
--- a/libstarpu.pc.in
+++ b/libstarpu.pc.in
@@ -23,6 +23,6 @@ Name: starpu
 
				 Description: offers support for heterogeneous multicore architecture
			
 
				 Version: @PACKAGE_VERSION@
			
 
				 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_API
			
 
				-Libs: -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_LDFLAGS@ @STARPU_OPENCL_LDFLAGS@ @STARPU_SC_HYPERVISOR@
			
 
				+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_LDFLAGS@ @STARPU_OPENCL_LDFLAGS@ @STARPU_SC_HYPERVISOR@
			
 
				 Libs.private: @LDFLAGS@ @LIBS@
			
 
				 Requires: @HWLOC_REQUIRES@
			
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -49,10 +49,10 @@ endif STARPU_HAVE_WINDOWS
 
				 
			
 
				 lib_LTLIBRARIES = libstarpu-@STARPU_EFFECTIVE_VERSION@.la
			
 
				 
			
 
				-libstarpu_@STARPU_EFFECTIVE_VERSION@_la_CPPFLAGS = -I$(top_srcdir)/include/ -DBUILDING_STARPU
			
 
				+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_CPPFLAGS = -I$(top_srcdir)/include/ $(STARPU_RCCE_CPPFLAGS) -DBUILDING_STARPU
			
 
				 
			
 
				-libstarpu_@STARPU_EFFECTIVE_VERSION@_la_CFLAGS = $(GLOBAL_AM_CFLAGS) $(HWLOC_CFLAGS) $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS)
			
 
				-libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = -lm $(HWLOC_LIBS) $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(FXT_LIBS) $(STARPU_GLPK_LDFLAGS)
			
 
				+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_CFLAGS = $(GLOBAL_AM_CFLAGS) $(HWLOC_CFLAGS) $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(STARPU_COI_CPPFLAGS) $(STARPU_RCCE_CFLAGS) $(FXT_CFLAGS)
			
 
				+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LIBADD = -lm $(HWLOC_LIBS) $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(STARPU_COI_LDFLAGS) $(STARPU_RCCE_LDFLAGS) $(FXT_LIBS) $(STARPU_GLPK_LDFLAGS)
			
 
				 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_LDFLAGS = $(ldflags) $(FXT_LDFLAGS) -no-undefined									\
			
 
				   -version-info $(libstarpu_so_version)
			
 
				 
			
@@ -105,11 +105,20 @@ noinst_HEADERS = 						\
 
				 	common/uthash.h						\
			
 
				 	common/barrier_counter.h				\
			
 
				 	drivers/driver_common/driver_common.h			\
			
 
				+	drivers/mp_common/mp_common.h				\
			
 
				+	drivers/mp_common/source_common.h			\
			
 
				+	drivers/mp_common/sink_common.h				\
			
 
				 	drivers/cpu/driver_cpu.h				\
			
 
				 	drivers/cuda/driver_cuda.h				\
			
 
				 	drivers/opencl/driver_opencl.h				\
			
 
				 	drivers/opencl/driver_opencl_utils.h			\
			
 
				 	debug/starpu_debug_helpers.h				\
			
 
				+	drivers/mic/driver_mic_common.h				\
			
 
				+	drivers/mic/driver_mic_source.h				\
			
 
				+	drivers/mic/driver_mic_sink.h				\
			
 
				+	drivers/scc/driver_scc_common.h				\
			
 
				+	drivers/scc/driver_scc_source.h				\
			
 
				+	drivers/scc/driver_scc_sink.h				\
			
 
				 	debug/traces/starpu_fxt.h				\
			
 
				 	profiling/bound.h					\
			
 
				 	profiling/profiling.h					\
			
@@ -244,5 +253,40 @@ libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/opencl/driver_opencl.
 
				 endif
			
 
				 endif
			
 
				 
			
 
				+if STARPU_USE_SCC
			
 
				+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/scc/driver_scc_common.c
			
 
				+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/scc/driver_scc_source.c
			
 
				+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/scc/driver_scc_sink.c
			
 
				+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/scc/driver_scc_utils.c
			
 
				+endif
			
 
				+
			
 
				+
			
 
				+#########################################
			
 
				+#										#
			
 
				+#        Generic MP compilation			#
			
 
				+#										#
			
 
				+#########################################
			
 
				+
			
 
				+if STARPU_USE_MP
			
 
				+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mp_common/mp_common.c
			
 
				+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mp_common/source_common.c
			
 
				+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mp_common/sink_common.c
			
 
				+endif
			
 
				+
			
 
				+#########################################
			
 
				+#										#
			
 
				+#	     MIC compilation				#
			
 
				+#										#
			
 
				+#########################################
			
 
				+
			
 
				+if STARPU_USE_MIC
			
 
				+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mic/driver_mic_common.c
			
 
				+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mic/driver_mic_source.c
			
 
				+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mic/driver_mic_sink.c
			
 
				+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/mic/driver_mic_utils.c
			
 
				+endif
			
 
				+
			
 
				+#########################################
			
 
				+
			
 
				 showcheck:
			
 
				 	-cat /dev/null
			
--- a/src/common/fxt.h
+++ b/src/common/fxt.h
@@ -37,6 +37,8 @@
 
				 #define _STARPU_FUT_CPU_KEY	0x101
			
 
				 #define _STARPU_FUT_CUDA_KEY	0x102
			
 
				 #define _STARPU_FUT_OPENCL_KEY	0x103
			
 
				+#define _STARPU_FUT_MIC_KEY	0x104
			
 
				+#define _STARPU_FUT_SCC_KEY	0x105
			
 
				 
			
 
				 #define _STARPU_FUT_WORKER_INIT_START	0x5100
			
 
				 #define _STARPU_FUT_WORKER_INIT_END	0x5101
			
--- a/src/core/jobs.h
+++ b/src/core/jobs.h
@@ -51,6 +51,8 @@ typedef void (*_starpu_cl_func_t)(void **, void *);
 
				 #define _STARPU_CPU_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_CPU)
			
 
				 #define _STARPU_CUDA_MAY_PERFORM(j)      ((j)->task->cl->where & STARPU_CUDA)
			
 
				 #define _STARPU_OPENCL_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_OPENCL)
			
 
				+#define _STARPU_MIC_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_MIC)
			
 
				+#define _STARPU_SCC_MAY_PERFORM(j)	((j)->task->cl->where & STARPU_SCC)
			
 
				 
			
 
				 /* A job is the internal representation of a task. */
			
 
				 LIST_TYPE(_starpu_job,
			
@@ -116,6 +118,10 @@ LIST_TYPE(_starpu_job,
 
				 	 * so we need a flag to differentiate them from "normal" tasks. */
			
 
				 	unsigned reduction_task;
			
 
				 
			
 
				+	/* Used by MIC driver to record codelet start time instead of using a
			
 
				+	 * local variable */
			
 
				+	struct timespec cl_start;
			
 
				+
			
 
				 #ifdef STARPU_USE_FXT
			
 
				 	/* A symbol name may be associated to the job directly for debug
			
 
				 	 * purposes (for instance if the codelet is NULL). */
			
--- a/src/core/perfmodel/perfmodel_bus.c
+++ b/src/core/perfmodel/perfmodel_bus.c
@@ -67,6 +67,7 @@ static unsigned was_benchmarked = 0;
 
				 static unsigned ncpus = 0;
			
 
				 static unsigned ncuda = 0;
			
 
				 static unsigned nopencl = 0;
			
 
				+static unsigned nmic = 0;
			
 
				 
			
 
				 /* Benchmarking the performance of the bus */
			
 
				 
			
@@ -91,6 +92,11 @@ static double opencldev_latency_dtoh[STARPU_MAXNODES] = {0.0};
 
				 static struct dev_timing opencldev_timing_per_cpu[STARPU_MAXNODES*STARPU_MAXCPUS];
			
 
				 #endif
			
 
				 
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+static double mic_time_host_to_device[STARPU_MAXNODES] = {0.0};
			
 
				+static double mic_time_device_to_host[STARPU_MAXNODES] = {0.0};
			
 
				+#endif /* STARPU_USE_MIC */
			
 
				+
			
 
				 #ifdef STARPU_HAVE_HWLOC
			
 
				 static hwloc_topology_t hwtopology;
			
 
				 #endif
			
@@ -695,6 +701,19 @@ static void benchmark_all_gpu_devices(void)
 
				 	}
			
 
				 #endif
			
 
				 
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+	/* TODO: implement real calibration! For now we only store an arbitrary
			
 
				+	 * value for each device as a workaround; otherwise we get problems
			
 
				+	 * with the heft scheduler. */
			
 
				+        nmic = _starpu_mic_src_get_device_count();
			
 
				+
			
 
				+	for (i = 0; i < STARPU_MAXNODES; i++)
			
 
				+	{
			
 
				+		mic_time_host_to_device[i] = 0.1;
			
 
				+		mic_time_device_to_host[i] = 0.1;
			
 
				+	}
			
 
				+#endif /* STARPU_USE_MIC */
			
 
				+
			
 
				 #ifdef STARPU_HAVE_HWLOC
			
 
				 	hwloc_set_cpubind(hwtopology, former_cpuset, HWLOC_CPUBIND_THREAD);
			
 
				 #elif __linux__
			
@@ -1082,6 +1101,9 @@ static void write_bus_latency_file_content(void)
 
				 #ifdef STARPU_USE_OPENCL
			
 
				         maxnode += nopencl;
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+        maxnode += nmic;
			
 
				+#endif
			
 
				         for (src = 0; src < STARPU_MAXNODES; src++)
			
 
				 	{
			
 
				 		for (dst = 0; dst < STARPU_MAXNODES; dst++)
			
@@ -1290,6 +1312,9 @@ static void write_bus_bandwidth_file_content(void)
 
				 #ifdef STARPU_USE_OPENCL
			
 
				         maxnode += nopencl;
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+        maxnode += nmic;
			
 
				+#endif
			
 
				 	for (src = 0; src < STARPU_MAXNODES; src++)
			
 
				 	{
			
 
				 		for (dst = 0; dst < STARPU_MAXNODES; dst++)
			
@@ -1300,7 +1325,7 @@ static void write_bus_bandwidth_file_content(void)
 
				 			{
			
 
				 				bandwidth = NAN;
			
 
				 			}
			
 
				-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
			
 
				+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_MIC)
			
 
				 			else if (src != dst)
			
 
				 			{
			
 
				 				double slowness = 0.0;
			
@@ -1320,11 +1345,17 @@ static void write_bus_bandwidth_file_content(void)
 
				 				}
			
 
				 #endif
			
 
				 #ifdef STARPU_USE_OPENCL
			
 
				-				if (src > ncuda)
			
 
				+				if (src > ncuda && src <= ncuda + nopencl)
			
 
				 					slowness += opencldev_timing_dtoh[src-ncuda];
			
 
				-				if (dst > ncuda)
			
 
				+				if (dst > ncuda && dst <= ncuda + nopencl)
			
 
				 					slowness += opencldev_timing_htod[dst-ncuda];
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+				if (src > ncuda + nopencl)
			
 
				+					slowness += mic_time_device_to_host[src - (ncuda + nopencl)];
			
 
				+				if (dst > ncuda + nopencl)
			
 
				+					slowness += mic_time_host_to_device[dst - (ncuda + nopencl)];
			
 
				+#endif
			
 
				 				bandwidth = 1.0/slowness;
			
 
				 			}
			
 
				 #endif
			
@@ -1364,6 +1395,9 @@ void starpu_bus_print_bandwidth(FILE *f)
 
				 #ifdef STARPU_USE_OPENCL
			
 
				         maxnode += nopencl;
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+        maxnode += nmic;
			
 
				+#endif
			
 
				 
			
 
				 	fprintf(f, "from/to\t");
			
 
				 	fprintf(f, "RAM\t");
			
@@ -1501,7 +1535,7 @@ static void check_bus_config_file(void)
 
				 	{
			
 
				                 FILE *f;
			
 
				                 int ret;
			
 
				-		unsigned read_cuda = -1, read_opencl = -1;
			
 
				+		unsigned read_cuda = -1, read_opencl = -1, read_mic = -1;
			
 
				                 unsigned read_cpus = -1;
			
 
				 
			
 
				                 // Loading configuration from file
			
@@ -1517,6 +1551,9 @@ static void check_bus_config_file(void)
 
				 		ret = fscanf(f, "%d\t", &read_opencl);
			
 
				 		STARPU_ASSERT(ret == 1);
			
 
				                 _starpu_drop_comments(f);
			
 
				+		ret = fscanf(f, "%d\t", &read_mic);
			
 
				+		STARPU_ASSERT(ret == 1);
			
 
				+                _starpu_drop_comments(f);
			
 
				                 fclose(f);
			
 
				 
			
 
				                 // Loading current configuration
			
@@ -1527,6 +1564,9 @@ static void check_bus_config_file(void)
 
				 #ifdef STARPU_USE_OPENCL
			
 
				                 nopencl = _starpu_opencl_get_device_count();
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+                nmic = _starpu_mic_src_get_device_count();
			
 
				+#endif /* STARPU_USE_MIC */
			
 
				 
			
 
				                 // Checking if both configurations match
			
 
				                 if (read_cpus != ncpus)
			
@@ -1547,6 +1587,12 @@ static void check_bus_config_file(void)
 
				                         _starpu_bus_force_sampling();
			
 
				 			_STARPU_DISP("... done\n");
			
 
				                 }
			
 
				+                else if (read_mic != nmic)
			
 
				+		{
			
 
				+                        _STARPU_DISP("Current configuration does not match the bus performance model (MIC: (stored) %d != (current) %d), recalibrating...", read_mic, nmic);
			
 
				+                        _starpu_bus_force_sampling();
			
 
				+			_STARPU_DISP("... done\n");
			
 
				+                }
			
 
				         }
			
 
				 }
			
 
				 
			
@@ -1567,6 +1613,7 @@ static void write_bus_config_file_content(void)
 
				         fprintf(f, "%u # Number of CPUs\n", ncpus);
			
 
				         fprintf(f, "%d # Number of CUDA devices\n", ncuda);
			
 
				         fprintf(f, "%d # Number of OpenCL devices\n", nopencl);
			
 
				+        fprintf(f, "%d # Number of MIC devices\n", nmic);
			
 
				 
			
 
				         fclose(f);
			
 
				 }
			
--- a/src/core/perfmodel/perfmodel_history.c
+++ b/src/core/perfmodel/perfmodel_history.c
@@ -366,6 +366,21 @@ static void parse_model_file(FILE *f, struct starpu_perfmodel *model, unsigned s
 
				 			   archmin + STARPU_MIN(narchs, STARPU_MAXOPENCLDEVS),
			
 
				 			   narchs > STARPU_MAXOPENCLDEVS ? narchs - STARPU_MAXOPENCLDEVS : 0);
			
 
				 	}
			
 
				+
			
 
				+	/* Parsing MIC devs */
			
 
				+	_starpu_drop_comments(f);
			
 
				+	ret = fscanf(f, "%u\n", &narchs);
			
 
				+	STARPU_ASSERT(ret == 1);
			
 
				+
			
 
				+	archmin += STARPU_MAXOPENCLDEVS;
			
 
				+	_STARPU_DEBUG("Parsing %u MIC devices\n", narchs);
			
 
				+	if (narchs > 0)
			
 
				+	{
			
 
				+		parse_arch(f, model, scan_history,
			
 
				+			   archmin,
			
 
				+			   archmin + STARPU_MIN(narchs, STARPU_MAXMICDEVS),
			
 
				+			   narchs > STARPU_MAXMICDEVS ? narchs - STARPU_MAXMICDEVS : 0);
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 
			
@@ -447,6 +462,7 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 
				 		{
			
 
				 			case STARPU_CUDA_DEFAULT:
			
 
				 			case STARPU_OPENCL_DEFAULT:
			
 
				+			case STARPU_MIC_DEFAULT:
			
 
				 				arch_base = arch;
			
 
				 				idx++;
			
 
				 				break;
			
@@ -479,42 +495,48 @@ static void dump_model_file(FILE *f, struct starpu_perfmodel *model)
 
				 	}
			
 
				 
			
 
				 	/* Writing stuff */
			
 
				+
			
 
				 	char *name = "unknown";
			
 
				 	unsigned substract_to_arch = 0;
			
 
				 	for (arch = 0; arch < STARPU_NARCH_VARIATIONS; arch++)
			
 
				 	{
			
 
				+		unsigned char arch_already_visited = 0;
			
 
				+
			
 
				 		switch (arch)
			
 
				 		{
			
 
				 			case STARPU_CPU_DEFAULT:
			
 
				-				arch_base = arch;
			
 
				 				name = "CPU";
			
 
				-				fprintf(f, "##################\n");
			
 
				-				fprintf(f, "# %ss\n", name);
			
 
				-				fprintf(f, "# maximum number of %ss\n", name);
			
 
				-				fprintf(f, "%u\n", my_narch = narch[0]);
			
 
				+				my_narch = narch[0];
			
 
				 				break;
			
 
				 			case STARPU_CUDA_DEFAULT:
			
 
				-				arch_base = arch;
			
 
				 				name = "CUDA";
			
 
				 				substract_to_arch = STARPU_MAXCPUS;
			
 
				-				fprintf(f, "##################\n");
			
 
				-				fprintf(f, "# %ss\n", name);
			
 
				-				fprintf(f, "# number of %s architectures\n", name);
			
 
				-				fprintf(f, "%u\n", my_narch = narch[1]);
			
 
				+				my_narch = narch[1];
			
 
				 				break;
			
 
				 			case STARPU_OPENCL_DEFAULT:
			
 
				-				arch_base = arch;
			
 
				 				name = "OPENCL";
			
 
				-				substract_to_arch += STARPU_MAXCUDADEVS;
			
 
				-				fprintf(f, "##################\n");
			
 
				-				fprintf(f, "# %ss\n", name);
			
 
				-				fprintf(f, "# number of %s architectures\n", name);
			
 
				-				fprintf(f, "%u\n", my_narch = narch[2]);
			
 
				+				my_narch = narch[2];
			
 
				+				break;
			
 
				+			case STARPU_MIC_DEFAULT:
			
 
				+				name = "MIC";
			
 
				+				my_narch = narch[3];
			
 
				 				break;
			
 
				 			default:
			
 
				+				/* The current worker arch was already written,
			
 
				+				 * we don't need to write it again */
			
 
				+				arch_already_visited = 1;
			
 
				 				break;
			
 
				 		}
			
 
				 
			
 
				+		if (!arch_already_visited)
			
 
				+		{
			
 
				+			arch_base = arch;
			
 
				+			fprintf(f, "##################\n");
			
 
				+			fprintf(f, "# %ss\n", name);
			
 
				+			fprintf(f, "# number of %s architectures\n", name);
			
 
				+			fprintf(f, "%u\n", my_narch);
			
 
				+		}
			
 
				+
			
 
				 		unsigned max_impl = 0;
			
 
				 		if (model->type == STARPU_HISTORY_BASED || model->type == STARPU_NL_REGRESSION_BASED)
			
 
				 		{
			
@@ -1024,6 +1046,12 @@ void starpu_perfmodel_get_arch_name(enum starpu_perfmodel_archtype arch, char *a
 
				 		int devid = arch - STARPU_OPENCL_DEFAULT;
			
 
				 		snprintf(archname, maxlen, "opencl_%d_impl_%u", devid,nimpl);
			
 
				 	}
			
 
				+	else if ((STARPU_MIC_DEFAULT <= arch)
			
 
				+		&& (arch < STARPU_MIC_DEFAULT + STARPU_MAXMICDEVS))
			
 
				+	{
			
 
				+		int devid = arch - STARPU_MIC_DEFAULT;
			
 
				+		snprintf(archname, maxlen, "mic_%d_impl_%u", devid, nimpl);
			
 
				+	}
			
 
				 	else
			
 
				 	{
			
 
				 		STARPU_ABORT();
			
--- a/src/core/sched_policy.c
+++ b/src/core/sched_policy.c
@@ -486,6 +486,25 @@ struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t
 
				 			break;
			
 
				 		}
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+		case STARPU_MIC_RAM:
			
 
				+		{
			
 
				+			/* Braces are required: a declaration may not directly follow a case label. */
			
 
				+			struct starpu_multiformat_data_interface_ops *mf_ops;
			
 
				+			mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface);
			
 
				+			conversion_task->cl = mf_ops->mic_to_cpu_cl;
			
 
				+			break;
			
 
				+		}
			
 
				+#endif /* STARPU_USE_MIC */
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+		case STARPU_SCC_RAM:
			
 
				+		{
			
 
				+			struct starpu_multiformat_data_interface_ops *mf_ops;
			
 
				+			mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface);
			
 
				+			conversion_task->cl = mf_ops->scc_to_cpu_cl;
			
 
				+			break;
			
 
				+		}
			
 
				+#endif /* STARPU_USE_SCC */
			
 
				 		default:
			
 
				 			_STARPU_ERROR("Oops : %u\n", handle->mf_node);
			
 
				 		}
			
@@ -508,6 +527,25 @@ struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t
 
				 		break;
			
 
				 	}
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+	case STARPU_MIC_RAM:
			
 
				+	{
			
 
				+		/* Braces are required: a declaration may not directly follow a case label. */
			
 
				+		struct starpu_multiformat_data_interface_ops *mf_ops;
			
 
				+		mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface);
			
 
				+		conversion_task->cl = mf_ops->cpu_to_mic_cl;
			
 
				+		break;
			
 
				+	}
			
 
				+#endif /* STARPU_USE_MIC */
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+	case STARPU_SCC_RAM:
			
 
				+	{
			
 
				+		struct starpu_multiformat_data_interface_ops *mf_ops;
			
 
				+		mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface);
			
 
				+		conversion_task->cl = mf_ops->cpu_to_scc_cl;
			
 
				+		break;
			
 
				+	}
			
 
				+#endif /* STARPU_USE_SCC */
			
 
				 	default:
			
 
				 		STARPU_ABORT();
			
 
				 	}
			
--- a/src/core/task.c
+++ b/src/core/task.c
@@ -155,6 +155,11 @@ void _starpu_task_destroy(struct starpu_task *task)
 
				 		starpu_task_clean(task);
			
 
				 		/* TODO handle the case of task with detach = 1 and destroy = 1 */
			
 
				 		/* TODO handle the case of non terminated tasks -> return -EINVAL */
			
 
				+
			
 
				+		/* Does the user want StarPU to release cl_arg? */
			
 
				+		if (task->cl_arg_free)
			
 
				+			free(task->cl_arg);
			
 
				+
			
 
				 		free(task);
			
 
				 	}
			
 
				 }
			
@@ -871,6 +876,8 @@ _starpu_handle_needs_conversion_task_for_arch(starpu_data_handle_t handle,
 
				 					return 0;
			
 
				 				case STARPU_CUDA_RAM:      /* Fall through */
			
 
				 				case STARPU_OPENCL_RAM:
			
 
				+				case STARPU_MIC_RAM:
			
 
				+				case STARPU_SCC_RAM:
			
 
				 					return 1;
			
 
				 				default:
			
 
				 					STARPU_ABORT();
			
@@ -878,12 +885,16 @@ _starpu_handle_needs_conversion_task_for_arch(starpu_data_handle_t handle,
 
				 			break;
			
 
				 		case STARPU_CUDA_RAM:    /* Fall through */
			
 
				 		case STARPU_OPENCL_RAM:
			
 
				+		case STARPU_MIC_RAM:
			
 
				+		case STARPU_SCC_RAM:
			
 
				 			switch(starpu_node_get_kind(handle->mf_node))
			
 
				 			{
			
 
				 				case STARPU_CPU_RAM:
			
 
				 					return 1;
			
 
				 				case STARPU_CUDA_RAM:
			
 
				 				case STARPU_OPENCL_RAM:
			
 
				+				case STARPU_MIC_RAM:
			
 
				+				case STARPU_SCC_RAM:
			
 
				 					return 0;
			
 
				 				default:
			
 
				 					STARPU_ABORT();
			
@@ -920,3 +931,18 @@ unsigned starpu_task_get_implementation(struct starpu_task *task)
 
				 {
			
 
				 	return _starpu_get_job_associated_to_task(task)->nimpl;
			
 
				 }
			
 
				+
			
 
				+starpu_mic_func_t _starpu_task_get_mic_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
			
 
				+{
			
 
				+	return cl->mic_funcs[nimpl];
			
 
				+}
			
 
				+
			
 
				+starpu_scc_func_t _starpu_task_get_scc_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
			
 
				+{
			
 
				+	return cl->scc_funcs[nimpl];
			
 
				+}
			
 
				+
			
 
				+char *_starpu_task_get_cpu_name_nth_implementation(struct starpu_codelet *cl, unsigned nimpl)
			
 
				+{
			
 
				+	return cl->cpu_funcs_name[nimpl];
			
 
				+}
			
--- a/src/core/task.h
+++ b/src/core/task.h
@@ -72,6 +72,10 @@ void _starpu_codelet_check_deprecated_fields(struct starpu_codelet *cl);
 
				 starpu_cpu_func_t _starpu_task_get_cpu_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
			
 
				 starpu_cuda_func_t _starpu_task_get_cuda_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
			
 
				 starpu_opencl_func_t _starpu_task_get_opencl_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
			
 
				+starpu_mic_func_t _starpu_task_get_mic_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
			
 
				+starpu_scc_func_t _starpu_task_get_scc_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
			
 
				+
			
 
				+char *_starpu_task_get_cpu_name_nth_implementation(struct starpu_codelet *cl, unsigned nimpl);
			
 
				 
			
 
				 #define _STARPU_TASK_SET_INTERFACE(task, interface, i) do { if (task->dyn_handles) task->dyn_interfaces[i] = interface; else task->interfaces[i] = interface;} while(0)
			
 
				 #define _STARPU_TASK_GET_INTERFACES(task) ((task->dyn_handles) ? task->dyn_interfaces : task->interfaces)
			
--- a/src/core/topology.c
+++ b/src/core/topology.c
@@ -23,6 +23,9 @@
 
				 #include <core/debug.h>
			
 
				 #include <core/topology.h>
			
 
				 #include <drivers/cuda/driver_cuda.h>
			
 
				+#include <drivers/mic/driver_mic_source.h>
			
 
				+#include <drivers/scc/driver_scc_source.h>
			
 
				+#include <drivers/mp_common/source_common.h>
			
 
				 #include <drivers/opencl/driver_opencl.h>
			
 
				 #include <profiling/profiling.h>
			
 
				 #include <common/uthash.h>
			
@@ -45,7 +48,7 @@
 
				 
			
 
				 static unsigned topology_is_initialized = 0;
			
 
				 
			
 
				-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
			
 
				+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_MIC) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID)
			
 
				 
			
 
				 struct handle_entry
			
 
				 {
			
@@ -67,9 +70,9 @@ static unsigned may_bind_automatically = 0;
 
				  * Discover the topology of the machine
			
 
				  */
			
 
				 
			
 
				-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_SIMGRID)
			
 
				+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_MIC) || defined(STARPU_USE_SCC)  || defined(STARPU_SIMGRID)
			
 
				 static void
			
 
				-_starpu_initialize_workers_gpuid (int *explicit_workers_gpuid,
			
 
				+_starpu_initialize_workers_deviceid (int *explicit_workers_gpuid,
			
 
				 				  int *current, int *workers_gpuid,
			
 
				 				  const char *varname, unsigned nhwgpus)
			
 
				 {
			
@@ -144,7 +147,8 @@ _starpu_initialize_workers_gpuid (int *explicit_workers_gpuid,
 
				 			  workers_gpuid[i] = (unsigned)(i % nhwgpus);
			
 
				 
			
 
				 		/* StarPU can use sampling techniques to bind threads
			
 
				-		 * correctly */
			
 
				+		 * correctly
			
 
				+		 * TODO: use a private value for each kind of device */
			
 
				 		may_bind_automatically = 1;
			
 
				 	}
			
 
				 }
			
@@ -157,7 +161,7 @@ _starpu_initialize_workers_cuda_gpuid (struct _starpu_machine_config *config)
 
				 	struct starpu_machine_topology *topology = &config->topology;
			
 
				 	struct starpu_conf *uconf = config->conf;
			
 
				 
			
 
				-        _starpu_initialize_workers_gpuid (
			
 
				+        _starpu_initialize_workers_deviceid (
			
 
				 		uconf->use_explicit_workers_cuda_gpuid == 0
			
 
				 		? NULL
			
 
				 		: (int *)uconf->workers_cuda_gpuid,
			
@@ -184,7 +188,7 @@ _starpu_initialize_workers_opencl_gpuid (struct _starpu_machine_config*config)
 
				 	struct starpu_machine_topology *topology = &config->topology;
			
 
				 	struct starpu_conf *uconf = config->conf;
			
 
				 
			
 
				-        _starpu_initialize_workers_gpuid(
			
 
				+        _starpu_initialize_workers_deviceid(
			
 
				 		uconf->use_explicit_workers_opencl_gpuid == 0
			
 
				 		? NULL
			
 
				 		: (int *)uconf->workers_opencl_gpuid,
			
@@ -258,6 +262,145 @@ _starpu_get_next_opencl_gpuid (struct _starpu_machine_config *config)
 
				 }
			
 
				 #endif
			
 
				 
			
 
				+#if 0
			
 
				+#if defined(STARPU_USE_MIC) || defined(STARPU_SIMGRID)
			
 
				+/* Set up the MIC device-id table of CONFIG by delegating to
				+ * _starpu_initialize_workers_deviceid() with the MIC-specific fields and
				+ * the "STARPU_WORKERS_MICID" variable name.
				+ * NOTE(review): this helper is compiled out by the enclosing `#if 0'.
				+ * NOTE(review): nhwmiccores is indexed per device elsewhere in this file
				+ * while the callee takes a plain count -- confirm what should be passed
				+ * before enabling this code. */
				+static void _starpu_initialize_workers_mic_deviceid(struct _starpu_machine_config *config)
				+{
				+	struct starpu_machine_topology *topology = &config->topology;
				+	struct starpu_conf *uconf = config->conf;
				+
				+	_starpu_initialize_workers_deviceid(
				+		uconf->use_explicit_workers_mic_deviceid == 0
				+		? NULL
				+		: (int *)uconf->workers_mic_deviceid,
				+		&(config->current_mic_deviceid),
				+		/* table read back by _starpu_get_next_mic_deviceid() */
				+		(int *)topology->workers_mic_deviceid,
				+		"STARPU_WORKERS_MICID",
				+		topology->nhwmiccores);
				+}
			
 
				+#endif
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+/* Set up the SCC device-id table of CONFIG by delegating to
				+ * _starpu_initialize_workers_deviceid() with the SCC-specific fields and
				+ * the "STARPU_WORKERS_SCCID" variable name.  The explicit list from the
				+ * user configuration is only handed over when
				+ * use_explicit_workers_scc_deviceid is set. */
				+static void _starpu_initialize_workers_scc_deviceid(struct _starpu_machine_config *config)
				+{
				+	struct starpu_machine_topology *topology = &config->topology;
				+	struct starpu_conf *uconf = config->conf;
				+
				+	_starpu_initialize_workers_deviceid(
				+		uconf->use_explicit_workers_scc_deviceid == 0
				+		? NULL
				+		: (int *) uconf->workers_scc_deviceid,
				+		&(config->current_scc_deviceid),
				+		/* table read back by _starpu_get_next_scc_deviceid() */
				+		(int *) topology->workers_scc_deviceid,
				+		"STARPU_WORKERS_SCCID",
				+		topology->nhwscc);
				+}
			
 
				+#endif /* STARPU_USE_SCC */
			
 
				+
			
 
				+#if 0
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+/* Return the next entry of the MIC device-id table, cycling over its
				+ * first `nmicdevices' slots, and advance the cursor kept in CONFIG. */
				+static inline int _starpu_get_next_mic_deviceid(struct _starpu_machine_config *config)
				+{
				+	struct starpu_machine_topology *topology = &config->topology;
				+	unsigned slot = config->current_mic_deviceid % topology->nmicdevices;
				+
				+	config->current_mic_deviceid++;
				+	return (int)topology->workers_mic_deviceid[slot];
				+}
			
 
				+#endif
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+/* Return the next entry of the SCC device-id table, cycling over its
				+ * first `nsccdevices' slots, and advance the cursor kept in CONFIG. */
				+static inline int _starpu_get_next_scc_deviceid(struct _starpu_machine_config *config)
				+{
				+	struct starpu_machine_topology *topology = &config->topology;
				+	unsigned slot = config->current_scc_deviceid % topology->nsccdevices;
				+
				+	config->current_scc_deviceid++;
				+	return (int)topology->workers_scc_deviceid[slot];
				+}
			
 
				+#endif
			
 
				+
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+/* Discover the topology of the MIC node identified by MIC_IDX: ask the
				+ * node for its number of cores and record the answer in the
				+ * `nhwmiccores' topology field of CONFIG for that device. */
				+static void
				+_starpu_init_mic_topology (struct _starpu_machine_config *config, long mic_idx)
				+{
				+    int detected_cores;
				+
				+    _starpu_src_common_sink_nbcores (mic_nodes[mic_idx], &detected_cores);
				+    config->topology.nhwmiccores[mic_idx] = detected_cores;
				+}
			
 
				+
			
 
				+
			
 
				+/* Initialize the MIC node of index MIC_IDX: locate the sink program, get
				+ * the COI engine handle, start the sink program on the device and create
				+ * the mp_common node stored in mic_nodes[MIC_IDX].
				+ * COI_HANDLE and COI_PROCESS receive the engine handle and the process
				+ * created on the device.
				+ * Returns 0 on success, -1 when no sink program could be located. */
				+static int
				+_starpu_init_mic_node (struct _starpu_machine_config *config, int mic_idx,
				+		       COIENGINE *coi_handle, COIPROCESS *coi_process)
				+{
				+    struct starpu_conf *user_conf = config->user_conf;
				+
				+    char ***argv = _starpu_get_argv();
				+    const char *suffixes[] = {"-mic", "_mic", NULL};
				+
				+    /* Environment variables to send to the Sink, it informs it what kind
				+     * of node it is (architecture and type) as there is no way to discover
				+     * it itself */
				+    char mic_idx_env[32];
				+    snprintf(mic_idx_env, sizeof(mic_idx_env), "DEVID=%d", mic_idx);
				+
				+    /* XXX: this is currently necessary so that the remote process does not
				+     * segfault.
				+     * NOTE(review): the device count is hard-coded to 2 here. */
				+    char nb_mic_env[32];
				+    snprintf(nb_mic_env, sizeof(nb_mic_env), "NB_MIC=%d", 2);
				+
				+    const char *mic_sink_env[] = {"STARPU_SINK=STARPU_MIC", mic_idx_env, nb_mic_env, NULL};
				+
				+    char mic_sink_program_path[1024];
				+    /* Let's get the helper program to run on the MIC device */
				+    int mic_file_found =
				+	_starpu_src_common_locate_file (mic_sink_program_path,
				+					getenv("STARPU_MIC_SINK_PROGRAM_NAME"),
				+					getenv("STARPU_MIC_SINK_PROGRAM_PATH"),
				+					(user_conf==NULL ? NULL : user_conf->mic_sink_program_path),
				+					(argv ? (*argv)[0] : NULL),
				+					suffixes);
				+
				+    if (0 != mic_file_found) {
				+	/* Adjacent literals are concatenated verbatim, so each piece
				+	 * carries its own trailing space. */
				+	fprintf(stderr, "No MIC program specified, use the environment "
				+		"variable STARPU_MIC_SINK_PROGRAM_NAME or the environment "
				+		"variable STARPU_MIC_SINK_PROGRAM_PATH "
				+		"or the field 'starpu_conf.mic_sink_program_path' "
				+		"to define it.\n");
				+
				+	return -1;
				+    }
				+
				+    COIRESULT res;
				+    /* Let's get the handle which let us manage the remote MIC device */
				+    res = COIEngineGetHandle(COI_ISA_MIC, mic_idx, coi_handle);
				+    if (STARPU_UNLIKELY(res != COI_SUCCESS))
				+	STARPU_MIC_SRC_REPORT_COI_ERROR(res);
				+
				+    /* We launch the helper on the MIC device, which will wait for us
				+     * to give it work to do.
				+     * As we will communicate further with the device through scif we
				+     * don't need to keep the process pointer */
				+    res = COIProcessCreateFromFile(*coi_handle, mic_sink_program_path, 0, NULL, 0,
				+				   mic_sink_env, 1, NULL, 0, NULL,
				+				   coi_process);
				+    if (STARPU_UNLIKELY(res != COI_SUCCESS))
				+	STARPU_MIC_SRC_REPORT_COI_ERROR(res);
				+
				+    /* Let's create the node structure, we'll communicate with the peer
				+     * through scif thanks to it */
				+    mic_nodes[mic_idx] =
				+	_starpu_mp_common_node_create(STARPU_MIC_SOURCE, mic_idx);
				+
				+    return 0;
				+}
			
 
				+#endif
			
 
				+
			
 
				 
			
 
				 static void
			
 
				 _starpu_init_topology (struct _starpu_machine_config *config)
			
@@ -284,6 +427,9 @@ _starpu_init_topology (struct _starpu_machine_config *config)
 
				 	_starpu_cpu_discover_devices(config);
			
 
				 	_starpu_cuda_discover_devices(config);
			
 
				 	_starpu_opencl_discover_devices(config);
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+	config->topology.nhwscc = _starpu_scc_src_get_device_count();
			
 
				+#endif
			
 
				 
			
 
				 	topology_is_initialized = 1;
			
 
				 }
			
@@ -434,8 +580,109 @@ _starpu_topology_get_nhwcpu (struct _starpu_machine_config *config)
 
				 	return config->topology.nhwcpus;
			
 
				 }
			
 
				 
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+/* Configure the MIC device of index MIC_IDX: discover how many cores it
				+ * has, decide how many of them to use and append one MIC worker per
				+ * selected core to CONFIG.
				+ * The STARPU_NMIC environment variable is looked up first; when it
				+ * yields -1 the `nmic' field of USER_CONF (if any) is used instead, and
				+ * -1 there as well means "use every detected core". */
				+static void
				+_starpu_init_mic_config (struct _starpu_machine_config *config,
				+			 struct starpu_conf *user_conf,
				+			 unsigned mic_idx)
				+{
				+    struct starpu_machine_topology *topology = &config->topology;
				+
				+    topology->nhwmiccores[mic_idx] = 0;
				+
				+    _starpu_init_mic_topology (config, mic_idx);
				+
				+    int nmiccores = starpu_get_env_number("STARPU_NMIC");
				+
				+    /* STARPU_NMIC is not set. Did the user specify anything ? */
				+    if (nmiccores == -1 && user_conf)
				+	nmiccores = user_conf->nmic;
				+
				+    if (nmiccores == -1)
				+    {
				+	/* Nothing was specified, so let's use the number of
				+	 * detected mic cores. */
				+	nmiccores = topology->nhwmiccores[mic_idx];
				+    }
				+    else if (nmiccores != 0 && nmiccores > topology->nhwmiccores[mic_idx])
				+    {
				+	/* The user requires more MIC cores than there are available */
				+	fprintf(stderr,
				+		"# Warning: %d MIC cores requested. Only %d available.\n",
				+		nmiccores, topology->nhwmiccores[mic_idx]);
				+	nmiccores = topology->nhwmiccores[mic_idx];
				+    }
				+
				+    topology->nmiccores[mic_idx] = nmiccores;
				+    STARPU_ASSERT(topology->nmiccores[mic_idx] + topology->nworkers <= STARPU_NMAXWORKERS);
				+
				+    /* _starpu_initialize_workers_mic_deviceid (config); */
				+
				+    /* The new workers are appended right after the already configured ones. */
				+    unsigned miccore_id;
				+    for (miccore_id = 0; miccore_id < topology->nmiccores[mic_idx]; miccore_id++)
				+    {
				+	unsigned worker_idx = topology->nworkers + miccore_id;
				+
				+	config->workers[worker_idx].arch = STARPU_MIC_WORKER;
				+	config->workers[worker_idx].perf_arch = STARPU_MIC_DEFAULT;
				+	config->workers[worker_idx].mp_nodeid = mic_idx;
				+	config->workers[worker_idx].devid = miccore_id;
				+	config->workers[worker_idx].worker_mask = STARPU_MIC;
				+	config->worker_mask |= STARPU_MIC;
				+    }
				+
				+    topology->nworkers += topology->nmiccores[mic_idx];
				+}
			
 
				+
			
 
				+
			
 
				+/* Discover and configure the mp topology. That means:
				+ * - discover the number of mp nodes;
				+ * - initialize each discovered node;
				+ * - discover the local topology (number of PUs/devices) of each node;
				+ * - configure the workers accordingly.
				+ * The number of MIC devices used is the smaller of the hardware count
				+ * and the STARPU_NMICDEVS request, capped at STARPU_MAXMICDEVS. */
				+static void
				+_starpu_init_mp_config (struct _starpu_machine_config *config,
				+			struct starpu_conf *user_conf)
				+{
				+    struct starpu_machine_topology *topology = &config->topology;
				+
				+    // We currently only support MIC at this level.
				+#ifdef STARPU_USE_MIC
				+    /* One slot per supported device, so that indexing by device number
				+     * can never run past the end of the tables. */
				+    static COIENGINE handles[STARPU_MAXMICDEVS];
				+    static COIPROCESS process[STARPU_MAXMICDEVS];
				+
				+    /* Discover and initialize the number of MIC nodes through the mp
				+     * infrastructure. */
				+    unsigned nhwmicdevices = _starpu_mic_src_get_device_count();
				+
				+    int reqmicdevices = starpu_get_env_number("STARPU_NMICDEVS");
				+    if (-1 == reqmicdevices)
				+	reqmicdevices = nhwmicdevices;
				+
				+    unsigned nmicdevs = STARPU_MIN (nhwmicdevices, reqmicdevices);
				+    if (nmicdevs > STARPU_MAXMICDEVS)
				+    {
				+	fprintf(stderr,
				+		"# Warning: %u MIC devices selected. Only %d supported.\n",
				+		nmicdevs, (int) STARPU_MAXMICDEVS);
				+	nmicdevs = STARPU_MAXMICDEVS;
				+    }
				+
				+    /* NOTE(review): only successful initializations are counted, but the
				+     * configuration loop below walks indices 0..nmicdevices-1; if a
				+     * lower-numbered node fails while a later one succeeds the two no
				+     * longer line up -- confirm this cannot happen. */
				+    topology->nmicdevices = 0;
				+    unsigned i;
				+    for (i = 0; i < nmicdevs; i++)
				+	if (0 == _starpu_init_mic_node (config, i, &handles[i], &process[i]))
				+	    topology->nmicdevices++;
				+
				+    for (i = 0; i < topology->nmicdevices; i++)
				+	_starpu_init_mic_config (config, user_conf, i);
				+#endif
				+}
			
 
				+#endif
			
 
				+
			
 
				 static int
			
 
				-_starpu_init_machine_config (struct _starpu_machine_config *config)
			
 
				+_starpu_init_machine_config (struct _starpu_machine_config *config, int no_mp_config)
			
 
				 {
			
 
				 	int i;
			
 
				 	for (i = 0; i < STARPU_NMAXWORKERS; i++)
			
@@ -498,6 +745,7 @@ _starpu_init_machine_config (struct _starpu_machine_config *config)
 
				 		int devid = _starpu_get_next_cuda_gpuid(config);
			
 
				 		enum starpu_perfmodel_archtype arch =
			
 
				 			(enum starpu_perfmodel_archtype)((int)STARPU_CUDA_DEFAULT + devid);
			
 
				+		config->workers[worker_idx].mp_nodeid = -1;
			
 
				 		config->workers[worker_idx].devid = devid;
			
 
				 		config->workers[worker_idx].perf_arch = arch;
			
 
				 		config->workers[worker_idx].worker_mask = STARPU_CUDA;
			
@@ -572,6 +820,7 @@ _starpu_init_machine_config (struct _starpu_machine_config *config)
 
				 		config->workers[worker_idx].arch = STARPU_OPENCL_WORKER;
			
 
				 		enum starpu_perfmodel_archtype arch =
			
 
				 			(enum starpu_perfmodel_archtype)((int)STARPU_OPENCL_DEFAULT + devid);
			
 
				+		config->workers[worker_idx].mp_nodeid = -1;
			
 
				 		config->workers[worker_idx].devid = devid;
			
 
				 		config->workers[worker_idx].perf_arch = arch;
			
 
				 		config->workers[worker_idx].worker_mask = STARPU_OPENCL;
			
@@ -582,6 +831,78 @@ _starpu_init_machine_config (struct _starpu_machine_config *config)
 
				 	topology->nworkers += topology->nopenclgpus;
			
 
				 #endif
			
 
				 
			
 
				+#ifdef STARPU_USE_SCC
				+	/* Number of SCC devices requested in the configuration:
				+	 * 0 disables SCC, -1 means "use whatever is available". */
				+	int nscc = config->conf->nscc;
				+
				+	unsigned nb_scc_nodes = _starpu_scc_src_get_device_count();
				+
				+	if (nscc != 0)
				+	{
				+		/* The user did not disable SCC. We need to count
				+		 * the number of devices */
				+		int nb_devices = nb_scc_nodes;
				+
				+		if (nscc == -1)
				+		{
				+			/* Nothing was specified, so let's choose ! */
				+			nscc = nb_devices;
				+			if (nscc > STARPU_MAXSCCDEVS)
				+			{
				+				_STARPU_DISP("Warning: %d SCC devices available. Only %d enabled. Use configuration option --enable-maxsccdev=xxx to update the maximum value of supported SCC devices.\n", nb_devices, STARPU_MAXSCCDEVS);
				+				nscc = STARPU_MAXSCCDEVS;
				+			}
				+		}
				+		else
				+		{
				+			/* Let's make sure this value is OK. */
				+			if (nscc > nb_devices)
				+			{
				+				/* The user requires more SCC devices than there are available */
				+				_STARPU_DISP("Warning: %d SCC devices requested. Only %d available.\n", nscc, nb_devices);
				+				nscc = nb_devices;
				+			}
				+			/* Also honour the compile-time limit. */
				+			if (nscc > STARPU_MAXSCCDEVS)
				+			{
				+				_STARPU_DISP("Warning: %d SCC devices requested. Only %d enabled. Use configure option --enable-maxsccdev=xxx to update the maximum value of supported SCC devices.\n", nscc, STARPU_MAXSCCDEVS);
				+				nscc = STARPU_MAXSCCDEVS;
				+			}
				+		}
				+	}
				+
				+	/* Now we know how many SCC devices will be used */
				+	topology->nsccdevices = nscc;
				+	STARPU_ASSERT(topology->nsccdevices + topology->nworkers <= STARPU_NMAXWORKERS);
				+
				+	_starpu_initialize_workers_scc_deviceid(config);
				+
				+	/* One worker per selected SCC device, appended after the workers
				+	 * configured so far. */
				+	unsigned sccdev;
				+	for (sccdev = 0; sccdev < topology->nsccdevices; sccdev++)
				+	{
				+		int worker_idx = topology->nworkers + sccdev;
				+		config->workers[worker_idx].arch = STARPU_SCC_WORKER;
				+		int devid = _starpu_get_next_scc_deviceid(config);
				+		/* Same enum type as the CUDA and OpenCL sections of this function. */
				+		enum starpu_perfmodel_archtype arch =
				+			(enum starpu_perfmodel_archtype)((int)STARPU_SCC_DEFAULT + devid);
				+		config->workers[worker_idx].mp_nodeid = -1;
				+		config->workers[worker_idx].devid = devid;
				+		config->workers[worker_idx].perf_arch = arch;
				+		config->workers[worker_idx].worker_mask = STARPU_SCC;
				+		config->worker_mask |= STARPU_SCC;
				+	}
				+
				+	/* Every detected node beyond the selected ones is told to exit. */
				+	for (; sccdev < nb_scc_nodes; ++sccdev)
				+		_starpu_scc_exit_useless_node(sccdev);
				+
				+	topology->nworkers += topology->nsccdevices;
				+#endif /* STARPU_USE_SCC */
			
 
				+
			
 
				+
			
 
				+	/* Unless not requested, we need to complete configuration with the
			
 
				+	 * ones of the mp nodes. */
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+	if (! no_mp_config)
			
 
				+	    _starpu_init_mp_config (config, user_conf);
			
 
				+#endif
			
 
				+
			
 
				 /* we put the CPU section after the accelerator : in case there was an
			
 
				  * accelerator found, we devote one cpu */
			
 
				 #if defined(STARPU_USE_CPU) || defined(STARPU_SIMGRID)
			
@@ -591,8 +912,15 @@ _starpu_init_machine_config (struct _starpu_machine_config *config)
 
				 	{
			
 
				 		if (ncpu == -1)
			
 
				 		{
			
 
				-			unsigned already_busy_cpus = topology->ncudagpus + topology->nopenclgpus;
			
 
				-			long avail_cpus = topology->nhwcpus - already_busy_cpus;
			
 
				+			unsigned mic_busy_cpus = 0;
			
 
				+			unsigned i = 0;
			
 
				+			for (i = 0; i < STARPU_MAXMICDEVS; i++)
			
 
				+				mic_busy_cpus += (topology->nmiccores[i] ? 1 : 0);
			
 
				+
			
 
				+			unsigned already_busy_cpus = mic_busy_cpus + topology->ncudagpus
			
 
				+				+ topology->nopenclgpus + topology->nsccdevices;
			
 
				+
			
 
				+			long avail_cpus = (long) topology->nhwcpus - (long) already_busy_cpus;
			
 
				 			if (avail_cpus < 0)
			
 
				 				avail_cpus = 0;
			
 
				 			ncpu = STARPU_MIN(avail_cpus, STARPU_MAXCPUS);
			
@@ -617,6 +945,7 @@ _starpu_init_machine_config (struct _starpu_machine_config *config)
 
				 		int worker_idx = topology->nworkers + cpu;
			
 
				 		config->workers[worker_idx].arch = STARPU_CPU_WORKER;
			
 
				 		config->workers[worker_idx].perf_arch = STARPU_CPU_DEFAULT;
			
 
				+		config->workers[worker_idx].mp_nodeid = -1;
			
 
				 		config->workers[worker_idx].devid = cpu;
			
 
				 		config->workers[worker_idx].worker_mask = STARPU_CPU;
			
 
				 		config->worker_mask |= STARPU_CPU;
			
@@ -745,7 +1074,7 @@ _starpu_bind_thread_on_cpus (
 
				 
			
 
				 
			
 
				 static void
			
 
				-_starpu_init_workers_binding (struct _starpu_machine_config *config)
			
 
				+_starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_config)
			
 
				 {
			
 
				 	/* launch one thread per CPU */
			
 
				 	unsigned ram_memory_node;
			
@@ -770,6 +1099,21 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config)
 
				 	 * combinations in a matrix which we initialize here. */
			
 
				 	_starpu_initialize_busid_matrix();
			
 
				 
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+	/* Each MIC device has its own memory node. */
			
 
				+	unsigned mic_memory_nodes[STARPU_MAXMICDEVS];
			
 
				+
			
 
				+	// Register the memory nodes for the MIC devices.
			
 
				+	if (! no_mp_config) {
			
 
				+	    unsigned i = 0;
			
 
				+	    for (i = 0; i < config->topology.nmicdevices; i++) {
			
 
				+		mic_memory_nodes[i] = _starpu_register_memory_node (STARPU_MIC_RAM, i);
			
 
				+		_starpu_register_bus(0, mic_memory_nodes[i]);
			
 
				+		_starpu_register_bus(mic_memory_nodes[i], 0);
			
 
				+	    }
			
 
				+	}
			
 
				+#endif
			
 
				+
			
 
				 	unsigned worker;
			
 
				 	for (worker = 0; worker < config->topology.nworkers; worker++)
			
 
				 	{
			
@@ -852,6 +1196,38 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config)
 
				 				break;
			
 
				 #endif
			
 
				 
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+		        case STARPU_MIC_WORKER:
			
 
				+				//if (may_bind_automatically)
			
 
				+				//{
			
 
				+				//	/* StarPU is allowed to bind threads automatically */
			
 
				+				//	preferred_binding = _starpu_get_mic_affinity_vector(workerarg->devid);
			
 
				+				//	npreferred = config->topology.nhwcpus;
			
 
				+				//}
			
 
				+				is_a_set_of_accelerators = 1;
			
 
				+				memory_node = mic_memory_nodes[workerarg->mp_nodeid];
			
 
				+				_starpu_memory_node_worker_add(memory_node);
			
 
				+				/* memory_node = _starpu_register_memory_node(STARPU_MIC_RAM, workerarg->devid);*/
			
 
				+
			
 
				+				/* _starpu_register_bus(0, memory_node);
			
 
				+				 * _starpu_register_bus(memory_node, 0); */
			
 
				+				break;
			
 
				+#endif /* STARPU_USE_MIC */
			
 
				+
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+			case STARPU_SCC_WORKER:
			
 
				+			{
			
 
				+				/* Node 0 represents the SCC shared memory when we're on SCC. */
			
 
				+				struct _starpu_mem_node_descr *descr = _starpu_get_memory_node_description();
			
 
				+				descr->nodes[ram_memory_node] = STARPU_SCC_SHM;
			
 
				+
			
 
				+				is_a_set_of_accelerators = 0;
			
 
				+				memory_node = ram_memory_node;
			
 
				+				_starpu_memory_node_worker_add(memory_node);
			
 
				+			}
			
 
				+				break;
			
 
				+#endif
			
 
				+
			
 
				 			default:
			
 
				 				STARPU_ABORT();
			
 
				 		}
			
@@ -902,18 +1278,18 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config)
 
				 
			
 
				 
			
 
				 int
			
 
				-_starpu_build_topology (struct _starpu_machine_config *config)
			
 
				+_starpu_build_topology (struct _starpu_machine_config *config, int no_mp_config)
			
 
				 {
			
 
				 	int ret;
			
 
				 
			
 
				-	ret = _starpu_init_machine_config(config);
			
 
				+	ret = _starpu_init_machine_config(config, no_mp_config);
			
 
				 	if (ret)
			
 
				 		return ret;
			
 
				 
			
 
				 	/* for the data management library */
			
 
				 	_starpu_memory_nodes_init();
			
 
				 
			
 
				-	_starpu_init_workers_binding(config);
			
 
				+	_starpu_init_workers_binding(config, no_mp_config);
			
 
				 
			
 
				 	return 0;
			
 
				 }
			
--- a/src/core/topology.h
+++ b/src/core/topology.h
@@ -27,7 +27,7 @@
 
				 struct _starpu_machine_config;
			
 
				 
			
 
				 /* Detect the number of memory nodes and where to bind the different workers. */
			
 
				-int _starpu_build_topology(struct _starpu_machine_config *config);
			
 
				+int _starpu_build_topology(struct _starpu_machine_config *config, int no_mp_config);
			
 
				 
			
 
				 /* Destroy all resources used to store the topology of the machine. */
			
 
				 void _starpu_destroy_topology(struct _starpu_machine_config *config);
			
--- a/src/core/workers.c
+++ b/src/core/workers.c
@@ -28,6 +28,8 @@
 
				 #include <core/task.h>
			
 
				 #include <profiling/profiling.h>
			
 
				 #include <starpu_task_list.h>
			
 
				+#include <drivers/mp_common/sink_common.h>
			
 
				+#include <drivers/scc/driver_scc_common.h>
			
 
				 
			
 
				 #include <drivers/cpu/driver_cpu.h>
			
 
				 #include <drivers/cuda/driver_cuda.h>
			
@@ -51,6 +53,29 @@ static starpu_pthread_key_t worker_key;
 
				 
			
 
				 static struct _starpu_machine_config config;
			
 
				 
			
 
				+/* Addresses of the program's argc and argv, as recorded by
				+ * _starpu_set_argc_argv(); both stay null until that call. */
				+static int *my_argc = NULL;
				+static char ***my_argv = NULL;
				+
				+/* Record the addresses of argc and argv for later retrieval through
				+ * _starpu_get_argc() and _starpu_get_argv(). */
				+void _starpu_set_argc_argv(int *argc_param, char ***argv_param)
				+{
				+	my_argv = argv_param;
				+	my_argc = argc_param;
				+}
				+
				+/* Return the argc address recorded by _starpu_set_argc_argv(). */
				+int *_starpu_get_argc()
				+{
				+	return my_argc;
				+}
				+
				+/* Return the argv address recorded by _starpu_set_argc_argv(). */
				+char ***_starpu_get_argv()
				+{
				+	return my_argv;
				+}
			
 
				+
			
 
				 int _starpu_is_initialized(void)
			
 
				 {
			
 
				 	return initialized == INITIALIZED;
			
@@ -140,6 +165,16 @@ uint32_t _starpu_worker_exists(struct starpu_task *task)
 
				 	    _starpu_worker_exists_and_can_execute(task, STARPU_OPENCL_WORKER))
			
 
				 		return 1;
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+	if ((task->cl->where & STARPU_MIC) &&
			
 
				+	    _starpu_worker_exists_and_can_execute(task, STARPU_MIC_WORKER))
			
 
				+		return 1;
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+	if ((task->cl->where & STARPU_SCC) &&
			
 
				+	    _starpu_worker_exists_and_can_execute(task, STARPU_SCC_WORKER))
			
 
				+		return 1;
			
 
				+#endif
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -158,6 +193,11 @@ uint32_t _starpu_can_submit_opencl_task(void)
 
				 	return (STARPU_OPENCL & config.worker_mask);
			
 
				 }
			
 
				 
			
 
				+/* Non-zero when the STARPU_SCC bit is set in the worker mask of the
				+ * global configuration. */
				+uint32_t _starpu_can_submit_scc_task(void)
				+{
				+	return (config.worker_mask & STARPU_SCC);
				+}
			
 
				+
			
 
				 static int _starpu_can_use_nth_implementation(enum starpu_worker_archtype arch, struct starpu_codelet *cl, unsigned nimpl)
			
 
				 {
			
 
				 	switch(arch)
			
@@ -196,13 +236,26 @@ static int _starpu_can_use_nth_implementation(enum starpu_worker_archtype arch,
 
				 		starpu_opencl_func_t func = _starpu_task_get_opencl_nth_implementation(cl, nimpl);
			
 
				 		return func != NULL;
			
 
				 	}
			
 
				+	case STARPU_MIC_WORKER:
			
 
				+	{
			
 
				+		starpu_mic_func_t func = _starpu_task_get_mic_nth_implementation(cl, nimpl);
			
 
				+		char *func_name = _starpu_task_get_cpu_name_nth_implementation(cl, nimpl);
			
 
				+
			
 
				+		return func != NULL || func_name != NULL;
			
 
				+	}
			
 
				+	case STARPU_SCC_WORKER:
			
 
				+	{
			
 
				+		starpu_scc_func_t func = _starpu_task_get_scc_nth_implementation(cl, nimpl);
			
 
				+		char *func_name = _starpu_task_get_cpu_name_nth_implementation(cl, nimpl);
			
 
				+
			
 
				+		return func != NULL || func_name != NULL;
			
 
				+	}
			
 
				 	default:
			
 
				 		STARPU_ASSERT_MSG(0, "Unknown arch type %d", arch);
			
 
				 	}
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-
			
 
				 int starpu_worker_can_execute_task(unsigned workerid, struct starpu_task *task, unsigned nimpl)
			
 
				 {
			
 
				 	/* TODO: check that the task operand sizes will fit on that device */
			
@@ -255,6 +308,11 @@ int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_tas
 
				  * Runtime initialization methods
			
 
				  */
			
 
				 
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+static unsigned mic_initiated[STARPU_MAXMICDEVS];
			
 
				+static struct _starpu_worker_set mic_worker_set[STARPU_MAXMICDEVS];
			
 
				+#endif
			
 
				+
			
 
				 static void _starpu_init_worker_queue(struct _starpu_worker *workerarg)
			
 
				 {
			
 
				 	starpu_pthread_cond_t *cond = &workerarg->sched_cond;
			
@@ -374,6 +432,9 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
				 	for (worker = 0; worker < nworkers; worker++)
			
 
				 	{
			
 
				 		struct _starpu_worker *workerarg = &pconfig->workers[worker];
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+		unsigned mp_nodeid = workerarg->mp_nodeid;
			
 
				+#endif
			
 
				 
			
 
				 		workerarg->config = pconfig;
			
 
				 
			
@@ -503,6 +564,72 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 
				 #endif
			
 
				 				break;
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_MIC
				+			case STARPU_MIC_WORKER:
				+				/* We use the Gordon approach for the MIC,
				+				 * which consists in spawning only one thread
				+				 * per MIC device, which will control all MIC
				+				 * workers of this device. (by using a worker set). */
				+				if (!mic_initiated[mp_nodeid])
				+				{
				+					/* `pconfig' is the configuration being launched;
				+					 * the file-scope `config' is a struct object, not
				+					 * a pointer. */
				+					mic_worker_set[mp_nodeid].nworkers = pconfig->topology.nmiccores[mp_nodeid];
				+
				+					/* We assume all MIC workers of a given MIC
				+					 * device are contiguous so that we can
				+					 * address them with the first one only. */
				+					mic_worker_set[mp_nodeid].workers = workerarg;
				+					mic_worker_set[mp_nodeid].set_is_initialized = 0;
				+
				+					STARPU_PTHREAD_CREATE(
				+							workerarg->name,
				+							&mic_worker_set[mp_nodeid].worker_thread,
				+							NULL,
				+							_starpu_mic_src_worker,
				+							&mic_worker_set[mp_nodeid]);
				+
				+					/* Wait until the driver thread flags the set
				+					 * as initialized. */
				+					STARPU_PTHREAD_MUTEX_LOCK(&mic_worker_set[mp_nodeid].mutex);
				+					while (!mic_worker_set[mp_nodeid].set_is_initialized)
				+						STARPU_PTHREAD_COND_WAIT(&mic_worker_set[mp_nodeid].ready_cond,
				+									 &mic_worker_set[mp_nodeid].mutex);
				+					STARPU_PTHREAD_MUTEX_UNLOCK(&mic_worker_set[mp_nodeid].mutex);
				+
				+					mic_initiated[mp_nodeid] = 1;
				+				}
				+
				+				/* Done for every MIC worker, whether or not it
				+				 * was the one that started the device thread. */
				+				workerarg->set = &mic_worker_set[mp_nodeid];
				+				mic_worker_set[mp_nodeid].joined = 0;
				+				workerarg->worker_is_running = 1;
				+
				+#ifdef STARPU_USE_FXT
				+				STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
				+				while (!workerarg->worker_is_running)
				+					STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
				+				STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
				+#endif
				+
				+				break;
				+#endif /* STARPU_USE_MIC */
			
 
				+#ifdef STARPU_USE_SCC
				+			case STARPU_SCC_WORKER:
				+				/* Each SCC worker gets its own driver thread and
				+				 * does not belong to a worker set. */
				+				workerarg->set = NULL;
				+				workerarg->worker_is_initialized = 0;
				+				STARPU_PTHREAD_CREATE(
				+						workerarg->name,
				+						&workerarg->worker_thread,
				+						NULL,
				+						_starpu_scc_src_worker,
				+						workerarg);
				+
				+#ifdef STARPU_USE_FXT
				+				/* Wait until the worker is flagged as running. */
				+				STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);
				+				while (!workerarg->worker_is_running)
				+					STARPU_PTHREAD_COND_WAIT(&workerarg->started_cond, &workerarg->mutex);
				+				STARPU_PTHREAD_MUTEX_UNLOCK(&workerarg->mutex);
				+#endif
				+				break;
				+#endif
			
 
				+
			
 
				 			default:
			
 
				 				STARPU_ABORT();
			
 
				 		}
			
@@ -598,8 +725,11 @@ int starpu_conf_init(struct starpu_conf *conf)
 
				 		conf->ncpus = starpu_get_env_number("STARPU_NCPUS");
			
 
				 	conf->ncuda = starpu_get_env_number("STARPU_NCUDA");
			
 
				 	conf->nopencl = starpu_get_env_number("STARPU_NOPENCL");
			
 
				+	conf->nmic = starpu_get_env_number("STARPU_NMIC");
			
 
				+	conf->nscc = starpu_get_env_number("STARPU_NSCC");
			
 
				 	conf->calibrate = starpu_get_env_number("STARPU_CALIBRATE");
			
 
				 	conf->bus_calibrate = starpu_get_env_number("STARPU_BUS_CALIBRATE");
			
 
				+	conf->mic_sink_program_path = getenv("STARPU_MIC_PROGRAM_PATH");
			
 
				 
			
 
				 	if (conf->calibrate == -1)
			
 
				 	     conf->calibrate = 0;
			
@@ -610,6 +740,8 @@ int starpu_conf_init(struct starpu_conf *conf)
 
				 	conf->use_explicit_workers_bindid = 0; /* TODO */
			
 
				 	conf->use_explicit_workers_cuda_gpuid = 0; /* TODO */
			
 
				 	conf->use_explicit_workers_opencl_gpuid = 0; /* TODO */
			
 
				+	conf->use_explicit_workers_mic_deviceid = 0; /* TODO */
			
 
				+	conf->use_explicit_workers_scc_deviceid = 0; /* TODO */
			
 
				 
			
 
				 	conf->single_combined_worker = starpu_get_env_number("STARPU_SINGLE_COMBINED_WORKER");
			
 
				 	if (conf->single_combined_worker == -1)
			
@@ -676,6 +808,32 @@ void _starpu_conf_check_environment(struct starpu_conf *conf)
 
				 
			
 
				 int starpu_init(struct starpu_conf *user_conf)
			
 
				 {
			
 
				+	return starpu_initialize(user_conf, NULL, NULL);
			
 
				+}
			
 
				+
			
 
				+int starpu_initialize(struct starpu_conf *user_conf, int *argc, char ***argv)
			
 
				+{
			
 
				+	int is_a_sink = 0; /* Always defined. If the MP infrastructure is not
			
 
				+			    * used, we cannot be a sink. */
			
 
				+#ifdef STARPU_USE_MP
			
 
				+	_starpu_set_argc_argv(argc, argv);
			
 
				+
			
 
				+#	ifdef STARPU_USE_SCC
			
 
				+	/* In SCC case we look at the rank to know if we are a sink */
			
 
				+	if (_starpu_scc_common_mp_init() && !_starpu_scc_common_is_src_node())
			
 
				+		setenv("STARPU_SINK", "STARPU_SCC", 1);
			
 
				+#	endif
			
 
				+
			
 
				+	/* If StarPU was configured to use MP sinks, we have to control the
			
 
				+	 * kind on node we are running on : host or sink ? */
			
 
				+	if (getenv("STARPU_SINK"))
			
 
				+		is_a_sink = 1;
			
 
				+#else
			
 
				+	(void)argc;
			
 
				+	(void)argv;
			
 
				+
			
 
				+#endif /* STARPU_USE_MP */
			
 
				+
			
 
				 	int ret;
			
 
				 
			
 
				 #ifndef STARPU_SIMGRID
			
@@ -783,11 +941,17 @@ int starpu_init(struct starpu_conf *user_conf)
 
				 
			
 
				 	_starpu_load_bus_performance_files();
			
 
				 
			
 
				-	ret = _starpu_build_topology(&config);
			
 
				+	/* Depending on whether we are a MP sink or not, we must build the
			
 
				+	 * topology with MP nodes or not. */
			
 
				+	ret = _starpu_build_topology(&config, is_a_sink ? 1 : 0);
			
 
				 	if (ret)
			
 
				 	{
			
 
				 		STARPU_PTHREAD_MUTEX_LOCK(&init_mutex);
			
 
				 		init_count--;
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+		if (_starpu_scc_common_is_mp_initialized())
			
 
				+			_starpu_scc_src_mp_deinit();
			
 
				+#endif
			
 
				 		initialized = UNINITIALIZED;
			
 
				 		/* Let somebody else try to do it */
			
 
				 		STARPU_PTHREAD_COND_SIGNAL(&init_cond);
			
@@ -799,6 +963,14 @@ int starpu_init(struct starpu_conf *user_conf)
 
				 	 * threads */
			
 
				 	_starpu_initialize_current_task_key();
			
 
				 
			
 
				+	/* Theoretically, MP sinks should not have to initialize the scheduling
			
 
				+	 * policy: indeed, they do not have their own one but are under the
			
 
				+	 * order of the MP source's one.
			
 
				+	 *
			
 
				+	 * For reasons unknown to me (excluding the fact that this software is
			
 
				+	 * ununderstandable for normally-formed human brains...), skipping
			
 
				+	 * this step makes _starpu_launch_drivers() hang.
			
 
				+	 */
			
 
				 	_starpu_create_sched_ctx(config.conf->sched_policy_name, NULL, -1, 1, "init");
			
 
				 
			
 
				 	_starpu_initialize_registered_performance_models();
			
@@ -813,6 +985,20 @@ int starpu_init(struct starpu_conf *user_conf)
 
				 	STARPU_PTHREAD_MUTEX_UNLOCK(&init_mutex);
			
 
				 
			
 
				 	_STARPU_DEBUG("Initialisation finished\n");
			
 
				+
			
 
				+#ifdef STARPU_USE_MP
			
 
				+	/* Finally, if we are a MP sink, we never leave this function. Instead,
			
 
				+	 * we enter an infinite event loop which listens for MP commands from
			
 
				+	 * the source. */
			
 
				+	if (is_a_sink) {
			
 
				+		_starpu_sink_common_worker();
			
 
				+
			
 
				+		/* We should normally never leave the loop as we don't want to
			
 
				+		 * really initialize STARPU */
			
 
				+		STARPU_ASSERT(0);
			
 
				+	}
			
 
				+#endif
			
 
				+
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -1012,6 +1198,11 @@ void starpu_shutdown(void)
 
				 	if (AYU_event) AYU_event(AYU_FINISH, 0, NULL);
			
 
				 #endif
			
 
				 
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+	if (_starpu_scc_common_is_mp_initialized())
			
 
				+		_starpu_scc_src_mp_deinit();
			
 
				+#endif
			
 
				+
			
 
				 	_STARPU_DEBUG("Shutdown finished\n");
			
 
				 }
			
 
				 
			
@@ -1033,6 +1224,12 @@ int starpu_worker_get_count_by_type(enum starpu_worker_archtype type)
 
				 		case STARPU_OPENCL_WORKER:
			
 
				 			return config.topology.nopenclgpus;
			
 
				 
			
 
				+		case STARPU_MIC_WORKER:
			
 
				+			return config.topology.nmicdevices;
			
 
				+
			
 
				+		case STARPU_SCC_WORKER:
			
 
				+			return config.topology.nsccdevices;
			
 
				+
			
 
				 		default:
			
 
				 			return -EINVAL;
			
 
				 	}
			
@@ -1073,6 +1270,21 @@ int starpu_asynchronous_opencl_copy_disabled(void)
 
				 	return config.conf->disable_asynchronous_opencl_copy;
			
 
				 }
			
 
				 
			
 
				+unsigned starpu_mic_worker_get_count(void)
			
 
				+{
			
 
				+	int i = 0, count = 0;
			
 
				+	
			
 
				+	for (i = 0; i < STARPU_MAXMICDEVS; i++)
			
 
				+		count += config.topology.nmiccores[i];
			
 
				+	
			
 
				+	return count;
			
 
				+}
			
 
				+
			
 
				+unsigned starpu_scc_worker_get_count(void)
			
 
				+{
			
 
				+	return config.topology.nsccdevices;
			
 
				+}
			
 
				+
			
 
				 /* When analyzing performance, it is useful to see what is the processing unit
			
 
				  * that actually performed the task. This function returns the id of the
			
 
				  * processing unit actually executing it, therefore it makes no sense to use it
			
@@ -1146,6 +1358,11 @@ int starpu_combined_worker_get_rank(void)
 
				 	}
			
 
				 }
			
 
				 
			
 
				+int starpu_worker_get_mp_nodeid(int id)
			
 
				+{
			
 
				+	return config.workers[id].mp_nodeid;
			
 
				+}
			
 
				+
			
 
				 int starpu_worker_get_devid(int id)
			
 
				 {
			
 
				 	return config.workers[id].devid;
			
--- a/src/core/workers.h
+++ b/src/core/workers.h
@@ -37,6 +37,15 @@
 
				 #include <drivers/cuda/driver_cuda.h>
			
 
				 #include <drivers/opencl/driver_opencl.h>
			
 
				 
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+#include <drivers/mic/driver_mic_source.h>
			
 
				+#endif /* STARPU_USE_MIC */
			
 
				+
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+#include <drivers/scc/driver_scc_source.h>
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				 #include <drivers/cpu/driver_cpu.h>
			
 
				 
			
 
				 #include <datawizard/datawizard.h>
			
@@ -51,6 +60,8 @@ struct _starpu_worker
 
				 	uint32_t worker_mask; /* what is the type of worker ? */
			
 
				 	enum starpu_perfmodel_archtype perf_arch; /* in case there are different models of the same arch */
			
 
				 	starpu_pthread_t worker_thread; /* the thread which runs the worker */
			
 
				+	int mp_nodeid; /* which mp node hold the cpu/gpu/etc (-1 for this
			
 
				+			* node) */
			
 
				 	unsigned devid; /* which cpu/gpu/etc is controlled by the worker ? */
			
 
				 	int bindid; /* which cpu is the driver bound to ? (logical index) */
			
 
				 	int workerid; /* uniquely identify the worker among all processing units types */
			
@@ -199,6 +210,9 @@ uint32_t _starpu_can_submit_cpu_task(void);
 
				 /* Is there a worker that can execute OpenCL code ? */
			
 
				 uint32_t _starpu_can_submit_opencl_task(void);
			
 
				 
			
 
				+/* Is there a worker that can execute SCC code ? */
			
 
				+uint32_t _starpu_can_submit_scc_task(void);
			
 
				+
			
 
				 /* Check whether there is anything that the worker should do instead of
			
 
				  * sleeping (waiting on something to happen). */
			
 
				 unsigned _starpu_worker_can_block(unsigned memnode);
			
--- a/src/datawizard/coherency.c
+++ b/src/datawizard/coherency.c
@@ -180,6 +180,11 @@ static int worker_supports_direct_access(unsigned node, unsigned handling_node)
 
				 #endif
			
 
				 		case STARPU_OPENCL_RAM:
			
 
				 			return 0;
			
 
				+		case STARPU_MIC_RAM:
			
 
				+			/* We don't handle direct MIC-MIC transfers yet */
			
 
				+			return 0;
			
 
				+		case STARPU_SCC_RAM:
			
 
				+			return 1;
			
 
				 		default:
			
 
				 			return 1;
			
 
				 	}
			
--- a/src/datawizard/copy_driver.c
+++ b/src/datawizard/copy_driver.c
@@ -320,6 +320,93 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 
				 		}
			
 
				 		break;
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_MIC_RAM):
			
 
				+		/* RAM -> MIC */
			
 
				+#	ifdef STARPU_MIC_USE_RMA
			
 
				+		if (!req || starpu_asynchronous_copy_disabled() ||
			
 
				+				!(copy_methods->ram_to_mic_async || copy_methods->any_to_any))
			
 
				+		{
			
 
				+			/* this is not associated to a request so it's synchronous */
			
 
				+			STARPU_ASSERT(copy_methods->ram_to_mic || copy_methods->any_to_any);
			
 
				+			if (copy_methods->ram_to_mic)
			
 
				+				copy_methods->ram_to_mic(src_interface, src_node, dst_interface, dst_node);
			
 
				+			else
			
 
				+				copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			req->async_channel.type = STARPU_MIC_RAM;
			
 
				+			if (copy_methods->ram_to_mic_async)
			
 
				+				ret = copy_methods->ram_to_mic_async(src_interface, src_node, dst_interface, dst_node);
			
 
				+			else
			
 
				+			{
			
 
				+				STARPU_ASSERT(copy_methods->any_to_any);
			
 
				+				ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
			
 
				+			}
			
 
				+			_starpu_mic_init_event(&(req->async_channel.event.mic_event), dst_node);
			
 
				+		}
			
 
				+		break;
			
 
				+#	else
			
 
				+		copy_methods->ram_to_mic(src_interface, src_node, dst_interface, dst_node);
			
 
				+		break;
			
 
				+#	endif
			
 
				+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_MIC_RAM,STARPU_CPU_RAM):
			
 
				+		/* MIC -> RAM */
			
 
				+#	ifdef STARPU_MIC_USE_RMA
			
 
				+		if (!req || starpu_asynchronous_copy_disabled() ||
			
 
				+				!(copy_methods->mic_to_ram_async || copy_methods->any_to_any))
			
 
				+		{
			
 
				+			/* this is not associated to a request so it's synchronous */
			
 
				+			STARPU_ASSERT(copy_methods->mic_to_ram || copy_methods->any_to_any);
			
 
				+			if (copy_methods->mic_to_ram)
			
 
				+				copy_methods->mic_to_ram(src_interface, src_node, dst_interface, dst_node);
			
 
				+			else
			
 
				+				copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				+		}
			
 
				+		else
			
 
				+		{
			
 
				+			req->async_channel.type = STARPU_MIC_RAM;
			
 
				+			if (copy_methods->mic_to_ram_async)
			
 
				+				ret = copy_methods->mic_to_ram_async(src_interface, src_node, dst_interface, dst_node);
			
 
				+			else
			
 
				+			{
			
 
				+				STARPU_ASSERT(copy_methods->any_to_any);
			
 
				+				ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, &req->async_channel);
			
 
				+			}
			
 
				+			_starpu_mic_init_event(&(req->async_channel.event.mic_event), src_node);
			
 
				+		}
			
 
				+		break;
			
 
				+#	else
			
 
				+		copy_methods->mic_to_ram(src_interface, src_node, dst_interface, dst_node);
			
 
				+		break;
			
 
				+#	endif
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+		/* SCC RAM associated to the master process is considered as
			
 
				+		 * the main memory node. */
			
 
				+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_SCC_RAM):
			
 
				+		/* master private SCC RAM -> slave private SCC RAM */
			
 
				+		if (copy_methods->scc_src_to_sink)
			
 
				+			copy_methods->scc_src_to_sink(src_interface, src_node, dst_interface, dst_node);
			
 
				+		else
			
 
				+			copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				+		break;
			
 
				+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_SCC_RAM,STARPU_CPU_RAM):
			
 
				+		/* slave private SCC RAM -> master private SCC RAM */
			
 
				+		if (copy_methods->scc_sink_to_src)
			
 
				+			copy_methods->scc_sink_to_src(src_interface, src_node, dst_interface, dst_node);
			
 
				+		else
			
 
				+			copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				+		break;
			
 
				+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_SCC_RAM,STARPU_SCC_RAM):
			
 
				+		/* slave private SCC RAM -> slave private SCC RAM */
			
 
				+		if (copy_methods->scc_sink_to_sink)
			
 
				+			copy_methods->scc_sink_to_sink(src_interface, src_node, dst_interface, dst_node);
			
 
				+		else
			
 
				+			copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, NULL);
			
 
				+		break;
			
 
				+#endif
			
 
				 	default:
			
 
				 		STARPU_ABORT();
			
 
				 		break;
			
@@ -436,6 +523,47 @@ int starpu_interface_copy(uintptr_t src, size_t src_offset, unsigned src_node, u
 
				 				size,
			
 
				 				&async_channel->event.opencl_event);
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_MIC_RAM,STARPU_CPU_RAM):
			
 
				+		if (async_data)
			
 
				+			return _starpu_mic_copy_ram_to_mic_async(
			
 
				+					(void*) src + src_offset, src_node,
			
 
				+					(void*) dst + dst_offset, dst_node,
			
 
				+					size);
			
 
				+		else
			
 
				+			return _starpu_mic_copy_ram_to_mic(
			
 
				+					(void*) src + src_offset, src_node,
			
 
				+					(void*) dst + dst_offset, dst_node,
			
 
				+					size);
			
 
				+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_MIC_RAM):
			
 
				+		if (async_data)
			
 
				+			return _starpu_mic_copy_mic_to_ram_async(
			
 
				+					(void*) src + src_offset, src_node,
			
 
				+					(void*) dst + dst_offset, dst_node,
			
 
				+					size);
			
 
				+		else
			
 
				+			return _starpu_mic_copy_mic_to_ram(
			
 
				+					(void*) src + src_offset, src_node,
			
 
				+					(void*) dst + dst_offset, dst_node,
			
 
				+					size);
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_SCC_RAM,STARPU_CPU_RAM):
			
 
				+		_starpu_scc_copy_src_to_sink(
			
 
				+				(void*) src + src_offset, src_node,
			
 
				+				(void*) dst + dst_offset, dst_node,
			
 
				+				size);
			
 
				+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_CPU_RAM,STARPU_SCC_RAM):
			
 
				+		_starpu_scc_copy_sink_to_src(
			
 
				+				(void*) src + src_offset, src_node,
			
 
				+				(void*) dst + dst_offset, dst_node,
			
 
				+				size);
			
 
				+	case _STARPU_MEMORY_NODE_TUPLE(STARPU_SCC_RAM,STARPU_SCC_RAM):
			
 
				+		_starpu_scc_copy_sink_to_sink(
			
 
				+				(void*) src + src_offset, src_node,
			
 
				+				(void*) dst + dst_offset, dst_node,
			
 
				+				size);
			
 
				+#endif
			
 
				 	default:
			
 
				 		STARPU_ABORT();
			
 
				 		return -1;
			
@@ -488,6 +616,11 @@ void _starpu_driver_wait_request_completion(struct _starpu_async_channel *async_
 
				 	      break;
			
 
				 	}
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+	case STARPU_MIC_RAM:
			
 
				+		_starpu_mic_wait_request_completion(&(async_channel->event.mic_event));
			
 
				+		break;
			
 
				+#endif
			
 
				 	case STARPU_CPU_RAM:
			
 
				 	default:
			
 
				 		STARPU_ABORT();
			
@@ -539,6 +672,11 @@ unsigned _starpu_driver_test_request_completion(struct _starpu_async_channel *as
 
				 		break;
			
 
				 	}
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+	case STARPU_MIC_RAM:
			
 
				+		success = _starpu_mic_request_is_complete(&(async_channel->event.mic_event));
			
 
				+		break;
			
 
				+#endif
			
 
				 	case STARPU_CPU_RAM:
			
 
				 	default:
			
 
				 		STARPU_ABORT();
			
--- a/src/datawizard/copy_driver.h
+++ b/src/datawizard/copy_driver.h
@@ -36,6 +36,18 @@
 
				 struct _starpu_data_request;
			
 
				 struct _starpu_data_replicate;
			
 
				 
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+/* MIC needs memory_node to know which MIC is concerned.
			
 
				+ * mark is used to wait for an asynchronous request.
			
 
				+ * signal is used to test an asynchronous request. */
			
 
				+struct _starpu_mic_async_event
			
 
				+{
			
 
				+	unsigned memory_node;
			
 
				+	int mark;
			
 
				+	uint64_t *signal;
			
 
				+};
			
 
				+#endif
			
 
				+
			
 
				 /* this is a structure that can be queried to see whether an asynchronous
			
 
				  * transfer has terminated or not */
			
 
				 union _starpu_async_channel_event
			
@@ -54,6 +66,9 @@ union _starpu_async_channel_event
 
				 #ifdef STARPU_USE_OPENCL
			
 
				         cl_event opencl_event;
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+	struct _starpu_mic_async_event mic_event;
			
 
				+#endif
			
 
				 };
			
 
				 
			
 
				 struct _starpu_async_channel
			
--- a/src/datawizard/interfaces/bcsr_filters.c
+++ b/src/datawizard/interfaces/bcsr_filters.c
@@ -35,6 +35,7 @@ void starpu_bcsr_filter_canonical_block(void *father_interface, void *child_inte
 
				 
			
 
				 	uint32_t ptr_offset = c*r*id*elemsize;
			
 
				 
			
 
				+	matrix_child->id = STARPU_MATRIX_INTERFACE_ID;
			
 
				 	matrix_child->nx = c;
			
 
				 	matrix_child->ny = r;
			
 
				 	matrix_child->ld = c;
			
--- a/src/datawizard/interfaces/bcsr_interface.c
+++ b/src/datawizard/interfaces/bcsr_interface.c
@@ -46,7 +46,7 @@ static int bcsr_compare(void *data_interface_a, void *data_interface_b);
 
				 static uint32_t footprint_bcsr_interface_crc32(starpu_data_handle_t handle);
			
 
				 
			
 
				 
			
 
				-static struct starpu_data_interface_ops interface_bcsr_ops =
			
 
				+struct starpu_data_interface_ops starpu_interface_bcsr_ops =
			
 
				 {
			
 
				 	.register_data_handle = register_bcsr_handle,
			
 
				 	.allocate_data_on_node = allocate_bcsr_buffer_on_node,
			
@@ -82,6 +82,7 @@ static void register_bcsr_handle(starpu_data_handle_t handle, unsigned home_node
 
				 			local_interface->rowptr = NULL;
			
 
				 		}
			
 
				 
			
 
				+		local_interface->id = bcsr_interface->id;
			
 
				 		local_interface->nnz = bcsr_interface->nnz;
			
 
				 		local_interface->nrow = bcsr_interface->nrow;
			
 
				 		local_interface->firstentry = bcsr_interface->firstentry;
			
@@ -98,6 +99,7 @@ void starpu_bcsr_data_register(starpu_data_handle_t *handleptr, unsigned home_no
 
				 {
			
 
				 	struct starpu_bcsr_interface bcsr_interface =
			
 
				 	{
			
 
				+		.id = STARPU_BCSR_INTERFACE_ID,
			
 
				 		.nzval = nzval,
			
 
				 		.colind = colind,
			
 
				 		.rowptr = rowptr,
			
@@ -109,7 +111,7 @@ void starpu_bcsr_data_register(starpu_data_handle_t *handleptr, unsigned home_no
 
				 		.elemsize = elemsize
			
 
				 	};
			
 
				 
			
 
				-	starpu_data_register(handleptr, home_node, &bcsr_interface, &interface_bcsr_ops);
			
 
				+	starpu_data_register(handleptr, home_node, &bcsr_interface, &starpu_interface_bcsr_ops);
			
 
				 }
			
 
				 
			
 
				 static uint32_t footprint_bcsr_interface_crc32(starpu_data_handle_t handle)
			
--- a/src/datawizard/interfaces/block_filters.c
+++ b/src/datawizard/interfaces/block_filters.c
@@ -37,6 +37,7 @@ void starpu_block_filter_block(void *father_interface, void *child_interface, ST
 
				 	_starpu_filter_nparts_compute_chunk_size_and_offset(nx, nparts, elemsize, id, 1,
			
 
				 				       &chunk_size, &offset);
			
 
				 
			
 
				+	block_child->id = block_father->id;
			
 
				 	block_child->nx = chunk_size;
			
 
				 	block_child->ny = ny;
			
 
				 	block_child->nz = nz;
			
--- a/src/datawizard/interfaces/block_interface.c
+++ b/src/datawizard/interfaces/block_interface.c
@@ -26,6 +26,8 @@
 
				 #include <starpu_cuda.h>
			
 
				 #include <starpu_opencl.h>
			
 
				 #include <drivers/opencl/driver_opencl.h>
			
 
				+#include <drivers/scc/driver_scc_source.h>
			
 
				+#include <drivers/mic/driver_mic_source.h>
			
 
				 
			
 
				 static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED);
			
 
				 #ifdef STARPU_USE_CUDA
			
@@ -43,6 +45,17 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 
				 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cl_event *event);
			
 
				 static int copy_opencl_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cl_event *event);
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+static int copy_scc_src_to_sink(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_scc_sink_to_src(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_scc_sink_to_sink(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+static int copy_ram_to_mic(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_mic_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_ram_to_mic_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_mic_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+#endif
			
 
				 
			
 
				 static const struct starpu_data_copy_methods block_copy_data_methods_s =
			
 
				 {
			
@@ -74,7 +87,7 @@ static uint32_t footprint_block_interface_crc32(starpu_data_handle_t handle);
 
				 static int block_compare(void *data_interface_a, void *data_interface_b);
			
 
				 static void display_block_interface(starpu_data_handle_t handle, FILE *f);
			
 
				 
			
 
				-static struct starpu_data_interface_ops interface_block_ops =
			
 
				+struct starpu_data_interface_ops starpu_interface_block_ops =
			
 
				 {
			
 
				 	.register_data_handle = register_block_handle,
			
 
				 	.allocate_data_on_node = allocate_block_buffer_on_node,
			
@@ -126,6 +139,7 @@ static void register_block_handle(starpu_data_handle_t handle, unsigned home_nod
 
				 			local_interface->ldz  = 0;
			
 
				 		}
			
 
				 
			
 
				+		local_interface->id = block_interface->id;
			
 
				 		local_interface->nx = block_interface->nx;
			
 
				 		local_interface->ny = block_interface->ny;
			
 
				 		local_interface->nz = block_interface->nz;
			
@@ -140,6 +154,7 @@ void starpu_block_data_register(starpu_data_handle_t *handleptr, unsigned home_n
 
				 {
			
 
				 	struct starpu_block_interface block_interface =
			
 
				 	{
			
 
				+		.id = STARPU_BLOCK_INTERFACE_ID,
			
 
				 		.ptr = ptr,
			
 
				                 .dev_handle = ptr,
			
 
				                 .offset = 0,
			
@@ -151,7 +166,12 @@ void starpu_block_data_register(starpu_data_handle_t *handleptr, unsigned home_n
 
				 		.elemsize = elemsize
			
 
				 	};
			
 
				 
			
 
				-	starpu_data_register(handleptr, home_node, &block_interface, &interface_block_ops);
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+	_starpu_scc_set_offset_in_shared_memory((void*)block_interface.ptr,
			
 
				+			(void**)&(block_interface.dev_handle), &(block_interface.offset));
			
 
				+#endif
			
 
				+
			
 
				+	starpu_data_register(handleptr, home_node, &block_interface, &starpu_interface_block_ops);
			
 
				 }
			
 
				 
			
 
				 static uint32_t footprint_block_interface_crc32(starpu_data_handle_t handle)
			
@@ -584,6 +604,170 @@ static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_A
 
				 
			
 
				 #endif
			
 
				 
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+static int copy_scc_src_to_sink(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	uint32_t nx = STARPU_BLOCK_GET_NX(dst_interface);
			
 
				+	uint32_t ny = STARPU_BLOCK_GET_NY(dst_interface);
			
 
				+	uint32_t nz = STARPU_BLOCK_GET_NZ(dst_interface);
			
 
				+
			
 
				+	size_t elemsize = STARPU_BLOCK_GET_ELEMSIZE(dst_interface);
			
 
				+
			
 
				+	uint32_t src_ldy = STARPU_BLOCK_GET_LDY(src_interface);
			
 
				+	uint32_t src_ldz = STARPU_BLOCK_GET_LDZ(src_interface);
			
 
				+	uint32_t dst_ldy = STARPU_BLOCK_GET_LDY(dst_interface);
			
 
				+	uint32_t dst_ldz = STARPU_BLOCK_GET_LDZ(dst_interface);
			
 
				+
			
 
				+	void *src_ptr = (void *)STARPU_BLOCK_GET_PTR(src_interface);
			
 
				+	void *dst_ptr = (void *)STARPU_BLOCK_GET_PTR(dst_interface);
			
 
				+
			
 
				+	unsigned y, z;
			
 
				+	for (z = 0; z < nz; ++z)
			
 
				+	{
			
 
				+		for (y = 0; y < ny; ++y)
			
 
				+		{
			
 
				+			uint32_t src_offset = (y*src_ldy + z*src_ldz) * elemsize;
			
 
				+			uint32_t dst_offset = (y*dst_ldy + z*dst_ldz) * elemsize;
			
 
				+
			
 
				+			_starpu_scc_copy_src_to_sink(src_ptr + src_offset, src_node,
			
 
				+							dst_ptr + dst_offset, dst_node, nx*elemsize);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, nx*ny*nz*elemsize);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int copy_scc_sink_to_src(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	uint32_t nx = STARPU_BLOCK_GET_NX(dst_interface);
			
 
				+	uint32_t ny = STARPU_BLOCK_GET_NY(dst_interface);
			
 
				+	uint32_t nz = STARPU_BLOCK_GET_NZ(dst_interface);
			
 
				+
			
 
				+	size_t elemsize = STARPU_BLOCK_GET_ELEMSIZE(dst_interface);
			
 
				+
			
 
				+	uint32_t src_ldy = STARPU_BLOCK_GET_LDY(src_interface);
			
 
				+	uint32_t src_ldz = STARPU_BLOCK_GET_LDZ(src_interface);
			
 
				+	uint32_t dst_ldy = STARPU_BLOCK_GET_LDY(dst_interface);
			
 
				+	uint32_t dst_ldz = STARPU_BLOCK_GET_LDZ(dst_interface);
			
 
				+
			
 
				+	void *src_ptr = (void *)STARPU_BLOCK_GET_PTR(src_interface);
			
 
				+	void *dst_ptr = (void *)STARPU_BLOCK_GET_PTR(dst_interface);
			
 
				+
			
 
				+	unsigned y, z;
			
 
				+	for (z = 0; z < nz; ++z)
			
 
				+	{
			
 
				+		for (y = 0; y < ny; ++y)
			
 
				+		{
			
 
				+			uint32_t src_offset = (y*src_ldy + z*src_ldz) * elemsize;
			
 
				+			uint32_t dst_offset = (y*dst_ldy + z*dst_ldz) * elemsize;
			
 
				+
			
 
				+			_starpu_scc_copy_sink_to_src(src_ptr + src_offset, src_node,
			
 
				+							dst_ptr + dst_offset, dst_node, nx*elemsize);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, nx*ny*nz*elemsize);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int copy_scc_sink_to_sink(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	uint32_t nx = STARPU_BLOCK_GET_NX(dst_interface);
			
 
				+	uint32_t ny = STARPU_BLOCK_GET_NY(dst_interface);
			
 
				+	uint32_t nz = STARPU_BLOCK_GET_NZ(dst_interface);
			
 
				+
			
 
				+	size_t elemsize = STARPU_BLOCK_GET_ELEMSIZE(dst_interface);
			
 
				+
			
 
				+	uint32_t src_ldy = STARPU_BLOCK_GET_LDY(src_interface);
			
 
				+	uint32_t src_ldz = STARPU_BLOCK_GET_LDZ(src_interface);
			
 
				+	uint32_t dst_ldy = STARPU_BLOCK_GET_LDY(dst_interface);
			
 
				+	uint32_t dst_ldz = STARPU_BLOCK_GET_LDZ(dst_interface);
			
 
				+
			
 
				+	void *src_ptr = (void *)STARPU_BLOCK_GET_PTR(src_interface);
			
 
				+	void *dst_ptr = (void *)STARPU_BLOCK_GET_PTR(dst_interface);
			
 
				+
			
 
				+	unsigned y, z;
			
 
				+	for (z = 0; z < nz; ++z)
			
 
				+	{
			
 
				+		for (y = 0; y < ny; ++y)
			
 
				+		{
			
 
				+			uint32_t src_offset = (y*src_ldy + z*src_ldz) * elemsize;
			
 
				+			uint32_t dst_offset = (y*dst_ldy + z*dst_ldz) * elemsize;
			
 
				+
			
 
				+			_starpu_scc_copy_sink_to_sink(src_ptr + src_offset, src_node,
			
 
				+					dst_ptr + dst_offset, dst_node, nx*elemsize);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, nx*ny*nz*elemsize);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif /* STARPU_USE_SCC */
			
 
				+
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+static int copy_mic_common(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node,
			
 
				+						   int (*copy_func)(void *, unsigned, void *, unsigned, size_t))
			
 
				+{
			
 
				+	struct starpu_block_interface *src_block = src_interface;
			
 
				+	struct starpu_block_interface *dst_block = dst_interface;
			
 
				+	
			
 
				+	uint32_t nx = dst_block->nx;
			
 
				+	uint32_t ny = dst_block->ny;
			
 
				+	uint32_t nz = dst_block->nz;
			
 
				+	size_t elemsize = dst_block->elemsize;
			
 
				+
			
 
				+	uint32_t ldy_src = src_block->ldy;
			
 
				+	uint32_t ldz_src = src_block->ldz;
			
 
				+	uint32_t ldy_dst = dst_block->ldy;
			
 
				+	uint32_t ldz_dst = dst_block->ldz;
			
 
				+
			
 
				+	uintptr_t ptr_src = src_block->ptr;
			
 
				+	uintptr_t ptr_dst = dst_block->ptr;
			
 
				+
			
 
				+	unsigned y, z;
			
 
				+	for (z = 0; z < nz; z++)
			
 
				+	{
			
 
				+		for (y = 0; y < ny; y++)
			
 
				+		{
			
 
				+			uint32_t src_offset = (y*ldy_src + z*ldz_src)*elemsize;
			
 
				+			uint32_t dst_offset = (y*ldy_dst + z*ldz_dst)*elemsize;
			
 
				+
			
 
				+			copy_func((void *)(ptr_src + src_offset), src_node, (void *)(ptr_dst + dst_offset), dst_node, nx*elemsize);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, nx*ny*nz*elemsize);
			
 
				+
			
 
				+	return 0;
			
 
				+
			
 
				+}
			
 
				+static int copy_ram_to_mic(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	return copy_mic_common(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_ram_to_mic);
			
 
				+}
			
 
				+
			
 
				+static int copy_mic_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	return copy_mic_common(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_mic_to_ram);
			
 
				+}
			
 
				+
			
 
				+static int copy_ram_to_mic_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	copy_mic_common(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_ram_to_mic_async);
			
 
				+	return -EAGAIN;
			
 
				+}
			
 
				+
			
 
				+static int copy_mic_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	copy_mic_common(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_mic_to_ram_async);
			
 
				+	return -EAGAIN;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				 /* as not all platform easily have a BLAS lib installed ... */
			
 
				 static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
			
 
				 {
			
--- a/src/datawizard/interfaces/coo_interface.c
+++ b/src/datawizard/interfaces/coo_interface.c
@@ -89,6 +89,7 @@ register_coo_handle(starpu_data_handle_t handle, unsigned home_node,
 
				 			local_interface->rows = 0;
			
 
				 		}
			
 
				 
			
 
				+		local_interface->id = coo_interface->id;
			
 
				 		local_interface->nx = coo_interface->nx;
			
 
				 		local_interface->ny = coo_interface->ny;
			
 
				 		local_interface->n_values = coo_interface->n_values;
			
@@ -189,7 +190,7 @@ display_coo_interface(starpu_data_handle_t handle, FILE *f)
 
				 	fprintf(f, "%u\t%u", coo_interface->nx, coo_interface->ny);
			
 
				 }
			
 
				 
			
 
				-struct starpu_data_interface_ops _starpu_interface_coo_ops =
			
 
				+struct starpu_data_interface_ops starpu_interface_coo_ops =
			
 
				 {
			
 
				 	.register_data_handle  = register_coo_handle,
			
 
				 	.allocate_data_on_node = allocate_coo_buffer_on_node,
			
@@ -212,6 +213,7 @@ starpu_coo_data_register(starpu_data_handle_t *handleptr, unsigned home_node,
 
				 {
			
 
				 	struct starpu_coo_interface coo_interface =
			
 
				 	{
			
 
				+		.id = STARPU_COO_INTERFACE_ID,
			
 
				 		.values = values,
			
 
				 		.columns = columns,
			
 
				 		.rows = rows,
			
@@ -222,5 +224,5 @@ starpu_coo_data_register(starpu_data_handle_t *handleptr, unsigned home_node,
 
				 	};
			
 
				 
			
 
				 	starpu_data_register(handleptr, home_node, &coo_interface,
			
 
				-			     &_starpu_interface_coo_ops);
			
 
				+			     &starpu_interface_coo_ops);
			
 
				 }
			
--- a/src/datawizard/interfaces/csr_filters.c
+++ b/src/datawizard/interfaces/csr_filters.c
@@ -46,6 +46,7 @@ void starpu_csr_filter_vertical_block(void *father_interface, void *child_interf
 
				 
			
 
				 	uint32_t local_nnz = rowptr[first_index + child_nrow] - rowptr[first_index];
			
 
				 
			
 
				+	csr_child->id = csr_father->id;
			
 
				 	csr_child->nnz = local_nnz;
			
 
				 	csr_child->nrow = child_nrow;
			
 
				 	csr_child->firstentry = local_firstentry;
			
--- a/src/datawizard/interfaces/csr_interface.c
+++ b/src/datawizard/interfaces/csr_interface.c
@@ -27,6 +27,8 @@
 
				 #include <starpu_cuda.h>
			
 
				 #include <starpu_opencl.h>
			
 
				 #include <drivers/opencl/driver_opencl.h>
			
 
				+#include <drivers/scc/driver_scc_source.h>
			
 
				+#include <drivers/mic/driver_mic_source.h>
			
 
				 
			
 
				 static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data);
			
 
				 
			
@@ -42,7 +44,7 @@ static size_t csr_interface_get_size(starpu_data_handle_t handle);
 
				 static int csr_compare(void *data_interface_a, void *data_interface_b);
			
 
				 static uint32_t footprint_csr_interface_crc32(starpu_data_handle_t handle);
			
 
				 
			
 
				-static struct starpu_data_interface_ops interface_csr_ops =
			
 
				+struct starpu_data_interface_ops starpu_interface_csr_ops =
			
 
				 {
			
 
				 	.register_data_handle = register_csr_handle,
			
 
				 	.allocate_data_on_node = allocate_csr_buffer_on_node,
			
@@ -76,6 +78,7 @@ static void register_csr_handle(starpu_data_handle_t handle, unsigned home_node,
 
				 			local_interface->colind = NULL;
			
 
				 		}
			
 
				 
			
 
				+		local_interface->id = csr_interface->id;
			
 
				 		local_interface->rowptr = csr_interface->rowptr;
			
 
				 		local_interface->nnz = csr_interface->nnz;
			
 
				 		local_interface->nrow = csr_interface->nrow;
			
@@ -91,6 +94,7 @@ void starpu_csr_data_register(starpu_data_handle_t *handleptr, unsigned home_nod
 
				 {
			
 
				 	struct starpu_csr_interface csr_interface =
			
 
				 	{
			
 
				+		.id = STARPU_CSR_INTERFACE_ID,
			
 
				 		.nnz = nnz,
			
 
				 		.nrow = nrow,
			
 
				 		.nzval = nzval,
			
@@ -100,7 +104,7 @@ void starpu_csr_data_register(starpu_data_handle_t *handleptr, unsigned home_nod
 
				 		.elemsize = elemsize
			
 
				 	};
			
 
				 
			
 
				-	starpu_data_register(handleptr, home_node, &csr_interface, &interface_csr_ops);
			
 
				+	starpu_data_register(handleptr, home_node, &csr_interface, &starpu_interface_csr_ops);
			
 
				 }
			
 
				 
			
 
				 static uint32_t footprint_csr_interface_crc32(starpu_data_handle_t handle)
			
--- a/src/datawizard/interfaces/data_interface.c
+++ b/src/datawizard/interfaces/data_interface.c
@@ -83,6 +83,40 @@ void _starpu_data_interface_shutdown()
 
				 	registered_tag_handles = NULL;
			
 
				 }
			
 
				 
			
 
				+struct starpu_data_interface_ops *_starpu_data_interface_get_ops(unsigned interface_id)
			
 
				+{
			
 
				+	switch (interface_id)
			
 
				+	{
			
 
				+		case STARPU_MATRIX_INTERFACE_ID:
			
 
				+			return &starpu_interface_matrix_ops;
			
 
				+
			
 
				+		case STARPU_BLOCK_INTERFACE_ID:
			
 
				+			return &starpu_interface_block_ops;
			
 
				+
			
 
				+		case STARPU_VECTOR_INTERFACE_ID:
			
 
				+			return &starpu_interface_vector_ops;
			
 
				+
			
 
				+		case STARPU_CSR_INTERFACE_ID:
			
 
				+			return &starpu_interface_csr_ops;
			
 
				+
			
 
				+		case STARPU_BCSR_INTERFACE_ID:
			
 
				+			return &starpu_interface_bcsr_ops;
			
 
				+
			
 
				+		case STARPU_VARIABLE_INTERFACE_ID:
			
 
				+			return &starpu_interface_variable_ops;
			
 
				+
			
 
				+		case STARPU_VOID_INTERFACE_ID:
			
 
				+			return &starpu_interface_void_ops;
			
 
				+
			
 
				+		case STARPU_MULTIFORMAT_INTERFACE_ID:
			
 
				+			return &starpu_interface_multiformat_ops;
			
 
				+
			
 
				+		default:
			
 
				+			STARPU_ABORT();
			
 
				+			return NULL;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 /* Register the mapping from PTR to HANDLE.  If PTR is already mapped to
			
 
				  * some handle, the new mapping shadows the previous one.   */
			
 
				 void _starpu_data_register_ram_pointer(starpu_data_handle_t handle, void *ptr)
			
@@ -598,6 +632,11 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 
				 					break;
			
 
				 				}
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+				case STARPU_MIC_RAM:
			
 
				+					cl = mf_ops->mic_to_cpu_cl;
			
 
				+					break;
			
 
				+#endif
			
 
				 				case STARPU_CPU_RAM:      /* Impossible ! */
			
 
				 				default:
			
 
				 					STARPU_ABORT();
			
--- a/src/datawizard/interfaces/data_interface.h
+++ b/src/datawizard/interfaces/data_interface.h
@@ -23,6 +23,14 @@
 
				 
			
 
				 /* Some data interfaces or filters use this interface internally */
			
 
				 extern struct starpu_data_interface_ops starpu_interface_matrix_ops;
			
 
				+extern struct starpu_data_interface_ops starpu_interface_block_ops;
			
 
				+extern struct starpu_data_interface_ops starpu_interface_vector_ops;
			
 
				+extern struct starpu_data_interface_ops starpu_interface_csr_ops;
			
 
				+extern struct starpu_data_interface_ops starpu_interface_bcsr_ops;
			
 
				+extern struct starpu_data_interface_ops starpu_interface_variable_ops;
			
 
				+extern struct starpu_data_interface_ops starpu_interface_void_ops;
			
 
				+extern struct starpu_data_interface_ops starpu_interface_multiformat_ops;
			
 
				+
			
 
				 void _starpu_data_free_interfaces(starpu_data_handle_t handle)
			
 
				 	STARPU_ATTRIBUTE_INTERNAL;
			
 
				 
			
@@ -33,6 +41,8 @@ extern void _starpu_data_interface_init(void) STARPU_ATTRIBUTE_INTERNAL;
 
				 extern int _starpu_data_check_not_busy(starpu_data_handle_t handle) STARPU_ATTRIBUTE_INTERNAL;
			
 
				 extern void _starpu_data_interface_shutdown(void) STARPU_ATTRIBUTE_INTERNAL;
			
 
				 
			
 
				+struct starpu_data_interface_ops *_starpu_data_interface_get_ops(unsigned interface_id);
			
 
				+
			
 
				 extern void _starpu_data_register_ram_pointer(starpu_data_handle_t handle,
			
 
				 						void *ptr)
			
 
				 	STARPU_ATTRIBUTE_INTERNAL;
			
--- a/src/datawizard/interfaces/matrix_filters.c
+++ b/src/datawizard/interfaces/matrix_filters.c
@@ -41,6 +41,7 @@ void starpu_matrix_filter_block(void *father_interface, void *child_interface, S
 
				 						     &child_nx, &offset);
			
 
				 
			
 
				 	/* update the child's interface */
			
 
				+	matrix_child->id = matrix_father->id;
			
 
				 	matrix_child->nx = child_nx;
			
 
				 	matrix_child->ny = ny;
			
 
				 	matrix_child->elemsize = elemsize;
			
@@ -115,6 +116,7 @@ void starpu_matrix_filter_vertical_block(void *father_interface, void *child_int
 
				 						     matrix_father->ld,
			
 
				 						     &child_ny, &offset);
			
 
				 
			
 
				+	matrix_child->id = matrix_father->id;
			
 
				 	matrix_child->nx = nx;
			
 
				 	matrix_child->ny = child_ny;
			
 
				 	matrix_child->elemsize = elemsize;
			
--- a/src/datawizard/interfaces/matrix_interface.c
+++ b/src/datawizard/interfaces/matrix_interface.c
@@ -24,6 +24,8 @@
 
				 #include <starpu_cuda.h>
			
 
				 #include <starpu_opencl.h>
			
 
				 #include <drivers/opencl/driver_opencl.h>
			
 
				+#include <drivers/scc/driver_scc_source.h>
			
 
				+#include <drivers/mic/driver_mic_source.h>
			
 
				 
			
 
				 /* If you can promise that there is no stride in your matrices, you can define this */
			
 
				 // #define NO_STRIDE
			
@@ -47,6 +49,17 @@ static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARP
 
				 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cl_event *event);
			
 
				 static int copy_opencl_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, cl_event *event);
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+static int copy_scc_src_to_sink(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_scc_sink_to_src(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_scc_sink_to_sink(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+static int copy_ram_to_mic(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_mic_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_ram_to_mic_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_mic_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+#endif
			
 
				 
			
 
				 static const struct starpu_data_copy_methods matrix_copy_data_methods_s =
			
 
				 {
			
@@ -127,6 +140,7 @@ static void register_matrix_handle(starpu_data_handle_t handle, unsigned home_no
 
				 			local_interface->ld  = 0;
			
 
				 		}
			
 
				 
			
 
				+		local_interface->id = matrix_interface->id;
			
 
				 		local_interface->nx = matrix_interface->nx;
			
 
				 		local_interface->ny = matrix_interface->ny;
			
 
				 		local_interface->elemsize = matrix_interface->elemsize;
			
@@ -151,6 +165,7 @@ void starpu_matrix_data_register(starpu_data_handle_t *handleptr, unsigned home_
 
				 {
			
 
				 	struct starpu_matrix_interface matrix_interface =
			
 
				 	{
			
 
				+		.id = STARPU_MATRIX_INTERFACE_ID,
			
 
				 		.ptr = ptr,
			
 
				 		.ld = ld,
			
 
				 		.nx = nx,
			
@@ -160,6 +175,11 @@ void starpu_matrix_data_register(starpu_data_handle_t *handleptr, unsigned home_
 
				                 .offset = 0
			
 
				 	};
			
 
				 
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+	_starpu_scc_set_offset_in_shared_memory((void*)matrix_interface.ptr,
			
 
				+			(void**)&(matrix_interface.dev_handle), &(matrix_interface.offset));
			
 
				+#endif
			
 
				+
			
 
				 	starpu_data_register(handleptr, home_node, &matrix_interface, &starpu_interface_matrix_ops);
			
 
				 }
			
 
				 
			
@@ -558,6 +578,147 @@ static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_A
 
				 
			
 
				 #endif
			
 
				 
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+static int copy_scc_src_to_sink(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	uint32_t nx = STARPU_MATRIX_GET_NX(dst_interface);
			
 
				+	uint32_t ny = STARPU_MATRIX_GET_NY(dst_interface);
			
 
				+
			
 
				+	size_t elemsize = STARPU_MATRIX_GET_ELEMSIZE(dst_interface);
			
 
				+
			
 
				+	uint32_t src_ld = STARPU_MATRIX_GET_LD(src_interface);
			
 
				+	uint32_t dst_ld = STARPU_MATRIX_GET_LD(dst_interface);
			
 
				+
			
 
				+	void *src_ptr = (void *)STARPU_MATRIX_GET_PTR(src_interface);
			
 
				+	void *dst_ptr = (void *)STARPU_MATRIX_GET_PTR(dst_interface);
			
 
				+
			
 
				+	unsigned y;
			
 
				+	for (y = 0; y < ny; ++y)
			
 
				+	{
			
 
				+		uint32_t src_offset = y*src_ld*elemsize;
			
 
				+		uint32_t dst_offset = y*dst_ld*elemsize;
			
 
				+
			
 
				+		_starpu_scc_copy_src_to_sink(src_ptr + src_offset, src_node,
			
 
				+						dst_ptr + dst_offset, dst_node, nx*elemsize);
			
 
				+	}
			
 
				+
			
 
				+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, nx*ny*elemsize);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int copy_scc_sink_to_src(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	uint32_t nx = STARPU_MATRIX_GET_NX(dst_interface);
			
 
				+	uint32_t ny = STARPU_MATRIX_GET_NY(dst_interface);
			
 
				+
			
 
				+	size_t elemsize = STARPU_MATRIX_GET_ELEMSIZE(dst_interface);
			
 
				+
			
 
				+	uint32_t src_ld = STARPU_MATRIX_GET_LD(src_interface);
			
 
				+	uint32_t dst_ld = STARPU_MATRIX_GET_LD(dst_interface);
			
 
				+
			
 
				+	void *src_ptr = (void *)STARPU_MATRIX_GET_PTR(src_interface);
			
 
				+	void *dst_ptr = (void *)STARPU_MATRIX_GET_PTR(dst_interface);
			
 
				+
			
 
				+	unsigned y;
			
 
				+	for (y = 0; y < ny; ++y)
			
 
				+	{
			
 
				+		uint32_t src_offset = y*src_ld*elemsize;
			
 
				+		uint32_t dst_offset = y*dst_ld*elemsize;
			
 
				+
			
 
				+		_starpu_scc_copy_sink_to_src(src_ptr + src_offset, src_node,
			
 
				+						dst_ptr + dst_offset, dst_node, nx*elemsize);
			
 
				+	}
			
 
				+
			
 
				+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, nx*ny*elemsize);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int copy_scc_sink_to_sink(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	uint32_t nx = STARPU_MATRIX_GET_NX(dst_interface);
			
 
				+	uint32_t ny = STARPU_MATRIX_GET_NY(dst_interface);
			
 
				+
			
 
				+	size_t elemsize = STARPU_MATRIX_GET_ELEMSIZE(dst_interface);
			
 
				+
			
 
				+	uint32_t src_ld = STARPU_MATRIX_GET_LD(src_interface);
			
 
				+	uint32_t dst_ld = STARPU_MATRIX_GET_LD(dst_interface);
			
 
				+
			
 
				+	void *src_ptr = (void *)STARPU_MATRIX_GET_PTR(src_interface);
			
 
				+	void *dst_ptr = (void *)STARPU_MATRIX_GET_PTR(dst_interface);
			
 
				+
			
 
				+	unsigned y;
			
 
				+	for (y = 0; y < ny; ++y)
			
 
				+	{
			
 
				+		uint32_t src_offset = y*src_ld*elemsize;
			
 
				+		uint32_t dst_offset = y*dst_ld*elemsize;
			
 
				+
			
 
				+		_starpu_scc_copy_sink_to_sink(src_ptr + src_offset, src_node,
			
 
				+						dst_ptr + dst_offset, dst_node, nx*elemsize);
			
 
				+	}
			
 
				+
			
 
				+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, nx*ny*elemsize);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+#endif /* STARPU_USE_SCC */
			
 
				+
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+static int copy_mic_common(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node,
			
 
				+						   int (*copy_func)(void *, unsigned, void *, unsigned, size_t))
			
 
				+{
			
 
				+	struct starpu_matrix_interface *src_matrix = src_interface;
			
 
				+	struct starpu_matrix_interface *dst_matrix = dst_interface;
			
 
				+
			
 
				+	unsigned y;
			
 
				+	uint32_t nx = dst_matrix->nx;
			
 
				+	uint32_t ny = dst_matrix->ny;
			
 
				+	size_t elemsize = dst_matrix->elemsize;
			
 
				+
			
 
				+	uint32_t ld_src = src_matrix->ld;
			
 
				+	uint32_t ld_dst = dst_matrix->ld;
			
 
				+
			
 
				+	uintptr_t ptr_src = src_matrix->ptr;
			
 
				+	uintptr_t ptr_dst = dst_matrix->ptr;
			
 
				+
			
 
				+
			
 
				+	for (y = 0; y < ny; y++)
			
 
				+	{
			
 
				+		uint32_t src_offset = y*ld_src*elemsize;
			
 
				+		uint32_t dst_offset = y*ld_dst*elemsize;
			
 
				+
			
 
				+		copy_func((void *)(ptr_src + src_offset), src_node, (void *)(ptr_dst + dst_offset), dst_node, nx*elemsize);
			
 
				+	}
			
 
				+
			
 
				+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, (size_t)nx*ny*elemsize);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int copy_ram_to_mic(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	return copy_mic_common(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_ram_to_mic);
			
 
				+}
			
 
				+
			
 
				+static int copy_mic_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	return copy_mic_common(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_mic_to_ram);
			
 
				+}
			
 
				+
			
 
				+static int copy_ram_to_mic_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	copy_mic_common(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_ram_to_mic_async);
			
 
				+	return -EAGAIN;
			
 
				+}
			
 
				+
			
 
				+static int copy_mic_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	copy_mic_common(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_mic_to_ram_async);
			
 
				+	return -EAGAIN;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				 /* as not all platform easily have a  lib installed ... */
			
 
				 static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node STARPU_ATTRIBUTE_UNUSED)
			
 
				 {
			
--- a/src/datawizard/interfaces/multiformat_interface.c
+++ b/src/datawizard/interfaces/multiformat_interface.c
@@ -23,6 +23,7 @@
 
				 #include <starpu_cuda.h>
			
 
				 #include <starpu_opencl.h>
			
 
				 #include <drivers/opencl/driver_opencl.h>
			
 
				+#include <drivers/mic/driver_mic_source.h>
			
 
				 #include <core/task.h>
			
 
				 
			
 
				 static int copy_ram_to_ram(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node);
			
@@ -41,6 +42,12 @@ static int copy_opencl_to_opencl(void *src_interface, unsigned src_node STARPU_A
 
				 static int copy_ram_to_opencl_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cl_event *event);
			
 
				 static int copy_opencl_to_ram_async(void *src_interface, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst_interface, unsigned dst_node, cl_event *event);
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+static int copy_ram_to_mic(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_mic_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_ram_to_mic_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+static int copy_mic_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node);
			
 
				+#endif
			
 
				 
			
 
				 static const struct starpu_data_copy_methods multiformat_copy_data_methods_s =
			
 
				 {
			
@@ -65,6 +72,12 @@ static const struct starpu_data_copy_methods multiformat_copy_data_methods_s =
 
				         .ram_to_opencl_async = copy_ram_to_opencl_async,
			
 
				 	.opencl_to_ram_async = copy_opencl_to_ram_async,
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+	.ram_to_mic = copy_ram_to_mic,
			
 
				+	.mic_to_ram = copy_mic_to_ram,
			
 
				+	.ram_to_mic_async = copy_ram_to_mic_async,
			
 
				+	.mic_to_ram_async = copy_mic_to_ram_async,
			
 
				+#endif
			
 
				 };
			
 
				 
			
 
				 static void register_multiformat_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface);
			
@@ -86,7 +99,7 @@ get_mf_ops(void *data_interface)
 
				 	return mf->ops;
			
 
				 }
			
 
				 
			
 
				-static struct starpu_data_interface_ops interface_multiformat_ops =
			
 
				+struct starpu_data_interface_ops starpu_interface_multiformat_ops =
			
 
				 {
			
 
				 	.register_data_handle  = register_multiformat_handle,
			
 
				 	.allocate_data_on_node = allocate_multiformat_buffer_on_node,
			
@@ -121,6 +134,10 @@ static void *multiformat_handle_to_pointer(starpu_data_handle_t handle, unsigned
 
				 		case STARPU_OPENCL_RAM:
			
 
				 			return multiformat_interface->opencl_ptr;
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+		case STARPU_MIC_RAM:
			
 
				+			return multiformat_interface->mic_ptr;
			
 
				+#endif
			
 
				 		default:
			
 
				 			STARPU_ABORT();
			
 
				 	}
			
@@ -147,6 +164,9 @@ static void register_multiformat_handle(starpu_data_handle_t handle, unsigned ho
 
				 #ifdef STARPU_USE_OPENCL
			
 
				 			local_interface->opencl_ptr = multiformat_interface->opencl_ptr;
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+			local_interface->mic_ptr    = multiformat_interface->mic_ptr;
			
 
				+#endif
			
 
				 		}
			
 
				 		else
			
 
				 		{
			
@@ -157,7 +177,11 @@ static void register_multiformat_handle(starpu_data_handle_t handle, unsigned ho
 
				 #ifdef STARPU_USE_OPENCL
			
 
				 			local_interface->opencl_ptr = NULL;
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+			local_interface->mic_ptr    = NULL;
			
 
				+#endif
			
 
				 		}
			
 
				+		local_interface->id = multiformat_interface->id;
			
 
				 		local_interface->nx = multiformat_interface->nx;
			
 
				 		local_interface->ops = multiformat_interface->ops;
			
 
				 	}
			
@@ -173,17 +197,23 @@ void starpu_multiformat_data_register(starpu_data_handle_t *handleptr,
 
				 	_starpu_codelet_check_deprecated_fields(format_ops->opencl_to_cpu_cl);
			
 
				 	_starpu_codelet_check_deprecated_fields(format_ops->cpu_to_cuda_cl);
			
 
				 	_starpu_codelet_check_deprecated_fields(format_ops->cuda_to_cpu_cl);
			
 
				+	_starpu_codelet_check_deprecated_fields(format_ops->cpu_to_mic_cl);
			
 
				+	_starpu_codelet_check_deprecated_fields(format_ops->mic_to_cpu_cl);
			
 
				+	_starpu_codelet_check_deprecated_fields(format_ops->cpu_to_scc_cl);
			
 
				+	_starpu_codelet_check_deprecated_fields(format_ops->scc_to_cpu_cl);
			
 
				 
			
 
				 	struct starpu_multiformat_interface multiformat =
			
 
				 	{
			
 
				+		.id         = STARPU_MULTIFORMAT_INTERFACE_ID,
			
 
				 		.cpu_ptr    = ptr,
			
 
				 		.cuda_ptr   = NULL,
			
 
				 		.opencl_ptr = NULL,
			
 
				+		.mic_ptr    = NULL,
			
 
				 		.nx         = nobjects,
			
 
				 		.ops        = format_ops
			
 
				 	};
			
 
				 
			
 
				-	starpu_data_register(handleptr, home_node, &multiformat, &interface_multiformat_ops);
			
 
				+	starpu_data_register(handleptr, home_node, &multiformat, &starpu_interface_multiformat_ops);
			
 
				 }
			
 
				 
			
 
				 static uint32_t footprint_multiformat_interface_crc32(starpu_data_handle_t handle)
			
@@ -204,6 +234,12 @@ static int multiformat_compare(void *data_interface_a, void *data_interface_b)
 
				 #ifdef STARPU_USE_OPENCL
			
 
				 			&& (multiformat_a->ops->opencl_elemsize == multiformat_b->ops->opencl_elemsize)
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+		    && (multiformat_a->ops->mic_elemsize == multiformat_b->ops->mic_elemsize)
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+		    && (multiformat_a->ops->scc_elemsize == multiformat_b->ops->scc_elemsize)
			
 
				+#endif
			
 
				 		);
			
 
				 }
			
 
				 
			
@@ -645,3 +681,70 @@ static int copy_opencl_to_opencl(void *src_interface, unsigned src_node,
 
				 	return 0;
			
 
				 }
			
 
				 #endif
			
 
				+
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+static int copy_mic_common_ram_to_mic(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node,
			
 
				+						   int (*copy_func)(void *, unsigned, void *, unsigned, size_t))
			
 
				+{
			
 
				+	struct starpu_multiformat_interface *src_multiformat = src_interface;
			
 
				+	struct starpu_multiformat_interface *dst_multiformat = dst_interface;
			
 
				+
			
 
				+	STARPU_ASSERT(src_multiformat != NULL);
			
 
				+	STARPU_ASSERT(dst_multiformat != NULL);
			
 
				+	STARPU_ASSERT(dst_multiformat->ops != NULL);
			
 
				+
			
 
				+	size_t size = dst_multiformat->nx * dst_multiformat->ops->mic_elemsize;
			
 
				+	if (src_multiformat->mic_ptr == NULL)
			
 
				+	{
			
 
				+		src_multiformat->mic_ptr = malloc(size);
			
 
				+		if (src_multiformat->mic_ptr == NULL)
			
 
				+			return -ENOMEM;
			
 
				+	}
			
 
				+	
			
 
				+	copy_func(src_multiformat->cpu_ptr, src_node, dst_multiformat->cpu_ptr, dst_node, size);
			
 
				+
			
 
				+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int copy_mic_common_mic_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node,
			
 
				+						   int (*copy_func)(void *, unsigned, void *, unsigned, size_t))
			
 
				+{
			
 
				+	struct starpu_multiformat_interface *src_multiformat = src_interface;
			
 
				+	struct starpu_multiformat_interface *dst_multiformat = dst_interface;
			
 
				+
			
 
				+	STARPU_ASSERT(src_multiformat != NULL);
			
 
				+	STARPU_ASSERT(dst_multiformat != NULL);
			
 
				+	STARPU_ASSERT(dst_multiformat->ops != NULL);
			
 
				+			
			
 
				+	size_t size = src_multiformat->nx * src_multiformat->ops->mic_elemsize;
			
 
				+	copy_func(src_multiformat->mic_ptr, src_node, dst_multiformat->mic_ptr, dst_node, size);
			
 
				+
			
 
				+	_STARPU_TRACE_DATA_COPY(src_node, dst_node, size);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int copy_ram_to_mic(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	return copy_mic_common_ram_to_mic(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_ram_to_mic);
			
 
				+}
			
 
				+
			
 
				+static int copy_mic_to_ram(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	return copy_mic_common_mic_to_ram(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_mic_to_ram);
			
 
				+}
			
 
				+
			
 
				+static int copy_ram_to_mic_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	copy_mic_common_ram_to_mic(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_ram_to_mic_async);
			
 
				+	return -EAGAIN;
			
 
				+}
			
 
				+
			
 
				+static int copy_mic_to_ram_async(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node)
			
 
				+{
			
 
				+	copy_mic_common_mic_to_ram(src_interface, src_node, dst_interface, dst_node, _starpu_mic_copy_mic_to_ram_async);
			
 
				+	return -EAGAIN;
			
 
				+}
			
 
				+#endif
			
--- a/src/datawizard/interfaces/variable_interface.c
+++ b/src/datawizard/interfaces/variable_interface.c
@@ -24,6 +24,8 @@
 
				 #include <starpu_cuda.h>
			
 
				 #include <starpu_opencl.h>
			
 
				 #include <drivers/opencl/driver_opencl.h>
			
 
				+#include <drivers/scc/driver_scc_source.h>
			
 
				+#include <drivers/mic/driver_mic_source.h>
			
 
				 
			
 
				 static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data);
			
 
				 
			
@@ -41,7 +43,7 @@ static uint32_t footprint_variable_interface_crc32(starpu_data_handle_t handle);
 
				 static int variable_compare(void *data_interface_a, void *data_interface_b);
			
 
				 static void display_variable_interface(starpu_data_handle_t handle, FILE *f);
			
 
				 
			
 
				-static struct starpu_data_interface_ops interface_variable_ops =
			
 
				+struct starpu_data_interface_ops starpu_interface_variable_ops =
			
 
				 {
			
 
				 	.register_data_handle = register_variable_handle,
			
 
				 	.allocate_data_on_node = allocate_variable_buffer_on_node,
			
@@ -65,6 +67,7 @@ static void *variable_handle_to_pointer(starpu_data_handle_t handle, unsigned no
 
				 
			
 
				 static void register_variable_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface)
			
 
				 {
			
 
				+	struct starpu_variable_interface *variable_interface = (struct starpu_variable_interface *)data_interface;
			
 
				 	unsigned node;
			
 
				 	for (node = 0; node < STARPU_MAXNODES; node++)
			
 
				 	{
			
@@ -73,14 +76,19 @@ static void register_variable_handle(starpu_data_handle_t handle, unsigned home_
 
				 
			
 
				 		if (node == home_node)
			
 
				 		{
			
 
				-			local_interface->ptr = STARPU_VARIABLE_GET_PTR(data_interface);
			
 
				+			local_interface->ptr = variable_interface->ptr;
			
 
				+			local_interface->dev_handle = variable_interface->dev_handle;
			
 
				+			local_interface->offset = variable_interface->offset;
			
 
				 		}
			
 
				 		else
			
 
				 		{
			
 
				 			local_interface->ptr = 0;
			
 
				+			local_interface->dev_handle = 0;
			
 
				+			local_interface->offset = 0;
			
 
				 		}
			
 
				 
			
 
				-		local_interface->elemsize = STARPU_VARIABLE_GET_ELEMSIZE(data_interface);
			
 
				+		local_interface->id = variable_interface->id;
			
 
				+		local_interface->elemsize = variable_interface->elemsize;
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -90,11 +98,19 @@ void starpu_variable_data_register(starpu_data_handle_t *handleptr, unsigned hom
 
				 {
			
 
				 	struct starpu_variable_interface variable =
			
 
				 	{
			
 
				+		.id = STARPU_VARIABLE_INTERFACE_ID,
			
 
				 		.ptr = ptr,
			
 
				+		.dev_handle = ptr,
			
 
				+		.offset = 0,
			
 
				 		.elemsize = elemsize
			
 
				 	};
			
 
				 
			
 
				-	starpu_data_register(handleptr, home_node, &variable, &interface_variable_ops);
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+	_starpu_scc_set_offset_in_shared_memory((void*)variable.ptr, (void**)&(variable.dev_handle),
			
 
				+			&(variable.offset));
			
 
				+#endif
			
 
				+
			
 
				+	starpu_data_register(handleptr, home_node, &variable, &starpu_interface_variable_ops);
			
 
				 }
			
 
				 
			
 
				 
			
--- a/src/datawizard/interfaces/vector_filters.c
+++ b/src/datawizard/interfaces/vector_filters.c
@@ -35,6 +35,7 @@ void starpu_vector_filter_block(void *father_interface, void *child_interface, S
 
				 	_starpu_filter_nparts_compute_chunk_size_and_offset(nx, nchunks, elemsize, id, 1,
			
 
				 						     &child_nx, &offset);
			
 
				 
			
 
				+	vector_child->id = vector_father->id;
			
 
				 	vector_child->nx = child_nx;
			
 
				 	vector_child->elemsize = elemsize;
			
 
				 
			
@@ -95,6 +96,8 @@ void starpu_vector_filter_divide_in_2(void *father_interface, void *child_interf
 
				 
			
 
				 	STARPU_ASSERT_MSG(length_first < nx, "First part is too long: %u vs %u", length_first, nx);
			
 
				 
			
 
				+	vector_child->id = vector_father->id;
			
 
				+
			
 
				 	/* this is the first child */
			
 
				 	if (id == 0)
			
 
				 	{
			
@@ -138,6 +141,7 @@ void starpu_vector_filter_list(void *father_interface, void *child_interface, st
 
				 
			
 
				 	uint32_t chunk_size = length_tab[id];
			
 
				 
			
 
				+	vector_child->id = vector_father->id;
			
 
				 	vector_child->nx = chunk_size;
			
 
				 	vector_child->elemsize = elemsize;
			
 
				 
			
--- a/src/datawizard/interfaces/vector_interface.c
+++ b/src/datawizard/interfaces/vector_interface.c
@@ -24,6 +24,8 @@
 
				 #include <starpu_cuda.h>
			
 
				 #include <starpu_opencl.h>
			
 
				 #include <drivers/opencl/driver_opencl.h>
			
 
				+#include <drivers/mic/driver_mic_source.h>
			
 
				+#include <drivers/scc/driver_scc_source.h>
			
 
				 
			
 
				 static int copy_any_to_any(void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, void *async_data);
			
 
				 
			
@@ -41,7 +43,7 @@ static uint32_t footprint_vector_interface_crc32(starpu_data_handle_t handle);
 
				 static int vector_compare(void *data_interface_a, void *data_interface_b);
			
 
				 static void display_vector_interface(starpu_data_handle_t handle, FILE *f);
			
 
				 
			
 
				-static struct starpu_data_interface_ops interface_vector_ops =
			
 
				+struct starpu_data_interface_ops starpu_interface_vector_ops =
			
 
				 {
			
 
				 	.register_data_handle = register_vector_handle,
			
 
				 	.allocate_data_on_node = allocate_vector_buffer_on_node,
			
@@ -89,6 +91,7 @@ static void register_vector_handle(starpu_data_handle_t handle, unsigned home_no
 
				                         local_interface->offset = 0;
			
 
				 		}
			
 
				 
			
 
				+		local_interface->id = vector_interface->id;
			
 
				 		local_interface->nx = vector_interface->nx;
			
 
				 		local_interface->elemsize = vector_interface->elemsize;
			
 
				 	}
			
@@ -100,6 +103,7 @@ void starpu_vector_data_register(starpu_data_handle_t *handleptr, unsigned home_
 
				 {
			
 
				 	struct starpu_vector_interface vector =
			
 
				 	{
			
 
				+		.id = STARPU_VECTOR_INTERFACE_ID,
			
 
				 		.ptr = ptr,
			
 
				 		.nx = nx,
			
 
				 		.elemsize = elemsize,
			
@@ -107,7 +111,11 @@ void starpu_vector_data_register(starpu_data_handle_t *handleptr, unsigned home_
 
				                 .offset = 0
			
 
				 	};
			
 
				 
			
 
				-	starpu_data_register(handleptr, home_node, &vector, &interface_vector_ops);
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+	_starpu_scc_set_offset_in_shared_memory((void*)vector.ptr, (void**)&(vector.dev_handle), &(vector.offset));
			
 
				+#endif
			
 
				+
			
 
				+	starpu_data_register(handleptr, home_node, &vector, &starpu_interface_vector_ops);
			
 
				 }
			
 
				 
			
 
				 
			
--- a/src/datawizard/interfaces/void_interface.c
+++ b/src/datawizard/interfaces/void_interface.c
@@ -40,7 +40,7 @@ static uint32_t footprint_void_interface_crc32(starpu_data_handle_t handle);
 
				 static int void_compare(void *data_interface_a, void *data_interface_b);
			
 
				 static void display_void_interface(starpu_data_handle_t handle, FILE *f);
			
 
				 
			
 
				-static struct starpu_data_interface_ops interface_void_ops =
			
 
				+struct starpu_data_interface_ops starpu_interface_void_ops =
			
 
				 {
			
 
				 	.register_data_handle = register_void_handle,
			
 
				 	.allocate_data_on_node = allocate_void_buffer_on_node,
			
@@ -64,7 +64,7 @@ static void register_void_handle(starpu_data_handle_t handle STARPU_ATTRIBUTE_UN
 
				 /* declare a new data with the void interface */
			
 
				 void starpu_void_data_register(starpu_data_handle_t *handleptr)
			
 
				 {
			
 
				-	starpu_data_register(handleptr, 0, NULL, &interface_void_ops);
			
 
				+	starpu_data_register(handleptr, 0, NULL, &starpu_interface_void_ops);
			
 
				 }
			
 
				 
			
 
				 
			
--- a/src/datawizard/malloc.c
+++ b/src/datawizard/malloc.c
@@ -177,6 +177,13 @@ int starpu_malloc_flags(void **A, size_t dim, int flags)
 
				 	}
			
 
				 #endif /* STARPU_SIMGRID */
			
 
				 
			
 
				+	if (_starpu_can_submit_scc_task())
			
 
				+	{
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+		_starpu_scc_allocate_shared_memory(A, dim);
			
 
				+#endif
			
 
				+	}
			
 
				+	else
			
 
				 #ifdef STARPU_HAVE_POSIX_MEMALIGN
			
 
				 	if (_malloc_align != sizeof(void*))
			
 
				 	{
			
@@ -318,6 +325,12 @@ int starpu_free_flags(void *A, size_t dim, int flags)
 
				 	}
			
 
				 #endif /* STARPU_SIMGRID */
			
 
				 
			
 
				+	if (_starpu_can_submit_scc_task())
			
 
				+	{
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+		_starpu_scc_free_shared_memory(A, dim);
			
 
				+#endif
			
 
				+	} else
			
 
				 	free(A);
			
 
				 
			
 
				 out:
			
@@ -406,6 +419,18 @@ starpu_malloc_on_node(unsigned dst_node, size_t size)
 
				 #endif
			
 
				 			}
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+		case STARPU_MIC_RAM:
			
 
				+			if (_starpu_mic_allocate_memory((void **)(&addr), size, dst_node))
			
 
				+				addr = 0;
			
 
				+			break;
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+		case STARPU_SCC_RAM:
			
 
				+			if (_starpu_scc_allocate_memory((void **)(&addr), size, dst_node))
			
 
				+				addr = 0;
			
 
				+			break;
			
 
				+#endif
			
 
				 		default:
			
 
				 			STARPU_ABORT();
			
 
				 	}
			
@@ -461,6 +486,16 @@ starpu_free_on_node(unsigned dst_node, uintptr_t addr, size_t size)
 
				                         break;
			
 
				 		}
			
 
				 #endif
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+		case STARPU_MIC_RAM:
			
 
				+			_starpu_mic_free_memory((void*) addr, size, dst_node);
			
 
				+			break;
			
 
				+#endif
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+		case STARPU_SCC_RAM:
			
 
				+			_starpu_scc_free_memory((void *) addr, size, dst_node);
			
 
				+			break;
			
 
				+#endif
			
 
				 		default:
			
 
				 			STARPU_ABORT();
			
 
				 	}
			
--- a/src/datawizard/reduction.c
+++ b/src/datawizard/reduction.c
@@ -20,6 +20,8 @@
 
				 #include <util/starpu_data_cpy.h>
			
 
				 #include <core/task.h>
			
 
				 #include <datawizard/datawizard.h>
			
 
				+#include <drivers/mic/driver_mic_source.h>
			
 
				+#include <drivers/mp_common/source_common.h>
			
 
				 
			
 
				 void starpu_data_set_reduction_methods(starpu_data_handle_t handle,
			
 
				 				       struct starpu_codelet *redux_cl,
			
@@ -68,6 +70,12 @@ void _starpu_redux_init_data_replicate(starpu_data_handle_t handle, struct _star
 
				 		case STARPU_OPENCL_WORKER:
			
 
				 			init_func = _starpu_task_get_opencl_nth_implementation(init_cl, 0);
			
 
				 			break;
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+		case STARPU_MIC_WORKER:
			
 
				+			init_func = _starpu_mic_src_get_kernel_from_codelet(init_cl, 0);
			
 
				+			break;
			
 
				+#endif
			
 
				+			/* TODO: SCC */
			
 
				 		default:
			
 
				 			STARPU_ABORT();
			
 
				 			break;
			
@@ -75,7 +83,22 @@ void _starpu_redux_init_data_replicate(starpu_data_handle_t handle, struct _star
 
				 
			
 
				 	STARPU_ASSERT(init_func);
			
 
				 
			
 
				-	init_func(&replicate->data_interface, NULL);
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+	if (starpu_worker_get_type(workerid) == STARPU_MIC_WORKER)
			
 
				+	{
			
 
				+		const struct _starpu_mp_node *node = _starpu_mic_src_get_actual_thread_mp_node();
			
 
				+
			
 
				+		// XXX: give the correct coreid.
			
 
				+		_starpu_src_common_execute_kernel(node,
			
 
				+						  (void(*)(void))init_func, 0,
			
 
				+						  &handle, &(replicate->data_interface), 1,
			
 
				+						  NULL, 0);
			
 
				+	}
			
 
				+	else
			
 
				+#endif
			
 
				+	{
			
 
				+		init_func(&replicate->data_interface, NULL);
			
 
				+	}
			
 
				 
			
 
				 	replicate->initialized = 1;
			
 
				 }
			
--- a/src/debug/traces/starpu_fxt.c
+++ b/src/debug/traces/starpu_fxt.c
@@ -31,12 +31,16 @@
 
				 static char *cpus_worker_colors[STARPU_NMAXWORKERS] = {"/greens9/7", "/greens9/6", "/greens9/5", "/greens9/4",  "/greens9/9", "/greens9/3",  "/greens9/2",  "/greens9/1"  };
			
 
				 static char *cuda_worker_colors[STARPU_NMAXWORKERS] = {"/ylorrd9/9", "/ylorrd9/6", "/ylorrd9/3", "/ylorrd9/1", "/ylorrd9/8", "/ylorrd9/7", "/ylorrd9/4", "/ylorrd9/2",  "/ylorrd9/1"};
			
 
				 static char *opencl_worker_colors[STARPU_NMAXWORKERS] = {"/blues9/9", "/blues9/6", "/blues9/3", "/blues9/1", "/blues9/8", "/blues9/7", "/blues9/4", "/blues9/2",  "/blues9/1"};
			
 
				+static char *mic_worker_colors[STARPU_NMAXWORKERS] = {"/reds9/9", "/reds9/6", "/reds9/3", "/reds9/1", "/reds9/8", "/reds9/7", "/reds9/4", "/reds9/2",  "/reds9/1"};
			
 
				+static char *scc_worker_colors[STARPU_NMAXWORKERS] = {"/reds9/9", "/reds9/6", "/reds9/3", "/reds9/1", "/reds9/8", "/reds9/7", "/reds9/4", "/reds9/2",  "/reds9/1"};
			
 
				 static char *other_worker_colors[STARPU_NMAXWORKERS] = {"/greys9/9", "/greys9/8", "/greys9/7", "/greys9/6"};
			
 
				 static char *worker_colors[STARPU_NMAXWORKERS];
			
 
				 
			
 
				 static unsigned opencl_index = 0;
			
 
				 static unsigned cuda_index = 0;
			
 
				 static unsigned cpus_index = 0;
			
 
				+static unsigned mic_index = 0;
			
 
				+static unsigned scc_index = 0;
			
 
				 static unsigned other_index = 0;
			
 
				 
			
 
				 static void set_next_other_worker_color(int workerid)
			
@@ -59,6 +63,11 @@ static void set_next_opencl_worker_color(int workerid)
 
				 	worker_colors[workerid] = opencl_worker_colors[opencl_index++];
			
 
				 }
			
 
				 
			
 
				+/* Give worker WORKERID the next colour of the MIC palette. */
				+static void set_next_mic_worker_color(int workerid)
				+{
				+	worker_colors[workerid] = mic_worker_colors[mic_index++];
				+}
				+
				+/* Give worker WORKERID the next colour of the SCC palette.
				+ * handle_worker_init_start() calls this for _STARPU_FUT_SCC_KEY, so it has
				+ * to exist next to its MIC counterpart. */
				+static void set_next_scc_worker_color(int workerid)
				+{
				+	worker_colors[workerid] = scc_worker_colors[scc_index++];
				+}
			
 
				+
			
 
				 static const char *get_worker_color(int workerid)
			
 
				 {
			
 
				 	return worker_colors[workerid];
			
@@ -345,6 +354,16 @@ static void handle_worker_init_start(struct fxt_ev_64 *ev, struct starpu_fxt_opt
 
				 			kindstr = "OPENCL";
			
 
				 			archtype = STARPU_OPENCL_DEFAULT + devid;
			
 
				 			break;
			
 
				+		case _STARPU_FUT_MIC_KEY:
			
 
				+			set_next_mic_worker_color(workerid);
			
 
				+			kindstr = "mic";
			
 
				+			archtype = STARPU_MIC_DEFAULT + devid;
			
 
				+			break;
			
 
				+		case _STARPU_FUT_SCC_KEY:
			
 
				+			set_next_scc_worker_color(workerid);
			
 
				+			kindstr = "scc";
			
 
				+			archtype = STARPU_SCC_DEFAULT + devid;
			
 
				+			break;
			
 
				 		default:
			
 
				 			STARPU_ABORT();
			
 
				 	}
			
--- a/src/drivers/gordon/driver_gordon.c
+++ b/src/drivers/gordon/driver_gordon.c
@@ -374,7 +374,8 @@ void *gordon_worker_inject(struct _starpu_worker_set *arg)
 
				 					struct _starpu_job_list *chunk_list;
			
 
				 					if (chunk != (nchunks -1))
			
 
				 					{
			
 
				-						/* split the list in 2 parts : list = chunk_list | tail */
			
 
				+						/* split the list in 2 parts :
			
 
				+						 * list = chunk_list | tail */
			
 
				 						chunk_list = _starpu_job_list_new();
			
 
				 
			
 
				 						/* find the end */
			
--- a/src/drivers/mic/driver_mic_common.c
+++ b/src/drivers/mic/driver_mic_common.c
@@ -0,0 +1,116 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <drivers/mp_common/mp_common.h>
			
 
				+#include <drivers/mic/driver_mic_common.h>
			
 
				+
			
 
				+
			
 
				+/* Print a human-readable description of the errno-style code STATUS, together
				+ * with the location (FUNC, FILE, LINE) that reported it, then abort through
				+ * STARPU_ASSERT(0). */
				+void _starpu_mic_common_report_scif_error(const char *func, const char *file, const int line, const int status)
				+{
				+	const char *errormsg = strerror(status);
				+	/* LINE is an int, so it is printed with %d rather than %u */
				+	printf("Common: oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
				+	STARPU_ASSERT(0);
				+}
			
 
				+
			
 
				+/* Blocking send of LEN bytes from MSG over NODE's mp_connection endpoint.
				+ * A SCIF failure is reported here, so that the caller (which must be
				+ * generic) does not have to deal with it. */
				+void _starpu_mic_common_send(const struct _starpu_mp_node *node, void *msg, int len)
				+{
				+	int sent = scif_send(node->mp_connection.mic_endpoint, msg, len, SCIF_SEND_BLOCK);
				+
				+	if (sent < 0)
				+		STARPU_MP_COMMON_REPORT_ERROR(node, errno);
				+}
			
 
				+
			
 
				+/* Blocking receive of LEN bytes into MSG from NODE's mp_connection endpoint.
				+ * A SCIF failure is reported here, so that the caller (which must be
				+ * generic) does not have to deal with it. */
				+void _starpu_mic_common_recv(const struct _starpu_mp_node *node, void *msg, int len)
				+{
				+	int received = scif_recv(node->mp_connection.mic_endpoint, msg, len, SCIF_RECV_BLOCK);
				+
				+	if (received < 0)
				+		STARPU_MP_COMMON_REPORT_ERROR(node, errno);
				+}
			
 
				+
			
 
				+/* Blocking send of LEN bytes from MSG over MP_NODE's host_sink_dt_connection
				+ * endpoint.  A SCIF failure is reported here, so that the caller (which must
				+ * be generic) does not have to deal with it. */
				+void _starpu_mic_common_dt_send(const struct _starpu_mp_node *mp_node, void *msg, int len)
				+{
				+	int sent = scif_send(mp_node->host_sink_dt_connection.mic_endpoint, msg, len, SCIF_SEND_BLOCK);
				+
				+	if (sent < 0)
				+		STARPU_MP_COMMON_REPORT_ERROR(mp_node, errno);
				+}
			
 
				+
			
 
				+/* Blocking receive of LEN bytes into MSG from MP_NODE's
				+ * host_sink_dt_connection endpoint.  A SCIF failure is reported here, so
				+ * that the caller (which must be generic) does not have to deal with it. */
				+void _starpu_mic_common_dt_recv(const struct _starpu_mp_node *mp_node, void *msg, int len)
				+{
				+	/* scif_recv() takes the receive-side flag, as in _starpu_mic_common_recv */
				+	if ((scif_recv(mp_node->host_sink_dt_connection.mic_endpoint, msg, len, SCIF_RECV_BLOCK)) < 0)
				+		STARPU_MP_COMMON_REPORT_ERROR(mp_node, errno);
				+}
			
 
				+
			
 
				+/* Open ENDPOINT, bind it to LOCAL_PORT_NUMBER and connect it to port
				+ * REMOTE_PORT_NUMBER of SCIF node REMOTE_NODE.  The connection is retried for
				+ * as long as the peer refuses it (ECONNREFUSED); any other failure is
				+ * reported through STARPU_MIC_COMMON_REPORT_SCIF_ERROR. */
				+void _starpu_mic_common_connect(scif_epd_t *endpoint, uint16_t remote_node,
				+				uint16_t local_port_number, uint16_t remote_port_number)
				+{
				+	/* Address (node, port) of the peer to reach */
				+	struct scif_portID portID;
				+
				+	portID.node = remote_node;
				+	portID.port = remote_port_number;
				+
				+	if ((*endpoint = scif_open()) < 0)
				+		STARPU_MIC_COMMON_REPORT_SCIF_ERROR(errno);
				+
				+	if ((scif_bind(*endpoint, local_port_number)) < 0)
				+		STARPU_MIC_COMMON_REPORT_SCIF_ERROR(errno);
				+
				+	/* On success scif_connect() returns the local port the endpoint is bound
				+	 * to, which is not 0; only a negative result is a failure worth retrying. */
				+	while (scif_connect(*endpoint, &portID) < 0)
				+	{
				+		if (errno != ECONNREFUSED)
				+			STARPU_MIC_COMMON_REPORT_SCIF_ERROR(errno);
				+	}
				+}
			
 
				+
			
 
				+/* Wait for a peer to connect on port PORT_NUMBER and accept it; the
				+ * resulting connected endpoint is stored in ENDPOINT. */
				+void _starpu_mic_common_accept(scif_epd_t *endpoint, uint16_t port_number)
				+{
				+	/* Never read afterwards: it only gives scif_accept() a valid place to
				+	 * write its PEER parameter, so that it does not cause a seg fault */
				+	struct scif_portID peer;
				+
				+	/* Listening endpoint, only needed while the connection is set up */
				+	int listener = scif_open();
				+
				+	if (listener < 0)
				+		STARPU_MIC_COMMON_REPORT_SCIF_ERROR(errno);
				+
				+	if (scif_bind(listener, port_number) < 0)
				+		STARPU_MIC_COMMON_REPORT_SCIF_ERROR(errno);
				+
				+	/* A backlog of 1 is enough: exactly one connection is expected,
				+	 * more would be an error */
				+	if (scif_listen(listener, 1) < 0)
				+		STARPU_MIC_COMMON_REPORT_SCIF_ERROR(errno);
				+
				+	if (scif_accept(listener, &peer, endpoint, SCIF_ACCEPT_SYNC) < 0)
				+		STARPU_MIC_COMMON_REPORT_SCIF_ERROR(errno);
				+
				+	scif_close(listener);
				+}
			
--- a/src/drivers/mic/driver_mic_common.h
+++ b/src/drivers/mic/driver_mic_common.h
@@ -0,0 +1,69 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+
			
 
				+#ifndef __DRIVER_MIC_COMMON_H__
			
 
				+#define __DRIVER_MIC_COMMON_H__
			
 
				+
			
 
				+
			
 
				+#include <common/config.h>
			
 
				+
			
 
				+
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+
			
 
				+/* NOTE(review): presumably converts a StarPU MIC device index into a SCIF
			
 
				+ * node number (index + 1) -- confirm against the callers. */
			
 
				+#define STARPU_TO_MIC_ID(id) ((id) + 1)
			
 
				+
			
 
				+/* First SCIF port number used; every port below is an offset from it. */
			
 
				+#define STARPU_MIC_PORTS_BEGIN 1099
			
 
				+
			
 
				+/* Source port: PORTS_BEGIN.
			
 
				+ * Sink port of device ID: PORTS_BEGIN + 1 + ID. */
			
 
				+#define STARPU_MIC_SOURCE_PORT_NUMBER STARPU_MIC_PORTS_BEGIN
			
 
				+#define STARPU_MIC_SINK_PORT_NUMBER(id) ((id) + STARPU_MIC_PORTS_BEGIN + 1)
			
 
				+
			
 
				+/* Source "dt" port: PORTS_BEGIN + 1 + STARPU_MAXMICDEVS.
			
 
				+ * Sink "dt" port of device ID: PORTS_BEGIN + 2 + STARPU_MAXMICDEVS + ID. */
			
 
				+#define STARPU_MIC_SOURCE_DT_PORT_NUMBER (STARPU_MAXMICDEVS + STARPU_MIC_PORTS_BEGIN + 1)
			
 
				+#define STARPU_MIC_SINK_DT_PORT_NUMBER(id) ((id) + STARPU_MAXMICDEVS + STARPU_MIC_PORTS_BEGIN + 2)
			
 
				+
			
 
				+/* Sink-to-sink "dt" port used by device ME for peer PEER_ID:
			
 
				+ * PORTS_BEGIN + 2 + 2 * STARPU_MAXMICDEVS + ME * STARPU_MAXMICDEVS + PEER_ID. */
			
 
				+#define STARPU_MIC_SINK_SINK_DT_PORT_NUMBER(me, peer_id) \
			
 
				+((me) * STARPU_MAXMICDEVS + (peer_id) +  2 * STARPU_MAXMICDEVS + STARPU_MIC_PORTS_BEGIN + 2)
			
 
				+
			
 
				+/* Page size assumed for memory exchanged with the MIC. */
				+#define STARPU_MIC_PAGE_SIZE 0x1000
				+/* Round SIZE up to the next multiple of STARPU_MIC_PAGE_SIZE (0 stays 0).
				+ * SIZE is evaluated exactly once, so arguments with side effects are safe. */
				+#define STARPU_MIC_GET_PAGE_SIZE_MULTIPLE(size) \
				+((((size) + (STARPU_MIC_PAGE_SIZE - 1)) / STARPU_MIC_PAGE_SIZE) * STARPU_MIC_PAGE_SIZE)
			
 
				+
			
 
				+#define STARPU_MIC_COMMON_REPORT_SCIF_ERROR(status) \
			
 
				+	_starpu_mic_common_report_scif_error(__starpu_func__, __FILE__, __LINE__, status)
			
 
				+
			
 
				+struct _starpu_mic_free_command
			
 
				+{
			
 
				+	void *addr;
			
 
				+	size_t size;
			
 
				+};
			
 
				+
			
 
				+void _starpu_mic_common_report_scif_error(const char *func, const char *file, int line, const int status);
			
 
				+
			
 
				+void _starpu_mic_common_send(const struct _starpu_mp_node *node, void *msg, int len);
			
 
				+
			
 
				+void _starpu_mic_common_recv(const struct _starpu_mp_node *node, void *msg, int len);
			
 
				+
			
 
				+void _starpu_mic_common_dt_send(const struct _starpu_mp_node *node, void *msg, int len);
			
 
				+
			
 
				+void _starpu_mic_common_dt_recv(const struct _starpu_mp_node *node, void *msg, int len);
			
 
				+
			
 
				+void _starpu_mic_common_connect(scif_epd_t *endpoint, uint16_t remote_node, 
			
 
				+				uint16_t local_port_number, uint16_t remote_port_number);
			
 
				+void _starpu_mic_common_accept(scif_epd_t *endpoint, uint16_t port_number);
			
 
				+
			
 
				+#endif /* STARPU_USE_MIC */
			
 
				+
			
 
				+#endif /* __DRIVER_MIC_COMMON_H__ */
			
--- a/src/drivers/mic/driver_mic_sink.c
+++ b/src/drivers/mic/driver_mic_sink.c
@@ -0,0 +1,135 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+
			
 
				+#include <errno.h>
			
 
				+
			
 
				+#include <common/COISysInfo_common.h>
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <drivers/mp_common/mp_common.h>
			
 
				+#include <drivers/mp_common/sink_common.h>
			
 
				+
			
 
				+#include "driver_mic_common.h"
			
 
				+#include "driver_mic_sink.h"
			
 
				+
			
 
				+/* Initialize the MIC sink: accept the two connections coming from the
				+ * source, first on STARPU_MIC_SOURCE_PORT_NUMBER (kept in mp_connection),
				+ * then on STARPU_MIC_SOURCE_DT_PORT_NUMBER (kept in host_sink_dt_connection).
				+ * Connections to the other devices are not implemented yet.
				+ */
				+void _starpu_mic_sink_init(struct _starpu_mp_node *node)
				+{
				+	/* Connections with the source */
				+	_starpu_mic_common_accept(&node->mp_connection.mic_endpoint, STARPU_MIC_SOURCE_PORT_NUMBER);
				+	_starpu_mic_common_accept(&node->host_sink_dt_connection.mic_endpoint, STARPU_MIC_SOURCE_DT_PORT_NUMBER);
				+
				+#if 0
				+	/* Sink-to-sink connections, disabled until they are implemented */
				+	unsigned int i;
				+
				+	node->sink_sink_dt_connections = malloc(node->nb_mp_sinks * sizeof(union _starpu_mp_connection));
				+
				+	for (i = 0; i < (unsigned int)node->devid; ++i)
				+		_starpu_mic_common_connect(&node->sink_sink_dt_connections[i].mic_endpoint,
				+					   STARPU_TO_MIC_ID(i),
				+					   STARPU_MIC_SINK_SINK_DT_PORT_NUMBER(node->devid, i),
				+					   STARPU_MIC_SINK_SINK_DT_PORT_NUMBER(i, node->devid));
				+
				+	for (i = node->devid + 1; i < node->nb_mp_sinks; ++i)
				+		_starpu_mic_common_accept(&node->sink_sink_dt_connections[i].mic_endpoint,
				+					  STARPU_MIC_SINK_SINK_DT_PORT_NUMBER(node->devid, i));
				+#endif
				+}
			
 
				+
			
 
				+/* Deinitialize the MIC sink: close the connections opened by
				+ * _starpu_mic_sink_init, host_sink_dt_connection first, then mp_connection.
				+ */
				+void _starpu_mic_sink_deinit(struct _starpu_mp_node *node)
				+{
				+#if 0
				+	/* Sink-to-sink connections, disabled until they are implemented */
				+	unsigned int i;
				+
				+	for (i = 0; i < node->nb_mp_sinks; ++i)
				+	{
				+		if (i != (unsigned int)node->devid)
				+			scif_close(node->sink_sink_dt_connections[i].mic_endpoint);
				+	}
				+
				+	free(node->sink_sink_dt_connections);
				+#endif
				+
				+	scif_close(node->host_sink_dt_connection.mic_endpoint);
				+	scif_close(node->mp_connection.mic_endpoint);
				+}
			
 
				+
			
 
				+/* Report an error which occurred when using a MIC device: print the
				+ * errno-style code STATUS in a human-readable form, together with the
				+ * location (FUNC, FILE, LINE) that reported it, then abort through
				+ * STARPU_ASSERT(0).
				+ */
				+void _starpu_mic_sink_report_error(const char *func, const char *file, const int line, const int status)
				+{
				+	const char *errormsg = strerror(status);
				+	/* LINE is an int, so it is printed with %d rather than %u */
				+	printf("SINK: oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
				+	STARPU_ASSERT(0);
				+}
			
 
				+
			
 
				+/* Number of cores of the machine running this code (a MIC device or a Xeon
				+ * processor), as reported by COISysGetCoreCount().
				+ */
				+unsigned int _starpu_mic_sink_get_nb_core(void)
				+{
				+	unsigned int nb_core = (unsigned int) COISysGetCoreCount();
				+
				+	return nb_core;
				+}
			
 
				+
			
 
				+/* Allocate memory on the MIC.
				+ * ARG points to the requested size (a size_t, hence ARG_SIZE must be
				+ * sizeof(size_t)).  On success the address of the page-aligned buffer is sent
				+ * back with STARPU_ANSWER_ALLOCATE; on failure a single STARPU_ERROR_ALLOCATE
				+ * is sent instead.  When STARPU_MIC_USE_RMA is defined, the memory is also
				+ * registered for remote direct access on the host_sink_dt_connection
				+ * endpoint. */
				+void _starpu_mic_sink_allocate(const struct _starpu_mp_node *mp_node, void *arg, int arg_size)
				+{
				+	STARPU_ASSERT(arg_size == sizeof(size_t));
				+
				+	void *addr = NULL;
				+	size_t size = *(size_t *)(arg);
				+
				+	if (posix_memalign(&addr, STARPU_MIC_PAGE_SIZE, size) != 0)
				+	{
				+		/* One reply per request: do not fall through to the answer below */
				+		_starpu_mp_common_send_command(mp_node, STARPU_ERROR_ALLOCATE, NULL, 0);
				+		return;
				+	}
				+
				+#ifdef STARPU_MIC_USE_RMA
				+	scif_epd_t epd = mp_node->host_sink_dt_connection.mic_endpoint;
				+	size_t window_size = STARPU_MIC_GET_PAGE_SIZE_MULTIPLE(size);
				+
				+	if (scif_register(epd, addr, window_size, (off_t)addr, SCIF_PROT_READ | SCIF_PROT_WRITE, SCIF_MAP_FIXED) < 0)
				+	{
				+		free(addr);
				+		_starpu_mp_common_send_command(mp_node, STARPU_ERROR_ALLOCATE, NULL, 0);
				+		/* ADDR has just been freed, it must not be sent back as an answer */
				+		return;
				+	}
				+#endif
				+
				+	_starpu_mp_common_send_command(mp_node, STARPU_ANSWER_ALLOCATE, &addr, sizeof(addr));
				+}
			
 
				+
			
 
				+/* Unregister (only when STARPU_MIC_USE_RMA is defined) and free the memory
				+ * described by ARG, which must be a struct _starpu_mic_free_command. */
				+void _starpu_mic_sink_free(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUTE_UNUSED, void *arg, int arg_size)
				+{
				+	STARPU_ASSERT(arg_size == sizeof(struct _starpu_mic_free_command));
				+
				+	struct _starpu_mic_free_command *cmd = (struct _starpu_mic_free_command *)arg;
				+	void *buffer = cmd->addr;
				+
				+#ifdef STARPU_MIC_USE_RMA
				+	/* The window is unregistered with the size rounded up to whole pages */
				+	size_t requested_size = cmd->size;
				+	size_t window_size = STARPU_MIC_GET_PAGE_SIZE_MULTIPLE(requested_size);
				+
				+	scif_unregister(mp_node->host_sink_dt_connection.mic_endpoint, (off_t)buffer, window_size);
				+#endif
				+	free(buffer);
				+}
			
--- a/src/drivers/mic/driver_mic_sink.h
+++ b/src/drivers/mic/driver_mic_sink.h
@@ -0,0 +1,48 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __DRIVER_MIC_SINK_H__
			
 
				+#define __DRIVER_MIC_SINK_H__
			
 
				+
			
 
				+#include <common/config.h>
			
 
				+
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+
			
 
				+#include <scif.h>
			
 
				+
			
 
				+#include <drivers/mp_common/mp_common.h>
			
 
				+#include <drivers/mp_common/sink_common.h>
			
 
				+
			
 
				+
			
 
				+#define STARPU_MIC_SINK_REPORT_ERROR(status) \
			
 
				+	_starpu_mic_sink_report_error(__starpu_func__, __FILE__, __LINE__, status)
			
 
				+
			
 
				+
			
 
				+void _starpu_mic_sink_report_error(const char *func, const char *file, const int line, const int status);
			
 
				+
			
 
				+void _starpu_mic_sink_init(struct _starpu_mp_node *node);
			
 
				+
			
 
				+void _starpu_mic_sink_deinit(struct _starpu_mp_node *node);
			
 
				+
			
 
				+unsigned int _starpu_mic_sink_get_nb_core(void);
			
 
				+
			
 
				+void _starpu_mic_sink_allocate(const struct _starpu_mp_node *mp_node, void *arg, int arg_size);
			
 
				+void _starpu_mic_sink_free(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUTE_UNUSED, void *arg, int arg_size);
			
 
				+
			
 
				+#endif /* STARPU_USE_MIC */
			
 
				+
			
 
				+
			
 
				+#endif /* __DRIVER_MIC_SINK_H__ */
			
--- a/src/drivers/mic/driver_mic_source.c
+++ b/src/drivers/mic/driver_mic_source.c
@@ -0,0 +1,749 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#include <scif.h>
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <starpu_profiling.h>
			
 
				+#include <core/sched_policy.h>
			
 
				+
			
 
				+#include <drivers/driver_common/driver_common.h>
			
 
				+#include <drivers/mp_common/source_common.h>
			
 
				+
			
 
				+#include "driver_mic_common.h"
			
 
				+#include "driver_mic_source.h"
			
 
				+
			
 
				+
			
 
				+/* Array of structures containing all the information needed to
			
 
				+ * exchange messages with the devices */
			
 
				+struct _starpu_mp_node *mic_nodes[STARPU_MAXMICDEVS];
			
 
				+
			
 
				+static COIENGINE handles[STARPU_MAXMICDEVS];
			
 
				+/* static COIPROCESS process[STARPU_MAXMICDEVS]; */
			
 
				+
			
 
				+/* Structure used by the host to store information about a kernel executable on
			
 
				+ * a MIC device : its name, and its address on each device.
			
 
				+ * If a kernel has been initialized, then a lookup has already been achieved and the
			
 
				+ * device knows how to call it, else the host still needs to do a lookup.
			
 
				+ */
			
 
				+struct _starpu_mic_kernel
			
 
				+{
			
 
				+	char *name;
			
 
				+	starpu_mic_kernel_t func[STARPU_MAXMICDEVS];
			
 
				+};
			
 
				+
			
 
				+/* Hash table used to store _starpu_mic_kernel entries
			
 
				+ */
			
 
				+static struct _starpu_htbl kernels_htbl;
			
 
				+
			
 
				+/* Mutex for concurrent access to the table.
			
 
				+ */
			
 
				+starpu_pthread_mutex_t htbl_mutex = PTHREAD_MUTEX_INITIALIZER;
			
 
				+
			
 
				+/* Number of MIC workers initialized.
			
 
				+ */
			
 
				+unsigned int nb_mic_worker_init = 0;
			
 
				+starpu_pthread_mutex_t nb_mic_worker_init_mutex = PTHREAD_MUTEX_INITIALIZER;
			
 
				+
			
 
				+/* Returns the ID of the MIC device controlled by the caller.
			
 
				+ * if the worker doesn't control a MIC device -ENODEV is returned
			
 
				+ */
			
 
				+
			
 
				+//static int _starpu_mic_get_devid(void)
			
 
				+//{
			
 
				+//	struct _starpu_machine_config *config = _starpu_get_machine_config();
			
 
				+//	int workerid = starpu_worker_get_id();
			
 
				+//
			
 
				+//	if (config->workers[workerid].arch != STARPU_MIC_WORKER)
			
 
				+//		return -ENODEV;
			
 
				+//
			
 
				+//	return config->workers[workerid].devid;
			
 
				+//}
			
 
				+
			
 
				+/* Return the entry of mic_nodes selected by the mp_nodeid of the calling
				+ * worker.  Asserts that the caller has a local worker and that its mp_nodeid
				+ * lies in [0, STARPU_MAXMICDEVS). */
				+const struct _starpu_mp_node *_starpu_mic_src_get_actual_thread_mp_node(void)
				+{
				+	struct _starpu_worker *actual_worker = _starpu_get_local_worker_key();
				+	STARPU_ASSERT(actual_worker);
				+
				+	int nodeid = actual_worker->mp_nodeid;
				+	STARPU_ASSERT(nodeid >= 0 && nodeid < STARPU_MAXMICDEVS);
				+
				+	return mic_nodes[nodeid];
				+}
			
 
				+
			
 
				+/* Return the entry of mic_nodes for the device id that
				+ * _starpu_memory_node_to_devid() gives for MEMORY_NODE.  Asserts that this
				+ * id lies in [0, STARPU_MAXMICDEVS). */
				+const struct _starpu_mp_node *_starpu_mic_src_get_mp_node_from_memory_node(int memory_node)
				+{
				+	const int devid = _starpu_memory_node_to_devid(memory_node);
				+
				+	STARPU_ASSERT(devid >= 0 && devid < STARPU_MAXMICDEVS);
				+	return mic_nodes[devid];
				+}
			
 
				+
			
 
				+// Should be obsolete.
			
 
				+/* static void _starpu_mic_src_init_context(int devid,
			
 
				+ * 					 struct starpu_conf *user_conf)
			
 
				+ * {
			
 
				+ * 	COIRESULT res;
			
 
				+ * 	char mic_sink_program_path[1024];
			
 
				+ *
			
 
				+ * 	char ***argv = _starpu_get_argv();
			
 
				+ * 	const char *suffixes[] = {"-mic", "_mic", NULL};
			
 
				+ *
			
 
				+ * 	char devid_env[32];
			
 
				+ * 	sprintf(devid_env, "DEVID=%d", devid);
			
 
				+ *
			
 
				+ * 	char nb_mic_env[32];
			
 
				+ * 	sprintf(nb_mic_env, "NB_MIC=%d", starpu_mic_worker_get_count());
			
 
				+ *
			
 
				+ * 	/\* Environment variables to send to the Sink, it informs it what kind
			
 
				+ * 	 * of node it is (architecture and type) as there is no way to discover
			
 
				+ * 	 * it itself *\/
			
 
				+ * 	const char *mic_sink_env[] = {"STARPU_SINK=STARPU_MIC", devid_env, nb_mic_env, NULL};
			
 
				+ *
			
 
				+ * 	/\* Let's get the helper program to run on the MIC device *\/
			
 
				+ * 	int mic_file_found = _starpu_src_common_locate_file(mic_sink_program_path,
			
 
				+ * 							getenv("STARPU_MIC_SINK_PROGRAM_NAME"),
			
 
				+ * 							getenv("STARPU_MIC_SINK_PROGRAM_PATH"),
			
 
				+ * 							(user_conf == NULL ? NULL : user_conf->mic_sink_program_path),
			
 
				+ * 							(argv ? (*argv)[0] : NULL),
			
 
				+ * 							suffixes);
			
 
				+ *
			
 
				+ * 	STARPU_ASSERT(mic_file_found == 0);
			
 
				+ *
			
 
				+ * 	/\* Let's get the handle which let us manage the remote MIC device *\/
			
 
				+ * 	res = COIEngineGetHandle(COI_ISA_MIC, devid, &handles[devid]);
			
 
				+ * 	if (STARPU_UNLIKELY(res != COI_SUCCESS))
			
 
				+ * 		STARPU_MIC_SRC_REPORT_COI_ERROR(res);
			
 
				+ *
			
 
				+ * 	/\* We launch the helper on the MIC device, which will wait for us
			
 
				+ * 	 * to give it work to do.
			
 
				+ * 	 * As we will communicate further with the device throught scif we
			
 
				+ * 	 * don't need to keep the process pointer *\/
			
 
				+ * 	res = COIProcessCreateFromFile(handles[devid], mic_sink_program_path, 0, NULL, 0,
			
 
				+ * 				       mic_sink_env, 1, NULL, 0, NULL,
			
 
				+ * 				       &process[devid]);
			
 
				+ * 	if (STARPU_UNLIKELY(res != COI_SUCCESS))
			
 
				+ * 		STARPU_MIC_SRC_REPORT_COI_ERROR(res);
			
 
				+ *
			
 
				+ * 	/\* Let's create the node structure, we'll communicate with the peer
			
 
				+ * 	 * through scif thanks to it *\/
			
 
				+ * 	mic_nodes[devid] = _starpu_mp_common_node_create(STARPU_MIC_SOURCE,
			
 
				+ * 							   devid);
			
 
				+ *
			
 
				+ *
			
 
				+ * 	// XXX: this is not replicated in `_starpu_init_mic_node'.
			
 
				+ * 	STARPU_PTHREAD_MUTEX_LOCK(&nb_mic_worker_init_mutex);
			
 
				+ * 	++nb_mic_worker_init;
			
 
				+ * 	STARPU_PTHREAD_MUTEX_UNLOCK(&nb_mic_worker_init_mutex);
			
 
				+ * } */
			
 
				+
			
 
				+/* static void _starpu_mic_src_free_kernel(void *kernel)
			
 
				+ * {
			
 
				+ * 	struct _starpu_mic_kernel *k = kernel;
			
 
				+ *
			
 
				+ * 	free(k->name);
			
 
				+ * 	free(kernel);
			
 
				+ * } */
			
 
				+
			
 
				+/* static void _starpu_mic_src_deinit_context(int devid)
			
 
				+ * {
			
 
				+ * 	_starpu_mp_common_send_command(mic_nodes[devid], STARPU_EXIT, NULL, 0);
			
 
				+ *
			
 
				+ * 	COIProcessDestroy(process[devid], -1, 0, NULL, NULL);
			
 
				+ *
			
 
				+ * 	_starpu_mp_common_node_destroy(mic_nodes[devid]);
			
 
				+ *
			
 
				+ * 	STARPU_PTHREAD_MUTEX_LOCK(&nb_mic_worker_init_mutex);
			
 
				+ * 	unsigned int tmp = --nb_mic_worker_init;
			
 
				+ * 	STARPU_PTHREAD_MUTEX_UNLOCK(&nb_mic_worker_init_mutex);
			
 
				+ *
			
 
				+ * 	if (tmp == 0)
			
 
				+ * 		_starpu_htbl_destroy(&kernels_htbl, _starpu_mic_src_free_kernel);
			
 
				+ * } */
			
 
				+
			
 
				+/* Host-side epilogue of job J, run once the MIC device has reported that
			
 
				+ * the task executed by WORKER is over: it calls, in order, the driver
			
 
				+ * end-of-job hook, the performance feedback update, the task output push
			
 
				+ * and the job termination handler. Always returns 0.
			
 
				+ */
			
 
				+static int
			
 
				+_starpu_mic_src_finalize_job (struct _starpu_job *j, struct _starpu_worker *worker)
			
 
				+{
			
 
				+    const uint32_t output_mask = 0;
			
 
				+    int profiling_enabled = starpu_profiling_status_get();
			
 
				+    struct timespec end_time;
			
 
				+
			
 
				+    /* END_TIME is handed to the end-of-job hook first and then reused as
			
 
				+     * the end of the interval that started at j->cl_start. */
			
 
				+    _starpu_driver_end_job(worker, j, worker->perf_arch, &end_time, 0, profiling_enabled);
			
 
				+    _starpu_driver_update_job_feedback(j, worker, worker->perf_arch, &j->cl_start, &end_time, profiling_enabled);
			
 
				+
			
 
				+    _starpu_push_task_output (j, output_mask);
			
 
				+    _starpu_handle_job_termination(j);
			
 
				+
			
 
				+    return 0;
			
 
				+}
			
 
				+
			
 
				+/* Receive one command from the MIC device served by WORKERSET, which must
			
 
				+ * be STARPU_EXECUTION_COMPLETED, and finalize the job of the worker it
			
 
				+ * designates. The payload starts with an int: the index, inside
			
 
				+ * WORKERSET->workers, of the worker whose task has finished.
			
 
				+ * Always returns 0; an unexpected answer, an out-of-range index or a
			
 
				+ * worker without a current task triggers an assertion.
			
 
				+ */
			
 
				+static int
			
 
				+_starpu_mic_src_process_completed_job (struct _starpu_worker_set *workerset)
			
 
				+{
			
 
				+    struct _starpu_mp_node *node = mic_nodes[workerset->workers[0].mp_nodeid];
			
 
				+    enum _starpu_mp_command answer;
			
 
				+    void *arg;
			
 
				+    int arg_size;
			
 
				+
			
 
				+    answer = _starpu_mp_common_recv_command (node, &arg, &arg_size);
			
 
				+    STARPU_ASSERT (answer == STARPU_EXECUTION_COMPLETED);
			
 
				+
			
 
				+    /* The index comes from the device: validate it before it is used to
			
 
				+     * address the workers array. */
			
 
				+    int coreid = *(int *) arg;
			
 
				+    STARPU_ASSERT (coreid >= 0 && (unsigned) coreid < workerset->nworkers);
			
 
				+
			
 
				+    struct _starpu_worker *worker = &workerset->workers[coreid];
			
 
				+    struct starpu_task *task = worker->current_task;
			
 
				+
			
 
				+    /* A completion is only expected for a worker that has a task in
			
 
				+     * flight. */
			
 
				+    STARPU_ASSERT (task != NULL);
			
 
				+    struct _starpu_job *j = _starpu_get_job_associated_to_task (task);
			
 
				+
			
 
				+    _starpu_mic_src_finalize_job (j, worker);
			
 
				+
			
 
				+    /* The worker is free to receive another task. */
			
 
				+    worker->current_task = NULL;
			
 
				+
			
 
				+    return 0;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+/* Start job J on the MIC core handled by worker ARGS: fetch the task
			
 
				+ * input, resolve the device-side kernel of the codelet, call the driver
			
 
				+ * start-of-job hook and send the execution request to the device.
			
 
				+ * Returns 0 once the request has been sent, or -EAGAIN when the input
			
 
				+ * of the task could not be fetched.
			
 
				+ */
			
 
				+static int _starpu_mic_src_execute_job(struct _starpu_job *j, struct _starpu_worker *args)
			
 
				+{
			
 
				+	int ret;
			
 
				+	uint32_t mask = 0;
			
 
				+
			
 
				+	STARPU_ASSERT(j);
			
 
				+	struct starpu_task *task = j->task;
			
 
				+
			
 
				+	int profiling = starpu_profiling_status_get();
			
 
				+
			
 
				+	STARPU_ASSERT(task);
			
 
				+	struct starpu_codelet *cl = task->cl;
			
 
				+	STARPU_ASSERT(cl);
			
 
				+
			
 
				+	ret = _starpu_fetch_task_input(j, mask);
			
 
				+	if (ret != 0)
			
 
				+	{
			
 
				+		/* there was not enough memory, so the input of
			
 
				+		 * the codelet cannot be fetched ... put the
			
 
				+		 * codelet back, and try it later */
			
 
				+		return -EAGAIN;
			
 
				+	}
			
 
				+
			
 
				+	starpu_mic_kernel_t kernel = _starpu_mic_src_get_kernel_from_codelet(cl, j->nimpl);
			
 
				+
			
 
				+	_starpu_driver_start_job (args, j, &j->cl_start, 0, profiling);
			
 
				+
			
 
				+	/* Only the request is sent here; this function does not wait for
			
 
				+	 * the kernel to finish. */
			
 
				+	_starpu_src_common_execute_kernel_from_task(mic_nodes[args->mp_nodeid],
			
 
				+						    (void (*)(void)) kernel, args->devid, task);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/* Register the device function named FUNC_NAME in the kernel table and
			
 
				+ * return its handle through SYMBOL. If the name is already in the
			
 
				+ * table, the existing handle is returned. The per-device addresses of
			
 
				+ * a new kernel are set to NULL for every detected MIC device.
			
 
				+ * Returns 0 on success, or -ENOMEM when an allocation or the insertion
			
 
				+ * in the table fails (SYMBOL is then left untouched).
			
 
				+ */
			
 
				+int _starpu_mic_src_register_kernel(starpu_mic_func_symbol_t *symbol, const char *func_name)
			
 
				+{
			
 
				+	unsigned int func_name_size = (strlen(func_name) + 1) * sizeof(char);
			
 
				+	unsigned int nb_mic_devices = _starpu_mic_src_get_device_count();
			
 
				+	unsigned int i;
			
 
				+
			
 
				+	STARPU_PTHREAD_MUTEX_LOCK(&htbl_mutex);
			
 
				+	struct _starpu_mic_kernel *kernel = _starpu_htbl_search(&kernels_htbl, func_name, func_name_size);
			
 
				+
			
 
				+	if (kernel != NULL)
			
 
				+	{
			
 
				+		STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
			
 
				+		// Function already in the table.
			
 
				+		*symbol = kernel;
			
 
				+		return 0;
			
 
				+	}
			
 
				+
			
 
				+	kernel = malloc(sizeof(*kernel));
			
 
				+	if (kernel == NULL)
			
 
				+	{
			
 
				+		STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
			
 
				+		return -ENOMEM;
			
 
				+	}
			
 
				+
			
 
				+	kernel->name = malloc(func_name_size);
			
 
				+	if (kernel->name == NULL)
			
 
				+	{
			
 
				+		STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
			
 
				+		free(kernel);
			
 
				+		return -ENOMEM;
			
 
				+	}
			
 
				+
			
 
				+	/* Fully initialize the kernel BEFORE publishing it: as soon as it
			
 
				+	 * is in the table and the mutex is released, another thread may
			
 
				+	 * find it through the search above. */
			
 
				+	memcpy(kernel->name, func_name, func_name_size);
			
 
				+	for (i = 0; i < nb_mic_devices; ++i)
			
 
				+		kernel->func[i] = NULL;
			
 
				+
			
 
				+	int ret = _starpu_htbl_insert(&kernels_htbl, func_name, func_name_size, kernel);
			
 
				+	STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
			
 
				+	if (ret != 0)
			
 
				+	{
			
 
				+		free(kernel->name);
			
 
				+		free(kernel);
			
 
				+		return -ENOMEM;
			
 
				+	}
			
 
				+
			
 
				+	*symbol = kernel;
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/* Return the address of the function registered as SYMBOL on the MIC
			
 
				+ * node of the calling worker. The address is looked up on the device
			
 
				+ * the first time and cached in the kernel structure afterwards.
			
 
				+ * Returns NULL when SYMBOL is NULL, when the calling thread is not a
			
 
				+ * worker, or when the lookup fails.
			
 
				+ */
			
 
				+starpu_mic_kernel_t _starpu_mic_src_get_kernel(starpu_mic_func_symbol_t symbol)
			
 
				+{
			
 
				+	struct _starpu_mic_kernel *kernel = symbol;
			
 
				+
			
 
				+	/* Nothing can be resolved from a symbol that was never registered. */
			
 
				+	if (kernel == NULL)
			
 
				+		return NULL;
			
 
				+
			
 
				+	int workerid = starpu_worker_get_id();
			
 
				+	/* This function has to be called in the codelet only, by the thread
			
 
				+	 * which will handle the task */
			
 
				+	if (workerid < 0)
			
 
				+		return NULL;
			
 
				+
			
 
				+	int nodeid = starpu_worker_get_mp_nodeid(workerid);
			
 
				+
			
 
				+	if (kernel->func[nodeid] == NULL)
			
 
				+	{
			
 
				+		/* First use on this node: ask the device for the address. */
			
 
				+		struct _starpu_mp_node *node = mic_nodes[nodeid];
			
 
				+		int ret = _starpu_src_common_lookup(node, (void (**)(void))&kernel->func[nodeid], kernel->name);
			
 
				+		if (ret)
			
 
				+			return NULL;
			
 
				+	}
			
 
				+
			
 
				+	return kernel->func[nodeid];
			
 
				+}
			
 
				+
			
 
				+/* Report an error which occurred when using a MIC device
			
 
				+ * and print this error in a human-readable style.
			
 
				+ * It handles errors occurring when using COI.
			
 
				+ * FUNC, FILE and LINE locate the failing call, STATUS is the COI result.
			
 
				+ * The message is written to stderr because the assertion that follows
			
 
				+ * may terminate the process before a buffered stdout gets flushed.
			
 
				+ */
			
 
				+
			
 
				+void _starpu_mic_src_report_coi_error(const char *func, const char *file,
			
 
				+				      const int line, const COIRESULT status)
			
 
				+{
			
 
				+	const char *errormsg = COIResultGetName(status);
			
 
				+	/* LINE is an int, hence %d. */
			
 
				+	fprintf(stderr, "SRC: oops in %s (%s:%d)... %d: %s \n", func, file, line, (int) status, errormsg);
			
 
				+	STARPU_ASSERT(0);
			
 
				+}
			
 
				+
			
 
				+/* Report an error which occurred when using a MIC device
			
 
				+ * and print this error in a human-readable style.
			
 
				+ * It handles errors occurring when using SCIF.
			
 
				+ * FUNC, FILE and LINE locate the failing call, STATUS is an errno value
			
 
				+ * (it is decoded with strerror()).
			
 
				+ * The message is written to stderr because the assertion that follows
			
 
				+ * may terminate the process before a buffered stdout gets flushed.
			
 
				+ */
			
 
				+
			
 
				+void _starpu_mic_src_report_scif_error(const char *func, const char *file, const int line, const int status)
			
 
				+{
			
 
				+	const char *errormsg = strerror(status);
			
 
				+	/* LINE is an int, hence %d. */
			
 
				+	fprintf(stderr, "SRC: oops in %s (%s:%d)... %d: %s \n", func, file, line, status, errormsg);
			
 
				+	STARPU_ASSERT(0);
			
 
				+}
			
 
				+
			
 
				+/* Number of MIC devices reported by COI.
			
 
				+ * COI is queried on the first call only; the answer is kept in a static
			
 
				+ * variable and handed back by every later call. A failing query is
			
 
				+ * remembered as "0 devices". */
			
 
				+
			
 
				+unsigned _starpu_mic_src_get_device_count(void)
			
 
				+{
			
 
				+	static unsigned short already_probed = 0;
			
 
				+	static unsigned device_total = 0;
			
 
				+
			
 
				+	/* Fast path: the engine has been queried before. */
			
 
				+	if (already_probed)
			
 
				+		return device_total;
			
 
				+
			
 
				+	/* If something is wrong with the COI engine, we shouldn't
			
 
				+	 * use MIC devices (if there is any...) */
			
 
				+	if (COIEngineGetCount(COI_ISA_MIC, &device_total) != COI_SUCCESS)
			
 
				+		device_total = 0;
			
 
				+
			
 
				+	already_probed = 1;
			
 
				+
			
 
				+	return device_total;
			
 
				+}
			
 
				+
			
 
				+/* Return the number of configured MIC devices, as recorded in the
			
 
				+ * topology of the current machine configuration. */
			
 
				+unsigned starpu_mic_device_get_count(void)
			
 
				+{
			
 
				+    return _starpu_get_machine_config ()->topology.nmicdevices;
			
 
				+}
			
 
				+
			
 
				+/* Return the device-side kernel of the NIMPL-th implementation of CL.
			
 
				+ * If the codelet provides a MIC function for this implementation, it is
			
 
				+ * called and its result is the kernel. Otherwise the CPU function name
			
 
				+ * of the implementation, if any, is registered and resolved on the
			
 
				+ * device. An assertion fires when no kernel could be obtained.
			
 
				+ */
			
 
				+starpu_mic_kernel_t _starpu_mic_src_get_kernel_from_codelet(struct starpu_codelet *cl, unsigned nimpl)
			
 
				+{
			
 
				+	starpu_mic_kernel_t kernel = NULL;
			
 
				+
			
 
				+	starpu_mic_func_t func = _starpu_task_get_mic_nth_implementation(cl, nimpl);
			
 
				+	if (func)
			
 
				+	{
			
 
				+		/* We execute the function contained in the codelet, it must return a
			
 
				+		 * pointer to the function to execute on the device, either specified
			
 
				+		 * directly by the user or by a call to starpu_mic_get_func().
			
 
				+		 */
			
 
				+		kernel = func();
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		/* If the user did not define any starpu_mic_func_t for this
			
 
				+		 * implementation, we try to use the CPU function name.
			
 
				+		 */
			
 
				+		char *func_name = _starpu_task_get_cpu_name_nth_implementation(cl, nimpl);
			
 
				+		if (func_name)
			
 
				+		{
			
 
				+			starpu_mic_func_symbol_t symbol = NULL;
			
 
				+
			
 
				+			/* The symbol is only written when the registration
			
 
				+			 * succeeds, so do not resolve it otherwise: KERNEL
			
 
				+			 * stays NULL and the assertion below reports it. */
			
 
				+			if (_starpu_mic_src_register_kernel(&symbol, func_name) == 0)
			
 
				+				kernel = _starpu_mic_src_get_kernel(symbol);
			
 
				+		}
			
 
				+	}
			
 
				+	STARPU_ASSERT(kernel);
			
 
				+
			
 
				+	return kernel;
			
 
				+}
			
 
				+
			
 
				+/* Set up the MIC source side of NODE by opening its two connections
			
 
				+ * with the sink device designated by NODE->peer_id.
			
 
				+ */
			
 
				+void _starpu_mic_src_init(struct _starpu_mp_node *node)
			
 
				+{
			
 
				+    /* First endpoint: stored in mp_connection, bound to the sink and
			
 
				+     * source port numbers. */
			
 
				+    _starpu_mic_common_connect(&node->mp_connection.mic_endpoint, STARPU_TO_MIC_ID(node->peer_id),
			
 
				+			       STARPU_MIC_SINK_PORT_NUMBER(node->peer_id), STARPU_MIC_SOURCE_PORT_NUMBER);
			
 
				+
			
 
				+    /* Second endpoint: stored in host_sink_dt_connection, bound to the
			
 
				+     * "DT" port numbers. */
			
 
				+    _starpu_mic_common_connect(&node->host_sink_dt_connection.mic_endpoint, STARPU_TO_MIC_ID(node->peer_id),
			
 
				+			       STARPU_MIC_SINK_DT_PORT_NUMBER(node->peer_id), STARPU_MIC_SOURCE_DT_PORT_NUMBER);
			
 
				+}
			
 
				+
			
 
				+/* Tear down the MIC source side of NODE: close the two SCIF endpoints
			
 
				+ * it holds, the host_sink_dt_connection one first.
			
 
				+ */
			
 
				+void _starpu_mic_src_deinit(struct _starpu_mp_node *node)
			
 
				+{
			
 
				+	scif_epd_t dt_endpoint = node->host_sink_dt_connection.mic_endpoint;
			
 
				+	scif_epd_t mp_endpoint = node->mp_connection.mic_endpoint;
			
 
				+
			
 
				+	scif_close(dt_endpoint);
			
 
				+	scif_close(mp_endpoint);
			
 
				+}
			
 
				+
			
 
				+/* Fill INFO with the COI engine information of MIC device DEVID.
			
 
				+ * DEVID must be in [0, STARPU_MAXMICDEVS). A COI failure is reported
			
 
				+ * through STARPU_MIC_SRC_REPORT_COI_ERROR.
			
 
				+ */
			
 
				+static void _starpu_mic_get_engine_info(COI_ENGINE_INFO *info, int devid)
			
 
				+{
			
 
				+	STARPU_ASSERT(devid >= 0 && devid < STARPU_MAXMICDEVS);
			
 
				+
			
 
				+	/* The failure cause is the COIRESULT returned by the call, not
			
 
				+	 * errno: keep it so that the right code gets reported. */
			
 
				+	COIRESULT res = COIEngineGetInfo(handles[devid], sizeof(*info), info);
			
 
				+	if (res != COI_SUCCESS)
			
 
				+		STARPU_MIC_SRC_REPORT_COI_ERROR(res);
			
 
				+}
			
 
				+
			
 
				+/* TODO: call _starpu_memory_manager_set_global_memory_size instead */
			
 
				+/* Return the PhysicalMemory field of the COI engine information of
			
 
				+ * MIC device DEVID, i.e. the size of the memory of that device. */
			
 
				+size_t _starpu_mic_get_global_mem_size(int devid)
			
 
				+{
			
 
				+	COI_ENGINE_INFO engine;
			
 
				+
			
 
				+	_starpu_mic_get_engine_info(&engine, devid);
			
 
				+	return engine.PhysicalMemory;
			
 
				+}
			
 
				+
			
 
				+/* Return the PhysicalMemoryFree field of the COI engine information of
			
 
				+ * MIC device DEVID, i.e. the size of the free memory of that device. */
			
 
				+size_t _starpu_mic_get_free_mem_size(int devid)
			
 
				+{
			
 
				+	COI_ENGINE_INFO engine;
			
 
				+
			
 
				+	_starpu_mic_get_engine_info(&engine, devid);
			
 
				+	return engine.PhysicalMemoryFree;
			
 
				+}
			
 
				+
			
 
				+/* Allocate SIZE bytes on the MIC device backing MEMORY_NODE and store
			
 
				+ * the resulting address in *ADDR.
			
 
				+ * The return value is the one of _starpu_src_common_allocate()
			
 
				+ * (documented so far as 0 if OK, 1 if not).
			
 
				+ */
			
 
				+int _starpu_mic_allocate_memory(void **addr, size_t size, unsigned memory_node)
			
 
				+{
			
 
				+	/* Disabled check, kept for reference: make sure (1.25 * size) bytes
			
 
				+	 * are free on the MIC, because a scif transfer is not possible when
			
 
				+	 * the MIC does not have enough free memory, and in that case
			
 
				+	 * nothing can be told to the host. */
			
 
				+	//int devid = _starpu_memory_node_to_devid(memory_node);
			
 
				+	//if (_starpu_mic_get_free_mem_size(devid) < size * 1.25)
			
 
				+	//	return 1;
			
 
				+
			
 
				+	return _starpu_src_common_allocate(_starpu_mic_src_get_mp_node_from_memory_node(memory_node), addr, size);
			
 
				+}
			
 
				+
			
 
				+/* Free memory on MIC: send a STARPU_FREE command carrying ADDR and SIZE
			
 
				+ * to the device backing MEMORY_NODE.
			
 
				+ * The MIC needs the size to free memory, in order to use the function
			
 
				+ * scif_unregister.
			
 
				+ */
			
 
				+void _starpu_mic_free_memory(void *addr, size_t size, unsigned memory_node)
			
 
				+{
			
 
				+	const struct _starpu_mp_node *mp_node = _starpu_mic_src_get_mp_node_from_memory_node(memory_node);
			
 
				+	struct _starpu_mic_free_command cmd = {addr, size};
			
 
				+
			
 
				+	/* No `return' here: a void function must not return an expression. */
			
 
				+	_starpu_mp_common_send_command(mp_node, STARPU_FREE, &cmd, sizeof(cmd));
			
 
				+}
			
 
				+
			
 
				+/* Synchronous host-to-MIC copy: transfer SIZE bytes from address SRC in
			
 
				+ * host memory to address DST on the MIC device backing DST_NODE.
			
 
				+ * SRC_NODE is not used. The return value is the one of
			
 
				+ * _starpu_src_common_copy_host_to_sink().
			
 
				+ */
			
 
				+int _starpu_mic_copy_ram_to_mic(void *src, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst, unsigned dst_node, size_t size)
			
 
				+{
			
 
				+	return _starpu_src_common_copy_host_to_sink(_starpu_mic_src_get_mp_node_from_memory_node(dst_node), src, dst, size);
			
 
				+}
			
 
				+
			
 
				+/* Synchronous MIC-to-host copy: transfer SIZE bytes from address SRC on
			
 
				+ * the MIC device backing SRC_NODE to address DST in host memory.
			
 
				+ * DST_NODE is not used. The return value is the one of
			
 
				+ * _starpu_src_common_copy_sink_to_host().
			
 
				+ */
			
 
				+int _starpu_mic_copy_mic_to_ram(void *src, unsigned src_node, void *dst, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size)
			
 
				+{
			
 
				+	return _starpu_src_common_copy_sink_to_host(_starpu_mic_src_get_mp_node_from_memory_node(src_node), src, dst, size);
			
 
				+}
			
 
				+
			
 
				+/* Asynchronous transfers */
			
 
				+
			
 
				+/* Issue a scif_vwriteto() of SIZE bytes from host address SRC to the
			
 
				+ * offset DST of the MIC device backing DST_NODE, on the data transfer
			
 
				+ * endpoint of that node. SRC_NODE is not used.
			
 
				+ * Returns 0; a SCIF failure is reported with the current errno.
			
 
				+ */
			
 
				+int _starpu_mic_copy_ram_to_mic_async(void *src, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst, unsigned dst_node, size_t size)
			
 
				+{
			
 
				+	const struct _starpu_mp_node *sink = _starpu_mic_src_get_mp_node_from_memory_node(dst_node);
			
 
				+	scif_epd_t epd = sink->host_sink_dt_connection.mic_endpoint;
			
 
				+	off_t remote_offset = (off_t)dst;
			
 
				+
			
 
				+	if (scif_vwriteto(epd, src, size, remote_offset, 0) < 0)
			
 
				+		STARPU_MIC_SRC_REPORT_SCIF_ERROR(errno);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/* Issue a scif_vreadfrom() of SIZE bytes from the offset SRC of the MIC
			
 
				+ * device backing SRC_NODE to host address DST, on the data transfer
			
 
				+ * endpoint of that node. DST_NODE is not used.
			
 
				+ * Returns 0; a SCIF failure is reported with the current errno.
			
 
				+ */
			
 
				+int _starpu_mic_copy_mic_to_ram_async(void *src, unsigned src_node, void *dst, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size)
			
 
				+{
			
 
				+	const struct _starpu_mp_node *sink = _starpu_mic_src_get_mp_node_from_memory_node(src_node);
			
 
				+	scif_epd_t epd = sink->host_sink_dt_connection.mic_endpoint;
			
 
				+	off_t remote_offset = (off_t)src;
			
 
				+
			
 
				+	if (scif_vreadfrom(epd, dst, size, remote_offset, 0) < 0)
			
 
				+		STARPU_MIC_SRC_REPORT_SCIF_ERROR(errno);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/* Initialize a _starpu_mic_async_event for the device backing MEMORY_NODE.
			
 
				+ * A page-aligned signal word is allocated, zeroed and registered with
			
 
				+ * SCIF; a fence mark is taken, and SCIF is asked to write
			
 
				+ * STARPU_MIC_REQUEST_COMPLETE in the signal word.
			
 
				+ * Returns 0 on success, -ENOMEM when the signal word cannot be
			
 
				+ * allocated. SCIF failures are reported with the current errno.
			
 
				+ */
			
 
				+int _starpu_mic_init_event(struct _starpu_mic_async_event *event, unsigned memory_node)
			
 
				+{
			
 
				+	const struct _starpu_mp_node *mp_node = _starpu_mic_src_get_mp_node_from_memory_node(memory_node);
			
 
				+	scif_epd_t epd = mp_node->host_sink_dt_connection.mic_endpoint;
			
 
				+
			
 
				+	/* Remembered so that the wait/test functions can find the endpoint again. */
			
 
				+	event->memory_node = memory_node;
			
 
				+
			
 
				+	/* The address of the allocation must be a multiple of the page size
			
 
				+	 * (0x1000 is used as the page size throughout this file). */
			
 
				+	if (posix_memalign((void **)&(event->signal), 0x1000, sizeof(*(event->signal))) != 0)
			
 
				+		return -ENOMEM;
			
 
				+	*(event->signal) = 0;
			
 
				+
			
 
				+	/* The size passed to scif_register is 0x1000 because it should be a multiple of the page size.
			
 
				+	 * NOTE(review): only sizeof(*(event->signal)) bytes were allocated above,
			
 
				+	 * yet a whole page is registered -- confirm this is intended. */
			
 
				+	if (scif_register(epd, event->signal, 0x1000, (off_t)(event->signal), SCIF_PROT_WRITE, SCIF_MAP_FIXED) < 0)
			
 
				+		STARPU_MIC_SRC_REPORT_SCIF_ERROR(errno);
			
 
				+
			
 
				+	/* Take a mark for a future wait (see scif_fence_wait). */
			
 
				+	if (scif_fence_mark(epd, SCIF_FENCE_INIT_SELF, &(event->mark)) < 0)
			
 
				+		STARPU_MIC_SRC_REPORT_SCIF_ERROR(errno);
			
 
				+
			
 
				+	/* Tell scif to write STARPU_MIC_REQUEST_COMPLETE in event->signal when the transfer is complete.
			
 
				+	 * We use this to test the end of a transfer. */
			
 
				+	if (scif_fence_signal(epd, (off_t)event->signal, STARPU_MIC_REQUEST_COMPLETE, 0, 0, SCIF_FENCE_INIT_SELF | SCIF_SIGNAL_LOCAL) < 0)
			
 
				+		STARPU_MIC_SRC_REPORT_SCIF_ERROR(errno);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/* Wait for the end of the asynchronous request tracked by EVENT, then
			
 
				+ * unregister and free its signal page. Nothing is done when the signal
			
 
				+ * page has already been released (event->signal == NULL). */
			
 
				+void _starpu_mic_wait_request_completion(struct _starpu_mic_async_event *event)
			
 
				+{
			
 
				+	/* Already released: nothing to wait for. */
			
 
				+	if (event->signal == NULL)
			
 
				+		return;
			
 
				+
			
 
				+	const struct _starpu_mp_node *sink = _starpu_mic_src_get_mp_node_from_memory_node(event->memory_node);
			
 
				+	scif_epd_t endpoint = sink->host_sink_dt_connection.mic_endpoint;
			
 
				+
			
 
				+	if (scif_fence_wait(endpoint, event->mark) < 0)
			
 
				+		STARPU_MIC_SRC_REPORT_SCIF_ERROR(errno);
			
 
				+
			
 
				+	if (scif_unregister(endpoint, (off_t)(event->signal), 0x1000) < 0)
			
 
				+		STARPU_MIC_SRC_REPORT_SCIF_ERROR(errno);
			
 
				+
			
 
				+	free(event->signal);
			
 
				+	/* Marks the event as released for later calls. */
			
 
				+	event->signal = NULL;
			
 
				+}
			
 
				+
			
 
				+/* Test whether the asynchronous request tracked by EVENT is over.
			
 
				+ * Returns 0 while the signal word has not been set to
			
 
				+ * STARPU_MIC_REQUEST_COMPLETE, 1 otherwise. On the first call that
			
 
				+ * observes the completion, the signal page is unregistered and freed;
			
 
				+ * once it has been released (event->signal == NULL) the function keeps
			
 
				+ * returning 1 without touching SCIF again. */
			
 
				+int _starpu_mic_request_is_complete(struct _starpu_mic_async_event *event)
			
 
				+{
			
 
				+	/* The signal page is gone: the request was already seen as complete
			
 
				+	 * and there is nothing left to unregister. */
			
 
				+	if (event->signal == NULL)
			
 
				+		return 1;
			
 
				+
			
 
				+	if (*(event->signal) != STARPU_MIC_REQUEST_COMPLETE)
			
 
				+		return 0;
			
 
				+
			
 
				+	const struct _starpu_mp_node *mp_node = _starpu_mic_src_get_mp_node_from_memory_node(event->memory_node);
			
 
				+	scif_epd_t epd = mp_node->host_sink_dt_connection.mic_endpoint;
			
 
				+
			
 
				+	if (scif_unregister(epd, (off_t)(event->signal), 0x1000) < 0)
			
 
				+		STARPU_MIC_SRC_REPORT_SCIF_ERROR(errno);
			
 
				+
			
 
				+	free(event->signal);
			
 
				+	event->signal = NULL;
			
 
				+	return 1;
			
 
				+}
			
 
				+
			
 
				+/* Body of the thread driving one MIC device. ARG is the
			
 
				+ * struct _starpu_worker_set gathering the workers of that device.
			
 
				+ * After signalling that the set is initialized, the thread loops while
			
 
				+ * the machine is running: it makes data requests progress, pops a task
			
 
				+ * from the first worker of the set that has one and submits it to the
			
 
				+ * device, or, when no task is available, polls the device for a
			
 
				+ * completed job. Returns NULL.
			
 
				+ */
			
 
				+void *_starpu_mic_src_worker(void *arg)
			
 
				+{
			
 
				+	struct _starpu_worker_set *args = arg;
			
 
				+	/* As all workers of a set share common data, we just use the first
			
 
				+	 * one for initializing the following stuff. */
			
 
				+	struct _starpu_worker *baseworker = &args->workers[0];
			
 
				+	struct _starpu_machine_config *config = baseworker->config;
			
 
				+	unsigned baseworkerid = baseworker - config->workers;
			
 
				+
			
 
				+	unsigned memnode = baseworker->memory_node;
			
 
				+
			
 
				+	_starpu_worker_init(baseworker, _STARPU_FUT_MIC_KEY);
			
 
				+
			
 
				+	// Current task for a thread managing a worker set has no sense.
			
 
				+	_starpu_set_current_task(NULL);
			
 
				+
			
 
				+	baseworker->status = STATUS_UNKNOWN;
			
 
				+
			
 
				+	_STARPU_TRACE_WORKER_INIT_END
			
 
				+
			
 
				+	/* tell the main thread that this one is ready */
			
 
				+	/* NOTE(review): the _STARPU_STARPU_PTHREAD_* names look like the
			
 
				+	 * result of a double prefix substitution -- confirm they exist. */
			
 
				+	_STARPU_STARPU_PTHREAD_MUTEX_LOCK(&args->mutex);
			
 
				+	args->set_is_initialized = 1;
			
 
				+	_STARPU_PTHREAD_COND_SIGNAL(&args->ready_cond);
			
 
				+	_STARPU_STARPU_PTHREAD_MUTEX_UNLOCK(&args->mutex);
			
 
				+
			
 
				+
			
 
				+	while (_starpu_machine_is_running())
			
 
				+	{
			
 
				+		int res;
			
 
				+		struct starpu_task *task = NULL;
			
 
				+		struct _starpu_job * j;
			
 
				+		unsigned micworkerid = 0;
			
 
				+
			
 
				+		_STARPU_TRACE_START_PROGRESS(memnode);
			
 
				+		_starpu_datawizard_progress(memnode, 1);
			
 
				+		_STARPU_TRACE_END_PROGRESS(memnode);
			
 
				+
			
 
				+		_STARPU_STARPU_PTHREAD_MUTEX_LOCK(baseworker->sched_mutex);
			
 
				+
			
 
				+		/* We pop tasklists of each worker in the set and process the
			
 
				+		 * first non-empty list. The loop is left with a break so that
			
 
				+		 * `micworkerid' still designates the worker the task was
			
 
				+		 * popped from (a loop condition on `task' would leave it one
			
 
				+		 * past that worker). */
			
 
				+		for (micworkerid = 0 ; micworkerid < args->nworkers; micworkerid++)
			
 
				+		{
			
 
				+		    task = _starpu_pop_task (&args->workers[micworkerid]);
			
 
				+		    if (task != NULL)
			
 
				+			break;
			
 
				+		}
			
 
				+
			
 
				+		if (task != NULL)
			
 
				+		    goto task_found;
			
 
				+
			
 
				+		/* No task to submit, so we can poll the MIC device for
			
 
				+		 * completed jobs. */
			
 
				+		struct pollfd fd = {
			
 
				+		    .fd = mic_nodes[baseworker->mp_nodeid]->mp_connection.mic_endpoint,
			
 
				+		    .events = POLLIN
			
 
				+		};
			
 
				+
			
 
				+		if (0 < poll (&fd, 1, 0)) {
			
 
				+		    _starpu_mic_src_process_completed_job (args);
			
 
				+		    goto restart_loop;
			
 
				+		}
			
 
				+
			
 
				+		/* At this point, there is really nothing to do for the thread
			
 
				+		 * so we can block.
			
 
				+		 * XXX: blocking drivers is in fact broken. DO NOT USE IT ! */
			
 
				+		if (_starpu_worker_can_block(memnode))
			
 
				+		    _starpu_block_worker(baseworkerid, baseworker->sched_cond, baseworker->sched_mutex);
			
 
				+
			
 
				+	restart_loop:
			
 
				+		_STARPU_STARPU_PTHREAD_MUTEX_UNLOCK(baseworker->sched_mutex);
			
 
				+		continue;
			
 
				+
			
 
				+	task_found:
			
 
				+		_STARPU_STARPU_PTHREAD_MUTEX_UNLOCK(baseworker->sched_mutex);
			
 
				+
			
 
				+		STARPU_ASSERT(task);
			
 
				+		/* The job is needed by every path below, including the
			
 
				+		 * push-back one, so it is looked up first. */
			
 
				+		j = _starpu_get_job_associated_to_task(task);
			
 
				+
			
 
				+		/* If the MIC core associated to `micworkerid' is already
			
 
				+		 * processing a job, we push back this one in the worker task
			
 
				+		 * list. */
			
 
				+		if (args->workers[micworkerid].current_task) {
			
 
				+		    _starpu_push_task_to_workers(j);
			
 
				+		    continue;
			
 
				+		}
			
 
				+
			
 
				+		/* can a MIC device do that task ? */
			
 
				+		if (!_STARPU_MIC_MAY_PERFORM(j))
			
 
				+		{
			
 
				+			/* this isn't a mic task */
			
 
				+			_starpu_push_task_to_workers(j);
			
 
				+			continue;
			
 
				+		}
			
 
				+
			
 
				+		/* Marks the worker busy until the completion is processed. */
			
 
				+		args->workers[micworkerid].current_task = j->task;
			
 
				+
			
 
				+		res = _starpu_mic_src_execute_job (j, &args->workers[micworkerid]);
			
 
				+
			
 
				+		if (res)
			
 
				+		{
			
 
				+			switch (res)
			
 
				+			{
			
 
				+				case -EAGAIN:
			
 
				+					_STARPU_DISP("ouch, put the codelet %p back ... \n", j);
			
 
				+					_starpu_push_task(j);
			
 
				+					STARPU_ABORT();
			
 
				+					continue;
			
 
				+				default:
			
 
				+					STARPU_ASSERT(0);
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	_STARPU_TRACE_WORKER_DEINIT_START
			
 
				+
			
 
				+	_starpu_handle_all_pending_node_data_requests(memnode);
			
 
				+
			
 
				+	/* In case there remains some memory that was automatically
			
 
				+	 * allocated by StarPU, we release it now. Note that data
			
 
				+	 * coherency is not maintained anymore at that point ! */
			
 
				+	_starpu_free_all_automatically_allocated_buffers(memnode);
			
 
				+
			
 
				+	/* Same trace key as the one given to _starpu_worker_init above. */
			
 
				+	_STARPU_TRACE_WORKER_DEINIT_END(_STARPU_FUT_MIC_KEY);
			
 
				+
			
 
				+	return NULL;
			
 
				+
			
 
				+}
			
--- a/src/drivers/mic/driver_mic_source.h
+++ b/src/drivers/mic/driver_mic_source.h
@@ -0,0 +1,79 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __DRIVER_MIC_SOURCE_H__
			
 
				+#define __DRIVER_MIC_SOURCE_H__
			
 
				+
			
 
				+#include <starpu_mic.h>
			
 
				+#include <common/config.h>
			
 
				+
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+
			
 
				+#include <source/COIProcess_source.h>
			
 
				+#include <source/COIEngine_source.h>
			
 
				+
			
 
				+#include <drivers/mp_common/mp_common.h>
			
 
				+
			
 
				+/* Array of structures containing all the informations useful to send
			
 
				+ * and receive informations with devices */
			
 
				+extern struct _starpu_mp_node *mic_nodes[STARPU_MAXMICDEVS];
			
 
				+
			
 
				+/* Forward declaration for the event prototypes of this header. (A
			
 
				+ * variable must not be defined in a header: every file including it
			
 
				+ * would get its own definition.) */
			
 
				+struct _starpu_mic_async_event;
			
 
				+
			
 
				+#define STARPU_MIC_REQUEST_COMPLETE 42
			
 
				+
			
 
				+#define STARPU_MIC_SRC_REPORT_COI_ERROR(status) \
			
 
				+	_starpu_mic_src_report_coi_error(__starpu_func__, __FILE__, __LINE__, status)
			
 
				+
			
 
				+#define STARPU_MIC_SRC_REPORT_SCIF_ERROR(status) \
			
 
				+	_starpu_mic_src_report_scif_error(__starpu_func__, __FILE__, __LINE__, status)
			
 
				+
			
 
				+/* `(void)' makes this a real prototype, like the other parameterless
			
 
				+ * declarations of this header. */
			
 
				+const struct _starpu_mp_node *_starpu_mic_src_get_actual_thread_mp_node(void);
			
 
				+const struct _starpu_mp_node *_starpu_mic_src_get_mp_node_from_memory_node(int memory_node);
			
 
				+
			
 
				+int _starpu_mic_src_register_kernel(starpu_mic_func_symbol_t *symbol, const char *func_name);
			
 
				+starpu_mic_kernel_t _starpu_mic_src_get_kernel(starpu_mic_func_symbol_t symbol);
			
 
				+
			
 
				+void _starpu_mic_src_report_coi_error(const char *func, const char *file, int line, const COIRESULT status);
			
 
				+void _starpu_mic_src_report_scif_error(const char *func, const char *file, int line, const int status);
			
 
				+
			
 
				+unsigned _starpu_mic_src_get_device_count(void);
			
 
				+starpu_mic_kernel_t _starpu_mic_src_get_kernel_from_codelet(struct starpu_codelet *cl, unsigned nimpl);
			
 
				+
			
 
				+void _starpu_mic_src_init(struct _starpu_mp_node *node);
			
 
				+void _starpu_mic_src_deinit(struct _starpu_mp_node *node);
			
 
				+
			
 
				+size_t _starpu_mic_get_global_mem_size(int devid);
			
 
				+size_t _starpu_mic_get_free_mem_size(int devid);
			
 
				+
			
 
				+int _starpu_mic_allocate_memory(void **addr, size_t size, unsigned memory_node);
			
 
				+void _starpu_mic_free_memory(void *addr, size_t size, unsigned memory_node);
			
 
				+
			
 
				+int _starpu_mic_copy_ram_to_mic(void *src, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst, unsigned dst_node, size_t size);
			
 
				+int _starpu_mic_copy_mic_to_ram(void *src, unsigned src_node, void *dst, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size);
			
 
				+int _starpu_mic_copy_ram_to_mic_async(void *src, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst, unsigned dst_node, size_t size);
			
 
				+int _starpu_mic_copy_mic_to_ram_async(void *src, unsigned src_node, void *dst, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size);
			
 
				+
			
 
				+int _starpu_mic_init_event(struct _starpu_mic_async_event *event, unsigned memory_node);
			
 
				+void _starpu_mic_wait_request_completion(struct _starpu_mic_async_event *event);
			
 
				+int _starpu_mic_request_is_complete(struct _starpu_mic_async_event *event);
			
 
				+
			
 
				+void *_starpu_mic_src_worker(void *arg);
			
 
				+
			
 
				+#endif /* STARPU_USE_MIC */
			
 
				+
			
 
				+
			
 
				+#endif /* __DRIVER_MIC_SOURCE_H__ */
			
--- a/src/drivers/mic/driver_mic_utils.c
+++ b/src/drivers/mic/driver_mic_utils.c
@@ -0,0 +1,45 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <starpu_mic.h>
			
 
				+#include <common/config.h>
			
 
				+
			
 
				+#include <drivers/mp_common/source_common.h>
			
 
				+#include <drivers/mic/driver_mic_source.h>
			
 
				+
			
 
				+
			
 
				+/* Public entry point to register the device function named FUNC_NAME.
			
 
				+ * It only forwards to _starpu_mic_src_register_kernel(), which returns
			
 
				+ * through SYMBOL the handle associated to the name.
			
 
				+ * Returns the value of that call: 0 on success, -ENOMEM on failure.
			
 
				+ */
			
 
				+int starpu_mic_register_kernel(starpu_mic_func_symbol_t *symbol,
			
 
				+			       const char *func_name)
			
 
				+{
			
 
				+	return _starpu_mic_src_register_kernel(symbol, func_name);
			
 
				+}
			
 
				+
			
 
				+/* Public entry point to resolve SYMBOL: it only forwards to
			
 
				+ * _starpu_mic_src_get_kernel() and returns its result, i.e. the
			
 
				+ * pointer to the function on the MIC node of the calling worker on
			
 
				+ * success, or NULL on failure.
			
 
				+ */
			
 
				+starpu_mic_kernel_t starpu_mic_get_kernel(starpu_mic_func_symbol_t symbol)
			
 
				+{
			
 
				+	return _starpu_mic_src_get_kernel(symbol);
			
 
				+}
			
--- a/src/drivers/mp_common/mp_common.c
+++ b/src/drivers/mp_common/mp_common.c
@@ -0,0 +1,234 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <stdlib.h>
			
 
				+#include <pthread.h>
			
 
				+
			
 
				+#include <drivers/mp_common/mp_common.h>
			
 
				+#include <drivers/mp_common/sink_common.h>
			
 
				+#include <drivers/mic/driver_mic_common.h>
			
 
				+#include <drivers/mic/driver_mic_source.h>
			
 
				+#include <drivers/mic/driver_mic_sink.h>
			
 
				+#include <drivers/scc/driver_scc_common.h>
			
 
				+#include <drivers/scc/driver_scc_source.h>
			
 
				+#include <drivers/scc/driver_scc_sink.h>
			
 
				+
			
 
				+#ifdef STARPU_USE_MIC
				+/* Return the integer value of the environment variable NAME.
				+ * A MIC sink cannot be configured without it, so abort when NAME is not
				+ * defined instead of handing a NULL pointer to atoi(). */
				+static int _starpu_mp_common_getenv_int(const char *name)
				+{
				+	const char *value = getenv(name);
				+
				+	if (value == NULL)
				+		STARPU_ABORT();
				+
				+	return atoi(value);
				+}
				+#endif /* STARPU_USE_MIC */
				+
				+/* Allocate and initialize a message-passing node of kind NODE_KIND.
				+ *
				+ * PEER_ID is stored in the node; for a MIC source it is also used as the
				+ * device id. A MIC sink reads its device id and the number of MIC devices
				+ * from the DEVID and NB_MIC environment variables.
				+ *
				+ * When the function returns, every function pointer of the node is either
				+ * linked to the implementation matching NODE_KIND or NULL, the command
				+ * buffer (BUFFER_SIZE bytes) is allocated and the kind-specific init hook,
				+ * if any, has been run.
				+ *
				+ * The process is aborted on allocation failure, on a missing environment
				+ * variable and on a node kind that is not supported by this build.
				+ * The returned node must be released with _starpu_mp_common_node_destroy().
				+ */
				+struct _starpu_mp_node * __attribute__((malloc))
				+    _starpu_mp_common_node_create(enum _starpu_mp_node_kind node_kind,
				+				  int peer_id)
				+{
				+	struct _starpu_mp_node *node;
				+
				+	/* Zero-initialize: each kind only sets a subset of the fields, the
				+	 * others must not be left indeterminate. */
				+	node = calloc(1, sizeof(*node));
				+	if (node == NULL)
				+		STARPU_ABORT();
				+
				+	node->kind = node_kind;
				+
				+	node->peer_id = peer_id;
				+
				+	switch(node->kind)
				+	{
				+#ifdef STARPU_USE_MIC
				+		case STARPU_MIC_SOURCE:
				+			{
				+				node->nb_mp_sinks = starpu_mic_worker_get_count();
				+				node->devid = peer_id;
				+
				+				node->init = _starpu_mic_src_init;
				+				node->deinit = _starpu_mic_src_deinit;
				+				node->report_error = _starpu_mic_src_report_scif_error;
				+
				+				node->mp_send = _starpu_mic_common_send;
				+				node->mp_recv = _starpu_mic_common_recv;
				+				node->dt_send = _starpu_mic_common_dt_send;
				+				node->dt_recv = _starpu_mic_common_dt_recv;
				+				node->dt_send_to_device = NULL;
				+				node->dt_recv_from_device = NULL;
				+
				+				node->execute = NULL;
				+				node->nbcores = NULL;
				+				node->allocate = NULL;
				+				node->free = NULL;
				+
				+				/* A source node is only working on one core,
				+				 * there is no need for this function */
				+				node->get_nb_core = NULL;
				+			}
				+			break;
				+
				+		case STARPU_MIC_SINK:
				+			{
				+				node->devid = _starpu_mp_common_getenv_int("DEVID");
				+				node->nb_mp_sinks = _starpu_mp_common_getenv_int("NB_MIC");
				+
				+				node->init = _starpu_mic_sink_init;
				+				node->deinit = _starpu_mic_sink_deinit;
				+				node->report_error = _starpu_mic_sink_report_error;
				+
				+				node->mp_send = _starpu_mic_common_send;
				+				node->mp_recv = _starpu_mic_common_recv;
				+				node->dt_send = _starpu_mic_common_dt_send;
				+				node->dt_recv = _starpu_mic_common_dt_recv;
				+				node->dt_send_to_device = NULL;
				+				node->dt_recv_from_device = NULL;
				+
				+				node->execute = _starpu_sink_common_execute;
				+				node->nbcores = _starpu_sink_nbcores;
				+				node->allocate = _starpu_mic_sink_allocate;
				+				node->free = _starpu_mic_sink_free;
				+
				+				node->get_nb_core = _starpu_mic_sink_get_nb_core;
				+			}
				+			break;
				+#endif /* STARPU_USE_MIC */
				+
				+#ifdef STARPU_USE_SCC
				+		case STARPU_SCC_SOURCE:
				+			{
				+				node->init = _starpu_scc_src_init;
				+				node->deinit = NULL;
				+				node->report_error = _starpu_scc_common_report_rcce_error;
				+
				+				node->mp_send = _starpu_scc_common_send;
				+				node->mp_recv = _starpu_scc_common_recv;
				+				node->dt_send = _starpu_scc_common_send;
				+				node->dt_recv = _starpu_scc_common_recv;
				+				node->dt_send_to_device = NULL;
				+				node->dt_recv_from_device = NULL;
				+
				+				node->execute = NULL;
				+				node->nbcores = NULL;
				+				node->allocate = NULL;
				+				node->free = NULL;
				+
				+				node->get_nb_core = NULL;
				+			}
				+			break;
				+
				+		case STARPU_SCC_SINK:
				+			{
				+				node->init = _starpu_scc_sink_init;
				+				node->deinit = _starpu_scc_sink_deinit;
				+				node->report_error = _starpu_scc_common_report_rcce_error;
				+
				+				node->mp_send = _starpu_scc_common_send;
				+				node->mp_recv = _starpu_scc_common_recv;
				+				node->dt_send = _starpu_scc_common_send;
				+				node->dt_recv = _starpu_scc_common_recv;
				+				node->dt_send_to_device = _starpu_scc_sink_send_to_device;
				+				node->dt_recv_from_device = _starpu_scc_sink_recv_from_device;
				+
				+				node->execute = _starpu_scc_sink_execute;
				+				node->nbcores = NULL;
				+				node->allocate = _starpu_sink_common_allocate;
				+				node->free = _starpu_sink_common_free;
				+
				+				node->get_nb_core = NULL;
				+			}
				+			break;
				+#endif /* STARPU_USE_SCC */
				+
				+#ifdef STARPU_USE_MPI
				+		case STARPU_MPI_SOURCE:
				+			STARPU_ABORT();
				+			break;
				+
				+		case STARPU_MPI_SINK:
				+			STARPU_ABORT();
				+			break;
				+#endif /* STARPU_USE_MPI */
				+
				+		default:
				+			/* Kind unknown or not compiled in: abort even when
				+			 * assertions are disabled, a node without any hook
				+			 * must never be handed back to the caller. */
				+			STARPU_ABORT();
				+			break;
				+	}
				+
				+	/* Let's allocate the buffer, we want it to be big enough to contain
				+	 * a command, an argument and the argument size */
				+	node->buffer = malloc(BUFFER_SIZE);
				+	if (node->buffer == NULL)
				+		STARPU_ABORT();
				+
				+	if (node->init)
				+		node->init(node);
				+
				+	return node;
				+}
			
 
				+
			
 
				+/* Tear down NODE: run its kind-specific deinit hook when one is set, then
				+ * release the command buffer and finally the node structure itself. */
				+void _starpu_mp_common_node_destroy(struct _starpu_mp_node *node)
				+{
				+	if (node->deinit != NULL)
				+	{
				+		node->deinit(node);
				+	}
				+
				+	free(node->buffer);
				+	free(node);
				+}
			
 
				+
			
 
				+/* Send COMMAND to the peer of NODE, along with ARG if ARG_SIZE is non-zero.
				+ *
				+ * Two messages go through node->mp_send:
				+ *  - a fixed header made of COMMAND followed by ARG_SIZE (as an int),
				+ *    staged in node->buffer;
				+ *  - when ARG_SIZE is non-zero, the ARG_SIZE bytes located at ARG, sent
				+ *    directly from the caller's memory.
				+ *
				+ * ARG_SIZE must lie in [0, BUFFER_SIZE]: _starpu_mp_common_recv_command()
				+ * stores the argument in a buffer of BUFFER_SIZE bytes.
				+ */
				+void _starpu_mp_common_send_command(const struct _starpu_mp_node *node,
				+				    const enum _starpu_mp_command command,
				+				    void *arg, int arg_size)
				+{
				+	/* MIC and MPI sizes are given through a int */
				+	const int command_size = sizeof(enum _starpu_mp_command);
				+	const int arg_size_size = sizeof(int);
				+
				+	/* Byte view of the staging buffer: ISO C does not define
				+	 * arithmetic on void pointers. */
				+	char *header = node->buffer;
				+
				+	/* A negative size would pass a plain upper-bound check and then be
				+	 * handed to mp_send below. */
				+	STARPU_ASSERT(arg_size >= 0 && arg_size <= BUFFER_SIZE);
				+
				+	/* Let's copy the data into the command line buffer */
				+	memcpy(header, &command, command_size);
				+	memcpy(header + command_size, &arg_size, arg_size_size);
				+
				+	node->mp_send(node, node->buffer, command_size + arg_size_size);
				+
				+	if (arg_size)
				+		node->mp_send(node, arg, arg_size);
				+}
			
 
				+
			
 
				+/* Receive a command from the peer of NODE and return it.
				+ *
				+ * A fixed header (the command followed by the argument size as an int) is
				+ * first received into node->buffer. The argument size is stored in
				+ * *ARG_SIZE.
				+ *  - If it is zero, *ARG is set to NULL.
				+ *  - Otherwise the argument is received into node->buffer, overwriting the
				+ *    header, and *ARG is set to node->buffer.
				+ *
				+ * There is no need to free *ARG as it is not allocated here. However, the
				+ * data pointed to by *ARG must not be relied on after another command is
				+ * sent or received on NODE, since both operations reuse node->buffer.
				+ */
				+enum _starpu_mp_command _starpu_mp_common_recv_command(const struct _starpu_mp_node *node,
				+						       void **arg, int *arg_size)
				+{
				+	enum _starpu_mp_command command;
				+
				+	/* MIC and MPI sizes are given through a int */
				+	const int command_size = sizeof(enum _starpu_mp_command);
				+	const int arg_size_size = sizeof(int);
				+
				+	/* Byte view of the buffer: ISO C does not define arithmetic on
				+	 * void pointers. */
				+	const char *header = node->buffer;
				+
				+	node->mp_recv(node, node->buffer, command_size + arg_size_size);
				+
				+	/* Decode the header with memcpy, mirroring how the sender
				+	 * encodes it. */
				+	memcpy(&command, header, command_size);
				+	memcpy(arg_size, header + command_size, arg_size_size);
				+
				+	/* If there is no argument (ie. arg_size == 0),
				+	 * let's return the command right now */
				+	if (!(*arg_size))
				+	{
				+		*arg = NULL;
				+		return command;
				+	}
				+
				+	/* The size comes from the peer: reject negative values as well as
				+	 * values that would overflow the buffer. */
				+	STARPU_ASSERT(*arg_size > 0 && *arg_size <= BUFFER_SIZE);
				+
				+	node->mp_recv(node, node->buffer, *arg_size);
				+
				+	*arg = node->buffer;
				+
				+	return command;
				+}
			
--- a/src/drivers/mp_common/mp_common.h
+++ b/src/drivers/mp_common/mp_common.h
@@ -0,0 +1,178 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __MP_COMMON_H__
			
 
				+#define __MP_COMMON_H__
			
 
				+
			
 
				+#include <pthread.h>
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <common/config.h>
			
 
				+
			
 
				+
			
 
				+#ifdef STARPU_USE_MP
			
 
				+
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+#include <scif.h>
			
 
				+#endif /* STARPU_USE_MIC */
			
 
				+/* BUFFER_SIZE: size in bytes of the command buffer allocated for each node
				+ * by _starpu_mp_common_node_create(); it is also the largest argument a
				+ * single command may carry (asserted when sending and receiving).
				+ *
				+ * STARPU_MP_SRC_NODE / STARPU_MP_SINK_NODE(a): presumably node identifiers,
				+ * 0 for the source and a + 1 for the sink number a - their users are not
				+ * visible here, confirm.
				+ *
				+ * STARPU_MP_COMMON_REPORT_ERROR(node, status): call the report_error hook of
				+ * NODE with the current function name, file, line and STATUS.
				+ */
				+#define BUFFER_SIZE 256
				+
				+#define STARPU_MP_SRC_NODE 0
				+#define STARPU_MP_SINK_NODE(a) ((a) + 1)
				+
				+#define STARPU_MP_COMMON_REPORT_ERROR(node, status) \
				+	(node)->report_error(__starpu_func__, __FILE__, __LINE__, (status))
			
 
				+
			
 
				+/* Commands exchanged between a source and its sink with
				+ * _starpu_mp_common_send_command() / _starpu_mp_common_recv_command().
				+ * The per-value notes describe how the sink loop
				+ * (_starpu_sink_common_worker) handles or emits them. */
				+enum _starpu_mp_command
				+{
				+	STARPU_EXIT = 0x00,			/* ends the sink loop */
				+	STARPU_EXECUTE = 0x01,			/* dispatched to node->execute */
				+	STARPU_ERROR_EXECUTE = 0x02,		/* not used by the sink loop */
				+	STARPU_LOOKUP = 0X03,			/* resolve a function name on the sink */
				+	STARPU_ANSWER_LOOKUP = 0X04,		/* lookup succeeded, carries the function pointer */
				+	STARPU_ERROR_LOOKUP = 0X05,		/* lookup failed, no argument */
				+	STARPU_ALLOCATE = 0x06,			/* dispatched to node->allocate */
				+	STARPU_ANSWER_ALLOCATE = 0x07,		/* allocation succeeded, carries the address */
				+	STARPU_ERROR_ALLOCATE = 0x08,		/* allocation failed, no argument */
				+	STARPU_FREE = 0x09,			/* dispatched to node->free */
				+	STARPU_RECV_FROM_HOST = 0x10,		/* sink receives data through node->dt_recv */
				+	STARPU_SEND_TO_HOST = 0x11,		/* sink sends data through node->dt_send */
				+	STARPU_RECV_FROM_SINK = 0x12,		/* sink receives data from another device */
				+	STARPU_SEND_TO_SINK = 0x13,		/* sink sends data to another device */
				+	STARPU_TRANSFER_COMPLETE = 0x14,	/* sent by the sink after STARPU_RECV_FROM_SINK */
				+	STARPU_SINK_NBCORES = 0x15,		/* dispatched to node->nbcores */
				+	STARPU_ANSWER_SINK_NBCORES = 0x16,	/* carries the core count as an int */
				+	STARPU_EXECUTION_SUBMITTED = 0x42,	/* sent by the sink before running a kernel */
				+	STARPU_EXECUTION_COMPLETED = 0x43	/* sent by the sink after the kernel, carries the core id */
				+};
			
 
				+/* Kind of a message-passing node: one source and one sink value per
				+ * supported transport. _starpu_mp_common_node_create() uses it to select
				+ * the hooks installed in the node. */
				+enum _starpu_mp_node_kind
				+{
				+	STARPU_MIC_SINK,
				+	STARPU_MIC_SOURCE,
				+	STARPU_SCC_SINK,
				+	STARPU_SCC_SOURCE,
				+	STARPU_MPI_SINK,	/* node creation aborts for the MPI kinds */
				+	STARPU_MPI_SOURCE,
				+	STARPU_INVALID_KIND	/* returned when STARPU_SINK names no known kind */
				+};
			
 
				+/* Handle of a connection to a peer node; one member per transport, the MIC
				+ * and SCC members only exist when the matching support is compiled in. */
				+union _starpu_mp_connection
				+{
				+#ifdef STARPU_USE_MIC
				+	scif_epd_t mic_endpoint;
				+#endif
				+#ifdef STARPU_USE_SCC
				+	int scc_nodeid;
				+#endif
				+	int mpi_nodeid;
				+};
			
 
				+/* Argument of STARPU_RECV_FROM_HOST and STARPU_SEND_TO_HOST: the sink
				+ * passes ADDR and SIZE to its dt_recv / dt_send hook. */
				+struct _starpu_mp_transfer_command
				+{
				+	size_t size;	/* number of bytes to transfer */
				+	void *addr;	/* address of the data on the sink */
				+};
			
 
				+/* Argument of STARPU_RECV_FROM_SINK and STARPU_SEND_TO_SINK: the sink
				+ * passes DEVID, ADDR and SIZE to its dt_recv_from_device /
				+ * dt_send_to_device hook. */
				+struct _starpu_mp_transfer_command_to_device
				+{
				+	int devid;	/* id of the other device */
				+	size_t size;	/* number of bytes to transfer */
				+	void *addr;	/* address of the data on the sink handling the command */
				+};
			
 
				+
			
 
				+/* Message-passing working node, whether source or sink.
				+ * Created by _starpu_mp_common_node_create(), which installs the hooks
				+ * below according to the node kind, and released by
				+ * _starpu_mp_common_node_destroy(). */
				+struct _starpu_mp_node
				+{
				+	enum _starpu_mp_node_kind kind;
				+
				+	/* Buffer of BUFFER_SIZE bytes allocated during node creation.
				+	 * Commands are staged in it before being sent and received
				+	 * arguments are stored in it (the original note describes it as
				+	 * the buffer used for scif data transfers). */
				+	void *buffer;
				+
				+	/* For a sink: -1.
				+	 * For the host: index of the sink (= devid).
				+	 */
				+	int peer_id;
				+
				+	/* Only MIC uses this for now.
				+	 * This is the devid, both for the sink and the host. */
				+	int devid;
				+
				+	/* Only MIC uses this for now.
				+	 * Number of MIC devices on the system. */
				+	unsigned int nb_mp_sinks;
				+
				+	/* Connection used for command passing between the host thread and the
				+	 * sink it controls */
				+	union _starpu_mp_connection mp_connection;
				+
				+	/* Only MIC uses this for now.
				+	 * Connection used for data transfers between the host and its sink. */
				+	union _starpu_mp_connection host_sink_dt_connection;
				+
				+	/* Only MIC uses this for now, and only on the sink side.
				+	 * Connections used for data transfers between devices.
				+	 * A sink opens a connection with every other sink, so that sinks
				+	 * can send data to each other directly.
				+	 * On the sink number j:
				+	 *  - sink_sink_dt_connections[i] is the connection to the sink number i;
				+	 *  - sink_sink_dt_connections[j] (its own slot) is not initialized. */
				+	union _starpu_mp_connection *sink_sink_dt_connections;
				+
				+	/* Node general functions: init and deinit may be NULL, they are
				+	 * only called when set. */
				+	void (*init)(struct _starpu_mp_node *node);
				+	void (*deinit)(struct _starpu_mp_node *node);
				+	void (*report_error)(const char *, const char *, const int, const int);
				+
				+	/* Message passing: send / receive a buffer of the given size */
				+	void (*mp_send)(const struct _starpu_mp_node *, void *, int);
				+	void (*mp_recv)(const struct _starpu_mp_node *, void *, int);
				+
				+	/* Data transfers; the *_device variants take the id of the other
				+	 * device as second argument. */
				+	void (*dt_send)(const struct _starpu_mp_node *, void *, int);
				+	void (*dt_recv)(const struct _starpu_mp_node *, void *, int);
				+	void (*dt_send_to_device)(const struct _starpu_mp_node *, int, void *, int);
				+	void (*dt_recv_from_device)(const struct _starpu_mp_node *, int, void *, int);
				+	/* Handlers of the STARPU_EXECUTE, STARPU_SINK_NBCORES,
				+	 * STARPU_ALLOCATE and STARPU_FREE commands in the sink loop; set to
				+	 * NULL on source nodes. */
				+	void (*execute)(const struct _starpu_mp_node *, void *, int);
				+	void (*nbcores)(const struct _starpu_mp_node *);
				+	void (*allocate)(const struct _starpu_mp_node *, void *, int);
				+	void (*free)(const struct _starpu_mp_node *, void *, int);
				+	/* Only set for MIC sinks, NULL for every other kind. */
				+	unsigned int (*get_nb_core)(void);
				+};
			
 
				+
			
 
				+/* Allocate and initialize a node of kind NODE_KIND; PEER_ID is stored in
				+ * the node. Release the result with _starpu_mp_common_node_destroy(). */
				+struct _starpu_mp_node * _starpu_mp_common_node_create(enum _starpu_mp_node_kind node_kind, int peer_id);
				+
				+/* Run the deinit hook of NODE, if any, then free its buffer and the node. */
				+void _starpu_mp_common_node_destroy(struct _starpu_mp_node *node);
				+
				+/* Send COMMAND to the peer of NODE, followed by the ARG_SIZE bytes at ARG
				+ * when ARG_SIZE is non-zero. ARG_SIZE must not exceed BUFFER_SIZE. */
				+void _starpu_mp_common_send_command(const struct _starpu_mp_node *node,
				+				    const enum _starpu_mp_command command,
				+				    void *arg, int arg_size);
				+
				+/* Receive a command from the peer of NODE and return it. *ARG_SIZE gets
				+ * the size of its argument and *ARG points to the argument inside the
				+ * node buffer, or is NULL when there is none. */
				+enum _starpu_mp_command _starpu_mp_common_recv_command(const struct _starpu_mp_node *node,
				+						    void **arg, int *arg_size);
			
 
				+
			
 
				+
			
 
				+#endif /* STARPU_USE_MP */
			
 
				+
			
 
				+#endif /* __MP_COMMON_H__ */
			
--- a/src/drivers/mp_common/sink_common.c
+++ b/src/drivers/mp_common/sink_common.c
@@ -0,0 +1,275 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+
			
 
				+#include <dlfcn.h>
			
 
				+
			
 
				+#include <common/COISysInfo_common.h>
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <common/config.h>
			
 
				+#include <common/utils.h>
			
 
				+#include <drivers/mp_common/mp_common.h>
			
 
				+
			
 
				+#include "sink_common.h"
			
 
				+
			
 
				+/* Translate the STARPU_SINK environment variable into a sink node kind.
				+ * The variable has to be defined on the sink side (this is asserted).
				+ * STARPU_INVALID_KIND is returned when its value matches no known kind.
				+ */
				+static enum _starpu_mp_node_kind _starpu_sink_common_get_kind(void)
				+{
				+	/* Accepted values of STARPU_SINK and the kind each one selects */
				+	static const struct
				+	{
				+		const char *name;
				+		enum _starpu_mp_node_kind kind;
				+	} known_sinks[] =
				+	{
				+		{ "STARPU_MIC", STARPU_MIC_SINK },
				+		{ "STARPU_SCC", STARPU_SCC_SINK },
				+		{ "STARPU_MPI", STARPU_MPI_SINK }
				+	};
				+	unsigned i;
				+	char *env_value = getenv("STARPU_SINK");
				+
				+	STARPU_ASSERT(env_value);
				+
				+	for (i = 0; i < sizeof(known_sinks) / sizeof(known_sinks[0]); i++)
				+	{
				+		if (strcmp(env_value, known_sinks[i].name) == 0)
				+			return known_sinks[i].kind;
				+	}
				+
				+	return STARPU_INVALID_KIND;
				+}
			
 
				+
			
 
				+/* Answer a STARPU_SINK_NBCORES request: send the number of cores of this
				+ * sink back through NODE in a STARPU_ANSWER_SINK_NBCORES command. */
				+void
				+_starpu_sink_nbcores (const struct _starpu_mp_node *node)
				+{
				+    /* Only MIC sinks are supported for now, any other kind reports 0. */
				+    int core_count = 0;
				+    const enum _starpu_mp_node_kind sink_kind = _starpu_sink_common_get_kind ();
				+
				+    if (sink_kind == STARPU_MIC_SINK)
				+    {
				+	core_count = COISysGetCoreCount();
				+    }
				+
				+    _starpu_mp_common_send_command (node, STARPU_ANSWER_SINK_NBCORES,
				+				    &core_count, sizeof (int));
				+}
			
 
				+
			
 
				+
			
 
				+/* Handle a packet received from _starpu_src_common_execute_kernel, laid
				+ * out as follows:
				+ * [function pointer on the sink, core id, number of interfaces,
				+ *  interfaces (union _starpu_interface), cl_arg]
				+ * cl_arg is optional: it is present when ARG_SIZE extends past the
				+ * interfaces.
				+ *
				+ * STARPU_EXECUTION_SUBMITTED is sent to the source, the function is called
				+ * with an array holding the addresses of the received interfaces and
				+ * cl_arg (or NULL), then STARPU_EXECUTION_COMPLETED is sent along with the
				+ * core id.
				+ */
				+void _starpu_sink_common_execute(const struct _starpu_mp_node *node,
				+					void *arg, int arg_size)
				+{
				+	unsigned id = 0;
				+
				+	/* Byte cursor over the packet: ISO C does not define arithmetic
				+	 * on void pointers. */
				+	char *arg_ptr = arg;
				+	void (*kernel)(void **, void *) = NULL;
				+	unsigned coreid = 0;
				+	unsigned nb_interfaces = 0;
				+	void *interfaces[STARPU_NMAXBUFS];
				+	void *cl_arg;
				+
				+	/* The fixed-size part of the packet has to be entirely present
				+	 * before it is decoded. */
				+	STARPU_ASSERT(arg != NULL && arg_size >= 0 &&
				+		      (size_t) arg_size >= sizeof(kernel) + sizeof(coreid) + sizeof(nb_interfaces));
				+
				+	kernel = *(void(**)(void **, void *)) (void *) arg_ptr;
				+	arg_ptr += sizeof(kernel);
				+
				+	coreid = *(unsigned *) (void *) arg_ptr;
				+	arg_ptr += sizeof(coreid);
				+
				+	nb_interfaces = *(unsigned *) (void *) arg_ptr;
				+	arg_ptr += sizeof(nb_interfaces);
				+
				+	/* nb_interfaces comes from the peer: never write past the end of
				+	 * the local interfaces array. */
				+	STARPU_ASSERT(nb_interfaces <= STARPU_NMAXBUFS);
				+
				+	/* The function needs an array pointing to each interface it needs
				+	 * during execution. As in sink-side there is no mean to know which
				+	 * kind of interface to expect, the array is composed of unions of
				+	 * interfaces, thus we expect the same size anyway */
				+	for (id = 0; id < nb_interfaces; id++)
				+	{
				+		interfaces[id] = arg_ptr;
				+		arg_ptr += sizeof(union _starpu_interface);
				+	}
				+
				+	/* Was cl_arg sent ? */
				+	if (arg_size > arg_ptr - (char *) arg)
				+		cl_arg = arg_ptr;
				+	else
				+		cl_arg = NULL;
				+
				+	/* XXX: in the future, we will not have to directly execute the kernel
				+	 * but submit it to the correct local worker. */
				+	_starpu_mp_common_send_command(node, STARPU_EXECUTION_SUBMITTED,
				+				       NULL, 0);
				+
				+	/* XXX: we keep the synchronous execution model on the sink side for
				+	 * now. */
				+	kernel(interfaces, cl_arg);
				+
				+	_starpu_mp_common_send_command(node, STARPU_EXECUTION_COMPLETED,
				+				       &coreid, sizeof(coreid));
				+}
			
 
				+
			
 
				+
			
 
				+/* Handle a STARPU_LOOKUP command: look for the symbol FUNC_NAME in the
				+ * running program and answer through NODE, either with
				+ * STARPU_ANSWER_LOOKUP carrying the function pointer or with
				+ * STARPU_ERROR_LOOKUP when it cannot be found.
				+ * NOTE(review): FUNC_NAME comes from the received packet and is used as a
				+ * C string - presumably the source sends the terminating NUL, confirm.
				+ */
				+static void _starpu_sink_common_lookup(const struct _starpu_mp_node *node,
				+				       char *func_name)
				+{
				+	void (*func)(void) = NULL;
				+	void *dl_handle = dlopen(NULL, RTLD_NOW);
				+
				+	/* dlopen() may fail: only query a valid handle, and give it back
				+	 * once the lookup is done so that repeated lookups do not pile up
				+	 * references. The handle designates the main program, which stays
				+	 * loaded, so the address remains usable after dlclose(). */
				+	if (dl_handle)
				+	{
				+		func = dlsym(dl_handle, func_name);
				+		dlclose(dl_handle);
				+	}
				+
				+	/* If we couldn't find the function, let's send an error to the host.
				+	 * The user probably made a mistake in the name */
				+	if (func)
				+		_starpu_mp_common_send_command(node, STARPU_ANSWER_LOOKUP,
				+					       &func, sizeof(func));
				+	else
				+		_starpu_mp_common_send_command(node, STARPU_ERROR_LOOKUP,
				+					       NULL, 0);
				+}
			
 
				+
			
 
				+/* Handle a STARPU_ALLOCATE command: ARG holds the requested size as a
				+ * size_t. The address of the new block is sent back in a
				+ * STARPU_ANSWER_ALLOCATE command; STARPU_ERROR_ALLOCATE is sent instead
				+ * when malloc() returns NULL. */
				+void _starpu_sink_common_allocate(const struct _starpu_mp_node *mp_node,
				+				  void *arg, int arg_size)
				+{
				+    size_t requested;
				+    void *addr;
				+
				+    STARPU_ASSERT(arg_size == sizeof(size_t));
				+
				+    requested = *(size_t *)(arg);
				+    addr = malloc(requested);
				+
				+    if (addr == NULL)
				+    {
				+	/* Allocation failed: report it to the host. */
				+	_starpu_mp_common_send_command(mp_node, STARPU_ERROR_ALLOCATE,
				+				       NULL, 0);
				+	return;
				+    }
				+
				+    _starpu_mp_common_send_command(mp_node, STARPU_ANSWER_ALLOCATE,
				+				   &addr, sizeof(addr));
				+}
			
 
				+
			
 
				+/* Handle a STARPU_FREE command: ARG holds the address to release, which is
				+ * passed to free(). No answer is sent. */
				+void _starpu_sink_common_free(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUTE_UNUSED,
				+			      void *arg, int arg_size)
				+{
				+	void *target;
				+
				+	STARPU_ASSERT(arg_size == sizeof(void *));
				+
				+	target = *(void **)(arg);
				+	free(target);
				+}
			
 
				+
			
 
				+/* Handle a STARPU_RECV_FROM_HOST command: ARG is a
				+ * struct _starpu_mp_transfer_command; the data is received at its address
				+ * through the dt_recv hook of MP_NODE. */
				+static void _starpu_sink_common_copy_from_host(const struct _starpu_mp_node *mp_node,
				+					       void *arg, int arg_size)
				+{
				+    struct _starpu_mp_transfer_command *transfer = arg;
				+
				+    STARPU_ASSERT(arg_size == sizeof(struct _starpu_mp_transfer_command));
				+
				+    mp_node->dt_recv(mp_node, transfer->addr, transfer->size);
				+}
			
 
				+
			
 
				+/* Handle a STARPU_SEND_TO_HOST command: ARG is a
				+ * struct _starpu_mp_transfer_command; the data located at its address is
				+ * sent through the dt_send hook of MP_NODE. */
				+static void _starpu_sink_common_copy_to_host(const struct _starpu_mp_node *mp_node,
				+					     void *arg, int arg_size)
				+{
				+    struct _starpu_mp_transfer_command *transfer = arg;
				+
				+    STARPU_ASSERT(arg_size == sizeof(struct _starpu_mp_transfer_command));
				+
				+    mp_node->dt_send(mp_node, transfer->addr, transfer->size);
				+}
			
 
				+
			
 
				+/* Handle a STARPU_RECV_FROM_SINK command: ARG is a
				+ * struct _starpu_mp_transfer_command_to_device; the data is received from
				+ * the device it names through the dt_recv_from_device hook, then
				+ * STARPU_TRANSFER_COMPLETE is sent through MP_NODE. */
				+static void _starpu_sink_common_copy_from_sink(const struct _starpu_mp_node *mp_node,
				+					       void *arg, int arg_size)
				+{
				+    struct _starpu_mp_transfer_command_to_device *transfer = arg;
				+
				+    STARPU_ASSERT(arg_size == sizeof(struct _starpu_mp_transfer_command_to_device));
				+
				+    mp_node->dt_recv_from_device(mp_node, transfer->devid, transfer->addr, transfer->size);
				+    _starpu_mp_common_send_command(mp_node, STARPU_TRANSFER_COMPLETE, NULL, 0);
				+}
			
 
				+
			
 
				+/* Handle a STARPU_SEND_TO_SINK command: ARG is a
				+ * struct _starpu_mp_transfer_command_to_device; the data located at its
				+ * address is sent to the device it names through the dt_send_to_device
				+ * hook of MP_NODE. */
				+static void _starpu_sink_common_copy_to_sink(const struct _starpu_mp_node *mp_node,
				+					     void *arg, int arg_size)
				+{
				+    struct _starpu_mp_transfer_command_to_device *transfer = arg;
				+
				+    STARPU_ASSERT(arg_size == sizeof(struct _starpu_mp_transfer_command_to_device));
				+
				+    mp_node->dt_send_to_device(mp_node, transfer->devid, transfer->addr, transfer->size);
				+}
			
 
				+
			
 
				+/* Main loop of a sink process: create the node matching the kind given by
				+ * the STARPU_SINK environment variable, then serve the commands received
				+ * from the source until STARPU_EXIT arrives.
				+ *
				+ * An unrecognized command is reported on the standard output and ignored.
				+ * This function does not return: once the loop is over, the node is
				+ * destroyed and the process exits with status 0.
				+ */
				+void _starpu_sink_common_worker(void)
				+{
				+	struct _starpu_mp_node *node = NULL;
				+	enum _starpu_mp_command command = STARPU_EXIT;
				+	int arg_size = 0;
				+	void *arg = NULL;
				+
				+	enum _starpu_mp_node_kind node_kind = _starpu_sink_common_get_kind();
				+
				+	/* The literals below are concatenated by the compiler: each one
				+	 * needs its own trailing space to keep the words apart. */
				+	if (node_kind == STARPU_INVALID_KIND)
				+		_STARPU_ERROR("No valid sink kind retrieved, use the "
				+			      "STARPU_SINK environment variable to specify "
				+			      "this\n");
				+
				+	/* Create and initialize the node */
				+	node = _starpu_mp_common_node_create(node_kind, -1);
				+
				+	while ((command = _starpu_mp_common_recv_command(node, &arg, &arg_size)) != STARPU_EXIT)
				+	{
				+		switch(command)
				+		{
				+			case STARPU_EXECUTE:
				+				node->execute(node, arg, arg_size);
				+				break;
				+
				+			case STARPU_SINK_NBCORES:
				+				node->nbcores (node);
				+				break;
				+
				+			case STARPU_LOOKUP:
				+				_starpu_sink_common_lookup(node, (char *) arg);
				+				break;
				+
				+			case STARPU_ALLOCATE:
				+				node->allocate(node, arg, arg_size);
				+				break;
				+
				+			case STARPU_FREE:
				+				node->free(node, arg, arg_size);
				+				break;
				+
				+			case STARPU_RECV_FROM_HOST:
				+				_starpu_sink_common_copy_from_host(node, arg, arg_size);
				+				break;
				+
				+			case STARPU_SEND_TO_HOST:
				+				_starpu_sink_common_copy_to_host(node, arg, arg_size);
				+				break;
				+
				+			case STARPU_RECV_FROM_SINK:
				+				_starpu_sink_common_copy_from_sink(node, arg, arg_size);
				+				break;
				+
				+			case STARPU_SEND_TO_SINK:
				+				_starpu_sink_common_copy_to_sink(node, arg, arg_size);
				+				break;
				+
				+			default:
				+				printf("Oops, command %x unrecognized\n", command);
				+				break;
				+		}
				+	}
				+
				+	/* Deinitialize the node and release it */
				+	_starpu_mp_common_node_destroy(node);
				+
				+	exit(0);
				+}
			
--- a/src/drivers/mp_common/sink_common.h
+++ b/src/drivers/mp_common/sink_common.h
@@ -0,0 +1,39 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+
			
 
				+#ifndef __SINK_COMMON_H__
			
 
				+#define __SINK_COMMON_H__
			
 
				+
			
 
				+#include <common/config.h>
			
 
				+
			
 
				+
			
 
				+#ifdef STARPU_USE_MP
			
 
				+
			
 
				+#include <drivers/mp_common/mp_common.h>
			
 
				+/* Sink main loop: serves the commands received from the source until
				+ * STARPU_EXIT, then exits the process. */
				+void _starpu_sink_common_worker(void);
				+/* Handlers installed as the execute and nbcores hooks of MIC sink nodes:
				+ * run the kernel described by ARG / report the number of cores. */
				+void _starpu_sink_common_execute(const struct _starpu_mp_node *node, void *arg, int arg_size);
				+void _starpu_sink_nbcores (const struct _starpu_mp_node *node);
				+/* malloc()/free() based handlers installed as the allocate and free hooks
				+ * of SCC sink nodes; ARG holds a size_t size / the address to release. */
				+void _starpu_sink_common_allocate(const struct _starpu_mp_node *mp_node, void *arg, int arg_size);
				+void _starpu_sink_common_free(const struct _starpu_mp_node *mp_node STARPU_ATTRIBUTE_UNUSED, void *arg, int arg_size);
			
 
				+
			
 
				+#endif /* STARPU_USE_MP */
			
 
				+
			
 
				+
			
 
				+#endif /* __SINK_COMMON_H__ */
			
--- a/src/drivers/mp_common/source_common.c
+++ b/src/drivers/mp_common/source_common.c
@@ -0,0 +1,368 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+
			
 
				#include <errno.h>
				#include <pthread.h>
				#include <stdlib.h>
				#include <string.h>
				#include <unistd.h>

				#include <starpu.h>
				#include <datawizard/coherency.h>
				#include <drivers/mp_common/mp_common.h>
			
 
				+
			
 
				+int
			
 
				+_starpu_src_common_sink_nbcores (const struct _starpu_mp_node *node, int *buf)
			
 
				+{
			
 
				+    // Send a request to the sink NODE for the number of cores on it.
			
 
				+
			
 
				+    enum _starpu_mp_command answer;
			
 
				+    void *arg;
			
 
				+    int arg_size = sizeof (int);
			
 
				+
			
 
				+    _starpu_mp_common_send_command (node, STARPU_SINK_NBCORES, NULL, 0);
			
 
				+
			
 
				+    answer = _starpu_mp_common_recv_command (node, &arg, &arg_size);
			
 
				+
			
 
				+    STARPU_ASSERT (answer == STARPU_ANSWER_SINK_NBCORES && arg_size == sizeof (int));
			
 
				+
			
 
				+    memcpy (buf, arg, arg_size);
			
 
				+
			
 
				+    return 0;
			
 
				+}
			
 
				+
			
 
				+/* Send a request to the sink linked to NODE for the pointer to the
			
 
				+ * function defined by FUNC_NAME.
			
 
				+ * In case of success, it returns 0 and FUNC_PTR contains the pointer ;
			
 
				+ * else it returns -ESPIPE if the function was not found.
			
 
				+ */
			
 
				+int _starpu_src_common_lookup(struct _starpu_mp_node *node,
			
 
				+			      void (**func_ptr)(void), const char *func_name)
			
 
				+{
			
 
				+	enum _starpu_mp_command answer;
			
 
				+	void *arg;
			
 
				+	int arg_size;
			
 
				+
			
 
				+	/* strlen ignore the terminating '\0' */
			
 
				+	arg_size = (strlen(func_name) + 1) * sizeof(char);
			
 
				+
			
 
				+	_starpu_mp_common_send_command(node, STARPU_LOOKUP, (void *) func_name,
			
 
				+				       arg_size);
			
 
				+	answer = _starpu_mp_common_recv_command(node, (void **) &arg,
			
 
				+						&arg_size);
			
 
				+
			
 
				+	if (answer == STARPU_ERROR_LOOKUP)
			
 
				+		return -ESPIPE;
			
 
				+
			
 
				+	/* We have to be sure the device answered the right question and the
			
 
				+	 * answer has the right size */
			
 
				+	STARPU_ASSERT(answer == STARPU_ANSWER_LOOKUP &&
			
 
				+		      arg_size == sizeof(*func_ptr));
			
 
				+
			
 
				+	memcpy(func_ptr, arg, arg_size);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+ /* Send a message to the sink to execute a kernel.
			
 
				+ * The message sent has the form below :
			
 
				+ * [Function pointer on sink, number of interfaces, interfaces
			
 
				+ * (union _starpu_interface), cl_arg]
			
 
				+ */
			
 
				+int _starpu_src_common_execute_kernel(const struct _starpu_mp_node *node,
			
 
				+				      void (*kernel)(void), unsigned coreid,
			
 
				+				      starpu_data_handle_t *handles,
			
 
				+				      void **interfaces,
			
 
				+				      unsigned nb_interfaces,
			
 
				+				      void *cl_arg, size_t cl_arg_size)
			
 
				+{
			
 
				+	unsigned id;
			
 
				+	void *buffer, *buffer_ptr, *arg = NULL;
			
 
				+	int buffer_size = 0, arg_size = 0;
			
 
				+
			
 
				+	/* If the user didn't give any cl_arg, there is no need to send it */
			
 
				+	buffer_size =
			
 
				+	    sizeof(kernel) + sizeof(coreid) + sizeof(nb_interfaces) +
			
 
				+	    nb_interfaces * sizeof(union _starpu_interface);
			
 
				+	if (cl_arg)
			
 
				+	{
			
 
				+		STARPU_ASSERT(cl_arg_size);
			
 
				+		buffer_size += cl_arg_size;
			
 
				+	}
			
 
				+
			
 
				+	/* We give to send_command a buffer we just allocated, which contains
			
 
				+	 * a pointer to the function (sink-side), core on which execute this
			
 
				+	 * function (sink-side), number of interfaces we send,
			
 
				+	 * an array of generic (union) interfaces and the value of cl_arg */
			
 
				+	buffer_ptr = buffer = (void *) malloc(buffer_size);
			
 
				+
			
 
				+	*(void(**)(void)) buffer = kernel;
			
 
				+	buffer_ptr += sizeof(kernel);
			
 
				+
			
 
				+	*(unsigned *) buffer_ptr = coreid;
			
 
				+	buffer_ptr += sizeof(coreid);
			
 
				+
			
 
				+	*(unsigned *) buffer_ptr = nb_interfaces;
			
 
				+	buffer_ptr += sizeof(nb_interfaces);
			
 
				+
			
 
				+	/* Message-passing execution is a particular case as the codelet is
			
 
				+	 * executed on a sink with a different memory, whereas a codelet is
			
 
				+	 * executed on the host part for the other accelerators.
			
 
				+	 * Thus we need to send a copy of each interface on the MP device */
			
 
				+	for (id = 0; id < nb_interfaces; id++)
			
 
				+	{
			
 
				+		starpu_data_handle_t handle = handles[id];
			
 
				+		memcpy (buffer_ptr, interfaces[id],
			
 
				+			handle->ops->interface_size);
			
 
				+		/* The sink side has no mean to get the type of each
			
 
				+		 * interface, we use a union to make it generic and permit the
			
 
				+		 * sink to go through the array */
			
 
				+		buffer_ptr += sizeof(union _starpu_interface);
			
 
				+	}
			
 
				+
			
 
				+	if (cl_arg)
			
 
				+		memcpy(buffer_ptr, cl_arg, cl_arg_size);
			
 
				+
			
 
				+	_starpu_mp_common_send_command(node, STARPU_EXECUTE, buffer, buffer_size);
			
 
				+	enum _starpu_mp_command answer = _starpu_mp_common_recv_command(node, &arg, &arg_size);
			
 
				+
			
 
				+	if (answer == STARPU_ERROR_EXECUTE)
			
 
				+		return -EINVAL;
			
 
				+
			
 
				+	STARPU_ASSERT(answer == STARPU_EXECUTION_SUBMITTED);
			
 
				+
			
 
				+	free(buffer);
			
 
				+
			
 
				+	return 0;
			
 
				+
			
 
				+}
			
 
				+
			
 
				+/* Launch the execution of the function KERNEL points to on the sink linked
			
 
				+ * to NODE. Returns 0 in case of success, -EINVAL if kernel is an invalid
			
 
				+ * pointer.
			
 
				+ * Data interfaces in task are send to the sink.
			
 
				+ */
			
 
				+int _starpu_src_common_execute_kernel_from_task(const struct _starpu_mp_node *node,
			
 
				+						void (*kernel)(void), unsigned coreid,
			
 
				+						struct starpu_task *task)
			
 
				+{
			
 
				+    return _starpu_src_common_execute_kernel(node, kernel, coreid,
			
 
				+					     task->handles, task->interfaces, task->cl->nbuffers,
			
 
				+					     task->cl_arg, task->cl_arg_size);
			
 
				+}
			
 
				+
			
 
				+/* Send a request to the sink linked to the MP_NODE to allocate SIZE bytes on
			
 
				+ * the sink.
			
 
				+ * In case of success, it returns 0 and *ADDR contains the address of the
			
 
				+ * allocated area ;
			
 
				+ * else it returns 1 if the allocation fail.
			
 
				+ */
			
 
				+int _starpu_src_common_allocate(const struct _starpu_mp_node *mp_node,
			
 
				+								void **addr, size_t size)
			
 
				+{
			
 
				+	enum _starpu_mp_command answer;
			
 
				+	void *arg;
			
 
				+	int arg_size;
			
 
				+
			
 
				+	_starpu_mp_common_send_command(mp_node, STARPU_ALLOCATE, &size,
			
 
				+								   sizeof(size));
			
 
				+
			
 
				+	answer = _starpu_mp_common_recv_command(mp_node, &arg, &arg_size);
			
 
				+
			
 
				+	if (answer == STARPU_ERROR_ALLOCATE)
			
 
				+		return 1;
			
 
				+
			
 
				+	STARPU_ASSERT(answer == STARPU_ANSWER_ALLOCATE &&
			
 
				+				  arg_size == sizeof(*addr));
			
 
				+
			
 
				+	memcpy(addr, arg, arg_size);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/* Send a request to the sink linked to the MP_NODE to deallocate the memory
			
 
				+ * area pointed by ADDR.
			
 
				+ */
			
 
				+void _starpu_src_common_free(const struct _starpu_mp_node *mp_node,
			
 
				+							 void *addr)
			
 
				+{
			
 
				+	_starpu_mp_common_send_command(mp_node, STARPU_FREE, &addr, sizeof(addr));
			
 
				+}
			
 
				+
			
 
				+/* Send SIZE bytes pointed by SRC to DST on the sink linked to the MP_NODE.
			
 
				+ */
			
 
				+int _starpu_src_common_copy_host_to_sink(const struct _starpu_mp_node *mp_node,
			
 
				+										 void *src, void *dst, size_t size)
			
 
				+{
			
 
				+	struct _starpu_mp_transfer_command cmd = {size, dst};
			
 
				+
			
 
				+	_starpu_mp_common_send_command(mp_node, STARPU_RECV_FROM_HOST, &cmd, sizeof(cmd));
			
 
				+	mp_node->dt_send(mp_node, src, size);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/* Receive SIZE bytes pointed by SRC on the sink linked to the MP_NODE and store them in DST.
			
 
				+ */
			
 
				+int _starpu_src_common_copy_sink_to_host(const struct _starpu_mp_node *mp_node,
			
 
				+										 void *src, void *dst, size_t size)
			
 
				+{
			
 
				+	struct _starpu_mp_transfer_command cmd = {size, src};
			
 
				+
			
 
				+	_starpu_mp_common_send_command(mp_node, STARPU_SEND_TO_HOST, &cmd, sizeof(cmd));
			
 
				+	mp_node->dt_recv(mp_node, dst, size);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/* Tell the sink linked to SRC_NODE to send SIZE bytes of data pointed by SRC
			
 
				+ * to the sink linked to DST_NODE. The latter store them in DST.
			
 
				+ */
			
 
				+int _starpu_src_common_copy_sink_to_sink(const struct _starpu_mp_node *src_node,
			
 
				+		const struct _starpu_mp_node *dst_node, void *src, void *dst, size_t size)
			
 
				+{
			
 
				+	enum _starpu_mp_command answer;
			
 
				+	void *arg;
			
 
				+	int arg_size;
			
 
				+
			
 
				+	struct _starpu_mp_transfer_command_to_device cmd = {dst_node->peer_id, size, src};
			
 
				+
			
 
				+	/* Tell source to send data to dest. */
			
 
				+	_starpu_mp_common_send_command(src_node, STARPU_SEND_TO_SINK, &cmd, sizeof(cmd));
			
 
				+
			
 
				+	cmd.devid = src_node->peer_id;
			
 
				+	cmd.size = size;
			
 
				+	cmd.addr = dst;
			
 
				+
			
 
				+	/* Tell dest to receive data from source. */
			
 
				+	_starpu_mp_common_send_command(dst_node, STARPU_RECV_FROM_SINK, &cmd, sizeof(cmd));
			
 
				+
			
 
				+	/* Wait for answer from dest to know wether transfer is finished. */
			
 
				+	answer = _starpu_mp_common_recv_command(dst_node, &arg, &arg_size);
			
 
				+
			
 
				+	STARPU_ASSERT(answer == STARPU_TRANSFER_COMPLETE);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/* 5 functions to determine the executable to run on the device (MIC, SCC,
			
 
				+ * MPI).
			
 
				+ */
			
 
				/* Store the concatenation FIRST + SECOND + THIRD in FINAL.
				 * FINAL must be large enough for the three strings and the final '\0', and
				 * must not overlap any of them.
				 */
				static void _starpu_src_common_cat_3(char *final, const char *first, const char *second,
								     const char *third)
				{
					const size_t first_len = strlen(first);
					const size_t second_len = strlen(second);

					memcpy(final, first, first_len);
					memcpy(final + first_len, second, second_len);
					/* strcpy() also writes the terminating '\0' */
					strcpy(final + first_len + second_len, third);
				}
			
 
				+
			
 
				/* Store the concatenation FIRST + SECOND in FINAL: the three-string helper
				 * is called with an empty last part.
				 */
				static void _starpu_src_common_cat_2(char *final, const char *first, const char *second)
				{
					static const char nothing[] = "";

					_starpu_src_common_cat_3(final, first, second, nothing);
				}
			
 
				+
			
 
				/* Build in FINAL the path of FILE inside the directory DIR.
				 *
				 * One leading '/' of FILE is dropped, and a '/' is inserted after DIR
				 * unless DIR already ends with one. An empty DIR yields FILE alone
				 * (without its leading '/'), i.e. a path relative to the current directory;
				 * the last character of DIR is only inspected when DIR is not empty.
				 *
				 * FINAL must be able to hold strlen(DIR) + strlen(FILE) + 2 bytes and must
				 * not overlap DIR or FILE.
				 */
				static void _starpu_src_common_dir_cat(char *final, const char *dir, const char *file)
				{
					size_t dir_len = strlen(dir);

					if (file[0] == '/')
						++file;

					memcpy(final, dir, dir_len);
					if (dir_len > 0 && dir[dir_len - 1] != '/')
						final[dir_len++] = '/';

					/* strcpy() also writes the terminating '\0' */
					strcpy(final + dir_len, file);
				}
			
 
				+
			
 
				/* Try BASE followed by each entry of the NULL-terminated array SUFFIXES,
				 * in order. Returns 0 as soon as one candidate is readable (it is then left
				 * in LOCATED_FILE_NAME), 1 when none is. LOCATED_FILE_NAME is overwritten
				 * by every candidate that is tried.
				 */
				static int _starpu_src_common_test_suffixes(char *located_file_name, const char *base, const char **suffixes)
				{
					const char **suffix = suffixes;

					while (*suffix != NULL)
					{
						_starpu_src_common_cat_2(located_file_name, base, *suffix);
						if (access(located_file_name, R_OK) == 0)
							return 0;
						++suffix;
					}

					return 1;
				}
			
 
				+
			
 
				/* Find the executable to run on the device and store its path in
				 * LOCATED_FILE_NAME (provided by the caller, who is responsible for its
				 * size).
				 *
				 * Only the first non-NULL name among ENV_FILE_NAME, CONFIG_FILE_NAME and
				 * ACTUAL_FILE_NAME is considered:
				 *  - ENV_FILE_NAME or CONFIG_FILE_NAME: the name is used as is when it is
				 *    readable; otherwise, if ENV_MIC_PATH is set, the name is looked up
				 *    inside that directory and the result of access() is returned.
				 *  - ACTUAL_FILE_NAME: the name is tried with each of SUFFIXES (a
				 *    NULL-terminated array); otherwise, if ENV_MIC_PATH is set, longer and
				 *    longer trailing parts of the name ("c", then "b/c", then "a/b/c" for
				 *    "/a/b/c") are tried inside ENV_MIC_PATH, bare and then with each
				 *    suffix.
				 *
				 * Returns 0 when a readable file was found, a non-zero value otherwise
				 * (1, or -1 when it comes straight from access()).
				 *
				 * The scratch strings are sized from the inputs, so names longer than a
				 * fixed stack buffer cannot overflow anything local to this function.
				 */
				int _starpu_src_common_locate_file(char *located_file_name,
								   const char *env_file_name, const char *env_mic_path,
								   const char *config_file_name, const char *actual_file_name,
								   const char **suffixes)
				{
					/* A name given explicitly (environment first, then configuration)
					 * disables the search based on the actual file name. */
					const char *explicit_name = env_file_name != NULL ? env_file_name : config_file_name;
					char *actual_cpy, *candidate;
					size_t actual_len;
					int ret = 1;

					if (explicit_name != NULL)
					{
						if (access(explicit_name, R_OK) == 0)
						{
							strcpy(located_file_name, explicit_name);
							return 0;
						}

						if (env_mic_path == NULL)
							return 1;

						_starpu_src_common_dir_cat(located_file_name, env_mic_path, explicit_name);
						return access(located_file_name, R_OK);
					}

					if (actual_file_name == NULL)
						return 1;

					if (_starpu_src_common_test_suffixes(located_file_name, actual_file_name, suffixes) == 0)
						return 0;

					if (env_mic_path == NULL)
						return 1;

					/* A candidate is at most ENV_MIC_PATH + '/' + the whole name + '\0'. */
					actual_len = strlen(actual_file_name);
					actual_cpy = malloc(actual_len + 1);
					candidate = malloc(strlen(env_mic_path) + 1 + actual_len + 1);

					if (actual_cpy != NULL && candidate != NULL)
					{
						char *last;

						memcpy(actual_cpy, actual_file_name, actual_len + 1);

						last = strrchr(actual_cpy, '/');
						while (last != NULL)
						{
							char *previous;

							_starpu_src_common_dir_cat(candidate, env_mic_path, last);

							if (access(candidate, R_OK) == 0)
							{
								strcpy(located_file_name, candidate);
								ret = 0;
								break;
							}

							if (_starpu_src_common_test_suffixes(located_file_name, candidate, suffixes) == 0)
							{
								ret = 0;
								break;
							}

							/* Cut the name at the current '/' just long enough to find
							 * the previous one, so that the next candidate includes one
							 * more directory level. */
							*last = '\0';
							previous = strrchr(actual_cpy, '/');
							*last = '/';
							last = previous;
						}
					}

					free(candidate);
					free(actual_cpy);

					return ret;
				}
			
--- a/src/drivers/mp_common/source_common.h
+++ b/src/drivers/mp_common/source_common.h
@@ -0,0 +1,63 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+
			
 
				+#ifndef __SOURCE_COMMON_H__
			
 
				+#define __SOURCE_COMMON_H__
			
 
				+
			
 
				+
			
 
				+#ifdef STARPU_USE_MP
			
 
				+
			
 
				+#include <drivers/mp_common/mp_common.h>
			
 
				+
			
 
				+int _starpu_src_common_sink_nbcores (const struct _starpu_mp_node *node, int *buf);
			
 
				+
			
 
				+int _starpu_src_common_lookup(const struct _starpu_mp_node *node,
			
 
				+			      void (**func_ptr)(void), const char *func_name);
			
 
				+
			
 
				+int _starpu_src_common_execute_kernel(const struct _starpu_mp_node *node,
			
 
				+				      void (*kernel)(void), unsigned coreid,
			
 
				+				      starpu_data_handle_t *handles, void **interfaces, unsigned nb_interfaces,
			
 
				+				      void *cl_arg, size_t cl_arg_size);
			
 
				+
			
 
				+int _starpu_src_common_execute_kernel_from_task(const struct _starpu_mp_node *node,
			
 
				+						void (*kernel)(void), unsigned coreid,
			
 
				+						struct starpu_task *task);
			
 
				+
			
 
				+int _starpu_src_common_allocate(const struct _starpu_mp_node *mp_node,
			
 
				+				void **addr, size_t size);
			
 
				+
			
 
				+void _starpu_src_common_free(const struct _starpu_mp_node *mp_node,
			
 
				+			     void *addr);
			
 
				+
			
 
				+int _starpu_src_common_copy_host_to_sink(const struct _starpu_mp_node *mp_node,
			
 
				+					 void *src, void *dst, size_t size);
			
 
				+
			
 
				+int _starpu_src_common_copy_sink_to_host(const struct _starpu_mp_node *mp_node,
			
 
				+					 void *src, void *dst, size_t size);
			
 
				+
			
 
				+int _starpu_src_common_copy_sink_to_sink(const struct _starpu_mp_node *src_node,
			
 
				+					 const struct _starpu_mp_node *dst_node, void *src, void *dst, size_t size);
			
 
				+
			
 
				+int _starpu_src_common_locate_file(char *located_file_name,
			
 
				+				   const char *env_file_name, const char *env_mic_path,
			
 
				+				   const char *config_file_name, const char *actual_file_name,
			
 
				+				   const char **suffixes);
			
 
				+
			
 
				+#endif /* STARPU_USE_MP */
			
 
				+
			
 
				+
			
 
				+#endif /* __SOURCE_COMMON_H__ */
			
--- a/src/drivers/scc/driver_scc_common.c
+++ b/src/drivers/scc/driver_scc_common.c
@@ -0,0 +1,174 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <sys/mman.h>
			
 
				+#include <fcntl.h>
			
 
				+
			
 
				+#include <core/workers.h>
			
 
				+
			
 
				+#include <drivers/mp_common/mp_common.h>
			
 
				+#include <drivers/scc/driver_scc_common.h>
			
 
				+
			
 
				+#include <RCCE_lib.h>
			
 
				+#include <SCC_API.h>
			
 
				+
			
 
				+static int rcce_initialized;
			
 
				+
			
 
				+static int src_node_id;
			
 
				+
			
 
				+static t_vcharp rckncm_map;
			
 
				+static t_vcharp shm_addr;
			
 
				+
			
 
				+
			
 
				+static void _starpu_scc_set_src_node_id()
			
 
				+{
			
 
				+	int node_id = starpu_get_env_number("STARPU_SCC_MASTER_NODE");
			
 
				+
			
 
				+	if (node_id != -1)
			
 
				+	{
			
 
				+		if (node_id < RCCE_num_ues())
			
 
				+		{
			
 
				+			src_node_id = node_id;
			
 
				+			return;
			
 
				+		}
			
 
				+		else if (RCCE_ue() == 0)
			
 
				+		{
			
 
				+			/* Only node 0 print the error message. */
			
 
				+			fprintf(stderr, "The node you specify to be the master is "
			
 
				+					"greater than the total number of nodes.\n"
			
 
				+					"Taking node 0 (core %d) by default...\n", RC_COREID[0]);
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	/* Node 0 by default. */
			
 
				+	src_node_id = 0;
			
 
				+}
			
 
				+
			
 
				+/* Try to init the RCCE API.
			
 
				+ * return: 	1 on success
			
 
				+ * 			0 on failure
			
 
				+ */
			
 
				+int _starpu_scc_common_mp_init()
			
 
				+{
			
 
				+	int rckncm_fd;
			
 
				+
			
 
				+	/* "/dev/rckncm" is to access shared memory on SCC. */
			
 
				+	if ((rckncm_fd = open("/dev/rckncm", O_RDWR | O_SYNC)) < 0)
			
 
				+	{
			
 
				+		/* It seems that we're not on a SCC system. */
			
 
				+		return (rcce_initialized = 0);
			
 
				+	}
			
 
				+
			
 
				+	int page_size = getpagesize();
			
 
				+	unsigned int aligne_addr = (SHM_ADDR) & (~(page_size - 1));
			
 
				+	if ((rckncm_map = (t_vcharp)mmap(NULL, SHMSIZE, PROT_WRITE | PROT_READ, MAP_SHARED,
			
 
				+					rckncm_fd, aligne_addr)) == MAP_FAILED)
			
 
				+	{
			
 
				+		perror("mmap");
			
 
				+		close(rckncm_fd);
			
 
				+		return (rcce_initialized = 0);
			
 
				+	}
			
 
				+
			
 
				+	int *argc = _starpu_get_argc();
			
 
				+	char ***argv = _starpu_get_argv();
			
 
				+
			
 
				+	/* We can't initialize RCCE without argc and argv. */
			
 
				+	if (!argc || *argc <= 1 || !argv || (RCCE_init(argc, argv) != RCCE_SUCCESS))
			
 
				+	{
			
 
				+		close(rckncm_fd);
			
 
				+		munmap((void*)rckncm_map, SHMSIZE);
			
 
				+		return (rcce_initialized = 0);
			
 
				+	}
			
 
				+
			
 
				+	unsigned int page_offset = (SHM_ADDR) - aligne_addr;
			
 
				+	shm_addr = rckncm_map + page_offset;
			
 
				+
			
 
				+	RCCE_shmalloc_init(shm_addr, RCCE_SHM_SIZE_MAX);
			
 
				+
			
 
				+	/* Which core of the SCC will be the master one? */
			
 
				+	_starpu_scc_set_src_node_id();
			
 
				+
			
 
				+	close(rckncm_fd);
			
 
				+
			
 
				+	return (rcce_initialized = 1);
			
 
				+}
			
 
				+
			
 
				+void *_starpu_scc_common_get_shared_memory_addr()
			
 
				+{
			
 
				+	return (void*)shm_addr;
			
 
				+}
			
 
				+
			
 
				+void _starpu_scc_common_unmap_shared_memory()
			
 
				+{
			
 
				+	munmap((void*)rckncm_map, SHMSIZE);
			
 
				+}
			
 
				+
			
 
				+/* To know if the pointer "ptr" points into the shared memory map */
			
 
				+int _starpu_scc_common_is_in_shared_memory(void *ptr)
			
 
				+{
			
 
				+	return (void*)shm_addr <= ptr && ptr < (void*)shm_addr + SHMSIZE;
			
 
				+}
			
 
				+
			
 
				+int _starpu_scc_common_is_mp_initialized()
			
 
				+{
			
 
				+	return rcce_initialized;
			
 
				+}
			
 
				+
			
 
				+int _starpu_scc_common_get_src_node_id()
			
 
				+{
			
 
				+	return src_node_id;
			
 
				+}
			
 
				+
			
 
				+int _starpu_scc_common_is_src_node()
			
 
				+{
			
 
				+	return RCCE_ue() == src_node_id;
			
 
				+}
			
 
				+
			
 
				+void _starpu_scc_common_send(const struct _starpu_mp_node *node, void *msg, int len)
			
 
				+{
			
 
				+	int ret;
			
 
				+
			
 
				+	/* There are potentially 48 threads running on the master core and RCCE_send write
			
 
				+	 * data in the MPB associated to this core. It's not thread safe, so we have to protect it.
			
 
				+	 * RCCE_acquire_lock uses a test&set register on SCC. */
			
 
				+	RCCE_acquire_lock(RCCE_ue());
			
 
				+
			
 
				+	if ((ret = RCCE_send(msg, len, node->mp_connection.scc_nodeid)) != RCCE_SUCCESS)
			
 
				+	{
			
 
				+		RCCE_release_lock(RCCE_ue());
			
 
				+		STARPU_MP_COMMON_REPORT_ERROR(node, ret);
			
 
				+	}
			
 
				+
			
 
				+	RCCE_release_lock(RCCE_ue());
			
 
				+}
			
 
				+
			
 
				+void _starpu_scc_common_recv(const struct _starpu_mp_node *node, void *msg, int len)
			
 
				+{
			
 
				+	int ret;
			
 
				+	if ((ret = RCCE_recv(msg, len, node->mp_connection.scc_nodeid)) != RCCE_SUCCESS)
			
 
				+		STARPU_MP_COMMON_REPORT_ERROR(node, ret);
			
 
				+}
			
 
				+
			
 
				+void _starpu_scc_common_report_rcce_error(const char *func, const char *file, const int line, const int err_no)
			
 
				+{
			
 
				+	char error_string[RCCE_MAX_ERROR_STRING];
			
 
				+	int error_string_length;
			
 
				+
			
 
				+	RCCE_error_string(err_no, error_string, &error_string_length); 
			
 
				+
			
 
				+	fprintf(stderr, "RCCE error in %s (%s:%d): %s\n", func, file, line, error_string); 
			
 
				+	STARPU_ABORT();
			
 
				+}
			
--- a/src/drivers/scc/driver_scc_common.h
+++ b/src/drivers/scc/driver_scc_common.h
@@ -0,0 +1,50 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __DRIVER_SCC_COMMON_H__
			
 
				+#define __DRIVER_SCC_COMMON_H__
			
 
				+
			
 
				+#include <common/config.h>
			
 
				+
			
 
				+
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+
			
 
				+#include <RCCE_lib.h>
			
 
				+
			
 
				+#include <drivers/mp_common/mp_common.h>
			
 
				+
			
 
				/* Convert a sink id into a RCCE node id by skipping the id of the calling
				 * node: ids lower than RCCE_ue() are kept, the others are shifted by one.
				 * The whole expansion is parenthesized so that the macro can be used inside
				 * a larger expression. ID is evaluated twice.
				 * NOTE(review): on a sink RCCE_ue() is the sink's own id, not the master's;
				 * confirm that skipping the caller rather than the master node is intended. */
				#define STARPU_TO_SCC_SINK_ID(id) ((id) < RCCE_ue() ? (id) : ((id) + 1))
			
 
				+
			
 
				/* Initialisation of the shared memory mapping and of RCCE
				 * (1 on success, 0 on failure). */
				int _starpu_scc_common_mp_init();

				/* Shared memory: address, release of the mapping, membership test. */
				void *_starpu_scc_common_get_shared_memory_addr();
				void _starpu_scc_common_unmap_shared_memory();
				int _starpu_scc_common_is_in_shared_memory(void *ptr);

				/* Outcome of the initialisation. */
				int _starpu_scc_common_is_mp_initialized();

				/* Master (source) node: its id, and whether the caller is that node. */
				int _starpu_scc_common_get_src_node_id();
				int _starpu_scc_common_is_src_node();

				/* Blocking exchange of LEN bytes with the SCC node linked to NODE. */
				void _starpu_scc_common_send(const struct _starpu_mp_node *node,
							     void *msg, int len);
				void _starpu_scc_common_recv(const struct _starpu_mp_node *node,
							     void *msg, int len);

				/* Print the RCCE error ERR_NO with its location, then abort. */
				void _starpu_scc_common_report_rcce_error(const char *func, const char *file,
									  const int line, const int err_no);
			
 
				+
			
 
				+#endif /* STARPU_USE_SCC */
			
 
				+
			
 
				+
			
 
				+#endif /* __DRIVER_SCC_COMMON_H__ */
			
--- a/src/drivers/scc/driver_scc_sink.c
+++ b/src/drivers/scc/driver_scc_sink.c
@@ -0,0 +1,125 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+
			
 
				+#include <RCCE.h>
			
 
				+
			
 
				+#include <drivers/mp_common/sink_common.h>
			
 
				+#include <drivers/scc/driver_scc_common.h>
			
 
				+#include <drivers/scc/driver_scc_sink.h>
			
 
				+
			
 
				+
			
 
				+
			
 
				+void _starpu_scc_sink_init(struct _starpu_mp_node *node)
			
 
				+{
			
 
				+	node->mp_connection.scc_nodeid = _starpu_scc_common_get_src_node_id();
			
 
				+}
			
 
				+
			
 
				/* Release the shared memory mapping, then shut RCCE down.
				 * NODE is not used. */
				void _starpu_scc_sink_deinit(struct _starpu_mp_node *node)
				{
					/* silence the "unused parameter" warning */
					(void)node;

					_starpu_scc_common_unmap_shared_memory();
					RCCE_finalize();
				}
			
 
				+
			
 
				+void _starpu_scc_sink_send_to_device(const struct _starpu_mp_node *node, int dst_devid, void *msg, int len)
			
 
				+{
			
 
				+	int ret;
			
 
				+	if ((ret = RCCE_send(msg, len, STARPU_TO_SCC_SINK_ID(dst_devid))) != RCCE_SUCCESS)
			
 
				+		STARPU_MP_COMMON_REPORT_ERROR(node, ret);
			
 
				+}
			
 
				+
			
 
				+void _starpu_scc_sink_recv_from_device(const struct _starpu_mp_node *node, int src_devid, void *msg, int len)
			
 
				+{
			
 
				+	int ret;
			
 
				+	if ((ret = RCCE_recv(msg, len, STARPU_TO_SCC_SINK_ID(src_devid))) != RCCE_SUCCESS)
			
 
				+		STARPU_MP_COMMON_REPORT_ERROR(node, ret);
			
 
				+}
			
 
				+
			
 
				+/* arg -> [Function pointer on sink, number of interfaces, interfaces
			
 
				+ * (union _starpu_interface), cl_arg]
			
 
				+ *
			
 
				+ * This function change the dev_handle and the ptr of each interfaces
			
 
				+ * given to the sink.
			
 
				+ * dev_handle 	-> 	start of the shared memory (different for each sink)
			
 
				+ * ptr 			-> 	dev_handle + offset
			
 
				+ */
			
 
				+void _starpu_scc_sink_execute(const struct _starpu_mp_node *node, void *arg, int arg_size)
			
 
				+{
			
 
				+	void *local_arg = arg;
			
 
				+
			
 
				+	/* point after the kernel */
			
 
				+	local_arg += sizeof(void(*)(void**, void*));
			
 
				+
			
 
				+	unsigned nb_interfaces = *(unsigned*)local_arg;
			
 
				+	local_arg += sizeof(nb_interfaces);
			
 
				+
			
 
				+	uintptr_t shm_addr = (uintptr_t)_starpu_scc_common_get_shared_memory_addr();
			
 
				+
			
 
				+	unsigned i;
			
 
				+	for (i = 0; i < nb_interfaces; ++i)
			
 
				+	{
			
 
				+		/* The first field of an interface is the interface id. */
			
 
				+		switch (*(enum starpu_data_interface_id *)local_arg)
			
 
				+		{
			
 
				+			case STARPU_MATRIX_INTERFACE_ID:
			
 
				+			{
			
 
				+				struct starpu_matrix_interface *matrix = (struct starpu_matrix_interface *)local_arg;
			
 
				+				matrix->dev_handle = shm_addr;
			
 
				+				matrix->ptr = matrix->dev_handle + matrix->offset;
			
 
				+				break;
			
 
				+			}
			
 
				+
			
 
				+			case STARPU_BLOCK_INTERFACE_ID:
			
 
				+			{
			
 
				+				struct starpu_block_interface *block = (struct starpu_block_interface *)local_arg;
			
 
				+				block->dev_handle = shm_addr;
			
 
				+				block->ptr = block->dev_handle + block->offset;
			
 
				+				break;
			
 
				+			}
			
 
				+
			
 
				+			case STARPU_VECTOR_INTERFACE_ID:
			
 
				+			{
			
 
				+				struct starpu_vector_interface *vector = (struct starpu_vector_interface *)local_arg;
			
 
				+				vector->dev_handle = shm_addr;
			
 
				+				vector->ptr = vector->dev_handle + vector->offset;
			
 
				+				break;
			
 
				+			}
			
 
				+
			
 
				+			case STARPU_VARIABLE_INTERFACE_ID:
			
 
				+			{
			
 
				+				struct starpu_variable_interface *variable = (struct starpu_variable_interface *)local_arg;
			
 
				+				variable->dev_handle = shm_addr;
			
 
				+				variable->ptr = variable->dev_handle + variable->offset;
			
 
				+				break;
			
 
				+			}
			
 
				+
			
 
				+			case STARPU_CSR_INTERFACE_ID:
			
 
				+			case STARPU_BCSR_INTERFACE_ID:
			
 
				+			case STARPU_MULTIFORMAT_INTERFACE_ID:
			
 
				+			fprintf(stderr, "Data type not supported on SCC.\n");
			
 
				+
			
 
				+			default:
			
 
				+				STARPU_ABORT();
			
 
				+		}
			
 
				+
			
 
				+		/* point to the next interface */
			
 
				+		local_arg += sizeof(union _starpu_interface);
			
 
				+	}
			
 
				+
			
 
				+	_starpu_sink_common_execute(node, arg, arg_size);
			
 
				+}
			
--- a/src/drivers/scc/driver_scc_sink.h
+++ b/src/drivers/scc/driver_scc_sink.h
@@ -0,0 +1,38 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __DRIVER_SCC_SINK_H__
			
 
				+#define __DRIVER_SCC_SINK_H__
			
 
				+
			
 
				+#include <common/config.h>
			
 
				+
			
 
				+
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+
			
 
				+#include <drivers/mp_common/mp_common.h>
			
 
				+
			
 
				+void _starpu_scc_sink_init(struct _starpu_mp_node *node);
			
 
				+void _starpu_scc_sink_deinit(struct _starpu_mp_node *node);
			
 
				+
			
 
				+void _starpu_scc_sink_send_to_device(const struct _starpu_mp_node *node, int dst_devid, void *msg, int len);
			
 
				+void _starpu_scc_sink_recv_from_device(const struct _starpu_mp_node *node, int src_devid, void *msg, int len);
			
 
				+
			
 
				+void _starpu_scc_sink_execute(const struct _starpu_mp_node *node, void *arg, int arg_size);
			
 
				+
			
 
				+#endif /* STARPU_USE_SCC */
			
 
				+
			
 
				+
			
 
				+#endif /* __DRIVER_SCC_SINK_H__ */
			
--- a/src/drivers/scc/driver_scc_source.c
+++ b/src/drivers/scc/driver_scc_source.c
@@ -0,0 +1,408 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <starpu_profiling.h>
			
 
				+#include <core/sched_policy.h>
			
 
				+#include <core/task.h>
			
 
				+
			
 
				+#include <RCCE.h>
			
 
				+
			
 
				+#include <drivers/driver_common/driver_common.h>
			
 
				+#include <drivers/mp_common/source_common.h>
			
 
				+#include <drivers/scc/driver_scc_common.h>
			
 
				+#include <drivers/scc/driver_scc_source.h>
			
 
				+
			
 
				+static struct _starpu_mp_node *scc_mp_nodes[STARPU_MAXSCCDEVS]; /* mp_node of each SCC device, indexed by device id */
			
 
				+
			
 
				+struct _starpu_scc_kernel /* one entry per registered kernel name */
			
 
				+{
			
 
				+	char *name; /* heap copy of the function name given at registration */
			
 
				+	starpu_scc_kernel_t func[STARPU_MAXSCCDEVS]; /* address of the function on each device, indexed by device id; filled in by _starpu_scc_src_get_kernel() */
			
 
				+};
			
 
				+
			
 
				+static struct _starpu_htbl kernels_htbl; /* registered kernels, keyed by function name */
			
 
				+starpu_pthread_mutex_t htbl_mutex = STARPU_PTHREAD_MUTEX_INITIALIZER; /* held around searches and insertions in kernels_htbl */
			
 
				+
			
 
				+/* Return the mp_node of the SCC device backing MEMORY_NODE.
			
 
				+ * The memory node is translated to a device id, which indexes scc_mp_nodes.
			
 
				+ */
			
 
				+static struct _starpu_mp_node *_starpu_scc_src_memory_node_to_mp_node(unsigned memory_node)
			
 
				+{
			
 
				+	int devid = _starpu_memory_node_to_devid(memory_node);
			
 
				+
			
 
				+	/* devid is signed: reject negative ids as well as too large ones
			
 
				+	 * before using it as an array index. */
			
 
				+	STARPU_ASSERT(devid >= 0 && devid < STARPU_MAXSCCDEVS);
			
 
				+	return scc_mp_nodes[devid];
			
 
				+}
			
 
				+
			
 
				+/* Create the mp_node used to communicate (through RCCE) with the SCC
			
 
				+ * device DEVID and record it in scc_mp_nodes.
			
 
				+ */
			
 
				+static void _starpu_scc_src_init_context(int devid)
			
 
				+{
			
 
				+	struct _starpu_mp_node *source_node;
			
 
				+
			
 
				+	source_node = _starpu_mp_common_node_create(STARPU_SCC_SOURCE, devid);
			
 
				+	scc_mp_nodes[devid] = source_node;
			
 
				+}
			
 
				+
			
 
				+/* Send STARPU_EXIT to the SCC device DEVID, then destroy its mp_node. */
			
 
				+static void _starpu_scc_src_deinit_context(int devid)
			
 
				+{
			
 
				+	struct _starpu_mp_node *source_node = scc_mp_nodes[devid];
			
 
				+
			
 
				+	_starpu_mp_common_send_command(source_node, STARPU_EXIT, NULL, 0);
			
 
				+	_starpu_mp_common_node_destroy(source_node);
			
 
				+}
			
 
				+
			
 
				+/* Execute job J on the SCC device driven by worker ARGS.
			
 
				+ *
			
 
				+ * The task input is fetched, the device-side kernel is resolved (through
			
 
				+ * the codelet's SCC function if there is one, otherwise by registering and
			
 
				+ * looking up the CPU function name), the kernel is run through the common
			
 
				+ * source code and the task output is pushed.
			
 
				+ *
			
 
				+ * Returns 0 on success, or -EAGAIN when the task input could not be fetched.
			
 
				+ * Asserts that a kernel could be resolved.
			
 
				+ */
			
 
				+static int _starpu_scc_src_execute_job(struct _starpu_job *j, struct _starpu_worker *args)
			
 
				+{
			
 
				+	int ret;
			
 
				+	uint32_t mask = 0;
			
 
				+
			
 
				+	STARPU_ASSERT(j);
			
 
				+	struct starpu_task *task = j->task;
			
 
				+
			
 
				+	struct timespec codelet_start, codelet_end;
			
 
				+
			
 
				+	int profiling = starpu_profiling_status_get();
			
 
				+
			
 
				+	STARPU_ASSERT(task);
			
 
				+	struct starpu_codelet *cl = task->cl;
			
 
				+	STARPU_ASSERT(cl);
			
 
				+
			
 
				+	ret = _starpu_fetch_task_input(j, mask);
			
 
				+	if (ret != 0)
			
 
				+	{
			
 
				+		/* there was not enough memory, so the input of
			
 
				+		 * the codelet cannot be fetched ... put the
			
 
				+		 * codelet back, and try it later */
			
 
				+		return -EAGAIN;
			
 
				+	}
			
 
				+
			
 
				+	starpu_scc_kernel_t kernel = NULL;
			
 
				+
			
 
				+	starpu_scc_func_t func = _starpu_task_get_scc_nth_implementation(cl, j->nimpl);
			
 
				+	if (func)
			
 
				+	{
			
 
				+		/* We execute the function contained in the codelet, it must return a
			
 
				+		 * pointer to the function to execute on the device, either specified
			
 
				+		 * directly by the user or by a call to starpu_scc_get_kernel().
			
 
				+		 */
			
 
				+		kernel = func();
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		/* If user doesn't define any starpu_scc_func_t in cl->scc_funcs we try to use
			
 
				+		 * cpu_funcs_name.
			
 
				+		 */
			
 
				+		char *func_name = _starpu_task_get_cpu_name_nth_implementation(cl, j->nimpl);
			
 
				+		if (func_name)
			
 
				+		{
			
 
				+			starpu_scc_func_symbol_t symbol;
			
 
				+
			
 
				+			/* SYMBOL is only written when registration succeeds: on
			
 
				+			 * failure keep kernel NULL so that the assertion below
			
 
				+			 * fires instead of looking up an uninitialized symbol. */
			
 
				+			if (_starpu_scc_src_register_kernel(&symbol, func_name) == 0)
			
 
				+				kernel = _starpu_scc_src_get_kernel(symbol);
			
 
				+		}
			
 
				+	}
			
 
				+	STARPU_ASSERT(kernel);
			
 
				+
			
 
				+	_starpu_driver_start_job(args, j, &codelet_start, 0, profiling);
			
 
				+
			
 
				+	_starpu_src_common_execute_kernel_from_task(scc_mp_nodes[args->devid], (void (*)(void)) kernel, task);
			
 
				+
			
 
				+	_starpu_driver_end_job(args, j, args->perf_arch, &codelet_end, 0, profiling);
			
 
				+
			
 
				+	_starpu_driver_update_job_feedback(j, args, args->perf_arch, &codelet_start, &codelet_end, profiling);
			
 
				+
			
 
				+	_starpu_push_task_output(j, mask);
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/* Tear down the SCC layer on the source side: unmap the shared memory,
			
 
				+ * then finalize RCCE.
			
 
				+ */
			
 
				+void _starpu_scc_src_mp_deinit()
			
 
				+{
			
 
				+	/* The shared memory is unmapped before RCCE itself is finalized */
			
 
				+	_starpu_scc_common_unmap_shared_memory();
			
 
				+
			
 
				+	RCCE_finalize();
			
 
				+}
			
 
				+
			
 
				+/* Register the device function named FUNC_NAME and return, through SYMBOL,
			
 
				+ * the handle to pass to _starpu_scc_src_get_kernel().
			
 
				+ *
			
 
				+ * Kernels are kept in kernels_htbl, keyed by name: registering a name that
			
 
				+ * is already known returns the existing entry. The per-device function
			
 
				+ * pointers of a new entry are all NULL; _starpu_scc_src_get_kernel() fills
			
 
				+ * them in on demand.
			
 
				+ *
			
 
				+ * Returns 0 on success and -ENOMEM when the entry could not be allocated
			
 
				+ * or inserted. *SYMBOL is only written on success.
			
 
				+ */
			
 
				+int _starpu_scc_src_register_kernel(starpu_scc_func_symbol_t *symbol, const char *func_name)
			
 
				+{
			
 
				+	unsigned int func_name_size = (strlen(func_name) + 1) * sizeof(char);
			
 
				+
			
 
				+	STARPU_PTHREAD_MUTEX_LOCK(&htbl_mutex);
			
 
				+	struct _starpu_scc_kernel *kernel = _starpu_htbl_search(&kernels_htbl, func_name, func_name_size);
			
 
				+
			
 
				+	if (kernel != NULL)
			
 
				+	{
			
 
				+		STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
			
 
				+		// Function already in the table.
			
 
				+		*symbol = kernel;
			
 
				+		return 0;
			
 
				+	}
			
 
				+
			
 
				+	kernel = malloc(sizeof(*kernel));
			
 
				+	if (kernel == NULL)
			
 
				+	{
			
 
				+		STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
			
 
				+		return -ENOMEM;
			
 
				+	}
			
 
				+
			
 
				+	kernel->name = malloc(func_name_size);
			
 
				+	if (kernel->name == NULL)
			
 
				+	{
			
 
				+		STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
			
 
				+		free(kernel);
			
 
				+		return -ENOMEM;
			
 
				+	}
			
 
				+
			
 
				+	/* Fully initialize the entry before publishing it: once it is in the
			
 
				+	 * table and the mutex is released, another thread may find and use it. */
			
 
				+	memcpy(kernel->name, func_name, func_name_size);
			
 
				+
			
 
				+	/* Clear every slot, not only those of the workers counted so far:
			
 
				+	 * _starpu_scc_src_get_kernel() indexes this array by device id. */
			
 
				+	unsigned int i;
			
 
				+	for (i = 0; i < STARPU_MAXSCCDEVS; ++i)
			
 
				+		kernel->func[i] = NULL;
			
 
				+
			
 
				+	int ret = _starpu_htbl_insert(&kernels_htbl, func_name, func_name_size, kernel);
			
 
				+	STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
			
 
				+	if (ret != 0)
			
 
				+	{
			
 
				+		/* The entry was never published, nobody else can hold it */
			
 
				+		free(kernel->name);
			
 
				+		free(kernel);
			
 
				+		return -ENOMEM;
			
 
				+	}
			
 
				+
			
 
				+	*symbol = kernel;
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+/* Return the address, on the device driven by the calling worker, of the
			
 
				+ * kernel designated by SYMBOL, or NULL on failure.
			
 
				+ *
			
 
				+ * The address is looked up on the device the first time it is requested
			
 
				+ * and cached in the kernel entry for later calls.
			
 
				+ */
			
 
				+starpu_scc_kernel_t _starpu_scc_src_get_kernel(starpu_scc_func_symbol_t symbol)
			
 
				+{
			
 
				+	struct _starpu_scc_kernel *kernel = symbol;
			
 
				+
			
 
				+	/* SYMBOL may still be NULL when its registration failed: report a
			
 
				+	 * failure instead of dereferencing it. */
			
 
				+	if (kernel == NULL)
			
 
				+		return NULL;
			
 
				+
			
 
				+	int workerid = starpu_worker_get_id();
			
 
				+	/* This function has to be called in the codelet only, by the thread
			
 
				+	 * which will handle the task */
			
 
				+	if (workerid < 0)
			
 
				+		return NULL;
			
 
				+
			
 
				+	int devid = starpu_worker_get_devid(workerid);
			
 
				+
			
 
				+	/* devid indexes both kernel->func and scc_mp_nodes */
			
 
				+	if (devid < 0 || devid >= STARPU_MAXSCCDEVS)
			
 
				+		return NULL;
			
 
				+
			
 
				+	if (kernel->func[devid] == NULL)
			
 
				+	{
			
 
				+		struct _starpu_mp_node *node = scc_mp_nodes[devid];
			
 
				+		int ret = _starpu_src_common_lookup(node, (void (**)(void))&kernel->func[devid], kernel->name);
			
 
				+
			
 
				+		if (ret)
			
 
				+			return NULL;
			
 
				+	}
			
 
				+
			
 
				+	return kernel->func[devid];
			
 
				+}
			
 
				+
			
 
				+/* Return the number of SCC devices: the number of RCCE units of execution
			
 
				+ * minus one (never less than 0), or 0 when
			
 
				+ * _starpu_scc_common_is_mp_initialized() reports false.
			
 
				+ */
			
 
				+unsigned _starpu_scc_src_get_device_count()
			
 
				+{
			
 
				+	if (!_starpu_scc_common_is_mp_initialized())
			
 
				+		return 0;
			
 
				+
			
 
				+	/* One unit of execution is not counted as a device
			
 
				+	 * (presumably the one running this source code -- to be confirmed) */
			
 
				+	int remaining_ues = RCCE_num_ues() - 1;
			
 
				+
			
 
				+	return remaining_ues > 0 ? remaining_ues : 0;
			
 
				+}
			
 
				+
			
 
				+/* Make the SCC device DEVID exit: a temporary mp_node is created only to
			
 
				+ * send it STARPU_EXIT, and is destroyed right after.
			
 
				+ */
			
 
				+void _starpu_scc_exit_useless_node(int devid)
			
 
				+{
			
 
				+	struct _starpu_mp_node *tmp_node;
			
 
				+
			
 
				+	tmp_node = _starpu_mp_common_node_create(STARPU_SCC_SOURCE, devid);
			
 
				+	_starpu_mp_common_send_command(tmp_node, STARPU_EXIT, NULL, 0);
			
 
				+	_starpu_mp_common_node_destroy(tmp_node);
			
 
				+}
			
 
				+
			
 
				+/* Source-side initialization of NODE: derive the SCC node id of the sink
			
 
				+ * from NODE's peer id and store it in the connection data.
			
 
				+ */
			
 
				+void _starpu_scc_src_init(struct _starpu_mp_node *node)
			
 
				+{
			
 
				+	node->mp_connection.scc_nodeid =
			
 
				+		STARPU_TO_SCC_SINK_ID(node->peer_id);
			
 
				+}
			
 
				+
			
 
				+/* Allocate SIZE bytes on the SCC device backing MEMORY_NODE; the address is
			
 
				+ * passed back through ADDR.
			
 
				+ * Return 0 if OK or 1 if not.
			
 
				+ */
			
 
				+int _starpu_scc_allocate_memory(void **addr, size_t size, unsigned memory_node)
			
 
				+{
			
 
				+	struct _starpu_mp_node *mp_node = _starpu_scc_src_memory_node_to_mp_node(memory_node);
			
 
				+
			
 
				+	return _starpu_src_common_allocate(mp_node, addr, size);
			
 
				+}
			
 
				+
			
 
				+/* Free the memory at ADDR on the SCC device backing MEMORY_NODE.
			
 
				+ */
			
 
				+void _starpu_scc_free_memory(void *addr, unsigned memory_node)
			
 
				+{
			
 
				+	struct _starpu_mp_node *mp_node = _starpu_scc_src_memory_node_to_mp_node(memory_node);
			
 
				+
			
 
				+	/* No "return <expression>;" here: ISO C forbids it in a function
			
 
				+	 * returning void. */
			
 
				+	_starpu_src_common_free(mp_node, addr);
			
 
				+}
			
 
				+
			
 
				+/* Allocate SIZE bytes with RCCE_shmalloc() and store the address in *ADDR.
			
 
				+ * Returns 0 on success and 1 when the allocation returned NULL.
			
 
				+ */
			
 
				+int _starpu_scc_allocate_shared_memory(void **addr, size_t size)
			
 
				+{
			
 
				+	*addr = (void*)RCCE_shmalloc(size);
			
 
				+
			
 
				+	return *addr == NULL;
			
 
				+}
			
 
				+
			
 
				+/* Release, with RCCE_shfree(), the memory at ADDR. */
			
 
				+void _starpu_scc_free_shared_memory(void *addr)
			
 
				+{
			
 
				+	/* ADDR is handed to RCCE untouched */
			
 
				+	RCCE_shfree(addr);
			
 
				+}
			
 
				+
			
 
				+/* Locate PTR inside the SCC shared memory.
			
 
				+ *
			
 
				+ * When SCC tasks can be submitted, *DEV_HANDLE receives the start address
			
 
				+ * of the shared memory and *OFFSET the distance, in bytes, from that start
			
 
				+ * to PTR. Using the start of the shared memory as handle is useful for
			
 
				+ * data partitioning. Either output pointer may be NULL, it is then skipped.
			
 
				+ *
			
 
				+ * Aborts when PTR is not in shared memory. Does nothing when SCC tasks
			
 
				+ * cannot be submitted.
			
 
				+ */
			
 
				+void _starpu_scc_set_offset_in_shared_memory(void *ptr, void **dev_handle, size_t *offset)
			
 
				+{
			
 
				+	/* Nothing to do unless we're on SCC */
			
 
				+	if (!_starpu_can_submit_scc_task())
			
 
				+		return;
			
 
				+
			
 
				+	if (!_starpu_scc_common_is_in_shared_memory(ptr))
			
 
				+	{
			
 
				+		fprintf(stderr, "The data (%p) you want to register does not seem to be allocated in shared memory. "
			
 
				+				"Please use starpu_malloc to do this.\n", ptr);
			
 
				+		STARPU_ABORT();
			
 
				+	}
			
 
				+
			
 
				+	void *shm_addr = _starpu_scc_common_get_shared_memory_addr();
			
 
				+
			
 
				+	if (dev_handle)
			
 
				+		*dev_handle = shm_addr;
			
 
				+
			
 
				+	/* Arithmetic on void * is a GNU extension: subtract through char *
			
 
				+	 * to get the same byte distance in standard C. */
			
 
				+	if (offset)
			
 
				+		*offset = (size_t)((char *)ptr - (char *)shm_addr);
			
 
				+}
			
 
				+
			
 
				+/* Transfer SIZE bytes from the address SRC on the source side to the
			
 
				+ * address DST on the SCC device backing memory node DST_NODE.
			
 
				+ * SRC_NODE is not used. The result of the common copy routine is returned.
			
 
				+ */
			
 
				+int _starpu_scc_copy_src_to_sink(void *src, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst, unsigned dst_node, size_t size)
			
 
				+{
			
 
				+	struct _starpu_mp_node *sink = _starpu_scc_src_memory_node_to_mp_node(dst_node);
			
 
				+
			
 
				+	return _starpu_src_common_copy_host_to_sink(sink, src, dst, size);
			
 
				+}
			
 
				+
			
 
				+/* Transfer SIZE bytes from the address SRC on the SCC device backing
			
 
				+ * memory node SRC_NODE to the address DST on the source side.
			
 
				+ * DST_NODE is not used. The result of the common copy routine is returned.
			
 
				+ */
			
 
				+int _starpu_scc_copy_sink_to_src(void *src, unsigned src_node, void *dst, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size)
			
 
				+{
			
 
				+	struct _starpu_mp_node *sink = _starpu_scc_src_memory_node_to_mp_node(src_node);
			
 
				+
			
 
				+	return _starpu_src_common_copy_sink_to_host(sink, src, dst, size);
			
 
				+}
			
 
				+
			
 
				+/* Transfer SIZE bytes from the address SRC on the SCC device backing
			
 
				+ * memory node SRC_NODE to the address DST on the SCC device backing memory
			
 
				+ * node DST_NODE. The result of the common copy routine is returned.
			
 
				+ */
			
 
				+int _starpu_scc_copy_sink_to_sink(void *src, unsigned src_node, void *dst, unsigned dst_node, size_t size)
			
 
				+{
			
 
				+	struct _starpu_mp_node *from = _starpu_scc_src_memory_node_to_mp_node(src_node);
			
 
				+	struct _starpu_mp_node *to = _starpu_scc_src_memory_node_to_mp_node(dst_node);
			
 
				+
			
 
				+	return _starpu_src_common_copy_sink_to_sink(from, to, src, dst, size);
			
 
				+}
			
 
				+
			
 
				+/* Body of the source-side thread driving one SCC device.
			
 
				+ *
			
 
				+ * ARG is the struct _starpu_worker of the device. The thread creates the
			
 
				+ * device context, signals the main thread that it is ready, then, as long
			
 
				+ * as the machine is running, makes data requests progress and executes the
			
 
				+ * tasks it gets on the device. On exit it handles the pending data
			
 
				+ * requests, frees the automatically allocated buffers and shuts the device
			
 
				+ * context down. Always returns NULL.
			
 
				+ */
			
 
				+void *_starpu_scc_src_worker(void *arg)
			
 
				+{
			
 
				+	struct _starpu_worker *args = arg;
			
 
				+
			
 
				+	int devid = args->devid;
			
 
				+	int workerid = args->workerid;
			
 
				+	unsigned memnode = args->memory_node;
			
 
				+
			
 
				+	_starpu_worker_init(args, _STARPU_FUT_SCC_KEY);
			
 
				+
			
 
				+	_starpu_scc_src_init_context(devid);
			
 
				+
			
 
				+	args->status = STATUS_UNKNOWN;
			
 
				+
			
 
				+	_STARPU_TRACE_WORKER_INIT_END
			
 
				+
			
 
				+	/* tell the main thread that this one is ready */
			
 
				+	/* The mutex macros are the STARPU_PTHREAD_MUTEX_* ones, as used for
			
 
				+	 * htbl_mutex in this file. */
			
 
				+	STARPU_PTHREAD_MUTEX_LOCK(&args->mutex);
			
 
				+	args->worker_is_initialized = 1;
			
 
				+	_STARPU_PTHREAD_COND_SIGNAL(&args->ready_cond);
			
 
				+	STARPU_PTHREAD_MUTEX_UNLOCK(&args->mutex);
			
 
				+
			
 
				+	struct _starpu_job * j;
			
 
				+	struct starpu_task *task;
			
 
				+	int res;
			
 
				+
			
 
				+	while (_starpu_machine_is_running())
			
 
				+	{
			
 
				+		_STARPU_TRACE_START_PROGRESS(memnode);
			
 
				+		_starpu_datawizard_progress(memnode, 1);
			
 
				+		_STARPU_TRACE_END_PROGRESS(memnode);
			
 
				+
			
 
				+		task = _starpu_get_worker_task(args, workerid, memnode);
			
 
				+		if (!task)
			
 
				+			continue;
			
 
				+
			
 
				+		j = _starpu_get_job_associated_to_task(task);
			
 
				+
			
 
				+		/* can a SCC device do that task ? */
			
 
				+		if (!_STARPU_SCC_MAY_PERFORM(j))
			
 
				+		{
			
 
				+			/* this isn't a SCC task */
			
 
				+			_starpu_push_task_to_workers(j);
			
 
				+			continue;
			
 
				+		}
			
 
				+
			
 
				+		_starpu_set_current_task(task);
			
 
				+		args->current_task = j->task;
			
 
				+
			
 
				+		res = _starpu_scc_src_execute_job(j, args);
			
 
				+
			
 
				+		_starpu_set_current_task(NULL);
			
 
				+		args->current_task = NULL;
			
 
				+
			
 
				+		if (res)
			
 
				+		{
			
 
				+			switch (res)
			
 
				+			{
			
 
				+				case -EAGAIN:
			
 
				+					_STARPU_DISP("ouch, put the codelet %p back ... \n", j);
			
 
				+					_starpu_push_task(j);
			
 
				+					STARPU_ABORT();
			
 
				+					continue;
			
 
				+				default:
			
 
				+					STARPU_ASSERT(0);
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		_starpu_handle_job_termination(j);
			
 
				+	}
			
 
				+
			
 
				+	_STARPU_TRACE_WORKER_DEINIT_START
			
 
				+
			
 
				+	_starpu_handle_all_pending_node_data_requests(memnode);
			
 
				+
			
 
				+	/* In case there remains some memory that was automatically
			
 
				+	 * allocated by StarPU, we release it now. Note that data
			
 
				+	 * coherency is not maintained anymore at that point ! */
			
 
				+	_starpu_free_all_automatically_allocated_buffers(memnode);
			
 
				+
			
 
				+	_starpu_scc_src_deinit_context(args->devid);
			
 
				+
			
 
				+	_STARPU_TRACE_WORKER_DEINIT_END(_STARPU_FUT_SCC_KEY);
			
 
				+
			
 
				+	return NULL;
			
 
				+}
			
--- a/src/drivers/scc/driver_scc_source.h
+++ b/src/drivers/scc/driver_scc_source.h
@@ -0,0 +1,56 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#ifndef __DRIVER_SCC_SOURCE_H__
			
 
				+#define __DRIVER_SCC_SOURCE_H__
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <starpu_scc.h>
			
 
				+#include <common/config.h>
			
 
				+
			
 
				+
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+
			
 
				+#include <drivers/mp_common/mp_common.h>
			
 
				+
			
 
				+
			
 
				+void _starpu_scc_src_mp_deinit();
			
 
				+
			
 
				+int _starpu_scc_src_register_kernel(starpu_scc_func_symbol_t *symbol, const char *func_name);
			
 
				+starpu_scc_kernel_t _starpu_scc_src_get_kernel(starpu_scc_func_symbol_t symbol);
			
 
				+
			
 
				+unsigned _starpu_scc_src_get_device_count();
			
 
				+void _starpu_scc_exit_useless_node(int devid);
			
 
				+
			
 
				+void _starpu_scc_src_init(struct _starpu_mp_node *node);
			
 
				+
			
 
				+int _starpu_scc_allocate_memory(void **addr, size_t size, unsigned memory_node);
			
 
				+void _starpu_scc_free_memory(void *addr, unsigned memory_node);
			
 
				+int _starpu_scc_allocate_shared_memory(void **addr, size_t size);
			
 
				+void _starpu_scc_free_shared_memory(void *addr);
			
 
				+
			
 
				+void _starpu_scc_set_offset_in_shared_memory(void *ptr, void **dev_handle, size_t *offset);
			
 
				+
			
 
				+int _starpu_scc_copy_src_to_sink(void *src, unsigned src_node STARPU_ATTRIBUTE_UNUSED, void *dst, unsigned dst_node, size_t size);
			
 
				+int _starpu_scc_copy_sink_to_src(void *src, unsigned src_node, void *dst, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, size_t size);
			
 
				+int _starpu_scc_copy_sink_to_sink(void *src, unsigned src_node, void *dst, unsigned dst_node, size_t size);
			
 
				+
			
 
				+void *_starpu_scc_src_worker(void *arg);
			
 
				+
			
 
				+#endif /* STARPU_USE_SCC */
			
 
				+
			
 
				+
			
 
				+#endif /* __DRIVER_SCC_SOURCE_H__ */
			
--- a/src/drivers/scc/driver_scc_utils.c
+++ b/src/drivers/scc/driver_scc_utils.c
@@ -0,0 +1,45 @@
 
				+/* StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+ *
			
 
				+ * Copyright (C) 2012  Inria
			
 
				+ *
			
 
				+ * StarPU is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * StarPU is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+
			
 
				+#include <starpu.h>
			
 
				+#include <starpu_scc.h>
			
 
				+#include <common/config.h>
			
 
				+
			
 
				+#include <drivers/mp_common/source_common.h>
			
 
				+#include <drivers/scc/driver_scc_source.h>
			
 
				+
			
 
				+
			
 
				+/* Register the SCC kernel named FUNC_NAME and return, through SYMBOL, the
			
 
				+ * handle to give later to starpu_scc_get_kernel().
			
 
				+ * This is the public entry point: it forwards its arguments to
			
 
				+ * _starpu_scc_src_register_kernel() and returns its result unchanged,
			
 
				+ * i.e. 0 on success and -ENOMEM when the kernel entry cannot be created.
			
 
				+ */
			
 
				+int starpu_scc_register_kernel(starpu_scc_func_symbol_t *symbol,
			
 
				+			       const char *func_name)
			
 
				+{
			
 
				+	return _starpu_scc_src_register_kernel(symbol, func_name);
			
 
				+}
			
 
				+
			
 
				+/* If success, return the pointer to the function designated by SYMBOL on
			
 
				+ * the device driven by the calling worker; NULL is returned on failure.
			
 
				+ * Forwards to _starpu_scc_src_get_kernel().
			
 
				+ */
			
 
				+starpu_scc_kernel_t starpu_scc_get_kernel(starpu_scc_func_symbol_t symbol)
			
 
				+{
			
 
				+	return _starpu_scc_src_get_kernel(symbol);
			
 
				+}
			
--- a/src/top/starpu_top.c
+++ b/src/top/starpu_top.c
@@ -105,6 +105,12 @@ static void starpu_top_get_device_type(int id, char* type)
 
				 	case STARPU_ANY_WORKER:
			
 
				 		strncpy(type, "ANY",9);
			
 
				 		break;
			
 
				+	case STARPU_MIC_WORKER:
			
 
				+		strncpy(type, "MIC", 9);
			
 
				+		break;
			
 
				+	case STARPU_SCC_WORKER:
			
 
				+		strncpy(type, "SCC", 9);
			
 
				+		break;
			
 
				 	}
			
 
				 }
			
 
				 
			
--- a/src/util/execute_on_all.c
+++ b/src/util/execute_on_all.c
@@ -110,6 +110,9 @@ void starpu_execute_on_each_worker_ex(void (*func)(void *), void *arg, uint32_t
 
				 	unsigned nworkers = starpu_worker_get_count();
			
 
				 	struct starpu_task *tasks[STARPU_NMAXWORKERS];
			
 
				 
			
 
				+	/* This method only works on CPU, CUDA and OpenCL workers */
			
 
				+	STARPU_ASSERT((where & ~STARPU_CPU & ~STARPU_CUDA & ~STARPU_OPENCL) == 0);
			
 
				+
			
 
				 	/* create a wrapper codelet */
			
 
				 	struct starpu_codelet wrapper_cl =
			
 
				 	{
			
--- a/src/util/starpu_data_cpy.c
+++ b/src/util/starpu_data_cpy.c
@@ -19,10 +19,15 @@
 
				 #include <core/task.h>
			
 
				 #include <datawizard/datawizard.h>
			
 
				 #include <util/starpu_data_cpy.h>
			
 
				+#include <starpu_mic.h>
			
 
				+#include <starpu_scc.h>
			
 
				 
			
 
				-static void data_cpy_func(void *descr[], void *cl_arg)
			
 
				+static void common_data_cpy_func(void *descr[], void *cl_arg)
			
 
				 {
			
 
				-	const struct starpu_data_copy_methods *copy_methods = (const struct starpu_data_copy_methods *) cl_arg;
			
 
				+	unsigned interface_id = *(unsigned *)cl_arg;
			
 
				+
			
 
				+	const struct starpu_data_interface_ops *interface_ops = _starpu_data_interface_get_ops(interface_id);
			
 
				+	const struct starpu_data_copy_methods *copy_methods = interface_ops->copy_methods;
			
 
				 
			
 
				 	int workerid = starpu_worker_get_id();
			
 
				 	enum starpu_worker_archtype type = starpu_worker_get_type(workerid);
			
@@ -63,6 +68,48 @@ static void data_cpy_func(void *descr[], void *cl_arg)
 
				 
			
 
				 }
			
 
				 
			
 
				+/* Copy kernel registered under the name "mp_cpy_kernel" for MIC and SCC.
			
 
				+ * CL_ARG points to the id of the data interface; the ram_to_ram method of
			
 
				+ * that interface copies descr[1] (source) into descr[0] (destination),
			
 
				+ * with node 0 given for both sides.
			
 
				+ */
			
 
				+void mp_cpy_kernel(void *descr[], void *cl_arg)
			
 
				+{
			
 
				+	void *dst_interface = descr[0];
			
 
				+	void *src_interface = descr[1];
			
 
				+
			
 
				+	unsigned interface_id = *(unsigned *)cl_arg;
			
 
				+	const struct starpu_data_copy_methods *copy_methods =
			
 
				+		_starpu_data_interface_get_ops(interface_id)->copy_methods;
			
 
				+
			
 
				+	STARPU_ASSERT(copy_methods->ram_to_ram);
			
 
				+	copy_methods->ram_to_ram(src_interface, 0, dst_interface, 0);
			
 
				+}
			
 
				+
			
 
				+/* MIC implementation of the copy codelet: return the device address of
			
 
				+ * "mp_cpy_kernel", registering the symbol on first use. Returns NULL when
			
 
				+ * the symbol cannot be registered. Aborts when StarPU is built without
			
 
				+ * MIC support.
			
 
				+ */
			
 
				+static starpu_mic_kernel_t mic_cpy_func()
			
 
				+{
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+	static starpu_mic_func_symbol_t mic_symbol = NULL;
			
 
				+	if (mic_symbol == NULL)
			
 
				+	{
			
 
				+		/* Do not look up a symbol that could not be registered.
			
 
				+		 * NOTE(review): assumes starpu_mic_register_kernel() returns 0
			
 
				+		 * on success, like its SCC counterpart -- confirm. */
			
 
				+		if (starpu_mic_register_kernel(&mic_symbol, "mp_cpy_kernel") != 0)
			
 
				+			return NULL;
			
 
				+	}
			
 
				+
			
 
				+	return starpu_mic_get_kernel(mic_symbol);
			
 
				+#else
			
 
				+	STARPU_ABORT();
			
 
				+	return NULL;
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+/* SCC implementation of the copy codelet: return the device address of
			
 
				+ * "mp_cpy_kernel", registering the symbol on first use. Returns NULL when
			
 
				+ * the symbol cannot be registered. Aborts when StarPU is built without
			
 
				+ * SCC support.
			
 
				+ */
			
 
				+static starpu_scc_kernel_t scc_cpy_func()
			
 
				+{
			
 
				+#ifdef STARPU_USE_SCC
			
 
				+	static starpu_scc_func_symbol_t scc_symbol = NULL;
			
 
				+	if (scc_symbol == NULL)
			
 
				+	{
			
 
				+		/* Registration leaves the symbol NULL on failure: do not hand
			
 
				+		 * a NULL symbol to the lookup, which dereferences it. */
			
 
				+		if (starpu_scc_register_kernel(&scc_symbol, "mp_cpy_kernel") != 0)
			
 
				+			return NULL;
			
 
				+	}
			
 
				+
			
 
				+	return starpu_scc_get_kernel(scc_symbol);
			
 
				+#else
			
 
				+	STARPU_ABORT();
			
 
				+	return NULL;
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				 struct starpu_perfmodel copy_model =
			
 
				 {
			
 
				 	.type = STARPU_HISTORY_BASED,
			
@@ -71,10 +118,12 @@ struct starpu_perfmodel copy_model =
 
				 
			
 
				 static struct starpu_codelet copy_cl =
			
 
				 {
			
 
				-	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL,
			
 
				-	.cpu_funcs = {data_cpy_func, NULL},
			
 
				-	.cuda_funcs = {data_cpy_func, NULL},
			
 
				-	.opencl_funcs = {data_cpy_func, NULL},
			
 
				+	.where = STARPU_CPU|STARPU_CUDA|STARPU_OPENCL|STARPU_MIC|STARPU_SCC,
			
 
				+	.cpu_funcs = {common_data_cpy_func, NULL},
			
 
				+	.cuda_funcs = {common_data_cpy_func, NULL},
			
 
				+	.opencl_funcs = {common_data_cpy_func, NULL},
			
 
				+	.mic_funcs = {mic_cpy_func, NULL},
			
 
				+	.scc_funcs = {scc_cpy_func, NULL},
			
 
				 	.nbuffers = 2,
			
 
				 	.modes = {STARPU_W, STARPU_R},
			
 
				 	.model = &copy_model
			
@@ -84,7 +133,6 @@ int _starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_h
 
				 		     int asynchronous, void (*callback_func)(void*), void *callback_arg,
			
 
				 		     int reduction, struct starpu_task *reduction_dep_task)
			
 
				 {
			
 
				-	const struct starpu_data_copy_methods *copy_methods = dst_handle->ops->copy_methods;
			
 
				 
			
 
				 	struct starpu_task *task = starpu_task_create();
			
 
				 	STARPU_ASSERT(task);
			
@@ -98,7 +146,12 @@ int _starpu_data_cpy(starpu_data_handle_t dst_handle, starpu_data_handle_t src_h
 
				 	}
			
 
				 
			
 
				 	task->cl = &copy_cl;
			
 
				-	task->cl_arg = (void *)copy_methods;
			
 
				+
			
 
				+	unsigned *interface_id = malloc(sizeof(*interface_id));
			
 
				+	*interface_id = dst_handle->ops->interfaceid; 
			
 
				+	task->cl_arg = interface_id;
			
 
				+	task->cl_arg_size = sizeof(*interface_id);
			
 
				+	task->cl_arg_free = 1;
			
 
				 
			
 
				 	task->callback_func = callback_func;
			
 
				 	task->callback_arg = callback_arg;
			
--- a/src/util/starpu_insert_task.c
+++ b/src/util/starpu_insert_task.c
@@ -60,7 +60,6 @@ void starpu_codelet_unpack_args(void *_cl_arg, ...)
 
				 	}
			
 
				 
			
 
				 	va_end(varg_list);
			
 
				-	free(cl_arg);
			
 
				 }
			
 
				 
			
 
				 int starpu_insert_task(struct starpu_codelet *cl, ...)
			
@@ -80,6 +79,7 @@ int starpu_insert_task(struct starpu_codelet *cl, ...)
 
				 	}
			
 
				 
			
 
				 	struct starpu_task *task = starpu_task_create();
			
 
				+	task->cl_arg_free = 1;
			
 
				 
			
 
				 	if (cl && cl->nbuffers > STARPU_NMAXBUFS)
			
 
				 	{
			
--- a/starpu-1.0-mic.pc.in
+++ b/starpu-1.0-mic.pc.in
@@ -0,0 +1,35 @@
 
				+# StarPU --- Runtime system for heterogeneous multicore architectures.
			
 
				+#
			
 
				+# Copyright (C) 2009, 2010, 2011, 2013  Université de Bordeaux 1
			
 
				+# Copyright (C) 2010, 2011, 2012  Centre National de la Recherche Scientifique
			
 
				+#
			
 
				+# StarPU is free software; you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU Lesser General Public License as published by
			
 
				+# the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+# your option) any later version.
			
 
				+#
			
 
				+# StarPU is distributed in the hope that it will be useful, but
			
 
				+# WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+#
			
 
				+# See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+
			
 
				+prefix=@prefix@
			
 
				+exec_prefix=@exec_prefix@
			
 
				+libdir=@libdir@
			
 
				+pkglibdir=@pkglibdir@
			
 
				+includedir=@includedir@
			
 
				+
			
 
				+# When the GCC plug-in is available, the following lines indicate
			
 
				+# where it is installed.
			
 
				+@GCC_PLUGIN_DIR_PKGCONFIG@
			
 
				+@GCC_PLUGIN_PKGCONFIG@
			
 
				+
			
 
				+Name: starpu
			
 
				+Description: offers support for heterogeneous multicore architecture
			
 
				+Version: @PACKAGE_VERSION@
			
 
				+Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@
			
 
				+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_LDFLAGS@ @STARPU_OPENCL_LDFLAGS@
			
 
				+Libs.private: @LDFLAGS@ @LIBS@
			
 
				+Requires: @HWLOC_REQUIRES@
			
 
				+Requires.private: @GORDON_REQUIRES@
			
--- a/starpu-1.0.pc.in
+++ b/starpu-1.0.pc.in
@@ -29,6 +29,6 @@ Name: starpu
 
				 Description: offers support for heterogeneous multicore architecture
			
 
				 Version: @PACKAGE_VERSION@
			
 
				 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@ -DSTARPU_USE_DEPRECATED_ONE_ZERO_API
			
 
				-Libs: -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_OPENCL_LDFLAGS@ @STARPU_CUDA_LDFLAGS@ @STARPU_SC_HYPERVISOR@
			
 
				+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_OPENCL_LDFLAGS@ @STARPU_CUDA_LDFLAGS@ @STARPU_SC_HYPERVISOR@
			
 
				 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
			
 
				 Requires: @HWLOC_REQUIRES@
			
--- a/starpu-1.1.pc.in
+++ b/starpu-1.1.pc.in
@@ -29,6 +29,6 @@ Name: starpu
 
				 Description: offers support for heterogeneous multicore architecture
			
 
				 Version: @PACKAGE_VERSION@
			
 
				 Cflags: -I${includedir}/starpu/@STARPU_EFFECTIVE_VERSION@ @STARPU_CUDA_CPPFLAGS@
			
 
				-Libs: -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_OPENCL_LDFLAGS@ @STARPU_CUDA_LDFLAGS@ @STARPU_SC_HYPERVISOR@
			
 
				+Libs: @STARPU_EXPORT_DYNAMIC@ -L${libdir} -lstarpu-@STARPU_EFFECTIVE_VERSION@ @STARPU_OPENCL_LDFLAGS@ @STARPU_CUDA_LDFLAGS@ @STARPU_SC_HYPERVISOR@
			
 
				 Libs.private: @LDFLAGS@ @LIBS@ @LIBSTARPU_LDFLAGS@
			
 
				 Requires: @HWLOC_REQUIRES@
			
--- a/super-configure
+++ b/super-configure
@@ -0,0 +1,74 @@
 
				+#!/bin/sh
			
 
				+
			
 
				+ROOT_DIR=$PWD
			
 
				+MIC_HOST=x86_64-k1om-linux
			
 
				+MIC_CC_PATH=/usr/linux-k1om-4.7/bin/
			
 
				+DEFAULT_PREFIX=/usr/local
			
 
				+
			
 
				+export PATH=${MIC_CC_PATH}${PATH:+:${PATH}}
			
 
				+
			
 
				+echo "This file was created by StarPU super-configure 0.0.1." > ./super-config.log
			
 
				+echo "" >> ./super-config.log
			
 
				+echo " $ $0 $*" >> ./super-config.log
			
 
				+
			
 
				+for arch in mic host
			
 
				+do
			
 
				+
			
 
				+	# We call the configure script from a per-architecture build directory
			
 
				+	# located below the source tree root
			
 
				+	command="${ROOT_DIR}/configure --enable-mic --with-coi-dir=/opt/intel/mic/coi"
			
 
				+	prefix_found=no
			
 
				+
			
 
				+	if test x$arch = xmic ; then
			
 
				+		command="$command --without-hwloc --with-coi-lib-dir=/opt/intel/mic/coi/device-linux-release/lib --host=$MIC_HOST"
			
 
				+	else
			
 
				+		command="$command --with-coi-lib-dir=/opt/intel/mic/coi/host-linux-release/lib"
			
 
				+	fi
			
 
				+
			
 
				+	for arg in $*
			
 
				+	do
			
 
				+		if [ ${arg:0:9} = '--prefix=' ]
			
 
				+		then
			
 
				+			prefix_found=yes
			
 
				+			prefix="${arg:9}"
			
 
				+			command="$command ${arg}/${arch}"
			
 
				+		else
			
 
				+			command="$command $arg"
			
 
				+		fi
			
 
				+
			
 
				+	done
			
 
				+
			
 
				+	# If the user didn't specify a directory where to install the library
			
 
				+	# we apply the default one
			
 
				+	if test x$prefix_found = xno ; then
			
 
				+		command="$command --prefix=${DEFAULT_PREFIX}/$arch"
			
 
				+		prefix=${DEFAULT_PREFIX}
			
 
				+	fi
			
 
				+
			
 
				+	# Create the build directory if it doesn't exist yet and enter it.
			
 
				+	# Abort on failure: otherwise configure, make and make install below
			
 
				+	# would run from whatever directory we happen to be in.
			
 
				+	mkdir -p "${ROOT_DIR}/build_${arch}" || exit 1
			
 
				+
			
 
				+	cd "${ROOT_DIR}/build_${arch}" || exit 1
			
 
				+
			
 
				+	if test x$arch = xmic ; then
			
 
				+		LDFLAGS=-export-dynamic $command
			
 
				+	else
			
 
				+		$command
			
 
				+	fi
			
 
				+	make -j
			
 
				+
			
 
				+	if test x$arch = xmic ; then
			
 
				+		make check > /dev/null 2>&1	# discard both stdout and stderr
			
 
				+	fi
			
 
				+
			
 
				+	make install
			
 
				+	cd "${ROOT_DIR}"
			
 
				+
			
 
				+done
			
 
				+
			
 
				+if [ ! -f "${prefix}/mic/lib/pkgconfig/starpu-1.0-mic.pc" ]
			
 
				+then
			
 
				+	ln -s "${prefix}/mic/lib/pkgconfig/starpu-1.0.pc" "${prefix}/mic/lib/pkgconfig/starpu-1.0-mic.pc"
			
 
				+fi
			
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -19,6 +19,7 @@ SUBDIRS =
 
				 AM_CFLAGS = $(HWLOC_CFLAGS) $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(GLOBAL_AM_CFLAGS)
			
 
				 LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ @LIBS@
			
 
				 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/tools/ -I$(top_srcdir)/mpi/ -I$(top_builddir)/src -I$(top_srcdir)/src
			
 
				+AM_LDFLAGS = $(STARPU_COI_LDFLAGS)
			
 
				 
			
 
				 bin_PROGRAMS =
			
 
				 dist_bin_SCRIPTS =
			
--- a/tools/starpu_machine_display.c
+++ b/tools/starpu_machine_display.c
@@ -141,6 +141,11 @@ int main(int argc, char **argv)
 
				 	unsigned ncuda = starpu_cuda_worker_get_count();
			
 
				 	unsigned nopencl = starpu_opencl_worker_get_count();
			
 
				 
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+	unsigned nmicdevs = starpu_mic_device_get_count();
			
 
				+	unsigned nmiccores = starpu_mic_worker_get_count();
			
 
				+#endif
			
 
				+
			
 
				 	fprintf(stdout, "StarPU has found :\n");
			
 
				 
			
 
				 	fprintf(stdout, "\t%u CPU cores\n", ncpu);
			
@@ -152,6 +157,11 @@ int main(int argc, char **argv)
 
				 	fprintf(stdout, "\t%u OpenCL devices\n", nopencl);
			
 
				 	display_worker_names(STARPU_OPENCL_WORKER);
			
 
				 
			
 
				+#ifdef STARPU_USE_MIC
			
 
				+	fprintf(stdout, "\t%u MIC cores (from %u devices)\n", nmiccores, nmicdevs); /* both counters are unsigned */
			
 
				+	display_worker_names(STARPU_MIC_WORKER);
			
 
				+#endif
			
 
				+
			
 
				 	display_all_combined_workers();
			
 
				 
			
 
				 	fprintf(stdout, "\ntopology ...\n");