Samuel Thibault лет назад: 12
Родитель
Сommit
d64bd81764

+ 32 - 0
doc/chapters/mic-ssc-support.texi

@@ -0,0 +1,32 @@
+@c -*-texinfo-*-
+
+@c This file is part of the StarPU Handbook.
+@c Copyright (C) 2013  Universit@'e de Bordeaux 1
+@c See the file starpu.texi for copying conditions.
+
+@section Compilation
+
+SCC support just needs the presence of the RCCE library.
+
+MIC support requires building StarPU twice: once for the host and once for
+the device. The @code{super-configure} script can be used to automate this: it
+runs @code{configure} from the @code{build_mic} and @code{build_host} directories, then @code{make} and
+@code{make install}.
+
+@c TODO: move to configuration section ?
+
+It can be parameterized with the following environment variables:
+
+@table @asis
+@item @code{MIC_HOST}
+Defines the value of the @code{--host} parameter passed to @code{configure} for the
+cross-compilation. The current default is @code{x86_64-k1om-linux}.
+
+@item @code{MIC_CC_PATH}
+Defines the path to the MIC cross-compiler. The current default is @code{/usr/linux-k1om-4.7/bin/}.
+
+@item @code{COI_DIR}
+Defines the path to the COI library. The current default is @code{/opt/intel/mic/coi}.
+@end table
+
+@section

+ 9 - 0
doc/starpu.texi

@@ -74,6 +74,7 @@ was last updated on @value{UPDATED}.
 * Tips and Tricks::             Tips and tricks to know about
 * StarPU MPI support::          How to combine StarPU with MPI
 * StarPU FFT support::          How to perform FFT computations with StarPU
+* StarPU MIC/SCC support::      How to build and run StarPU applications on MIC and SCC
 * C Extensions::                Easier StarPU programming with GCC
 * SOCL OpenCL Extensions::      How to use OpenCL on top of StarPU
 * Scheduling Contexts in StarPU::         How to use Scheduling Context of StarPU
@@ -163,6 +164,14 @@ was last updated on @value{UPDATED}.
 @include chapters/fft-support.texi
 
 @c ---------------------------------------------------------------------
+@c MIC/SCC support
+@c ---------------------------------------------------------------------
+
+@node StarPU MIC/SCC support
+@chapter StarPU MIC/SCC support
+@include chapters/mic-scc-support.texi
+
+@c ---------------------------------------------------------------------
 @c C Extensions
 @c ---------------------------------------------------------------------
 

+ 3 - 0
examples/Makefile.am

@@ -305,9 +305,12 @@ basic_examples_vector_scal_SOURCES =		\
 	basic_examples/vector_scal_cpu.c
 
 if STARPU_HAVE_ICC
+if STARPU_CROSS_COMPILING
+else
 basic_examples_vector_scal_SOURCES +=		\
 	basic_examples/vector_scal_cpu_icc.icc
 endif
+endif
 
 if STARPU_USE_CUDA
 basic_examples_vector_scal_SOURCES +=		\

+ 4 - 4
examples/basic_examples/vector_scal.c

@@ -56,12 +56,12 @@ static struct starpu_codelet cl =
 	/* CPU implementation of the codelet */
 	.cpu_funcs = {
 		scal_cpu_func
-#ifdef STARPU_HAVE_ICC
+#if defined(STARPU_HAVE_ICC) && !defined(__KNC__) && !defined(__KNF__)
 		, scal_cpu_func_icc
 #endif
 #ifdef __SSE__
 		, scal_sse_func
-#ifdef STARPU_HAVE_ICC
+#if defined(STARPU_HAVE_ICC) && !defined(__KNC__) && !defined(__KNF__)
 		, scal_sse_func_icc
 #endif
 #endif
@@ -69,12 +69,12 @@ static struct starpu_codelet cl =
 	},
 	.cpu_funcs_name = {
 		"scal_cpu_func",
-#ifdef STARPU_HAVE_ICC
+#if defined(STARPU_HAVE_ICC) && !defined(__KNC__) && !defined(__KNF__)
 		"scal_cpu_func_icc",
 #endif
 #ifdef __SSE__
 		"scal_sse_func",
-#ifdef STARPU_HAVE_ICC
+#if defined(STARPU_HAVE_ICC) && !defined(__KNC__) && !defined(__KNF__)
 		"scal_sse_func_icc"
 #endif
 #endif

+ 0 - 3
include/starpu_data_interfaces.h

@@ -439,9 +439,6 @@ struct starpu_multiformat_data_interface_ops
 	size_t mic_elemsize;
 	struct starpu_codelet *cpu_to_mic_cl;
 	struct starpu_codelet *mic_to_cpu_cl;
-	size_t scc_elemsize;
-	struct starpu_codelet *cpu_to_scc_cl;
-	struct starpu_codelet *scc_to_cpu_cl;
 };
 
 struct starpu_multiformat_interface

+ 1 - 1
include/starpu_util.h

@@ -177,7 +177,7 @@ STARPU_ATOMIC_SOMETHING(or, old | value)
 #define STARPU_SYNCHRONIZE() __asm__ __volatile__("sync" ::: "memory")
 #endif
 
-#if defined(__i386__)
+#if defined(__i386__) || defined(__KNC__) || defined(__KNF__)
 #define STARPU_RMB() __asm__ __volatile__("lock; addl $0,0(%%esp)" ::: "memory")
 #define STARPU_WMB() __asm__ __volatile__("lock; addl $0,0(%%esp)" ::: "memory")
 #elif defined(__x86_64__)

+ 1 - 1
src/core/perfmodel/perfmodel_bus.c

@@ -638,7 +638,7 @@ static void benchmark_all_gpu_devices(void)
 	_STARPU_DISP("can not measure bus in simgrid mode, please run starpu_calibrate_bus in non-simgrid mode to make sure the bus performance model was calibrated\n");
 	STARPU_ABORT();
 #else /* !SIMGRID */
-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL)
+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_MIC)
 	unsigned i;
 #endif
 #ifdef HAVE_CUDA_MEMCPY_PEER

+ 10 - 16
src/core/sched_policy.c

@@ -443,7 +443,7 @@ struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t
 {
 	struct starpu_task *conversion_task;
 
-#if defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
+#if defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) || defined(STARPU_USE_MIC) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID)
 	struct starpu_multiformat_interface *format_interface;
 #endif
 
@@ -451,7 +451,7 @@ struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t
 	conversion_task->synchronous = 0;
 	STARPU_TASK_SET_HANDLE(conversion_task, handle, 0);
 
-#if defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
+#if defined(STARPU_USE_OPENCL) || defined(STARPU_USE_CUDA) || defined(STARPU_USE_MIC) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID)
 	/* The node does not really matter here */
 	format_interface = (struct starpu_multiformat_interface *) starpu_data_get_interface_on_node(handle, 0);
 #endif
@@ -464,9 +464,13 @@ struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t
 	switch(node_kind)
 	{
 	case STARPU_CPU_RAM:
+	case STARPU_SCC_RAM:
+	case STARPU_SCC_SHM:
 		switch (starpu_node_get_kind(handle->mf_node))
 		{
 		case STARPU_CPU_RAM:
+		case STARPU_SCC_RAM:
+		case STARPU_SCC_SHM:
 			STARPU_ABORT();
 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
 		case STARPU_CUDA_RAM:
@@ -488,17 +492,12 @@ struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t
 #endif
 #ifdef STARPU_USE_MIC
 		case STARPU_MIC_RAM:
+		{
 			struct starpu_multiformat_data_interface_ops *mf_ops;
 			mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface);
 			conversion_task->cl = mf_ops->mic_to_cpu_cl;
 			break;
-#endif
-#ifdef STARPU_USE_MIC
-		case STARPU_SCC_RAM:
-			struct starpu_multiformat_data_interface_ops *mf_ops;
-			mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface);
-			conversion_task->cl = mf_ops->scc_to_cpu_cl;
-			break;
+		}
 #endif
 		default:
 			_STARPU_ERROR("Oops : %u\n", handle->mf_node);
@@ -524,17 +523,12 @@ struct starpu_task *_starpu_create_conversion_task_for_arch(starpu_data_handle_t
 #endif
 #ifdef STARPU_USE_MIC
 	case STARPU_MIC_RAM:
+	{
 		struct starpu_multiformat_data_interface_ops *mf_ops;
 		mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface);
 		conversion_task->cl = mf_ops->cpu_to_mic_cl;
 		break;
-#endif
-#ifdef STARPU_USE_SCC
-	case STARPU_SCC_RAM:
-		struct starpu_multiformat_data_interface_ops *mf_ops;
-		mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface);
-		conversion_task->cl = mf_ops->cpu_to_scc_cl;
-		break;
+	}
 #endif
 	default:
 		STARPU_ABORT();

+ 11 - 11
src/core/topology.c

@@ -48,7 +48,7 @@
 
 static unsigned topology_is_initialized = 0;
 
-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_MIC) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID)
+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC) || defined(STARPU_SIMGRID)
 
 struct handle_entry
 {
@@ -70,7 +70,7 @@ static unsigned may_bind_automatically = 0;
  * Discover the topology of the machine
  */
 
-#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_MIC) || defined(STARPU_USE_SCC)  || defined(STARPU_SIMGRID)
+#if defined(STARPU_USE_CUDA) || defined(STARPU_USE_OPENCL) || defined(STARPU_USE_SCC)  || defined(STARPU_SIMGRID)
 static void
 _starpu_initialize_workers_deviceid (int *explicit_workers_gpuid,
 				  int *current, int *workers_gpuid,
@@ -339,7 +339,7 @@ _starpu_init_mic_node (struct _starpu_machine_config *config, int mic_idx,
 {
     /* Initialize the MIC node of index MIC_IDX. */
 
-    struct starpu_conf *user_conf = config->user_conf;
+    struct starpu_conf *user_conf = config->conf;
 
     char ***argv = _starpu_get_argv();
     const char *suffixes[] = {"-mic", "_mic", NULL};
@@ -363,7 +363,7 @@ _starpu_init_mic_node (struct _starpu_machine_config *config, int mic_idx,
 	_starpu_src_common_locate_file (mic_sink_program_path,
 					getenv("STARPU_MIC_SINK_PROGRAM_NAME"),
 					getenv("STARPU_MIC_SINK_PROGRAM_PATH"),
-					(user_conf==NULL ? NULL : user_conf->mic_sink_program_path),
+					user_conf->mic_sink_program_path,
 					(argv ? (*argv)[0] : NULL),
 					suffixes);
 
@@ -611,7 +611,7 @@ _starpu_init_mic_config (struct _starpu_machine_config *config,
 	}
 	else
 	{
-	    if (nmiccores > topology->nhwmiccores[mic_idx])
+	    if ((unsigned) nmiccores > topology->nhwmiccores[mic_idx])
 	    {
 		/* The user requires more MIC devices than there is available */
 		fprintf(stderr,
@@ -670,7 +670,7 @@ _starpu_init_mp_config (struct _starpu_machine_config *config,
 
     topology->nmicdevices = 0;
     unsigned i;
-    for (i = 0; i < STARPU_MIN (nhwmicdevices, reqmicdevices); i++)
+    for (i = 0; i < STARPU_MIN (nhwmicdevices, (unsigned) reqmicdevices); i++)
 	if (0 == _starpu_init_mic_node (config, i, &handles[i], &process[i]))
 	    topology->nmicdevices++;
 
@@ -900,7 +900,7 @@ _starpu_init_machine_config (struct _starpu_machine_config *config, int no_mp_co
 	 * ones of the mp nodes. */
 #ifdef STARPU_USE_MIC
 	if (! no_mp_config)
-	    _starpu_init_mp_config (config, user_conf);
+	    _starpu_init_mp_config (config, config->conf);
 #endif
 
 /* we put the CPU section after the accelerator : in case there was an
@@ -1107,7 +1107,7 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 	if (! no_mp_config) {
 	    unsigned i = 0;
 	    for (i = 0; i < config->topology.nmicdevices; i++) {
-		mic_memory_nodes[i] = _starpu_register_memory_node (STARPU_MIC_RAM, i);
+		mic_memory_nodes[i] = _starpu_memory_node_register (STARPU_MIC_RAM, i);
 		_starpu_register_bus(0, mic_memory_nodes[i]);
 		_starpu_register_bus(mic_memory_nodes[i], 0);
 	    }
@@ -1206,8 +1206,8 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 				//}
 				is_a_set_of_accelerators = 1;
 				memory_node = mic_memory_nodes[workerarg->mp_nodeid];
-				_starpu_memory_node_worker_add(memory_node);
-				/* memory_node = _starpu_register_memory_node(STARPU_MIC_RAM, workerarg->devid);*/
+				_starpu_memory_node_add_nworkers(memory_node);
+				/* memory_node = _starpu_memory_node_register(STARPU_MIC_RAM, workerarg->devid);*/
 
 				/* _starpu_register_bus(0, memory_node);
 				 * _starpu_register_bus(memory_node, 0); */
@@ -1223,7 +1223,7 @@ _starpu_init_workers_binding (struct _starpu_machine_config *config, int no_mp_c
 
 				is_a_set_of_accelerators = 0;
 				memory_node = ram_memory_node;
-				_starpu_memory_node_worker_add(memory_node);
+				_starpu_memory_add_nworkers(memory_node);
 			}
 				break;
 #endif

+ 10 - 8
src/core/workers.c

@@ -573,7 +573,7 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 				if (mic_initiated[mp_nodeid])
 					goto worker_set_initialized;
 
-				mic_worker_set[mp_nodeid].nworkers = config->topology.nmiccores[mp_nodeid];
+				mic_worker_set[mp_nodeid].nworkers = pconfig->topology.nmiccores[mp_nodeid];
 
 				/* We assume all MIC workers of a given MIC
 				 * device are contiguous so that we can
@@ -581,18 +581,19 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 				mic_worker_set[mp_nodeid].workers = workerarg;
 				mic_worker_set[mp_nodeid].set_is_initialized = 0;
 
-				STARPU_PTHREAD_CREATE(
+				STARPU_PTHREAD_CREATE_ON(
 						workerarg->name,
 						&mic_worker_set[mp_nodeid].worker_thread,
 						NULL,
 						_starpu_mic_src_worker,
-						&mic_worker_set[mp_nodeid]);
+						&mic_worker_set[mp_nodeid],
+						worker+1);
 
-				_STARPU_PTHREAD_MUTEX_LOCK(&mic_worker_set[mp_nodeid].mutex);
+				STARPU_PTHREAD_MUTEX_LOCK(&mic_worker_set[mp_nodeid].mutex);
 				while (!mic_worker_set[mp_nodeid].set_is_initialized)
-					_STARPU_PTHREAD_COND_WAIT(&mic_worker_set[mp_nodeid].ready_cond,
+					STARPU_PTHREAD_COND_WAIT(&mic_worker_set[mp_nodeid].ready_cond,
 								  &mic_worker_set[mp_nodeid].mutex);
-				_STARPU_PTHREAD_MUTEX_UNLOCK(&mic_worker_set[mp_nodeid].mutex);
+				STARPU_PTHREAD_MUTEX_UNLOCK(&mic_worker_set[mp_nodeid].mutex);
 
 				mic_initiated[mp_nodeid] = 1;
 
@@ -614,12 +615,13 @@ static void _starpu_launch_drivers(struct _starpu_machine_config *pconfig)
 			case STARPU_SCC_WORKER:
 				workerarg->set = NULL;
 				workerarg->worker_is_initialized = 0;
-				STARPU_PTHREAD_CREATE(
+				STARPU_PTHREAD_CREATE_ON(
 						workerarg->name
 						&workerarg->worker_thread,
 						NULL,
 						_starpu_scc_src_worker,
-						workerarg);
+						workerarg,
+						worker+1);
 
 #ifdef STARPU_USE_FXT
 				STARPU_PTHREAD_MUTEX_LOCK(&workerarg->mutex);

+ 5 - 0
src/core/workers.h

@@ -192,6 +192,11 @@ struct _starpu_machine_config
 	unsigned submitting;
 };
 
+/* Three functions to manage argv, argc */
+void _starpu_set_argc_argv(int *argc, char ***argv);
+int *_starpu_get_argc();
+char ***_starpu_get_argv();
+
 /* Fill conf with environment variables */
 void _starpu_conf_check_environment(struct starpu_conf *conf);
 

+ 11 - 0
src/datawizard/interfaces/block_interface.c

@@ -75,6 +75,17 @@ static const struct starpu_data_copy_methods block_copy_data_methods_s =
 	.opencl_to_ram_async = copy_opencl_to_ram_async,
 	.opencl_to_opencl_async = copy_opencl_to_opencl_async,
 #endif
+#ifdef STARPU_USE_SCC
+	.scc_src_to_sink = copy_scc_src_to_sink,
+	.scc_sink_to_src = copy_scc_sink_to_src,
+	.scc_sink_to_sink = copy_scc_sink_to_sink,
+#endif
+#ifdef STARPU_USE_MIC
+	.ram_to_mic = copy_ram_to_mic,
+	.mic_to_ram = copy_mic_to_ram,
+	.ram_to_mic_async = copy_ram_to_mic_async,
+	.mic_to_ram_async = copy_mic_to_ram_async,
+#endif
 };
 
 

+ 10 - 1
src/datawizard/interfaces/data_interface.c

@@ -603,7 +603,10 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 		 * XXX : This is quite hacky, could we submit a task instead ?
 		 */
 		if (_starpu_data_is_multiformat_handle(handle) &&
-			starpu_node_get_kind(handle->mf_node) != STARPU_CPU_RAM)
+			(  starpu_node_get_kind(handle->mf_node) != STARPU_CPU_RAM
+			&& starpu_node_get_kind(handle->mf_node) != STARPU_SCC_RAM
+			&& starpu_node_get_kind(handle->mf_node) != STARPU_SCC_SHM
+			 ))
 		{
 			_STARPU_DEBUG("Conversion needed\n");
 			void *buffers[1];
@@ -634,10 +637,16 @@ static void _starpu_data_unregister(starpu_data_handle_t handle, unsigned cohere
 #endif
 #ifdef STARPU_USE_MIC
 				case STARPU_MIC_RAM:
+				{
+					struct starpu_multiformat_data_interface_ops *mf_ops;
+					mf_ops = (struct starpu_multiformat_data_interface_ops *) handle->ops->get_mf_ops(format_interface);
 					cl = mf_ops->mic_to_cpu_cl;
 					break;
+				}
 #endif
 				case STARPU_CPU_RAM:      /* Impossible ! */
+				case STARPU_SCC_RAM:      /* Impossible ! */
+				case STARPU_SCC_SHM:      /* Impossible ! */
 				default:
 					STARPU_ABORT();
 			}

+ 11 - 0
src/datawizard/interfaces/matrix_interface.c

@@ -89,6 +89,17 @@ static const struct starpu_data_copy_methods matrix_copy_data_methods_s =
 	.opencl_to_ram_async = copy_opencl_to_ram_async,
 	.opencl_to_opencl_async = copy_opencl_to_opencl_async,
 #endif
+#ifdef STARPU_USE_SCC
+	.scc_src_to_sink = copy_scc_src_to_sink,
+	.scc_sink_to_src = copy_scc_sink_to_src,
+	.scc_sink_to_sink = copy_scc_sink_to_sink,
+#endif
+#ifdef STARPU_USE_MIC
+	.ram_to_mic = copy_ram_to_mic,
+	.mic_to_ram = copy_mic_to_ram,
+	.ram_to_mic_async = copy_ram_to_mic_async,
+	.mic_to_ram_async = copy_mic_to_ram_async,
+#endif
 };
 
 static void register_matrix_handle(starpu_data_handle_t handle, unsigned home_node, void *data_interface);

+ 0 - 5
src/datawizard/interfaces/multiformat_interface.c

@@ -199,8 +199,6 @@ void starpu_multiformat_data_register(starpu_data_handle_t *handleptr,
 	_starpu_codelet_check_deprecated_fields(format_ops->cuda_to_cpu_cl);
 	_starpu_codelet_check_deprecated_fields(format_ops->cpu_to_mic_cl);
 	_starpu_codelet_check_deprecated_fields(format_ops->mic_to_cpu_cl);
-	_starpu_codelet_check_deprecated_fields(format_ops->cpu_to_scc_cl);
-	_starpu_codelet_check_deprecated_fields(format_ops->scc_to_cpu_cl);
 
 	struct starpu_multiformat_interface multiformat =
 	{
@@ -237,9 +235,6 @@ static int multiformat_compare(void *data_interface_a, void *data_interface_b)
 #ifdef STARPU_USE_MIC
 		    && (multiformat_a->ops->mic_elemsize == multiformat_b->ops->mic_elemsize)
 #endif
-#ifdef STARPU_USE_SCC
-		    && (multiformat_a->ops->scc_elemsize == multiformat_b->ops->scc_elemsize)
-#endif
 		);
 }
 

+ 129 - 107
src/drivers/mic/driver_mic_source.c

@@ -21,6 +21,7 @@
 #include <starpu.h>
 #include <starpu_profiling.h>
 #include <core/sched_policy.h>
+#include <common/uthash.h>
 
 #include <drivers/driver_common/driver_common.h>
 #include <drivers/mp_common/source_common.h>
@@ -43,13 +44,10 @@ static COIENGINE handles[STARPU_MAXMICDEVS];
  */
 struct _starpu_mic_kernel
 {
+	UT_hash_handle hh;
 	char *name;
 	starpu_mic_kernel_t func[STARPU_MAXMICDEVS];
-};
-
-/* Hash table use to store _starpu_mic_kernel
- */
-static struct _starpu_htbl kernels_htbl;
+} *kernels;
 
 /* Mutex for concurrent access to the table.
  */
@@ -88,93 +86,101 @@ const struct _starpu_mp_node *_starpu_mic_src_get_actual_thread_mp_node()
 
 const struct _starpu_mp_node *_starpu_mic_src_get_mp_node_from_memory_node(int memory_node)
 {
-	int nodeid = _starpu_memory_node_to_devid(memory_node);
+	int nodeid = _starpu_memory_node_get_devid(memory_node);
 	STARPU_ASSERT(nodeid >= 0 && nodeid < STARPU_MAXMICDEVS);
 
 	return mic_nodes[nodeid];
 }
 
 // Should be obsolete.
-/* static void _starpu_mic_src_init_context(int devid,
- * 					 struct starpu_conf *user_conf)
- * {
- * 	COIRESULT res;
- * 	char mic_sink_program_path[1024];
- *
- * 	char ***argv = _starpu_get_argv();
- * 	const char *suffixes[] = {"-mic", "_mic", NULL};
- *
- * 	char devid_env[32];
- * 	sprintf(devid_env, "DEVID=%d", devid);
- *
- * 	char nb_mic_env[32];
- * 	sprintf(nb_mic_env, "NB_MIC=%d", starpu_mic_worker_get_count());
- *
- * 	/\* Environment variables to send to the Sink, it informs it what kind
- * 	 * of node it is (architecture and type) as there is no way to discover
- * 	 * it itself *\/
- * 	const char *mic_sink_env[] = {"STARPU_SINK=STARPU_MIC", devid_env, nb_mic_env, NULL};
- *
- * 	/\* Let's get the helper program to run on the MIC device *\/
- * 	int mic_file_found = _starpu_src_common_locate_file(mic_sink_program_path,
- * 							getenv("STARPU_MIC_SINK_PROGRAM_NAME"),
- * 							getenv("STARPU_MIC_SINK_PROGRAM_PATH"),
- * 							(user_conf == NULL ? NULL : user_conf->mic_sink_program_path),
- * 							(argv ? (*argv)[0] : NULL),
- * 							suffixes);
- *
- * 	STARPU_ASSERT(mic_file_found == 0);
- *
- * 	/\* Let's get the handle which let us manage the remote MIC device *\/
- * 	res = COIEngineGetHandle(COI_ISA_MIC, devid, &handles[devid]);
- * 	if (STARPU_UNLIKELY(res != COI_SUCCESS))
- * 		STARPU_MIC_SRC_REPORT_COI_ERROR(res);
- *
- * 	/\* We launch the helper on the MIC device, which will wait for us
- * 	 * to give it work to do.
- * 	 * As we will communicate further with the device throught scif we
- * 	 * don't need to keep the process pointer *\/
- * 	res = COIProcessCreateFromFile(handles[devid], mic_sink_program_path, 0, NULL, 0,
- * 				       mic_sink_env, 1, NULL, 0, NULL,
- * 				       &process[devid]);
- * 	if (STARPU_UNLIKELY(res != COI_SUCCESS))
- * 		STARPU_MIC_SRC_REPORT_COI_ERROR(res);
- *
- * 	/\* Let's create the node structure, we'll communicate with the peer
- * 	 * through scif thanks to it *\/
- * 	mic_nodes[devid] = _starpu_mp_common_node_create(STARPU_MIC_SOURCE,
- * 							   devid);
- *
- *
- * 	// XXX: this is not replicated in `_starpu_init_mic_node'.
- * 	STARPU_PTHREAD_MUTEX_LOCK(&nb_mic_worker_init_mutex);
- * 	++nb_mic_worker_init;
- * 	STARPU_PTHREAD_MUTEX_UNLOCK(&nb_mic_worker_init_mutex);
- * } */
-
-/* static void _starpu_mic_src_free_kernel(void *kernel)
- * {
- * 	struct _starpu_mic_kernel *k = kernel;
- *
- * 	free(k->name);
- * 	free(kernel);
- * } */
+#if 0
+static void _starpu_mic_src_init_context(int devid,
+					 struct starpu_conf *user_conf)
+{
+	COIRESULT res;
+	char mic_sink_program_path[1024];
+
+	char ***argv = _starpu_get_argv();
+	const char *suffixes[] = {"-mic", "_mic", NULL};
+
+	char devid_env[32];
+	sprintf(devid_env, "DEVID=%d", devid);
+
+	char nb_mic_env[32];
+	sprintf(nb_mic_env, "NB_MIC=%d", starpu_mic_worker_get_count());
+
+	/* Environment variables to send to the Sink, it informs it what kind
+	 * of node it is (architecture and type) as there is no way to discover
+	 * it itself */
+	const char *mic_sink_env[] = {"STARPU_SINK=STARPU_MIC", devid_env, nb_mic_env, NULL};
+
+	/* Let's get the helper program to run on the MIC device */
+	int mic_file_found = _starpu_src_common_locate_file(mic_sink_program_path,
+							getenv("STARPU_MIC_SINK_PROGRAM_NAME"),
+							getenv("STARPU_MIC_SINK_PROGRAM_PATH"),
+							(user_conf == NULL ? NULL : user_conf->mic_sink_program_path),
+							(argv ? (*argv)[0] : NULL),
+							suffixes);
+
+	STARPU_ASSERT(mic_file_found == 0);
+
+	/* Let's get the handle which let us manage the remote MIC device */
+	res = COIEngineGetHandle(COI_ISA_MIC, devid, &handles[devid]);
+	if (STARPU_UNLIKELY(res != COI_SUCCESS))
+		STARPU_MIC_SRC_REPORT_COI_ERROR(res);
+
+	/* We launch the helper on the MIC device, which will wait for us
+	 * to give it work to do.
+	 * As we will communicate further with the device through scif we
+	 * don't need to keep the process pointer */
+	res = COIProcessCreateFromFile(handles[devid], mic_sink_program_path, 0, NULL, 0,
+				       mic_sink_env, 1, NULL, 0, NULL,
+				       &process[devid]);
+	if (STARPU_UNLIKELY(res != COI_SUCCESS))
+		STARPU_MIC_SRC_REPORT_COI_ERROR(res);
+
+	/* Let's create the node structure, we'll communicate with the peer
+	 * through scif thanks to it */
+	mic_nodes[devid] = _starpu_mp_common_node_create(STARPU_MIC_SOURCE,
+							   devid);
+
+
+	// XXX: this is not replicated in `_starpu_init_mic_node'.
+	STARPU_PTHREAD_MUTEX_LOCK(&nb_mic_worker_init_mutex);
+	++nb_mic_worker_init;
+	STARPU_PTHREAD_MUTEX_UNLOCK(&nb_mic_worker_init_mutex);
+}
 
-/* static void _starpu_mic_src_deinit_context(int devid)
- * {
- * 	_starpu_mp_common_send_command(mic_nodes[devid], STARPU_EXIT, NULL, 0);
- *
- * 	COIProcessDestroy(process[devid], -1, 0, NULL, NULL);
- *
- * 	_starpu_mp_common_node_destroy(mic_nodes[devid]);
- *
- * 	STARPU_PTHREAD_MUTEX_LOCK(&nb_mic_worker_init_mutex);
- * 	unsigned int tmp = --nb_mic_worker_init;
- * 	STARPU_PTHREAD_MUTEX_UNLOCK(&nb_mic_worker_init_mutex);
- *
- * 	if (tmp == 0)
- * 		_starpu_htbl_destroy(&kernels_htbl, _starpu_mic_src_free_kernel);
- * } */
+static void _starpu_mic_src_free_kernel(void *kernel)
+{
+	struct _starpu_mic_kernel *k = kernel;
+
+	free(k->name);
+	free(kernel);
+}
+
+static void _starpu_mic_src_deinit_context(int devid)
+{
+	_starpu_mp_common_send_command(mic_nodes[devid], STARPU_EXIT, NULL, 0);
+
+	COIProcessDestroy(process[devid], -1, 0, NULL, NULL);
+
+	_starpu_mp_common_node_destroy(mic_nodes[devid]);
+
+	STARPU_PTHREAD_MUTEX_LOCK(&nb_mic_worker_init_mutex);
+	unsigned int tmp = --nb_mic_worker_init;
+	STARPU_PTHREAD_MUTEX_UNLOCK(&nb_mic_worker_init_mutex);
+
+	if (tmp == 0) {
+		struct _starpu_mic_kernel *kernel, *tmp;
+		HASH_ITER(hh, kernels, kernel, tmp)
+		{
+			HASH_DEL(kernels, kernel);
+			free(kernel);
+		}
+	}
+}
+#endif
 
 static int
 _starpu_mic_src_finalize_job (struct _starpu_job *j, struct _starpu_worker *worker)
@@ -271,7 +277,9 @@ int _starpu_mic_src_register_kernel(starpu_mic_func_symbol_t *symbol, const char
 	unsigned int func_name_size = (strlen(func_name) + 1) * sizeof(char);
 
 	STARPU_PTHREAD_MUTEX_LOCK(&htbl_mutex);
-	struct _starpu_mic_kernel *kernel = _starpu_htbl_search(&kernels_htbl, func_name, func_name_size);
+	struct _starpu_mic_kernel *kernel;
+	
+	HASH_FIND_STR(kernels, func_name, kernel);
 
 	if (kernel != NULL)
 	{
@@ -296,17 +304,10 @@ int _starpu_mic_src_register_kernel(starpu_mic_func_symbol_t *symbol, const char
 		return -ENOMEM;
 	}
 
-	int ret = _starpu_htbl_insert(&kernels_htbl, func_name, func_name_size, kernel);
-	STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
-	if (ret != 0)
-	{
-		free(kernel->name);
-		free(kernel);
-		return -ENOMEM;
-	}
-
 	memcpy(kernel->name, func_name, func_name_size);
 
+	HASH_ADD_STR(kernels, name, kernel);
+
 	unsigned int nb_mic_devices = _starpu_mic_src_get_device_count();
 	unsigned int i;
 	for (i = 0; i < nb_mic_devices; ++i)
@@ -314,6 +315,8 @@ int _starpu_mic_src_register_kernel(starpu_mic_func_symbol_t *symbol, const char
 
 	*symbol = kernel;
 
+	STARPU_PTHREAD_MUTEX_UNLOCK(&htbl_mutex);
+
 	return 0;
 }
 
@@ -495,7 +498,7 @@ int _starpu_mic_allocate_memory(void **addr, size_t size, unsigned memory_node)
 	 * transfert with scif is not possible when the MIC
 	 * doesn't have enought free memory.
 	 * In this cas we can't tell any things to the host. */
-	//int devid = _starpu_memory_node_to_devid(memory_node);
+	//int devid = _starpu_memory_node_get_devid(memory_node);
 	//if (_starpu_mic_get_free_mem_size(devid) < size * 1.25)
 	//	return 1;
 
@@ -640,13 +643,13 @@ void *_starpu_mic_src_worker(void *arg)
 
 	baseworker->status = STATUS_UNKNOWN;
 
-	_STARPU_TRACE_WORKER_INIT_END
+	_STARPU_TRACE_WORKER_INIT_END;
 
 	/* tell the main thread that this one is ready */
-	_STARPU_STARPU_PTHREAD_MUTEX_LOCK(&args->mutex);
+	STARPU_PTHREAD_MUTEX_LOCK(&args->mutex);
 	args->set_is_initialized = 1;
-	_STARPU_PTHREAD_COND_SIGNAL(&args->ready_cond);
-	_STARPU_STARPU_PTHREAD_MUTEX_UNLOCK(&args->mutex);
+	STARPU_PTHREAD_COND_SIGNAL(&args->ready_cond);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&args->mutex);
 
 
 	while (_starpu_machine_is_running())
@@ -660,7 +663,7 @@ void *_starpu_mic_src_worker(void *arg)
 		_starpu_datawizard_progress(memnode, 1);
 		_STARPU_TRACE_END_PROGRESS(memnode);
 
-		_STARPU_STARPU_PTHREAD_MUTEX_LOCK(baseworker->sched_mutex);
+		STARPU_PTHREAD_MUTEX_LOCK(&baseworker->sched_mutex);
 
 		/* We pop tasklists of each worker in the set and process the
 		 * first non-empty list. */
@@ -685,21 +688,40 @@ void *_starpu_mic_src_worker(void *arg)
 		/* At this point, there is really nothing to do for the thread
 		 * so we can block.
 		 * XXX: blocking drivers is in fact broken. DO NOT USE IT ! */
+		if (_starpu_worker_get_status(baseworkerid) != STATUS_SLEEPING)
+		{
+			_STARPU_TRACE_WORKER_SLEEP_START;
+			_starpu_worker_restart_sleeping(baseworkerid);
+			_starpu_worker_set_status(baseworkerid, STATUS_SLEEPING);
+		}
+
 		if (_starpu_worker_can_block(memnode))
-		    _starpu_block_worker(baseworkerid, baseworker->sched_cond, baseworker->sched_mutex);
+			STARPU_PTHREAD_COND_WAIT(&baseworker->sched_cond, &baseworker->sched_mutex);
+		else
+		{
+			if (_starpu_machine_is_running())
+				STARPU_UYIELD();
+		}
+
+		if (_starpu_worker_get_status(baseworkerid) == STATUS_SLEEPING)
+		{
+			_STARPU_TRACE_WORKER_SLEEP_END;
+			_starpu_worker_stop_sleeping(baseworkerid);
+			_starpu_worker_set_status(baseworkerid, STATUS_UNKNOWN);
+		}
 
 	restart_loop:
-		_STARPU_STARPU_PTHREAD_MUTEX_UNLOCK(baseworker->sched_mutex);
+		STARPU_PTHREAD_MUTEX_UNLOCK(&baseworker->sched_mutex);
 		continue;
 
 	task_found:
 		/* If the MIC core associated to `micworkerid' is already
 		 * processing a job, we push back this one in the worker task
 		 * list. */
-		_STARPU_STARPU_PTHREAD_MUTEX_UNLOCK(baseworker->sched_mutex);
+		STARPU_PTHREAD_MUTEX_UNLOCK(&baseworker->sched_mutex);
 
 		if (args->workers[micworkerid].current_task) {
-		    _starpu_push_task_to_workers(j);
+		    _starpu_push_task_to_workers(task);
 		    continue;
 		}
 
@@ -710,7 +732,7 @@ void *_starpu_mic_src_worker(void *arg)
 		if (!_STARPU_MIC_MAY_PERFORM(j))
 		{
 			/* this isn't a mic task */
-			_starpu_push_task_to_workers(j);
+			_starpu_push_task_to_workers(task);
 			continue;
 		}
 
@@ -733,7 +755,7 @@ void *_starpu_mic_src_worker(void *arg)
 		}
 	}
 
-	_STARPU_TRACE_WORKER_DEINIT_START
+	_STARPU_TRACE_WORKER_DEINIT_START;
 
 	_starpu_handle_all_pending_node_data_requests(memnode);
 

+ 7 - 7
src/drivers/scc/driver_scc_source.c

@@ -334,13 +334,13 @@ void *_starpu_scc_src_worker(void *arg)
 
 	args->status = STATUS_UNKNOWN;
 
-	_STARPU_TRACE_WORKER_INIT_END
+	_STARPU_TRACE_WORKER_INIT_END;
 
 	/* tell the main thread that this one is ready */
-	_STARPU_STARPU_PTHREAD_MUTEX_LOCK(&args->mutex);
+	STARPU_PTHREAD_MUTEX_LOCK(&args->mutex);
 	args->worker_is_initialized = 1;
-	_STARPU_PTHREAD_COND_SIGNAL(&args->ready_cond);
-	_STARPU_STARPU_PTHREAD_MUTEX_UNLOCK(&args->mutex);
+	STARPU_PTHREAD_COND_SIGNAL(&args->ready_cond);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&args->mutex);
 
 	struct _starpu_job * j;
 	struct starpu_task *task;
@@ -362,7 +362,7 @@ void *_starpu_scc_src_worker(void *arg)
 		if (!_STARPU_SCC_MAY_PERFORM(j))
 		{
 			/* this isn't a SCC task */
-			_starpu_push_task_to_workers(j);
+			_starpu_push_task_to_workers(task);
 			continue;
 		}
 
@@ -380,7 +380,7 @@ void *_starpu_scc_src_worker(void *arg)
 			{
 				case -EAGAIN:
 					_STARPU_DISP("ouch, put the codelet %p back ... \n", j);
-					_starpu_push_task(j);
+					_starpu_push_task_to_workers(task);
 					STARPU_ABORT();
 					continue;
 				default:
@@ -391,7 +391,7 @@ void *_starpu_scc_src_worker(void *arg)
 		_starpu_handle_job_termination(j);
 	}
 
-	_STARPU_TRACE_WORKER_DEINIT_START
+	_STARPU_TRACE_WORKER_DEINIT_START;
 
 	_starpu_handle_all_pending_node_data_requests(memnode);
 

+ 9 - 8
super-configure

@@ -1,8 +1,9 @@
-#!/bin/sh
+#!/bin/bash
 
 ROOT_DIR=$PWD
-MIC_HOST=x86_64-k1om-linux
-MIC_CC_PATH=/usr/linux-k1om-4.7/bin/
+[ -n "$MIC_HOST" ] || MIC_HOST=x86_64-k1om-linux
+[ -n "$MIC_CC_PATH" ] || MIC_CC_PATH=/usr/linux-k1om-4.7/bin/
+[ -n "$COI_DIR" ] || COI_DIR=/opt/intel/mic/coi
 DEFAULT_PREFIX=/usr/local
 
 export PATH=${MIC_CC_PATH}${PATH:+:${PATH}}
@@ -16,13 +17,13 @@ do
 
 	# We call the configure script from a build directory further in the
 	# arborescence
-	command="${ROOT_DIR}/configure --enable-mic --with-coi-dir=/opt/intel/mic/coi"
+	command="${ROOT_DIR}/configure --enable-mic --with-coi-dir=$COI_DIR"
 	prefix_found=no
 
 	if test x$arch = xmic ; then
-		command="$command --without-hwloc --with-coi-lib-dir=/opt/intel/mic/coi/device-linux-release/lib --host=$MIC_HOST"
+		command="$command --without-hwloc --with-coi-lib-dir=$COI_DIR/device-linux-release/lib --host=$MIC_HOST"
 	else
-		command="$command --with-coi-lib-dir=/opt/intel/mic/coi/host-linux-release/lib"
+		command="$command --with-coi-lib-dir=$COI_DIR/host-linux-release/lib"
 	fi
 
 	for arg in $*
@@ -68,7 +69,7 @@ do
 
 done
 
-if [ ! -f "${prefix}/mic/lib/pkgconfig/starpu-1.0-mic.pc" ]
+if [ ! -f "${prefix}/mic/lib/pkgconfig/starpu-1.2-mic.pc" ]
 then
-	ln -s "${prefix}/mic/lib/pkgconfig/starpu-1.0.pc" "${prefix}/mic/lib/pkgconfig/starpu-1.0-mic.pc"
+	ln -s "${prefix}/mic/lib/pkgconfig/starpu-1.2.pc" "${prefix}/mic/lib/pkgconfig/starpu-1.2-mic.pc"
 fi

+ 4 - 2
tests/Makefile.am

@@ -15,10 +15,10 @@
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
-AM_CFLAGS = $(HWLOC_CFLAGS) $(FXT_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
+AM_CFLAGS = $(HWLOC_CFLAGS) $(FXT_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(STARPU_COI_CPPFLAGS) $(GLOBAL_AM_CFLAGS) -Wno-unused
 LIBS = $(top_builddir)/src/@LIBSTARPU_LINK@ $(HWLOC_LIBS) @LIBS@
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_builddir)/src -I$(top_srcdir)/src/
-AM_LDFLAGS = $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(FXT_LDFLAGS)
+AM_LDFLAGS = @STARPU_EXPORT_DYNAMIC@ $(STARPU_OPENCL_LDFLAGS) $(STARPU_CUDA_LDFLAGS) $(STARPU_COI_LDFLAGS) $(FXT_LDFLAGS)
 
 EXTRA_DIST =					\
 	helper.h				\
@@ -79,6 +79,7 @@ testbindir = $(libdir)/starpu/tests
 # What to install and what to check #
 #####################################
 
+if !STARPU_CROSS_COMPILING
 if !STARPU_HAVE_WINDOWS
 ## test loader program
 LOADER			=	loader
@@ -91,6 +92,7 @@ else
 TESTS_ENVIRONMENT	=	top_builddir="$(abs_top_builddir)" top_srcdir="$(abs_top_srcdir)" $(LOADER_BIN)
 endif
 endif
+endif
 
 TESTS = $(noinst_PROGRAMS)