Corentin Salingue hace 12 años
padre
commit
e75bbec0cf

+ 2 - 0
ChangeLog

@@ -152,6 +152,8 @@ Small features:
   * New function starpu_get_version() to return as 3 integers the
     release version of StarPU.
   * Enable by default data allocation cache
+  * Explicitly name the non-sleeping-non-running time "Overhead", and use
+    another color in vite traces.
 
 Changes:
   * Rename all filter functions to follow the pattern

+ 1 - 1
doc/doxygen/chapters/api/codelet_and_tasks.doxy

@@ -325,7 +325,7 @@ purposes.
 This field has been deprecated. One should instead use the
 field starpu_task::handles to specify the data handles accessed
 by the task. The access modes are now defined in the field
-starpu_codelet::mode.
+starpu_codelet::modes.
 \var starpu_task::handles
 Is an array of ::starpu_data_handle_t. It specifies the handles to the
 different pieces of data accessed by the task. The number of entries

+ 14 - 0
doc/doxygen/chapters/tips_and_tricks.doxy

@@ -95,4 +95,18 @@ Or add the following line in the file <c>/etc/sysctl.conf</c>
 security.models.extensions.user_set_cpu_affinity=1
 \endverbatim
 
+\section UsingStarPUWithMKL Using StarPU With MKL 11 (Intel Composer XE 2013)
+
+Some users had issues with MKL 11 and StarPU (versions 1.1rc1 and
+1.0.5) on Linux when using a single thread for MKL and leaving all the
+parallelism to StarPU (no multithreaded tasks), i.e. setting the
+environment variable MKL_NUM_THREADS to 1 and linking against the threaded
+MKL library with iomp5.
+
+Using this configuration, StarPU uses only 1 core, no matter the value of
+\ref STARPU_NCPU. The problem is actually a thread pinning issue with MKL.
+
+The solution is to set the environment variable KMP_AFFINITY to <c>disabled</c>
+(http://software.intel.com/sites/products/documentation/studio/composer/en-us/2011Update/compiler_c/optaps/common/optaps_openmp_thread_affinity.htm).
+
 */

+ 1 - 7
src/core/dependencies/implicit_data_deps.c

@@ -481,12 +481,8 @@ void _starpu_unlock_post_sync_tasks(starpu_data_handle_t handle)
 	struct _starpu_task_wrapper_list *post_sync_tasks = NULL;
 	unsigned do_submit_tasks = 0;
 
-	STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
-
-	if (handle->sequential_consistency)
+	if (handle->post_sync_tasks_cnt > 0)
 	{
-		STARPU_ASSERT(handle->post_sync_tasks_cnt > 0);
-
 		if (--handle->post_sync_tasks_cnt == 0)
 		{
 			/* unlock all tasks : we need not hold the lock while unlocking all these tasks */
@@ -496,8 +492,6 @@ void _starpu_unlock_post_sync_tasks(starpu_data_handle_t handle)
 		}
 	}
 
-	STARPU_PTHREAD_MUTEX_UNLOCK(&handle->sequential_consistency_mutex);
-
 	if (do_submit_tasks)
 	{
 		struct _starpu_task_wrapper_list *link = post_sync_tasks;

+ 6 - 1
src/datawizard/memalloc.c

@@ -926,6 +926,7 @@ unsigned starpu_data_test_if_allocated_on_node(starpu_data_handle_t handle, unsi
 	return handle->per_node[memory_node].allocated;
 }
 
+/* Record that this memchunk has been recently used */
 void _starpu_memchunk_recently_used(struct _starpu_mem_chunk *mc, unsigned node)
 {
 	_starpu_spin_lock(&lru_rwlock[node]);
@@ -935,10 +936,11 @@ void _starpu_memchunk_recently_used(struct _starpu_mem_chunk *mc, unsigned node)
 	_starpu_spin_unlock(&lru_rwlock[node]);
 }
 
+/* Push the given memchunk, recently used, at the end of the chunks to be evicted */
 /* The mc_rwlock[node] rw-lock should be taken prior to calling this function.*/
 static void _starpu_memchunk_recently_used_move(struct _starpu_mem_chunk *mc, unsigned node)
 {
-	/* XXX Sometimes the memchunk is not in the list... */
+	/* Note: Sometimes the memchunk is not in the list... */
 	struct _starpu_mem_chunk *mc_iter;
 	for (mc_iter = _starpu_mem_chunk_list_begin(mc_list[node]);
 	     mc_iter != _starpu_mem_chunk_list_end(mc_list[node]);
@@ -954,6 +956,9 @@ static void _starpu_memchunk_recently_used_move(struct _starpu_mem_chunk *mc, un
 	}
 }
 
+/* Put the recently used memchunks at the end of the mc_list, in the same order
+ * as the LRU list, so that the most recently used memchunk eventually comes
+ * last in the mc_list */
 static void starpu_lru(unsigned node)
 {
 	STARPU_PTHREAD_RWLOCK_WRLOCK(&mc_rwlock[node]);

+ 4 - 1
src/datawizard/user_interactions.c

@@ -75,7 +75,8 @@ static void _starpu_data_acquire_fetch_data_callback(void *arg)
 	 * We enqueue the "post" sync task in the list associated to the handle
 	 * so that it is submitted by the starpu_data_release
 	 * function. */
-	_starpu_add_post_sync_tasks(wrapper->post_sync_task, handle);
+	if (wrapper->post_sync_task)
+		_starpu_add_post_sync_tasks(wrapper->post_sync_task, handle);
 
 	wrapper->callback(wrapper->callback_arg);
 
@@ -132,6 +133,8 @@ int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, unsigned node,
 	STARPU_PTHREAD_COND_INIT(&wrapper->cond, NULL);
 	STARPU_PTHREAD_MUTEX_INIT(&wrapper->lock, NULL);
 	wrapper->finished = 0;
+	wrapper->pre_sync_task = NULL;
+	wrapper->post_sync_task = NULL;
 
 	STARPU_PTHREAD_MUTEX_LOCK(&handle->sequential_consistency_mutex);
 	int sequential_consistency = handle->sequential_consistency;

+ 4 - 4
src/debug/traces/starpu_paje.c

@@ -160,7 +160,7 @@ void _starpu_fxt_write_paje_header(FILE *file)
 	poti_DefineEntityValue("Fi", "S", "FetchingInput", "1.0 .1 1.0");
 	poti_DefineEntityValue("Po", "S", "PushingOutput", "0.1 1.0 1.0");
 	poti_DefineEntityValue("C", "S", "Callback", ".0 .3 .8");
-	poti_DefineEntityValue("B", "S", "Blocked", ".9 .1 .0");
+	poti_DefineEntityValue("B", "S", "Overhead", ".5 .18 .0");
 	poti_DefineEntityValue("Sl", "S", "Sleeping", ".9 .1 .0");
 	poti_DefineEntityValue("P", "S", "Progressing", ".4 .1 .6");
 
@@ -187,7 +187,7 @@ void _starpu_fxt_write_paje_header(FILE *file)
 		poti_DefineEntityValue("Fi", ctx, "FetchingInput", "1.0 .1 1.0");
 		poti_DefineEntityValue("Po", ctx, "PushingOutput", "0.1 1.0 1.0");
 		poti_DefineEntityValue("C", ctx, "Callback", ".0 .3 .8");
-		poti_DefineEntityValue("B", ctx, "Blocked", ".9 .1 .0");
+		poti_DefineEntityValue("B", ctx, "Overhead", ".5 .18 .0");
 		poti_DefineEntityValue("Sl", ctx, "Sleeping", ".9 .1 .0");
 		poti_DefineEntityValue("P", ctx, "Progressing", ".4 .1 .6");
 	}
@@ -226,7 +226,7 @@ void _starpu_fxt_write_paje_header(FILE *file)
 6       Fi       S      FetchingInput       \"1.0 .1 1.0\"            \n\
 6       Po       S      PushingOutput       \"0.1 1.0 1.0\"            \n\
 6       C       S       Callback       \".0 .3 .8\"            \n\
-6       B       S       Blocked         \".9 .1 .0\"		\n\
+6       B       S       Overhead         \".5 .18 .0\"		\n\
 6       Sl       S      Sleeping         \".9 .1 .0\"		\n\
 6       P       S       Progressing         \".4 .1 .6\"		\n");
 	fprintf(file, "\
@@ -245,7 +245,7 @@ void _starpu_fxt_write_paje_header(FILE *file)
 6       Fi       Ctx%u      FetchingInput       \"1.0 .1 1.0\"            \n\
 6       Po       Ctx%u      PushingOutput       \"0.1 1.0 1.0\"            \n\
 6       C       Ctx%u       Callback       \".0 .3 .8\"            \n\
-6       B       Ctx%u       Blocked         \".9 .1 .0\"		\n\
+6       B       Ctx%u       Overhead         \".5 .18 .0\"		\n\
 6       Sl       Ctx%u      Sleeping         \".9 .1 .0\"		\n\
 6       P       Ctx%u       Progressing         \".4 .1 .6\"		\n",
 		i, i, i, i, i, i, i, i);

+ 2 - 2
tests/disk/disk_compute.c

@@ -23,7 +23,7 @@
 #include <stdio.h>
 #include <math.h>
 
-#define NX (30*1000000/sizeof(int))
+#define NX (30*1000000)
 
 int main(int argc, char **argv)
 {
@@ -66,7 +66,7 @@ int main(int argc, char **argv)
 
 	/* And now, you want to use your data in StarPU */
 	/* Open the file ON the disk */
-	void * data = starpu_disk_open(dd, (void *) "STARPU_DISK_COMPUTE_DATA", NX*sizeof(int));
+	void * data = starpu_disk_open(dd, (void *) "STARPU_DISK_COMPUTE_DATA", NX);
 
 	starpu_data_handle_t vector_handleA, vector_handleB;