浏览代码

mic: update nmaxmicthreads

Thibaud Lambert 11 年之前
父节点
当前提交
21e9824ccc

+ 1 - 1
configure.ac

@@ -967,7 +967,7 @@ AC_DEFINE_UNQUOTED(STARPU_MAXMICDEVS, [$nmaxmicdev],
 AC_MSG_CHECKING(maximum number of MIC threads)
 AC_ARG_ENABLE(maxmicthreads, [AS_HELP_STRING([--enable-maxmicthreads=<number>],
 			[maximum number of MIC threads])],
-			nmaxmicthreads=$enableval, nmaxmicthreads=480)
+			nmaxmicthreads=$enableval, nmaxmicthreads=960)
 AC_MSG_RESULT($nmaxmicthread)
 
 AC_DEFINE_UNQUOTED(STARPU_MAXMICCORES, [$nmaxmicthreads],

+ 8 - 8
src/core/perfmodel/perfmodel.c

@@ -446,8 +446,8 @@ void _starpu_create_sampling_directory_if_needed(void)
 {
 	if (!directory_existence_was_tested)
 	{
-		char perf_model_dir[STARPU_NMAXWORKERS];
-		_starpu_get_perf_model_dir(perf_model_dir, STARPU_NMAXWORKERS);
+		char perf_model_dir[256];
+		_starpu_get_perf_model_dir(perf_model_dir, 256);
 
 		/* The performance of the codelets are stored in
 		 * $STARPU_PERF_MODEL_DIR/codelets/ while those of the bus are stored in
@@ -461,18 +461,18 @@ void _starpu_create_sampling_directory_if_needed(void)
 
 
 		/* Per-task performance models */
-		char perf_model_dir_codelets[STARPU_NMAXWORKERS];
-		_starpu_get_perf_model_dir_codelets(perf_model_dir_codelets, STARPU_NMAXWORKERS);
+		char perf_model_dir_codelets[256];
+		_starpu_get_perf_model_dir_codelets(perf_model_dir_codelets, 256);
 		_starpu_mkpath_and_check(perf_model_dir_codelets, S_IRWXU);
 
 		/* Performance of the memory subsystem */
-		char perf_model_dir_bus[STARPU_NMAXWORKERS];
-		_starpu_get_perf_model_dir_bus(perf_model_dir_bus, STARPU_NMAXWORKERS);
+		char perf_model_dir_bus[256];
+		_starpu_get_perf_model_dir_bus(perf_model_dir_bus, 256);
 		_starpu_mkpath_and_check(perf_model_dir_bus, S_IRWXU);
 
 		/* Performance debug measurements */
-		char perf_model_dir_debug[STARPU_NMAXWORKERS];
-		_starpu_get_perf_model_dir_debug(perf_model_dir_debug, STARPU_NMAXWORKERS);
+		char perf_model_dir_debug[256];
+		_starpu_get_perf_model_dir_debug(perf_model_dir_debug, 256);
 		_starpu_mkpath_and_check(perf_model_dir_debug, S_IRWXU);
 
 		directory_existence_was_tested = 1;

+ 3 - 0
src/core/topology.c

@@ -1098,6 +1098,9 @@ _starpu_bind_thread_on_cpus (
 		}
 	}
 #else
+#ifdef __GLIBC__
+	pthread_setaffinity_np(pthread_self(),sizeof(cpu_set_t),&combined_worker->cpu_set);
+#endif
 #warning no parallel worker CPU binding support
 #endif
 }

+ 6 - 1
src/core/workers.c

@@ -293,10 +293,15 @@ int starpu_combined_worker_can_execute_task(unsigned workerid, struct starpu_tas
 	}
 	else
 	{
-		if ((cl->type == STARPU_SPMD)
+		if ((cl->type == STARPU_SPMD) 
 #ifdef STARPU_HAVE_HWLOC
 				|| (cl->type == STARPU_FORKJOIN)
+#else
+#ifdef __GLIBC__
+				|| (cl->type == STARPU_FORKJOIN)
 #endif
+#endif
+
 				)
 		{
 			/* TODO we should add other types of constraints */

+ 3 - 4
src/drivers/mic/driver_mic_sink.c

@@ -63,7 +63,7 @@ void _starpu_mic_sink_init(struct _starpu_mp_node *node)
 	node->thread_table = malloc(sizeof(pthread_t)*node->nb_cores);
 
 	node->run_table = malloc(sizeof(struct mp_task *)*node->nb_cores);
-	node->mutex_run_table = malloc(sizeof(pthread_mutex_t)*node->nb_cores);
+	node->sem_run_table = malloc(sizeof(sem_t)*node->nb_cores);
 
 	node->barrier_list = mp_barrier_list_new();
 	node->message_queue = mp_message_list_new();
@@ -76,8 +76,7 @@ void _starpu_mic_sink_init(struct _starpu_mp_node *node)
 	{
 		node->run_table[i] = NULL;
 
-		pthread_mutex_init(&node->mutex_run_table[i],NULL);
-		pthread_mutex_lock(&node->mutex_run_table[i]);
+		sem_init(&node->sem_run_table[i],0,0);
 
 		//init the set
 		CPU_ZERO(&cpuset);
@@ -93,7 +92,7 @@ void _starpu_mic_sink_init(struct _starpu_mp_node *node)
 		arg->task = &node->run_table[i];
 		arg->coreid = i;
 		arg->node = node;
-		arg->mutex = &node->mutex_run_table[i];
+		arg->sem = &node->sem_run_table[i];
 		
 		ret = pthread_create(&thread, &attr, _starpu_sink_thread, arg);
 		((pthread_t *)node->thread_table)[i] = thread;

+ 2 - 1
src/drivers/mp_common/mp_common.h

@@ -18,6 +18,7 @@
 #define __MP_COMMON_H__
 
 #include <pthread.h>
+#include <semaphore.h>
 
 #include <starpu.h>
 #include <common/config.h>
@@ -182,7 +183,7 @@ struct _starpu_mp_node
 
 	/* table where workers come to pick their task */
 	struct mp_task ** run_table;
-	pthread_mutex_t * mutex_run_table;
+	sem_t * sem_run_table;
 
 	/* Node general functions */
 	void (*init)(struct _starpu_mp_node *node);

+ 3 - 3
src/drivers/mp_common/sink_common.c

@@ -253,7 +253,7 @@ void* _starpu_sink_thread(void * thread_arg)
 	/* Retrieve the information from the structure */
 	struct mp_task **task = ((struct arg_sink_thread *)thread_arg)->task;
 	struct _starpu_mp_node *node = ((struct arg_sink_thread *)thread_arg)->node;
-	pthread_mutex_t * mutex = ((struct arg_sink_thread *)thread_arg)->mutex;
+	sem_t * sem = ((struct arg_sink_thread *)thread_arg)->sem;
 	int coreid =((struct arg_sink_thread *)thread_arg)->coreid;
 	/* free the structure */
 	free(thread_arg);
@@ -262,7 +262,7 @@ void* _starpu_sink_thread(void * thread_arg)
 	while(1)
 	{
 		/*Wait there is a task available */
-		pthread_mutex_lock(mutex);
+		sem_wait(sem);
 
 		/* If it's a parallel task */
 		if((*task)->is_parallel_task)
@@ -347,7 +347,7 @@ static void _starpu_sink_common_execute_thread(struct _starpu_mp_node *node, str
 	/* Add the task to the specific thread */
 	node->run_table[task->coreid] = task;
 	/* Post the semaphore to wake up the thread which will execute the task */
-	pthread_mutex_unlock(&node->mutex_run_table[task->coreid]);
+	sem_post(&node->sem_run_table[task->coreid]);
 }
 
 /* Search for the mp_barrier corresponding to the specified combined worker 

+ 1 - 1
src/drivers/mp_common/sink_common.h

@@ -36,7 +36,7 @@ struct arg_sink_thread
 {
 	struct mp_task ** task;
 	struct _starpu_mp_node *node;
-	pthread_mutex_t* mutex;
+	sem_t* sem;
 	int coreid;
 };
 

+ 1 - 1
src/drivers/mp_common/source_common.c

@@ -361,7 +361,7 @@ static int _starpu_src_common_execute(struct _starpu_job *j,
 	_starpu_driver_start_job(worker, j, &j->cl_start, 0, profiling);
 
 
-	_STARPU_DEBUG("\nworkerid:%d, rank:%d, type:%d,	cb_workerid:%d, task_size:%d\n\n",worker->devid,worker->current_rank,task->cl->type,j->combined_workerid,j->task_size);
+	//_STARPU_DEBUG("\nworkerid:%d, rank:%d, type:%d,	cb_workerid:%d, task_size:%d\n\n",worker->devid,worker->current_rank,task->cl->type,j->combined_workerid,j->task_size);
 
 	_starpu_src_common_execute_kernel(node, kernel, worker->devid, task->cl->type,
 			(j->task_size > 1),

+ 5 - 2
src/sched_policies/parallel_eager.c

@@ -29,8 +29,8 @@ struct _starpu_peager_data
         starpu_pthread_mutex_t policy_mutex;
 };
 
-#define STARPU_NMAXCOMBINED_WORKERS 200
-/* XXX instead of 10, we should use some "MAX combination .."*/
+#define STARPU_NMAXCOMBINED_WORKERS 480
+/* instead of STARPU_NMAXCOMBINED_WORKERS, we should use some "MAX combination .."*/
 static int possible_combinations_cnt[STARPU_NMAXWORKERS];
 static int possible_combinations[STARPU_NMAXWORKERS][STARPU_NMAXCOMBINED_WORKERS];
 static int possible_combinations_size[STARPU_NMAXWORKERS][STARPU_NMAXCOMBINED_WORKERS];
@@ -203,6 +203,7 @@ static struct starpu_task *pop_task_peager_policy(unsigned sched_ctx_id)
 		return task;
 	}
 
+	_STARPU_DEBUG("workerid:%d\n", workerid);
 
 	int master = data->master_id[workerid];
 
@@ -237,6 +238,8 @@ static struct starpu_task *pop_task_peager_policy(unsigned sched_ctx_id)
 			}
 		}
 
+		_STARPU_DEBUG("## best_workerid:%d, best_size:%d\n",best_workerid,best_size);
+
 		/* In case nobody can execute this task, we let the master
 		 * worker take it anyway, so that it can discard it afterward.
 		 * */

+ 1 - 1
tests/parallel_tasks/explicit_combined_worker.c

@@ -21,7 +21,7 @@
 #include <unistd.h>
 #include "../helper.h"
 
-#define N	1000
+#define N	10
 #define VECTORSIZE	1024
 
 void codelet_null(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)

+ 3 - 3
tests/parallel_tasks/parallel_kernels.c

@@ -28,9 +28,9 @@ void codelet_null(void *descr[], STARPU_ATTRIBUTE_UNUSED void *_args)
 {
 	STARPU_SKIP_IF_VALGRIND;
 
-	//int worker_size = starpu_combined_worker_get_size();
-	//STARPU_ASSERT(worker_size > 0);
-	//usleep(1000/worker_size);
+	int worker_size = starpu_combined_worker_get_size();
+	STARPU_ASSERT(worker_size > 0);
+	usleep(1000/worker_size);
 #if 0
 	int id = starpu_worker_get_id();
 	int combined_id = starpu_combined_worker_get_id();