hace 11 años · 7ba19ce15d
--- a/src/util/openmp_runtime_support.c
+++ b/src/util/openmp_runtime_support.c
@@ -1009,9 +1009,7 @@ void starpu_omp_critical_inline_begin(const char *name)
 
				 
			
 
				 void starpu_omp_critical_inline_end(const char *name)
			
 
				 {
			
 
				-	struct starpu_omp_task *task = STARPU_PTHREAD_GETSPECIFIC(omp_task_key);
			
 
				 	struct starpu_omp_critical *critical = NULL;
			
 
				-	struct starpu_omp_task_link link;
			
 
				 
			
 
				 	if (name)
			
 
				 	{
			
@@ -1303,24 +1301,52 @@ void starpu_omp_for(void (*f)(unsigned long _first_i, unsigned long _nb_i, void
 
				 			if (loop->next_iteration >= nb_iterations)
			
 
				 				break;
			
 
				 			first_i = loop->next_iteration;
			
 
				-			loop->next_iteration += chunk;
			
 
				-			_starpu_spin_unlock(&parallel_region->lock);
			
 
				-
			
 
				-			if (first_i + chunk <= nb_iterations)
			
 
				+			if (first_i + chunk > nb_iterations)
			
 
				 			{
			
 
				-				nb_i = chunk;
			
 
				+				nb_i = nb_iterations - first_i;
			
 
				 			}
			
 
				 			else
			
 
				 			{
			
 
				-				nb_i = nb_iterations - first_i;
			
 
				+				nb_i = chunk;
			
 
				 			}
			
 
				+			loop->next_iteration += nb_i;
			
 
				+			_starpu_spin_unlock(&parallel_region->lock);
			
 
				 			f(first_i, nb_i, arg);
			
 
				 		}
			
 
				 	}
			
 
				 	else if (schedule == starpu_omp_schedule_guided)
			
 
				 	{
			
 
				-		/* TODO: implement omp_schedule_guided */
			
 
				-		_STARPU_ERROR("omp for / guided schedule not implemented\n");
			
 
				+		if (chunk == 0)
			
 
				+		{
			
 
				+			chunk = 1;
			
 
				+		}
			
 
				+		for (;;)
			
 
				+		{
			
 
				+			unsigned long first_i;
			
 
				+			unsigned long nb_i;
			
 
				+
			
 
				+			_starpu_spin_lock(&parallel_region->lock);
			
 
				+			/* upon exiting the loop, the parallel_region-lock will already be held
			
 
				+			 * for performing loop completion */
			
 
				+			if (loop->next_iteration >= nb_iterations)
			
 
				+				break;
			
 
				+			first_i = loop->next_iteration;
			
 
				+			nb_i = (nb_iterations - first_i)/parallel_region->nb_threads;
			
 
				+			if (nb_i < chunk)
			
 
				+			{
			
 
				+				if (first_i+chunk > nb_iterations)
			
 
				+				{
			
 
				+					nb_i = nb_iterations - first_i;
			
 
				+				}
			
 
				+				else
			
 
				+				{
			
 
				+					nb_i = chunk;
			
 
				+				}
			
 
				+			}
			
 
				+			loop->next_iteration += nb_i;
			
 
				+			_starpu_spin_unlock(&parallel_region->lock);
			
 
				+			f(first_i, nb_i, arg);
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	loop->nb_completed_threads++;
			
--- a/tests/openmp/parallel_for_01.c
+++ b/tests/openmp/parallel_for_01.c
@@ -101,7 +101,7 @@ void parallel_region_3_f(void *buffers[], void *args)
 
				 	tid = pthread_self();
			
 
				 	worker_id = starpu_worker_get_id();
			
 
				 	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				-	starpu_omp_for(for_g, (void*)"dynamic", NB_ITERS, CHUNK, starpu_omp_sched_dynamic, 0, 0);
			
 
				+	starpu_omp_for(for_g, (void*)"dynamic chunk", NB_ITERS, CHUNK, starpu_omp_sched_dynamic, 0, 0);
			
 
				 }
			
 
				 
			
 
				 static struct starpu_codelet parallel_region_3_cl =
			
@@ -112,11 +112,74 @@ static struct starpu_codelet parallel_region_3_cl =
 
				 
			
 
				 };
			
 
				 
			
 
				+void parallel_region_4_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_for(for_g, (void*)"dynamic nochunk", NB_ITERS, 0, starpu_omp_sched_dynamic, 0, 0);
			
 
				+}
			
 
				+
			
 
				+static struct starpu_codelet parallel_region_4_cl =
			
 
				+{
			
 
				+	.cpu_funcs    = { parallel_region_4_f, NULL },
			
 
				+	.where        = STARPU_CPU,
			
 
				+	.nbuffers     = 0
			
 
				+
			
 
				+};
			
 
				+
			
 
				+void parallel_region_5_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_for(for_g, (void*)"guided nochunk", NB_ITERS, 0, starpu_omp_sched_guided, 0, 0);
			
 
				+}
			
 
				+
			
 
				+static struct starpu_codelet parallel_region_5_cl =
			
 
				+{
			
 
				+	.cpu_funcs    = { parallel_region_5_f, NULL },
			
 
				+	.where        = STARPU_CPU,
			
 
				+	.nbuffers     = 0
			
 
				+
			
 
				+};
			
 
				+
			
 
				+void parallel_region_6_f(void *buffers[], void *args)
			
 
				+{
			
 
				+	(void) buffers;
			
 
				+	(void) args;
			
 
				+	int worker_id;
			
 
				+	pthread_t tid;
			
 
				+	tid = pthread_self();
			
 
				+	worker_id = starpu_worker_get_id();
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_for(for_g, (void*)"guided nochunk", NB_ITERS, 0, starpu_omp_sched_guided, 0, 0);
			
 
				+}
			
 
				+
			
 
				+static struct starpu_codelet parallel_region_6_cl =
			
 
				+{
			
 
				+	.cpu_funcs    = { parallel_region_6_f, NULL },
			
 
				+	.where        = STARPU_CPU,
			
 
				+	.nbuffers     = 0
			
 
				+
			
 
				+};
			
 
				+
			
 
				 int
			
 
				 main (int argc, char *argv[]) {
			
 
				 	starpu_omp_parallel_region(&parallel_region_1_cl, NULL);
			
 
				 	starpu_omp_parallel_region(&parallel_region_2_cl, NULL);
			
 
				 	starpu_omp_parallel_region(&parallel_region_3_cl, NULL);
			
 
				+	starpu_omp_parallel_region(&parallel_region_4_cl, NULL);
			
 
				+	starpu_omp_parallel_region(&parallel_region_5_cl, NULL);
			
 
				+	starpu_omp_parallel_region(&parallel_region_6_cl, NULL);
			
 
				 	return 0;
			
 
				 }
			
 
				 #endif
			
--- a/tests/openmp/parallel_for_02.c
+++ b/tests/openmp/parallel_for_02.c
@@ -64,8 +64,16 @@ void parallel_region_1_f(void *buffers[], void *args)
 
				 	starpu_omp_for(for_g, (void*)"static chunk", NB_ITERS, CHUNK, starpu_omp_sched_static, 0, 1);
			
 
				 	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				 	starpu_omp_for(for_g, (void*)"static nochunk", NB_ITERS, 0, starpu_omp_sched_static, 0, 1);
			
 
				+	
			
 
				 	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				-	starpu_omp_for(for_g, (void*)"dynamic", NB_ITERS, CHUNK, starpu_omp_sched_dynamic, 0, 1);
			
 
				+	starpu_omp_for(for_g, (void*)"dynamic chunk", NB_ITERS, CHUNK, starpu_omp_sched_dynamic, 0, 1);
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_for(for_g, (void*)"dynamic nochunk", NB_ITERS, 0, starpu_omp_sched_dynamic, 0, 1);
			
 
				+
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_for(for_g, (void*)"guided chunk", NB_ITERS, CHUNK, starpu_omp_sched_guided, 0, 1);
			
 
				+	printf("[tid %p] task thread = %d\n", (void *)tid, worker_id);
			
 
				+	starpu_omp_for(for_g, (void*)"guided nochunk", NB_ITERS, 0, starpu_omp_sched_guided, 0, 1);
			
 
				 }
			
 
				 
			
 
				 static struct starpu_codelet parallel_region_1_cl =