瀏覽代碼

Add STARPU_ATOMIC_ADD/OR64 and use them to fix 64bit access on windows

Samuel Thibault 5 年之前
父節點
當前提交
fad6eddec1
共有 4 個文件被更改,包括 36 次插入13 次删除
  1. 23 0
      include/starpu_util.h
  2. 6 6
      src/core/sched_policy.c
  3. 4 4
      src/core/task.c
  4. 3 3
      src/drivers/driver_common/driver_common.c

+ 23 - 0
include/starpu_util.h

@@ -444,11 +444,25 @@ static __starpu_inline unsigned long starpu_atomic_##name##l(unsigned long *ptr,
 	}; \
 	return expr; \
 }
+#define STARPU_ATOMIC_SOMETHING64(name,expr) /* defines starpu_atomic_<name>64(ptr, value): stores expr into *ptr through a starpu_cmpxchg64() loop and returns expr */ \
+static __starpu_inline uint64_t starpu_atomic_##name##64(uint64_t *ptr, uint64_t value) /* returns uint64_t: unsigned long is only 32 bits wide on Windows and would truncate the result */ \
+{ \
+	uint64_t old, next; \
+	while (1) /* retry until starpu_cmpxchg64() returns the same old value that was read */ \
+	{ \
+		old = *ptr; \
+		next = expr; \
+		if (starpu_cmpxchg64(ptr, old, next) == old) \
+			break; \
+	}; \
+	return expr; /* expr is re-evaluated with the old value of the successful iteration */ \
+}
 
 /* Returns the new value */
 #ifdef STARPU_HAVE_SYNC_FETCH_AND_ADD
 #define STARPU_ATOMIC_ADD(ptr, value)  (__sync_fetch_and_add ((ptr), (value)) + (value))
 #define STARPU_ATOMIC_ADDL(ptr, value)  (__sync_fetch_and_add ((ptr), (value)) + (value))
+#define STARPU_ATOMIC_ADD64(ptr, value)  (__sync_fetch_and_add ((ptr), (value)) + (value))
 #else
 #if defined(STARPU_HAVE_CMPXCHG)
 STARPU_ATOMIC_SOMETHING(add, old + value)
@@ -458,11 +472,16 @@ STARPU_ATOMIC_SOMETHING(add, old + value)
 STARPU_ATOMIC_SOMETHINGL(add, old + value)
 #define STARPU_ATOMIC_ADDL(ptr, value) starpu_atomic_addl(ptr, value)
 #endif
+#if defined(STARPU_HAVE_CMPXCHG64)
+STARPU_ATOMIC_SOMETHING64(add, old + value)
+#define STARPU_ATOMIC_ADD64(ptr, value) starpu_atomic_add64(ptr, value)
+#endif
 #endif
 
 #ifdef STARPU_HAVE_SYNC_FETCH_AND_OR
 #define STARPU_ATOMIC_OR(ptr, value)  (__sync_fetch_and_or ((ptr), (value)))
 #define STARPU_ATOMIC_ORL(ptr, value)  (__sync_fetch_and_or ((ptr), (value)))
+#define STARPU_ATOMIC_OR64(ptr, value)  (__sync_fetch_and_or ((ptr), (value)))
 #else
 #if defined(STARPU_HAVE_CMPXCHG)
 STARPU_ATOMIC_SOMETHING(or, old | value)
@@ -472,6 +491,10 @@ STARPU_ATOMIC_SOMETHING(or, old | value)
 STARPU_ATOMIC_SOMETHINGL(or, old | value)
 #define STARPU_ATOMIC_ORL(ptr, value) starpu_atomic_orl(ptr, value)
 #endif
+#if defined(STARPU_HAVE_CMPXCHG64)
+STARPU_ATOMIC_SOMETHING64(or, old | value)
+#define STARPU_ATOMIC_OR64(ptr, value) starpu_atomic_or64(ptr, value)
+#endif
 #endif
 
 #ifdef STARPU_HAVE_SYNC_BOOL_COMPARE_AND_SWAP

+ 6 - 6
src/core/sched_policy.c

@@ -448,15 +448,15 @@ int _starpu_repush_task(struct _starpu_job *j)
 		;
 	if (!j->internal && !continuation)
 	{
-		(void) STARPU_ATOMIC_ADDL(& _starpu_task__g_current_submitted__value, -1);
-		int64_t value = STARPU_ATOMIC_ADDL(& _starpu_task__g_current_ready__value, 1);
+		(void) STARPU_ATOMIC_ADD64(& _starpu_task__g_current_submitted__value, -1);
+		int64_t value = STARPU_ATOMIC_ADD64(& _starpu_task__g_current_ready__value, 1);
 		_starpu_perf_counter_update_max_int64(&_starpu_task__g_peak_ready__value, value);
 		if (task->cl && task->cl->perf_counter_values)
 		{
 			struct starpu_perf_counter_sample_cl_values * const pcv = task->cl->perf_counter_values;
 
-			(void)STARPU_ATOMIC_ADDL(&pcv->task.current_submitted, -1);
-			value = STARPU_ATOMIC_ADDL(&pcv->task.current_ready, 1);
+			(void)STARPU_ATOMIC_ADD64(&pcv->task.current_submitted, -1);
+			value = STARPU_ATOMIC_ADD64(&pcv->task.current_ready, 1);
 			_starpu_perf_counter_update_max_int64(&pcv->task.peak_ready, value);
 		}
 	}
@@ -496,11 +496,11 @@ int _starpu_repush_task(struct _starpu_job *j)
 	{
 		if (!j->internal)
 		{
-			(void)STARPU_ATOMIC_ADDL(& _starpu_task__g_current_ready__value, -1);
+			(void)STARPU_ATOMIC_ADD64(& _starpu_task__g_current_ready__value, -1);
 			if (task->cl && task->cl->perf_counter_values)
 			{
 				struct starpu_perf_counter_sample_cl_values * const pcv = task->cl->perf_counter_values;
-				(void)STARPU_ATOMIC_ADDL(&pcv->task.current_ready, -1);
+				(void)STARPU_ATOMIC_ADD64(&pcv->task.current_ready, -1);
 			}
 		}
 		task->status = STARPU_TASK_RUNNING;

+ 4 - 4
src/core/task.c

@@ -853,8 +853,8 @@ int _starpu_task_submit(struct starpu_task *task, int nodeps)
 		;
 	if (!j->internal && !continuation)
 	{
-		(void) STARPU_ATOMIC_ADDL(&_starpu_task__g_total_submitted__value, 1);
-		int64_t value = STARPU_ATOMIC_ADDL(&_starpu_task__g_current_submitted__value, 1);
+		(void) STARPU_ATOMIC_ADD64(&_starpu_task__g_total_submitted__value, 1);
+		int64_t value = STARPU_ATOMIC_ADD64(&_starpu_task__g_current_submitted__value, 1);
 		_starpu_perf_counter_update_max_int64(&_starpu_task__g_peak_submitted__value, value);
 		_starpu_perf_counter_update_global_sample();
 
@@ -862,8 +862,8 @@ int _starpu_task_submit(struct starpu_task *task, int nodeps)
 		{
 			struct starpu_perf_counter_sample_cl_values * const pcv = task->cl->perf_counter_values;
 
-			(void) STARPU_ATOMIC_ADD(&pcv->task.total_submitted, 1);
-			value = STARPU_ATOMIC_ADDL(&pcv->task.current_submitted, 1);
+			(void) STARPU_ATOMIC_ADD64(&pcv->task.total_submitted, 1);
+			value = STARPU_ATOMIC_ADD64(&pcv->task.current_submitted, 1);
 			_starpu_perf_counter_update_max_int64(&pcv->task.peak_submitted, value);
 			_starpu_perf_counter_update_per_codelet_sample(task->cl);
 		}

+ 3 - 3
src/drivers/driver_common/driver_common.c

@@ -70,11 +70,11 @@ void _starpu_driver_start_job(struct _starpu_worker *worker, struct _starpu_job
 		STARPU_ASSERT(task->status == STARPU_TASK_READY);
 		if (!j->internal)
 		{
-			(void)STARPU_ATOMIC_ADDL(& _starpu_task__g_current_ready__value, -1);
+			(void)STARPU_ATOMIC_ADD64(& _starpu_task__g_current_ready__value, -1);
 			if (task->cl && task->cl->perf_counter_values)
 			{
 				struct starpu_perf_counter_sample_cl_values * const pcv = task->cl->perf_counter_values;
-				(void)STARPU_ATOMIC_ADDL(&pcv->task.current_ready, -1);
+				(void)STARPU_ATOMIC_ADD64(&pcv->task.current_ready, -1);
 			}
 		}
 		task->status = STARPU_TASK_RUNNING;
@@ -217,7 +217,7 @@ void _starpu_driver_update_job_feedback(struct _starpu_job *j, struct _starpu_wo
 	if (cl->perf_counter_values)
 	{
 		struct starpu_perf_counter_sample_cl_values * const pcv = cl->perf_counter_values;
-		(void)STARPU_ATOMIC_ADD(&pcv->task.total_executed, 1);
+		(void)STARPU_ATOMIC_ADD64(&pcv->task.total_executed, 1);
 		_starpu_perf_counter_update_acc_double(&pcv->task.cumul_execution_time, measured);
 		_starpu_perf_counter_update_per_codelet_sample(cl);
 	}