Nathalie Furmento лет назад: 13
Родитель
Сommit
7674139df7

+ 6 - 0
doc/chapters/basic-api.texi

@@ -407,6 +407,12 @@ thus permit data transfer and computation overlapping. The allocated buffer must
 be freed thanks to the @code{starpu_free} function.
 @end deftypefun
 
+@deftypefun void starpu_malloc_set_align (size_t @var{align})
+This function sets an alignment constraint for @code{starpu_malloc}
+allocations. @var{align} must be a power of two. For instance, the OpenCL
+driver calls it automatically to specify its own alignment constraint.
+@end deftypefun
+
 @deftypefun int starpu_free (void *@var{A})
 This function frees memory which has previously allocated with
 @code{starpu_malloc}.

+ 2 - 1
include/starpu_data.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2012  Université de Bordeaux 1
+ * Copyright (C) 2010-2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -85,6 +85,7 @@ int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, unsigned node, e
 void starpu_data_release(starpu_data_handle_t handle);
 void starpu_data_release_on_node(starpu_data_handle_t handle, unsigned node);
 
+void starpu_malloc_set_align(size_t);
 int starpu_malloc(void **A, size_t dim);
 int starpu_free(void *A);
 void starpu_memory_display_stats();

+ 1 - 1
mpi/src/starpu_mpi.c

@@ -648,7 +648,7 @@ static void _starpu_mpi_handle_request_termination(struct _starpu_mpi_req *req)
 		}
 		starpu_data_release(req->data_handle);
 	}
-	
+
 	/* Execute the specified callback, if any */
 	if (req->callback)
 		req->callback(req->callback_arg);

+ 2 - 0
src/core/task.c

@@ -250,6 +250,8 @@ int _starpu_submit_job(struct _starpu_job *j)
 	return ret;
 }
 
+/* Note: this is racy, so valgrind would complain. But since we'll always put
+ * the same values, this is not a problem. */
 void _starpu_codelet_check_deprecated_fields(struct starpu_codelet *cl)
 {
 	if (!cl)

+ 30 - 2
src/datawizard/malloc.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2010, 2012  Université de Bordeaux 1
+ * Copyright (C) 2009-2010, 2012-2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -24,6 +24,15 @@
 #include <starpu_cuda.h>
 #include <drivers/opencl/driver_opencl.h>
 
+static size_t malloc_align = sizeof(void*);
+
+void starpu_malloc_set_align(size_t align)
+{
+	STARPU_ASSERT_MSG(!(align & (align - 1)), "Alignment given to starpu_malloc_set_align must be a power of two");
+	if (malloc_align < align)
+		malloc_align = align;
+}
+
 #if (defined(STARPU_USE_CUDA) && !defined(HAVE_CUDA_MEMCPY_PEER))// || defined(STARPU_USE_OPENCL)
 struct malloc_pinned_codelet_struct
 {
@@ -32,6 +41,8 @@ struct malloc_pinned_codelet_struct
 };
 #endif
 
+/* It would be difficult to do it this way: we need to remember the cl_mem to be able to free it later... */
+
 //#ifdef STARPU_USE_OPENCL
 //static void malloc_pinned_opencl_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *arg)
 //{
@@ -75,6 +86,7 @@ int starpu_malloc(void **A, size_t dim)
 {
 #ifdef STARPU_DEVEL
 #warning TODO: we need to request _starpu_memory_manager_can_allocate_size()
+#warning TODO: if it fails, we should reclaim memory
 #endif
 
 	if (STARPU_UNLIKELY(!_starpu_worker_may_perform_blocking_calls()))
@@ -143,7 +155,23 @@ int starpu_malloc(void **A, size_t dim)
         else
 #endif
 	{
-		*A = malloc(dim);
+#ifdef STARPU_HAVE_POSIX_MEMALIGN
+		if (malloc_align != sizeof(void*))
+		{
+			if (posix_memalign(A, malloc_align, dim))
+				*A = NULL;
+		}
+		else
+#elif defined(STARPU_HAVE_MEMALIGN)
+		if (malloc_align != sizeof(void*))
+		{
+			*A = memalign(malloc_align, dim);
+		}
+		else
+#endif
+		{
+			*A = malloc(dim);
+		}
 	}
 
 	STARPU_ASSERT(*A);

+ 1 - 1
src/debug/traces/starpu_fxt.c

@@ -990,7 +990,7 @@ static void handle_mpi_start(struct fxt_ev_64 *ev, struct starpu_fxt_options *op
 		program_container_alias(program_container, STARPU_POTI_STR_LEN, prefix);
 		char new_mpicommthread_container_alias[STARPU_POTI_STR_LEN], new_mpicommthread_container_name[STARPU_POTI_STR_LEN];
 		mpicommthread_container_alias(new_mpicommthread_container_alias, STARPU_POTI_STR_LEN, prefix);
-		snprintf(new_memnode_container_name, STARPU_POTI_STR_LEN, "%smpict", prefix);
+		snprintf(new_mpicommthread_container_alias, STARPU_POTI_STR_LEN, "%smpict", prefix);
 		poti_CreateContainer(date, new_mpicommthread_container_alias, "MPICt", program_container, new_mpicommthread_container_name);
 #else
 		fprintf(out_paje_file, "7	%.9f	%smpict		MPICt	%sp	%smpict\n", date, prefix, prefix, prefix);

+ 6 - 0
src/drivers/opencl/driver_opencl.c

@@ -158,6 +158,7 @@ void starpu_opencl_get_current_context(cl_context *context)
 cl_int _starpu_opencl_init_context(int devid)
 {
 	cl_int err;
+	cl_uint uint;
 
 	_STARPU_PTHREAD_MUTEX_LOCK(&big_lock);
 
@@ -168,6 +169,11 @@ cl_int _starpu_opencl_init_context(int devid)
         contexts[devid] = clCreateContext(NULL, 1, &devices[devid], NULL, NULL, &err);
         if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
 
+        err = clGetDeviceInfo(devices[devid], CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(uint), &uint, NULL);
+	if (STARPU_UNLIKELY(err != CL_SUCCESS))
+		STARPU_OPENCL_REPORT_ERROR(err);
+	starpu_malloc_set_align(uint/8);
+
         // Create execution queue for the given device
         queues[devid] = clCreateCommandQueue(contexts[devid], devices[devid], 0, &err);
         if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);

+ 5 - 1
tests/microbenchs/tasks_overhead.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2011  Université de Bordeaux 1
+ * Copyright (C) 2010-2011, 2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -26,7 +26,11 @@
 starpu_data_handle_t data_handles[8];
 float *buffers[8];
 
+#ifdef STARPU_QUICK_CHECK
+static unsigned ntasks = 128;
+#else
 static unsigned ntasks = 65536;
+#endif
 static unsigned nbuffers = 0;
 
 struct starpu_task *tasks;

+ 21 - 1
tools/valgrind/starpu.suppr

@@ -48,6 +48,27 @@
 }
 
 {
+   We do not care about the race on the entry->mean variable, we only want a good-enough estimation.
+   Helgrind:Race
+   fun:_starpu_history_based_job_expected_perf
+   ...
+}
+
+{
+   We do not care about races on profiling statistics
+   Helgrind:Race
+   fun:starpu_profiling_status_get
+   ...
+}
+
+{
+   This is racy, but since we'll always put the same values, this is not a problem.
+   Helgrind:Race
+   fun:_starpu_codelet_check_deprecated_fields
+   ...
+}
+
+{
    fscanf error
    Memcheck:Cond
    ...
@@ -55,4 +76,3 @@
    fun:_starpu_load_bus_performance_files
    ...
 }
-