Просмотр исходного кода

Add starpu_malloc_set_align to let OpenCL driver specify its alignment constraints.

Samuel Thibault лет назад: 13
Родитель
Сommit
b7f1c0940c

+ 6 - 0
doc/chapters/basic-api.texi

@@ -407,6 +407,12 @@ thus permit data transfer and computation overlapping. The allocated buffer must
 be freed thanks to the @code{starpu_free} function.
 @end deftypefun
 
+@deftypefun void starpu_malloc_set_align (size_t @var{align})
+This function sets an alignment constraint for @code{starpu_malloc}
+allocations. @var{align} must be a power of two. This is for instance called
+automatically by the OpenCL driver to specify its own alignment constraints.
+@end deftypefun
+
 @deftypefun int starpu_free (void *@var{A})
 This function frees memory which has previously been allocated with
 @code{starpu_malloc}.

+ 2 - 1
include/starpu_data.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2012  Université de Bordeaux 1
+ * Copyright (C) 2010-2013  Université de Bordeaux 1
  * Copyright (C) 2010, 2011, 2012, 2013  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -85,6 +85,7 @@ int starpu_data_acquire_on_node_cb(starpu_data_handle_t handle, unsigned node, e
 void starpu_data_release(starpu_data_handle_t handle);
 void starpu_data_release_on_node(starpu_data_handle_t handle, unsigned node);
 
+void starpu_malloc_set_align(size_t);
 int starpu_malloc(void **A, size_t dim);
 int starpu_free(void *A);
 void starpu_memory_display_stats();

+ 28 - 1
src/datawizard/malloc.c

@@ -24,6 +24,15 @@
 #include <starpu_cuda.h>
 #include <drivers/opencl/driver_opencl.h>
 
+static size_t malloc_align = sizeof(void*); /* Largest alignment requested so far; starts at pointer size. */
+/* Register an alignment constraint for starpu_malloc allocations.
+ *
+ * align is expected to be a power of two; this is asserted with the
+ * (align & (align - 1)) bit test. Note that 0 also passes that test and
+ * is then ignored by the comparison below.
+ * The stored alignment is only ever raised by this function, never lowered.
+ */
+void starpu_malloc_set_align(size_t align)
+{
+	STARPU_ASSERT_MSG(!(align & (align - 1)), "Alignment given to starpu_malloc_set_align must be a power of two");
+	if (malloc_align < align) /* keep the largest value seen */
+		malloc_align = align;
+}
+
 #if (defined(STARPU_USE_CUDA) && !defined(HAVE_CUDA_MEMCPY_PEER))// || defined(STARPU_USE_OPENCL)
 struct malloc_pinned_codelet_struct
 {
@@ -32,6 +41,8 @@ struct malloc_pinned_codelet_struct
 };
 #endif
 
+/* It would be difficult to do it this way: we need to remember the cl_mem to be able to free it later... */
+
 //#ifdef STARPU_USE_OPENCL
 //static void malloc_pinned_opencl_codelet(void *buffers[] STARPU_ATTRIBUTE_UNUSED, void *arg)
 //{
@@ -144,7 +155,23 @@ int starpu_malloc(void **A, size_t dim)
         else
 #endif
 	{
-		*A = malloc(dim);
+#ifdef STARPU_HAVE_POSIX_MEMALIGN
+		if (malloc_align != sizeof(void*))
+		{
+			if (posix_memalign(A, malloc_align, dim))
+				*A = NULL;
+		}
+		else
+#elif defined(STARPU_HAVE_MEMALIGN)
+		if (malloc_align != sizeof(void*))
+		{
+			*A = memalign(malloc_align, dim);
+		}
+		else
+#endif
+		{
+			*A = malloc(dim);
+		}
 	}
 
 	STARPU_ASSERT(*A);

+ 6 - 0
src/drivers/opencl/driver_opencl.c

@@ -158,6 +158,7 @@ void starpu_opencl_get_current_context(cl_context *context)
 cl_int _starpu_opencl_init_context(int devid)
 {
 	cl_int err;
+	cl_uint uint;
 
 	_STARPU_PTHREAD_MUTEX_LOCK(&big_lock);
 
@@ -168,6 +169,11 @@ cl_int _starpu_opencl_init_context(int devid)
         contexts[devid] = clCreateContext(NULL, 1, &devices[devid], NULL, NULL, &err);
         if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);
 
+        err = clGetDeviceInfo(devices[devid], CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(uint), &uint, NULL);
+	if (STARPU_UNLIKELY(err != CL_SUCCESS))
+		STARPU_OPENCL_REPORT_ERROR(err);
+	starpu_malloc_set_align(uint/8);
+
         // Create execution queue for the given device
         queues[devid] = clCreateCommandQueue(contexts[devid], devices[devid], 0, &err);
         if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err);