浏览代码

- merge trunk

Olivier Aumage 11 年之前
父节点
当前提交
7070adec2e
共有 5 个文件被更改,包括 138 次插入3 次删除
  1. 1 0
      ChangeLog
  2. 3 0
      configure.ac
  3. 16 2
      doc/tutorial/Makefile
  4. 117 0
      doc/tutorial/vector_scal_task_insert.c
  5. 1 1
      src/datawizard/datawizard.c

+ 1 - 0
ChangeLog

@@ -47,6 +47,7 @@ New features:
     CUDA and OpenCL kernel execution.
     CUDA and OpenCL kernel execution.
   * Add CUDA concurrent kernel execution support through
   * Add CUDA concurrent kernel execution support through
     the STARPU_NWORKER_PER_CUDA environment variable.
     the STARPU_NWORKER_PER_CUDA environment variable.
+  * New locality work stealing scheduler (lws).
 
 
 Small features:
 Small features:
   * Tasks can now have a name (via the field const char *name of
   * Tasks can now have a name (via the field const char *name of

+ 3 - 0
configure.ac

@@ -978,6 +978,7 @@ if test x$enable_simgrid = xyes ; then
    	if test -n "$SIMGRID_CFLAGS" ; then
    	if test -n "$SIMGRID_CFLAGS" ; then
 	   	CFLAGS="$SIMGRID_CFLAGS $CFLAGS"
 	   	CFLAGS="$SIMGRID_CFLAGS $CFLAGS"
 	   	CXXFLAGS="$SIMGRID_CFLAGS $CXXFLAGS"
 	   	CXXFLAGS="$SIMGRID_CFLAGS $CXXFLAGS"
+	   	NVCCFLAGS="$SIMGRID_CFLAGS $NVCCFLAGS"
 	fi
 	fi
 	if test -n "$SIMGRID_LIBS" ; then
 	if test -n "$SIMGRID_LIBS" ; then
 		LDFLAGS="$SIMGRID_LIBS $LDFLAGS"
 		LDFLAGS="$SIMGRID_LIBS $LDFLAGS"
@@ -985,11 +986,13 @@ if test x$enable_simgrid = xyes ; then
 	if test "$simgrid_dir" != "no" ; then
 	if test "$simgrid_dir" != "no" ; then
 	   	CFLAGS="-I$simgrid_dir/include $CFLAGS"
 	   	CFLAGS="-I$simgrid_dir/include $CFLAGS"
 	   	CXXFLAGS="-I$simgrid_dir/include $CXXFLAGS"
 	   	CXXFLAGS="-I$simgrid_dir/include $CXXFLAGS"
+	   	NVCCFLAGS="-I$simgrid_dir/include $NVCCFLAGS"
 	   	LDFLAGS="-L$simgrid_dir/lib $LDFLAGS"
 	   	LDFLAGS="-L$simgrid_dir/lib $LDFLAGS"
 	fi
 	fi
 	if test "$simgrid_include_dir" != "no" ; then
 	if test "$simgrid_include_dir" != "no" ; then
 	   	CFLAGS="-I$simgrid_include_dir $CFLAGS"
 	   	CFLAGS="-I$simgrid_include_dir $CFLAGS"
 	   	CXXFLAGS="-I$simgrid_include_dir $CXXFLAGS"
 	   	CXXFLAGS="-I$simgrid_include_dir $CXXFLAGS"
+	   	NVCCFLAGS="-I$simgrid_include_dir $NVCCFLAGS"
 	fi
 	fi
 	if test "$simgrid_lib_dir" != "no" ; then
 	if test "$simgrid_lib_dir" != "no" ; then
 	   	LDFLAGS="-L$simgrid_lib_dir $LDFLAGS"
 	   	LDFLAGS="-L$simgrid_lib_dir $LDFLAGS"

+ 16 - 2
doc/tutorial/Makefile

@@ -1,7 +1,7 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
 #
 # Copyright (C) 2009-2011  Université de Bordeaux 1
 # Copyright (C) 2009-2011  Université de Bordeaux 1
-# Copyright (C) 2010-2013  Centre National de la Recherche Scientifique
+# Copyright (C) 2010-2014  Centre National de la Recherche Scientifique
 #
 #
 # StarPU is free software; you can redistribute it and/or modify
 # StarPU is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
 # it under the terms of the GNU Lesser General Public License as published by
@@ -24,7 +24,7 @@ HAS_OPENCL	=	$(shell pkg-config --libs starpu-1.1 |grep -i opencl)
 %.o: %.cu
 %.o: %.cu
 	nvcc $(CFLAGS) $< -c
 	nvcc $(CFLAGS) $< -c
 
 
-TARGETS = hello_world vector_scal hello_world_plugin vector_scal_plugin
+TARGETS = hello_world vector_scal hello_world_plugin vector_scal_plugin vector_scal_task_insert
 
 
 all: $(TARGETS)
 all: $(TARGETS)
 
 
@@ -42,6 +42,20 @@ endif
 vector_scal: $(VECTOR_SCAL_PREREQUISITES)
 vector_scal: $(VECTOR_SCAL_PREREQUISITES)
 	$(VECTOR_SCAL_COMPILER) $(LDFLAGS) $^ -o $@
 	$(VECTOR_SCAL_COMPILER) $(LDFLAGS) $^ -o $@
 
 
+VECTOR_SCAL_TASK_INSERT_PREREQUISITES		=	vector_scal_task_insert.o vector_scal_cpu.o
+ifneq ($(strip $(HAS_CUDA)),)
+VECTOR_SCAL_TASK_INSERT_PREREQUISITES		+=	vector_scal_cuda.o
+VECTOR_SCAL_TASK_INSERT_COMPILER		=	$(NVCC)
+else
+VECTOR_SCAL_TASK_INSERT_COMPILER		=	$(CC)
+endif
+ifneq ($(strip $(HAS_OPENCL)),)
+VECTOR_SCAL_TASK_INSERT_PREREQUISITES += vector_scal_opencl.o
+endif
+
+vector_scal_task_insert: $(VECTOR_SCAL_TASK_INSERT_PREREQUISITES)
+	$(VECTOR_SCAL_TASK_INSERT_COMPILER) $(LDFLAGS) $^ -o $@
+
 hello_world_plugin: hello_world_plugin.c
 hello_world_plugin: hello_world_plugin.c
 	$(CC) $(CFLAGS) -fplugin=`pkg-config starpu-1.1 --variable=gccplugin` $(LDFLAGS) $^ -o $@
 	$(CC) $(CFLAGS) -fplugin=`pkg-config starpu-1.1 --variable=gccplugin` $(LDFLAGS) $^ -o $@
 
 

+ 117 - 0
doc/tutorial/vector_scal_task_insert.c

@@ -0,0 +1,117 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010, 2011, 2012, 2013, 2014  Centre National de la Recherche Scientifique
+ * Copyright (C) 2010-2012  Université de Bordeaux 1
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+/*
+ * This example demonstrates how to use StarPU to scale an array by a factor.
+ * It shows how to manipulate data with StarPU's data management library.
+ *  1- how to declare a piece of data to StarPU (starpu_vector_data_register)
+ *  2- how to submit a task to StarPU
+ *  3- how a kernel can manipulate the data (buffers[0].vector.ptr)
+ */
+#include <starpu.h>
+
+#define    NX    2048
+
+extern void vector_scal_cpu(void *buffers[], void *_args);
+extern void vector_scal_cuda(void *buffers[], void *_args);
+extern void vector_scal_opencl(void *buffers[], void *_args);
+
+static struct starpu_codelet cl = {
+	/* CPU implementation of the codelet */
+	.cpu_funcs = {vector_scal_cpu, NULL},
+#ifdef STARPU_USE_CUDA
+	/* CUDA implementation of the codelet */
+	.cuda_funcs = {vector_scal_cuda, NULL},
+#endif
+#ifdef STARPU_USE_OPENCL
+	/* OpenCL implementation of the codelet */
+	.opencl_funcs = {vector_scal_opencl, NULL},
+#endif
+	.nbuffers = 1,
+	.modes = {STARPU_RW}
+};
+
+#ifdef STARPU_USE_OPENCL
+struct starpu_opencl_program programs;
+#endif
+
+int main(int argc, char **argv)
+{
+	/* We consider a vector of floats that is initialized just like any
+	 * other C data */
+	float vector[NX];
+	unsigned i;
+	for (i = 0; i < NX; i++)
+		vector[i] = 1.0f;
+
+	fprintf(stderr, "BEFORE : First element was %f\n", vector[0]);
+
+	/* Initialize StarPU with default configuration */
+	int ret = starpu_init(NULL);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+#ifdef STARPU_USE_OPENCL
+	starpu_opencl_load_opencl_from_file("vector_scal_opencl_kernel.cl", &programs, NULL);
+#endif
+
+	/* Tell StarPU to associate the "vector" vector with the "vector_handle"
+	 * identifier. When a task needs to access a piece of data, it should
+	 * refer to the handle that is associated to it.
+	 * In the case of the "vector" data interface:
+	 *  - the first argument of the registration method is a pointer to the
+	 *    handle that should describe the data
+	 *  - the second argument is the memory node where the data (ie. "vector")
+	 *    resides initially: STARPU_MAIN_RAM stands for an address in main memory, as
+	 *    opposed to an address on a GPU for instance.
+	 *  - the third argument is the address of the vector in RAM
+	 *  - the fourth argument is the number of elements in the vector
+	 *  - the fifth argument is the size of each element.
+	 */
+	starpu_data_handle_t vector_handle;
+	starpu_vector_data_register(&vector_handle, STARPU_MAIN_RAM, (uintptr_t)vector,
+				    NX, sizeof(vector[0]));
+
+	float factor = 3.14;
+
+	ret = starpu_task_insert(&cl,
+				 /* an argument is passed to the codelet, beware that this is a
+				  * READ-ONLY buffer and that the codelet may be given a pointer to a
+				  * COPY of the argument */
+				 STARPU_VALUE, &factor, sizeof(factor),
+				 /* the codelet manipulates one buffer in RW mode */
+				 STARPU_RW, vector_handle,
+				 0);
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_task_insert");
+
+	/* Wait for tasks completion */
+	starpu_task_wait_for_all();
+
+	/* StarPU does not need to manipulate the array anymore so we can stop
+	 * monitoring it */
+	starpu_data_unregister(vector_handle);
+
+#ifdef STARPU_USE_OPENCL
+	starpu_opencl_unload_opencl(&programs);
+#endif
+
+	/* Terminate StarPU: no task can be submitted after this point */
+	starpu_shutdown();
+
+	fprintf(stderr, "AFTER First element is %f\n", vector[0]);
+
+	return 0;
+}

+ 1 - 1
src/datawizard/datawizard.c

@@ -39,7 +39,7 @@ int __starpu_datawizard_progress(unsigned memory_node, unsigned may_alloc, unsig
 	/* in case some other driver requested data */
 	/* in case some other driver requested data */
 	if (_starpu_handle_pending_node_data_requests(memory_node))
 	if (_starpu_handle_pending_node_data_requests(memory_node))
 		ret = 1;
 		ret = 1;
-	if (push_requests)
+	if (ret || push_requests)
 	{
 	{
 		unsigned pushed;
 		unsigned pushed;
 		if (_starpu_handle_node_data_requests(memory_node, may_alloc, &pushed) == 0)
 		if (_starpu_handle_node_data_requests(memory_node, may_alloc, &pushed) == 0)