浏览代码

merge trunk

Nathalie Furmento 8 年之前
父节点
当前提交
6ef6b36c78

+ 4 - 0
ChangeLog

@@ -28,6 +28,8 @@ New features:
   * Add STARPU_CUDA_THREAD_PER_DEV environment variable to support driving all
     GPUs from only one thread when almost all kernels are asynchronous.
   * Add starpu_replay tool to replay tasks.rec files with Simgrid.
+  * Add experimental support of NUMA nodes. Use STARPU_USE_NUMA to activate it.
+  * Add a new set of functions to make Out-of-Core based on HDF5 Library.
 
 Small features:
   * Scheduling contexts may now be associated a user data pointer at creation
@@ -52,6 +54,8 @@ Small features:
   * Add priority management to StarPU-MPI.
   * Add STARPU_MAIN_THREAD_CPUID and STARPU_MPI_THREAD_CPUID environment
     variables.
+  * Add disk to disk copy functions and support asynchronous full read/write
+    in disk backends.
 
 Changes:
   * Vastly improve simgrid simulation time.

+ 14 - 7
configure.ac

@@ -3054,14 +3054,21 @@ if test x$enable_mlr = xyes -a "$starpu_windows" != "yes" ; then
 			else
 				AC_MSG_RESULT(no)
 				AC_MSG_CHECKING(min-dgels source)
-				if test ! -d $PWD/min-dgels; then
-					cp -r $srcdir/min-dgels $PWD/
+				if test "${cross_compiling}" != "no" ; then
+					# Cross-compiling is not supported by min-dgels
+					AC_MSG_RESULT(no)
+					install_min_dgels=no
+					support_mlr=no
+				else
+					if test ! -d $PWD/min-dgels; then
+						cp -r $srcdir/min-dgels $PWD/
+					fi
+					AC_MSG_RESULT(yes)
+					DGELS_LIBS="-Wl,--start-group $STARPU_BUILD_DIR/min-dgels/build/minlibblas.a $STARPU_BUILD_DIR/min-dgels/build/minlibdgels.a $STARPU_BUILD_DIR/min-dgels/build/minlibf2c.a -Wl,--end-group"
+					AC_DEFINE(STARPU_MLR_MODEL, [1], [use user defined library])
+					AC_ARG_VAR([DGELS_LIBS], [linker flags for lapack dgels])
+					install_min_dgels=yes
 				fi
-				AC_MSG_RESULT(yes)
-				DGELS_LIBS="-Wl,--start-group $STARPU_BUILD_DIR/min-dgels/build/*.a -Wl,--end-group"
-				AC_DEFINE(STARPU_MLR_MODEL, [1], [use user defined library])
-				AC_ARG_VAR([DGELS_LIBS], [linker flags for lapack dgels])
-				install_min_dgels=yes
 			fi
 		fi
 	fi

+ 6 - 0
examples/heat/heat.sh

@@ -22,6 +22,12 @@ set -e
 
 PREFIX=$(dirname $0)
 
+if [ -n "$STARPU_MIC_SINK_PROGRAM_PATH" ] ; then
+	STARPU_MIC_SINK_PROGRAM_NAME=$STARPU_MIC_SINK_PROGRAM_PATH/heat
+	# in case libtool got into play
+	[ -x "$STARPU_MIC_SINK_PROGRAM_PATH/.libs/heat" ] && STARPU_MIC_SINK_PROGRAM_NAME=$STARPU_MIC_SINK_PROGRAM_PATH/.libs/heat
+fi
+
 $PREFIX/heat -shape 0
 $PREFIX/heat -shape 1
 # sometimes lead to pivot being 0

+ 12 - 0
examples/lu/lu.sh

@@ -21,12 +21,24 @@ set -e
 
 PREFIX=$(dirname $0)
 
+if [ -n "$STARPU_MIC_SINK_PROGRAM_PATH" ] ; then
+	STARPU_MIC_SINK_PROGRAM_NAME=$STARPU_MIC_SINK_PROGRAM_PATH/lu_implicit_example_float
+	# in case libtool got into play
+	[ -x "$STARPU_MIC_SINK_PROGRAM_PATH/.libs/lu_implicit_example_float" ] && STARPU_MIC_SINK_PROGRAM_NAME=$STARPU_MIC_SINK_PROGRAM_PATH/.libs/lu_implicit_example_float
+fi
+
 $PREFIX/lu_implicit_example_float -size $((960 * 4)) -nblocks 4 -piv
 $PREFIX/lu_implicit_example_float -size $((960 * 4)) -nblocks 4 -no-stride
 $PREFIX/lu_implicit_example_float -size $((960 * 4)) -nblocks 4 -bound
 $PREFIX/lu_implicit_example_float -size $((960 * 2)) -nblocks 2 -bounddeps
 $PREFIX/lu_implicit_example_float -size $((960 * 2)) -nblocks 2 -bound -bounddeps -bounddepsprio
 
+if [ -n "$STARPU_MIC_SINK_PROGRAM_PATH" ] ; then
+	STARPU_MIC_SINK_PROGRAM_NAME=$STARPU_MIC_SINK_PROGRAM_PATH/lu_example_float
+	# in case libtool got into play
+	[ -x "$STARPU_MIC_SINK_PROGRAM_PATH/.libs/lu_example_float" ] && STARPU_MIC_SINK_PROGRAM_NAME=$STARPU_MIC_SINK_PROGRAM_PATH/.libs/lu_example_float
+fi
+
 $PREFIX/lu_example_float -size $((960 * 4)) -nblocks 4 -piv
 $PREFIX/lu_example_float -size $((960 * 4)) -nblocks 4 -no-stride
 $PREFIX/lu_example_float -size $((960 * 4)) -nblocks 4 -bound

+ 18 - 18
mic-configure

@@ -165,14 +165,14 @@ do
 	cd "build_${arch}"
 
 	if test x$arch = xmic ; then
-		LIBRARY_PATH=$SINK_LIBRARY_PATH \
+		LIBRARY_PATH=$SINK_LIBRARY_PATH:$MIC_LIBRARY_PATH \
 		INCLUDE=$SINK_INCLUDE \
 		C_INCLUDE_PATH=$SINK_C_INCLUDE_PATH \
 		CPLUS_INCLUDE_PATH=$SINK_CPLUS_INCLUDE_PATH \
 		PKG_CONFIG_PATH=$SINK_PKG_CONFIG_PATH \
 		$command "$@" "${params[@]}" "${mic_params[@]}"
 		MIC_BUILD_ENV="\
-LIBRARY_PATH=$SINK_LIBRARY_PATH \\
+LIBRARY_PATH=$SINK_LIBRARY_PATH:$MIC_LIBRARY_PATH \\
 	INCLUDE=$SINK_INCLUDE \\
 	C_INCLUDE_PATH=$SINK_C_INCLUDE_PATH \\
 	CPLUS_INCLUDE_PATH=$SINK_CPLUS_INCLUDE_PATH \\
@@ -192,55 +192,55 @@ then
 cat > Makefile << EOF
 all:
 	$MIC_BUILD_ENV
-	\$(MAKE) -C build_mic
+	\$(MAKE) \$(MFLAGS) -C build_mic
 
 clean:
-	\$(MAKE) -C build_mic clean
+	\$(MAKE) \$(MFLAGS) -C build_mic clean
 
 distclean: clean
 	rm -f Makefile
 
 check:
 	$MIC_BUILD_ENV
-	\$(MAKE) -C build_mic check
+	\$(MAKE) \$(MFLAGS) -C build_mic check
 
 showcheck:
-	\$(MAKE) -C build_mic showcheck
+	\$(MAKE) \$(MFLAGS) -C build_mic showcheck
 
 install:
 	$MIC_BUILD_ENV
-	\$(MAKE) -C build_mic install
+	\$(MAKE) \$(MFLAGS) -C build_mic install
 	ln -sf "${prefix}/mic/lib/pkgconfig/starpu-1.3.pc" "${prefix}/mic/lib/pkgconfig/starpu-1.3-mic.pc"
 EOF
 else
 cat > Makefile << EOF
 all:
-	\$(MAKE) -C build_host
+	\$(MAKE) \$(MFLAGS) -C build_host
 	$MIC_BUILD_ENV
-	\$(MAKE) -C build_mic
+	\$(MAKE) \$(MFLAGS) -C build_mic
 
 clean:
-	\$(MAKE) -C build_host clean
-	\$(MAKE) -C build_mic clean
+	\$(MAKE) \$(MFLAGS) -C build_host clean
+	\$(MAKE) \$(MFLAGS) -C build_mic clean
 
 distclean: clean
 	rm -f Makefile
 
 check:
-	\$(MAKE) -C build_host check
+	\$(MAKE) \$(MFLAGS) -C build_host check
 	$MIC_BUILD_ENV
-	\$(MAKE) -C build_mic check ; \
+	\$(MAKE) \$(MFLAGS) -C build_mic check ; \
 	RET=\$\$? ; \
-	STARPU_NCPUS=0 \$(MAKE) -C build_mic check && [ \$\$RET == 0 ]
+	STARPU_NCPUS=0 \$(MAKE) \$(MFLAGS) -C build_mic check && [ \$\$RET == 0 ]
 
 showcheck:
-	\$(MAKE) -C build_host showcheck
-	\$(MAKE) -C build_mic showcheck
+	\$(MAKE) \$(MFLAGS) -C build_host showcheck
+	\$(MAKE) \$(MFLAGS) -C build_mic showcheck
 
 install:
-	\$(MAKE) -C build_host install
+	\$(MAKE) \$(MFLAGS) -C build_host install
 	$MIC_BUILD_ENV
-	\$(MAKE) -C build_mic install
+	\$(MAKE) \$(MFLAGS) -C build_mic install
 	ln -sf "${prefix}/mic/lib/pkgconfig/starpu-1.3.pc" "${prefix}/mic/lib/pkgconfig/starpu-1.3-mic.pc"
 EOF
 fi

+ 5 - 2
min-dgels/Makefile.in

@@ -1,10 +1,13 @@
+CC = @CC@
+LD = @LD@
+
 CLAPACK=base
 ADDITIONAL=additional
 
 all:
 	mkdir -p build
-	cd $(CLAPACK) && $(MAKE) blaslib
-	cd $(CLAPACK) && $(MAKE) f2clib
+	cd $(CLAPACK) && $(MAKE) blaslib CC="$(CC)" LD="$(LD)"
+	cd $(CLAPACK) && $(MAKE) f2clib CC="$(CC)" LD="$(LD)"
 	cd $(ADDITIONAL) && $(CC) -c -fPIC *.c && ar cr ../build/minlibdgels.a *.o && ranlib ../build/minlibdgels.a
 
 install:

+ 2 - 2
min-dgels/base/F2CLIBS/libf2c/Makefile

@@ -21,7 +21,7 @@ include $(TOPDIR)/make.inc
 # compile, then strip unnecessary symbols
 .c.o:
 	$(CC) -c -DSkip_f2c_Undefs $(CFLAGS) $*.c
-	ld -r -x -o $*.xxx $*.o
+	$(LD) -r -x -o $*.xxx $*.o
 	mv $*.xxx $*.o
 ## Under Solaris (and other systems that do not understand ld -x),
 ## omit -x in the ld line above.
@@ -83,7 +83,7 @@ libminf2c.a: $(OFILES)
 ## of "cc -shared".
 
 libf2c.so: $(OFILES)
-	cc -shared -o libf2c.so $(OFILES)
+	$(CC) -shared -o libf2c.so $(OFILES)
 
 ### If your system lacks ranlib, you don't need it; see README.
 

+ 33 - 0
src/core/disk_ops/disk_hdf5.c

@@ -849,6 +849,37 @@ static void * starpu_hdf5_async_write(void *base, void *obj, void *buf, off_t of
         return finished;
 }
 
+void * starpu_hdf5_async_full_read (void * base, void * obj, void ** ptr, size_t * size, unsigned dst_node)
+{
+        struct starpu_hdf5_obj * dataObj = (struct starpu_hdf5_obj *) obj;
+
+        starpu_sem_t * finished;
+        _STARPU_MALLOC(finished, sizeof(*finished));
+        starpu_sem_init(finished, 0, 0);
+
+        _starpu_hdf5_protect_start(base);
+        *size = _starpu_get_size_obj(dataObj);
+        _starpu_hdf5_protect_stop(base);
+
+        _starpu_malloc_flags_on_node(dst_node, ptr, *size, 0); 
+
+        starpu_hdf5_send_work(base, obj, 0, NULL, NULL, 0, *ptr, *size, (void*) finished, FULL_READ);
+        
+        return finished;
+}
+
+void * starpu_hdf5_async_full_write (void * base, void * obj, void * ptr, size_t size)
+{
+        starpu_sem_t * finished;
+        _STARPU_MALLOC(finished, sizeof(*finished));
+        starpu_sem_init(finished, 0, 0);
+
+        starpu_hdf5_send_work(NULL, NULL, 0, base, obj, 0, ptr, size, (void*) finished, FULL_WRITE);
+
+        return finished;
+
+}
+
 void *  starpu_hdf5_copy(void *base_src, void* obj_src, off_t offset_src,  void *base_dst, void* obj_dst, off_t offset_dst, size_t size)
 {
         starpu_sem_t * finished;
@@ -940,6 +971,8 @@ struct starpu_disk_ops starpu_disk_hdf5_ops =
 
 	.async_read = starpu_hdf5_async_read,
 	.async_write = starpu_hdf5_async_write,
+	.async_full_read = starpu_hdf5_async_full_read,
+	.async_full_write = starpu_hdf5_async_full_write,
 	.wait_request = starpu_hdf5_wait,
 	.test_request = starpu_hdf5_test,
 	.free_request = starpu_hdf5_free_request

+ 2 - 0
src/core/disk_ops/disk_unistd.c

@@ -69,6 +69,8 @@ struct starpu_disk_ops starpu_disk_unistd_ops =
 #ifdef HAVE_AIO_H
 	.async_read = starpu_unistd_global_async_read,
 	.async_write = starpu_unistd_global_async_write,
+	.async_full_read = starpu_unistd_global_async_full_read,
+	.async_full_write = starpu_unistd_global_async_full_write,
 	.wait_request = starpu_unistd_global_wait_request,
 	.test_request = starpu_unistd_global_test_request,
 	.free_request = starpu_unistd_global_free_request,

+ 2 - 0
src/core/disk_ops/disk_unistd_o_direct.c

@@ -134,6 +134,8 @@ struct starpu_disk_ops starpu_disk_unistd_o_direct_ops =
         .wait_request = starpu_unistd_global_wait_request,
         .test_request = starpu_unistd_global_test_request,
 	.free_request = starpu_unistd_global_free_request,
+	.async_full_read = starpu_unistd_global_async_full_read,
+	.async_full_write = starpu_unistd_global_async_full_write,
 #endif
 	.full_read = starpu_unistd_global_full_read,
 	.full_write = starpu_unistd_global_full_write

+ 47 - 0
src/core/disk_ops/unistd/disk_unistd_global.c

@@ -536,6 +536,53 @@ int starpu_unistd_global_full_write(void *base STARPU_ATTRIBUTE_UNUSED, void *ob
 	return starpu_unistd_global_write(base, obj, ptr, 0, size);
 }
 
+#if HAVE_AIO_H
+void * starpu_unistd_global_async_full_read (void * base, void * obj, void ** ptr, size_t * size, unsigned dst_node)
+{
+        struct starpu_unistd_global_obj *tmp = (struct starpu_unistd_global_obj *) obj;
+	int fd = tmp->descriptor;
+
+	if (fd < 0)
+		fd = _starpu_unistd_reopen(obj);
+#ifdef STARPU_HAVE_WINDOWS
+	*size = _filelength(fd);
+#else
+	struct stat st;
+	int ret = fstat(fd, &st);
+	STARPU_ASSERT(ret==0);
+
+	*size = st.st_size;
+#endif
+	if (tmp->descriptor < 0)
+		_starpu_unistd_reclose(fd);
+
+	/* Allocated aligned buffer */
+	_starpu_malloc_flags_on_node(dst_node, ptr, *size, 0);
+	return starpu_unistd_global_async_read(base, obj, *ptr, 0, *size);
+}
+
+void * starpu_unistd_global_async_full_write (void * base, void * obj, void * ptr, size_t size)
+{
+        struct starpu_unistd_global_obj *tmp = (struct starpu_unistd_global_obj *) obj;
+
+        /* update file size to realise the next good full_read */
+        if(size != tmp->size)
+        {
+		int fd = tmp->descriptor;
+
+		if (fd < 0)
+			fd = _starpu_unistd_reopen(obj);
+		int val = _starpu_ftruncate(fd,size);
+		if (tmp->descriptor < 0)
+			_starpu_unistd_reclose(fd);
+		STARPU_ASSERT(val == 0);
+		tmp->size = size;
+        }
+
+	return starpu_unistd_global_async_write(base, obj, ptr, 0, size);
+}
+#endif
+
 #ifdef STARPU_UNISTD_USE_COPY
 static void * starpu_unistd_internal_thread(void * arg)
 {

+ 2 - 0
src/core/disk_ops/unistd/disk_unistd_global.h

@@ -51,6 +51,8 @@ void starpu_unistd_global_unplug (void *base);
 int get_unistd_global_bandwidth_between_disk_and_main_ram(unsigned node, void *base);
 void* starpu_unistd_global_async_read (void *base, void *obj, void *buf, off_t offset, size_t size);
 void* starpu_unistd_global_async_write (void *base, void *obj, void *buf, off_t offset, size_t size);
+void * starpu_unistd_global_async_full_write (void * base, void * obj, void * ptr, size_t size);
+void * starpu_unistd_global_async_full_read (void * base, void * obj, void ** ptr, size_t * size, unsigned dst_node);
 void starpu_unistd_global_wait_request(void * async_channel);
 int starpu_unistd_global_test_request(void * async_channel);
 void starpu_unistd_global_free_request(void * async_channel);

+ 10 - 9
src/core/topology.c

@@ -533,7 +533,7 @@ _starpu_init_mpi_topology (struct _starpu_machine_config *config, long mpi_idx)
 	struct _starpu_machine_topology *topology = &config->topology;
 
 	int nbcores;
-	_starpu_src_common_sink_nbcores (mpi_ms_nodes[mpi_idx], &nbcores);
+	_starpu_src_common_sink_nbcores (_starpu_mpi_ms_nodes[mpi_idx], &nbcores);
 	topology->nhwmpicores[mpi_idx] = nbcores;
 }
 
@@ -551,7 +551,7 @@ _starpu_init_mic_topology (struct _starpu_machine_config *config, long mic_idx)
 	struct _starpu_machine_topology *topology = &config->topology;
 
 	int nbcores;
-	_starpu_src_common_sink_nbcores (mic_nodes[mic_idx], &nbcores);
+	_starpu_src_common_sink_nbcores (_starpu_mic_nodes[mic_idx], &nbcores);
 	topology->nhwmiccores[mic_idx] = nbcores;
 }
 
@@ -618,7 +618,7 @@ _starpu_init_mic_node (struct _starpu_machine_config *config, int mic_idx,
 
 	/* Let's create the node structure, we'll communicate with the peer
 	 * through scif thanks to it */
-	mic_nodes[mic_idx] =
+	_starpu_mic_nodes[mic_idx] =
 		_starpu_mp_common_node_create(STARPU_NODE_MIC_SOURCE, mic_idx);
 
 	return 0;
@@ -1092,6 +1092,7 @@ _starpu_init_mic_config (struct _starpu_machine_config *config,
 		config->workers[worker_idx].worker_mask = STARPU_MIC;
 		config->worker_mask |= STARPU_MIC;
 	}
+	_starpu_mic_nodes[mic_idx]->baseworkerid = topology->nworkers;
 
 	topology->nworkers += topology->nmiccores[mic_idx];
 }
@@ -1155,7 +1156,7 @@ _starpu_init_mpi_config (struct _starpu_machine_config *config,
                 config->workers[worker_idx].worker_mask = STARPU_MPI_MS;
                 config->worker_mask |= STARPU_MPI_MS;
         }
-	mpi_ms_nodes[mpi_idx]->baseworkerid = topology->nworkers;
+	_starpu_mpi_ms_nodes[mpi_idx]->baseworkerid = topology->nworkers;
 
         topology->nworkers += topology->nmpicores[mpi_idx];
 }
@@ -1250,7 +1251,7 @@ _starpu_init_mp_config (struct _starpu_machine_config *config,
 		{
 			unsigned i;
 			for (i = 0; i < topology->nmpidevices; i++)
-				mpi_ms_nodes[i] = _starpu_mp_common_node_create(STARPU_NODE_MPI_SOURCE, i);
+				_starpu_mpi_ms_nodes[i] = _starpu_mp_common_node_create(STARPU_NODE_MPI_SOURCE, i);
 
 			for (i = 0; i < topology->nmpidevices; i++)
 				_starpu_init_mpi_config (config, user_conf, i);
@@ -1264,20 +1265,20 @@ _starpu_init_mp_config (struct _starpu_machine_config *config,
 static void
 _starpu_deinit_mic_node (unsigned mic_idx)
 {
-	_starpu_mp_common_send_command(mic_nodes[mic_idx], STARPU_MP_COMMAND_EXIT, NULL, 0);
+	_starpu_mp_common_send_command(_starpu_mic_nodes[mic_idx], STARPU_MP_COMMAND_EXIT, NULL, 0);
 
 	COIProcessDestroy(_starpu_mic_process[mic_idx], -1, 0, NULL, NULL);
 
-	_starpu_mp_common_node_destroy(mic_nodes[mic_idx]);
+	_starpu_mp_common_node_destroy(_starpu_mic_nodes[mic_idx]);
 }
 #endif
 
 #ifdef STARPU_USE_MPI_MASTER_SLAVE
 static void _starpu_deinit_mpi_node(int devid)
 {
-        _starpu_mp_common_send_command(mpi_ms_nodes[devid], STARPU_MP_COMMAND_EXIT, NULL, 0);
+        _starpu_mp_common_send_command(_starpu_mpi_ms_nodes[devid], STARPU_MP_COMMAND_EXIT, NULL, 0);
 
-        _starpu_mp_common_node_destroy(mpi_ms_nodes[devid]);
+        _starpu_mp_common_node_destroy(_starpu_mpi_ms_nodes[devid]);
 }
 #endif
 

+ 54 - 11
src/datawizard/copy_driver.c

@@ -539,6 +539,8 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
                 {
                         req->async_channel.type = STARPU_DISK_RAM;
                         req->async_channel.event.disk_event.requests = NULL;
+                        req->async_channel.event.disk_event.ptr = NULL;
+                        req->async_channel.event.disk_event.handle = NULL;
                 }
 		if(copy_methods->any_to_any)
 			ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, req && !starpu_asynchronous_copy_disabled() ? &req->async_channel : NULL);
@@ -551,14 +553,18 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 			handle->ops->pack_data(handle, src_node, &ptr, &size);
 			ret = _starpu_disk_full_write(src_node, dst_node, obj, ptr, size, req && !starpu_asynchronous_copy_disabled() ? &req->async_channel : NULL);
 			if (ret == 0)
+			{
 				/* write is already finished, ptr was allocated in pack_data */
 				_starpu_free_flags_on_node(src_node, ptr, size, 0);
+			}
+			else if (ret == -EAGAIN)
+			{
+				req->async_channel.event.disk_event.ptr = ptr;
+				req->async_channel.event.disk_event.node = src_node;
+				req->async_channel.event.disk_event.size = size;
+			}
 
-#ifdef STARPU_DEVEL
-#warning TODO: support asynchronous disk requests for packed data
-#endif
-			/* For now, asynchronous is not supported */
-			STARPU_ASSERT(ret == 0);
+			STARPU_ASSERT(ret == 0 || ret == -EAGAIN);
 		}
 		break;
 
@@ -567,6 +573,8 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
                 {
                         req->async_channel.type = STARPU_DISK_RAM;
                         req->async_channel.event.disk_event.requests = NULL;
+                        req->async_channel.event.disk_event.ptr = NULL;
+                        req->async_channel.event.disk_event.handle = NULL;
                 }
 		if(copy_methods->any_to_any)
 			ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, req && !starpu_asynchronous_copy_disabled()  ? &req->async_channel : NULL);
@@ -583,12 +591,15 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
 				/* ptr is allocated in full_read */
 				_starpu_free_flags_on_node(dst_node, ptr, size, 0);
 			}
-
-#ifdef STARPU_DEVEL
-#warning TODO: support asynchronous disk requests for packed data
-#endif
-			/* For now, asynchronous is not supported */
-			STARPU_ASSERT(ret == 0);
+			else if (ret == -EAGAIN)
+			{
+				req->async_channel.event.disk_event.ptr = ptr;
+				req->async_channel.event.disk_event.node = dst_node;
+				req->async_channel.event.disk_event.size = size;
+				req->async_channel.event.disk_event.handle = handle;
+			}
+			
+			STARPU_ASSERT(ret == 0 || ret == -EAGAIN);
 		}
 		break;
 
@@ -597,6 +608,8 @@ static int copy_data_1_to_1_generic(starpu_data_handle_t handle,
                 {
                         req->async_channel.type = STARPU_DISK_RAM;
                         req->async_channel.event.disk_event.requests = NULL;
+                        req->async_channel.event.disk_event.ptr = NULL;
+                        req->async_channel.event.disk_event.handle = NULL;
                 }
 		ret = copy_methods->any_to_any(src_interface, src_node, dst_interface, dst_node, req && !starpu_asynchronous_copy_disabled() ? &req->async_channel : NULL);
 		break;
@@ -886,6 +899,21 @@ void _starpu_driver_wait_request_completion(struct _starpu_async_channel *async_
 #endif
 	case STARPU_DISK_RAM:
 		starpu_disk_wait_request(async_channel);
+		if (async_channel->event.disk_event.ptr != NULL)
+		{
+			if (async_channel->event.disk_event.handle != NULL)
+			{
+				/* read is finished, we can already unpack */
+				async_channel->event.disk_event.handle->ops->unpack_data(async_channel->event.disk_event.handle, async_channel->event.disk_event.node, async_channel->event.disk_event.ptr, async_channel->event.disk_event.size);
+				/* ptr is allocated in full_read */
+				_starpu_free_flags_on_node(async_channel->event.disk_event.node, async_channel->event.disk_event.ptr, async_channel->event.disk_event.size, 0);
+			}
+			else
+			{
+				/* write is finished, ptr was allocated in pack_data */
+				_starpu_free_flags_on_node(async_channel->event.disk_event.node, async_channel->event.disk_event.ptr, async_channel->event.disk_event.size, 0);
+			}
+		}
 		break;
 	case STARPU_CPU_RAM:
 	default:
@@ -951,6 +979,21 @@ unsigned _starpu_driver_test_request_completion(struct _starpu_async_channel *as
 #endif
 	case STARPU_DISK_RAM:
 		success = starpu_disk_test_request(async_channel);
+		if (async_channel->event.disk_event.ptr != NULL && success)
+		{
+			if (async_channel->event.disk_event.handle != NULL)
+			{
+				/* read is finished, we can already unpack */
+				async_channel->event.disk_event.handle->ops->unpack_data(async_channel->event.disk_event.handle, async_channel->event.disk_event.node, async_channel->event.disk_event.ptr, async_channel->event.disk_event.size);
+				/* ptr is allocated in full_read */
+				_starpu_free_flags_on_node(async_channel->event.disk_event.node, async_channel->event.disk_event.ptr, async_channel->event.disk_event.size, 0);
+			}
+			else
+			{
+				/* write is finished, ptr was allocated in pack_data */
+				_starpu_free_flags_on_node(async_channel->event.disk_event.node, async_channel->event.disk_event.ptr, async_channel->event.disk_event.size, 0);
+			}
+		}
 		break;
 	case STARPU_CPU_RAM:
 	default:

+ 5 - 1
src/datawizard/copy_driver.h

@@ -75,11 +75,15 @@ LIST_TYPE(_starpu_disk_backend_event,
 	void *backend_event;
 );
         
-
 struct _starpu_disk_async_event
 {
 	unsigned memory_node;
         struct _starpu_disk_backend_event_list * requests;
+
+	void * ptr;
+	unsigned node;
+	size_t size;
+	starpu_data_handle_t handle;
 };
 
 /* this is a structure that can be queried to see whether an asynchronous

+ 5 - 5
src/drivers/mic/driver_mic_source.c

@@ -34,7 +34,7 @@
 
 /* Array of structures containing all the informations useful to send
  * and receive informations with devices */
-struct _starpu_mp_node *mic_nodes[STARPU_MAXMICDEVS];
+struct _starpu_mp_node *_starpu_mic_nodes[STARPU_MAXMICDEVS];
 
 static COIENGINE handles[STARPU_MAXMICDEVS];
 
@@ -82,7 +82,7 @@ struct _starpu_mp_node *_starpu_mic_src_get_actual_thread_mp_node()
 	int devid = actual_worker->devid;
 	STARPU_ASSERT(devid >= 0 && devid < STARPU_MAXMICDEVS);
 
-	return mic_nodes[devid];
+	return _starpu_mic_nodes[devid];
 }
 
 struct _starpu_mp_node *_starpu_mic_src_get_mp_node_from_memory_node(int memory_node)
@@ -90,7 +90,7 @@ struct _starpu_mp_node *_starpu_mic_src_get_mp_node_from_memory_node(int memory_
 	int devid = _starpu_memory_node_get_devid(memory_node);
 	STARPU_ASSERT_MSG(devid >= 0 && devid < STARPU_MAXMICDEVS, "bogus devid %d for memory node %d\n", devid, memory_node);
 
-	return mic_nodes[devid];
+	return _starpu_mic_nodes[devid];
 }
 
 static void _starpu_mic_src_free_kernel(void *kernel)
@@ -175,7 +175,7 @@ starpu_mic_kernel_t _starpu_mic_src_get_kernel(starpu_mic_func_symbol_t symbol)
 
 	if (kernel->func[devid] == NULL)
 	{
-		struct _starpu_mp_node *node = mic_nodes[devid];
+		struct _starpu_mp_node *node = _starpu_mic_nodes[devid];
 		int ret = _starpu_src_common_lookup(node, (void (**)(void))&kernel->func[devid], kernel->name);
 		if (ret)
 			return NULL;
@@ -554,7 +554,7 @@ void *_starpu_mic_src_worker(void *arg)
 	STARPU_PTHREAD_COND_SIGNAL(&worker_set->ready_cond);
 	STARPU_PTHREAD_MUTEX_UNLOCK(&worker_set->mutex);
 
-	_starpu_src_common_worker(worker_set, baseworkerid, mic_nodes[devid]);
+	_starpu_src_common_worker(worker_set, baseworkerid, _starpu_mic_nodes[devid]);
 
 	return NULL;
 

+ 1 - 1
src/drivers/mic/driver_mic_source.h

@@ -30,7 +30,7 @@
 
 /* Array of structures containing all the informations useful to send
  * and receive informations with devices */
-extern struct _starpu_mp_node *mic_nodes[STARPU_MAXMICDEVS];
+extern struct _starpu_mp_node *_starpu_mic_nodes[STARPU_MAXMICDEVS];
 
 struct _starpu_mic_async_event *event;
 

+ 1 - 0
src/drivers/mp_common/source_common.c

@@ -60,6 +60,7 @@ static int _starpu_src_common_finalize_job (struct _starpu_job *j, struct _starp
 	if(j->task_size > 1)
 	{
 		struct _starpu_combined_worker * cb_worker = _starpu_get_combined_worker_struct(worker->combined_workerid);
+		(void) STARPU_ATOMIC_ADD(&j->after_work_busy_barrier, -1);
 
 		STARPU_PTHREAD_MUTEX_LOCK(&cb_worker->count_mutex);
 		count = cb_worker->count--;

+ 6 - 6
src/drivers/mpi/driver_mpi_source.c

@@ -47,7 +47,7 @@ struct _starpu_mpi_ms_kernel
 
 /* Array of structures containing all the informations useful to send
  * and receive informations with devices */
-struct _starpu_mp_node *mpi_ms_nodes[STARPU_MAXMPIDEVS];
+struct _starpu_mp_node *_starpu_mpi_ms_nodes[STARPU_MAXMPIDEVS];
 
 struct _starpu_mp_node *_starpu_mpi_ms_src_get_actual_thread_mp_node()
 {
@@ -57,7 +57,7 @@ struct _starpu_mp_node *_starpu_mpi_ms_src_get_actual_thread_mp_node()
 	int devid = actual_worker->devid;
 	STARPU_ASSERT(devid >= 0 && devid < STARPU_MAXMPIDEVS);
 
-	return mpi_ms_nodes[devid];
+	return _starpu_mpi_ms_nodes[devid];
 }
 
 void _starpu_mpi_source_init(struct _starpu_mp_node *node)
@@ -76,7 +76,7 @@ struct _starpu_mp_node *_starpu_mpi_src_get_mp_node_from_memory_node(int memory_
         int devid = _starpu_memory_node_get_devid(memory_node);
         STARPU_ASSERT_MSG(devid >= 0 && devid < STARPU_MAXMPIDEVS, "bogus devid %d for memory node %d\n", devid, memory_node);
 
-        return mpi_ms_nodes[devid];
+        return _starpu_mpi_ms_nodes[devid];
 }
 
 int _starpu_mpi_src_allocate_memory(void ** addr, size_t size, unsigned memory_node)
@@ -200,7 +200,7 @@ starpu_mpi_ms_kernel_t starpu_mpi_ms_get_kernel(starpu_mpi_ms_func_symbol_t symb
 
         if (kernel->func[devid] == NULL)
         {
-                struct _starpu_mp_node *node = mpi_ms_nodes[devid];
+                struct _starpu_mp_node *node = _starpu_mpi_ms_nodes[devid];
                 int ret = _starpu_src_common_lookup(node, (void (**)(void))&kernel->func[devid], kernel->name);
                 if (ret)
                         return NULL;
@@ -376,9 +376,9 @@ void *_starpu_mpi_src_worker(void *arg)
 
 
 #ifndef STARPU_MPI_MASTER_SLAVE_MULTIPLE_THREAD
-        _starpu_src_common_workers_set(worker_set_mpi, nbsinknodes, mpi_ms_nodes);
+        _starpu_src_common_workers_set(worker_set_mpi, nbsinknodes, _starpu_mpi_ms_nodes);
 #else
-        _starpu_src_common_worker(worker_set, baseworkerid, mpi_ms_nodes[devid]);
+        _starpu_src_common_worker(worker_set, baseworkerid, _starpu_mpi_ms_nodes[devid]);
 #endif
 
         return NULL;

+ 1 - 1
src/drivers/mpi/driver_mpi_source.h

@@ -25,7 +25,7 @@
 
 /* Array of structures containing all the informations useful to send
  * and receive informations with devices */
-extern struct _starpu_mp_node *mpi_ms_nodes[STARPU_MAXMPIDEVS];
+extern struct _starpu_mp_node *_starpu_mpi_ms_nodes[STARPU_MAXMPIDEVS];
 struct _starpu_mp_node *_starpu_mpi_src_get_mp_node_from_memory_node(int memory_node);
 struct _starpu_mp_node *_starpu_mpi_ms_src_get_actual_thread_mp_node();
 

+ 1 - 0
tests/Makefile.am

@@ -279,6 +279,7 @@ myPROGRAMS +=				\
 	datawizard/temporary_partition		\
 	datawizard/redux_acquire		\
 	disk/disk_copy				\
+	disk/disk_copy_unpack			\
 	disk/disk_copy_to_disk			\
 	disk/disk_compute			\
 	disk/disk_pack				\

+ 7 - 0
tests/datawizard/locality.sh

@@ -21,6 +21,13 @@
 set -e
 
 PREFIX=$(dirname $0)
+
+if [ -n "$STARPU_MIC_SINK_PROGRAM_PATH" ] ; then
+	STARPU_MIC_SINK_PROGRAM_NAME=$STARPU_MIC_SINK_PROGRAM_PATH/locality
+	# in case libtool got into play
+	[ -x "$STARPU_MIC_SINK_PROGRAM_PATH/.libs/locality" ] && STARPU_MIC_SINK_PROGRAM_NAME=$STARPU_MIC_SINK_PROGRAM_PATH/.libs/locality
+fi
+
 test -x $PREFIX/../../tools/starpu_fxt_tool || exit 77
 STARPU_SCHED=modular-eager STARPU_FXT_PREFIX=$PREFIX/ $PREFIX/locality
 $PREFIX/../../tools/starpu_fxt_tool -i $PREFIX/prof_file_${USER}_0

+ 168 - 0
tests/disk/disk_copy_unpack.c

@@ -0,0 +1,168 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2017  Inria
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <starpu.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <math.h>
+#include <common/config.h>
+#include "../helper.h"
+
+/*
+ * Test pack / unpack methods before pushing data on disk with async read/write.
+ */
+
+/* size of one vector */
+#ifdef STARPU_QUICK_CHECK
+#  define	DISK	64
+#  define	NX	(256*1024/sizeof(double))
+#else
+#  define	NX	(32*1048576/sizeof(double))
+#  define	DISK	200
+#endif
+
+#if !defined(STARPU_HAVE_SETENV)
+#warning setenv is not defined. Skipping test
+int main(int argc, char **argv)
+{
+	return STARPU_TEST_SKIPPED;
+}
+#else
+
+int dotest(struct starpu_disk_ops *ops, void *param)
+{
+	unsigned *A;
+	int ret;
+
+	/* Initialize StarPU without GPU devices to make sure the memory of the GPU devices will not be used */
+	struct starpu_conf conf;
+	ret = starpu_conf_init(&conf);
+	if (ret == -EINVAL)
+		return EXIT_FAILURE;
+	conf.ncuda = 0;
+	conf.nopencl = 0;
+	conf.nmic = 0;
+	conf.nscc = 0;
+	ret = starpu_init(&conf);
+	if (ret == -ENODEV) goto enodev;
+
+	/* register a disk */
+	int new_dd = starpu_disk_register(ops, param, 1024*1024*DISK);
+	/* can't write on /tmp/ */
+	if (new_dd == -ENOENT) goto enoent;
+
+	/* allocate two memory spaces */
+	starpu_malloc_flags((void **)&A, NX*sizeof(unsigned), STARPU_MALLOC_COUNT);
+
+	FPRINTF(stderr, "TEST DISK MEMORY \n");
+
+	unsigned int j;
+	/* initialization with bad values */
+	for(j = 0; j < NX; ++j)
+	{
+		A[j] = j;
+	}
+
+	starpu_data_handle_t vector_handleA;
+
+	static const struct starpu_data_copy_methods my_vector_copy_data_methods_s =
+	{
+		.any_to_any = NULL,
+	};
+	
+	starpu_interface_vector_ops.copy_methods = &my_vector_copy_data_methods_s;
+
+	/* register vector in starpu */
+	starpu_vector_data_register(&vector_handleA, STARPU_MAIN_RAM, (uintptr_t)A, NX, sizeof(unsigned));
+
+	/* Move and invalidate copy to an other disk */
+	starpu_data_acquire_on_node(vector_handleA, new_dd, STARPU_RW);
+	starpu_data_release_on_node(vector_handleA, new_dd);
+
+	starpu_data_acquire_on_node(vector_handleA, new_dd, STARPU_RW);
+	starpu_data_release_on_node(vector_handleA, new_dd);
+
+	/* free them */
+	starpu_data_unregister(vector_handleA);
+
+	/* check if computation is correct */
+	int try = 1;
+	for (j = 0; j < NX; ++j)
+		if (A[j] != j)
+		{
+			FPRINTF(stderr, "Fail A %u != %u \n", A[j], j);
+			try = 0;
+		}
+
+	starpu_free_flags(A, NX*sizeof(double), STARPU_MALLOC_COUNT);
+
+	/* terminate StarPU, no task can be submitted after */
+	starpu_shutdown();
+
+	if(try)
+		FPRINTF(stderr, "TEST SUCCESS\n");
+	else
+		FPRINTF(stderr, "TEST FAIL\n");
+	return try ? EXIT_SUCCESS : EXIT_FAILURE;
+
+enodev:
+	return STARPU_TEST_SKIPPED;
+enoent:
+	FPRINTF(stderr, "Couldn't write data: ENOENT\n");
+	starpu_shutdown();
+	return STARPU_TEST_SKIPPED;
+}
+
+static int merge_result(int old, int new)
+{
+	if (new == EXIT_FAILURE)
+		return EXIT_FAILURE;
+	if (old == 0)
+		return 0;
+	return new;
+}
+
+int main(void)
+{
+	int ret = 0;
+	int ret2;
+	char s[128];
+	char *ptr;
+
+	snprintf(s, sizeof(s), "/tmp/%s-disk-XXXXXX", getenv("USER"));
+	ptr = _starpu_mkdtemp(s);
+	if (!ptr)
+	{
+		FPRINTF(stderr, "Cannot make directory <%s>\n", s);
+		return STARPU_TEST_SKIPPED;
+	}
+
+	ret = merge_result(ret, dotest(&starpu_disk_stdio_ops, s));
+	ret = merge_result(ret, dotest(&starpu_disk_unistd_ops, s));
+#ifdef STARPU_LINUX_SYS
+	ret = merge_result(ret, dotest(&starpu_disk_unistd_o_direct_ops, s));
+#endif
+#ifdef STARPU_HAVE_HDF5
+	ret = merge_result(ret, dotest(&starpu_disk_hdf5_ops, s));
+#endif
+
+	ret2 = rmdir(s);
+	if (ret2 < 0)
+		STARPU_CHECK_RETURN_VALUE(-errno, "rmdir '%s'\n", s);
+	return ret;
+}
+#endif

+ 8 - 3
tests/loader-cross.sh.in

@@ -2,7 +2,7 @@
 #
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2013, 2015 Universite de Bordeaux
+# Copyright (C) 2013, 2015, 2017 Universite de Bordeaux
 # Copyright (C) 2015       CNRS
 #
 # StarPU is free software; you can redistribute it and/or modify
@@ -27,15 +27,20 @@ then
     echo "[Error] Syntax: $0 <mic_executable> <args>"
     exit 1
 fi
+case "$exec" in
+	/*) ;;
+	*) exec="$PWD/$exec"
+esac
 
-NATIVE=${PWD/\/build_mic\//\/build_host\/}
+NATIVE=${exec/\/build_mic\//\/build_host\/}
 DIR="$(dirname "$exec")"
 FILE="$(basename "$exec")"
 
 export SINK_LD_LIBRARY_PATH="$top_builddir/src/.libs:$SINK_LD_LIBRARY_PATH"
 export STARPU_MIC_SINK_PROGRAM_NAME="$exec" 
+export STARPU_MIC_SINK_PROGRAM_PATH="$DIR"
 
 # in case libtool got into play
 [ -x "$DIR/.libs/$FILE" ] && STARPU_MIC_SINK_PROGRAM_NAME="$DIR/.libs/$FILE" 
 
-$top_builddir/../build_host/tests/loader "$NATIVE/$exec" "$@"
+$top_builddir/../build_host/tests/loader "$NATIVE" "$@"

+ 7 - 1
tests/microbenchs/microbench.sh

@@ -1,6 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2016  Université de Bordeaux
+# Copyright (C) 2016-2017  Université de Bordeaux
 # Copyright (C) 2016, 2017  CNRS
 #
 # StarPU is free software; you can redistribute it and/or modify
@@ -33,6 +33,12 @@ test_scheds()
 	pass=""
 	skip=""
 
+	if [ -n "$STARPU_MIC_SINK_PROGRAM_PATH" ] ; then
+		STARPU_MIC_SINK_PROGRAM_NAME=$STARPU_MIC_SINK_PROGRAM_PATH/$TEST
+		# in case libtool got into play
+		[ -x "$STARPU_MIC_SINK_PROGRAM_PATH/.libs/$TEST" ] && STARPU_MIC_SINK_PROGRAM_NAME=$STARPU_MIC_SINK_PROGRAM_PATH/.libs/$TEST
+	fi
+
 	RESULT=0
 	for sched in $SCHEDS;
 	do

+ 7 - 0
tests/overlap/overlap.sh

@@ -21,6 +21,13 @@
 set -e
 
 PREFIX=$(dirname $0)
+
+if [ -n "$STARPU_MIC_SINK_PROGRAM_PATH" ] ; then
+	STARPU_MIC_SINK_PROGRAM_NAME=$STARPU_MIC_SINK_PROGRAM_PATH/overlap
+	# in case libtool got into play
+	[ -x "$STARPU_MIC_SINK_PROGRAM_PATH/.libs/overlap" ] && STARPU_MIC_SINK_PROGRAM_NAME=$STARPU_MIC_SINK_PROGRAM_PATH/.libs/overlap
+fi
+
 STARPU_SCHED=dmdas STARPU_FXT_PREFIX=$PREFIX/ $PREFIX/overlap
 [ ! -x $PREFIX/../../tools/starpu_perfmodel_display ] || $PREFIX/../../tools/starpu_perfmodel_display -s overlap_sleep_1024_24
 if [ -x $PREFIX/../../tools/starpu_fxt_tool ];