Nathalie Furmento il y a 7 ans
Parent
commit
32a15e902f

+ 1 - 1
Makefile.am

@@ -189,7 +189,7 @@ endif
 txt_DATA = AUTHORS COPYING.LGPL README README.dev STARPU-REVISION
 EXTRA_DIST = autogen.sh AUTHORS COPYING.LGPL README README.dev STARPU-VERSION STARPU-REVISION build-aux/svn2cl.xsl mic-configure
 
-DISTCLEANFILES = STARPU-REVISION
+DISTCLEANFILES = STARPU-REVISION dsm/Makefile
 
 
 include starpu-top/extradist

+ 1 - 0
configure.ac

@@ -465,6 +465,7 @@ if test x$dsm_included != xyes; then
 	mkdir -p dsm
 	echo "dist:" > ./dsm/Makefile
 	echo "distdir:" >> ./dsm/Makefile
+	echo "distclean:" >> ./dsm/Makefile
         enable_dsm=no
 fi
 

+ 27 - 3
examples/heat/dw_factolu_kernels.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010-2012, 2014-2015  Université de Bordeaux
+ * Copyright (C) 2009, 2010-2012, 2014-2015, 2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2016, 2017  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -20,6 +20,10 @@
  */
 #include "dw_factolu.h"
 
+#ifdef STARPU_HAVE_VALGRIND_H
+#include <valgrind/valgrind.h>
+#endif
+
 #ifdef STARPU_USE_CUDA
 #include <cublas.h>
 #include <starpu_cublas_v2.h>
@@ -342,7 +346,17 @@ static inline void dw_common_codelet_update_u11(void *descr[], int s, STARPU_ATT
 			{
 				float pivot;
 				pivot = sub11[z+z*ld];
-				STARPU_ASSERT(pivot != 0.0f);
+
+#ifdef STARPU_HAVE_VALGRIND_H
+				if (RUNNING_ON_VALGRIND)
+				{
+					if (fpclassify(pivot) == FP_ZERO)
+						/* Running in valgrind, don't care about the result */
+						pivot = 1.0f;
+				}
+				else
+#endif
+					STARPU_ASSERT(fpclassify(pivot) != FP_ZERO);
 
 				STARPU_SSCAL(nx - z - 1, (1.0f/pivot), &sub11[z+(z+1)*ld], ld);
 
@@ -362,7 +376,17 @@ static inline void dw_common_codelet_update_u11(void *descr[], int s, STARPU_ATT
 				cudaMemcpyAsync(&pivot, &sub11[z+z*ld], sizeof(float), cudaMemcpyDeviceToHost, stream);
 				cudaStreamSynchronize(stream);
 
-				STARPU_ASSERT(pivot != 0.0f);
+#ifdef STARPU_HAVE_VALGRIND_H
+				if (RUNNING_ON_VALGRIND)
+				{
+					if (fpclassify(pivot) == FP_ZERO)
+						/* Running in valgrind, don't care about the result */
+						pivot = 1.0f;
+				}
+				else
+#endif
+					STARPU_ASSERT(fpclassify(pivot) != FP_ZERO);
+
 				float scal = 1.0f/pivot;
 
 				status = cublasSscal(starpu_cublas_get_local_handle(),

+ 3 - 3
examples/lu/xlu_kernels.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010-2012, 2014-2016  Université de Bordeaux
+ * Copyright (C) 2009, 2010-2012, 2014-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2015  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -512,7 +512,7 @@ static inline void STARPU_LU(common_u11_pivot)(void *descr[],
 					pivot = sub11[z+z*ld];
 				}
 			
-				STARPU_ASSERT(pivot != 0.0);
+				STARPU_ASSERT(fpclassify(pivot) != FP_ZERO);
 
 				CPU_SCAL(nx - z - 1, (1.0/pivot), &sub11[z+(z+1)*ld], ld);
 		
@@ -561,7 +561,7 @@ static inline void STARPU_LU(common_u11_pivot)(void *descr[],
 					cudaStreamSynchronize(stream);
 				}
 
-				STARPU_ASSERT(pivot != 0.0);
+				STARPU_ASSERT(fpclassify(pivot) != FP_ZERO);
 				
 				inv_pivot = 1.0/pivot;
 				status = CUBLAS_SCAL(handle,

+ 2 - 1
include/starpu_config.h.in

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009-2016  Université de Bordeaux
+ * Copyright (C) 2009-2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016, 2017  CNRS
  * Copyright (C) 2014, 2017  INRIA
  *
@@ -43,6 +43,7 @@
 #undef STARPU_SIMGRID_HAVE_XBT_BARRIER_INIT
 #undef STARPU_HAVE_SIMGRID_MSG_H
 #undef STARPU_HAVE_XBT_SYNCHRO_H
+#undef STARPU_HAVE_VALGRIND_H
 #undef STARPU_NON_BLOCKING_DRIVERS
 
 #undef STARPU_HAVE_ICC

+ 1 - 0
mpi/examples/mpi_lu/plu_outofcore_example.c

@@ -188,6 +188,7 @@ static void init_matrix(int rank)
 	size_t blocksize = (size_t)(size/nblocks)*(size/nblocks)*sizeof(TYPE);
 
 	int disk_node = starpu_disk_register(&starpu_disk_unistd_ops, path, STARPU_MAX(1024*1024, size*size*sizeof(TYPE)));
+	assert(disk_node >= 0);
 
 	char filename[sizeof(nblocks)*3 + 1 + sizeof(nblocks)*3 + 1];
 

+ 21 - 27
src/core/disk.c

@@ -49,9 +49,10 @@ struct disk_register
 };
 
 static int add_disk_in_list(unsigned node, struct starpu_disk_ops *func, void *base);
-static int get_location_with_node(unsigned node);
+static inline unsigned get_location_with_node(unsigned node);
 
 static struct disk_register **disk_register_list = NULL;
+static unsigned memnode_to_disknode[STARPU_MAXNODES];
 static int disk_number = -1;
 static int size_register_list = 2;
 
@@ -163,13 +164,13 @@ void _starpu_disk_unregister(void)
 
 void *_starpu_disk_alloc(unsigned node, size_t size)
 {
-	int pos = get_location_with_node(node);
+	unsigned pos = get_location_with_node(node);
 	return disk_register_list[pos]->functions->alloc(disk_register_list[pos]->base, size);
 }
 
 void _starpu_disk_free(unsigned node, void *obj, size_t size)
 {
-	int pos = get_location_with_node(node);
+	unsigned pos = get_location_with_node(node);
 	disk_register_list[pos]->functions->free(disk_register_list[pos]->base, obj, size);
 }
 
@@ -177,7 +178,7 @@ void _starpu_disk_free(unsigned node, void *obj, size_t size)
 int _starpu_disk_read(unsigned src_node, unsigned dst_node STARPU_ATTRIBUTE_UNUSED, void *obj, void *buf, off_t offset, size_t size, struct _starpu_async_channel *channel)
 {
         void *event = NULL;
-	int pos = get_location_with_node(src_node);
+	unsigned pos = get_location_with_node(src_node);
 
         if (channel != NULL)
 	{
@@ -207,7 +208,7 @@ int _starpu_disk_read(unsigned src_node, unsigned dst_node STARPU_ATTRIBUTE_UNUS
 int _starpu_disk_write(unsigned src_node STARPU_ATTRIBUTE_UNUSED, unsigned dst_node, void *obj, void *buf, off_t offset, size_t size, struct _starpu_async_channel *channel)
 {
         void *event = NULL;
-	int pos = get_location_with_node(dst_node);
+	unsigned pos = get_location_with_node(dst_node);
 
         if (channel != NULL)
         {
@@ -235,8 +236,8 @@ int _starpu_disk_write(unsigned src_node STARPU_ATTRIBUTE_UNUSED, unsigned dst_n
 
 int _starpu_disk_copy(unsigned node_src, void *obj_src, off_t offset_src, unsigned node_dst, void *obj_dst, off_t offset_dst, size_t size, struct _starpu_async_channel *channel)
 {
-	int pos_src = get_location_with_node(node_src);
-	int pos_dst = get_location_with_node(node_dst);
+	unsigned pos_src = get_location_with_node(node_src);
+	unsigned pos_dst = get_location_with_node(node_dst);
 	/* both nodes have same copy function */
         void * event;
 	channel->event.disk_event.memory_node = node_src;
@@ -252,7 +253,7 @@ int _starpu_disk_copy(unsigned node_src, void *obj_src, off_t offset_src, unsign
 int _starpu_disk_full_read(unsigned src_node, unsigned dst_node, void *obj, void **ptr, size_t *size, struct _starpu_async_channel *channel)
 {
         void *event = NULL;
-	int pos = get_location_with_node(src_node);
+	unsigned pos = get_location_with_node(src_node);
 
 	if (channel != NULL)
 	{
@@ -281,7 +282,7 @@ int _starpu_disk_full_read(unsigned src_node, unsigned dst_node, void *obj, void
 int _starpu_disk_full_write(unsigned src_node STARPU_ATTRIBUTE_UNUSED, unsigned dst_node, void *obj, void *ptr, size_t size, struct _starpu_async_channel *channel)
 {
         void *event = NULL;
-	int pos = get_location_with_node(dst_node);
+	unsigned pos = get_location_with_node(dst_node);
 
 	if (channel != NULL)
 	{
@@ -309,19 +310,19 @@ int _starpu_disk_full_write(unsigned src_node STARPU_ATTRIBUTE_UNUSED, unsigned
 
 void *starpu_disk_open(unsigned node, void *pos, size_t size)
 {
-	int position = get_location_with_node(node);
+	unsigned position = get_location_with_node(node);
 	return disk_register_list[position]->functions->open(disk_register_list[position]->base, pos, size);
 }
 
 void starpu_disk_close(unsigned node, void *obj, size_t size)
 {
-	int position = get_location_with_node(node);
+	unsigned position = get_location_with_node(node);
 	disk_register_list[position]->functions->close(disk_register_list[position]->base, obj, size);
 }
 
 void starpu_disk_wait_request(struct _starpu_async_channel *async_channel)
 {
-	int position = get_location_with_node(async_channel->event.disk_event.memory_node);
+	unsigned position = get_location_with_node(async_channel->event.disk_event.memory_node);
 
         if (async_channel->event.disk_event.requests != NULL && !_starpu_disk_backend_event_list_empty(async_channel->event.disk_event.requests))
         {
@@ -352,7 +353,7 @@ void starpu_disk_wait_request(struct _starpu_async_channel *async_channel)
 
 int starpu_disk_test_request(struct _starpu_async_channel *async_channel)
 {
-	int position = get_location_with_node(async_channel->event.disk_event.memory_node);
+	unsigned position = get_location_with_node(async_channel->event.disk_event.memory_node);
 
         if (async_channel->event.disk_event.requests != NULL && !_starpu_disk_backend_event_list_empty(async_channel->event.disk_event.requests))
         {
@@ -424,28 +425,21 @@ static int add_disk_in_list(unsigned node,  struct starpu_disk_ops *func, void *
 	dr->functions = func;
 	n = ++disk_number;
 	disk_register_list[n] = dr;
+	memnode_to_disknode[node] = n;
 	return n;
 }
 
-static int get_location_with_node(unsigned node)
+static inline unsigned get_location_with_node(unsigned node)
 {
-#ifdef STARPU_DEVEL
-#warning optimize with a MAXNODE array
-#endif
-	int i;
-	for (i = 0; i <= disk_number; ++i)
-		if (disk_register_list[i]->node == node)
-			return i;
-	STARPU_ASSERT_MSG(false, "Disk node not found !(%u) ", node);
-	return -1;
+	return memnode_to_disknode[node];
 }
 
 int _starpu_disk_can_copy(unsigned node1, unsigned node2)
 {
 	if (starpu_node_get_kind(node1) == STARPU_DISK_RAM && starpu_node_get_kind(node2) == STARPU_DISK_RAM)
 	{
-		int pos1 = get_location_with_node(node1);
-		int pos2 = get_location_with_node(node2);
+		unsigned pos1 = get_location_with_node(node1);
+		unsigned pos2 = get_location_with_node(node2);
 		if (disk_register_list[pos1]->functions == disk_register_list[pos2]->functions)
 			/* they must have a copy function */
 			if (disk_register_list[pos1]->functions->copy != NULL)
@@ -456,13 +450,13 @@ int _starpu_disk_can_copy(unsigned node1, unsigned node2)
 
 void _starpu_set_disk_flag(unsigned node, int flag)
 {
-	int pos = get_location_with_node(node);
+	unsigned pos = get_location_with_node(node);
 	disk_register_list[pos]->flag = flag;
 }
 
 int _starpu_get_disk_flag(unsigned node)
 {
-	int pos = get_location_with_node(node);
+	unsigned pos = get_location_with_node(node);
 	return disk_register_list[pos]->flag;
 }
 

+ 153 - 61
src/core/disk_ops/disk_hdf5.c

@@ -34,8 +34,6 @@
 #define NITER	_starpu_calibration_minimum
 #define STARPU_CHUNK_DIM 4096
 
-/* TODO: support disk-to-disk copy with HD5Ocopy */
-
 /* ------------------- use HDF5 to write on disk -------------------  */
 
 #ifndef H5_HAVE_THREADSAFE
@@ -66,16 +64,20 @@ static struct _starpu_hdf5_work_list global_work_list;        /* This list conta
 
 #endif									
 
-
-
-enum hdf5_work_type { READ, WRITE, FULL_READ, FULL_WRITE };
+enum hdf5_work_type { READ, WRITE, FULL_READ, FULL_WRITE, COPY };
 
 LIST_TYPE(_starpu_hdf5_work,
         enum hdf5_work_type type;
-        struct starpu_hdf5_obj * obj;
-        struct starpu_hdf5_base * base;
+
+        struct starpu_hdf5_base * base_src;
+        struct starpu_hdf5_obj * obj_src;
+        off_t offset_src;
+
+        struct starpu_hdf5_base * base_dst;
+        struct starpu_hdf5_obj * obj_dst;
+        off_t offset_dst;
+
         void * ptr;
-        off_t offset;
         size_t size;
         void * event;
 );
@@ -103,14 +105,16 @@ struct starpu_hdf5_obj
 static inline void _starpu_hdf5_protect_start(void * base STARPU_ATTRIBUTE_UNUSED)
 {
 #ifndef H5_HAVE_THREADSAFE
-        STARPU_PTHREAD_MUTEX_LOCK(&HDF5_VAR_MUTEX);
+	if (base != NULL)
+		STARPU_PTHREAD_MUTEX_LOCK(&HDF5_VAR_MUTEX);
 #endif
 }
 
 static inline void _starpu_hdf5_protect_stop(void * base STARPU_ATTRIBUTE_UNUSED)
 {
 #ifndef H5_HAVE_THREADSAFE
-        STARPU_PTHREAD_MUTEX_UNLOCK(&HDF5_VAR_MUTEX);
+	if (base != NULL)
+		STARPU_PTHREAD_MUTEX_UNLOCK(&HDF5_VAR_MUTEX);
 #endif
 }
 
@@ -121,8 +125,8 @@ static void starpu_hdf5_full_read_internal(struct _starpu_hdf5_work * work)
 {
         herr_t status;
 
-        status = H5Dread(work->obj->dataset, H5T_NATIVE_CHAR, H5S_ALL, H5S_ALL, H5P_DEFAULT, work->ptr);
-        STARPU_ASSERT_MSG(status >= 0, "Can not read data associed to this dataset (%s)\n", work->obj->path);
+        status = H5Dread(work->obj_src->dataset, H5T_NATIVE_CHAR, H5S_ALL, H5S_ALL, H5P_DEFAULT, work->ptr);
+        STARPU_ASSERT_MSG(status >= 0, "Can not read data associed to this dataset (%s)\n", work->obj_src->path);
 }
 
 /* TODO : Dataspace may not be NATIVE_CHAR for opened data */
@@ -131,22 +135,22 @@ static void starpu_hdf5_full_write_internal(struct _starpu_hdf5_work * work)
         herr_t status;
 
 	/* Update size of dataspace */
-	if (work->size > work->obj->size)
+	if (work->size > work->obj_dst->size)
 	{
 		/* Get official datatype */
-		hid_t datatype = H5Dget_type(work->obj->dataset);
+		hid_t datatype = H5Dget_type(work->obj_dst->dataset);
 		hsize_t sizeDatatype = H5Tget_size(datatype);
 		
 		/* Count in number of elements */
 		hsize_t extendsdim[1] = {work->size/sizeDatatype};
-		status = H5Dset_extent (work->obj->dataset, extendsdim);
+		status = H5Dset_extent (work->obj_dst->dataset, extendsdim);
 		STARPU_ASSERT_MSG(status >= 0, "Error when extending HDF5 dataspace !\n");
-		work->obj->size = work->size;
+		work->obj_dst->size = work->size;
 	}
 
         /* Write ALL the dataspace */
-        status = H5Dwrite(work->obj->dataset, H5T_NATIVE_CHAR, H5S_ALL, H5S_ALL, H5P_DEFAULT, work->ptr);
-        STARPU_ASSERT_MSG(status >= 0, "Can not write data to this dataset (%s)\n", work->obj->path);
+        status = H5Dwrite(work->obj_dst->dataset, H5T_NATIVE_CHAR, H5S_ALL, H5S_ALL, H5P_DEFAULT, work->ptr);
+        STARPU_ASSERT_MSG(status >= 0, "Can not write data to this dataset (%s)\n", work->obj_dst->path);
 }
 
 static void starpu_hdf5_read_internal(struct _starpu_hdf5_work * work)
@@ -154,43 +158,43 @@ static void starpu_hdf5_read_internal(struct _starpu_hdf5_work * work)
         herr_t status;
 
         /* Get official datatype */
-        hid_t datatype = H5Dget_type(work->obj->dataset);
+        hid_t datatype = H5Dget_type(work->obj_src->dataset);
         hsize_t sizeDatatype = H5Tget_size(datatype);
 
         /* count in element, not in byte */
-        work->offset /= sizeDatatype;
+        work->offset_src /= sizeDatatype;
         work->size /= sizeDatatype;
 
         /* duplicate the dataspace in the dataset */
-        hid_t dataspace_select = H5Dget_space(work->obj->dataset);
-        STARPU_ASSERT_MSG(dataspace_select >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj->path);
+        hid_t dataspace_select = H5Dget_space(work->obj_src->dataset);
+        STARPU_ASSERT_MSG(dataspace_select >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj_src->path);
 
         /* Select what we want of the duplicated dataspace (it's called an hyperslab). This operation is done on place */
-        hsize_t offsets[1] = {work->offset};
+        hsize_t offsets[1] = {work->offset_src};
         hsize_t count[1] = {work->size};
         /* stride and block size are NULL which is equivalent of a shift of 1 */
         status = H5Sselect_hyperslab(dataspace_select, H5S_SELECT_SET, offsets, NULL, count, NULL);
-        STARPU_ASSERT_MSG(status >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj->path);
+        STARPU_ASSERT_MSG(status >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj_src->path);
 
         /* create the dataspace for the received data which describes ptr */
         hsize_t dims_receive[1] = {work->size};
         hid_t dataspace_receive = H5Screate_simple(1, dims_receive, NULL);
-        STARPU_ASSERT_MSG(dataspace_receive >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj->path);
+        STARPU_ASSERT_MSG(dataspace_receive >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj_src->path);
 
         /* Receiver has to be an hyperslabs */
         offsets[0] = 0;
         count[0] = work->size;
         status = H5Sselect_hyperslab(dataspace_receive, H5S_SELECT_SET, offsets, NULL, count, NULL);
-        STARPU_ASSERT_MSG(dataspace_receive >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj->path);
+        STARPU_ASSERT_MSG(dataspace_receive >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj_src->path);
 
-        status = H5Dread(work->obj->dataset, datatype, dataspace_receive, dataspace_select, H5P_DEFAULT, work->ptr);
-        STARPU_ASSERT_MSG(status >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj->path);
+        status = H5Dread(work->obj_src->dataset, datatype, dataspace_receive, dataspace_select, H5P_DEFAULT, work->ptr);
+        STARPU_ASSERT_MSG(status >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj_src->path);
 
         /* don't need these dataspaces */
         status = H5Sclose(dataspace_select);
-        STARPU_ASSERT_MSG(status >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj->path);
+        STARPU_ASSERT_MSG(status >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj_src->path);
         status = H5Sclose(dataspace_receive);
-        STARPU_ASSERT_MSG(status >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj->path);
+        STARPU_ASSERT_MSG(status >= 0, "Error when reading this HDF5 dataset (%s)\n", work->obj_src->path);
 }
 
 static void starpu_hdf5_write_internal(struct _starpu_hdf5_work * work)
@@ -198,53 +202,98 @@ static void starpu_hdf5_write_internal(struct _starpu_hdf5_work * work)
         herr_t status;
 
         /* Get official datatype */
-        hid_t datatype = H5Dget_type(work->obj->dataset);
+        hid_t datatype = H5Dget_type(work->obj_dst->dataset);
         hsize_t sizeDatatype = H5Tget_size(datatype);
 
 	/* Update size of dataspace */
-	if (work->size + work->offset > work->obj->size)
+	if (work->size + work->offset_dst > work->obj_dst->size)
 	{
 		/* Count in number of elements */
-		hsize_t extendsdim[1] = {(work->offset + work->size)/sizeDatatype};
-		status = H5Dset_extent (work->obj->dataset, extendsdim);
+		hsize_t extendsdim[1] = {(work->offset_dst + work->size)/sizeDatatype};
+		status = H5Dset_extent (work->obj_dst->dataset, extendsdim);
 		STARPU_ASSERT_MSG(status >= 0, "Error when extending HDF5 dataspace !\n");
-		work->obj->size = work->offset + work->size;
+		work->obj_dst->size = work->offset_dst + work->size;
 	}
 
         /* count in element, not in byte */
-        work->offset /= sizeDatatype;
+        work->offset_dst /= sizeDatatype;
         work->size /= sizeDatatype;
 
         /* duplicate the dataspace in the dataset */
-        hid_t dataspace_select = H5Dget_space(work->obj->dataset);
-        STARPU_ASSERT_MSG(dataspace_select >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj->path);
+        hid_t dataspace_select = H5Dget_space(work->obj_dst->dataset);
+        STARPU_ASSERT_MSG(dataspace_select >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj_dst->path);
 
         /* Select what we want of the duplicated dataspace (it's called an hyperslab). This operation is done on place */
-        hsize_t offsets[1] = {work->offset};
+        hsize_t offsets[1] = {work->offset_dst};
         hsize_t count[1] = {work->size};
         /* stride and block size are NULL which is equivalent of a shift of 1 */
         status = H5Sselect_hyperslab(dataspace_select, H5S_SELECT_SET, offsets, NULL, count, NULL);
-        STARPU_ASSERT_MSG(status >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj->path);
+        STARPU_ASSERT_MSG(status >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj_dst->path);
 
         /* create the dataspace for the received data which describes ptr */
         hsize_t dims_send[1] = {work->size};
         hid_t dataspace_send = H5Screate_simple(1, dims_send, NULL);
-        STARPU_ASSERT_MSG(dataspace_send >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj->path);
+        STARPU_ASSERT_MSG(dataspace_send >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj_dst->path);
 
         /* Receiver has to be an hyperslabs */
         offsets[0] = 0;
         count[0] = work->size;
         status = H5Sselect_hyperslab(dataspace_send, H5S_SELECT_SET, offsets, NULL, count, NULL);
-        STARPU_ASSERT_MSG(dataspace_send >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj->path);
+        STARPU_ASSERT_MSG(dataspace_send >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj_dst->path);
 
-        status = H5Dwrite(work->obj->dataset, datatype, dataspace_send, dataspace_select, H5P_DEFAULT, work->ptr);
-        STARPU_ASSERT_MSG(status >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj->path);
+        status = H5Dwrite(work->obj_dst->dataset, datatype, dataspace_send, dataspace_select, H5P_DEFAULT, work->ptr);
+        STARPU_ASSERT_MSG(status >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj_dst->path);
 
         /* don't need these dataspaces */
         status = H5Sclose(dataspace_select);
-        STARPU_ASSERT_MSG(status >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj->path);
+        STARPU_ASSERT_MSG(status >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj_dst->path);
         status = H5Sclose(dataspace_send);
-        STARPU_ASSERT_MSG(status >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj->path);
+        STARPU_ASSERT_MSG(status >= 0, "Error when writing this HDF5 dataset (%s)\n", work->obj_dst->path);
+}
+
+unsigned warned = 0;
+static void starpu_hdf5_copy_internal(struct _starpu_hdf5_work * work)
+{
+	herr_t status;
+
+	/* HDF5 H50copy supports only same size in both areas and copies the entire object */
+	if (work->offset_src == 0 && work->offset_dst == 0 && work->size == work->obj_src->size && work->size == work->obj_dst->size)
+	{
+		/* Add creation of intermediate group. It's not really used in our case but HDF5 doc is very weird */
+		hid_t grp_prop = H5Pcreate(H5P_LINK_CREATE);
+		status = H5Pset_create_intermediate_group(grp_prop, 1);
+		STARPU_ASSERT_MSG(status >= 0, "Can not copy data (%s) associed to this disk (%s) to the data (%s) on this disk (%s)\n", work->obj_src->path, work->base_src->path, work->obj_dst->path, work->base_dst->path);
+
+		hid_t copy_prop = H5Pcreate(H5P_OBJECT_COPY);
+		status = H5Pset_copy_object(copy_prop, H5O_COPY_SHALLOW_HIERARCHY_FLAG);
+		STARPU_ASSERT_MSG(status >= 0, "Can not copy data (%s) associed to this disk (%s) to the data (%s) on this disk (%s)\n", work->obj_src->path, work->base_src->path, work->obj_dst->path, work->base_dst->path);
+
+		status = H5Ocopy(work->obj_src->dataset, work->obj_src->path, work->obj_dst->dataset, work->obj_dst->path, copy_prop, grp_prop); 
+		STARPU_ASSERT_MSG(status >= 0, "Can not copy data (%s) associed to this disk (%s) to the data (%s) on this disk (%s)\n", work->obj_src->path, work->base_src->path, work->obj_dst->path, work->base_dst->path);
+
+		H5Pclose(grp_prop);
+		H5Pclose(copy_prop);
+	}
+	else
+	{
+		if (!warned)
+		{
+			_STARPU_DISP("Direct disk to disk copy is not supported for a piece of data. Data will be transfered to RAM memory and then, be pushed on disk \n");
+			warned = 1;
+		}
+
+		void ** ptr = NULL;
+		int ret = _starpu_malloc_flags_on_node(STARPU_MAIN_RAM, ptr, work->size, 0);
+		STARPU_ASSERT_MSG(ret == 0, "Cannot allocate %lu bytes to perform disk to disk operation", work->size);
+
+		/* buffer is only used internally to store intermediate data */
+		work->ptr = *ptr;
+		
+		starpu_hdf5_read_internal(work);
+		starpu_hdf5_write_internal(work);
+
+		_starpu_free_flags_on_node(STARPU_MAIN_RAM, *ptr, work->size, 0);
+	}
 }
 
 static void * _starpu_hdf5_internal_thread(void * arg)
@@ -266,7 +315,19 @@ static void * _starpu_hdf5_internal_thread(void * arg)
                         struct _starpu_hdf5_work * work = _starpu_hdf5_work_list_pop_back(&HDF5_VAR_WORK_LIST);
                         STARPU_PTHREAD_MUTEX_UNLOCK(&HDF5_VAR_MUTEX);
 
-                        _starpu_hdf5_protect_start(work->base);
+			if (work->base_src < work->base_dst)
+			{
+				_starpu_hdf5_protect_start(work->base_src);
+				if (work->base_src != work->base_dst)
+					_starpu_hdf5_protect_start(work->base_dst);
+			}
+			else
+			{
+				_starpu_hdf5_protect_start(work->base_dst);
+				if (work->base_src != work->base_dst)
+					_starpu_hdf5_protect_start(work->base_src);
+			}
+
                         switch(work->type)
                         {
                                 case READ:
@@ -284,11 +345,16 @@ static void * _starpu_hdf5_internal_thread(void * arg)
                                 case FULL_WRITE:
                                         starpu_hdf5_full_write_internal(work);
                                         break;
+				
+				case COPY:
+					starpu_hdf5_copy_internal(work);
+					break;
 
                                 default:
                                         STARPU_ABORT();
                         }
-                        _starpu_hdf5_protect_stop(work->base);
+                        _starpu_hdf5_protect_stop(work->base_src);
+                        _starpu_hdf5_protect_stop(work->base_dst);
 
                         /* Update event to tell it's finished */
                         starpu_sem_post((starpu_sem_t *) work->event);
@@ -337,22 +403,36 @@ static hsize_t _starpu_get_size_obj(struct starpu_hdf5_obj * obj)
         return dims[0]*sizeDatatype;
 }
 
-static void starpu_hdf5_send_work(void *base, void *obj, void *buf, off_t offset, size_t size, void * event, enum hdf5_work_type type)
+static void starpu_hdf5_send_work(void *base_src, void *obj_src, off_t offset_src, void *base_dst, void *obj_dst, off_t offset_dst, void *buf, size_t size, void * event, enum hdf5_work_type type)
 {
-        struct starpu_hdf5_obj * dataObj = (struct starpu_hdf5_obj *) obj;
-        struct starpu_hdf5_base * fileBase = (struct starpu_hdf5_base *) base;
+        struct starpu_hdf5_obj * dataObj_src = (struct starpu_hdf5_obj *) obj_src;
+        struct starpu_hdf5_obj * dataObj_dst = (struct starpu_hdf5_obj *) obj_dst;
+        struct starpu_hdf5_base * fileBase_src = (struct starpu_hdf5_base *) base_src;
+        struct starpu_hdf5_base * fileBase_dst = (struct starpu_hdf5_base *) base_dst;
 
         struct _starpu_hdf5_work * work;
         _STARPU_MALLOC(work, sizeof(*work));
 
         work->type = type;
-        work->obj = dataObj;
-        work->base = fileBase;
+
+        work->base_src = fileBase_src;
+        work->obj_src = dataObj_src;
+        work->offset_src = offset_src;
+
+        work->base_dst = fileBase_dst;
+        work->obj_dst = dataObj_dst;
+        work->offset_dst = offset_dst;
+
         work->ptr = buf;
-        work->offset = offset;
         work->size = size;
         work->event = event;
 
+        struct starpu_hdf5_base * fileBase;
+	if (fileBase_src != NULL)
+		fileBase = fileBase_src;
+	else	
+		fileBase = fileBase_dst;
+
         STARPU_PTHREAD_MUTEX_LOCK(&HDF5_VAR_MUTEX);
         _starpu_hdf5_work_list_push_front(&HDF5_VAR_WORK_LIST, work);
         /* Wake up internal thread */
@@ -685,7 +765,7 @@ static int starpu_hdf5_full_read(void *base, void *obj, void **ptr, size_t *size
 
         _starpu_malloc_flags_on_node(dst_node, ptr, *size, 0); 
 
-        starpu_hdf5_send_work(base, obj, *ptr, 0, *size, (void*) &finished, FULL_READ);
+        starpu_hdf5_send_work(base, obj, 0, NULL, NULL, 0, *ptr, *size, (void*) &finished, FULL_READ);
         
         starpu_hdf5_wait(&finished);
 
@@ -699,7 +779,7 @@ static int starpu_hdf5_full_write(void *base, void *obj, void *ptr, size_t size)
         starpu_sem_t finished;
         starpu_sem_init(&finished, 0, 0);
 
-        starpu_hdf5_send_work(base, obj, ptr, 0, size, (void*) &finished, FULL_WRITE);
+        starpu_hdf5_send_work(NULL, NULL, 0, base, obj, 0, ptr, size, (void*) &finished, FULL_WRITE);
 
         starpu_hdf5_wait(&finished);
 
@@ -713,7 +793,7 @@ static int starpu_hdf5_read(void *base, void *obj, void *buf, off_t offset, size
         starpu_sem_t finished;
         starpu_sem_init(&finished, 0, 0);
 
-        starpu_hdf5_send_work(base, obj, buf, offset, size, (void*) &finished, READ);
+        starpu_hdf5_send_work(base, obj, offset, NULL, NULL, 0, buf, size, (void*) &finished, READ);
 
         starpu_hdf5_wait(&finished);
 
@@ -727,7 +807,7 @@ static int starpu_hdf5_write(void *base, void *obj, const void *buf, off_t offse
         starpu_sem_t finished;
         starpu_sem_init(&finished, 0, 0);
 
-        starpu_hdf5_send_work(base, obj, (void *) buf, offset, size, (void*) &finished, WRITE);
+        starpu_hdf5_send_work(NULL, NULL, 0, base, obj, offset, (void *) buf, size, (void*) &finished, WRITE);
 
         starpu_hdf5_wait(&finished);
 
@@ -742,7 +822,7 @@ static void * starpu_hdf5_async_read(void *base, void *obj, void *buf, off_t off
         _STARPU_MALLOC(finished, sizeof(*finished));
         starpu_sem_init(finished, 0, 0);
 
-        starpu_hdf5_send_work(base, obj, buf, offset, size, (void*) finished, READ);
+        starpu_hdf5_send_work(base, obj, offset, NULL, NULL, 0, buf, size, (void*) finished, READ);
 
         return finished;
 }
@@ -753,11 +833,23 @@ static void * starpu_hdf5_async_write(void *base, void *obj, void *buf, off_t of
         _STARPU_MALLOC(finished, sizeof(*finished));
         starpu_sem_init(finished, 0, 0);
 
-        starpu_hdf5_send_work(base, obj, (void *) buf, offset, size, (void*) finished, WRITE);
+        starpu_hdf5_send_work(NULL, NULL, 0, base, obj, offset, (void *) buf, size, (void*) finished, WRITE);
 
         return finished;
 }
 
+void *  starpu_hdf5_copy(void *base_src, void* obj_src, off_t offset_src,  void *base_dst, void* obj_dst, off_t offset_dst, size_t size)
+{
+        starpu_sem_t * finished;
+        _STARPU_MALLOC(finished, sizeof(*finished));
+        starpu_sem_init(finished, 0, 0);
+
+	starpu_hdf5_send_work(base_src, obj_src, offset_src, base_dst, obj_dst, offset_dst, NULL, size, (void*) finished, COPY);
+
+        return finished;
+}
+
+
 static void starpu_hdf5_free_request(void * event)
 {
         starpu_sem_destroy(event);
@@ -830,7 +922,7 @@ struct starpu_disk_ops starpu_disk_hdf5_ops =
 	.write = starpu_hdf5_write,
 	.plug = starpu_hdf5_plug,
 	.unplug = starpu_hdf5_unplug,
-	.copy = NULL,
+	.copy = starpu_hdf5_copy,
 	.bandwidth = get_hdf5_bandwidth_between_disk_and_main_ram,
 	.full_read = starpu_hdf5_full_read,
 	.full_write = starpu_hdf5_full_write,

+ 1 - 1
src/core/disk_ops/disk_leveldb.cpp

@@ -192,7 +192,7 @@ static int starpu_leveldb_write(void *base, void *obj, const void *buf, off_t of
 		memcpy(buffer, (void *) value_read, tmp->size);
 
 		/* put the new data on their new place */
-		memcpy(buffer, (void *) (buf_tmp+offset), size);
+		memcpy(buffer + offset, (void *) buf_tmp, size);
 	}
 
 	/* and write them */

+ 14 - 0
src/datawizard/copy_driver.c

@@ -55,10 +55,17 @@ void _starpu_wake_all_blocked_workers_on_node(unsigned nodeid)
 		condition = &descr->conditions_attached_to_node[nodeid][cond_id];
 
 		if (condition->worker == cur_worker)
+		{
+			if (condition->cond == &condition->worker->sched_cond)
+			{
+				condition->worker->state_keep_awake = 1;
+			}
+
 			/* No need to wake myself, and I might be called from
 			 * the scheduler with mutex locked, through
 			 * starpu_prefetch_task_input_on_node */
 			continue;
+		}
 
 		/* wake anybody waiting on that condition */
 		STARPU_PTHREAD_MUTEX_LOCK_SCHED(&condition->worker->sched_mutex);
@@ -94,10 +101,17 @@ void starpu_wake_all_blocked_workers(void)
 		condition = &descr->conditions_all[cond_id];
 
 		if (condition->worker == cur_worker)
+		{
+			if (condition->cond == &condition->worker->sched_cond)
+			{
+				condition->worker->state_keep_awake = 1;
+			}
+
 			/* No need to wake myself, and I might be called from
 			 * the scheduler with mutex locked, through
 			 * starpu_prefetch_task_input_on_node */
 			continue;
+		}
 
 		/* wake anybody waiting on that condition */
 		STARPU_PTHREAD_MUTEX_LOCK_SCHED(&condition->worker->sched_mutex);

+ 0 - 7
tests/loader.c

@@ -282,13 +282,6 @@ int main(int argc, char *argv[])
 	child_pid = fork();
 	if (child_pid == 0)
 	{
-		// get a new pgid
-		if (setpgid(0, 0) == -1)
-		{
-			perror("setpgid");
-			fprintf(stderr, "[error] setpgid. Mark test as failed\n");
-			exit(EXIT_FAILURE);
-		}
 		if (launcher)
 		{
 			/* "Launchers" such as Valgrind need to be inserted

+ 3 - 5
tools/dev/cppcheck/suppressions.txt

@@ -39,10 +39,11 @@ redundantAssignment:tests/datawizard/mpi_like_async.c:211
 unusedPrivateFunction:tests/main/combined_workers/bfs/timer.h:45
 redundantAssignment:tests/main/driver_api/init_run_deinit.c
 redundantAssignment:tests/main/driver_api/run_driver.c
+unreadVariable:tests/datawizard/variable_size.c
 
 uselessAssignmentPtrArg:mpi/src/starpu_mpi.c:171
-unreadVariable:mpi/src/starpu_mpi.c:991
-unusedVariable:mpi/src/starpu_mpi.c:992
+unreadVariable:mpi/src/starpu_mpi.c:996
+unusedVariable:mpi/src/starpu_mpi.c:997
 redundantAssignment:src/core/workers.c
 
 invalidPointerCast:src/core/perfmodel/perfmodel_nan.c:74
@@ -58,9 +59,6 @@ unusedStructMember:src/core/simgrid.c:226
 wrongPrintfScanfArgNum:src/core/simgrid.c:962
 duplicateExpression:src/util/starpu_task_insert.c:52
 
-// TODO: this could be an error?
-redundantCopy:src/core/disk_ops/disk_leveldb.cpp:194
-
 nullPointerRedundantCheck:src/common/rbtree.c
 unreadVariable:src/datawizard/interfaces/*
 unreadVariable:src/drivers/driver_common/driver_common.c:493