Browse Source

Disable copy_file_range when syscall returned ENOSYS

Corentin Salingue 8 years ago
parent
commit
95b0e16660
2 changed files with 72 additions and 5 deletions
  1. 21 0
      src/core/disk.c
  2. 51 5
      src/core/disk_ops/unistd/disk_unistd_global.c

+ 21 - 0
src/core/disk.c

@@ -252,6 +252,27 @@ int _starpu_disk_copy(unsigned node_src, void *obj_src, off_t offset_src, unsign
 											       size);
         add_async_event(channel, event);
 
+
+	/* The direct disk-to-disk copy failed: disable it for this disk and fall back to a read followed by a write. */
+	if (!event)
+	{
+		disk_register_list[pos_src]->functions->copy = NULL;
+
+		/* Perform a read into a temporary main-RAM buffer, then a write from that buffer. */
+		void * ptr;
+		int ret = _starpu_malloc_flags_on_node(STARPU_MAIN_RAM, &ptr, size, 0);
+		STARPU_ASSERT_MSG(ret == 0, "Cannot allocate %lu bytes to perform disk to disk operation", size);
+
+		ret = _starpu_disk_read(node_src, STARPU_MAIN_RAM, obj_src, ptr, offset_src, size, NULL);
+		STARPU_ASSERT_MSG(ret == 0, "Cannot read %lu bytes to perform disk to disk copy", size);
+		ret = _starpu_disk_write(STARPU_MAIN_RAM, node_dst, obj_dst, ptr, offset_dst, size, NULL);
+		STARPU_ASSERT_MSG(ret == 0, "Cannot write %lu bytes to perform disk to disk copy", size);
+
+		_starpu_free_flags_on_node(STARPU_MAIN_RAM, ptr, size, 0);
+
+		return 0;
+	}
+
 	STARPU_ASSERT(event);
 	return -EAGAIN;
 }

+ 51 - 5
src/core/disk_ops/unistd/disk_unistd_global.c

@@ -93,6 +93,10 @@ struct starpu_unistd_copy_thread
 
 struct starpu_unistd_copy_thread copy_thread[STARPU_MAXNODES][STARPU_MAXNODES];
 static unsigned starpu_unistd_nb_disk_opened = 0;
+/* The copy_file_range syscall can fail with ENOSYS. Use a global variable to
+ * record this and prevent StarPU from using direct disk-to-disk copy. */
+enum starpu_unistd_failed_copy { INIT, CHECKED, FAILED };
+static enum starpu_unistd_failed_copy starpu_unistd_copy_failed = INIT;
 #endif
 
 struct starpu_unistd_base
@@ -550,7 +554,20 @@ static void * starpu_unistd_internal_thread(void * arg)
 			struct starpu_unistd_work_copy * work = starpu_unistd_work_copy_list_pop_back(copy_thread->list);
 			STARPU_PTHREAD_MUTEX_UNLOCK(&copy_thread->mutex);
 
-			copy_file_range(work->fd_src, &work->off_src, work->fd_dst, &work->off_dst, work->len, work->flags);
+			starpu_ssize_t ret = copy_file_range(work->fd_src, &work->off_src, work->fd_dst, &work->off_dst, work->len, work->flags);
+
+			STARPU_PTHREAD_MUTEX_LOCK(&copy_thread->mutex);
+			if (starpu_unistd_copy_failed == INIT && ret == -1 && errno == ENOSYS)
+			{
+				starpu_unistd_copy_failed = FAILED;
+			} 
+			else
+			{
+				STARPU_ASSERT_MSG(ret == work->len, "Copy_file_range failed (value %zd instead of %zd and errno %d)", ret, work->len, errno);
+				starpu_unistd_copy_failed = CHECKED;
+			}
+			STARPU_PTHREAD_MUTEX_UNLOCK(&copy_thread->mutex);
+
 
 			starpu_sem_post(&work->finished);
 
@@ -652,6 +669,9 @@ void starpu_unistd_global_unplug(void *base)
 			ending_working_thread(&copy_thread[fileBase->disk_index][i]);
 	}
 	starpu_unistd_nb_disk_opened--;
+
+	if (starpu_unistd_nb_disk_opened == 0)
+		starpu_unistd_copy_failed = INIT;
 #endif
 
 	free(fileBase->path);
@@ -972,10 +992,36 @@ void *  starpu_unistd_global_copy(void *base_src, void* obj_src, off_t offset_sr
 
 	event->event.event_copy = work;
 
-	STARPU_PTHREAD_MUTEX_LOCK(&copy_thread[unistd_base_src->disk_index][unistd_base_dst->disk_index].mutex);
-	starpu_unistd_work_copy_list_push_front(copy_thread[unistd_base_src->disk_index][unistd_base_dst->disk_index].list, work);
-        STARPU_PTHREAD_COND_BROADCAST(&copy_thread[unistd_base_src->disk_index][unistd_base_dst->disk_index].cond);
-	STARPU_PTHREAD_MUTEX_UNLOCK(&copy_thread[unistd_base_src->disk_index][unistd_base_dst->disk_index].mutex);
+	struct starpu_unistd_copy_thread * thread = &copy_thread[unistd_base_src->disk_index][unistd_base_dst->disk_index];
+
+	unsigned check = 0;
+	STARPU_PTHREAD_MUTEX_LOCK(&thread->mutex);
+	if (starpu_unistd_copy_failed == INIT)
+		check = 1;
+	STARPU_PTHREAD_MUTEX_UNLOCK(&thread->mutex);
+
+
+	STARPU_PTHREAD_MUTEX_LOCK(&thread->mutex);
+	starpu_unistd_work_copy_list_push_front(thread->list, work);
+        STARPU_PTHREAD_COND_BROADCAST(&thread->cond);
+	STARPU_PTHREAD_MUTEX_UNLOCK(&thread->mutex);
+
+	if (check)
+	{
+		starpu_unistd_global_wait_request((void *) event);
+		/* Post the semaphore again so that a token is available when StarPU later tests/waits on the request. */
+		starpu_sem_post(&work->finished);
+	
+		STARPU_PTHREAD_MUTEX_LOCK(&thread->mutex);
+		/* here copy_file_range does not work */
+		if (starpu_unistd_copy_failed == FAILED)
+		{
+			STARPU_PTHREAD_MUTEX_UNLOCK(&thread->mutex);
+			starpu_unistd_global_free_request((void *) event);
+			return NULL;
+		}
+		STARPU_PTHREAD_MUTEX_UNLOCK(&thread->mutex);
+	}
 
 	return event;
 }