15 年之前 · e3defda082
--- a/src/datawizard/coherency.c
+++ b/src/datawizard/coherency.c
@@ -167,15 +167,20 @@ int fetch_data_on_node(data_state *state, uint32_t requesting_node,
 
				 	if (!r) {
			
 
				 		//fprintf(stderr, "no request matched that one so we post a request %s\n", is_prefetch?"PREFETCH":"");
			
 
				 		/* find someone who already has the data */
			
 
				-		uint32_t src_node = select_src_node(state);
			
 
				-	
			
 
				-		STARPU_ASSERT(src_node != requesting_node);
			
 
				+		uint32_t src_node = 0;
			
 
				+
			
 
				+		/* if the data is in read only mode, there is no need for a source */
			
 
				+		if (read)
			
 
				+		{
			
 
				+			src_node = select_src_node(state);
			
 
				+			STARPU_ASSERT(src_node != requesting_node);
			
 
				+		}
			
 
				 	
			
 
				 		unsigned src_is_a_gpu = (get_node_kind(src_node) == CUDA_RAM);
			
 
				 		unsigned dst_is_a_gpu = (get_node_kind(requesting_node) == CUDA_RAM);
			
 
				 
			
 
				 		/* we have to perform 2 successive requests for GPU->GPU transfers */
			
 
				-		if (src_is_a_gpu && dst_is_a_gpu) {
			
 
				+		if (read && (src_is_a_gpu && dst_is_a_gpu)) {
			
 
				 			unsigned reuse_r_src_to_ram;
			
 
				 			data_request_t r_src_to_ram;
			
 
				 			data_request_t r_ram_to_dst;
			
--- a/src/datawizard/copy-driver.c
+++ b/src/datawizard/copy-driver.c
@@ -192,8 +192,11 @@ static int copy_data_1_to_1_generic(data_state *state, uint32_t src_node, uint32
 
				 int __attribute__((warn_unused_result)) driver_copy_data_1_to_1(data_state *state, uint32_t src_node, 
			
 
				 		uint32_t dst_node, unsigned donotread, struct data_request_s *req, unsigned may_alloc)
			
 
				 {
			
 
				-	STARPU_ASSERT(state->per_node[src_node].allocated);
			
 
				-	STARPU_ASSERT(state->per_node[src_node].refcnt);
			
 
				+	if (req->read)
			
 
				+	{
			
 
				+		STARPU_ASSERT(state->per_node[src_node].allocated);
			
 
				+		STARPU_ASSERT(state->per_node[src_node].refcnt);
			
 
				+	}
			
 
				 
			
 
				 	int ret_alloc, ret_copy;
			
 
				 	unsigned __attribute__((unused)) com_id = 0;
			
--- a/src/datawizard/data_request.c
+++ b/src/datawizard/data_request.c
@@ -97,7 +97,9 @@ data_request_t create_data_request(data_state *state, uint32_t src_node, uint32_
 
				 	state->per_node[dst_node].request = r;
			
 
				 
			
 
				 	state->per_node[dst_node].refcnt++;
			
 
				-	state->per_node[src_node].refcnt++;
			
 
				+
			
 
				+	if (read)
			
 
				+		state->per_node[src_node].refcnt++;
			
 
				 
			
 
				 	r->refcnt = 1;
			
 
				 
			
@@ -175,8 +177,11 @@ void post_data_request(data_request_t r, uint32_t handling_node)
 
				 	int res;
			
 
				 //	fprintf(stderr, "POST REQUEST\n");
			
 
				 
			
 
				-	STARPU_ASSERT(r->state->per_node[r->src_node].allocated);
			
 
				-	STARPU_ASSERT(r->state->per_node[r->src_node].refcnt);
			
 
				+	if (r->read)
			
 
				+	{
			
 
				+		STARPU_ASSERT(r->state->per_node[r->src_node].allocated);
			
 
				+		STARPU_ASSERT(r->state->per_node[r->src_node].refcnt);
			
 
				+	}
			
 
				 
			
 
				 	/* insert the request in the proper list */
			
 
				 	res = pthread_mutex_lock(&data_requests_list_mutex[handling_node]);
			
@@ -210,7 +215,9 @@ static void handle_data_request_completion(data_request_t r)
 
				 	r->completed = 1;
			
 
				 	
			
 
				 	state->per_node[r->dst_node].refcnt--;
			
 
				-	state->per_node[r->src_node].refcnt--;
			
 
				+
			
 
				+	if (r->read)
			
 
				+		state->per_node[r->src_node].refcnt--;
			
 
				 
			
 
				 	r->refcnt--;
			
 
				 
			
@@ -239,8 +246,11 @@ static int handle_data_request(data_request_t r, unsigned may_alloc)
 
				 
			
 
				 	starpu_spin_lock(&r->lock);
			
 
				 
			
 
				-	STARPU_ASSERT(state->per_node[r->src_node].allocated);
			
 
				-	STARPU_ASSERT(state->per_node[r->src_node].refcnt);
			
 
				+	if (r->read)
			
 
				+	{
			
 
				+		STARPU_ASSERT(state->per_node[r->src_node].allocated);
			
 
				+		STARPU_ASSERT(state->per_node[r->src_node].refcnt);
			
 
				+	}
			
 
				 
			
 
				 	/* perform the transfer */
			
 
				 	/* the header of the data must be locked by the worker that submitted the request */
			
--- a/src/datawizard/hierarchy.c
+++ b/src/datawizard/hierarchy.c
@@ -60,9 +60,6 @@ void register_new_data(data_state *state, uint32_t home_node, uint32_t wb_mask)
 
				 
			
 
				 	state->is_not_important = 0;
			
 
				 
			
 
				-	/* make sure we do have a valid copy */
			
 
				-	STARPU_ASSERT(home_node < MAXNODES);
			
 
				-
			
 
				 	state->wb_mask = wb_mask;
			
 
				 
			
 
				 	/* that new data is invalid from all nodes perpective except for the
			
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -80,6 +80,7 @@ check_PROGRAMS += 				\
 
				 	core/static_restartable_tag		\
			
 
				 	core/empty_task				\
			
 
				 	core/empty_task_sync_point		\
			
 
				+	datawizard/write_only_tmp_buffer	\
			
 
				 	errorcheck/starpu_init_noworker		\
			
 
				 	helper/cublas_init			\
			
 
				 	helper/pinned_memory			\
			
@@ -124,6 +125,9 @@ core_empty_task_SOURCES =			\
 
				 core_empty_task_sync_point_SOURCES =		\
			
 
				 	core/empty_task_sync_point.c
			
 
				 
			
 
				+datawizard_write_only_tmp_buffer_SOURCES =	\
			
 
				+	datawizard/write_only_tmp_buffer.c
			
 
				+
			
 
				 errorcheck_starpu_init_noworker_SOURCES =	\
			
 
				 	errorcheck/starpu_init_noworker.c
			
 
				 
			
--- a/tests/datawizard/write_only_tmp_buffer.c
+++ b/tests/datawizard/write_only_tmp_buffer.c
@@ -0,0 +1,113 @@
 
				+/*
			
 
				+ * StarPU
			
 
				+ * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU Lesser General Public License as published by
			
 
				+ * the Free Software Foundation; either version 2.1 of the License, or (at
			
 
				+ * your option) any later version.
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful, but
			
 
				+ * WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
			
 
				+ *
			
 
				+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
			
 
				+ */
			
 
				+
			
 
				+#include <stdio.h>
			
 
				+#include <unistd.h>
			
 
				+#include <errno.h>
			
 
				+#include <starpu.h>
			
 
				+#include <stdlib.h>
			
 
				+
			
 
				+#define VECTORSIZE	1024
			
 
				+
			
 
				+starpu_data_handle v_handle;
			
 
				+
			
 
				+static void cuda_codelet_null(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
			
 
				+{
			
 
				+	fprintf(stderr, "POUET CUDA\n");
			
 
				+	int *buf = (int *)buffers[0].vector.ptr;
			
 
				+
			
 
				+	cudaMemset(buf, 42, sizeof(int));
			
 
				+}
			
 
				+
			
 
				+static void core_codelet_null(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
			
 
				+{
			
 
				+	fprintf(stderr, "POUET CUDA\n");
			
 
				+	int *buf = (int *)buffers[0].vector.ptr;
			
 
				+
			
 
				+	*buf = 42;
			
 
				+}
			
 
				+
			
 
				+static void display_var(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
			
 
				+{
			
 
				+	int *buf = (int *)buffers[0].vector.ptr;
			
 
				+
			
 
				+	fprintf(stderr, "Value = %d (should be %d)\n", *buf, 42);
			
 
				+	fflush(stderr);
			
 
				+
			
 
				+	if (*buf != 42)
			
 
				+		exit(-1);
			
 
				+}
			
 
				+
			
 
				+static starpu_codelet cl = {
			
 
				+	.where = CORE|CUDA,
			
 
				+	.core_func = core_codelet_null,
			
 
				+	.cuda_func = cuda_codelet_null,
			
 
				+	.nbuffers = 1
			
 
				+};
			
 
				+
			
 
				+static starpu_codelet display_cl = {
			
 
				+	.where = CORE,
			
 
				+	.core_func = display_var,
			
 
				+	.nbuffers = 1
			
 
				+};
			
 
				+
			
 
				+
			
 
				+int main(int argc, char **argv)
			
 
				+{
			
 
				+	starpu_init(NULL);
			
 
				+
			
 
				+	/* The buffer should never be explicitely allocated */
			
 
				+	starpu_register_vector_data(&v_handle, (uint32_t)-1, (uintptr_t)NULL, VECTORSIZE, sizeof(int));
			
 
				+
			
 
				+	struct starpu_task *task = starpu_task_create();
			
 
				+		task->cl = &cl;
			
 
				+		task->buffers[0].handle = v_handle;
			
 
				+		task->buffers[0].mode = STARPU_W;
			
 
				+
			
 
				+		task->synchronous = 1;
			
 
				+
			
 
				+	int ret = starpu_submit_task(task);
			
 
				+	if (ret == -ENODEV)
			
 
				+			goto enodev;
			
 
				+
			
 
				+//	starpu_wait_task(task);
			
 
				+
			
 
				+	task = starpu_task_create();
			
 
				+		task->cl = &display_cl;
			
 
				+		task->buffers[0].handle = v_handle;
			
 
				+		task->buffers[0].mode = STARPU_R;
			
 
				+
			
 
				+		task->synchronous = 1;
			
 
				+
			
 
				+	ret = starpu_submit_task(task);
			
 
				+	if (ret == -ENODEV)
			
 
				+			goto enodev;
			
 
				+
			
 
				+//	starpu_wait_task(task);
			
 
				+
			
 
				+	/* this should get rid of automatically allocated buffers */
			
 
				+	starpu_delete_data(v_handle);
			
 
				+
			
 
				+	starpu_shutdown();
			
 
				+
			
 
				+	return 0;
			
 
				+
			
 
				+enodev:
			
 
				+	fprintf(stderr, "WARNING: No one can execute this task\n");
			
 
				+	/* yes, we do not perform the computation but we did detect that no one
			
 
				+ 	 * could perform the kernel, so this is not an error from StarPU */
			
 
				+	return 0;
			
 
				+}