Browse Source

Properly handle the case of 'write-only' buffers: there is no need for having a
valid copy somewhere, and there is no need for having the buffer allocated in
main memory.

Cédric Augonnet 15 years ago
parent
commit
e3defda082

+ 9 - 4
src/datawizard/coherency.c

@@ -167,15 +167,20 @@ int fetch_data_on_node(data_state *state, uint32_t requesting_node,
 	if (!r) {
 		//fprintf(stderr, "no request matched that one so we post a request %s\n", is_prefetch?"PREFETCH":"");
 		/* find someone who already has the data */
-		uint32_t src_node = select_src_node(state);
-	
-		STARPU_ASSERT(src_node != requesting_node);
+		uint32_t src_node = 0;
+
+		/* if the data is in read only mode, there is no need for a source */
+		if (read)
+		{
+			src_node = select_src_node(state);
+			STARPU_ASSERT(src_node != requesting_node);
+		}
 	
 		unsigned src_is_a_gpu = (get_node_kind(src_node) == CUDA_RAM);
 		unsigned dst_is_a_gpu = (get_node_kind(requesting_node) == CUDA_RAM);
 
 		/* we have to perform 2 successive requests for GPU->GPU transfers */
-		if (src_is_a_gpu && dst_is_a_gpu) {
+		if (read && (src_is_a_gpu && dst_is_a_gpu)) {
 			unsigned reuse_r_src_to_ram;
 			data_request_t r_src_to_ram;
 			data_request_t r_ram_to_dst;

+ 5 - 2
src/datawizard/copy-driver.c

@@ -192,8 +192,11 @@ static int copy_data_1_to_1_generic(data_state *state, uint32_t src_node, uint32
 int __attribute__((warn_unused_result)) driver_copy_data_1_to_1(data_state *state, uint32_t src_node, 
 		uint32_t dst_node, unsigned donotread, struct data_request_s *req, unsigned may_alloc)
 {
-	STARPU_ASSERT(state->per_node[src_node].allocated);
-	STARPU_ASSERT(state->per_node[src_node].refcnt);
+	if (req->read)
+	{
+		STARPU_ASSERT(state->per_node[src_node].allocated);
+		STARPU_ASSERT(state->per_node[src_node].refcnt);
+	}
 
 	int ret_alloc, ret_copy;
 	unsigned __attribute__((unused)) com_id = 0;

+ 16 - 6
src/datawizard/data_request.c

@@ -97,7 +97,9 @@ data_request_t create_data_request(data_state *state, uint32_t src_node, uint32_
 	state->per_node[dst_node].request = r;
 
 	state->per_node[dst_node].refcnt++;
-	state->per_node[src_node].refcnt++;
+
+	if (read)
+		state->per_node[src_node].refcnt++;
 
 	r->refcnt = 1;
 
@@ -175,8 +177,11 @@ void post_data_request(data_request_t r, uint32_t handling_node)
 	int res;
 //	fprintf(stderr, "POST REQUEST\n");
 
-	STARPU_ASSERT(r->state->per_node[r->src_node].allocated);
-	STARPU_ASSERT(r->state->per_node[r->src_node].refcnt);
+	if (r->read)
+	{
+		STARPU_ASSERT(r->state->per_node[r->src_node].allocated);
+		STARPU_ASSERT(r->state->per_node[r->src_node].refcnt);
+	}
 
 	/* insert the request in the proper list */
 	res = pthread_mutex_lock(&data_requests_list_mutex[handling_node]);
@@ -210,7 +215,9 @@ static void handle_data_request_completion(data_request_t r)
 	r->completed = 1;
 	
 	state->per_node[r->dst_node].refcnt--;
-	state->per_node[r->src_node].refcnt--;
+
+	if (r->read)
+		state->per_node[r->src_node].refcnt--;
 
 	r->refcnt--;
 
@@ -239,8 +246,11 @@ static int handle_data_request(data_request_t r, unsigned may_alloc)
 
 	starpu_spin_lock(&r->lock);
 
-	STARPU_ASSERT(state->per_node[r->src_node].allocated);
-	STARPU_ASSERT(state->per_node[r->src_node].refcnt);
+	if (r->read)
+	{
+		STARPU_ASSERT(state->per_node[r->src_node].allocated);
+		STARPU_ASSERT(state->per_node[r->src_node].refcnt);
+	}
 
 	/* perform the transfer */
 	/* the header of the data must be locked by the worker that submitted the request */

+ 0 - 3
src/datawizard/hierarchy.c

@@ -60,9 +60,6 @@ void register_new_data(data_state *state, uint32_t home_node, uint32_t wb_mask)
 
 	state->is_not_important = 0;
 
-	/* make sure we do have a valid copy */
-	STARPU_ASSERT(home_node < MAXNODES);
-
 	state->wb_mask = wb_mask;
 
 	/* that new data is invalid from all nodes perpective except for the

+ 4 - 0
tests/Makefile.am

@@ -80,6 +80,7 @@ check_PROGRAMS += 				\
 	core/static_restartable_tag		\
 	core/empty_task				\
 	core/empty_task_sync_point		\
+	datawizard/write_only_tmp_buffer	\
 	errorcheck/starpu_init_noworker		\
 	helper/cublas_init			\
 	helper/pinned_memory			\
@@ -124,6 +125,9 @@ core_empty_task_SOURCES =			\
 core_empty_task_sync_point_SOURCES =		\
 	core/empty_task_sync_point.c
 
+datawizard_write_only_tmp_buffer_SOURCES =	\
+	datawizard/write_only_tmp_buffer.c
+
 errorcheck_starpu_init_noworker_SOURCES =	\
 	errorcheck/starpu_init_noworker.c
 

+ 113 - 0
tests/datawizard/write_only_tmp_buffer.c

@@ -0,0 +1,113 @@
+/*
+ * StarPU
+ * Copyright (C) INRIA 2008-2009 (see AUTHORS file)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <starpu.h>
+#include <stdlib.h>
+
+#define VECTORSIZE	1024
+
+starpu_data_handle v_handle;
+
+static void cuda_codelet_null(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
+{
+	fprintf(stderr, "POUET CUDA\n");
+	int *buf = (int *)buffers[0].vector.ptr;
+
+	cudaMemset(buf, 42, sizeof(int));
+}
+
+static void core_codelet_null(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
+{
+	fprintf(stderr, "POUET CUDA\n");
+	int *buf = (int *)buffers[0].vector.ptr;
+
+	*buf = 42;
+}
+
+static void display_var(starpu_data_interface_t *buffers, __attribute__ ((unused)) void *_args)
+{
+	int *buf = (int *)buffers[0].vector.ptr;
+
+	fprintf(stderr, "Value = %d (should be %d)\n", *buf, 42);
+	fflush(stderr);
+
+	if (*buf != 42)
+		exit(-1);
+}
+
+static starpu_codelet cl = {
+	.where = CORE|CUDA,
+	.core_func = core_codelet_null,
+	.cuda_func = cuda_codelet_null,
+	.nbuffers = 1
+};
+
+static starpu_codelet display_cl = {
+	.where = CORE,
+	.core_func = display_var,
+	.nbuffers = 1
+};
+
+
+int main(int argc, char **argv)
+{
+	starpu_init(NULL);
+
+	/* The buffer should never be explicitely allocated */
+	starpu_register_vector_data(&v_handle, (uint32_t)-1, (uintptr_t)NULL, VECTORSIZE, sizeof(int));
+
+	struct starpu_task *task = starpu_task_create();
+		task->cl = &cl;
+		task->buffers[0].handle = v_handle;
+		task->buffers[0].mode = STARPU_W;
+
+		task->synchronous = 1;
+
+	int ret = starpu_submit_task(task);
+	if (ret == -ENODEV)
+			goto enodev;
+
+//	starpu_wait_task(task);
+
+	task = starpu_task_create();
+		task->cl = &display_cl;
+		task->buffers[0].handle = v_handle;
+		task->buffers[0].mode = STARPU_R;
+
+		task->synchronous = 1;
+
+	ret = starpu_submit_task(task);
+	if (ret == -ENODEV)
+			goto enodev;
+
+//	starpu_wait_task(task);
+
+	/* this should get rid of automatically allocated buffers */
+	starpu_delete_data(v_handle);
+
+	starpu_shutdown();
+
+	return 0;
+
+enodev:
+	fprintf(stderr, "WARNING: No one can execute this task\n");
+	/* yes, we do not perform the computation but we did detect that no one
+ 	 * could perform the kernel, so this is not an error from StarPU */
+	return 0;
+}