Browse Source

Fix cudaMemset synchronization

Samuel Thibault 6 years ago
parent
commit
babb4e46f0
2 changed files with 5 additions and 2 deletions
  1. +4 -1
      src/core/perfmodel/perfmodel_bus.c
  2. +1 -1
      tests/datawizard/manual_reduction.c

+ 4 - 1
src/core/perfmodel/perfmodel_bus.c

@@ -1,7 +1,7 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
  * Copyright (C) 2011-2014,2016,2017                      Inria
- * Copyright (C) 2009-2018                                Université de Bordeaux
+ * Copyright (C) 2009-2019                                Université de Bordeaux
  * Copyright (C) 2010-2017                                CNRS
  * Copyright (C) 2013                                     Corentin Salingue
  *
@@ -217,6 +217,7 @@ static void measure_bandwidth_between_host_and_dev_on_numa_with_cuda(int dev, in
 	/* Fill them */
 	memset(h_buffer, 0, size);
 	cudaMemset(d_buffer, 0, size);
+	cudaThreadSynchronize();
 
 	/* hack to avoid third party libs to rebind threads */
 	_starpu_bind_thread_on_cpu(cpu, STARPU_NOWORKERID, NULL);
@@ -335,6 +336,7 @@ static void measure_bandwidth_between_dev_and_dev_cuda(int src, int dst)
 	cures = cudaMalloc((void **)&s_buffer, size);
 	STARPU_ASSERT(cures == cudaSuccess);
 	cudaMemset(s_buffer, 0, size);
+	cudaThreadSynchronize();
 
 	/* Initialize CUDA context on the destination */
 	/* We do not need to enable OpenGL interoperability at this point,
@@ -360,6 +362,7 @@ static void measure_bandwidth_between_dev_and_dev_cuda(int src, int dst)
 	cures = cudaMalloc((void **)&d_buffer, size);
 	STARPU_ASSERT(cures == cudaSuccess);
 	cudaMemset(d_buffer, 0, size);
+	cudaThreadSynchronize();
 
 	unsigned iter;
 	double timing;

+ 1 - 1
tests/datawizard/manual_reduction.c

@@ -74,7 +74,7 @@ static void initialize_per_worker_handle(void *arg)
 			{
 				STARPU_CUDA_REPORT_ERROR(status);
 			}
-			status = cudaMemset((void *)per_worker[workerid], 0, sizeof(variable));
+			status = cudaMemsetAsync((void *)per_worker[workerid], 0, sizeof(variable), starpu_cuda_get_local_stream());
 			if (status)
 				STARPU_CUDA_REPORT_ERROR(status);
 			break;