瀏覽代碼

Use cudaMemcpyAsync instead of cudaMemcpy

Samuel Thibault 12 年之前
父節點
當前提交
51a7e8c979
共有 1 個文件被更改,包括 3 次插入和 3 次删除
  1. 3 3
      gcc-plugin/examples/cholesky/cholesky_kernels.c

+ 3 - 3
gcc-plugin/examples/cholesky/cholesky_kernels.c

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2009, 2010  Université de Bordeaux 1
+ * Copyright (C) 2009, 2010, 2012  Université de Bordeaux 1
  * Copyright (C) 2010, 2011  Centre National de la Recherche Scientifique
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -177,8 +177,8 @@ static inline void chol_common_codelet_update_u11(float *sub11, unsigned nx, uns
 			for (z = 0; z < nx; z++)
 			{
 				float lambda11;
-				cudaMemcpy(&lambda11, &sub11[z+z*ld], sizeof(float), cudaMemcpyDeviceToHost);
-				cudaStreamSynchronize(0);
+				cudaMemcpyAsync(&lambda11, &sub11[z+z*ld], sizeof(float), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream());
+				cudaStreamSynchronize(starpu_cuda_get_local_stream());
 
 				STARPU_ASSERT(lambda11 != 0.0f);