浏览代码

mult: no need to memset C, beta is already zero

Samuel Thibault 5 年之前
父节点
当前提交
e0c3bb6326
共有 1 个文件被更改,包括 0 次插入 和 18 次删除
  1. 0 18
      examples/mult/xgemm.c

+ 0 - 18
examples/mult/xgemm.c

@@ -177,15 +177,6 @@ static void cublas_mult(void *descr[], void *arg)
 
 	cudaStream_t stream = starpu_cuda_get_local_stream();
 
-	if (nxC == ldC)
-		cudaMemsetAsync(subC, 0, sizeof(*subC) * nxC * nyC, stream);
-	else
-	{
-		unsigned i;
-		for (i = 0; i < nyC; i++)
-			cudaMemsetAsync(subC + i*ldC, 0, sizeof(*subC) * nxC, stream);
-	}
-
 	cublasStatus_t status = CUBLAS_GEMM(starpu_cublas_get_local_handle(),
 			CUBLAS_OP_N, CUBLAS_OP_N,
 			nxC, nyC, nyA,
@@ -213,15 +204,6 @@ void cpu_mult(void *descr[], void *arg)
 
 	int worker_size = starpu_combined_worker_get_size();
 
-	if (nxC == ldC)
-		memset(subC, 0, sizeof(*subC) * nxC * nyC);
-	else
-	{
-		unsigned i;
-		for (i = 0; i < nyC; i++)
-			memset(subC + i*ldC, 0, sizeof(*subC) * nxC);
-	}
-
 	if (worker_size == 1)
 	{
 		/* Sequential CPU task */