|
@@ -177,15 +177,6 @@ static void cublas_mult(void *descr[], void *arg)
|
|
|
|
|
|
cudaStream_t stream = starpu_cuda_get_local_stream();
|
|
|
|
|
|
- if (nxC == ldC)
|
|
|
- cudaMemsetAsync(subC, 0, sizeof(*subC) * nxC * nyC, stream);
|
|
|
- else
|
|
|
- {
|
|
|
- unsigned i;
|
|
|
- for (i = 0; i < nyC; i++)
|
|
|
- cudaMemsetAsync(subC + i*ldC, 0, sizeof(*subC) * nxC, stream);
|
|
|
- }
|
|
|
-
|
|
|
cublasStatus_t status = CUBLAS_GEMM(starpu_cublas_get_local_handle(),
|
|
|
CUBLAS_OP_N, CUBLAS_OP_N,
|
|
|
nxC, nyC, nyA,
|
|
@@ -213,15 +204,6 @@ void cpu_mult(void *descr[], void *arg)
|
|
|
|
|
|
int worker_size = starpu_combined_worker_get_size();
|
|
|
|
|
|
- if (nxC == ldC)
|
|
|
- memset(subC, 0, sizeof(*subC) * nxC * nyC);
|
|
|
- else
|
|
|
- {
|
|
|
- unsigned i;
|
|
|
- for (i = 0; i < nyC; i++)
|
|
|
- memset(subC + i*ldC, 0, sizeof(*subC) * nxC);
|
|
|
- }
|
|
|
-
|
|
|
if (worker_size == 1)
|
|
|
{
|
|
|
/* Sequential CPU task */
|