|
@@ -47,7 +47,6 @@ void vector_cuda_func(void *descr[], void *cl_arg __attribute__((unused)))
|
|
int nx = STARPU_VECTOR_GET_NX(descr[0]);
|
|
int nx = STARPU_VECTOR_GET_NX(descr[0]);
|
|
|
|
|
|
float sum = cublasSasum(nx, matrix, 1);
|
|
float sum = cublasSasum(nx, matrix, 1);
|
|
- cudaThreadSynchronize();
|
|
|
|
sum /= nx;
|
|
sum /= nx;
|
|
|
|
|
|
cudaMemcpyAsync(matrix, &sum, sizeof(matrix[0]), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream());
|
|
cudaMemcpyAsync(matrix, &sum, sizeof(matrix[0]), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream());
|
|
@@ -79,7 +78,6 @@ void matrix_cuda_func(void *descr[], void *cl_arg __attribute__((unused)))
|
|
int ny = STARPU_MATRIX_GET_NY(descr[0]);
|
|
int ny = STARPU_MATRIX_GET_NY(descr[0]);
|
|
|
|
|
|
float sum = cublasSasum(nx*ny, matrix, 1);
|
|
float sum = cublasSasum(nx*ny, matrix, 1);
|
|
- cudaThreadSynchronize();
|
|
|
|
sum /= nx*ny;
|
|
sum /= nx*ny;
|
|
|
|
|
|
cudaMemcpyAsync(matrix, &sum, sizeof(matrix[0]), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream());
|
|
cudaMemcpyAsync(matrix, &sum, sizeof(matrix[0]), cudaMemcpyHostToDevice, starpu_cuda_get_local_stream());
|
|
@@ -196,6 +194,7 @@ int main(int argc, char **argv)
|
|
ret = starpu_init(NULL);
|
|
ret = starpu_init(NULL);
|
|
if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
|
|
if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
|
|
STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
|
|
STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
|
|
|
|
+ starpu_helper_cublas_init();
|
|
|
|
|
|
devices = starpu_cpu_worker_get_count();
|
|
devices = starpu_cpu_worker_get_count();
|
|
if (devices)
|
|
if (devices)
|
|
@@ -220,6 +219,7 @@ int main(int argc, char **argv)
|
|
|
|
|
|
error:
|
|
error:
|
|
if (ret == -ENODEV) ret=STARPU_TEST_SKIPPED;
|
|
if (ret == -ENODEV) ret=STARPU_TEST_SKIPPED;
|
|
|
|
+ starpu_helper_cublas_shutdown();
|
|
starpu_shutdown();
|
|
starpu_shutdown();
|
|
STARPU_RETURN(ret);
|
|
STARPU_RETURN(ret);
|
|
}
|
|
}
|