9 years ago · e5b21e47fd
--- a/ChangeLog
+++ b/ChangeLog
@@ -57,6 +57,9 @@ New features:
 
																   * Add starpu_data_acquire_try and starpu_data_acquire_on_node_try.
															
 
																   * Add NVCC_CC environment variable.
															
 
																   * Add -no-foo options to starpu_fxt_tool to make traces lighter
															
 
																+  * Add starpu_cusparse_init/shutdown/get_local_handle for proper CUDA
															
 
																+    overlapping with cusparse.
															
 
																+
															
 
																 Small changes:
															
 
																   * Output generated through STARPU_MPI_COMM has been modified to
															
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,6 +1,6 @@
 
																 # StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																 #
															
 
																-# Copyright (C) 2009-2016  Université de Bordeaux
															
 
																+# Copyright (C) 2009-2017  Université de Bordeaux
															
 
																 # Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016  CNRS
															
 
																 # Copyright (C) 2014  INRIA
															
 
																 # Copyright (C) 2016  Inria
															
@@ -99,6 +99,7 @@ versinclude_HEADERS = 				\
 
																 	include/starpu_disk.h			\
															
 
																 	include/starpu_cublas.h			\
															
 
																 	include/starpu_cublas_v2.h		\
															
 
																+	include/starpu_cusparse.h		\
															
 
																 	include/starpu_driver.h			\
															
 
																 	include/starpu_stdlib.h			\
															
 
																 	include/starpu_thread.h			\
															
--- a/configure.ac
+++ b/configure.ac
@@ -1116,6 +1116,8 @@ if test x$enable_cuda = xyes; then
 
																 	fi
															
 
																 	AC_CHECK_HEADERS([cuda_gl_interop.h])
															
 
																+
															
 
																+	AC_CHECK_LIB([cusparse], [cusparseCreate])
															
 
																 fi
															
 
																 dnl Hey dude, are you around?
															
--- a/doc/doxygen/chapters/210_check_list_performance.doxy
+++ b/doc/doxygen/chapters/210_check_list_performance.doxy
@@ -65,9 +65,13 @@ Calling starpu_cublas_init() makes StarPU already do appropriate calls for the
 
																 CUBLAS library. Some libraries like Magma may however change the current stream of CUBLAS v1,
															
 
																 one then has to call <c>cublasSetKernelStream(starpu_cuda_get_local_stream())</c> at
															
 
																 the beginning of the codelet to make sure that CUBLAS is really using the proper
															
 
																-stream. When using CUBLAS v2, starpu_cublas_local_handle() can be called to queue CUBLAS
															
 
																+stream. When using CUBLAS v2, starpu_cublas_get_local_handle() can be called to queue CUBLAS
															
 
																 kernels with the proper configuration.
															
 
																+Similarly, calling starpu_cusparse_init() makes StarPU create CUSPARSE handles
															
 
																+on each CUDA device, starpu_cusparse_get_local_handle() can then be used to
															
 
																+queue CUSPARSE kernels with the proper configuration.
															
 
																+
															
 
																 If the kernel can be made to only use this local stream or other self-allocated
															
 
																 streams, i.e. the whole kernel submission can be made asynchronous, then
															
 
																 one should enable asynchronous execution of the kernel.  That means setting
															
--- a/doc/doxygen/chapters/api/cuda_extensions.doxy
+++ b/doc/doxygen/chapters/api/cuda_extensions.doxy
@@ -95,4 +95,21 @@ Report a cublas error.
 
																 Calls starpu_cublas_report_error(), passing the current
															
 
																 function, file and line position.
															
 
																+\fn void starpu_cusparse_init(void)
															
 
																+\ingroup API_CUDA_Extensions
															
 
																+Calling starpu_cusparse_init() will initialize CUSPARSE on every CUDA device
															
 
																+controlled by StarPU. This call blocks until CUSPARSE has been properly
															
 
																+initialized on every device.
															
 
																+
															
 
																+\fn cusparseHandle_t starpu_cusparse_get_local_handle(void)
															
 
																+\ingroup API_CUDA_Extensions
															
 
																+This function returns the CUSPARSE handle to be used to queue CUSPARSE
															
 
																+kernels. It is properly initialized and configured for multistream by
															
 
																+starpu_cusparse_init().
															
 
																+
															
 
																+\fn void starpu_cusparse_shutdown(void)
															
 
																+\ingroup API_CUDA_Extensions
															
 
																+This function synchronously deinitializes the CUSPARSE library on
															
 
																+every CUDA device.
															
 
																+
															
 
																 */
															
--- a/include/starpu.h
+++ b/include/starpu.h
@@ -62,6 +62,7 @@ typedef UINT_PTR uintptr_t;
 
																 #include <starpu_rand.h>
															
 
																 #include <starpu_cuda.h>
															
 
																 #include <starpu_cublas.h>
															
 
																+#include <starpu_cusparse.h>
															
 
																 #include <starpu_bound.h>
															
 
																 #include <starpu_hash.h>
															
 
																 #include <starpu_profiling.h>
															
--- a/include/starpu_cublas_v2.h
+++ b/include/starpu_cublas_v2.h
@@ -1,6 +1,6 @@
 
																 /* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																  *
															
 
																- * Copyright (C) 2010-2012  Université de Bordeaux
															
 
																+ * Copyright (C) 2010-2012, 2017  Université de Bordeaux
															
 
																  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
															
 
																  *
															
 
																  * StarPU is free software; you can redistribute it and/or modify
															
@@ -18,6 +18,8 @@
 
																 #ifndef __STARPU_CUBLAS_V2_H__
															
 
																 #define __STARPU_CUBLAS_V2_H__
															
 
																+#if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
															
 
																+
															
 
																 #include <cublas_v2.h>
															
 
																 #ifdef __cplusplus
															
@@ -31,4 +33,6 @@ cublasHandle_t starpu_cublas_get_local_handle(void);
 
																 }
															
 
																 #endif
															
 
																+#endif
															
 
																+
															
 
																 #endif /* __STARPU_CUBLAS_V2_H__ */
															
--- a/include/starpu_cusparse.h
+++ b/include/starpu_cusparse.h
@@ -0,0 +1,40 @@
 
																+/* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																+ *
															
 
																+ * Copyright (C) 2010-2012, 2017  Université de Bordeaux
															
 
																+ * Copyright (C) 2010, 2011, 2012, 2013  CNRS
															
 
																+ *
															
 
																+ * StarPU is free software; you can redistribute it and/or modify
															
 
																+ * it under the terms of the GNU Lesser General Public License as published by
															
 
																+ * the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																+ * your option) any later version.
															
 
																+ *
															
 
																+ * StarPU is distributed in the hope that it will be useful, but
															
 
																+ * WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																+ *
															
 
																+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																+ */
															
 
																+
															
 
																+#ifndef __STARPU_CUSPARSE_H__
															
 
																+#define __STARPU_CUSPARSE_H__
															
 
																+
															
 
																+#if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
															
 
																+
															
 
																+#include <cusparse.h>
															
 
																+
															
 
																+#ifdef __cplusplus
															
 
																+extern "C"
															
 
																+{
															
 
																+#endif
															
 
																+
															
 
																+void starpu_cusparse_init(void);
															
 
																+cusparseHandle_t starpu_cusparse_get_local_handle(void);
															
 
																+void starpu_cusparse_shutdown(void);
															
 
																+
															
 
																+#ifdef __cplusplus
															
 
																+}
															
 
																+#endif
															
 
																+
															
 
																+#endif
															
 
																+
															
 
																+#endif /* __STARPU_CUSPARSE_H__ */
															
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -318,6 +318,7 @@ endif
 
																 endif
															
 
																 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/cuda/starpu_cublas.c
															
 
																+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/cuda/starpu_cusparse.c
															
 
																 if STARPU_USE_OPENCL
															
 
																 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/opencl/driver_opencl.c
															
--- a/src/drivers/cuda/starpu_cusparse.c
+++ b/src/drivers/cuda/starpu_cusparse.c
@@ -0,0 +1,71 @@
 
																+/* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																+ *
															
 
																+ * Copyright (C) 2009-2012, 2014, 2017  Université de Bordeaux
															
 
																+ * Copyright (C) 2010, 2011, 2012, 2017  CNRS
															
 
																+ *
															
 
																+ * StarPU is free software; you can redistribute it and/or modify
															
 
																+ * it under the terms of the GNU Lesser General Public License as published by
															
 
																+ * the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																+ * your option) any later version.
															
 
																+ *
															
 
																+ * StarPU is distributed in the hope that it will be useful, but
															
 
																+ * WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																+ *
															
 
																+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																+ */
															
 
																+
															
 
																+#include <common/config.h>
															
 
																+
															
 
																+#include <starpu.h>
															
 
																+#include <starpu_cuda.h>
															
 
																+#include <core/workers.h>
															
 
																+
															
 
																+#ifdef HAVE_LIBCUSPARSE
															
 
																+#include <cusparse.h>
															
 
																+
															
 
																+static cusparseHandle_t cusparse_handles[STARPU_NMAXWORKERS];
															
 
																+static cusparseHandle_t main_handle;
															
 
																+
															
 
																+static void init_cusparse_func(void *args STARPU_ATTRIBUTE_UNUSED)
															
 
																+{
															
 
																+	cusparseCreate(&cusparse_handles[starpu_worker_get_id_check()]);
															
 
																+	cusparseSetStream(cusparse_handles[starpu_worker_get_id_check()], starpu_cuda_get_local_stream());
															
 
																+}
															
 
																+
															
 
																+static void shutdown_cusparse_func(void *args STARPU_ATTRIBUTE_UNUSED)
															
 
																+{
															
 
																+	cusparseDestroy(cusparse_handles[starpu_worker_get_id_check()]);
															
 
																+}
															
 
																+#endif
															
 
																+
															
 
																+void starpu_cusparse_init(void)
															
 
																+{
															
 
																+#ifdef HAVE_LIBCUSPARSE
															
 
																+	starpu_execute_on_each_worker(init_cusparse_func, NULL, STARPU_CUDA);
															
 
																+
															
 
																+	if (cusparseCreate(&main_handle) != CUSPARSE_STATUS_SUCCESS)
															
 
																+		main_handle = NULL;
															
 
																+#endif
															
 
																+}
															
 
																+
															
 
																+void starpu_cusparse_shutdown(void)
															
 
																+{
															
 
																+#ifdef HAVE_LIBCUSPARSE
															
 
																+	starpu_execute_on_each_worker(shutdown_cusparse_func, NULL, STARPU_CUDA);
															
 
																+
															
 
																+	if (main_handle)
															
 
																+		cusparseDestroy(main_handle);
															
 
																+#endif
															
 
																+}
															
 
																+
															
 
																+#ifdef HAVE_LIBCUSPARSE
															
 
																+cusparseHandle_t starpu_cusparse_get_local_handle(void)
															
 
																+{
															
 
																+	int workerid = starpu_worker_get_id();
															
 
																+	if (workerid >= 0)
															
 
																+		return cusparse_handles[workerid];
															
 
																+	else
															
 
																+		return main_handle;
															
 
																+}
															
 
																+#endif
															
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -154,6 +154,7 @@ myPROGRAMS +=					\
 
																 	errorcheck/starpu_init_noworker		\
															
 
																 	errorcheck/invalid_tasks		\
															
 
																 	helper/cublas_init			\
															
 
																+	helper/cusparse_init			\
															
 
																 	helper/pinned_memory			\
															
 
																 	helper/execute_on_all			\
															
 
																 	microbenchs/display_structures_size	\
															
--- a/tests/helper/cusparse_init.c
+++ b/tests/helper/cusparse_init.c
@@ -0,0 +1,72 @@
 
																+/* StarPU --- Runtime system for heterogeneous multicore architectures.
															
 
																+ *
															
 
																+ * Copyright (C) 2009, 2010, 2014, 2016-2017  Université de Bordeaux
															
 
																+ * Copyright (C) 2010, 2011, 2012  CNRS
															
 
																+ *
															
 
																+ * StarPU is free software; you can redistribute it and/or modify
															
 
																+ * it under the terms of the GNU Lesser General Public License as published by
															
 
																+ * the Free Software Foundation; either version 2.1 of the License, or (at
															
 
																+ * your option) any later version.
															
 
																+ *
															
 
																+ * StarPU is distributed in the hope that it will be useful, but
															
 
																+ * WITHOUT ANY WARRANTY; without even the implied warranty of
															
 
																+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
															
 
																+ *
															
 
																+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
															
 
																+ */
															
 
																+
															
 
																+#include <stdio.h>
															
 
																+#include <unistd.h>
															
 
																+#include <errno.h>
															
 
																+#include <starpu.h>
															
 
																+#include <stdlib.h>
															
 
																+#include "../helper.h"
															
 
																+
															
 
																+/*
															
 
																+ * Test initializing cusparse, and how much time that takes
															
 
																+ */
															
 
																+
															
 
																+static double start;
															
 
																+static double end;
															
 
																+
															
 
																+//static float *data = NULL;
															
 
																+
															
 
																+int main(int argc, char **argv)
															
 
																+{
															
 
																+	int ret;
															
 
																+
															
 
																+	ret = starpu_initialize(NULL, &argc, &argv);
															
 
																+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
															
 
																+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
															
 
																+
															
 
																+	unsigned ngpus = starpu_cuda_worker_get_count();
															
 
																+
															
 
																+	double init_timing;
															
 
																+	double shutdown_timing;
															
 
																+
															
 
																+	start = starpu_timing_now();
															
 
																+	starpu_cusparse_init();
															
 
																+	end = starpu_timing_now();
															
 
																+	init_timing = end - start;
															
 
																+
															
 
																+	start = starpu_timing_now();
															
 
																+	starpu_cusparse_shutdown();
															
 
																+	end = starpu_timing_now();
															
 
																+	shutdown_timing = end - start;
															
 
																+
															
 
																+	FPRINTF(stderr, "Total:\n");
															
 
																+	FPRINTF(stderr, "\tinit: %2.2f us\n", init_timing/(1000));
															
 
																+	FPRINTF(stderr, "\tshutdown: %2.2f us\n", shutdown_timing/(1000));
															
 
																+
															
 
																+	if (ngpus != 0)
															
 
																+	{
															
 
																+		FPRINTF(stderr, "per-GPU (#gpu = %u):\n", ngpus);
															
 
																+
															
 
																+		FPRINTF(stderr, "\tinit: %2.2f us\n", init_timing/(1000*ngpus));
															
 
																+		FPRINTF(stderr, "\tshutdown: %2.2f us\n", shutdown_timing/(1000*ngpus));
															
 
																+	}
															
 
																+
															
 
																+	starpu_shutdown();
															
 
																+
															
 
																+	return EXIT_SUCCESS;
															
 
																+}