Browse Source

Add starpu_cusparse_init/shutdown/get_local_handle for proper CUDA overlapping with cusparse.

Samuel Thibault 8 years ago
parent
commit
e5b21e47fd

+ 3 - 0
ChangeLog

@@ -57,6 +57,9 @@ New features:
   * Add starpu_data_acquire_try and starpu_data_acquire_on_node_try.
   * Add NVCC_CC environment variable.
   * Add -no-foo options to starpu_fxt_tool to make traces lighter
+  * Add starpu_cusparse_init/shutdown/get_local_handle for proper CUDA
+    overlapping with cusparse.
+
 
 Small changes:
   * Output generated through STARPU_MPI_COMM has been modified to

+ 2 - 1
Makefile.am

@@ -1,6 +1,6 @@
 # StarPU --- Runtime system for heterogeneous multicore architectures.
 #
-# Copyright (C) 2009-2016  Université de Bordeaux
+# Copyright (C) 2009-2017  Université de Bordeaux
 # Copyright (C) 2010, 2011, 2012, 2013, 2015, 2016  CNRS
 # Copyright (C) 2014  INRIA
 # Copyright (C) 2016  Inria
@@ -99,6 +99,7 @@ versinclude_HEADERS = 				\
 	include/starpu_disk.h			\
 	include/starpu_cublas.h			\
 	include/starpu_cublas_v2.h		\
+	include/starpu_cusparse.h		\
 	include/starpu_driver.h			\
 	include/starpu_stdlib.h			\
 	include/starpu_thread.h			\

+ 2 - 0
configure.ac

@@ -1116,6 +1116,8 @@ if test x$enable_cuda = xyes; then
 	fi
 
 	AC_CHECK_HEADERS([cuda_gl_interop.h])
+
+	AC_CHECK_LIB([cusparse], [cusparseCreate])
 fi
 
 dnl Hey dude, are you around?

+ 5 - 1
doc/doxygen/chapters/210_check_list_performance.doxy

@@ -65,9 +65,13 @@ Calling starpu_cublas_init() makes StarPU already do appropriate calls for the
 CUBLAS library. Some libraries like Magma may however change the current stream of CUBLAS v1,
 one then has to call <c>cublasSetKernelStream(starpu_cuda_get_local_stream())</c> at
 the beginning of the codelet to make sure that CUBLAS is really using the proper
-stream. When using CUBLAS v2, starpu_cublas_local_handle() can be called to queue CUBLAS
+stream. When using CUBLAS v2, starpu_cublas_get_local_handle() can be called to queue CUBLAS
 kernels with the proper configuration.
 
+Similarly, calling starpu_cusparse_init() makes StarPU create CUSPARSE handles
+on each CUDA device, starpu_cusparse_get_local_handle() can then be used to
+queue CUSPARSE kernels with the proper configuration.
+
 If the kernel can be made to only use this local stream or other self-allocated
 streams, i.e. the whole kernel submission can be made asynchronous, then
 one should enable asynchronous execution of the kernel.  That means setting

+ 17 - 0
doc/doxygen/chapters/api/cuda_extensions.doxy

@@ -95,4 +95,21 @@ Report a cublas error.
 Calls starpu_cublas_report_error(), passing the current
 function, file and line position.
 
+\fn void starpu_cusparse_init(void)
+\ingroup API_CUDA_Extensions
+Calling starpu_cusparse_init() will initialize CUSPARSE on every CUDA device
+controlled by StarPU. This call blocks until CUSPARSE has been properly
+initialized on every device.
+
+\fn cusparseHandle_t starpu_cusparse_get_local_handle(void)
+\ingroup API_CUDA_Extensions
+This function returns the CUSPARSE handle to be used to queue CUSPARSE
+kernels. It is properly initialized and configured for multistream by
+starpu_cusparse_init().
+
+\fn void starpu_cusparse_shutdown(void)
+\ingroup API_CUDA_Extensions
+This function synchronously deinitializes the CUSPARSE library on
+every CUDA device.
+
 */

+ 1 - 0
include/starpu.h

@@ -62,6 +62,7 @@ typedef UINT_PTR uintptr_t;
 #include <starpu_rand.h>
 #include <starpu_cuda.h>
 #include <starpu_cublas.h>
+#include <starpu_cusparse.h>
 #include <starpu_bound.h>
 #include <starpu_hash.h>
 #include <starpu_profiling.h>

+ 5 - 1
include/starpu_cublas_v2.h

@@ -1,6 +1,6 @@
 /* StarPU --- Runtime system for heterogeneous multicore architectures.
  *
- * Copyright (C) 2010-2012  Université de Bordeaux
+ * Copyright (C) 2010-2012, 2017  Université de Bordeaux
  * Copyright (C) 2010, 2011, 2012, 2013  CNRS
  *
  * StarPU is free software; you can redistribute it and/or modify
@@ -18,6 +18,8 @@
 #ifndef __STARPU_CUBLAS_V2_H__
 #define __STARPU_CUBLAS_V2_H__
 
+#if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
+
 #include <cublas_v2.h>
 
 #ifdef __cplusplus
@@ -31,4 +33,6 @@ cublasHandle_t starpu_cublas_get_local_handle(void);
 }
 #endif
 
+#endif
+
 #endif /* __STARPU_CUBLAS_V2_H__ */

+ 40 - 0
include/starpu_cusparse.h

@@ -0,0 +1,40 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2010-2012, 2017  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012, 2013  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#ifndef __STARPU_CUSPARSE_H__
+#define __STARPU_CUSPARSE_H__
+
+#if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
+
+#include <cusparse.h>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+void starpu_cusparse_init(void);
+cusparseHandle_t starpu_cusparse_get_local_handle(void);
+void starpu_cusparse_shutdown(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
+#endif /* __STARPU_CUSPARSE_H__ */

+ 1 - 0
src/Makefile.am

@@ -318,6 +318,7 @@ endif
 endif
 
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/cuda/starpu_cublas.c
+libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/cuda/starpu_cusparse.c
 
 if STARPU_USE_OPENCL
 libstarpu_@STARPU_EFFECTIVE_VERSION@_la_SOURCES += drivers/opencl/driver_opencl.c

+ 71 - 0
src/drivers/cuda/starpu_cusparse.c

@@ -0,0 +1,71 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009-2012, 2014, 2017  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012, 2017  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <common/config.h>
+
+#include <starpu.h>
+#include <starpu_cuda.h>
+#include <core/workers.h>
+
+#ifdef HAVE_LIBCUSPARSE
+#include <cusparse.h>
+
+static cusparseHandle_t cusparse_handles[STARPU_NMAXWORKERS];
+static cusparseHandle_t main_handle;
+
+static void init_cusparse_func(void *args STARPU_ATTRIBUTE_UNUSED)
+{
+	cusparseCreate(&cusparse_handles[starpu_worker_get_id_check()]);
+	cusparseSetStream(cusparse_handles[starpu_worker_get_id_check()], starpu_cuda_get_local_stream());
+}
+
+static void shutdown_cusparse_func(void *args STARPU_ATTRIBUTE_UNUSED)
+{
+	cusparseDestroy(cusparse_handles[starpu_worker_get_id_check()]);
+}
+#endif
+
+void starpu_cusparse_init(void)
+{
+#ifdef HAVE_LIBCUSPARSE
+	starpu_execute_on_each_worker(init_cusparse_func, NULL, STARPU_CUDA);
+
+	if (cusparseCreate(&main_handle) != CUSPARSE_STATUS_SUCCESS)
+		main_handle = NULL;
+#endif
+}
+
+void starpu_cusparse_shutdown(void)
+{
+#ifdef HAVE_LIBCUSPARSE
+	starpu_execute_on_each_worker(shutdown_cusparse_func, NULL, STARPU_CUDA);
+
+	if (main_handle)
+		cusparseDestroy(main_handle);
+#endif
+}
+
+#ifdef HAVE_LIBCUSPARSE
+cusparseHandle_t starpu_cusparse_get_local_handle(void)
+{
+	int workerid = starpu_worker_get_id();
+	if (workerid >= 0)
+		return cusparse_handles[workerid];
+	else
+		return main_handle;
+}
+#endif

+ 1 - 0
tests/Makefile.am

@@ -154,6 +154,7 @@ myPROGRAMS +=					\
 	errorcheck/starpu_init_noworker		\
 	errorcheck/invalid_tasks		\
 	helper/cublas_init			\
+	helper/cusparse_init			\
 	helper/pinned_memory			\
 	helper/execute_on_all			\
 	microbenchs/display_structures_size	\

+ 72 - 0
tests/helper/cusparse_init.c

@@ -0,0 +1,72 @@
+/* StarPU --- Runtime system for heterogeneous multicore architectures.
+ *
+ * Copyright (C) 2009, 2010, 2014, 2016-2017  Université de Bordeaux
+ * Copyright (C) 2010, 2011, 2012  CNRS
+ *
+ * StarPU is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * StarPU is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * See the GNU Lesser General Public License in COPYING.LGPL for more details.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <starpu.h>
+#include <stdlib.h>
+#include "../helper.h"
+
+/*
+ * Test initializing cusparse, and how much time that takes
+ */
+
+static double start;
+static double end;
+
+//static float *data = NULL;
+
+int main(int argc, char **argv)
+{
+	int ret;
+
+	ret = starpu_initialize(NULL, &argc, &argv);
+	if (ret == -ENODEV) return STARPU_TEST_SKIPPED;
+	STARPU_CHECK_RETURN_VALUE(ret, "starpu_init");
+
+	unsigned ngpus = starpu_cuda_worker_get_count();
+
+	double init_timing;
+	double shutdown_timing;
+
+	start = starpu_timing_now();
+	starpu_cusparse_init();
+	end = starpu_timing_now();
+	init_timing = end - start;
+
+	start = starpu_timing_now();
+	starpu_cusparse_shutdown();
+	end = starpu_timing_now();
+	shutdown_timing = end - start;
+
+	FPRINTF(stderr, "Total:\n");
+	FPRINTF(stderr, "\tinit: %2.2f us\n", init_timing/(1000));
+	FPRINTF(stderr, "\tshutdown: %2.2f us\n", shutdown_timing/(1000));
+
+	if (ngpus != 0)
+	{
+		FPRINTF(stderr, "per-GPU (#gpu = %u):\n", ngpus);
+
+		FPRINTF(stderr, "\tinit: %2.2f us\n", init_timing/(1000*ngpus));
+		FPRINTF(stderr, "\tshutdown: %2.2f us\n", shutdown_timing/(1000*ngpus));
+	}
+
+	starpu_shutdown();
+
+	return EXIT_SUCCESS;
+}