瀏覽代碼

In case MAGMA is available (a recent version with pkg-config is now required),
use the POTRF kernel for Cholesky instead of recoding it in a VERY inefficient
way. This should significantly improve the scalability of Cholesky over MPI.

Cédric Augonnet 14 年之前
父節點
當前提交
08e2b87a27
共有 5 個文件被更改,包括 51 次插入72 次删除
  1. 9 64
      configure.ac
  2. 2 2
      examples/Makefile.am
  3. 20 3
      examples/cholesky/cholesky_kernels.c
  4. 2 2
      mpi/Makefile.am
  5. 18 1
      mpi/examples/cholesky/mpi_cholesky_kernels.c

+ 9 - 64
configure.ac

@@ -408,72 +408,17 @@ if test x$enable_cuda = xyes; then
 	fi
 fi
 
-enable_magma=no
+have_magma=no
 if test x$enable_cuda = xyes; then
-	# Should we use the MAGMA library (instead or in addition to CUBLAS)
-	magma_dir=/usr/local # default
-	enable_magma=maybe
-
-	AC_ARG_WITH(magma, [AS_HELP_STRING([--with-magma=<path>],
-			[specify that MAGMA should be used and its installation directory])],
-			[
-				if test x$withval = xyes; then
-					# No path was specified but MAGMA is explicitely enabled
-					enable_magma=yes
-				else
-				if test x$withval = xno; then
-					# MAGMA is explicitely disabled
-					enable_magma=no
-				else
-					# MAGMA is enabled and the PATH is given in $withval
-					enable_magma=yes
-					magma_dir="$withval"
-				fi
-				fi
-			], [])
-
-	# Do we have a valid MAGMA setup ?
-	if test x$enable_magma = xyes -o x$enable_magma = xmaybe; then
-		SAVED_LDFLAGS="${LDFLAGS}"
-		SAVED_CPPFLAGS="${CPPFLAGS}"
-		if test -d "$magma_dir/lib/"; then
-			LDFLAGS="${SAVED_LDFLAGS} -L$magma_dir/lib/ "
-			have_valid_magma=yes
-			STARPU_CHECK_LIB(BLAS, magmablas, main,,[have_valid_magma=no])
-			STARPU_CHECK_LIB(BLAS, magma, magmablas_sgemm,,[have_valid_magma=no])
-		fi
-
-		if test -d "$magma_dir/include/"; then
-			CPPFLAGS="${SAVED_CPPFLAGS} -I$magma_dir/include/ "
-			AC_CHECK_HEADER([magmablas.h],,[have_valid_magma=no])
-			#AC_CHECK_HEADER([magma.h],,[have_valid_magma=no])
-		fi
-
-		if test x$have_valid_magma = xno; then
-			# If MAGMA was explicitely required, this is an error
-			if test x$enable_magma = xyes; then
-				AC_MSG_ERROR([cannot find MAGMA])
-			fi
-
-			# Restore old flags and don't use MAGMA
-			LDFLAGS="${SAVED_LDFLAGS}"
-			CPPFLAGS="${SAVED_CPPFLAGS}"
-			enable_magma=no
-		else
-			enable_magma=yes
-		fi
-
-	else
-		have_valid_magma=no
-		enable_magma=no
-	fi
+	PKG_CHECK_MODULES([MAGMA],  [magma], [
+	AC_DEFINE([STARPU_HAVE_MAGMA], [1], [Define to 1 if you have the MAGMA library.])
+	AC_SUBST([STARPU_HAVE_MAGMA], [1])
+	have_magma=yes
+], [:])
 fi
-
+AM_CONDITIONAL(STARPU_HAVE_MAGMA, [test x$have_magma = xyes])
 AC_MSG_CHECKING(whether MAGMA should be used)
-AC_MSG_RESULT($enable_magma)
-if test x$enable_magma = xyes; then
-	AC_DEFINE(STARPU_HAVE_MAGMA, [1], [use MAGMA library])
-fi
+AC_MSG_RESULT($have_magma)
 
 # cufftDoubleComplex may not be available on an old CUDA setup
 AC_CHECK_TYPE(cufftDoubleComplex,
@@ -1369,7 +1314,7 @@ AC_MSG_NOTICE([
 	Allocation cache:  $enable_allocation_cache
 
 	MPI enabled:   $use_mpi
-	Magma enabled: $enable_magma
+	Magma enabled: $have_magma
 	BLAS library:  $blas_lib
 	hwloc:         $have_valid_hwloc
 

+ 2 - 2
examples/Makefile.am

@@ -14,8 +14,8 @@
 #
 # See the GNU Lesser General Public License in COPYING.LGPL for more details.
 
-AM_CFLAGS = $(HWLOC_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS)
-LIBS = $(top_builddir)/src/libstarpu.la $(HWLOC_LIBS) @LIBS@
+AM_CFLAGS = $(MAGMA_CFLAGS) $(HWLOC_CFLAGS) -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS)
+LIBS = $(top_builddir)/src/libstarpu.la $(MAGMA_LIBS) $(HWLOC_LIBS) @LIBS@
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/examples/ -I$(top_builddir)/include
 AM_LDFLAGS = $(STARPU_CUDA_LDFLAGS) $(STARPU_OPENCL_LDFLAGS)
 

+ 20 - 3
examples/cholesky/cholesky_kernels.c

@@ -20,6 +20,10 @@
 #include "../common/blas.h"
 #ifdef STARPU_USE_CUDA
 #include <starpu_cuda.h>
+#ifdef STARPU_HAVE_MAGMA
+#include "magma.h"
+#include "magma_lapack.h"
+#endif
 #endif
 
 /*
@@ -179,13 +183,27 @@ static inline void chol_common_codelet_update_u11(void *descr[], int s, __attrib
 			break;
 #ifdef STARPU_USE_CUDA
 		case 1:
+#ifdef STARPU_HAVE_MAGMA
 			{
+			int ret;
+			int info;
+			ret = magma_spotrf_gpu('L', nx, sub11, ld, &info);
+			if (ret != MAGMA_SUCCESS) {
+				fprintf(stderr, "Error in Magma: %d\n", ret);
+				STARPU_ABORT();
+			}
+			cudaError_t cures = cudaThreadSynchronize();
+			STARPU_ASSERT(!cures);
+			}
+#else
+			{
+
 			float *lambda11;
 			cudaHostAlloc((void **)&lambda11, sizeof(float), 0);
 
 			for (z = 0; z < nx; z++)
 			{
-
+				
 				cudaMemcpyAsync(lambda11, &sub11[z+z*ld], sizeof(float), cudaMemcpyDeviceToHost, starpu_cuda_get_local_stream());
 				cudaStreamSynchronize(starpu_cuda_get_local_stream());
 
@@ -206,8 +224,7 @@ static inline void chol_common_codelet_update_u11(void *descr[], int s, __attrib
 			cudaStreamSynchronize(starpu_cuda_get_local_stream());
 			cudaFreeHost(lambda11);
 			}
-		
-
+#endif
 			break;
 #endif
 		default:

+ 2 - 2
mpi/Makefile.am

@@ -58,8 +58,8 @@ NVCCFLAGS += -I$(top_srcdir)/include/ -I$(top_builddir)/include
 	$(NVCC) $< -c -o $@ --compiler-options -fno-strict-aliasing  $(NVCCFLAGS) -I$(top_srcdir)/include/  -I$(top_builddir)/include/
 endif
 
-AM_CFLAGS = -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS)
-LIBS = $(top_builddir)/src/libstarpu.la @LIBS@ $(FXT_LIBS)
+AM_CFLAGS = -Wall $(STARPU_CUDA_CPPFLAGS) $(STARPU_OPENCL_CPPFLAGS) $(FXT_CFLAGS) $(MAGMA_CFLAGS)
+LIBS = $(top_builddir)/src/libstarpu.la @LIBS@ $(FXT_LIBS) $(MAGMA_LIBS)
 AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir)/mpi/ -I$(top_srcdir)/src/  -I$(top_srcdir)/examples/ -I$(top_builddir)/src -I$(top_builddir)/include
 AM_LDFLAGS = $(STARPU_CUDA_LDFLAGS) $(STARPU_OPENCL_LDFLAGS)
 

+ 18 - 1
mpi/examples/cholesky/mpi_cholesky_kernels.c

@@ -22,6 +22,10 @@
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <cublas.h>
+#ifdef STARPU_HAVE_MAGMA
+#include "magma.h"
+#include "magma_lapack.h"
+#endif
 #endif
 
 /*
@@ -171,6 +175,19 @@ static inline void chol_common_codelet_update_u11(void *descr[], int s, __attrib
 			break;
 #ifdef STARPU_USE_CUDA
 		case 1:
+#ifdef STARPU_HAVE_MAGMA
+			{
+				int ret;
+				int info;
+				ret = magma_spotrf_gpu('L', nx, sub11, ld, &info);
+				if (ret != MAGMA_SUCCESS) {
+					fprintf(stderr, "Error in Magma: %d\n", ret);
+					STARPU_ABORT();
+				}
+				cudaError_t cures = cudaThreadSynchronize();
+				STARPU_ASSERT(!cures);
+			}
+#else
 			for (z = 0; z < nx; z++)
 			{
 				float lambda11;
@@ -191,7 +208,7 @@ static inline void chol_common_codelet_update_u11(void *descr[], int s, __attrib
 			}
 		
 			cudaThreadSynchronize();
-
+#endif
 			break;
 #endif
 		default: