starpu_cuda.h

/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2010-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */
#ifndef __STARPU_CUDA_H__
#define __STARPU_CUDA_H__

#include <starpu_config.h>

#if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS

#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
#ifdef STARPU_HAVE_LIBNVIDIA_ML
#include <nvml.h>
#endif

#ifdef __cplusplus
extern "C"
{
#endif
/**
   @defgroup API_CUDA_Extensions CUDA Extensions
   @{
*/

/**
   Report a CUBLAS error.
*/
void starpu_cublas_report_error(const char *func, const char *file, int line, int status);

/**
   Call starpu_cublas_report_error(), passing the current function, file and line position.
*/
#define STARPU_CUBLAS_REPORT_ERROR(status) starpu_cublas_report_error(__starpu_func__, __FILE__, __LINE__, status)
/**
   Report a CUDA error.
*/
void starpu_cuda_report_error(const char *func, const char *file, int line, cudaError_t status);

/**
   Call starpu_cuda_report_error(), passing the current function, file and line position.
*/
#define STARPU_CUDA_REPORT_ERROR(status) starpu_cuda_report_error(__starpu_func__, __FILE__, __LINE__, status)
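
/* Illustrative sketch (not part of the header): typical use of the error
   reporting macros inside a CUDA codelet. The cuBLAS handle and buffer
   variables are hypothetical placeholders.

   cublasStatus_t stat = cublasSaxpy(handle, n, &alpha, x, 1, y, 1);
   if (stat != CUBLAS_STATUS_SUCCESS)
       STARPU_CUBLAS_REPORT_ERROR(stat);

   cudaError_t cures = cudaMemsetAsync(y, 0, n * sizeof(float),
                                       starpu_cuda_get_local_stream());
   if (cures != cudaSuccess)
       STARPU_CUDA_REPORT_ERROR(cures);
*/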
/**
   Return the current worker's CUDA stream. StarPU provides a stream
   for every CUDA device controlled by StarPU. This function is only
   provided for convenience so that programmers can easily use
   asynchronous operations within codelets without having to create a
   stream by hand. Note that the application is not forced to use the
   stream provided by starpu_cuda_get_local_stream() and may also
   create its own streams. Synchronizing with
   <c>cudaDeviceSynchronize()</c> is allowed, but will reduce the
   likelihood of having all transfers overlapped.
*/
cudaStream_t starpu_cuda_get_local_stream(void);
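
/* Illustrative sketch (not part of the header): launching a kernel on the
   per-worker stream from inside a CUDA codelet implementation. The kernel
   scal_kernel and the use of a vector data interface are hypothetical.

   static void scal_cuda_func(void *buffers[], void *cl_arg)
   {
       float *v = (float *)STARPU_VECTOR_GET_PTR(buffers[0]);
       unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
       float factor = *(float *)cl_arg;

       unsigned threads = 256;
       unsigned blocks = (n + threads - 1) / threads;
       scal_kernel<<<blocks, threads, 0, starpu_cuda_get_local_stream()>>>(v, n, factor);
   }

   The codelet would list this implementation in .cuda_funcs and, to let
   StarPU wait on the stream rather than the whole device, set
   .cuda_flags = {STARPU_CUDA_ASYNC}.
*/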
/**
   Return a pointer to device properties for worker \p workerid
   (assumed to be a CUDA worker).
*/
const struct cudaDeviceProp *starpu_cuda_get_device_properties(unsigned workerid);
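
/* Illustrative sketch (not part of the header): querying the properties of
   the CUDA device driven by the current worker, e.g. to size a kernel launch.
   Assumes the code runs on a CUDA worker.

   int workerid = starpu_worker_get_id();
   const struct cudaDeviceProp *props = starpu_cuda_get_device_properties(workerid);
   unsigned max_threads = props->maxThreadsPerBlock;
*/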
/**
   Copy \p ssize bytes from the pointer \p src_ptr on \p src_node
   to the pointer \p dst_ptr on \p dst_node. The function first tries to
   copy the data asynchronously (unless \p stream is <c>NULL</c>). If the
   asynchronous copy fails or if \p stream is <c>NULL</c>, it copies the
   data synchronously. The function returns <c>-EAGAIN</c> if the
   asynchronous launch was successful. It returns 0 if the synchronous
   copy was successful, and fails otherwise.
*/
int starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t ssize, cudaStream_t stream, enum cudaMemcpyKind kind);
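
/* Illustrative sketch (not part of the header): copying a buffer and handling
   the two possible success cases. The pointers, size and node numbers are
   hypothetical; -EAGAIN only means the copy was queued on the stream and
   still has to be waited for.

   int ret = starpu_cuda_copy_async_sync(dev_ptr, src_node, host_ptr, dst_node,
                                         size, starpu_cuda_get_local_stream(),
                                         cudaMemcpyDeviceToHost);
   if (ret == -EAGAIN)
       // Asynchronous copy was queued; wait for it (or record an event).
       cudaStreamSynchronize(starpu_cuda_get_local_stream());
   // ret == 0: the copy already completed synchronously.
*/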
/**
   Copy \p numblocks blocks of \p blocksize bytes from the pointer \p src_ptr on
   \p src_node to the pointer \p dst_ptr on \p dst_node.
   The blocks start at addresses which are \p ld_src (resp. \p ld_dst) bytes apart in
   the source (resp. destination) interface.
   The function first tries to copy the data asynchronously (unless \p stream is
   <c>NULL</c>). If the asynchronous copy fails or if \p stream is <c>NULL</c>,
   it copies the data synchronously. The function returns <c>-EAGAIN</c> if the
   asynchronous launch was successful. It returns 0 if the synchronous copy was
   successful, and fails otherwise.
*/
int starpu_cuda_copy2d_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node,
                                  size_t blocksize,
                                  size_t numblocks, size_t ld_src, size_t ld_dst,
                                  cudaStream_t stream, enum cudaMemcpyKind kind);
/**
   Copy \p numblocks_1 * \p numblocks_2 blocks of \p blocksize bytes from the
   pointer \p src_ptr on \p src_node to the pointer \p dst_ptr on \p dst_node.
   The blocks are grouped by \p numblocks_1 blocks whose start addresses are
   \p ld1_src (resp. \p ld1_dst) bytes apart in the source (resp. destination)
   interface.
   The function first tries to copy the data asynchronously (unless \p stream is
   <c>NULL</c>). If the asynchronous copy fails or if \p stream is <c>NULL</c>,
   it copies the data synchronously. The function returns <c>-EAGAIN</c> if the
   asynchronous launch was successful. It returns 0 if the synchronous copy was
   successful, and fails otherwise.
*/
int starpu_cuda_copy3d_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node,
                                  size_t blocksize,
                                  size_t numblocks_1, size_t ld1_src, size_t ld1_dst,
                                  size_t numblocks_2, size_t ld2_src, size_t ld2_dst,
                                  cudaStream_t stream, enum cudaMemcpyKind kind);
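
/* Illustrative sketch (not part of the header): copying a sub-matrix of a
   column-major matrix with the 2D variant. Each block is one column of
   nrows floats, consecutive source columns are ld elements apart, and the
   destination is packed; the pointers, dimensions and node numbers are
   hypothetical. The 3D variant adds one more level of blocking in the same
   fashion.

   int ret = starpu_cuda_copy2d_async_sync(src, src_node, dst, dst_node,
                                           nrows * sizeof(float),   // blocksize
                                           ncols,                   // numblocks
                                           ld * sizeof(float),      // ld_src
                                           nrows * sizeof(float),   // ld_dst
                                           starpu_cuda_get_local_stream(),
                                           cudaMemcpyDeviceToHost);
   if (ret == -EAGAIN)
       cudaStreamSynchronize(starpu_cuda_get_local_stream());
*/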
/**
   Call <c>cudaSetDevice(\p devid)</c> or <c>cudaGLSetGLDevice(\p devid)</c>,
   depending on whether \p devid appears in the
   starpu_conf::cuda_opengl_interoperability field.
*/
void starpu_cuda_set_device(unsigned devid);
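
/* Illustrative sketch (not part of the header): selecting the device of a
   given CUDA worker before issuing raw CUDA calls from application code.
   The variable workerid is a hypothetical CUDA worker id.

   int devid = starpu_worker_get_devid(workerid);
   starpu_cuda_set_device(devid);
*/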
#ifdef STARPU_HAVE_LIBNVIDIA_ML
/**
   Return the NVML device handle for the CUDA device \p devid.
*/
nvmlDevice_t starpu_cuda_get_nvmldev(unsigned devid);
#endif
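
/* Illustrative sketch (not part of the header): reading the current power
   draw of a CUDA device through its NVML handle. Requires
   STARPU_HAVE_LIBNVIDIA_ML and a prior nvmlInit(); devid is a hypothetical
   CUDA device number.

   nvmlDevice_t nvmldev = starpu_cuda_get_nvmldev(devid);
   unsigned int milliwatts;
   if (nvmlDeviceGetPowerUsage(nvmldev, &milliwatts) == NVML_SUCCESS)
       printf("GPU power: %u mW\n", milliwatts);
*/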
/** @} */

#ifdef __cplusplus
}
#endif

#endif /* STARPU_USE_CUDA && !STARPU_DONT_INCLUDE_CUDA_HEADERS */

#endif /* __STARPU_CUDA_H__ */