starpu_cuda.h 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2010-2020 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. *
  5. * StarPU is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published by
  7. * the Free Software Foundation; either version 2.1 of the License, or (at
  8. * your option) any later version.
  9. *
  10. * StarPU is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. *
  14. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  15. */
  16. #ifndef __STARPU_CUDA_H__
  17. #define __STARPU_CUDA_H__
  18. #include <starpu_config.h>
  19. #if defined STARPU_USE_CUDA && !defined STARPU_DONT_INCLUDE_CUDA_HEADERS
  20. #include <cuda.h>
  21. #include <cuda_runtime.h>
  22. #include <cuda_runtime_api.h>
  23. #ifdef __cplusplus
  24. extern "C"
  25. {
  26. #endif
  27. /**
  28. @defgroup API_CUDA_Extensions CUDA Extensions
  29. @{
  30. */
  31. /**
  32. Report a CUBLAS error.
  33. */
  34. void starpu_cublas_report_error(const char *func, const char *file, int line, int status);
  35. /**
  36. Call starpu_cublas_report_error(), passing the current function, file and line position.
  37. */
  38. #define STARPU_CUBLAS_REPORT_ERROR(status) starpu_cublas_report_error(__starpu_func__, __FILE__, __LINE__, status)
  39. /**
  40. Report a CUDA error.
  41. */
  42. void starpu_cuda_report_error(const char *func, const char *file, int line, cudaError_t status);
  43. /**
  44. Call starpu_cuda_report_error(), passing the current function, file and line position.
  45. */
  46. #define STARPU_CUDA_REPORT_ERROR(status) starpu_cuda_report_error(__starpu_func__, __FILE__, __LINE__, status)
  47. /**
  48. Return the current worker’s CUDA stream. StarPU provides a stream
  49. for every CUDA device controlled by StarPU. This function is only
  50. provided for convenience so that programmers can easily use
  51. asynchronous operations within codelets without having to create a
  52. stream by hand. Note that the application is not forced to use the
  53. stream provided by starpu_cuda_get_local_stream() and may also
  54. create its own streams. Synchronizing with
  55. <c>cudaDeviceSynchronize()</c> is allowed, but will reduce the
  56. likelihood of having all transfers overlapped.
  57. */
  58. cudaStream_t starpu_cuda_get_local_stream(void);
  59. /**
  60. Return a pointer to device properties for worker \p workerid
  61. (assumed to be a CUDA worker).
  62. */
  63. const struct cudaDeviceProp *starpu_cuda_get_device_properties(unsigned workerid);
  64. /**
  65. Copy \p ssize bytes from the pointer \p src_ptr on \p src_node
  66. to the pointer \p dst_ptr on \p dst_node. The function first tries to
  67. copy the data asynchronously (unless \p stream is <c>NULL</c>). If the
  68. asynchronous copy fails or if \p stream is <c>NULL</c>, it copies the
  69. data synchronously. The function returns <c>-EAGAIN</c> if the
  70. asynchronous launch was successful. It returns 0 if the synchronous
  71. copy was successful, or fails otherwise.
  72. */
  73. int starpu_cuda_copy_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node, size_t ssize, cudaStream_t stream, enum cudaMemcpyKind kind);
  74. /**
  75. Copy \p numblocks blocks of \p blocksize bytes from the pointer \p src_ptr on
  76. \p src_node to the pointer \p dst_ptr on \p dst_node.
  77. The blocks start at addresses which are \p ld_src (resp. \p ld_dst) bytes apart in
  78. the source (resp. destination) interface.
  79. The function first tries to copy the data asynchronously (unless \p stream is
  80. <c>NULL</c>). If the asynchronous copy fails or if \p stream is <c>NULL</c>,
  81. it copies the data synchronously. The function returns <c>-EAGAIN</c> if the
  82. asynchronous launch was successful. It returns 0 if the synchronous copy was
  83. successful, or fails otherwise.
  84. */
  85. int starpu_cuda_copy2d_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node,
  86. size_t blocksize,
  87. size_t numblocks, size_t ld_src, size_t ld_dst,
  88. cudaStream_t stream, enum cudaMemcpyKind kind);
  89. /**
  90. Copy \p numblocks_1 * \p numblocks_2 blocks of \p blocksize bytes from the
  91. pointer \p src_ptr on \p src_node to the pointer \p dst_ptr on \p dst_node.
  92. The blocks are grouped by \p numblocks_1 blocks whose start addresses are
  93. \p ld1_src (resp. \p ld1_dst) bytes apart in the source (resp. destination)
  94. interface.
  95. The function first tries to copy the data asynchronously (unless \p stream is
  96. <c>NULL</c>). If the asynchronous copy fails or if \p stream is <c>NULL</c>,
  97. it copies the data synchronously. The function returns <c>-EAGAIN</c> if the
  98. asynchronous launch was successful. It returns 0 if the synchronous copy was
  99. successful, or fails otherwise.
  100. */
  101. int starpu_cuda_copy3d_async_sync(void *src_ptr, unsigned src_node, void *dst_ptr, unsigned dst_node,
  102. size_t blocksize,
  103. size_t numblocks_1, size_t ld1_src, size_t ld1_dst,
  104. size_t numblocks_2, size_t ld2_src, size_t ld2_dst,
  105. cudaStream_t stream, enum cudaMemcpyKind kind);
  106. /**
  107. Call <c>cudaSetDevice(\p devid)</c> or <c>cudaGLSetGLDevice(\p devid)</c>,
  108. according to whether \p devid is listed in the field
  109. starpu_conf::cuda_opengl_interoperability.
  110. */
  111. void starpu_cuda_set_device(unsigned devid);
  112. /** @} */
  113. #ifdef __cplusplus
  114. }
  115. #endif
  116. #endif /* STARPU_USE_CUDA && !STARPU_DONT_INCLUDE_CUDA_HEADERS */
  117. #endif /* __STARPU_CUDA_H__ */