driver_cuda.h 5.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. /* StarPU --- Runtime system for heterogeneous multicore architectures.
  2. *
  3. * Copyright (C) 2008-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
  4. * Copyright (C) 2015 Mathieu Lirzin
  5. *
  6. * StarPU is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation; either version 2.1 of the License, or (at
  9. * your option) any later version.
  10. *
  11. * StarPU is distributed in the hope that it will be useful, but
  12. * WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. *
  15. * See the GNU Lesser General Public License in COPYING.LGPL for more details.
  16. */
  17. #ifndef __DRIVER_CUDA_H__
  18. #define __DRIVER_CUDA_H__
  19. /** @file */
  20. #include <common/config.h>
  /**
   * Declared unconditionally: this prototype sits before the STARPU_USE_CUDA
   * guard and before the hidden-visibility region opened later in this header,
   * so it is visible in every build configuration.
   * Presumably an early (pre-initialization) hook of the CUDA driver --
   * TODO confirm against the implementation.
   */
  21. void _starpu_cuda_preinit(void);
  22. #ifdef STARPU_USE_CUDA
  23. #include <cuda.h>
  24. #include <cuda_runtime_api.h>
  25. #include <cublas.h>
  26. #ifdef STARPU_HAVE_LIBNVIDIA_ML
  27. #include <nvml.h>
  28. #endif
  29. #endif
  30. #include <starpu.h>
  31. #include <core/workers.h>
  32. #include <datawizard/node_ops.h>
  /* From here until the matching "visibility pop", declarations get hidden
   * symbol visibility with GCC-compatible compilers. */
  33. #pragma GCC visibility push(hidden)
  /** Driver operation table for the CUDA driver; only declared here (extern). */
  34. extern struct _starpu_driver_ops _starpu_driver_cuda_ops;
  /** Memory-node operation table for the CUDA driver; only declared here (extern). */
  35. extern struct _starpu_node_ops _starpu_driver_cuda_node_ops;
  /**
   * CUDA driver initialization entry point, declared in every configuration.
   * NOTE(review): a similarly named _starpu_init_cuda() is declared further
   * down under STARPU_USE_CUDA || STARPU_SIMGRID -- do not confuse the two.
   */
  36. void _starpu_cuda_init(void);
  /** Returns an unsigned count; presumably the number of CUDA devices -- confirm. */
  37. unsigned _starpu_get_cuda_device_count(void);
  /**
   * Square table with (STARPU_MAXCUDADEVS + STARPU_MAXNUMANODES) entries per
   * dimension. Presumably bus identifiers indexed by [source][destination]
   * over CUDA devices and NUMA nodes -- TODO confirm index order and layout.
   */
  38. extern int _starpu_cuda_bus_ids[STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES][STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES];
  /* Declarations available with real CUDA support or in a SimGrid build. */
  39. #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
  /** Takes the machine configuration; presumably records the CUDA devices found -- confirm. */
  40. void _starpu_cuda_discover_devices (struct _starpu_machine_config *);
  /** Distinct from _starpu_cuda_init() declared earlier in this header. */
  41. void _starpu_init_cuda(void);
  /** void *(void *) signature; presumably the CUDA worker thread entry point -- confirm. */
  42. void *_starpu_cuda_worker(void *);
  43. #ifdef STARPU_HAVE_LIBNVIDIA_ML
  /**
   * Maps CUDA device properties to an NVML device handle.
   * NOTE(review): <nvml.h> and the CUDA headers are included above only under
   * STARPU_USE_CUDA, but this prototype is also reachable with STARPU_SIMGRID
   * alone -- confirm STARPU_HAVE_LIBNVIDIA_ML is never defined without
   * STARPU_USE_CUDA.
   */
  44. nvmlDevice_t _starpu_cuda_get_nvmldev(struct cudaDeviceProp *props);
  45. #endif
  46. #else
  /* Neither CUDA nor SimGrid: discovery becomes a no-op that merely evaluates
   * its argument as void, so call sites compile unchanged. No fallback is
   * provided here for _starpu_init_cuda() or _starpu_cuda_worker(). */
  47. # define _starpu_cuda_discover_devices(config) ((void) config)
  48. #endif
  /* Stream accessors; only declared when real CUDA support is compiled in.
   * NOTE(review): these carry the starpu_ prefix (no leading underscore) yet
   * sit inside the hidden-visibility region -- confirm this is intended. */
  49. #ifdef STARPU_USE_CUDA
  /** Returns a cudaStream_t; presumably the calling worker's host-to-device transfer stream -- confirm. */
  50. cudaStream_t starpu_cuda_get_local_in_transfer_stream(void);
  /** Same kind of stream, selected by dst_node (presumably a memory node id -- confirm). */
  51. cudaStream_t starpu_cuda_get_in_transfer_stream(unsigned dst_node);
  /** Returns a cudaStream_t; presumably the calling worker's device-to-host transfer stream -- confirm. */
  52. cudaStream_t starpu_cuda_get_local_out_transfer_stream(void);
  /** Same kind of stream, selected by src_node (presumably a memory node id -- confirm). */
  53. cudaStream_t starpu_cuda_get_out_transfer_stream(unsigned src_node);
  /** Stream selected by a (src_node, dst_node) pair; presumably for device-to-device copies -- confirm. */
  54. cudaStream_t starpu_cuda_get_peer_transfer_stream(unsigned src_node, unsigned dst_node);
  55. #endif
  /**
   * Completion query on an asynchronous channel.
   * Presumably non-blocking and returning non-zero once the request has
   * finished -- TODO confirm the return convention.
   */
  56. unsigned _starpu_cuda_test_request_completion(struct _starpu_async_channel *async_channel);
  /** Counterpart of the test function with no result; presumably blocks until the request on async_channel completes -- confirm. */
  57. void _starpu_cuda_wait_request_completion(struct _starpu_async_channel *async_channel);
  /**
   * Interface-level copies for a data handle. The three functions share one
   * parameter list and differ only in the direction encoded in their name
   * (CPU->CUDA, CUDA->CUDA, CUDA->CPU).
   *  - handle: data handle being transferred
   *  - src_interface / src_node: source interface pointer and node
   *  - dst_interface / dst_node: destination interface pointer and node
   *  - req: associated data request (whether it may be NULL is not visible here)
   * The meaning of the int result is not visible in this header -- confirm
   * against the implementation.
   */
  58. int _starpu_cuda_copy_interface_from_cpu_to_cuda(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
  59. int _starpu_cuda_copy_interface_from_cuda_to_cuda(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
  60. int _starpu_cuda_copy_interface_from_cuda_to_cpu(starpu_data_handle_t handle, void *src_interface, unsigned src_node, void *dst_interface, unsigned dst_node, struct _starpu_data_request *req);
  /**
   * Contiguous copies between raw addresses. The three functions share one
   * parameter list and differ only in the direction encoded in their name.
   *  - src + src_offset on src_node: source location
   *  - dst + dst_offset on dst_node: destination location
   *  - size: amount to copy (presumably in bytes -- confirm)
   *  - async_channel: channel for the transfer (presumably NULL selects a
   *    synchronous copy -- TODO confirm)
   * The meaning of the int result is not visible in this header.
   */
  61. int _starpu_cuda_copy_data_from_cuda_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
  62. int _starpu_cuda_copy_data_from_cuda_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
  63. int _starpu_cuda_copy_data_from_cpu_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t size, struct _starpu_async_channel *async_channel);
  /**
   * 2D (strided) copies; same addressing parameters as the contiguous
   * copy_data variants, with the single size replaced by a block description.
   *  - blocksize: size of one contiguous block (presumably bytes -- confirm)
   *  - numblocks: number of such blocks
   *  - ld_src / ld_dst: leading dimension on each side (presumably the
   *    distance between consecutive blocks -- confirm units)
   *  - async_channel: channel for the transfer
   * The meaning of the int result is not visible in this header.
   */
  64. int _starpu_cuda_copy2d_data_from_cuda_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, struct _starpu_async_channel *async_channel);
  65. int _starpu_cuda_copy2d_data_from_cuda_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, struct _starpu_async_channel *async_channel);
  66. int _starpu_cuda_copy2d_data_from_cpu_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks, size_t ld_src, size_t ld_dst, struct _starpu_async_channel *async_channel);
  /**
   * 3D (doubly strided) copies; extend the 2D variants with a second level of
   * blocking.
   *  - blocksize: size of one contiguous block (presumably bytes -- confirm)
   *  - numblocks_1, ld1_src, ld1_dst: count and leading dimensions of the
   *    first level
   *  - numblocks_2, ld2_src, ld2_dst: count and leading dimensions of the
   *    second level (which level is innermost is not visible here -- confirm)
   *  - async_channel: channel for the transfer
   * The meaning of the int result is not visible in this header.
   */
  67. int _starpu_cuda_copy3d_data_from_cuda_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks_1, size_t ld1_src, size_t ld1_dst, size_t numblocks_2, size_t ld2_src, size_t ld2_dst, struct _starpu_async_channel *async_channel);
  68. int _starpu_cuda_copy3d_data_from_cuda_to_cpu(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks_1, size_t ld1_src, size_t ld1_dst, size_t numblocks_2, size_t ld2_src, size_t ld2_dst, struct _starpu_async_channel *async_channel);
  69. int _starpu_cuda_copy3d_data_from_cpu_to_cuda(uintptr_t src, size_t src_offset, unsigned src_node, uintptr_t dst, size_t dst_offset, unsigned dst_node, size_t blocksize, size_t numblocks_1, size_t ld1_src, size_t ld1_dst, size_t numblocks_2, size_t ld2_src, size_t ld2_dst, struct _starpu_async_channel *async_channel);
  /** Presumably returns non-zero when handling_node can access memory of node directly -- TODO confirm. */
  70. int _starpu_cuda_is_direct_access_supported(unsigned node, unsigned handling_node);
  /**
   * Allocates size units on dst_node and returns the address as uintptr_t.
   * The failure value (presumably 0) and the meaning of flags are not visible
   * in this header -- confirm.
   */
  71. uintptr_t _starpu_cuda_malloc_on_node(unsigned dst_node, size_t size, int flags);
  /** Releases addr on dst_node; size and flags presumably must match the allocation call -- confirm. */
  72. void _starpu_cuda_free_on_node(unsigned dst_node, uintptr_t addr, size_t size, int flags);
  /* Closes the hidden-visibility region opened by "visibility push(hidden)". */
  73. #pragma GCC visibility pop
  74. #endif // __DRIVER_CUDA_H__