18 #ifndef __DRIVER_CUDA_H__
19 #define __DRIVER_CUDA_H__
23 #include <common/config.h>
25 #ifdef STARPU_USE_CUDA
27 #include <cuda_runtime_api.h>
29 #ifdef STARPU_HAVE_LIBNVIDIA_ML
35 #include <core/workers.h>
/* Prototype of _starpu_cuda_init: takes no arguments and returns nothing.
 * Only the declaration is visible in this header; presumably it performs the
 * driver's one-time set-up -- TODO confirm against the definition.
 * NOTE(review): a separate _starpu_init_cuda() is also declared in this
 * header; the difference between the two cannot be told from the prototypes. */
41 void _starpu_cuda_init(
void);
/* Prototype of _starpu_get_cuda_device_count: takes no arguments and returns
 * an unsigned value.  Going by the name, presumably the number of CUDA
 * devices known to the driver -- TODO confirm whether this is the number
 * detected or the number actually used. */
42 unsigned _starpu_get_cuda_device_count(
void);
/* Square two-dimensional table of ints; each dimension has
 * STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES entries.  Declared extern, so the
 * storage is defined in another translation unit.
 * NOTE(review): presumably indexed as [source][destination] and holding bus
 * identifiers -- the indexing convention is not visible here, confirm before
 * relying on it. */
43 extern int _starpu_cuda_bus_ids[STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES][STARPU_MAXCUDADEVS+STARPU_MAXNUMANODES];
45 #if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)
/* Prototype of _starpu_init_cuda: takes no arguments and returns nothing.
 * It follows the "#if defined(STARPU_USE_CUDA) || defined(STARPU_SIMGRID)"
 * line, so it is presumably also meant to be available in SimGrid builds.
 * NOTE(review): not to be confused with _starpu_cuda_init(), also declared
 * in this header; what each one initializes is not visible from here. */
47 void _starpu_init_cuda(
void);
/* Prototype of _starpu_cuda_worker: takes one untyped pointer and returns an
 * untyped pointer, i.e. it has the shape of a thread start routine.
 * NOTE(review): presumably the entry point of a CUDA worker thread; the real
 * type behind the argument and the meaning of the returned pointer are not
 * visible here -- confirm against the definition. */
48 void *_starpu_cuda_worker(
void *);
49 #ifdef STARPU_HAVE_LIBNVIDIA_ML
/* Prototype of _starpu_cuda_get_nvmldev: takes a pointer to a
 * struct cudaDeviceProp and returns an nvmlDevice_t.  The declaration follows
 * "#ifdef STARPU_HAVE_LIBNVIDIA_ML".
 * NOTE(review): presumably returns the NVML device that corresponds to the
 * CUDA device described by props; how the match is made and what is returned
 * when there is none cannot be told from the prototype. */
50 nvmlDevice_t _starpu_cuda_get_nvmldev(
struct cudaDeviceProp *props);
/* Macro form of _starpu_cuda_discover_devices: expands to a cast of its
 * argument to void, so it has no effect other than referencing config.
 * Presumably the fallback used when the real function is not compiled in
 * (the matching #else is not visible in this excerpt).
 * NOTE(review): the argument is not parenthesized in the expansion, so an
 * argument such as "a + b" would not expand as intended. */
53 # define _starpu_cuda_discover_devices(config) ((void) config)
56 #ifdef STARPU_USE_CUDA
/* Prototype of starpu_cuda_get_local_in_transfer_stream: takes no arguments
 * and returns a cudaStream_t.
 * NOTE(review): presumably the stream used for transfers towards the device
 * of the calling worker; "local" is not defined in this header -- confirm. */
57 cudaStream_t starpu_cuda_get_local_in_transfer_stream(
void);
/* Prototype of starpu_cuda_get_in_transfer_stream: takes a destination node
 * number and returns a cudaStream_t.
 * NOTE(review): presumably the stream used for transfers into dst_node;
 * valid values of dst_node are not visible here -- confirm. */
58 cudaStream_t starpu_cuda_get_in_transfer_stream(
unsigned dst_node);
/* Prototype of starpu_cuda_get_local_out_transfer_stream: takes no arguments
 * and returns a cudaStream_t.
 * NOTE(review): presumably the stream used for transfers out of the device
 * of the calling worker; "local" is not defined in this header -- confirm. */
59 cudaStream_t starpu_cuda_get_local_out_transfer_stream(
void);
/* Prototype of starpu_cuda_get_out_transfer_stream: takes a source node
 * number and returns a cudaStream_t.
 * NOTE(review): presumably the stream used for transfers out of src_node;
 * valid values of src_node are not visible here -- confirm. */
60 cudaStream_t starpu_cuda_get_out_transfer_stream(
unsigned src_node);
/* Prototype of starpu_cuda_get_peer_transfer_stream: takes a source and a
 * destination node number and returns a cudaStream_t.
 * NOTE(review): presumably the stream used for device-to-device transfers
 * from src_node to dst_node; whether both nodes must be CUDA nodes is not
 * visible here -- confirm. */
61 cudaStream_t starpu_cuda_get_peer_transfer_stream(
unsigned src_node,
unsigned dst_node);
/* Prototype of _starpu_cuda_copy_interface_from_cpu_to_cuda.
 *   handle         data handle the copy relates to
 *   src_interface  untyped pointer to the source-side interface
 *   src_node       source node number
 *   dst_interface  untyped pointer to the destination-side interface
 *   dst_node       destination node number
 *   req            pointer to a struct _starpu_data_request
 * Returns an int.
 * NOTE(review): going by the name, src_node is presumably a CPU node and
 * dst_node a CUDA node; the meaning of the return value and whether req may
 * be NULL are not visible in this header -- confirm against the definition. */
67 int _starpu_cuda_copy_interface_from_cpu_to_cuda(starpu_data_handle_t handle,
void *src_interface,
unsigned src_node,
void *dst_interface,
unsigned dst_node,
struct _starpu_data_request *req);
/* Prototype of _starpu_cuda_copy_interface_from_cuda_to_cuda.
 *   handle         data handle the copy relates to
 *   src_interface  untyped pointer to the source-side interface
 *   src_node       source node number
 *   dst_interface  untyped pointer to the destination-side interface
 *   dst_node       destination node number
 *   req            pointer to a struct _starpu_data_request
 * Returns an int.
 * NOTE(review): going by the name, both nodes are presumably CUDA nodes; the
 * meaning of the return value and whether req may be NULL are not visible in
 * this header -- confirm against the definition. */
68 int _starpu_cuda_copy_interface_from_cuda_to_cuda(starpu_data_handle_t handle,
void *src_interface,
unsigned src_node,
void *dst_interface,
unsigned dst_node,
struct _starpu_data_request *req);
/* Prototype of _starpu_cuda_copy_interface_from_cuda_to_cpu.
 *   handle         data handle the copy relates to
 *   src_interface  untyped pointer to the source-side interface
 *   src_node       source node number
 *   dst_interface  untyped pointer to the destination-side interface
 *   dst_node       destination node number
 *   req            pointer to a struct _starpu_data_request
 * Returns an int.
 * NOTE(review): going by the name, src_node is presumably a CUDA node and
 * dst_node a CPU node; the meaning of the return value and whether req may
 * be NULL are not visible in this header -- confirm against the definition. */
69 int _starpu_cuda_copy_interface_from_cuda_to_cpu(starpu_data_handle_t handle,
void *src_interface,
unsigned src_node,
void *dst_interface,
unsigned dst_node,
struct _starpu_data_request *req);
/* Prototype of _starpu_cuda_copy_data_from_cuda_to_cuda.
 *   src, src_offset  source address (as an integer) and an offset
 *   src_node         source node number
 *   dst, dst_offset  destination address (as an integer) and an offset
 *   dst_node         destination node number
 *   size             size of the copy
 *   async_channel    pointer to a struct _starpu_async_channel
 * Returns an int.
 * NOTE(review): presumably copies size bytes from src+src_offset to
 * dst+dst_offset between two CUDA nodes; units, the meaning of the return
 * value and whether async_channel may be NULL are not visible here --
 * confirm against the definition. */
71 int _starpu_cuda_copy_data_from_cuda_to_cuda(uintptr_t src,
size_t src_offset,
unsigned src_node, uintptr_t dst,
size_t dst_offset,
unsigned dst_node,
size_t size,
struct _starpu_async_channel *async_channel);
/* Prototype of _starpu_cuda_copy_data_from_cuda_to_cpu.
 *   src, src_offset  source address (as an integer) and an offset
 *   src_node         source node number
 *   dst, dst_offset  destination address (as an integer) and an offset
 *   dst_node         destination node number
 *   size             size of the copy
 *   async_channel    pointer to a struct _starpu_async_channel
 * Returns an int.
 * NOTE(review): presumably copies size bytes from src+src_offset on a CUDA
 * node to dst+dst_offset on a CPU node; units, the meaning of the return
 * value and whether async_channel may be NULL are not visible here --
 * confirm against the definition. */
72 int _starpu_cuda_copy_data_from_cuda_to_cpu(uintptr_t src,
size_t src_offset,
unsigned src_node, uintptr_t dst,
size_t dst_offset,
unsigned dst_node,
size_t size,
struct _starpu_async_channel *async_channel);
/* Prototype of _starpu_cuda_copy_data_from_cpu_to_cuda.
 *   src, src_offset  source address (as an integer) and an offset
 *   src_node         source node number
 *   dst, dst_offset  destination address (as an integer) and an offset
 *   dst_node         destination node number
 *   size             size of the copy
 *   async_channel    pointer to a struct _starpu_async_channel
 * Returns an int.
 * NOTE(review): presumably copies size bytes from src+src_offset on a CPU
 * node to dst+dst_offset on a CUDA node; units, the meaning of the return
 * value and whether async_channel may be NULL are not visible here --
 * confirm against the definition. */
73 int _starpu_cuda_copy_data_from_cpu_to_cuda(uintptr_t src,
size_t src_offset,
unsigned src_node, uintptr_t dst,
size_t dst_offset,
unsigned dst_node,
size_t size,
struct _starpu_async_channel *async_channel);
/* Prototype of _starpu_cuda_copy2d_data_from_cuda_to_cuda.
 *   src, src_offset  source address (as an integer) and an offset
 *   src_node         source node number
 *   dst, dst_offset  destination address (as an integer) and an offset
 *   dst_node         destination node number
 *   blocksize        size of one block
 *   numblocks        number of blocks
 *   ld_src, ld_dst   leading dimension on the source / destination side
 *   async_channel    pointer to a struct _starpu_async_channel
 * Returns an int.
 * NOTE(review): presumably copies numblocks blocks of blocksize bytes whose
 * starts are ld_src apart in the source and ld_dst apart in the destination,
 * between two CUDA nodes; whether the ld_* values are in bytes or elements
 * and the meaning of the return value are not visible here -- confirm. */
75 int _starpu_cuda_copy2d_data_from_cuda_to_cuda(uintptr_t src,
size_t src_offset,
unsigned src_node, uintptr_t dst,
size_t dst_offset,
unsigned dst_node,
size_t blocksize,
size_t numblocks,
size_t ld_src,
size_t ld_dst,
struct _starpu_async_channel *async_channel);
/* Prototype of _starpu_cuda_copy2d_data_from_cuda_to_cpu.
 *   src, src_offset  source address (as an integer) and an offset
 *   src_node         source node number
 *   dst, dst_offset  destination address (as an integer) and an offset
 *   dst_node         destination node number
 *   blocksize        size of one block
 *   numblocks        number of blocks
 *   ld_src, ld_dst   leading dimension on the source / destination side
 *   async_channel    pointer to a struct _starpu_async_channel
 * Returns an int.
 * NOTE(review): presumably copies numblocks blocks of blocksize bytes whose
 * starts are ld_src apart in the source and ld_dst apart in the destination,
 * from a CUDA node to a CPU node; whether the ld_* values are in bytes or
 * elements and the meaning of the return value are not visible here --
 * confirm. */
76 int _starpu_cuda_copy2d_data_from_cuda_to_cpu(uintptr_t src,
size_t src_offset,
unsigned src_node, uintptr_t dst,
size_t dst_offset,
unsigned dst_node,
size_t blocksize,
size_t numblocks,
size_t ld_src,
size_t ld_dst,
struct _starpu_async_channel *async_channel);
/* Prototype of _starpu_cuda_copy2d_data_from_cpu_to_cuda.
 *   src, src_offset  source address (as an integer) and an offset
 *   src_node         source node number
 *   dst, dst_offset  destination address (as an integer) and an offset
 *   dst_node         destination node number
 *   blocksize        size of one block
 *   numblocks        number of blocks
 *   ld_src, ld_dst   leading dimension on the source / destination side
 *   async_channel    pointer to a struct _starpu_async_channel
 * Returns an int.
 * NOTE(review): presumably copies numblocks blocks of blocksize bytes whose
 * starts are ld_src apart in the source and ld_dst apart in the destination,
 * from a CPU node to a CUDA node; whether the ld_* values are in bytes or
 * elements and the meaning of the return value are not visible here --
 * confirm. */
77 int _starpu_cuda_copy2d_data_from_cpu_to_cuda(uintptr_t src,
size_t src_offset,
unsigned src_node, uintptr_t dst,
size_t dst_offset,
unsigned dst_node,
size_t blocksize,
size_t numblocks,
size_t ld_src,
size_t ld_dst,
struct _starpu_async_channel *async_channel);
/* Prototype of _starpu_cuda_copy3d_data_from_cuda_to_cuda.
 *   src, src_offset    source address (as an integer) and an offset
 *   src_node           source node number
 *   dst, dst_offset    destination address (as an integer) and an offset
 *   dst_node           destination node number
 *   blocksize          size of one block
 *   numblocks_1        number of blocks along the first strided dimension
 *   ld1_src, ld1_dst   first leading dimension, source / destination side
 *   numblocks_2        number of blocks along the second strided dimension
 *   ld2_src, ld2_dst   second leading dimension, source / destination side
 *   async_channel      pointer to a struct _starpu_async_channel
 * Returns an int.
 * NOTE(review): presumably copies numblocks_2 x numblocks_1 blocks of
 * blocksize bytes between two CUDA nodes; which dimension is the inner one,
 * the units of the ld* values and the meaning of the return value are not
 * visible here -- confirm against the definition. */
79 int _starpu_cuda_copy3d_data_from_cuda_to_cuda(uintptr_t src,
size_t src_offset,
unsigned src_node, uintptr_t dst,
size_t dst_offset,
unsigned dst_node,
size_t blocksize,
size_t numblocks_1,
size_t ld1_src,
size_t ld1_dst,
size_t numblocks_2,
size_t ld2_src,
size_t ld2_dst,
struct _starpu_async_channel *async_channel);
/* Prototype of _starpu_cuda_copy3d_data_from_cuda_to_cpu.
 *   src, src_offset    source address (as an integer) and an offset
 *   src_node           source node number
 *   dst, dst_offset    destination address (as an integer) and an offset
 *   dst_node           destination node number
 *   blocksize          size of one block
 *   numblocks_1        number of blocks along the first strided dimension
 *   ld1_src, ld1_dst   first leading dimension, source / destination side
 *   numblocks_2        number of blocks along the second strided dimension
 *   ld2_src, ld2_dst   second leading dimension, source / destination side
 *   async_channel      pointer to a struct _starpu_async_channel
 * Returns an int.
 * NOTE(review): presumably copies numblocks_2 x numblocks_1 blocks of
 * blocksize bytes from a CUDA node to a CPU node; which dimension is the
 * inner one, the units of the ld* values and the meaning of the return value
 * are not visible here -- confirm against the definition. */
80 int _starpu_cuda_copy3d_data_from_cuda_to_cpu(uintptr_t src,
size_t src_offset,
unsigned src_node, uintptr_t dst,
size_t dst_offset,
unsigned dst_node,
size_t blocksize,
size_t numblocks_1,
size_t ld1_src,
size_t ld1_dst,
size_t numblocks_2,
size_t ld2_src,
size_t ld2_dst,
struct _starpu_async_channel *async_channel);
/* Prototype of _starpu_cuda_copy3d_data_from_cpu_to_cuda.
 *   src, src_offset    source address (as an integer) and an offset
 *   src_node           source node number
 *   dst, dst_offset    destination address (as an integer) and an offset
 *   dst_node           destination node number
 *   blocksize          size of one block
 *   numblocks_1        number of blocks along the first strided dimension
 *   ld1_src, ld1_dst   first leading dimension, source / destination side
 *   numblocks_2        number of blocks along the second strided dimension
 *   ld2_src, ld2_dst   second leading dimension, source / destination side
 *   async_channel      pointer to a struct _starpu_async_channel
 * Returns an int.
 * NOTE(review): presumably copies numblocks_2 x numblocks_1 blocks of
 * blocksize bytes from a CPU node to a CUDA node; which dimension is the
 * inner one, the units of the ld* values and the meaning of the return value
 * are not visible here -- confirm against the definition. */
81 int _starpu_cuda_copy3d_data_from_cpu_to_cuda(uintptr_t src,
size_t src_offset,
unsigned src_node, uintptr_t dst,
size_t dst_offset,
unsigned dst_node,
size_t blocksize,
size_t numblocks_1,
size_t ld1_src,
size_t ld1_dst,
size_t numblocks_2,
size_t ld2_src,
size_t ld2_dst,
struct _starpu_async_channel *async_channel);
/* Prototype of _starpu_cuda_is_direct_access_supported: takes two node
 * numbers (node, handling_node) and returns an int.
 * NOTE(review): presumably a boolean answer (non-zero = supported) to whether
 * handling_node can access memory of node directly; the exact encoding of
 * the result and the direction of access are not visible here -- confirm. */
83 int _starpu_cuda_is_direct_access_supported(
unsigned node,
unsigned handling_node);
/* Prototype of _starpu_cuda_malloc_on_node: takes a node number, a size and
 * an int of flags, and returns a uintptr_t.
 * NOTE(review): presumably allocates size bytes on dst_node and returns the
 * address as an integer; the value returned on failure and the accepted
 * flag bits are not visible in this header -- confirm against the
 * definition. */
84 uintptr_t _starpu_cuda_malloc_on_node(
unsigned dst_node,
size_t size,
int flags);
/* Prototype of _starpu_cuda_free_on_node: takes a node number, an address
 * (as an integer), a size and an int of flags, and returns nothing.
 * NOTE(review): presumably releases memory previously obtained from
 * _starpu_cuda_malloc_on_node() with the same size and flags; that pairing
 * requirement is an assumption and is not visible here -- confirm against
 * the definition. */
85 void _starpu_cuda_free_on_node(
unsigned dst_node, uintptr_t addr,
size_t size,
int flags);
Definition: copy_driver.h:127
Definition: workers.h:359
Definition: node_ops.h:48