StarPU Internal Handbook
mp_common.h
Go to the documentation of this file.
1 /* StarPU --- Runtime system for heterogeneous multicore architectures.
2  *
3  * Copyright (C) 2012-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
4  * Copyright (C) 2013 Thibaut Lambert
5  *
6  * StarPU is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation; either version 2.1 of the License, or (at
9  * your option) any later version.
10  *
11  * StarPU is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14  *
15  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
16  */
17 
18 #ifndef __MP_COMMON_H__
19 #define __MP_COMMON_H__
20 
23 #include <semaphore.h>
24 
25 #include <starpu.h>
26 #include <common/config.h>
27 #include <common/list.h>
28 #include <common/barrier.h>
29 #include <common/thread.h>
31 #include <datawizard/copy_driver.h>
32 
33 #ifdef STARPU_USE_MP
34 
35 #ifdef STARPU_USE_MIC
36 #include <scif.h>
37 #endif /* STARPU_USE_MIC */
38 
/* Size constant for a buffer.
 * NOTE(review): presumably the size in bytes of the `buffer` member of
 * struct _starpu_mp_node -- this header does not show where it is used; confirm. */
39 #define BUFFER_SIZE 65536
40 
/* Node numbering: the source node is 0, and STARPU_MP_SINK_NODE(a) yields a + 1. */
41 #define STARPU_MP_SRC_NODE 0
42 #define STARPU_MP_SINK_NODE(a) ((a) + 1)
43 
/* Call the report_error callback of `node` with the current function name,
 * file name and line number, followed by `status`. `node` must be a pointer
 * to an object that has a report_error member (see struct _starpu_mp_node). */
44 #define STARPU_MP_COMMON_REPORT_ERROR(node, status) \
45  (node)->report_error(__starpu_func__, __FILE__, __LINE__, (status))
/* Command identifiers of the message-passing layer.
 * Within this header they are used as the `command` argument of
 * _starpu_mp_common_send_command(), as the return value of
 * _starpu_mp_common_recv_command(), as the `type` of a queued mp_message and
 * as the `answer_cmd` of an _starpu_mp_event.
 * The enumerators have no explicit values, so they are numbered from 0 in
 * declaration order; inserting or reordering entries changes those values.
 * NOTE(review): the names suggest request / ANSWER_* / ERROR_* families and
 * *_ASYNC / *_ASYNC_COMPLETED pairs -- the exact protocol is not visible
 * here; confirm against the command handlers. */
46 enum _starpu_mp_command
47 {
48  STARPU_MP_COMMAND_EXIT,
49  STARPU_MP_COMMAND_EXECUTE,
50  STARPU_MP_COMMAND_EXECUTE_DETACHED,
51  STARPU_MP_COMMAND_ERROR_EXECUTE,
52  STARPU_MP_COMMAND_ERROR_EXECUTE_DETACHED,
53  STARPU_MP_COMMAND_LOOKUP,
54  STARPU_MP_COMMAND_ANSWER_LOOKUP,
55  STARPU_MP_COMMAND_ERROR_LOOKUP,
56  STARPU_MP_COMMAND_ALLOCATE,
57  STARPU_MP_COMMAND_ANSWER_ALLOCATE,
58  STARPU_MP_COMMAND_ERROR_ALLOCATE,
59  STARPU_MP_COMMAND_FREE,
     /* Data-transfer commands (presumably the synchronous variants -- confirm). */
61  STARPU_MP_COMMAND_RECV_FROM_HOST,
62  STARPU_MP_COMMAND_SEND_TO_HOST,
63  STARPU_MP_COMMAND_RECV_FROM_SINK,
64  STARPU_MP_COMMAND_SEND_TO_SINK,
     /* Asynchronous data-transfer commands and their *_COMPLETED counterparts. */
66  STARPU_MP_COMMAND_RECV_FROM_HOST_ASYNC,
67  STARPU_MP_COMMAND_RECV_FROM_HOST_ASYNC_COMPLETED,
68  STARPU_MP_COMMAND_SEND_TO_HOST_ASYNC,
69  STARPU_MP_COMMAND_SEND_TO_HOST_ASYNC_COMPLETED,
70  STARPU_MP_COMMAND_RECV_FROM_SINK_ASYNC,
71  STARPU_MP_COMMAND_RECV_FROM_SINK_ASYNC_COMPLETED,
72  STARPU_MP_COMMAND_SEND_TO_SINK_ASYNC,
73  STARPU_MP_COMMAND_SEND_TO_SINK_ASYNC_COMPLETED,
74 
75  STARPU_MP_COMMAND_TRANSFER_COMPLETE,
76  STARPU_MP_COMMAND_SINK_NBCORES,
77  STARPU_MP_COMMAND_ANSWER_SINK_NBCORES,
78  STARPU_MP_COMMAND_EXECUTION_SUBMITTED,
79  STARPU_MP_COMMAND_EXECUTION_COMPLETED,
80  STARPU_MP_COMMAND_EXECUTION_DETACHED_SUBMITTED,
81  STARPU_MP_COMMAND_EXECUTION_DETACHED_COMPLETED,
82  STARPU_MP_COMMAND_PRE_EXECUTION,
83  STARPU_MP_COMMAND_SYNC_WORKERS,
84 };
85 
/* Return a string for `command`, which is taken as a plain int.
 * NOTE(review): presumably the name of the matching enum _starpu_mp_command
 * constant, returned as a string the caller must not free -- the
 * implementation is not visible here; confirm. */
86 const char *_starpu_mp_common_command_to_string(const int command);
87 
/* Kind of a message-passing node: MIC or MPI, each as sink or source, plus
 * an invalid marker as the last value. Stored in the `kind` member of
 * struct _starpu_mp_node and passed to _starpu_mp_common_node_create(). */
88 enum _starpu_mp_node_kind
89 {
90  STARPU_NODE_MIC_SINK,
91  STARPU_NODE_MIC_SOURCE,
92  STARPU_NODE_MPI_SINK,
93  STARPU_NODE_MPI_SOURCE,
94  STARPU_NODE_INVALID_KIND
95 };
96 
/* Return a string for `kind`, which is taken as a plain int.
 * NOTE(review): presumably the name of the matching enum
 * _starpu_mp_node_kind constant -- the implementation is not visible here;
 * confirm. */
97 const char *_starpu_mp_common_node_kind_to_string(const int kind);
98 
/* Connection handle of a node. The available member depends on the build
 * configuration: a SCIF endpoint descriptor when STARPU_USE_MIC is defined,
 * an integer remote node id when STARPU_USE_MPI_MASTER_SLAVE is defined.
 * NOTE(review): if neither macro is defined while STARPU_USE_MP is, this
 * union has no members, which ISO C does not permit -- presumably the build
 * system guarantees that at least one backend is enabled; confirm. */
99 union _starpu_mp_connection
100 {
101 #ifdef STARPU_USE_MIC
102  scif_epd_t mic_endpoint;
103 #endif
104 #ifdef STARPU_USE_MPI_MASTER_SLAVE
105  int mpi_remote_nodeid;
106 #endif
107 };
108 
/* Description of a data transfer: a size, an address and an opaque event
 * pointer.
 * NOTE(review): presumably sent as the argument of the RECV_FROM_HOST /
 * SEND_TO_HOST style commands, with `size` in bytes and `event` identifying
 * the asynchronous request -- not shown in this header; confirm. */
109 struct _starpu_mp_transfer_command
110 {
111  size_t size;
112  void *addr;
113  void *event;
114 };
115 
/* Same fields as struct _starpu_mp_transfer_command, preceded by a device id.
 * NOTE(review): presumably used for the *_SINK transfer commands, where
 * `devid` designates the peer device -- not shown in this header; confirm. */
116 struct _starpu_mp_transfer_command_to_device
117 {
118  int devid;
119  size_t size;
120  void *addr;
121  void *event;
122 };
123 
/* List element type `mp_barrier`, declared through the LIST_TYPE macro of
 * <common/list.h>. This header relies on it providing `struct mp_barrier`
 * (pointed to by struct mp_task) and `struct mp_barrier_list` (the
 * barrier_list member of struct _starpu_mp_node).
 * Each element holds an integer id and two barriers.
 * NOTE(review): presumably the two barriers synchronise the workers of a
 * combined worker before and after the task body -- confirm. */
124 LIST_TYPE(mp_barrier,
125  int id;
126  starpu_pthread_barrier_t before_work_barrier;
127  starpu_pthread_barrier_t after_work_barrier;
128  );
129 
/* List element type `mp_message`, declared through the LIST_TYPE macro of
 * <common/list.h>; `struct mp_message_list` is the type of the message_queue
 * member of struct _starpu_mp_node.
 * Each element holds a command type, a buffer pointer and an integer size.
 * NOTE(review): presumably `size` is the length of `buffer` in bytes; who
 * owns and frees `buffer` is not visible here -- confirm. */
130 LIST_TYPE(mp_message,
131  enum _starpu_mp_command type;
132  char *buffer;
133  int size;
134  );
135 
/* Description of one task. Pointers to such tasks are stored in the
 * run_table and run_table_detached members of struct _starpu_mp_node.
 * NOTE(review): the per-field notes below are inferred from names and types
 * only -- confirm against the code that fills and consumes this structure. */
136 struct mp_task
137 {
     /* Function to run; takes a void ** and a void * (presumably
      * `interfaces` and `cl_arg` below). */
138  void (*kernel)(void **, void *);
     /* Array of opaque pointers with its element count. */
139  void **interfaces;
140  unsigned nb_interfaces;
     /* Opaque argument. */
141  void *cl_arg;
     /* Presumably the core the task is assigned to. */
142  unsigned coreid;
143  enum starpu_codelet_type type;
     /* Presumably boolean flag and id describing a combined (parallel) worker. */
144  int is_parallel_task;
145  int combined_workerid;
     /* Presumably non-zero for tasks started by an *_EXECUTE_DETACHED command. */
146  int detached;
     /* Barrier element associated with the task (see LIST_TYPE(mp_barrier, ...)). */
147  struct mp_barrier* mp_barrier;
148 };
149 
/* List element type `_starpu_mp_event`, declared through the LIST_TYPE macro
 * of <common/list.h>; `struct _starpu_mp_event_list` is the type of the
 * event_list member of struct _starpu_mp_node.
 * Each element holds an async channel, an opaque remote event pointer and a
 * command value.
 * NOTE(review): presumably `answer_cmd` is the command sent back once the
 * event completes, and `remote_event` is the peer's handle for it -- confirm. */
150 LIST_TYPE(_starpu_mp_event,
151  struct _starpu_async_channel event;
152  void * remote_event;
153  enum _starpu_mp_command answer_cmd;
154 );
155 
156 
/* State of one message-passing node: its identification, its connections,
 * its worker bookkeeping and the callbacks implementing its operations.
 * Pointers to this structure are returned by _starpu_mp_common_node_create()
 * and released with _starpu_mp_common_node_destroy().
 * NOTE(review): notes marked "presumably" are inferred from names and types
 * only -- confirm against the implementation. */
159 struct _starpu_mp_node
160 {
     /* Kind of this node (see enum _starpu_mp_node_kind). */
161  enum _starpu_mp_node_kind kind;
162 
     /* Presumably the id of the first worker attached to this node. */
163  int baseworkerid;
164 
165  /* the number of cores on the device
166  * Must be initialized during the init function */
167  int nb_cores;
168 
169  /* Is StarPU running */
170  int is_running;
171 
     /* Opaque buffer; presumably BUFFER_SIZE bytes -- confirm. */
175  void *buffer;
176 
     /* Presumably the id of the peer this node talks to. */
180  int peer_id;
181 
     /* Device id. */
184  int devid;
185 
     /* Presumably the number of sink nodes. */
188  unsigned int nb_mp_sinks;
189 
     /* Connection handle; presumably the one used for commands. */
192  union _starpu_mp_connection mp_connection;
193 
     /* Connection handle; presumably for host <-> sink data transfers. */
196  union _starpu_mp_connection host_sink_dt_connection;
197 
     /* Mutex; presumably serialises use of the connections above. */
201  starpu_pthread_mutex_t connection_mutex;
202 
     /* Pointer to connection handles; presumably an array with one entry per
      * peer sink, for sink <-> sink data transfers. */
211  union _starpu_mp_connection *sink_sink_dt_connections;
212 
     /* List of _starpu_mp_event elements. */
216  struct _starpu_mp_event_list event_list;
217 
     /* Barrier; presumably waited on until initialisation is complete. */
219  starpu_pthread_barrier_t init_completed_barrier;
220 
     /* Opaque pointer; presumably the table of worker threads. */
222  void* thread_table;
223 
224  /* list where threads add messages to send to the source node */
225  struct mp_message_list message_queue;
226  starpu_pthread_mutex_t message_queue_mutex;
227 
228  /* list of barriers for combined workers */
229  struct mp_barrier_list barrier_list;
230  starpu_pthread_mutex_t barrier_mutex;
231 
232  /* tables from which workers come to pick tasks */
233  struct mp_task ** run_table;
234  struct mp_task ** run_table_detached;
235  sem_t * sem_run_table;
236 
     /* Node life-cycle callbacks. report_error receives the four arguments
      * passed by STARPU_MP_COMMON_REPORT_ERROR: function name, file name,
      * line number and status. */
238  void (*init) (struct _starpu_mp_node *node);
239  void (*launch_workers) (struct _starpu_mp_node *node);
240  void (*deinit) (struct _starpu_mp_node *node);
241  void (*report_error) (const char *, const char *, const int, const int);
242 
     /* Callbacks taking a node, a void * and an int (presumably a message
      * buffer and its length), plus a readiness test for receiving. */
244  int (*mp_recv_is_ready) (const struct _starpu_mp_node *);
245  void (*mp_send) (const struct _starpu_mp_node *, void *, int);
246  void (*mp_recv) (const struct _starpu_mp_node *, void *, int);
247 
     /* Data-transfer callbacks. Presumably (node, buffer, length, event) and,
      * for the *_device variants, (node, device id, buffer, length, event). */
249  void (*dt_send) (const struct _starpu_mp_node *, void *, int, void *);
250  void (*dt_recv) (const struct _starpu_mp_node *, void *, int, void *);
251  void (*dt_send_to_device) (const struct _starpu_mp_node *, int, void *, int, void *);
252  void (*dt_recv_from_device) (const struct _starpu_mp_node *, int, void *, int, void *);
253 
     /* Test callback on an async channel; presumably reports completion. */
255  int (*dt_test) (struct _starpu_async_channel *);
256 
     /* get_kernel_from_job and lookup both return a pointer to a function of
      * type void (void); lookup takes a char * (presumably a symbol name). */
257  void (*(*get_kernel_from_job) (const struct _starpu_mp_node *,struct _starpu_job *))(void);
258  void (*(*lookup) (const struct _starpu_mp_node *, char* ))(void);
259  void (*bind_thread) (const struct _starpu_mp_node *, int,int *,int);
     /* Callbacks taking a node, a void * and an int (presumably the argument
      * buffer of the corresponding command and its length). */
260  void (*execute) (struct _starpu_mp_node *, void *, int);
261  void (*allocate) (const struct _starpu_mp_node *, void *, int);
262  void (*free) (const struct _starpu_mp_node *, void *, int);
263 };
264 
/* Create a node of kind `node_kind` for the peer device `peer_devid` and
 * return a pointer to it. The declaration carries STARPU_ATTRIBUTE_MALLOC.
 * NOTE(review): presumably the result is freshly allocated and must be
 * released with _starpu_mp_common_node_destroy(); behaviour on failure is
 * not visible here -- confirm. */
265 struct _starpu_mp_node * _starpu_mp_common_node_create(enum _starpu_mp_node_kind node_kind, int peer_devid) STARPU_ATTRIBUTE_MALLOC;
266 
/* Destroy `node`.
 * NOTE(review): presumably the counterpart of
 * _starpu_mp_common_node_create(), releasing the node and its resources;
 * whether a NULL `node` is accepted is not visible here -- confirm. */
267 void _starpu_mp_common_node_destroy(struct _starpu_mp_node *node);
268 
/* Send `command` using `node`, together with the argument buffer `arg` of
 * size `arg_size`.
 * NOTE(review): presumably `arg_size` is in bytes and `arg` may be NULL
 * when it is 0 -- the implementation is not visible here; confirm. */
269 void _starpu_mp_common_send_command(const struct _starpu_mp_node *node,
270  const enum _starpu_mp_command command,
271  void *arg, int arg_size);
272 
/* Receive a command using `node` and return it. `arg` and `arg_size` are
 * passed by pointer.
 * NOTE(review): presumably they are output parameters set to the received
 * argument buffer and its size, and the call blocks until a command arrives;
 * the lifetime of the returned buffer is not visible here -- confirm. */
273 enum _starpu_mp_command _starpu_mp_common_recv_command(const struct _starpu_mp_node *node, void **arg, int *arg_size);
274 
275 
276 #endif /* STARPU_USE_MP */
277 
278 #endif /* __MP_COMMON_H__ */
Definition: copy_driver.h:127
Definition: jobs.h:79