Realm
A distributed, event-based tasking library
Loading...
Searching...
No Matches
network.h
Go to the documentation of this file.
1/*
2 * Copyright 2025 Stanford University, NVIDIA Corporation
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18// Realm inter-node networking abstractions
19
20#ifndef REALM_NETWORK_H
21#define REALM_NETWORK_H
22
23#include "realm/realm_config.h"
24#include "realm/module.h"
25#include "realm/nodeset.h"
26#include "realm/memory.h"
27#include "realm/bytearray.h"
28
29#include <map>
30
31namespace Realm {
32
33 // NodeID defined in nodeset.h
34
35 class NetworkModule;
36 class MemoryImpl;
37 class IBMemory;
38 class ByteArray;
39 class ActiveMessageImpl;
40 class IncomingMessageManager;
41 class NetworkSegment;
42
43 // a RemoteAddress is used to name the target of an RDMA operation - in some
44 // cases it's as simple as a pointer, but in others additional info is needed
45 // (NOTE: ptr+extra is 16B, but the union below reserves 384B via raw_bytes)
47 union {
48 struct {
49 uintptr_t ptr;
50 uintptr_t extra;
51 };
52 unsigned char raw_bytes[384];
53 };
54 };
55
56 // a LocalAddress is used to name the local source of an RDMA write or target
57 // of an RDMA read
58 struct LocalAddress {
60 uintptr_t offset;
61 };
62
63 namespace Network {
64 // a few globals for efficiency
65 extern NodeID my_node_id;
66 extern NodeID max_node_id;
67 extern NodeSet all_peers;
68 // all peers that can access shared memory from this node
69 // NOTE: This is an over-estimation. Users should be robust to the fact that this may
70 // include peers that are not able to access shared memory.
71 extern NodeSet shared_peers;
72
73 // in most cases, there will be a single network module - if so, we set
74 // this so we don't have to do a per-node lookup
76
77 // gets the network for a given node
79
80 // and a few "global" operations that abstract over any/all networks
81 void barrier(void);
82
83 // a quiescence check across all nodes (i.e. has anybody sent anything
84 // since the previous quiescence check)
86
87 // collective communication across all nodes (TODO: subcommunicators?)
88 template <typename T>
89 T broadcast(NodeID root, T val);
90
91 template <typename T>
92 void gather(NodeID root, T val, std::vector<T> &result);
93 template <typename T>
94 void gather(NodeID root, T val); // for non-root participants
95
96 // untyped versions
97 void broadcast(NodeID root, const void *val_in, void *val_out, size_t bytes);
98 void gather(NodeID root, const void *val_in, void *vals_out, size_t bytes);
99
100 // for sending active messages
102 create_active_message_impl(NodeID target, unsigned short msgid, size_t header_size,
103 size_t max_payload_size, const void *src_payload_addr,
104 size_t src_payload_lines, size_t src_payload_line_stride,
105 void *storage_base, size_t storage_size);
106
108 NodeID target, unsigned short msgid, size_t header_size, size_t max_payload_size,
109 const LocalAddress &src_payload_addr, size_t src_payload_lines,
110 size_t src_payload_line_stride, const RemoteAddress &dest_payload_addr,
111 void *storage_base, size_t storage_size);
112
114 NodeID target, unsigned short msgid, size_t header_size, size_t max_payload_size,
115 const RemoteAddress &dest_payload_addr, void *storage_base, size_t storage_size);
116
118 const NodeSet &targets, unsigned short msgid, size_t header_size,
119 size_t max_payload_size, const void *src_payload_addr, size_t src_payload_lines,
120 size_t src_payload_line_stride, void *storage_base, size_t storage_size);
121
122 size_t recommended_max_payload(NodeID target, bool with_congestion,
123 size_t header_size);
124 size_t recommended_max_payload(const NodeSet &targets, bool with_congestion,
125 size_t header_size);
126 size_t recommended_max_payload(NodeID target, const RemoteAddress &dest_payload_addr,
127 bool with_congestion, size_t header_size);
128 size_t recommended_max_payload(NodeID target, const void *data, size_t bytes_per_line,
129 size_t lines, size_t line_stride, bool with_congestion,
130 size_t header_size);
131 size_t recommended_max_payload(const NodeSet &targets, const void *data,
132 size_t bytes_per_line, size_t lines,
133 size_t line_stride, bool with_congestion,
134 size_t header_size);
135 size_t recommended_max_payload(NodeID target, const LocalAddress &src_payload_addr,
136 size_t bytes_per_line, size_t lines,
137 size_t line_stride,
138 const RemoteAddress &dest_payload_addr,
139 bool with_congestion, size_t header_size);
140 }; // namespace Network
141
142 // a network module provides additional functionality on top of a normal Realm
143 // module
145 protected:
146 NetworkModule(const std::string &_name);
147
148 public:
149 // all subclasses should define this (static) method - its responsibilities
150 // are:
151 // 1) determine if the network module should even be loaded
152 // 2) fix the command line if the spawning system hijacked it
153 // static NetworkModule *create_network_module(RuntimeImpl *runtime,
154 // int *argc, const char ***argv);
155
156 // Enumerates all the peers that the current node could potentially share memory with
157 virtual void get_shared_peers(NodeSet &shared_peers) = 0;
158
159 // actual parsing of the command line should wait until here if at all
160 // possible
161 virtual void parse_command_line(RuntimeImpl *runtime,
162 std::vector<std::string> &cmdline);
163
164 // "attaches" to the network, if that is meaningful - attempts to
165 // bind/register/(pick your network-specific verb) the requested memory
166 // segments with the network
167 virtual void attach(RuntimeImpl *runtime,
168 std::vector<NetworkSegment *> &segments) = 0;
169
170 // detaches from the network
171 virtual void detach(RuntimeImpl *runtime,
172 std::vector<NetworkSegment *> &segments) = 0;
173
174 // collective communication within this network
175 virtual void barrier(void) = 0;
176 virtual void broadcast(NodeID root, const void *val_in, void *val_out,
177 size_t bytes) = 0;
178 virtual void gather(NodeID root, const void *val_in, void *vals_out,
179 size_t bytes) = 0;
180 virtual void allgatherv(const char *val_in, size_t bytes, std::vector<char> &vals_out,
181 std::vector<size_t> &lengths) = 0;
182
183 virtual size_t sample_messages_received_count(void) = 0;
184 virtual bool check_for_quiescence(size_t sampled_receive_count) = 0;
185
186 // used to create a remote proxy for a memory
187 virtual MemoryImpl *create_remote_memory(RuntimeImpl *runtime, Memory m, size_t size,
188 Memory::Kind kind,
189 const ByteArray &rdma_info) = 0;
190 virtual IBMemory *create_remote_ib_memory(RuntimeImpl *runtime, Memory m, size_t size,
191 Memory::Kind kind,
192 const ByteArray &rdma_info) = 0;
193
194 virtual ActiveMessageImpl *
195 create_active_message_impl(NodeID target, unsigned short msgid, size_t header_size,
196 size_t max_payload_size, const void *src_payload_addr,
197 size_t src_payload_lines, size_t src_payload_line_stride,
198 void *storage_base, size_t storage_size) = 0;
199
201 NodeID target, unsigned short msgid, size_t header_size, size_t max_payload_size,
202 const LocalAddress &src_payload_addr, size_t src_payload_lines,
203 size_t src_payload_line_stride, const RemoteAddress &dest_payload_addr,
204 void *storage_base, size_t storage_size) = 0;
205
206 virtual ActiveMessageImpl *
207 create_active_message_impl(NodeID target, unsigned short msgid, size_t header_size,
208 size_t max_payload_size,
209 const RemoteAddress &dest_payload_addr, void *storage_base,
210 size_t storage_size) = 0;
211
213 const NodeSet &targets, unsigned short msgid, size_t header_size,
214 size_t max_payload_size, const void *src_payload_addr, size_t src_payload_lines,
215 size_t src_payload_line_stride, void *storage_base, size_t storage_size) = 0;
216
217 virtual size_t recommended_max_payload(NodeID target, bool with_congestion,
218 size_t header_size) = 0;
219 virtual size_t recommended_max_payload(const NodeSet &targets, bool with_congestion,
220 size_t header_size) = 0;
221 virtual size_t recommended_max_payload(NodeID target,
222 const RemoteAddress &dest_payload_addr,
223 bool with_congestion, size_t header_size) = 0;
224 virtual size_t recommended_max_payload(NodeID target, const void *data,
225 size_t bytes_per_line, size_t lines,
226 size_t line_stride, bool with_congestion,
227 size_t header_size) = 0;
228 virtual size_t recommended_max_payload(const NodeSet &targets, const void *data,
229 size_t bytes_per_line, size_t lines,
230 size_t line_stride, bool with_congestion,
231 size_t header_size) = 0;
232 virtual size_t recommended_max_payload(NodeID target,
233 const LocalAddress &src_payload_addr,
234 size_t bytes_per_line, size_t lines,
235 size_t line_stride,
236 const RemoteAddress &dest_payload_addr,
237 bool with_congestion, size_t header_size) = 0;
238 };
239
240 namespace NetworkSegmentInfo {
241 // "enum" (using a namespace so that the values can be extended in other
242 // headers) describing the different kinds of memory that a network
243 // segment can live in
244 typedef unsigned MemoryType;
245
246 // each memory type defines for itself what this extra data means
247 typedef uintptr_t MemoryTypeExtraData;
248
249 static const MemoryType Unknown = 0;
250
251 // generic memory that is readable and writable by the host CPUs
252 static const MemoryType HostMem = 1;
253
254 // optional flags for a network segment (individual flag values live in OptionFlags)
255 typedef unsigned FlagsType;
256 struct OptionFlags {
257 // registration should be performed on-demand rather than eagerly (bit 0)
258 static const FlagsType OnDemandRegistration = 1U << 0;
259 };
260 }; // namespace NetworkSegmentInfo
261
263 public:
265
266 // normally a request will just be for a particular size
267 void request(NetworkSegmentInfo::MemoryType _memtype, size_t _bytes,
268 size_t _alignment, NetworkSegmentInfo::MemoryTypeExtraData _memextra = 0,
270
271 // but it can also be for a pre-allocated chunk of memory with a fixed address
272 void assign(NetworkSegmentInfo::MemoryType _memtype, void *_base, size_t _bytes,
275
276 void *base; // once this is non-null, it cannot be changed
277 size_t bytes, alignment;
281
282 // again, a single network puts itself here in addition to adding to the map
285
286 // a map from each of the networks that successfully bound the segment to
287 // whatever data (if any) that network needs to track the binding
288 std::map<NetworkModule *, ByteArray> networks;
289
290 void add_rdma_info(NetworkModule *network, const void *data, size_t len);
291 const ByteArray *get_rdma_info(NetworkModule *network) const;
292
293 // returns whether the segment is registered for all networks,
294 // or for a specific network
295 bool is_registered() const;
296 bool is_registered(NetworkModule *network) const;
297
298 // tests whether an address range is in segment
299 bool in_segment(const void *range_base, size_t range_bytes) const;
300 bool in_segment(uintptr_t range_base, size_t range_bytes) const;
301 };
302
303}; // namespace Realm
304
305#include "realm/network.inl"
306
307#endif
Definition activemsg.h:205
Definition bytearray.h:53
Definition ib_memory.h:30
Definition activemsg.h:345
Definition mem_impl.h:50
Definition memory.h:33
Kind
Definition memory.h:59
Definition module.h:42
Definition network.h:144
virtual IBMemory * create_remote_ib_memory(RuntimeImpl *runtime, Memory m, size_t size, Memory::Kind kind, const ByteArray &rdma_info)=0
virtual size_t sample_messages_received_count(void)=0
virtual bool check_for_quiescence(size_t sampled_receive_count)=0
virtual ActiveMessageImpl * create_active_message_impl(NodeID target, unsigned short msgid, size_t header_size, size_t max_payload_size, const RemoteAddress &dest_payload_addr, void *storage_base, size_t storage_size)=0
virtual size_t recommended_max_payload(NodeID target, const RemoteAddress &dest_payload_addr, bool with_congestion, size_t header_size)=0
virtual void attach(RuntimeImpl *runtime, std::vector< NetworkSegment * > &segments)=0
virtual size_t recommended_max_payload(NodeID target, const LocalAddress &src_payload_addr, size_t bytes_per_line, size_t lines, size_t line_stride, const RemoteAddress &dest_payload_addr, bool with_congestion, size_t header_size)=0
virtual size_t recommended_max_payload(const NodeSet &targets, const void *data, size_t bytes_per_line, size_t lines, size_t line_stride, bool with_congestion, size_t header_size)=0
virtual void parse_command_line(RuntimeImpl *runtime, std::vector< std::string > &cmdline)
virtual void broadcast(NodeID root, const void *val_in, void *val_out, size_t bytes)=0
virtual size_t recommended_max_payload(NodeID target, const void *data, size_t bytes_per_line, size_t lines, size_t line_stride, bool with_congestion, size_t header_size)=0
virtual ActiveMessageImpl * create_active_message_impl(NodeID target, unsigned short msgid, size_t header_size, size_t max_payload_size, const LocalAddress &src_payload_addr, size_t src_payload_lines, size_t src_payload_line_stride, const RemoteAddress &dest_payload_addr, void *storage_base, size_t storage_size)=0
virtual ActiveMessageImpl * create_active_message_impl(const NodeSet &targets, unsigned short msgid, size_t header_size, size_t max_payload_size, const void *src_payload_addr, size_t src_payload_lines, size_t src_payload_line_stride, void *storage_base, size_t storage_size)=0
virtual MemoryImpl * create_remote_memory(RuntimeImpl *runtime, Memory m, size_t size, Memory::Kind kind, const ByteArray &rdma_info)=0
virtual void barrier(void)=0
virtual void detach(RuntimeImpl *runtime, std::vector< NetworkSegment * > &segments)=0
virtual void get_shared_peers(NodeSet &shared_peers)=0
virtual ActiveMessageImpl * create_active_message_impl(NodeID target, unsigned short msgid, size_t header_size, size_t max_payload_size, const void *src_payload_addr, size_t src_payload_lines, size_t src_payload_line_stride, void *storage_base, size_t storage_size)=0
NetworkModule(const std::string &_name)
virtual void allgatherv(const char *val_in, size_t bytes, std::vector< char > &vals_out, std::vector< size_t > &lengths)=0
virtual void gather(NodeID root, const void *val_in, void *vals_out, size_t bytes)=0
virtual size_t recommended_max_payload(NodeID target, bool with_congestion, size_t header_size)=0
virtual size_t recommended_max_payload(const NodeSet &targets, bool with_congestion, size_t header_size)=0
Definition network.h:262
NetworkModule * single_network
Definition network.h:283
NetworkSegmentInfo::FlagsType flags
Definition network.h:280
NetworkSegmentInfo::MemoryType memtype
Definition network.h:278
const ByteArray * get_rdma_info(NetworkModule *network) const
bool is_registered(NetworkModule *network) const
ByteArray * single_network_data
Definition network.h:284
bool in_segment(const void *range_base, size_t range_bytes) const
size_t alignment
Definition network.h:277
NetworkSegmentInfo::MemoryTypeExtraData memextra
Definition network.h:279
std::map< NetworkModule *, ByteArray > networks
Definition network.h:288
void assign(NetworkSegmentInfo::MemoryType _memtype, void *_base, size_t _bytes, NetworkSegmentInfo::MemoryTypeExtraData _memextra=0, NetworkSegmentInfo::FlagsType _flags=0)
void add_rdma_info(NetworkModule *network, const void *data, size_t len)
bool is_registered() const
void request(NetworkSegmentInfo::MemoryType _memtype, size_t _bytes, size_t _alignment, NetworkSegmentInfo::MemoryTypeExtraData _memextra=0, NetworkSegmentInfo::FlagsType _flags=0)
void * base
Definition network.h:276
bool in_segment(uintptr_t range_base, size_t range_bytes) const
Definition nodeset.h:117
Definition runtime_impl.h:264
#define REALM_INTERNAL_API_EXTERNAL_LINKAGE
Definition compiler_support.h:218
unsigned MemoryType
Definition network.h:244
uintptr_t MemoryTypeExtraData
Definition network.h:247
unsigned FlagsType
Definition network.h:255
NodeSet all_peers
ActiveMessageImpl * create_active_message_impl(NodeID target, unsigned short msgid, size_t header_size, size_t max_payload_size, const void *src_payload_addr, size_t src_payload_lines, size_t src_payload_line_stride, void *storage_base, size_t storage_size)
T broadcast(NodeID root, T val)
bool check_for_quiescence(IncomingMessageManager *message_manager)
void gather(NodeID root, T val, std::vector< T > &result)
NodeID max_node_id
size_t recommended_max_payload(NodeID target, bool with_congestion, size_t header_size)
NodeSet shared_peers
NetworkModule * single_network
void barrier(void)
NetworkModule * get_network(NodeID node)
Definition activemsg.h:38
int NodeID
Definition nodeset.h:40
Definition network.h:58
uintptr_t offset
Definition network.h:60
const NetworkSegment * segment
Definition network.h:59
static const FlagsType OnDemandRegistration
Definition network.h:258
Definition network.h:46
uintptr_t extra
Definition network.h:50
uintptr_t ptr
Definition network.h:49
unsigned char raw_bytes[384]
Definition network.h:52
unsigned short msgid
Definition ucp_internal.h:2