Realm
A distributed, event-based tasking library
Loading...
Searching...
No Matches
machine_impl.h
Go to the documentation of this file.
1/*
2 * Copyright 2025 Stanford University, NVIDIA Corporation
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18// implementation for Realm Machine class
19
20#ifndef REALM_MACHINE_IMPL_H
21#define REALM_MACHINE_IMPL_H
22
23#include "realm/machine.h"
24#include "realm/network.h"
25#include "realm/mutex.h"
26#include "realm/atomics.h"
27
28#include <vector>
29#include <set>
30#include <optional>
31
32namespace Realm {
33
34 template <typename KT, typename AT>
38 bool add_affinity(KT key, const AT &aff, bool is_local);
39
40 std::map<KT, AT *> all;
41 std::map<KT, AT *> local;
42 std::map<KT, AT *> best;
43 };
44
54
67
69 MachineNodeInfo(int _node, RuntimeImpl *_runtime_impl);
75 bool add_process_info(const Machine::ProcessInfo &proc_info);
76
77 void update_kind_maps(void);
78
79 int node;
80
81 // The runtime_impl is not const because we call get_processor_impl(), which is not
82 // const, but the runtime_impl should not be modified in this class
84
86
87 std::map<Processor, MachineProcInfo *> procs;
88 std::map<Processor::Kind, std::map<Processor, MachineProcInfo *>> proc_by_kind;
89
90 std::map<Memory, MachineMemInfo *> mems;
91 std::map<Memory::Kind, std::map<Memory, MachineMemInfo *>> mem_by_kind;
92 };
93
95 public:
96 MachineImpl(RuntimeImpl *_runtime_impl);
98
100
101 void get_all_memories(std::set<Memory> &mset) const;
102 void get_all_processors(std::set<Processor> &pset) const;
103
104 void get_local_processors(std::set<Processor> &pset) const;
105 void get_local_processors_by_kind(std::set<Processor> &pset,
106 Processor::Kind kind) const;
107
108 // Return the set of memories visible from a processor
109 void get_visible_memories(Processor p, std::set<Memory> &mset, bool local_only) const;
110
111 // Return the set of memories visible from a memory
112 void get_visible_memories(Memory m, std::set<Memory> &mset, bool local_only) const;
113
114 // Return the set of processors which can all see a given memory
115 void get_shared_processors(Memory m, std::set<Processor> &pset,
116 bool local_only) const;
117
119
121 bool has_affinity(Memory m1, Memory m2, Machine::AffinityDetails *details = 0) const;
122
123 int get_proc_mem_affinity(std::vector<Machine::ProcessorMemoryAffinity> &result,
124 Processor restrict_proc = Processor::NO_PROC,
125 Memory restrict_memory = Memory::NO_MEMORY,
126 bool local_only = true) const;
127
128 int get_mem_mem_affinity(std::vector<Machine::MemoryMemoryAffinity> &result,
129 Memory restrict_mem1 = Memory::NO_MEMORY,
130 Memory restrict_mem2 = Memory::NO_MEMORY,
131 bool local_only = true) const;
132
133 void parse_node_announce_data(int node_id, const void *args, size_t arglen,
134 bool remote);
135
137 bool lock_held = false);
138
141
143
145
146 void add_process_info(int node_id, const Machine::ProcessInfo &process_info,
147 bool lock_held = false);
148
149 mutable Mutex mutex;
150 std::vector<Machine::ProcessorMemoryAffinity> proc_mem_affinities;
151 std::set<Machine::MachineUpdateSubscriber *> subscribers;
152
153 std::map<int, MachineNodeInfo *> nodeinfos;
154
155 protected:
160
162 };
163
164 template <typename T, typename T2>
166 public:
167 virtual ~QueryPredicate(void){};
168
169 virtual QueryPredicate<T, T2> *clone(void) const = 0;
170
171 virtual bool matches_predicate(const MachineImpl *machine, T thing,
172 const T2 *info = 0) const = 0;
173 };
174
176
178 public:
179 ProcessorHasAffinityPredicate(Memory _memory, unsigned _min_bandwidth,
180 unsigned _max_latency);
181
182 virtual ProcQueryPredicate *clone(void) const;
183
184 virtual bool matches_predicate(const MachineImpl *machine, Processor thing,
185 const MachineProcInfo *info = 0) const;
186
187 protected:
190 unsigned max_latency;
191 };
192
193 namespace Config {
194 extern bool use_machine_query_cache; // declaration only (extern); the definition is provided elsewhere
195 };
196
203
205 public:
206 ProcessorQueryImpl(const Machine &_machine);
207 ProcessorQueryImpl(const MachineImpl *_machine_impl);
208
209 static unsigned int init, cache_invalid_count;
211 static std::map<Processor::Kind, std::vector<Processor>> _proc_cache;
212 static std::map<Processor::Kind, std::map<Memory, std::vector<Processor>>>
214
215 protected:
216 // these things are refcounted and copied-on-write
219
220 public:
221 void add_reference(void);
223 // makes and returns a copy if more than one reference is held
225
226 void restrict_to_node(int new_node_id);
229
232 size_t count_matches(void) const;
234
236 {
237 cached_mem = m;
238 if(predicates.size() == 1)
239 is_cached_mem = true;
240 else
241 is_cached_mem = false;
242 };
244 {
246 is_cached_mem = false;
247 };
249
250 public:
251 // Best affinity cost function structure
257
258 // Friend declarations for Machine::ProcessorQuery methods
261 int latency_weight);
262
263 protected:
270 std::vector<ProcQueryPredicate *> predicates;
271
272 // Best affinity cost function (not a predicate)
273 std::optional<BestAffinityCostFn> best_affinity_cost;
274
278 mutable std::vector<Processor> *cur_cached_list;
279 mutable unsigned int invalid_count;
280 unsigned int cur_index;
281 // cached list of processors
282 std::vector<Processor> *cached_list() const;
284 bool cached_query(Processor &pval, QueryType q) const;
285 bool cached_query(size_t &count) const;
288 // helper to compute best affinity filtered results and store in cur_cached_list
290 };
291
293
295 public:
296 MemoryHasProcAffinityPredicate(Processor _proc, unsigned _min_bandwidth,
297 unsigned _max_latency);
298
299 virtual MemoryQueryPredicate *clone(void) const;
300
301 virtual bool matches_predicate(const MachineImpl *machine, Memory thing,
302 const MachineMemInfo *info = 0) const;
303
304 protected:
307 unsigned max_latency;
308 };
309
311 public:
312 MemoryHasMemAffinityPredicate(Memory _memory, unsigned _min_bandwidth,
313 unsigned _max_latency);
314
315 virtual MemoryQueryPredicate *clone(void) const;
316
317 virtual bool matches_predicate(const MachineImpl *machine, Memory thing,
318 const MachineMemInfo *info = 0) const;
319
320 protected:
323 unsigned max_latency;
324 };
325
327 public:
328 MemoryQueryImpl(const Machine &_machine);
329 MemoryQueryImpl(const MachineImpl *_machine_impl);
330
331 static unsigned int init, cache_invalid_count;
333 static std::map<Memory::Kind, std::vector<Memory>> _mem_cache;
334
335 protected:
336 // these things are refcounted and copied-on-write
339
340 public:
341 void add_reference(void);
343 // makes and returns a copy if more than one reference is held
345
346 void restrict_to_node(int new_node_id);
348 void restrict_by_capacity(size_t new_min_bytes);
350
351 Memory first_match(void) const;
353 size_t count_matches(void) const;
354 Memory random_match(void) const;
356 bool cached_query(Memory p, Memory &pval);
357 bool cached_query(Memory &pval, QueryType q) const;
358 bool cached_query(size_t &count) const;
360
361 public:
362 // Best affinity cost function structures
373
374 // Friend declarations for Machine::MemoryQuery methods
375 friend Machine::MemoryQuery &
377 int latency_weight);
378 friend Machine::MemoryQuery &
380 int latency_weight);
381
382 protected:
391 mutable std::vector<Memory> *cur_cached_list;
392 mutable unsigned int invalid_count;
393 unsigned int cur_index;
394 std::vector<MemoryQueryPredicate *> predicates;
395
396 // Best affinity cost functions (not predicates)
397 std::optional<BestProcAffinityCostFn> best_proc_affinity_cost;
398 std::optional<BestMemAffinityCostFn> best_mem_affinity_cost;
399
400 std::vector<Memory> *cached_list() const;
402 // helper to compute best affinity filtered results and store in cur_cached_list
404 };
405
407 inline MachineImpl *get_machine(void) { return machine_singleton; } // returns the machine_singleton pointer unchanged
408 extern void cleanup_query_caches();
409 // active messages
410
421
422}; // namespace Realm
423
424 // include "machine_impl.inl"
425
426#endif // ifndef REALM_MACHINE_IMPL_H
Definition machine_impl.h:94
void get_shared_processors(Memory m, std::set< Processor > &pset, bool local_only) const
bool has_affinity(Memory m1, Memory m2, Machine::AffinityDetails *details=0) const
MachineNodeInfo * get_nodeinfo(Memory m) const
MachineImpl(RuntimeImpl *_runtime_impl)
void parse_node_announce_data(int node_id, const void *args, size_t arglen, bool remote)
RuntimeImpl * get_runtime_impl(void) const
void add_process_info(int node_id, const Machine::ProcessInfo &process_info, bool lock_held=false)
bool has_affinity(Processor p, Memory m, Machine::AffinityDetails *details=0) const
RuntimeImpl * runtime_impl
Definition machine_impl.h:161
MachineNodeInfo * get_nodeinfo(int node) const
void get_local_processors(std::set< Processor > &pset) const
void enumerate_mem_mem_affinities(void)
void remove_subscription(Machine::MachineUpdateSubscriber *subscriber)
std::map< int, MachineNodeInfo * > nodeinfos
Definition machine_impl.h:153
int get_proc_mem_affinity(std::vector< Machine::ProcessorMemoryAffinity > &result, Processor restrict_proc=Processor::NO_PROC, Memory restrict_memory=Memory::NO_MEMORY, bool local_only=true) const
void add_proc_mem_affinity(const Machine::ProcessorMemoryAffinity &pma, bool lock_held=false)
int get_mem_mem_affinity(std::vector< Machine::MemoryMemoryAffinity > &result, Memory restrict_mem1=Memory::NO_MEMORY, Memory restrict_mem2=Memory::NO_MEMORY, bool local_only=true) const
std::vector< Machine::ProcessorMemoryAffinity > proc_mem_affinities
Definition machine_impl.h:150
std::set< Machine::MachineUpdateSubscriber * > subscribers
Definition machine_impl.h:151
void get_all_memories(std::set< Memory > &mset) const
MachineNodeInfo * get_nodeinfo(Processor p) const
void invalidate_query_caches()
void get_local_processors_by_kind(std::set< Processor > &pset, Processor::Kind kind) const
void get_all_processors(std::set< Processor > &pset) const
void get_visible_memories(Processor p, std::set< Memory > &mset, bool local_only) const
bool get_process_info(Processor p, Machine::ProcessInfo *info) const
void add_subscription(Machine::MachineUpdateSubscriber *subscriber)
Mutex mutex
Definition machine_impl.h:149
void update_kind_maps(void)
void get_visible_memories(Memory m, std::set< Memory > &mset, bool local_only) const
Definition machine.h:32
MemoryQuery(const Machine &m)
ProcessorQuery(const Machine &m)
Definition machine_impl.h:310
Memory memory
Definition machine_impl.h:321
virtual bool matches_predicate(const MachineImpl *machine, Memory thing, const MachineMemInfo *info=0) const
MemoryHasMemAffinityPredicate(Memory _memory, unsigned _min_bandwidth, unsigned _max_latency)
virtual MemoryQueryPredicate * clone(void) const
unsigned max_latency
Definition machine_impl.h:323
unsigned min_bandwidth
Definition machine_impl.h:322
Definition machine_impl.h:294
unsigned min_bandwidth
Definition machine_impl.h:306
MemoryHasProcAffinityPredicate(Processor _proc, unsigned _min_bandwidth, unsigned _max_latency)
Processor proc
Definition machine_impl.h:305
virtual bool matches_predicate(const MachineImpl *machine, Memory thing, const MachineMemInfo *info=0) const
unsigned max_latency
Definition machine_impl.h:307
virtual MemoryQueryPredicate * clone(void) const
Definition machine_impl.h:326
bool is_restricted_node
Definition machine_impl.h:385
Memory next(Memory after)
void restrict_by_capacity(size_t new_min_bytes)
friend Machine::MemoryQuery & Machine::MemoryQuery::best_affinity_to(Memory m, int bandwidth_weight, int latency_weight)
Memory first_match(void) const
std::vector< Memory > * cur_cached_list
Definition machine_impl.h:391
bool valid_cache
Definition machine_impl.h:390
MemoryQueryImpl(const Machine &_machine)
bool cached_query(size_t &count) const
void add_predicate(MemoryQueryPredicate *pred)
int restricted_node_id
Definition machine_impl.h:386
Memory random_match(void) const
bool is_restricted_kind
Definition machine_impl.h:387
atomic< int > references
Definition machine_impl.h:383
void remove_reference(void)
size_t restricted_min_capacity
Definition machine_impl.h:389
std::vector< MemoryQueryPredicate * > predicates
Definition machine_impl.h:394
std::optional< BestMemAffinityCostFn > best_mem_affinity_cost
Definition machine_impl.h:398
bool shared_cached_list
Definition machine_impl.h:390
void restrict_to_kind(Memory::Kind new_kind)
Memory mutated_cached_query(Memory p)
unsigned int invalid_count
Definition machine_impl.h:392
bool cached_query(Memory p, Memory &pval)
Memory build_best_affinity_cache() const
void restrict_to_node(int new_node_id)
static unsigned int init
Definition machine_impl.h:331
static std::map< Memory::Kind, std::vector< Memory > > _mem_cache
Definition machine_impl.h:333
static unsigned int cache_invalid_count
Definition machine_impl.h:331
Memory cache_next(Memory after)
unsigned int cur_index
Definition machine_impl.h:393
const MachineImpl * machine
Definition machine_impl.h:384
MemoryQueryImpl(const MemoryQueryImpl &copy_from)
std::vector< Memory > * cached_list() const
std::optional< BestProcAffinityCostFn > best_proc_affinity_cost
Definition machine_impl.h:397
Memory next_match(Memory after) const
Memory::Kind restricted_kind
Definition machine_impl.h:388
MemoryQueryImpl(const MachineImpl *_machine_impl)
bool cached_query(Memory &pval, QueryType q) const
size_t count_matches(void) const
static bool global_valid_cache
Definition machine_impl.h:332
MemoryQueryImpl * writeable_reference(void)
Definition memory.h:33
Kind
Definition memory.h:59
static const Memory NO_MEMORY
Definition memory.h:49
Definition machine_impl.h:177
virtual bool matches_predicate(const MachineImpl *machine, Processor thing, const MachineProcInfo *info=0) const
ProcessorHasAffinityPredicate(Memory _memory, unsigned _min_bandwidth, unsigned _max_latency)
virtual ProcQueryPredicate * clone(void) const
unsigned min_bandwidth
Definition machine_impl.h:189
Memory memory
Definition machine_impl.h:188
unsigned max_latency
Definition machine_impl.h:190
Definition machine_impl.h:204
static unsigned int init
Definition machine_impl.h:209
int restricted_node_id
Definition machine_impl.h:267
bool is_restricted_kind
Definition machine_impl.h:268
bool shared_cached_list
Definition machine_impl.h:277
void set_cached_mem(Memory m)
Definition machine_impl.h:235
std::vector< Processor > * cached_list() const
ProcessorQueryImpl(const MachineImpl *_machine_impl)
friend Machine::ProcessorQuery & Machine::ProcessorQuery::best_affinity_to(Memory m, int bandwidth_weight, int latency_weight)
ProcessorQueryImpl(const ProcessorQueryImpl &copy_from)
Processor next_match(Processor after)
static std::map< Processor::Kind, std::map< Memory, std::vector< Processor > > > _proc_cache_affinity
Definition machine_impl.h:213
std::optional< BestAffinityCostFn > best_affinity_cost
Definition machine_impl.h:273
Processor first_match(void) const
void restrict_to_node(int new_node_id)
std::vector< ProcQueryPredicate * > predicates
Definition machine_impl.h:270
std::vector< Processor > * cur_cached_list
Definition machine_impl.h:278
static std::map< Processor::Kind, std::vector< Processor > > _proc_cache
Definition machine_impl.h:211
ProcessorQueryImpl * writeable_reference(void)
Processor cache_next(Processor after)
size_t count_matches(void) const
void reset_cached_mem()
Definition machine_impl.h:243
static bool global_valid_cache
Definition machine_impl.h:210
void restrict_to_kind(Processor::Kind new_kind)
Processor::Kind restricted_kind
Definition machine_impl.h:269
unsigned int cur_index
Definition machine_impl.h:280
Memory cached_mem
Definition machine_impl.h:275
atomic< int > references
Definition machine_impl.h:264
bool cached_query(size_t &count) const
Processor random_match(void) const
unsigned int invalid_count
Definition machine_impl.h:279
const MachineImpl * machine
Definition machine_impl.h:265
bool is_restricted_node
Definition machine_impl.h:266
Processor build_best_affinity_cache() const
static unsigned int cache_invalid_count
Definition machine_impl.h:209
ProcessorQueryImpl(const Machine &_machine)
void add_predicate(ProcQueryPredicate *pred)
bool valid_cache
Definition machine_impl.h:277
bool is_cached_mem
Definition machine_impl.h:276
bool cached_query(Processor &pval, QueryType q) const
Processor mutated_cached_query(Processor p)
Processor next(Processor after)
bool cached_query(Processor p, Processor &pval)
Definition processor.h:37
Kind
Definition processor.h:65
static const Processor NO_PROC
Definition processor.h:54
Definition machine_impl.h:165
virtual ~QueryPredicate(void)
Definition machine_impl.h:167
virtual QueryPredicate< T, T2 > * clone(void) const =0
virtual bool matches_predicate(const MachineImpl *machine, T thing, const T2 *info=0) const =0
Definition runtime_impl.h:264
Definition mutex.h:223
Definition atomics.h:31
bool use_machine_query_cache
Definition activemsg.h:38
QueryType
Definition machine_impl.h:198
@ QUERY_FIRST
Definition machine_impl.h:200
@ QUERY_RANDOM
Definition machine_impl.h:201
@ QUERY_NEXT
Definition machine_impl.h:199
QueryPredicate< Processor, MachineProcInfo > ProcQueryPredicate
Definition machine_impl.h:175
MachineImpl * get_machine(void)
Definition machine_impl.h:407
NodeAnnounceTag
Definition machine_impl.h:412
@ NODE_ANNOUNCE_IB_MEM
Definition machine_impl.h:416
@ NODE_ANNOUNCE_PROCESS_INFO
Definition machine_impl.h:419
@ NODE_ANNOUNCE_MEM
Definition machine_impl.h:415
@ NODE_ANNOUNCE_DMA_CHANNEL
Definition machine_impl.h:418
@ NODE_ANNOUNCE_PROC
Definition machine_impl.h:414
@ NODE_ANNOUNCE_PMA
Definition machine_impl.h:417
@ NODE_ANNOUNCE_INVALID
Definition machine_impl.h:413
MachineImpl * machine_singleton
void cleanup_query_caches()
QueryPredicate< Memory, MachineMemInfo > MemoryQueryPredicate
Definition machine_impl.h:292
Definition machine_impl.h:35
std::map< KT, AT * > all
Definition machine_impl.h:40
std::map< KT, AT * > local
Definition machine_impl.h:41
std::map< KT, AT * > best
Definition machine_impl.h:42
bool add_affinity(KT key, const AT &aff, bool is_local)
Definition machine_impl.h:55
Memory m
Definition machine_impl.h:62
bool add_proc_mem_affinity(const Machine::ProcessorMemoryAffinity &pma)
MachineAffinityInfo< Memory, Machine::MemoryMemoryAffinity > mmas_out
Definition machine_impl.h:64
MachineAffinityInfo< Processor, Machine::ProcessorMemoryAffinity > pmas
Definition machine_impl.h:63
bool add_mem_mem_affinity(const Machine::MemoryMemoryAffinity &mma)
MachineMemInfo(Memory _m)
MachineAffinityInfo< Memory, Machine::MemoryMemoryAffinity > mmas_in
Definition machine_impl.h:65
Definition machine_impl.h:68
std::map< Processor, MachineProcInfo * > procs
Definition machine_impl.h:87
MachineNodeInfo(int _node, RuntimeImpl *_runtime_impl)
std::map< Memory, MachineMemInfo * > mems
Definition machine_impl.h:90
std::map< Processor::Kind, std::map< Processor, MachineProcInfo * > > proc_by_kind
Definition machine_impl.h:88
bool add_proc_mem_affinity(const Machine::ProcessorMemoryAffinity &pma)
bool add_process_info(const Machine::ProcessInfo &proc_info)
void update_kind_maps(void)
Machine::ProcessInfo * process_info
Definition machine_impl.h:85
RuntimeImpl * runtime_impl
Definition machine_impl.h:83
int node
Definition machine_impl.h:79
std::map< Memory::Kind, std::map< Memory, MachineMemInfo * > > mem_by_kind
Definition machine_impl.h:91
bool add_processor(Processor p)
bool add_mem_mem_affinity(const Machine::MemoryMemoryAffinity &mma)
bool add_memory(Memory m)
Definition machine_impl.h:45
MachineAffinityInfo< Memory, Machine::ProcessorMemoryAffinity > pmas
Definition machine_impl.h:52
Processor p
Definition machine_impl.h:51
bool add_proc_mem_affinity(const Machine::ProcessorMemoryAffinity &pma)
MachineProcInfo(Processor _p)
Definition machine.h:106
Definition machine.h:86
int bandwidth_weight
Definition machine_impl.h:370
Memory memory
Definition machine_impl.h:369
int latency_weight
Definition machine_impl.h:371
int latency_weight
Definition machine_impl.h:366
int bandwidth_weight
Definition machine_impl.h:365
Processor proc
Definition machine_impl.h:364
int bandwidth_weight
Definition machine_impl.h:254
Memory memory
Definition machine_impl.h:253
int latency_weight
Definition machine_impl.h:255
Definition realm_c.h:285