Realm
A distributed, event-based tasking library
Loading...
Searching...
No Matches
openmp_threadpool.h
Go to the documentation of this file.
1/*
2 * Copyright 2025 Stanford University, NVIDIA Corporation
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18// OpenMP (or similar) thread pool for Realm
19#ifndef REALM_OPENMP_THREADPOOL_H
20#define REALM_OPENMP_THREADPOOL_H
21
22#include "realm/processor.h"
23#include "realm/threads.h"
24#include "realm/logging.h"
25
26namespace Realm {
27
29 public:
30 // sets the number of workers and initializes the barrier for usage
31 // by a work item
32 void initialize(int _num_workers);
33
34 // starts a static loop, blocking if the previous loop in the
35 // work item has any stragglers - returns true if there's work
36 // to do, false if not
37 bool start_static(int64_t start, int64_t end, int64_t incr, int64_t chunk,
38 int thread_id, int64_t &span_start, int64_t &span_end);
39
40 // continues a static loop - span_{start,end} must contain what
41 // they were given from the previous call to start/next_static
42 bool next_static(int64_t &span_start, int64_t &span_end);
43
44 // starts a dynamic loop, blocking if the previous loop in the
45 // work item has any stragglers - does not actually request any
46 // work - use next_dynamic for that
47 void start_dynamic(int64_t start, int64_t end, int64_t incr, int64_t chunk);
48
49 // continues a dynamic loop
50 bool next_dynamic(int64_t &span_start, int64_t &span_end, int64_t &stride);
51
52 // indicates this thread is done with the current loop - blocks
53 // if other threads haven't even entered the loop yet;
54 // if wait is set, blocks until all threads enter end_loop
55 void end_loop(bool wait);
56
57 protected:
59 // loop bounds and position are done with unsigned values to
60 // allow detection of overflow
64 };
65
66 class ThreadPool {
67 public:
68 ThreadPool(Processor _proc, int _num_workers, const std::string &_name_prefix,
69 int _numa_node, size_t _stack_size, CoreReservationSet &crs);
71
72 // associates the calling thread as the master of the threadpool
74
75 // returns the associated thread pool, optionally warning if none exists
76 static ThreadPool *get_associated_pool(bool warn_if_missing);
77
78 // entry point for workers - does not return until thread pool is shut down
79 void worker_entry(void);
80
93
94 struct WorkerInfo {
105 atomic<int> /*Status*/ status; // int allows CAS primitives
107 int thread_id; // in current team
108 int num_threads; // in current team
109 int app_num_threads; // num threads requested by app
110 void (*fnptr)(void *data);
111 void *data;
113
114 void push_work_item(WorkItem *new_work);
116 };
117
118 // returns the WorkerInfo (if any) associated with the caller (which
119 // can be master or worker) - optionally warns if this thread is not
120 // associated with a threadpool
121 static WorkerInfo *get_worker_info(bool warn_if_missing);
122
123 // starts worker threads running if they weren't already
125
126 // asks worker threads to shut down and waits for them to complete
128
129 void claim_workers(int count, std::set<int> &worker_ids);
130
131 void start_worker(int worker_id, int thread_id, int num_threads,
132 void (*fnptr)(void *data), void *data, WorkItem *work_item);
133
134 int get_num_workers() const { return num_workers; }
135
136 protected:
140 std::vector<CoreReservation *> core_rsrvs;
141 std::vector<Thread *> worker_threads;
142 std::vector<WorkerInfo> worker_infos;
143 };
144
145}; // namespace Realm
146
147#include "realm/openmp/openmp_threadpool.inl"
148
149#endif
Definition threads.h:382
Definition openmp_threadpool.h:28
atomic< uint64_t > loop_limit
Definition openmp_threadpool.h:61
atomic< int64_t > loop_chunk
Definition openmp_threadpool.h:62
void start_dynamic(int64_t start, int64_t end, int64_t incr, int64_t chunk)
bool next_dynamic(int64_t &span_start, int64_t &span_end, int64_t &stride)
bool next_static(int64_t &span_start, int64_t &span_end)
atomic< int64_t > loop_base
Definition openmp_threadpool.h:62
atomic< uint64_t > loop_pos
Definition openmp_threadpool.h:61
atomic< int > loop_barrier
Definition openmp_threadpool.h:63
int num_workers
Definition openmp_threadpool.h:58
void initialize(int _num_workers)
bool start_static(int64_t start, int64_t end, int64_t incr, int64_t chunk, int thread_id, int64_t &span_start, int64_t &span_end)
atomic< int64_t > loop_incr
Definition openmp_threadpool.h:62
void end_loop(bool wait)
Definition processor.h:37
Definition openmp_threadpool.h:66
static ThreadPool * get_associated_pool(bool warn_if_missing)
static WorkerInfo * get_worker_info(bool warn_if_missing)
bool workers_running
Definition openmp_threadpool.h:139
Processor proc
Definition openmp_threadpool.h:137
ThreadPool(Processor _proc, int _num_workers, const std::string &_name_prefix, int _numa_node, size_t _stack_size, CoreReservationSet &crs)
int num_workers
Definition openmp_threadpool.h:138
void start_worker(int worker_id, int thread_id, int num_threads, void(*fnptr)(void *data), void *data, WorkItem *work_item)
std::vector< Thread * > worker_threads
Definition openmp_threadpool.h:141
void stop_worker_threads(void)
int get_num_workers() const
Definition openmp_threadpool.h:134
void worker_entry(void)
std::vector< CoreReservation * > core_rsrvs
Definition openmp_threadpool.h:140
void start_worker_threads(void)
void associate_as_master(void)
std::vector< WorkerInfo > worker_infos
Definition openmp_threadpool.h:142
void claim_workers(int count, std::set< int > &worker_ids)
Definition atomics.h:31
Definition activemsg.h:38
Definition openmp_threadpool.h:81
int prev_thread_id
Definition openmp_threadpool.h:84
WorkItem * parent_work_item
Definition openmp_threadpool.h:86
atomic< uint64_t > critical_flags
Definition openmp_threadpool.h:90
atomic< int > barrier_count
Definition openmp_threadpool.h:89
WorkItem(int _num_threads)
LoopSchedule schedule
Definition openmp_threadpool.h:91
atomic< int > single_winner
Definition openmp_threadpool.h:88
int prev_num_threads
Definition openmp_threadpool.h:85
atomic< int > remaining_workers
Definition openmp_threadpool.h:87
Definition openmp_threadpool.h:94
void push_work_item(WorkItem *new_work)
int app_num_threads
Definition openmp_threadpool.h:109
Status
Definition openmp_threadpool.h:96
@ WORKER_IDLE
Definition openmp_threadpool.h:100
@ WORKER_STARTING
Definition openmp_threadpool.h:99
@ WORKER_SHUTDOWN
Definition openmp_threadpool.h:103
@ WORKER_ACTIVE
Definition openmp_threadpool.h:102
@ WORKER_MASTER
Definition openmp_threadpool.h:97
@ WORKER_NOT_RUNNING
Definition openmp_threadpool.h:98
@ WORKER_CLAIMED
Definition openmp_threadpool.h:101
WorkItem * work_item
Definition openmp_threadpool.h:112
void(* fnptr)(void *data)
Definition openmp_threadpool.h:110
ThreadPool * pool
Definition openmp_threadpool.h:106
void * data
Definition openmp_threadpool.h:111
int thread_id
Definition openmp_threadpool.h:107
WorkItem * pop_work_item(void)
atomic< int > status
Definition openmp_threadpool.h:105
int num_threads
Definition openmp_threadpool.h:108