Realm
A distributed, event-based tasking library
Loading...
Searching...
No Matches
cudart_hijack.h
Go to the documentation of this file.
1/*
2 * Copyright 2025 Stanford University, NVIDIA Corporation
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18// helper objects related to Realm's hijacking of the CUDA runtime API
19
20#ifndef REALM_CUDART_HIJACK_H
21#define REALM_CUDART_HIJACK_H
22
23// so that we get types and stuff right
24#include <cuda_runtime.h>
25
26#include "realm/mutex.h"
27
28#include <set>
29#include <vector>
30#include <unordered_map>
31
32struct CUmod_st;
33struct CUfunc_st;
34
35namespace Realm {
36 namespace Cuda {
37
38 // this flag will be set on the first call into any of this hijack code (if
39 // an application is linked with -lcudart, we will NOT be hijacking the
40 // application's calls, and the cuda module needs to know that)
41 extern bool cudart_hijack_active;
42
43 // for most CUDART API entry points, calling them from a non-GPU task is
44 // a fatal error - for others (e.g. cudaDeviceSynchronize), it's either
45 // silently permitted (0), warned (1), or a fatal error (2) based on this
46 // setting
47 extern int cudart_hijack_nongpu_sync;
48
49 // files compiled with nvcc will use global registrations of modules, variables, etc.
50 // that get broadcast to all contexts
51
52 // Reverse engineered structure contents from cudart
53 struct FatBin {
54 int magic; // magic number
55 int version;
56 const unsigned long long *data;
57 void *filename_or_fatbins;
58 };
59
60 struct RegisteredModule {
61 const FatBin *fat_bin = nullptr;
62 std::vector<CUmod_st *> gpu_modules;
63 };
64
65 struct RegisteredFunction {
66 const FatBin *fat_bin = nullptr;
67 const void *host_fun = nullptr;
68 const char *device_fun = nullptr;
69 std::vector<CUfunc_st *> gpu_functions;
70
71 RegisteredFunction() = default;
72 RegisteredFunction(const FatBin *_fat_bin, const void *_host_fun,
73 const char *_device_fun);
74 };
75
76 struct RegisteredVariable {
77 const FatBin *fat_bin = nullptr;
78 const void *host_var = nullptr;
79 const char *device_name = nullptr;
80 bool external = false;
81 int size = 0;
82 bool constant = false;
83 bool global = false;
84 bool managed = false;
85 std::vector<uintptr_t> gpu_addresses;
86
87 RegisteredVariable() = default;
88 RegisteredVariable(const FatBin *_fat_bin, const void *_host_var,
89 const char *_device_name, bool _external, int _size,
90 bool _constant, bool _global, bool _managed);
91 };
92
93 class GPU;
94
95 class GlobalRegistrations {
96 protected:
99
101
102 public:
103 // called by a GPU when it has created its context - will result in calls back
104 // into the GPU for any modules/variables/whatever already registered
105 static void add_gpu_context(GPU *gpu);
106 static void remove_gpu_context(GPU *gpu);
107
108 // called by __cuda(un)RegisterFatBinary
109 static void register_fat_binary(const FatBin *fatbin);
110 static void unregister_fat_binary(const FatBin *fatbin);
111
112 // called by __cudaRegisterVar
113 static void register_variable(const RegisteredVariable &var);
114
115 // called by __cudaRegisterFunction
116 static void register_function(const RegisteredFunction &func);
117
118 static CUfunc_st *lookup_function(const void *func, GPU *gpu);
119 static uintptr_t lookup_variable(const void *var, GPU *gpu);
120
121 protected:
125
126 RWLock rwlock;
127 std::set<GPU *> active_gpus;
128 std::unordered_map<const FatBin *, RegisteredModule> modules;
129 std::unordered_map<const void *, RegisteredVariable> variables;
130 std::unordered_map<const void *, RegisteredFunction> functions;
131 };
132 }; // namespace Cuda
133}; // namespace Realm
134
135#endif
Definition cuda_internal.h:392
Definition cudart_hijack.h:95
static void register_variable(const RegisteredVariable &var)
void register_function_under_lock(RegisteredFunction &func, GPU *gpu)
void load_module_under_lock(RegisteredModule &mod, GPU *gpu)
std::set< GPU * > active_gpus
Definition cudart_hijack.h:127
static CUfunc_st * lookup_function(const void *func, GPU *gpu)
std::unordered_map< const void *, RegisteredVariable > variables
Definition cudart_hijack.h:129
void register_variable_under_lock(RegisteredVariable &var, GPU *gpu)
static void unregister_fat_binary(const FatBin *fatbin)
static void register_function(const RegisteredFunction &func)
RWLock rwlock
Definition cudart_hijack.h:126
static GlobalRegistrations & get_global_registrations(void)
static void remove_gpu_context(GPU *gpu)
std::unordered_map< const void *, RegisteredFunction > functions
Definition cudart_hijack.h:130
std::unordered_map< const FatBin *, RegisteredModule > modules
Definition cudart_hijack.h:128
static void register_fat_binary(const FatBin *fatbin)
static uintptr_t lookup_variable(const void *var, GPU *gpu)
static void add_gpu_context(GPU *gpu)
Definition mutex.h:398
bool cudart_hijack_active
int cudart_hijack_nongpu_sync
Definition activemsg.h:38
Definition cudart_hijack.h:53
void * filename_or_fatbins
Definition cudart_hijack.h:57
const unsigned long long * data
Definition cudart_hijack.h:56
int version
Definition cudart_hijack.h:55
int magic
Definition cudart_hijack.h:54
Definition cudart_hijack.h:65
RegisteredFunction(const FatBin *_fat_bin, const void *_host_fun, const char *_device_fun)
std::vector< CUfunc_st * > gpu_functions
Definition cudart_hijack.h:69
const char * device_fun
Definition cudart_hijack.h:68
const void * host_fun
Definition cudart_hijack.h:67
const FatBin * fat_bin
Definition cudart_hijack.h:66
Definition cudart_hijack.h:60
std::vector< CUmod_st * > gpu_modules
Definition cudart_hijack.h:62
const FatBin * fat_bin
Definition cudart_hijack.h:61
Definition cudart_hijack.h:76
RegisteredVariable(const FatBin *_fat_bin, const void *_host_var, const char *_device_name, bool _external, int _size, bool _constant, bool _global, bool _managed)
const char * device_name
Definition cudart_hijack.h:79
int size
Definition cudart_hijack.h:81
std::vector< uintptr_t > gpu_addresses
Definition cudart_hijack.h:85
bool managed
Definition cudart_hijack.h:84
bool global
Definition cudart_hijack.h:83
const FatBin * fat_bin
Definition cudart_hijack.h:77
bool external
Definition cudart_hijack.h:80
bool constant
Definition cudart_hijack.h:82
const void * host_var
Definition cudart_hijack.h:78