Realm
A distributed, event-based tasking library
Loading...
Searching...
No Matches
ucc_comm.h
Go to the documentation of this file.
1/*
2 * Copyright 2025 NVIDIA Corporation
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef __UCCLAYER_H__
19#define __UCCLAYER_H__
20
21#include <cassert>
22#include <memory>
23
24// #include <ucp/api/ucp.h>
25
26#include "bootstrap/bootstrap.h"
27#include "oob_group_comm.h"
28
29namespace Realm {
30 namespace ucc {
31 class UCCComm { // Group communicator holding a UCC library handle, context and team.
32 int rank; // returned by get_rank()
33 int world_sz; // returned by get_world_size()
34 std::unique_ptr<OOBGroupComm> oob_comm; // owned OOBGroupComm instance
35
36 ucc_lib_h lib; // NOTE(review): no {} initializer, unlike team and context below; confirm it is assigned before use
37 ucc_team_h team{};
38 ucc_context_h context{};
39
40 // Helper functions (names mirror the steps listed on init(); presumably called from it, confirm)
41 ucc_status_t init_lib();
42 ucc_status_t create_context();
43 ucc_status_t create_team();
44
45 // Currently simply asserts on any non-success status.
46 void ucc_check(const ucc_status_t &st);
47
48 // Currently simply asserts on any non-success status. NOTE(review): wording shared with ucc_check; confirm it describes ucc_collective.
49 ucc_status_t ucc_collective(ucc_coll_args_t &coll_args, ucc_coll_req_h &req);
50
51 public:
52 // @brief Construct UCC group communicator
53 UCCComm(int rank, int world_sz, bootstrap_handle_t *bh);
54
55 // @brief Initialize ucc library, create ucc context and team.
56 ucc_status_t init();
57
58 inline int get_rank() { return rank; };
59 inline int get_world_size() { return world_sz; };
60
61 // @brief Broadcast message from the process with root rank to all other
62 // processes of the group
63 // @param buffer Starting address of the buffer
64 // @param count Number of elements in the buffer
65 // @param datatype Type of each element in the buffer
66 // @param root Rank of the root
67 // @return UCC_OK on success, error otherwise.
68 ucc_status_t UCC_Bcast(void *buffer, int count, ucc_datatype_t datatype, int root);
69
70 // @brief Gather values from a group of processes at root.
71 // @param sbuf Starting address of the send buffer
72 // @param sendcount Number of elements in the send buffer
73 // @param sendtype Type of each element in the send buffer
74 // @param rbuf Address of the receive buffer
75 // @param recvcount Number of elements to be received from each process.
76 // @param recvtype Type of the elements in the receive buffer
77 // @param root Rank of the root of the gather operation
78 // @return UCC_OK on success, error otherwise.
79 ucc_status_t UCC_Gather(void *sbuf, int sendcount, ucc_datatype_t sendtype,
80 void *rbuf, int recvcount, ucc_datatype_t recvtype,
81 int root);
82
83 // @brief Gather values from all processes in a group and distribute them
84 // to all processes in the group.
85 // @param sbuf Starting address of the send buffer
86 // @param sendcount Number of elements in the send buffer
87 // @param sendtype Type of each element in the send buffer
88 // @param rbuf Address of the receive buffer
89 // @param recvcount Number of elements to be received from each process.
90 // @param recvtype Type of the elements in the receive buffer
91 // @return UCC_OK on success, error otherwise.
92 ucc_status_t UCC_Allgather(void *sbuf, int sendcount, ucc_datatype_t sendtype,
93 void *rbuf, int recvcount, ucc_datatype_t recvtype);
94 // NOTE(review): UCC_Allreduce, UCC_Allgatherv, UCC_Barrier and UCC_Finalize below carry no @brief/@param documentation yet.
95 ucc_status_t UCC_Allreduce(void *sbuf, void *rbuf, int count,
96 ucc_datatype_t datatype, ucc_reduction_op_t op);
97 // NOTE(review): uses std::vector, but this header does not include <vector> itself; presumably reached transitively, confirm.
98 ucc_status_t UCC_Allgatherv(void *sbuf, int count, ucc_datatype_t sendtype,
99 void *rbuf, const std::vector<int> &recvcounts,
100 const std::vector<int> &displs,
101 ucc_datatype_t recvtype);
102
103 ucc_status_t UCC_Barrier();
104
105 ucc_status_t UCC_Finalize();
106 };
107 } // namespace ucc
108} // namespace Realm
109#endif
Definition ucc_comm.h:31
ucc_status_t UCC_Bcast(void *buffer, int count, ucc_datatype_t datatype, int root)
int get_rank()
Definition ucc_comm.h:58
ucc_status_t UCC_Finalize()
int get_world_size()
Definition ucc_comm.h:59
ucc_status_t UCC_Barrier()
ucc_status_t UCC_Gather(void *sbuf, int sendcount, ucc_datatype_t sendtype, void *rbuf, int recvcount, ucc_datatype_t recvtype, int root)
UCCComm(int rank, int world_sz, bootstrap_handle_t *bh)
ucc_status_t UCC_Allreduce(void *sbuf, void *rbuf, int count, ucc_datatype_t datatype, ucc_reduction_op_t op)
ucc_status_t UCC_Allgatherv(void *sbuf, int count, ucc_datatype_t sendtype, void *rbuf, const std::vector< int > &recvcounts, const std::vector< int > &displs, ucc_datatype_t recvtype)
ucc_status_t UCC_Allgather(void *sbuf, int sendcount, ucc_datatype_t sendtype, void *rbuf, int recvcount, ucc_datatype_t recvtype)
ucc_status_t init()
Definition activemsg.h:38
Definition bootstrap.h:32