Realm
A distributed, event-based tasking library
Loading...
Searching...
No Matches
p2p_comm.h
Go to the documentation of this file.
1/*
2 * Copyright 2025 NVIDIA Corporation
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef __UCCLAYER_H__
19#define __UCCLAYER_H__
20
21#include <cassert>
22#include <cstdint>
23#include <iostream>
24#include <memory>
25
26#include "client.h"
27#include "logger.h"
28#include "types.h"
29#include "server.h"
30#include "worker.h"
31
32namespace p2p {
33 // @class P2PComm
34 // @brief Group communicator based on p2p communication.
35 //
36 // It currently implements only one collective operation - allgather(). If
37 // necessary, it has enough information to implement any of the collective
38 // operations. All the communcation is using p2p connections.
39 class P2PComm {
40 int rank_;
41 int world_sz_;
42 std::shared_ptr<mesh::Server> receiver_;
43 std::shared_ptr<mesh::Client> sender_;
44
45 std::string self_;
46 std::vector<std::string> peers_;
47
48 std::unique_ptr<mesh::Worker> worker_;
49
50 std::shared_ptr<Logger::p2p_log> p2p_log_{nullptr};
51
52 public:
53 P2PComm(const std::string &self, const std::vector<std::string> &peers,
54 const std::string &log_file = mesh::DEF_LOG);
55
56 inline int get_rank() { return rank_; };
57 inline int get_world_size() { return world_sz_; };
58
59 // @brief Initialize p2p connectiions betweenn all the pairs of workers.
60 // @return true on success, false otherwise.
61 bool Init();
62
63 // @brief Allgather collective using p2p communication.
64 int Allgather(void *sbuf, int sendcount, uint8_t sendtype, void *rbuf, int recvcount,
65 uint8_t recvtype);
66
67 // @brief shutdown the p2p communicator
68 int Shutdown();
69 };
70} // namespace p2p
71#endif
Definition p2p_comm.h:39
P2PComm(const std::string &self, const std::vector< std::string > &peers, const std::string &log_file=mesh::DEF_LOG)
int get_world_size()
Definition p2p_comm.h:57
int Allgather(void *sbuf, int sendcount, uint8_t sendtype, void *rbuf, int recvcount, uint8_t recvtype)
int get_rank()
Definition p2p_comm.h:56
const std::string DEF_LOG("/tmp/p2p_bootstrap.log")
Definition logger.h:27