Zoltan2
Loading...
Searching...
No Matches
Zoltan2_MachineTorusTopoMgr.hpp
Go to the documentation of this file.
1#ifndef _ZOLTAN2_MACHINE_TORUS_TOPOMANAGER_HPP_
2#define _ZOLTAN2_MACHINE_TORUS_TOPOMANAGER_HPP_
3
4#include <Teuchos_Comm.hpp>
5#include <Teuchos_CommHelpers.hpp>
6#include <Zoltan2_Machine.hpp>
7
8#ifdef HAVE_ZOLTAN2_TOPOMANAGER
9#include <TopoManager.h>
10#endif
11
12namespace Zoltan2{
13
17template <typename pcoord_t, typename part_t>
18class MachineTorusTopoMgr : public Machine <pcoord_t, part_t> {
19
20public:
25 MachineTorusTopoMgr(const Teuchos::Comm<int> &comm):
26 Machine<pcoord_t,part_t>(comm),
27#if defined (CMK_BLUEGENEQ)
28 networkDim(6), tmgr(comm.getSize()),
29#elif defined (CMK_BLUEGENEP)
30 networkDim(4), tmgr(comm.getSize()),
31#else
32 networkDim(3),
33#endif
34 procCoords(NULL), machine_extent(NULL),
35 delete_transformed_coords(false),
36 transformed_network_dim(0),
37 transformed_coordinates (NULL), pl(NULL)
38 {
39 transformed_network_dim = networkDim - 1;
40 transformed_coordinates = procCoords;
41 machine_extent = new int[networkDim];
42 this->getMachineExtent(this->machine_extent);
43
44 // Allocate memory for processor coordinates.
45 procCoords = new pcoord_t *[networkDim];
46 for (int i = 0; i < networkDim; ++i) {
47 procCoords[i] = new pcoord_t[this->numRanks];
48 memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
49 }
50
51 // Obtain the coordinate of the processor.
52 pcoord_t *xyz = new pcoord_t[networkDim];
54 for (int i = 0; i < networkDim; i++)
55 procCoords[i][this->myRank] = xyz[i];
56 delete [] xyz;
57
58 // reduceAll the coordinates of each processor.
59 gatherMachineCoordinates(comm);
60
61 }
62
63 MachineTorusTopoMgr(const Teuchos::Comm<int> &comm,
64 const Teuchos::ParameterList &pl_ ):
65 Machine<pcoord_t,part_t>(comm),
66#if defined (CMK_BLUEGENEQ)
67 networkDim(6), tmgr(comm.getSize()),
68#elif defined (CMK_BLUEGENEP)
69 networkDim(4), tmgr(comm.getSize()),
70#else
71 networkDim(3),
72#endif
73 procCoords(NULL), machine_extent(NULL),
74 delete_transformed_coords(false),
75 transformed_network_dim(0),
76 transformed_coordinates (NULL),
77 pl(&pl_)
78 {
79 transformed_network_dim = networkDim - 1;
80 transformed_coordinates = procCoords;
81 machine_extent = new int[networkDim];
82 this->getMachineExtent(this->machine_extent);
83
84 // Allocate memory for processor coordinates.
85 procCoords = new pcoord_t *[networkDim];
86 for (int i = 0; i < networkDim; ++i) {
87 procCoords[i] = new pcoord_t[this->numRanks];
88 memset(procCoords[i], 0, sizeof(pcoord_t) * this->numRanks);
89 }
90
91 // Obtain the coordinate of the processor.
92 pcoord_t *xyz = new pcoord_t[networkDim];
94 for (int i = 0; i < networkDim; i++)
95 procCoords[i][this->myRank] = xyz[i];
96 delete [] xyz;
97
98 // reduceAll the coordinates of each processor.
99 gatherMachineCoordinates(comm);
100
101 const Teuchos::ParameterEntry *pe =
102 this->pl->getEntryPtr("Machine_Optimization_Level");
103 if (pe) {
104
105 int optimization_level = 0;
106
107 optimization_level = pe->getValue<int>(&optimization_level);
108
109 if (optimization_level == 0) {
110 transformed_network_dim = networkDim - 1;
111 transformed_coordinates = procCoords;
112 }
113
114 else if (optimization_level >= 1) {
115 transformed_network_dim = networkDim - 2;
116 transformed_coordinates = procCoords;
117 }
118 }
119
120 }
121
123 for (int i = 0; i < networkDim; i++) {
124 delete [] procCoords[i];
125 }
126 delete [] procCoords;
127 delete [] machine_extent;
128
129 if (delete_transformed_coords) {
130 for (int i = 0; i < transformed_network_dim; i++) {
131 delete [] transformed_coordinates[i];
132 }
133 delete [] transformed_coordinates;
134 }
135
136 }
137
138 bool hasMachineCoordinates() const { return true; }
139
140 int getMachineDim() const { return transformed_network_dim; }
141
142 int getRealMachineDim() const { return networkDim; }
143
144 bool getMachineExtent(int *nxyz) const {
145#if defined (CMK_BLUEGENEQ)
146 int dim = 0;
147 if (dim < transformed_network_dim)
148 nxyz[dim++] = tmgr.getDimNA();
149 if (dim < transformed_network_dim)
150 nxyz[dim++] = tmgr.getDimNB();
151 if (dim < transformed_network_dim)
152 nxyz[dim++] = tmgr.getDimNC();
153 if (dim < transformed_network_dim)
154 nxyz[dim++] = tmgr.getDimND();
155 if (dim < transformed_network_dim)
156 nxyz[dim++] = tmgr.getDimNE();
157 if (dim < transformed_network_dim)
158 nxyz[dim++] = tmgr.getDimNT();
159 return true;
160#elif defined (CMK_BLUEGENEP)
161 int dim = 0;
162 if (dim < transformed_network_dim)
163 nxyz[dim++] = tmgr.getDimNX();
164 if (dim < transformed_network_dim)
165 nxyz[dim++] = tmgr.getDimNY();
166 if (dim < transformed_network_dim)
167 nxyz[dim++] = tmgr.getDimNZ();
168 if (dim < transformed_network_dim)
169 nxyz[dim++] = tmgr.getDimNT();
170 return true;
171#else
172 return false;
173#endif
174 }
175
176 // MD TODO: Not always it has wrap-around links.
177 bool getMachineExtentWrapArounds(bool *wrap_around) const {
178#if defined (CMK_BLUEGENEQ)
179 // Leave it as this for now, figure out if there is a way to
180 // determine tourus from topomanager.
181 int dim = 0;
182 if (dim < transformed_network_dim)
183 wrap_around[dim++] = true;
184 if (dim < transformed_network_dim)
185 wrap_around[dim++] = true;
186 if (dim < transformed_network_dim)
187 wrap_around[dim++] = true;
188 if (dim < transformed_network_dim)
189 wrap_around[dim++] = true;
190 if (dim < transformed_network_dim)
191 wrap_around[dim++] = true;
192 if (dim < transformed_network_dim)
193 wrap_around[dim++] = true;
194#elif defined (CMK_BLUEGENEP)
195 int dim = 0;
196 if (dim < transformed_network_dim)
197 wrap_around[dim++] = true;
198 if (dim < transformed_network_dim)
199 wrap_around[dim++] = true;
200 if (dim < transformed_network_dim)
201 wrap_around[dim++] = true;
202 if (dim < transformed_network_dim)
203 wrap_around[dim++] = true;
204#else
205#endif
206 return true;
207 }
208
209 bool getMyMachineCoordinate(pcoord_t *xyz) {
210 for (int i = 0; i < this->transformed_network_dim; ++i) {
211 xyz[i] = transformed_coordinates[i][this->myRank];
212 }
213 return true;
214 }
215
216 bool getMyActualMachineCoordinate(pcoord_t *xyz) {
217#if defined (CMK_BLUEGENEQ)
218 int a,b,c,d,e,t;
219 tmgr.rankToCoordinates(this->myRank, a,b,c,d,e,t);
220 xyz[0] = a; xyz[1] = b; xyz[2] = c; xyz[3] = d; xyz[4] = e; xyz[5] = t;
221 //std::cout << "me:" << this->myRank
222 // << " " << a << " " << b << " " << c << " " << d
223 // << " " << e << " " << t << std::endl;
224 return true;
225#elif defined (CMK_BLUEGENEP)
226 int a,b,c,t;
227 tmgr.rankToCoordinates(this->myRank, a,b,c,t);
228 xyz[0] = a; xyz[1] = b; xyz[2] = c; xyz[3] = t;
229 return true;
230#else
231 return false;
232#endif
233 }
234
235 bool getMachineExtentWrapArounds(part_t *wrap_around) const {
236
237 int dim = 0;
238 if (dim < transformed_network_dim)
239 wrap_around[dim++] = true;
240
241 if (dim < transformed_network_dim)
242 wrap_around[dim++] = true;
243
244 if (dim < transformed_network_dim)
245 wrap_around[dim++] = true;
246
247 if (dim < transformed_network_dim)
248 wrap_around[dim++] = true;
249
250 if (dim < transformed_network_dim)
251 wrap_around[dim++] = true;
252
253 if (dim < transformed_network_dim)
254 wrap_around[dim++] = true;
255 return true;
256 }
257 inline bool getMachineCoordinate(const int rank,
258 pcoord_t *xyz) const {
259 return false;
260 }
261
262
263 bool getMachineCoordinate(const char *nodename, pcoord_t *xyz) {
264 return false; // cannot yet return from nodename
265 }
266
267 bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const {
268 allCoords = procCoords;
269 return true;
270 }
271
272 virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override {
273 hops = 0;
274 for (int i = 0; i < networkDim - 1; ++i) {
275 pcoord_t distance = procCoords[i][rank1] - procCoords[i][rank2];
276 if (distance < 0 )
277 distance = -distance;
278 if (machine_extent[i] - distance < distance)
279 distance = machine_extent[i] - distance;
280 hops += distance;
281 }
282 return true;
283 }
284
285
286private:
287
288 int networkDim;
289
290#ifdef HAVE_ZOLTAN2_TOPOMANAGER
291 TopoManager tmgr;
292#endif
293 pcoord_t **procCoords; // KDD Maybe should be RCP?
294 part_t *machine_extent;
295 const Teuchos::ParameterList *pl;
296
297
298 bool delete_transformed_coords;
299 int transformed_network_dim;
300 pcoord_t **transformed_coordinates;
301
302 void gatherMachineCoordinates(const Teuchos::Comm<int> &comm) {
303 // reduces and stores all machine coordinates.
304 pcoord_t *tmpVect = new pcoord_t [this->numRanks];
305
306 for (int i = 0; i < networkDim; i++) {
307 Teuchos::reduceAll<int, pcoord_t>(comm, Teuchos::REDUCE_SUM,
308 this->numRanks,
309 procCoords[i], tmpVect);
310 pcoord_t *tmp = tmpVect;
311 tmpVect = procCoords[i];
312 procCoords[i] = tmp;
313 }
314 delete [] tmpVect;
315 }
316};
317}
318#endif
A TopoManager Machine Class on Torus Networks.
bool getMachineCoordinate(const char *nodename, pcoord_t *xyz)
bool getAllMachineCoordinatesView(pcoord_t **&allCoords) const
MachineTorusTopoMgr(const Teuchos::Comm< int > &comm)
Constructor: a BlueGeneQ network machine description.
bool getMachineExtentWrapArounds(bool *wrap_around) const
bool getMachineExtentWrapArounds(part_t *wrap_around) const
MachineTorusTopoMgr(const Teuchos::Comm< int > &comm, const Teuchos::ParameterList &pl_)
virtual bool getHopCount(int rank1, int rank2, pcoord_t &hops) const override
getHopCount sets hops to the hop count between rank1 and rank2 and returns true if coordinates are available.
bool getMachineCoordinate(const int rank, pcoord_t *xyz) const
MachineClass Base class for representing machine coordinates, networks, etc.
Created by mbenlioglu on Aug 31, 2020.
SparseMatrixAdapter_t::part_t part_t