MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_AggregationPhase1Algorithm_kokkos_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
47#define MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
48
49#include <queue>
50#include <vector>
51
52#include <Teuchos_Comm.hpp>
53#include <Teuchos_CommHelpers.hpp>
54
55#include <Xpetra_Vector.hpp>
56
58
59#include "MueLu_Aggregates_kokkos.hpp"
60#include "MueLu_Exceptions.hpp"
61#include "MueLu_LWGraph_kokkos.hpp"
62#include "MueLu_Monitor.hpp"
63
64#include "Kokkos_Sort.hpp"
65#include <Kokkos_ScatterView.hpp>
66
67namespace MueLu {
68
69 template <class LocalOrdinal, class GlobalOrdinal, class Node>
71 BuildAggregates(const Teuchos::ParameterList& params,
72 const LWGraph_kokkos& graph,
73 Aggregates_kokkos& aggregates,
74 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
75 LO& numNonAggregatedNodes) const {
76
77 int minNodesPerAggregate = params.get<int> ("aggregation: min agg size");
78 int maxNodesPerAggregate = params.get<int> ("aggregation: max agg size");
79
80 TEUCHOS_TEST_FOR_EXCEPTION(maxNodesPerAggregate < minNodesPerAggregate,
82 "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");
83
84 // Distance-2 gives less control than serial uncoupled phase 1
85 // no custom row reordering because would require making deep copy
86 // of local matrix entries and permuting it can only enforce
87 // max aggregate size
88 {
89 if(params.get<bool>("aggregation: deterministic"))
90 {
91 Monitor m(*this, "BuildAggregatesDeterministic");
92 BuildAggregatesDeterministic(maxNodesPerAggregate, graph,
93 aggregates, aggStat, numNonAggregatedNodes);
94 } else {
95 Monitor m(*this, "BuildAggregatesRandom");
96 BuildAggregatesRandom(maxNodesPerAggregate, graph,
97 aggregates, aggStat, numNonAggregatedNodes);
98 }
99 }
100 }
101
102 template <class LocalOrdinal, class GlobalOrdinal, class Node>
104 BuildAggregatesRandom(const LO maxAggSize,
105 const LWGraph_kokkos& graph,
106 Aggregates_kokkos& aggregates,
107 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
108 LO& numNonAggregatedNodes) const
109 {
110 const LO numRows = graph.GetNodeNumVertices();
111 const int myRank = graph.GetComm()->getRank();
112
113 // Extract data from aggregates
114 auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
115 auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
116 auto colors = aggregates.GetGraphColors();
117
118 auto lclLWGraph = graph.getLocalLWGraph();
119
120 LO numAggregatedNodes = 0;
121 LO numLocalAggregates = aggregates.GetNumAggregates();
122 Kokkos::View<LO, device_type> aggCount("aggCount");
123 Kokkos::deep_copy(aggCount, numLocalAggregates);
124 Kokkos::parallel_for("Aggregation Phase 1: initial reduction over color == 1",
125 Kokkos::RangePolicy<LO, execution_space>(0, numRows),
126 KOKKOS_LAMBDA (const LO nodeIdx) {
127 if(colors(nodeIdx) == 1 && aggStat(nodeIdx) == READY) {
128 const LO aggIdx = Kokkos::atomic_fetch_add (&aggCount(), 1);
129 vertex2AggId(nodeIdx, 0) = aggIdx;
130 aggStat(nodeIdx) = AGGREGATED;
131 procWinner(nodeIdx, 0) = myRank;
132 }
133 });
134 // Truely we wish to compute: numAggregatedNodes = aggCount - numLocalAggregates
135 // before updating the value of numLocalAggregates.
136 // But since we also do not want to create a host mirror of aggCount we do some trickery...
137 numAggregatedNodes -= numLocalAggregates;
138 Kokkos::deep_copy(numLocalAggregates, aggCount);
139 numAggregatedNodes += numLocalAggregates;
140
141 // Compute the initial size of the aggregates.
142 // Note lbv 12-21-17: I am pretty sure that the aggregates will always be of size 1
143 // at this point so we could simplify the code below a lot if this
144 // assumption is correct...
145 Kokkos::View<LO*, device_type> aggSizesView("aggSizes", numLocalAggregates);
146 {
147 // Here there is a possibility that two vertices assigned to two different threads contribute
148 // to the same aggregate if somethings happened before phase 1?
149 auto aggSizesScatterView = Kokkos::Experimental::create_scatter_view(aggSizesView);
150 Kokkos::parallel_for("Aggregation Phase 1: compute initial aggregates size",
151 Kokkos::RangePolicy<LO, execution_space>(0, numRows),
152 KOKKOS_LAMBDA (const LO nodeIdx) {
153 auto aggSizesScatterViewAccess = aggSizesScatterView.access();
154 if(vertex2AggId(nodeIdx, 0) >= 0)
155 aggSizesScatterViewAccess(vertex2AggId(nodeIdx, 0)) += 1;
156 });
157 Kokkos::Experimental::contribute(aggSizesView, aggSizesScatterView);
158 }
159
160 LO tmpNumAggregatedNodes = 0;
161 Kokkos::parallel_reduce("Aggregation Phase 1: main parallel_reduce over aggSizes",
162 Kokkos::RangePolicy<size_t, execution_space>(0, numRows),
163 KOKKOS_LAMBDA (const size_t nodeIdx, LO & lNumAggregatedNodes) {
164 if(colors(nodeIdx) != 1
165 && (aggStat(nodeIdx) == READY || aggStat(nodeIdx) == NOTSEL)) {
166 // Get neighbors of vertex i and look for local, aggregated,
167 // color 1 neighbor (valid root).
168 auto neighbors = lclLWGraph.getNeighborVertices(nodeIdx);
169 for(LO j = 0; j < neighbors.length; ++j) {
170 auto nei = neighbors.colidx(j);
171 if(lclLWGraph.isLocalNeighborVertex(nei) && colors(nei) == 1
172 && aggStat(nei) == AGGREGATED) {
173
174 // This atomic guarentees that any other node trying to
175 // join aggregate agg has the correct size.
176 LO agg = vertex2AggId(nei, 0);
177 const LO aggSize = Kokkos::atomic_fetch_add (&aggSizesView(agg),
178 1);
179 if(aggSize < maxAggSize) {
180 //assign vertex i to aggregate with root j
181 vertex2AggId(nodeIdx, 0) = agg;
182 procWinner(nodeIdx, 0) = myRank;
183 aggStat(nodeIdx) = AGGREGATED;
184 ++lNumAggregatedNodes;
185 break;
186 } else {
187 // Decrement back the value of aggSizesView(agg)
188 Kokkos::atomic_decrement(&aggSizesView(agg));
189 }
190 }
191 }
192 }
193 // if(aggStat(nodeIdx) != AGGREGATED) {
194 // lNumNonAggregatedNodes++;
195 if(aggStat(nodeIdx) == NOTSEL) { aggStat(nodeIdx) = READY; }
196 // }
197 }, tmpNumAggregatedNodes);
198 numAggregatedNodes += tmpNumAggregatedNodes;
199 numNonAggregatedNodes -= numAggregatedNodes;
200
201 // update aggregate object
202 aggregates.SetNumAggregates(numLocalAggregates);
203 }
204
205 template <class LocalOrdinal, class GlobalOrdinal, class Node>
207 BuildAggregatesDeterministic(const LO maxAggSize,
208 const LWGraph_kokkos& graph,
209 Aggregates_kokkos& aggregates,
210 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
211 LO& numNonAggregatedNodes) const
212 {
213 const LO numRows = graph.GetNodeNumVertices();
214 const int myRank = graph.GetComm()->getRank();
215
216 auto vertex2AggId = aggregates.GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
217 auto procWinner = aggregates.GetProcWinner() ->getDeviceLocalView(Xpetra::Access::ReadWrite);
218 auto colors = aggregates.GetGraphColors();
219
220 auto lclLWGraph = graph.getLocalLWGraph();
221
222 LO numLocalAggregates = aggregates.GetNumAggregates();
223 Kokkos::View<LO, device_type> numLocalAggregatesView("Num aggregates");
224 {
225 auto h_nla = Kokkos::create_mirror_view(numLocalAggregatesView);
226 h_nla() = numLocalAggregates;
227 Kokkos::deep_copy(numLocalAggregatesView, h_nla);
228 }
229
230 Kokkos::View<LO*, device_type> newRoots("New root LIDs", numNonAggregatedNodes);
231 Kokkos::View<LO, device_type> numNewRoots("Number of new aggregates of current color");
232 auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
233
234 //first loop build the set of new roots
235 Kokkos::parallel_for("Aggregation Phase 1: building list of new roots",
236 Kokkos::RangePolicy<execution_space>(0, numRows),
237 KOKKOS_LAMBDA(const LO i)
238 {
239 if(colors(i) == 1 && aggStat(i) == READY)
240 {
241 //i will become a root
242 newRoots(Kokkos::atomic_fetch_add(&numNewRoots(), 1)) = i;
243 }
244 });
245 Kokkos::deep_copy(h_numNewRoots, numNewRoots);
246 //sort new roots by LID to guarantee determinism in agg IDs
247 Kokkos::sort(newRoots, 0, h_numNewRoots());
248 LO numAggregated = 0;
249 Kokkos::parallel_reduce("Aggregation Phase 1: aggregating nodes",
250 Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
251 KOKKOS_LAMBDA(const LO rootIndex, LO& lnumAggregated)
252 {
253 LO root = newRoots(rootIndex);
254 LO aggID = numLocalAggregatesView() + rootIndex;
255 LO aggSize = 1;
256 vertex2AggId(root, 0) = aggID;
257 procWinner(root, 0) = myRank;
258 aggStat(root) = AGGREGATED;
259 auto neighOfRoot = lclLWGraph.getNeighborVertices(root);
260 for(LO n = 0; n < neighOfRoot.length; n++)
261 {
262 LO neigh = neighOfRoot(n);
263 if (lclLWGraph.isLocalNeighborVertex(neigh) && aggStat(neigh) == READY)
264 {
265 //add neigh to aggregate
266 vertex2AggId(neigh, 0) = aggID;
267 procWinner(neigh, 0) = myRank;
268 aggStat(neigh) = AGGREGATED;
269 aggSize++;
270 if(aggSize == maxAggSize)
271 {
272 //can't add any more nodes
273 break;
274 }
275 }
276 }
277 lnumAggregated += aggSize;
278 }, numAggregated);
279 numNonAggregatedNodes -= numAggregated;
280 // update aggregate object
281 aggregates.SetNumAggregates(numLocalAggregates + h_numNewRoots());
282 }
283
284} // end namespace
285
286#endif // MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
void BuildAggregates(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates_kokkos &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
void BuildAggregatesDeterministic(const LO maxAggSize, const LWGraph_kokkos &graph, Aggregates_kokkos &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
void BuildAggregatesRandom(const LO maxAggSize, const LWGraph_kokkos &graph, Aggregates_kokkos &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Exception throws to report errors in the internal logical of the program.
Lightweight MueLu representation of a compressed row storage graph.
Timer to be used in non-factories.
Namespace for MueLu classes and methods.
@ AGGREGATED
Definition: MueLu_Types.hpp:73