Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_getDiagCopyWithoutOffsets_def.hpp
Go to the documentation of this file.
1/*
2// @HEADER
3// ***********************************************************************
4//
5// Tpetra: Templated Linear Algebra Services Package
6// Copyright (2008) Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39//
40// ************************************************************************
41// @HEADER
42*/
43
44#ifndef TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_DEF_HPP
45#define TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_DEF_HPP
46
54
56#include "Tpetra_RowGraph.hpp"
57#include "Tpetra_CrsGraph.hpp"
58#include "Tpetra_RowMatrix.hpp"
59#include "Tpetra_Vector.hpp"
60
61namespace Tpetra {
62namespace Details {
63
64// Work-around for #499: Implementation of one-argument (no offsets)
65// getLocalDiagCopy for the NOT fill-complete case.
66//
67// NOTE (mfh 18 Jul 2016) This calls functions that are NOT GPU device
68// functions! Thus, we do NOT use KOKKOS_INLINE_FUNCTION or
69// KOKKOS_FUNCTION here, because those attempt to mark the functions
70// they modify as CUDA device functions. This functor is ONLY for
71// non-CUDA execution spaces!
72template<class SC, class LO, class GO, class NT>
73class GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor {
74public:
75 typedef ::Tpetra::RowMatrix<SC, LO, GO, NT> row_matrix_type;
76 typedef ::Tpetra::Vector<SC, LO, GO, NT> vec_type;
77
78 typedef typename vec_type::impl_scalar_type IST;
79 // The output Vector determines the execution space.
80
81private:
82 typedef typename vec_type::dual_view_type::t_host::execution_space host_execution_space;
83 typedef typename vec_type::map_type map_type;
84
85 static bool
86 graphIsSorted (const row_matrix_type& A)
87 {
88 using Teuchos::RCP;
89 using Teuchos::rcp_dynamic_cast;
90 typedef Tpetra::CrsGraph<LO, GO, NT> crs_graph_type;
91 typedef Tpetra::RowGraph<LO, GO, NT> row_graph_type;
92
93 // We conservatively assume not sorted. RowGraph lacks an
94 // "isSorted" predicate, so we can't know for sure unless the cast
95 // to CrsGraph succeeds.
96 bool sorted = false;
97
98 RCP<const row_graph_type> G_row = A.getGraph ();
99 if (! G_row.is_null ()) {
100 RCP<const crs_graph_type> G_crs =
101 rcp_dynamic_cast<const crs_graph_type> (G_row);
102 if (! G_crs.is_null ()) {
103 sorted = G_crs->isSorted ();
104 }
105 }
106
107 return sorted;
108 }
109
110public:
111 // lclNumErrs [out] Total count of errors on this process.
112 GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor (LO& lclNumErrs,
113 vec_type& diag,
114 const row_matrix_type& A) :
115 A_ (A),
116 lclRowMap_ (*A.getRowMap ()),
117 lclColMap_ (*A.getColMap ()),
118 sorted_ (graphIsSorted (A))
119 {
120 const LO lclNumRows = static_cast<LO> (diag.getLocalLength ());
121 {
122 const LO matLclNumRows =
123 static_cast<LO> (lclRowMap_.getLocalNumElements ());
124 TEUCHOS_TEST_FOR_EXCEPTION
125 (lclNumRows != matLclNumRows, std::invalid_argument,
126 "diag.getLocalLength() = " << lclNumRows << " != "
127 "A.getRowMap()->getLocalNumElements() = " << matLclNumRows << ".");
128 }
129
130 // Side effects start below this point.
131 D_lcl_ = diag.getLocalViewHost(Access::OverwriteAll);
132 D_lcl_1d_ = Kokkos::subview (D_lcl_, Kokkos::ALL (), 0);
133
134 Kokkos::RangePolicy<host_execution_space, LO> range (0, lclNumRows);
135 lclNumErrs = 0;
136 Kokkos::parallel_reduce (range, *this, lclNumErrs);
137
138 // sync changes back to device, since the user doesn't know that
139 // we had to run on host.
140 //diag.template sync<typename device_type::memory_space> ();
141 }
142
143 void operator () (const LO& lclRowInd, LO& errCount) const {
144 using KokkosSparse::findRelOffset;
145
146 D_lcl_1d_(lclRowInd) = Kokkos::Details::ArithTraits<IST>::zero ();
147 const GO gblInd = lclRowMap_.getGlobalElement (lclRowInd);
148 const LO lclColInd = lclColMap_.getLocalElement (gblInd);
149
150 if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
151 errCount++;
152 }
153 else { // row index is also in the column Map on this process
154 typename row_matrix_type::local_inds_host_view_type lclColInds;
155 typename row_matrix_type::values_host_view_type curVals;
156 A_.getLocalRowView(lclRowInd, lclColInds, curVals);
157 LO numEnt = lclColInds.extent(0);
158 // The search hint is always zero, since we only call this
159 // once per row of the matrix.
160 const LO hint = 0;
161 const LO offset =
162 findRelOffset (lclColInds, numEnt, lclColInd, hint, sorted_);
163 if (offset == numEnt) { // didn't find the diagonal column index
164 errCount++;
165 }
166 else {
167 D_lcl_1d_(lclRowInd) = curVals[offset];
168 }
169 }
170 }
171
172private:
173 const row_matrix_type& A_;
174 map_type lclRowMap_;
175 map_type lclColMap_;
176 typename vec_type::dual_view_type::t_host D_lcl_;
177 decltype (Kokkos::subview (D_lcl_, Kokkos::ALL (), 0)) D_lcl_1d_;
178 const bool sorted_;
179};
180
181
182template<class SC, class LO, class GO, class NT>
183LO
185 const ::Tpetra::RowMatrix<SC, LO, GO, NT>& A,
186 const bool debug)
187{
188 using ::Tpetra::Details::gathervPrint;
189 using Teuchos::outArg;
190 using Teuchos::REDUCE_MIN;
191 using Teuchos::reduceAll;
192 typedef GetLocalDiagCopyWithoutOffsetsNotFillCompleteFunctor<SC,
193 LO, GO, NT> functor_type;
194
195 // The functor's constructor does error checking and executes the
196 // thread-parallel kernel.
197
198 LO lclNumErrs = 0;
199
200 if (debug) {
201 int lclSuccess = 1;
202 int gblSuccess = 0;
203 std::ostringstream errStrm;
204 Teuchos::RCP<const Teuchos::Comm<int> > commPtr = A.getComm ();
205 if (commPtr.is_null ()) {
206 return lclNumErrs; // this process does not participate
207 }
208 const Teuchos::Comm<int>& comm = *commPtr;
209
210 try {
211 functor_type functor (lclNumErrs, diag, A);
212 }
213 catch (std::exception& e) {
214 lclSuccess = -1;
215 errStrm << "Process " << A.getComm ()->getRank () << ": "
216 << e.what () << std::endl;
217 }
218 if (lclNumErrs != 0) {
219 lclSuccess = 0;
220 }
221
222 reduceAll<int, int> (comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
223 if (gblSuccess == -1) {
224 if (comm.getRank () == 0) {
225 // We gather into std::cerr, rather than using an
226 // std::ostringstream, because there might be a lot of MPI
227 // processes. It could take too much memory to gather all the
228 // messages to Process 0 before printing. gathervPrint gathers
229 // and prints one message at a time, thus saving memory. I
230 // don't want to run out of memory while trying to print an
231 // error message; that would hide the real problem.
232 std::cerr << "getLocalDiagCopyWithoutOffsetsNotFillComplete threw an "
233 "exception on one or more MPI processes in the matrix's comunicator."
234 << std::endl;
235 }
236 gathervPrint (std::cerr, errStrm.str (), comm);
237 // Don't need to print anything here, since we've already
238 // printed to std::cerr above.
239 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "");
240 }
241 else if (gblSuccess == 0) {
242 TEUCHOS_TEST_FOR_EXCEPTION
243 (gblSuccess != 1, std::runtime_error,
244 "getLocalDiagCopyWithoutOffsetsNotFillComplete failed on "
245 "one or more MPI processes in the matrix's communicator.");
246 }
247 }
248 else { // ! debug
249 functor_type functor (lclNumErrs, diag, A);
250 }
251
252 return lclNumErrs;
253}
254
255} // namespace Details
256} // namespace Tpetra
257
258// Explicit template instantiation macro for
259// getLocalDiagCopyWithoutOffsetsNotFillComplete. NOT FOR USERS!!!
260// Must be used inside the Tpetra namespace.
261#define TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_INSTANT( SCALAR, LO, GO, NODE ) \
262 template LO \
263 Details::getLocalDiagCopyWithoutOffsetsNotFillComplete< SCALAR, LO, GO, NODE > \
264 ( ::Tpetra::Vector< SCALAR, LO, GO, NODE >& diag, \
265 const ::Tpetra::RowMatrix< SCALAR, LO, GO, NODE >& A, \
266 const bool debug);
267
268#endif // TPETRA_DETAILS_GETDIAGCOPYWITHOUTOFFSETS_DEF_HPP
Declaration of a function that prints strings from each process.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
typename device_type::execution_space execution_space
Type of the (new) Kokkos execution space.
An abstract interface for graphs accessed by rows.
A distributed dense vector.
base_type::impl_scalar_type impl_scalar_type
The type used internally in place of Scalar.
base_type::map_type map_type
The type of the Map specialization used by this class.
Implementation details of Tpetra.
LO getLocalDiagCopyWithoutOffsetsNotFillComplete(::Tpetra::Vector< SC, LO, GO, NT > &diag, const ::Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool debug=false)
Given a locally indexed, global sparse matrix, extract the matrix's diagonal entries into a Tpetra::V...
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.