Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
TpetraExt_MatrixMatrix_SYCL.hpp
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ************************************************************************
40// @HEADER
41
42
43// This is a verbatim copy of the other TpetraExt_MatrixMatrix_*.hpp files
44// replacing the execution/memory space by the ones corresponding to SYCL.
45#ifndef TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
46#define TPETRA_MATRIXMATRIX_SYCL_DEF_HPP
47
48#ifdef HAVE_TPETRA_INST_SYCL
49namespace Tpetra {
50namespace MMdetails {
51
52/*********************************************************************************************************/
53// MMM KernelWrappers for Partial Specialization to SYCL
54template<class Scalar,
55 class LocalOrdinal,
56 class GlobalOrdinal,
57 class LocalOrdinalViewType>
58struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode,LocalOrdinalViewType> {
59 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
60 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
61 const LocalOrdinalViewType & Acol2Brow,
62 const LocalOrdinalViewType & Acol2Irow,
63 const LocalOrdinalViewType & Bcol2Ccol,
64 const LocalOrdinalViewType & Icol2Ccol,
65 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
66 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
67 const std::string& label = std::string(),
68 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
69
70
71
72 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
73 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
74 const LocalOrdinalViewType & Acol2Brow,
75 const LocalOrdinalViewType & Acol2Irow,
76 const LocalOrdinalViewType & Bcol2Ccol,
77 const LocalOrdinalViewType & Icol2Ccol,
78 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
79 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
80 const std::string& label = std::string(),
81 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
82
83};
84
85// Jacobi KernelWrappers for Partial Specialization to SYCL
86template<class Scalar,
87 class LocalOrdinal,
88 class GlobalOrdinal, class LocalOrdinalViewType>
89struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode,LocalOrdinalViewType> {
90 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
91 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> & Dinv,
92 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
93 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
94 const LocalOrdinalViewType & Acol2Brow,
95 const LocalOrdinalViewType & Acol2Irow,
96 const LocalOrdinalViewType & Bcol2Ccol,
97 const LocalOrdinalViewType & Icol2Ccol,
98 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
99 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
100 const std::string& label = std::string(),
101 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
102
103 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
104 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> & Dinv,
105 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
106 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
107 const LocalOrdinalViewType & Acol2Brow,
108 const LocalOrdinalViewType & Acol2Irow,
109 const LocalOrdinalViewType & Bcol2Ccol,
110 const LocalOrdinalViewType & Icol2Ccol,
111 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
112 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
113 const std::string& label = std::string(),
114 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
115
116 static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
117 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> & Dinv,
118 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
119 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
120 const LocalOrdinalViewType & Acol2Brow,
121 const LocalOrdinalViewType & Acol2Irow,
122 const LocalOrdinalViewType & Bcol2Ccol,
123 const LocalOrdinalViewType & Icol2Ccol,
124 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
125 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
126 const std::string& label = std::string(),
127 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
128};
129
130
131/*********************************************************************************************************/
132// AB NewMatrix Kernel wrappers (KokkosKernels/SYCL Version)
133template<class Scalar,
134 class LocalOrdinal,
135 class GlobalOrdinal,
136 class LocalOrdinalViewType>
137void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
138 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
139 const LocalOrdinalViewType & Acol2Brow,
140 const LocalOrdinalViewType & Acol2Irow,
141 const LocalOrdinalViewType & Bcol2Ccol,
142 const LocalOrdinalViewType & Icol2Ccol,
143 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
144 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
145 const std::string& label,
146 const Teuchos::RCP<Teuchos::ParameterList>& params) {
147
148
149#ifdef HAVE_TPETRA_MMM_TIMINGS
150 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
151 using Teuchos::TimeMonitor;
152 Teuchos::RCP<TimeMonitor> MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLWrapper")))));
153#endif
154 // Node-specific code
155 typedef Kokkos::Compat::KokkosSYCLWrapperNode Node;
156 std::string nodename("SYCL");
157
158 // Lots and lots of typedefs
159 using Teuchos::RCP;
161 typedef typename KCRS::device_type device_t;
162 typedef typename KCRS::StaticCrsGraphType graph_t;
163 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
164 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
165 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
166 typedef typename KCRS::values_type::non_const_type scalar_view_t;
167 //typedef typename graph_t::row_map_type::const_type lno_view_t_const;
168
169 // Options
170 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
171 std::string myalg("SPGEMM_KK_MEMORY");
172 if(!params.is_null()) {
173 if(params->isParameter("sycl: algorithm"))
174 myalg = params->get("sycl: algorithm",myalg);
175 if(params->isParameter("sycl: team work size"))
176 team_work_size = params->get("sycl: team work size",team_work_size);
177 }
178
179 // KokkosKernelsHandle
180 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
181 typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
182 typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space > KernelHandle;
183
184 // Grab the Kokkos::SparseCrsMatrices
185 const KCRS & Amat = Aview.origMatrix->getLocalMatrixDevice();
186 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixDevice();
187
188 c_lno_view_t Arowptr = Amat.graph.row_map,
189 Browptr = Bmat.graph.row_map;
190 const lno_nnz_view_t Acolind = Amat.graph.entries,
191 Bcolind = Bmat.graph.entries;
192 const scalar_view_t Avals = Amat.values,
193 Bvals = Bmat.values;
194
195 c_lno_view_t Irowptr;
196 lno_nnz_view_t Icolind;
197 scalar_view_t Ivals;
198 if(!Bview.importMatrix.is_null()) {
199 auto lclB = Bview.importMatrix->getLocalMatrixDevice();
200 Irowptr = lclB.graph.row_map;
201 Icolind = lclB.graph.entries;
202 Ivals = lclB.values;
203 }
204
205
206 // Get the algorithm mode
207 std::string alg = nodename+std::string(" algorithm");
208 // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
209 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
210 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
211
212 // Merge the B and Bimport matrices
213 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
214
215#ifdef HAVE_TPETRA_MMM_TIMINGS
216 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLCore"))));
217#endif
218
219 // Do the multiply on whatever we've got
220 typename KernelHandle::nnz_lno_t AnumRows = Amat.numRows();
221 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
222 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
223
224 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
225 lno_nnz_view_t entriesC;
226 scalar_view_t valuesC;
227 KernelHandle kh;
228 kh.create_spgemm_handle(alg_enum);
229 kh.set_team_work_size(team_work_size);
230
231 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,false,Bmerged.graph.row_map,Bmerged.graph.entries,false,row_mapC);
232
233 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
234 if (c_nnz_size){
235 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
236 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
237 }
238 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Amat.graph.row_map,Amat.graph.entries,Amat.values,false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,false,row_mapC,entriesC,valuesC);
239 kh.destroy_spgemm_handle();
240
241#ifdef HAVE_TPETRA_MMM_TIMINGS
242 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLSort"))));
243#endif
244
245 // Sort & set values
246 if (params.is_null() || params->get("sort entries",true))
247 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
248 C.setAllValues(row_mapC,entriesC,valuesC);
249
250#ifdef HAVE_TPETRA_MMM_TIMINGS
251 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SYCLESFC"))));
252#endif
253
254 // Final Fillcomplete
255 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
256 labelList->set("Timer Label",label);
257 if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
258 RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > dummyExport;
259 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
260}
261
262
263/*********************************************************************************************************/
264template<class Scalar,
265 class LocalOrdinal,
266 class GlobalOrdinal,
267 class LocalOrdinalViewType>
268void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(
269 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
270 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
271 const LocalOrdinalViewType & targetMapToOrigRow_dev,
272 const LocalOrdinalViewType & targetMapToImportRow_dev,
273 const LocalOrdinalViewType & Bcol2Ccol_dev,
274 const LocalOrdinalViewType & Icol2Ccol_dev,
275 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
276 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
277 const std::string& label,
278 const Teuchos::RCP<Teuchos::ParameterList>& params) {
279
280 // FIXME: Right now, this is a cut-and-paste of the serial kernel
281 typedef Kokkos::Compat::KokkosSYCLWrapperNode Node;
282
283#ifdef HAVE_TPETRA_MMM_TIMINGS
284 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
285 using Teuchos::TimeMonitor;
286 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Reuse SerialCore"))));
287 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
288#endif
289 using Teuchos::RCP;
290 using Teuchos::rcp;
291
292
293 // Lots and lots of typedefs
294 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
295 typedef typename KCRS::StaticCrsGraphType graph_t;
296 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
297 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
298 typedef typename KCRS::values_type::non_const_type scalar_view_t;
299
300 typedef Scalar SC;
301 typedef LocalOrdinal LO;
302 typedef GlobalOrdinal GO;
303 typedef Node NO;
304 typedef Map<LO,GO,NO> map_type;
305 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
306 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
307 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
308
309 // Since this is being run on SYCL, we need to fence because the below code will use UVM
310 // typename graph_t::execution_space().fence();
311
312 // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
313 // KDDKDD UVM Ideally, this function would run on device and use
314 // KDDKDD UVM KokkosKernels instead of this host implementation.
315 auto targetMapToOrigRow =
316 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
317 targetMapToOrigRow_dev);
318 auto targetMapToImportRow =
319 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
320 targetMapToImportRow_dev);
321 auto Bcol2Ccol =
322 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
323 Bcol2Ccol_dev);
324 auto Icol2Ccol =
325 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
326 Icol2Ccol_dev);
327
328 // Sizes
329 RCP<const map_type> Ccolmap = C.getColMap();
330 size_t m = Aview.origMatrix->getLocalNumRows();
331 size_t n = Ccolmap->getLocalNumElements();
332
333 // Grab the Kokkos::SparseCrsMatrices & inner stuff
334 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
335 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
336 const KCRS & Cmat = C.getLocalMatrixHost();
337
338 c_lno_view_t Arowptr = Amat.graph.row_map,
339 Browptr = Bmat.graph.row_map,
340 Crowptr = Cmat.graph.row_map;
341 const lno_nnz_view_t Acolind = Amat.graph.entries,
342 Bcolind = Bmat.graph.entries,
343 Ccolind = Cmat.graph.entries;
344 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
345 scalar_view_t Cvals = Cmat.values;
346
347 c_lno_view_t Irowptr;
348 lno_nnz_view_t Icolind;
349 scalar_view_t Ivals;
350 if(!Bview.importMatrix.is_null()) {
351 auto lclB = Bview.importMatrix->getLocalMatrixHost();
352 Irowptr = lclB.graph.row_map;
353 Icolind = lclB.graph.entries;
354 Ivals = lclB.values;
355 }
356
357#ifdef HAVE_TPETRA_MMM_TIMINGS
358 MM2 = Teuchos::null; MM2 = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix SerialCore - Compare"))));
359#endif
360
361 // Classic csr assembly (low memory edition)
362 // mfh 27 Sep 2016: The c_status array is an implementation detail
363 // of the local sparse matrix-matrix multiply routine.
364
365 // The status array will contain the index into colind where this entry was last deposited.
366 // c_status[i] < CSR_ip - not in the row yet
367 // c_status[i] >= CSR_ip - this is the entry where you can find the data
368 // We start with this filled with INVALID's indicating that there are no entries yet.
369 // Sadly, this complicates the code due to the fact that size_t's are unsigned.
370 std::vector<size_t> c_status(n, ST_INVALID);
371
372 // For each row of A/C
373 size_t CSR_ip = 0, OLD_ip = 0;
374 for (size_t i = 0; i < m; i++) {
375 // First fill the c_status array w/ locations where we're allowed to
376 // generate nonzeros for this row
377 OLD_ip = Crowptr[i];
378 CSR_ip = Crowptr[i+1];
379 for (size_t k = OLD_ip; k < CSR_ip; k++) {
380 c_status[Ccolind[k]] = k;
381
382 // Reset values in the row of C
383 Cvals[k] = SC_ZERO;
384 }
385
386 for (size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
387 LO Aik = Acolind[k];
388 const SC Aval = Avals[k];
389 if (Aval == SC_ZERO)
390 continue;
391
392 if (targetMapToOrigRow[Aik] != LO_INVALID) {
393 // Local matrix
394 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
395
396 for (size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
397 LO Bkj = Bcolind[j];
398 LO Cij = Bcol2Ccol[Bkj];
399
400 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
401 std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph " <<
402 "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
403
404 Cvals[c_status[Cij]] += Aval * Bvals[j];
405 }
406
407 } else {
408 // Remote matrix
409 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
410 for (size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
411 LO Ikj = Icolind[j];
412 LO Cij = Icol2Ccol[Ikj];
413
414 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
415 std::runtime_error, "Trying to insert a new entry (" << i << "," << Cij << ") into a static graph " <<
416 "(c_status = " << c_status[Cij] << " of [" << OLD_ip << "," << CSR_ip << "))");
417
418 Cvals[c_status[Cij]] += Aval * Ivals[j];
419 }
420 }
421 }
422 }
423
424 C.fillComplete(C.getDomainMap(), C.getRangeMap());
425}
426
427/*********************************************************************************************************/
428template<class Scalar,
429 class LocalOrdinal,
430 class GlobalOrdinal,
431 class LocalOrdinalViewType>
432void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
433 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> & Dinv,
434 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
435 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
436 const LocalOrdinalViewType & Acol2Brow,
437 const LocalOrdinalViewType & Acol2Irow,
438 const LocalOrdinalViewType & Bcol2Ccol,
439 const LocalOrdinalViewType & Icol2Ccol,
440 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
441 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
442 const std::string& label,
443 const Teuchos::RCP<Teuchos::ParameterList>& params) {
444
445#ifdef HAVE_TPETRA_MMM_TIMINGS
446 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
447 using Teuchos::TimeMonitor;
448 Teuchos::RCP<TimeMonitor> MM;
449#endif
450
451 // Node-specific code
452 using Teuchos::RCP;
453
454 // Options
455 //int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer // unreferenced
456 std::string myalg("KK");
457 if(!params.is_null()) {
458 if(params->isParameter("sycl: jacobi algorithm"))
459 myalg = params->get("sycl: jacobi algorithm",myalg);
460 }
461
462 if(myalg == "MSAK") {
463 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
464 }
465 else if(myalg == "KK") {
466 jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
467 }
468 else {
469 throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
470 }
471
472#ifdef HAVE_TPETRA_MMM_TIMINGS
473 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix SYCLESFC"))));
474#endif
475
476 // Final Fillcomplete
477 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
478 labelList->set("Timer Label",label);
479 if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
480
481 // NOTE: MSAK already fillCompletes, so we have to check here
482 if(!C.isFillComplete()) {
483 RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > dummyExport;
484 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
485 }
486
487}
488
489
490
491/*********************************************************************************************************/
492template<class Scalar,
493 class LocalOrdinal,
494 class GlobalOrdinal,
495 class LocalOrdinalViewType>
496void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
497 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> & Dinv,
498 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
499 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
500 const LocalOrdinalViewType & targetMapToOrigRow_dev,
501 const LocalOrdinalViewType & targetMapToImportRow_dev,
502 const LocalOrdinalViewType & Bcol2Ccol_dev,
503 const LocalOrdinalViewType & Icol2Ccol_dev,
504 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
505 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
506 const std::string& label,
507 const Teuchos::RCP<Teuchos::ParameterList>& params) {
508
509 // FIXME: Right now, this is a cut-and-paste of the serial kernel
510 typedef Kokkos::Compat::KokkosSYCLWrapperNode Node;
511
512#ifdef HAVE_TPETRA_MMM_TIMINGS
513 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
514 using Teuchos::TimeMonitor;
515 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse SYCLCore"))));
516 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
517#endif
518 using Teuchos::RCP;
519 using Teuchos::rcp;
520
521 // Lots and lots of typedefs
522 typedef typename Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_host_type KCRS;
523 typedef typename KCRS::StaticCrsGraphType graph_t;
524 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
525 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
526 typedef typename KCRS::values_type::non_const_type scalar_view_t;
527 typedef typename scalar_view_t::memory_space scalar_memory_space;
528
529 typedef Scalar SC;
530 typedef LocalOrdinal LO;
531 typedef GlobalOrdinal GO;
532 typedef Node NO;
533 typedef Map<LO,GO,NO> map_type;
534 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
535 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
536 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
537
538 // Since this is being run on SYCL, we need to fence because the below host code will use UVM
539 // KDDKDD typename graph_t::execution_space().fence();
540
541 // KDDKDD UVM Without UVM, need to copy targetMap arrays to host.
542 // KDDKDD UVM Ideally, this function would run on device and use
543 // KDDKDD UVM KokkosKernels instead of this host implementation.
544 auto targetMapToOrigRow =
545 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
546 targetMapToOrigRow_dev);
547 auto targetMapToImportRow =
548 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
549 targetMapToImportRow_dev);
550 auto Bcol2Ccol =
551 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
552 Bcol2Ccol_dev);
553 auto Icol2Ccol =
554 Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
555 Icol2Ccol_dev);
556
557
558 // Sizes
559 RCP<const map_type> Ccolmap = C.getColMap();
560 size_t m = Aview.origMatrix->getLocalNumRows();
561 size_t n = Ccolmap->getLocalNumElements();
562
563 // Grab the Kokkos::SparseCrsMatrices & inner stuff
564 const KCRS & Amat = Aview.origMatrix->getLocalMatrixHost();
565 const KCRS & Bmat = Bview.origMatrix->getLocalMatrixHost();
566 const KCRS & Cmat = C.getLocalMatrixHost();
567
568 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
569 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
570 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
571 scalar_view_t Cvals = Cmat.values;
572
573 c_lno_view_t Irowptr;
574 lno_nnz_view_t Icolind;
575 scalar_view_t Ivals;
576 if(!Bview.importMatrix.is_null()) {
577 auto lclB = Bview.importMatrix->getLocalMatrixHost();
578 Irowptr = lclB.graph.row_map;
579 Icolind = lclB.graph.entries;
580 Ivals = lclB.values;
581 }
582
583 // Jacobi-specific inner stuff
584 auto Dvals =
585 Dinv.template getLocalView<scalar_memory_space>(Access::ReadOnly);
586
587#ifdef HAVE_TPETRA_MMM_TIMINGS
588 MM2 = Teuchos::null; MM2 = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse SYCLCore - Compare"))));
589#endif
590
591 // The status array will contain the index into colind where this entry was last deposited.
592 // c_status[i] < CSR_ip - not in the row yet
593 // c_status[i] >= CSR_ip - this is the entry where you can find the data
594 // We start with this filled with INVALID's indicating that there are no entries yet.
595 // Sadly, this complicates the code due to the fact that size_t's are unsigned.
596 std::vector<size_t> c_status(n, ST_INVALID);
597
598 // For each row of A/C
599 size_t CSR_ip = 0, OLD_ip = 0;
600 for (size_t i = 0; i < m; i++) {
601
602 // First fill the c_status array w/ locations where we're allowed to
603 // generate nonzeros for this row
604 OLD_ip = Crowptr[i];
605 CSR_ip = Crowptr[i+1];
606 for (size_t k = OLD_ip; k < CSR_ip; k++) {
607 c_status[Ccolind[k]] = k;
608
609 // Reset values in the row of C
610 Cvals[k] = SC_ZERO;
611 }
612
613 SC minusOmegaDval = -omega*Dvals(i,0);
614
615 // Entries of B
616 for (size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
617 Scalar Bval = Bvals[j];
618 if (Bval == SC_ZERO)
619 continue;
620 LO Bij = Bcolind[j];
621 LO Cij = Bcol2Ccol[Bij];
622
623 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
624 std::runtime_error, "Trying to insert a new entry into a static graph");
625
626 Cvals[c_status[Cij]] = Bvals[j];
627 }
628
629 // Entries of -omega * Dinv * A * B
630 for (size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
631 LO Aik = Acolind[k];
632 const SC Aval = Avals[k];
633 if (Aval == SC_ZERO)
634 continue;
635
636 if (targetMapToOrigRow[Aik] != LO_INVALID) {
637 // Local matrix
638 size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Aik]);
639
640 for (size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
641 LO Bkj = Bcolind[j];
642 LO Cij = Bcol2Ccol[Bkj];
643
644 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
645 std::runtime_error, "Trying to insert a new entry into a static graph");
646
647 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
648 }
649
650 } else {
651 // Remote matrix
652 size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Aik]);
653 for (size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
654 LO Ikj = Icolind[j];
655 LO Cij = Icol2Ccol[Ikj];
656
657 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
658 std::runtime_error, "Trying to insert a new entry into a static graph");
659
660 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
661 }
662 }
663 }
664 }
665
666#ifdef HAVE_TPETRA_MMM_TIMINGS
667 MM2= Teuchos::null;
668 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse ESFC"))));
669#endif
670
671 C.fillComplete(C.getDomainMap(), C.getRangeMap());
672
673}
674
675/*********************************************************************************************************/
676template<class Scalar,
677 class LocalOrdinal,
678 class GlobalOrdinal,
679 class LocalOrdinalViewType>
680void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
681 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> & Dinv,
682 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Aview,
683 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& Bview,
684 const LocalOrdinalViewType & Acol2Brow,
685 const LocalOrdinalViewType & Acol2Irow,
686 const LocalOrdinalViewType & Bcol2Ccol,
687 const LocalOrdinalViewType & Icol2Ccol,
688 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosSYCLWrapperNode>& C,
689 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > Cimport,
690 const std::string& label,
691 const Teuchos::RCP<Teuchos::ParameterList>& params) {
692
693#ifdef HAVE_TPETRA_MMM_TIMINGS
694 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
695 using Teuchos::TimeMonitor;
696 Teuchos::RCP<TimeMonitor> MM;
697#endif
698
699 // Check if the diagonal entries exist in debug mode
700 const bool debug = Tpetra::Details::Behavior::debug();
701 if(debug) {
702
703 auto rowMap = Aview.origMatrix->getRowMap();
704 Tpetra::Vector<Scalar> diags(rowMap);
705 Aview.origMatrix->getLocalDiagCopy(diags);
706 size_t diagLength = rowMap->getLocalNumElements();
707 Teuchos::Array<Scalar> diagonal(diagLength);
708 diags.get1dCopy(diagonal());
709
710 for(size_t i = 0; i < diagLength; ++i) {
711 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
712 std::runtime_error,
713 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
714 "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
715 }
716 }
717
718 // Usings
719 using device_t = typename Kokkos::Compat::KokkosSYCLWrapperNode::device_type;
721 using graph_t = typename matrix_t::StaticCrsGraphType;
722 using lno_view_t = typename graph_t::row_map_type::non_const_type;
723 using c_lno_view_t = typename graph_t::row_map_type::const_type;
724 using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
725 using scalar_view_t = typename matrix_t::values_type::non_const_type;
726
727 // KokkosKernels handle
728 using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
729 typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
730 typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
731
732 // Get the rowPtr, colInd and vals of importMatrix
733 c_lno_view_t Irowptr;
734 lno_nnz_view_t Icolind;
735 scalar_view_t Ivals;
736 if(!Bview.importMatrix.is_null()) {
737 auto lclB = Bview.importMatrix->getLocalMatrixDevice();
738 Irowptr = lclB.graph.row_map;
739 Icolind = lclB.graph.entries;
740 Ivals = lclB.values;
741 }
742
743 // Merge the B and Bimport matrices
744 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
745
746 // Get the properties and arrays of input matrices
747 const matrix_t & Amat = Aview.origMatrix->getLocalMatrixDevice();
748 const matrix_t & Bmat = Bview.origMatrix->getLocalMatrixDevice();
749
750 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
751 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
752 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
753
754 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
755 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
756 const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
757
758 // Arrays of the output matrix
759 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
760 lno_nnz_view_t entriesC;
761 scalar_view_t valuesC;
762
763 // Options
764 int team_work_size = 16;
765 std::string myalg("SPGEMM_KK_MEMORY");
766 if(!params.is_null()) {
767 if(params->isParameter("sycl: algorithm"))
768 myalg = params->get("sycl: algorithm",myalg);
769 if(params->isParameter("sycl: team work size"))
770 team_work_size = params->get("sycl: team work size",team_work_size);
771 }
772
773 // Get the algorithm mode
774 std::string nodename("SYCL");
775 std::string alg = nodename + std::string(" algorithm");
776 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
777 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
778
779
780 // KokkosKernels call
781 handle_t kh;
782 kh.create_spgemm_handle(alg_enum);
783 kh.set_team_work_size(team_work_size);
784
785 KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
786 Arowptr, Acolind, false,
787 Browptr, Bcolind, false,
788 row_mapC);
789
790 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
791 if (c_nnz_size){
792 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
793 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
794 }
795
796 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
797 Arowptr, Acolind, Avals, false,
798 Browptr, Bcolind, Bvals, false,
799 row_mapC, entriesC, valuesC,
800 omega, Dinv.getLocalViewDevice(Access::ReadOnly));
801 kh.destroy_spgemm_handle();
802
803#ifdef HAVE_TPETRA_MMM_TIMINGS
804 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix SYCLSort"))));
805#endif
806
807 // Sort & set values
808 if (params.is_null() || params->get("sort entries",true))
809 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
810 C.setAllValues(row_mapC,entriesC,valuesC);
811
812#ifdef HAVE_TPETRA_MMM_TIMINGS
813 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix SYCLESFC"))));
814#endif
815
816 // Final Fillcomplete
817 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
818 labelList->set("Timer Label",label);
819 if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
820 Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosSYCLWrapperNode> > dummyExport;
821 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
822}
823
824 }//MMdetails
825}//Tpetra
826
827#endif//SYCL
828
829#endif
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
static bool debug()
Whether Tpetra is in debug mode.
A distributed dense vector.
Namespace Tpetra contains the class and methods constituting the Tpetra library.