Tpetra parallel linear algebra Version of the Day
Tpetra_idot.hpp
// @HEADER
// ***********************************************************************
//
//          Tpetra: Templated Linear Algebra Services Package
//                 Copyright (2008) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
//
// ************************************************************************
// @HEADER

#ifndef TPETRA_DETAILS_IDOT_HPP
#define TPETRA_DETAILS_IDOT_HPP

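/// \file Tpetra_idot.hpp
/// \brief Declaration of Tpetra::idot, a nonblocking (asynchronous) global
///   dot product that accepts either Tpetra::MultiVector or Tpetra::Vector
///   arguments.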

#include "Tpetra_iallreduce.hpp"
#include "Tpetra_MultiVector.hpp"
#include "Tpetra_Vector.hpp"
#include "Teuchos_CommHelpers.hpp"
#include "KokkosBlas1_dot.hpp"
#include <stdexcept>
#include <sstream>

namespace Tpetra {
namespace Details {

// Temporary helper to get read-only view from multivector in requested space.
// TODO: when https://github.com/kokkos/kokkos/issues/3850 is resolved,
// remove this and just use templated getLocalView<Device>(ReadOnly).
template<typename MV>
struct GetReadOnly
{
  using DevView = typename MV::dual_view_type::t_dev::const_type;
  using HostView = typename MV::dual_view_type::t_host::const_type;

  template<typename exec_space>
  static DevView get(const MV& x, typename std::enable_if<std::is_same<exec_space, typename MV::execution_space>::value>::type* = nullptr)
  {
    return x.getLocalViewDevice(Tpetra::Access::ReadOnly);
  }

  template<typename exec_space>
  static HostView get(const MV& x, typename std::enable_if<!std::is_same<exec_space, typename MV::execution_space>::value>::type* = nullptr)
  {
    return x.getLocalViewHost(Tpetra::Access::ReadOnly);
  }
};

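/// \brief Compute the dot product(s) of X and Y locally (on this process),
///   storing the result(s) in localResult. Where the kernel runs is
///   controlled by the runOnDevice template parameter.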
template<class MV, class ResultView, bool runOnDevice>
void idotLocal(const ResultView& localResult,
               const MV& X,
               const MV& Y)
{
  using pair_type = Kokkos::pair<size_t, size_t>;
  using exec_space = typename std::conditional<runOnDevice, typename MV::execution_space, Kokkos::DefaultHostExecutionSpace>::type;
  // If the execution space can access localResult, use it directly. Otherwise need to make a copy.
  static_assert(Kokkos::SpaceAccessibility<exec_space, typename ResultView::memory_space>::accessible,
                "idotLocal: Execution space must be able to access localResult");
  // If Dot executes on Serial, it requires the result to be HostSpace. If localResult is CudaUVMSpace,
  // we can just treat it like HostSpace.
  Kokkos::View<typename ResultView::data_type, typename exec_space::memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
    localResultUnmanaged(localResult.data(), localResult.extent(0));
  const size_t numRows = X.getLocalLength ();
  const pair_type rowRange (0, numRows);
  const size_t X_numVecs = X.getNumVectors ();
  const size_t Y_numVecs = Y.getNumVectors ();
  const size_t numVecs = X_numVecs > Y_numVecs ? X_numVecs : Y_numVecs;
  // Check compatibility of number of columns; allow special cases of
  // a multivector dot a single vector, or vice versa.
  if (X_numVecs != Y_numVecs &&
      X_numVecs != size_t (1) &&
      Y_numVecs != size_t (1)) {
    std::ostringstream os;
    os << "Tpetra::idot: X.getNumVectors() = " << X_numVecs
       << " != Y.getNumVectors() = " << Y_numVecs
       << ", but neither is 1.";
    throw std::invalid_argument (os.str ());
  }
  auto X_lcl = GetReadOnly<MV>::template get<exec_space>(X);
  auto Y_lcl = GetReadOnly<MV>::template get<exec_space>(Y);
  // Can the multivector dot kernel be used?
  bool useMVDot = X.isConstantStride() && Y.isConstantStride() && X_numVecs == Y_numVecs;
  if(useMVDot)
  {
    if (numVecs == size_t (1)) {
      auto X_lcl_1d = Kokkos::subview (X_lcl, rowRange, 0);
      auto Y_lcl_1d = Kokkos::subview (Y_lcl, rowRange, 0);
      auto result_0d = Kokkos::subview (localResultUnmanaged, 0);
      KokkosBlas::dot (result_0d, X_lcl_1d, Y_lcl_1d);
    }
    else {
      auto X_lcl_2d = Kokkos::subview (X_lcl, rowRange, pair_type (0, X_numVecs));
      auto Y_lcl_2d = Kokkos::subview (Y_lcl, rowRange, pair_type (0, Y_numVecs));
      KokkosBlas::dot (localResultUnmanaged, X_lcl_2d, Y_lcl_2d);
    }
  }
  else
  {
    auto XWhichVectors = Tpetra::getMultiVectorWhichVectors(X);
    auto YWhichVectors = Tpetra::getMultiVectorWhichVectors(Y);
    // Need to compute each dot individually, using 1D subviews of X_lcl and Y_lcl.
    for(size_t vec = 0; vec < numVecs; vec++) {
      // Get the Vectors for each column of X and Y that will be dotted together.
      // Note: "which vectors" is not populated for constant stride MVs (but it's the identity mapping).
      size_t Xj = (numVecs == X_numVecs) ? vec : 0;
      Xj = X.isConstantStride() ? Xj : XWhichVectors[Xj];
      size_t Yj = (numVecs == Y_numVecs) ? vec : 0;
      Yj = Y.isConstantStride() ? Yj : YWhichVectors[Yj];
      auto Xcol = Kokkos::subview(X_lcl, rowRange, Xj);
      auto Ycol = Kokkos::subview(Y_lcl, rowRange, Yj);

      // Compute the rank-1 dot product, and place the result in an element of localResult.
      KokkosBlas::dot(Kokkos::subview(localResultUnmanaged, vec), Xcol, Ycol);
    }
  }
}

// Helper to avoid extra instantiations of KokkosBlas::dot and iallreduce.
template<typename MV, typename ResultView>
struct IdotHelper
{
  using dot_type = typename MV::dot_type;

  // First version: use the result directly.
  template<typename exec_space>
  static std::shared_ptr< ::Tpetra::Details::CommRequest> run(
    const ResultView& globalResult, const MV& X, const MV& Y,
    typename std::enable_if<Kokkos::SpaceAccessibility<exec_space, typename ResultView::memory_space>::accessible>::type* = nullptr)
  {
    constexpr bool runOnDevice = std::is_same<exec_space, typename MV::execution_space>::value;
    idotLocal<MV, ResultView, runOnDevice>(globalResult, X, Y);
    // Fence because we're giving the result of a device kernel to MPI.
    if(runOnDevice)
      exec_space().fence();
    auto comm = X.getMap()->getComm();
    return iallreduce(globalResult, globalResult, ::Teuchos::REDUCE_SUM, *comm);
  }

  // Second version: use a temporary local result, because exec_space can't write to globalResult.
  template<typename exec_space>
  static std::shared_ptr< ::Tpetra::Details::CommRequest> run(
    const ResultView& globalResult, const MV& X, const MV& Y,
    typename std::enable_if<!Kokkos::SpaceAccessibility<exec_space, typename ResultView::memory_space>::accessible>::type* = nullptr)
  {
    constexpr bool runOnDevice = std::is_same<exec_space, typename MV::execution_space>::value;
    Kokkos::View<dot_type*, typename exec_space::memory_space> localResult(Kokkos::ViewAllocateWithoutInitializing("idot:localResult"), X.getNumVectors());
    idotLocal<MV, decltype(localResult), runOnDevice>(localResult, X, Y);
    // Fence because we're giving the result of a device kernel to MPI.
    if(runOnDevice)
      exec_space().fence();
    auto comm = X.getMap()->getComm();
    return iallreduce(localResult, globalResult, ::Teuchos::REDUCE_SUM, *comm);
  }
};

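/// \brief Internal (common) version of idot, a global dot product that
///   uses a nonblocking MPI reduction.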
template<class MV, class ResultView>
std::shared_ptr< ::Tpetra::Details::CommRequest>
idotImpl(const ResultView& globalResult,
         const MV& X,
         const MV& Y)
{
  static_assert(std::is_same<typename ResultView::non_const_value_type, typename MV::dot_type>::value,
                "Tpetra::idot: result view's element type must match MV::dot_type");

  // The execution space to use for the dot kernel(s) is whichever one has access to the up-to-date X.
  if(X.need_sync_device())
  {
    // Run on host.
    return IdotHelper<MV, ResultView>::template run<Kokkos::DefaultHostExecutionSpace>(globalResult, X, Y);
  }
  else
  {
    // Run on device.
    return IdotHelper<MV, ResultView>::template run<typename MV::execution_space>(globalResult, X, Y);
  }
}
} // namespace Details

//
// SKIP DOWN TO HERE
//

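/// \brief Nonblocking dot product, with either Tpetra::MultiVector or
///   Tpetra::Vector inputs, and a raw pointer to the result(s).
///
/// resultRaw must point to host memory with room for
/// max(X.getNumVectors(), Y.getNumVectors()) entries. The result is not
/// valid until wait() has been called on the returned CommRequest.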
template<class SC, class LO, class GO, class NT>
std::shared_ptr< ::Tpetra::Details::CommRequest>
idot (typename ::Tpetra::MultiVector<SC, LO, GO, NT>::dot_type* resultRaw,
      const ::Tpetra::MultiVector<SC, LO, GO, NT>& X,
      const ::Tpetra::MultiVector<SC, LO, GO, NT>& Y)
{
  using dot_type = typename ::Tpetra::Vector<SC, LO, GO, NT>::dot_type;
  const size_t X_numVecs = X.getNumVectors ();
  const size_t Y_numVecs = Y.getNumVectors ();
  const size_t numVecs = (X_numVecs > Y_numVecs) ? X_numVecs : Y_numVecs;
  Kokkos::View<dot_type*, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
    resultView(resultRaw, numVecs);
  return Details::idotImpl(resultView, X, Y);
}

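/// \brief Nonblocking dot product, with Tpetra::MultiVector inputs, and a
///   rank-1 (one-dimensional) Kokkos::View of the result(s).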
template<class SC, class LO, class GO, class NT>
std::shared_ptr< ::Tpetra::Details::CommRequest>
idot (const Kokkos::View<typename ::Tpetra::MultiVector<SC, LO, GO, NT>::dot_type*,
        typename ::Tpetra::MultiVector<SC, LO, GO, NT>::device_type>& result,
      const ::Tpetra::MultiVector<SC, LO, GO, NT>& X,
      const ::Tpetra::MultiVector<SC, LO, GO, NT>& Y)
{
  return Details::idotImpl(result, X, Y);
}

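/// \brief Nonblocking dot product, with Tpetra::Vector inputs, and a
///   rank-0 (single-value) Kokkos::View of the result.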
template<class SC, class LO, class GO, class NT>
std::shared_ptr< ::Tpetra::Details::CommRequest>
idot (const Kokkos::View<typename ::Tpetra::Vector<SC, LO, GO, NT>::dot_type,
        typename ::Tpetra::Vector<SC, LO, GO, NT>::device_type>& result,
      const ::Tpetra::Vector<SC, LO, GO, NT>& X,
      const ::Tpetra::Vector<SC, LO, GO, NT>& Y)
{
  using dot_type = typename ::Tpetra::Vector<SC, LO, GO, NT>::dot_type;
  using result_device_t = typename ::Tpetra::Vector<SC, LO, GO, NT>::device_type;
  Kokkos::View<dot_type*, result_device_t, Kokkos::MemoryTraits<Kokkos::Unmanaged>> result1D(result.data(), 1);
  return Details::idotImpl(result1D, X, Y);
}

} // namespace Tpetra

#endif // TPETRA_DETAILS_IDOT_HPP
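A minimal usage sketch (not part of the header above): one possible way to call the raw-pointer overload of Tpetra::idot on two Vectors, overlap other work with the nonblocking reduction, and then wait on the returned request before reading the result. The problem size, scalar values, and variable names are illustrative assumptions, and the default Scalar/LocalOrdinal/GlobalOrdinal/Node types are used.

#include "Tpetra_Core.hpp"
#include "Tpetra_Map.hpp"
#include "Tpetra_Vector.hpp"
#include "Tpetra_idot.hpp"

int main (int argc, char* argv[])
{
  Tpetra::ScopeGuard tpetraScope (&argc, &argv);
  {
    using vector_type = Tpetra::Vector<>;
    using dot_type = vector_type::dot_type;

    auto comm = Tpetra::getDefaultComm ();
    // Hypothetical problem size: 100 rows distributed over all processes.
    const Tpetra::global_size_t numGlobalRows = 100;
    auto map = Teuchos::rcp (new Tpetra::Map<> (numGlobalRows, 0, comm));

    vector_type x (map);
    vector_type y (map);
    x.putScalar (1.0);
    y.putScalar (2.0);

    // Start the nonblocking dot product.  The value of 'result' is not
    // valid until wait() has been called on the returned request.
    dot_type result {};
    auto request = Tpetra::idot (&result, x, y);

    // ... other work may overlap with the reduction here ...

    request->wait ();  // 'result' now holds the global dot product (200 here).
  }
  return 0;
}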