Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
Stokhos_Multiply.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Stokhos Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38//
39// ***********************************************************************
40// @HEADER
41
42#ifndef STOKHOS_MULTIPLY_HPP
43#define STOKHOS_MULTIPLY_HPP
44
45//#include "Kokkos_Macros.hpp"
46//#include "Kokkos_Pair.hpp"
47//#include "impl/Kokkos_Traits.hpp"
48
49#include "Kokkos_Core.hpp"
50
51#include <vector> // for std::vector (needed below)
52
53namespace Stokhos {
54
// Compile-time test for whether N is a power of two.
//
// `value` is non-zero exactly when N is positive and clearing its lowest set
// bit (N & (N - 1)) leaves zero, i.e. when N has a single bit set.
template <size_t N>
struct is_power_of_two {
  enum type { value = (N > 0) && !(N & (N - 1)) };
};

// Compile-time base-2 logarithm of a power of two.
//
// Only the <N, true> forms below are defined, so power_of_two<N>::value is
// usable solely when is_power_of_two<N>::value holds.
// NOTE(review): the line declaring the primary template was missing from the
// listing this file was recovered from; a declaration-only primary is
// assumed here -- confirm against the upstream header.
template <size_t N, bool OK = is_power_of_two<N>::value>
struct power_of_two;

// Recursive case: log2(N) = 1 + log2(N / 2).
template <size_t N>
struct power_of_two<N, true> {
  enum type { value = 1 + power_of_two<(N >> 1), true>::value };
};

// log2(2) = 1.
template <>
struct power_of_two<2, true> {
  enum type { value = 1 };
};

// Terminating case: log2(1) = 0.
template <>
struct power_of_two<1, true> {
  enum type { value = 0 };
};
// Empty tag type. It is the default ImplTag argument of the Multiply class
// template and the tag parameter of the tagged multiply() overloads below.
// NOTE(review): this definition was missing from the listing this file was
// recovered from (its line was dropped); it is restored here because the
// name is used by value later in this header -- confirm against upstream.
class DefaultMultiply {};

// Tag type that carries an unsigned rank as a template argument.
template <unsigned> class IntegralRank {};

// Maps a type T to the tag IntegralRank<T::Rank>.
//
// T must expose a compile-time constant member named Rank.
template <typename T> struct ViewRank {
  typedef IntegralRank< T::Rank > type;
};
85
86template <typename T> struct ViewRank< std::vector<T> > {
88};
89
90template <typename MatrixType,
91 typename InputVectorType,
92 typename OutputVectorType,
93 typename ColumnIndicesType = void,
94 typename VectorRank = typename ViewRank<InputVectorType>::type,
95 typename ImplTag = DefaultMultiply
96 > class Multiply;
97
98template <typename MatrixType,
99 typename InputVectorType,
100 typename OutputVectorType>
101void multiply(const MatrixType& A,
102 const InputVectorType& x,
103 OutputVectorType& y) {
105 multiply_type::apply( A, x, y );
106}
107
namespace { // (anonymous)

// Work-around for CWG 1558. See
// https://en.cppreference.com/w/cpp/types/void_t
//
// make_void<Ts...>::type is always void; routing the alias through a class
// template makes every Ts participate in substitution.
template<class... Ts> struct make_void { typedef void type; };
template<class... Ts>
using replace_me_with_void_t_in_cxx17 =
  typename make_void<Ts...>::type;

// Primary template: used when T has no nested const_type; yields T itself.
template<class T, class = replace_me_with_void_t_in_cxx17<> >
struct const_type_impl {
  using type = T;
};

// Partial specialization: viable only when T::const_type names a type, in
// which case that nested type is the result.
template<class T>
struct const_type_impl<T,
  replace_me_with_void_t_in_cxx17<typename T::const_type> > {
  using type = typename T::const_type;
};

// T::const_type when T declares one, otherwise T.
template<class T>
using const_type_t = typename const_type_impl<T>::type;

} // namespace (anonymous)
132
133template <typename MatrixType,
134 typename InputVectorType,
135 typename OutputVectorType>
136void multiply(const MatrixType& A,
137 const InputVectorType& x,
138 OutputVectorType& y,
139 DefaultMultiply tag) {
140 // mfh 29 Jul 2019: Not sure why, but std::vector claims to be a
141 // Kokkos::View using Kokkos::is_view. This is why I check instead
142 // whether the class has a const_type typedef.
143 using input_vector_type = const_type_t<InputVectorType>;
144 using multiply_type =
146 multiply_type::apply( A, x, y );
147}
148
149template <typename MatrixType,
150 typename InputVectorType,
151 typename OutputVectorType,
152 typename ColumnIndicesType>
153void multiply(const MatrixType& A,
154 const InputVectorType& x,
155 OutputVectorType& y,
156 const ColumnIndicesType& col) {
158 multiply_type::apply( A, x, y, col );
159}
160
161template <typename MatrixType,
162 typename InputVectorType,
163 typename OutputVectorType,
164 typename ColumnIndicesType>
165void multiply(const MatrixType& A,
166 const InputVectorType& x,
167 OutputVectorType& y,
168 const ColumnIndicesType& col,
169 DefaultMultiply tag) {
171 multiply_type::apply( A, x, y, col );
172}
173
// Forward declaration only; no definition is provided in this header.
template <typename BlockSpec> class BlockMultiply;
175
176namespace details {
177
178/*
179 * Compute work range = (begin, end) such that adjacent threads/blocks write to
180 * separate cache lines
181 */
182template <typename scalar_type, typename execution_space, typename size_type>
183KOKKOS_INLINE_FUNCTION
184Kokkos::pair<size_type, size_type>
186 const size_type work_count,
187 const size_type thread_count,
188 const size_type thread_rank)
189{
190#if defined( KOKKOS_ENABLE_CUDA )
191 enum { cache_line =
192 std::is_same<execution_space,Kokkos::Cuda>::value ? 128 : 64 };
193#else
194 enum { cache_line = 64 };
195#endif
196
197 enum { work_align = cache_line / sizeof(scalar_type) };
198 enum { work_shift = power_of_two< work_align >::value };
199 enum { work_mask = work_align - 1 };
200
201 const size_type work_per_thread =
202 ( ( ( ( work_count + work_mask ) >> work_shift ) + thread_count - 1 ) /
203 thread_count ) << work_shift ;
204
205 size_type work_begin = thread_rank * work_per_thread;
206 size_type work_end = work_begin + work_per_thread;
207 if (work_begin > work_count)
208 work_begin = work_count;
209 if (work_end > work_count)
210 work_end = work_count;
211
212 return Kokkos::make_pair(work_begin, work_end);
213}
214
215// Functor implementing assignment update for multiply kernels
217 template <typename Scalar>
218 KOKKOS_INLINE_FUNCTION
219 void operator()(Scalar& y, const Scalar& x) const { y = x; }
220};
221
222// Functor implementing += update for multiply kernels
224 template <typename Scalar>
225 KOKKOS_INLINE_FUNCTION
226 void operator()(Scalar& y, const Scalar& x) const { y += x; }
227};
228
229// Functor implementing scaled assignment update for multiply kernels
230template <typename Value>
232 const Value a;
233 MultiplyScaledAssign(const Value& a_) : a(a_) {}
234 template <typename Scalar>
235 KOKKOS_INLINE_FUNCTION
236 void operator()(Scalar& y, const Scalar& x) const { y = a*x; }
237};
238
239// Functor implementing += update for multiply kernels
240template <typename Value>
242 const Value a;
243 MultiplyScaledUpdate(const Value& a_) : a(a_) {}
244 template <typename Scalar>
245 KOKKOS_INLINE_FUNCTION
246 void operator()(Scalar& y, const Scalar& x) const { y += a*x; }
247};
248
249// Functor implementing saxpby update for multiply kernels
250template <typename Value>
252 const Value a;
253 const Value b;
254 MultiplyScaledUpdate2(const Value& a_, const Value& b_) : a(a_), b(b_) {}
255 template <typename Scalar>
256 KOKKOS_INLINE_FUNCTION
257 void operator()(Scalar& y, const Scalar& x) const { y = a*x + b*y; }
258};
259
260} // namespace details
261
262} // namespace Stokhos
263
264#endif
Kokkos::DefaultExecutionSpace device
Kokkos::DefaultExecutionSpace execution_space
KOKKOS_INLINE_FUNCTION Kokkos::pair< size_type, size_type > compute_work_range(const execution_space device, const size_type work_count, const size_type thread_count, const size_type thread_rank)
Top-level namespace for Stokhos classes and functions.
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
IntegralRank< T::Rank > type
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
MultiplyScaledUpdate2(const Value &a_, const Value &b_)
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const