42#ifndef TPETRA_BLOCKVIEW_HPP
43#define TPETRA_BLOCKVIEW_HPP
53#include "TpetraCore_config.h"
54#include "Kokkos_ArithTraits.hpp"
55#include "Kokkos_Complex.hpp"
72template<
class ViewType1,
74 const int rank1 = ViewType1::rank>
76 static KOKKOS_INLINE_FUNCTION
void
77 run (
const ViewType2& Y,
const ViewType1& X);
84template<
class ViewType1,
86struct AbsMax<ViewType1, ViewType2, 2> {
89 static KOKKOS_INLINE_FUNCTION
void
90 run (
const ViewType2& Y,
const ViewType1& X)
92 static_assert (
static_cast<int> (ViewType1::rank) ==
static_cast<int> (ViewType2::rank),
93 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
94 typedef typename std::remove_reference<
decltype (Y(0,0)) >::type STY;
95 static_assert(! std::is_const<STY>::value,
96 "AbsMax: The type of each entry of Y must be nonconst.");
97 typedef typename std::decay<
decltype (X(0,0)) >::type STX;
98 static_assert( std::is_same<STX, STY>::value,
99 "AbsMax: The type of each entry of X and Y must be the same.");
100 typedef Kokkos::Details::ArithTraits<STY> KAT;
102 const int numCols = Y.extent (1);
103 const int numRows = Y.extent (0);
104 for (
int j = 0; j < numCols; ++j) {
105 for (
int i = 0; i < numRows; ++i) {
107 const STX X_ij = X(i,j);
110 const auto Y_ij_abs = KAT::abs (Y_ij);
111 const auto X_ij_abs = KAT::abs (X_ij);
112 Y_ij = (Y_ij_abs >= X_ij_abs) ?
113 static_cast<STY
> (Y_ij_abs) :
114 static_cast<STY
> (X_ij_abs);
124template<
class ViewType1,
129 static KOKKOS_INLINE_FUNCTION
void
130 run (
const ViewType2& Y,
const ViewType1& X)
132 static_assert (
static_cast<int> (ViewType1::rank) ==
static_cast<int> (ViewType2::rank),
133 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
135 typedef typename std::remove_reference<
decltype (Y(0)) >::type STY;
136 static_assert(! std::is_const<STY>::value,
137 "AbsMax: The type of each entry of Y must be nonconst.");
138 typedef typename std::remove_const<
typename std::remove_reference<
decltype (X(0)) >::type>::type STX;
139 static_assert( std::is_same<STX, STY>::value,
140 "AbsMax: The type of each entry of X and Y must be the same.");
141 typedef Kokkos::Details::ArithTraits<STY> KAT;
143 const int numRows = Y.extent (0);
144 for (
int i = 0; i < numRows; ++i) {
146 const STX X_i = X(i);
149 const auto Y_i_abs = KAT::abs (Y_i);
150 const auto X_i_abs = KAT::abs (X_i);
151 Y_i = (Y_i_abs >= X_i_abs) ?
152 static_cast<STY
> (Y_i_abs) :
153 static_cast<STY
> (X_i_abs);
164template<
class ViewType1,
class ViewType2, const
int rank = ViewType1::rank>
165KOKKOS_INLINE_FUNCTION
void
166absMax (
const ViewType2& Y,
const ViewType1& X)
168 static_assert (
static_cast<int> (ViewType1::rank) ==
static_cast<int> (ViewType2::rank),
169 "absMax: ViewType1 and ViewType2 must have the same rank.");
177template<
class ViewType,
178 class CoefficientType,
179 class IndexType = int,
180 const bool is_contiguous =
false,
181 const int rank = ViewType::rank>
183 static KOKKOS_INLINE_FUNCTION
void
184 run (
const CoefficientType& alpha,
const ViewType& x);
189template<
class ViewType,
190 class CoefficientType,
192struct SCAL<ViewType, CoefficientType, IndexType, false, 1> {
194 static KOKKOS_INLINE_FUNCTION
void
195 run (
const CoefficientType& alpha,
const ViewType& x)
197 const IndexType numRows =
static_cast<IndexType
> (x.extent (0));
200#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
203 for (IndexType i = 0; i < numRows; ++i)
209template<
class ViewType,
210 class CoefficientType,
212struct SCAL<ViewType, CoefficientType, IndexType, false, 2> {
214 static KOKKOS_INLINE_FUNCTION
void
215 run (
const CoefficientType& alpha,
const ViewType& A)
217 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
218 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
220 for (IndexType j = 0; j < numCols; ++j)
221 for (IndexType i = 0; i < numRows; ++i)
222 A(i,j) = alpha * A(i,j);
225template<
class ViewType,
226 class CoefficientType,
229struct SCAL<ViewType, CoefficientType, IndexType, true, rank> {
231 static KOKKOS_INLINE_FUNCTION
void
232 run (
const CoefficientType& alpha,
const ViewType& x)
234 using x_value_type =
typename std::decay<
decltype (*x.data()) >::type;
235 const IndexType span =
static_cast<IndexType
> (x.span());
236 x_value_type *__restrict__ x_ptr(x.data());
237#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
240 for (IndexType i = 0; i < span; ++i)
241 x_ptr[i] = alpha * x_ptr[i];
249template<
class ViewType,
251 class IndexType = int,
252 const bool is_contiguous =
false,
253 const int rank = ViewType::rank>
255 static KOKKOS_INLINE_FUNCTION
void
256 run (
const ViewType& x,
const InputType& val);
261template<
class ViewType,
264struct FILL<ViewType, InputType, IndexType, false, 1> {
265 static KOKKOS_INLINE_FUNCTION
void
266 run (
const ViewType& x,
const InputType& val)
268 const IndexType numRows =
static_cast<IndexType
> (x.extent (0));
269 for (IndexType i = 0; i < numRows; ++i)
275template<
class ViewType,
278struct FILL<ViewType, InputType, IndexType, false, 2> {
279 static KOKKOS_INLINE_FUNCTION
void
280 run (
const ViewType& X,
const InputType& val)
282 const IndexType numRows =
static_cast<IndexType
> (X.extent (0));
283 const IndexType numCols =
static_cast<IndexType
> (X.extent (1));
284 for (IndexType j = 0; j < numCols; ++j)
285 for (IndexType i = 0; i < numRows; ++i)
289template<
class ViewType,
293struct FILL<ViewType, InputType, IndexType, true, rank> {
294 static KOKKOS_INLINE_FUNCTION
void
295 run (
const ViewType& x,
const InputType& val)
297 const IndexType span =
static_cast<IndexType
> (x.span());
298 auto x_ptr = x.data();
299#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
302 for (IndexType i = 0; i < span; ++i)
312template<
class CoefficientType,
315 class IndexType = int,
316 const bool is_contiguous =
false,
317 const int rank = ViewType1::rank>
319 static KOKKOS_INLINE_FUNCTION
void
320 run (
const CoefficientType& alpha,
327template<
class CoefficientType,
331struct AXPY<CoefficientType, ViewType1, ViewType2, IndexType, false, 1> {
333 static KOKKOS_INLINE_FUNCTION
void
334 run (
const CoefficientType& alpha,
338 using Kokkos::Details::ArithTraits;
339 static_assert (
static_cast<int> (ViewType1::rank) ==
static_cast<int> (ViewType2::rank),
340 "AXPY: x and y must have the same rank.");
342 const IndexType numRows =
static_cast<IndexType
> (y.extent (0));
343 if (alpha != ArithTraits<CoefficientType>::zero ()) {
345 for (IndexType i = 0; i < numRows; ++i)
346 y(i) += alpha * x(i);
353template<
class CoefficientType,
357struct AXPY<CoefficientType, ViewType1, ViewType2, IndexType, false, 2> {
359 static KOKKOS_INLINE_FUNCTION
void
360 run (
const CoefficientType& alpha,
364 using Kokkos::Details::ArithTraits;
365 static_assert (
static_cast<int> (ViewType1::rank) ==
static_cast<int> (ViewType2::rank),
366 "AXPY: X and Y must have the same rank.");
367 const IndexType numRows =
static_cast<IndexType
> (Y.extent (0));
368 const IndexType numCols =
static_cast<IndexType
> (Y.extent (1));
370 if (alpha != ArithTraits<CoefficientType>::zero ()) {
371 for (IndexType j = 0; j < numCols; ++j)
372 for (IndexType i = 0; i < numRows; ++i)
373 Y(i,j) += alpha * X(i,j);
378template<
class CoefficientType,
383struct AXPY<CoefficientType, ViewType1, ViewType2, IndexType, true, rank> {
385 static KOKKOS_INLINE_FUNCTION
void
386 run (
const CoefficientType& alpha,
390 using Kokkos::Details::ArithTraits;
391 static_assert (
static_cast<int> (ViewType1::rank) ==
static_cast<int> (ViewType2::rank),
392 "AXPY: x and y must have the same rank.");
394 if (alpha != ArithTraits<CoefficientType>::zero ()) {
395 using x_value_type =
typename std::decay<
decltype (*x.data()) >::type;
396 using y_value_type =
typename std::decay<
decltype (*y.data()) >::type;
397 const IndexType span =
static_cast<IndexType
> (y.span());
398 const x_value_type *__restrict__ x_ptr(x.data());
399 y_value_type *__restrict__ y_ptr(y.data());
400#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
403 for (IndexType i = 0; i < span; ++i)
404 y_ptr[i] += alpha * x_ptr[i];
413template<
class ViewType1,
415 class IndexType = int,
416 const bool is_contiguous =
false,
417 const int rank = ViewType1::rank>
419 static KOKKOS_INLINE_FUNCTION
void
420 run (
const ViewType1& x,
const ViewType2& y);
425template<
class ViewType1,
428struct COPY<ViewType1, ViewType2, IndexType, false, 1> {
430 static KOKKOS_INLINE_FUNCTION
void
431 run (
const ViewType1& x,
const ViewType2& y)
433 const IndexType numRows =
static_cast<IndexType
> (x.extent (0));
435 for (IndexType i = 0; i < numRows; ++i)
442template<
class ViewType1,
445struct COPY<ViewType1, ViewType2, IndexType, false, 2> {
447 static KOKKOS_INLINE_FUNCTION
void
448 run (
const ViewType1& X,
const ViewType2& Y)
450 const IndexType numRows =
static_cast<IndexType
> (Y.extent (0));
451 const IndexType numCols =
static_cast<IndexType
> (Y.extent (1));
453 for (IndexType j = 0; j < numCols; ++j)
454 for (IndexType i = 0; i < numRows; ++i)
459template<
class ViewType1,
463struct COPY<ViewType1, ViewType2, IndexType, true, rank> {
465 static KOKKOS_INLINE_FUNCTION
void
466 run (
const ViewType1& x,
const ViewType2& y)
468 const IndexType span =
static_cast<IndexType
> (x.span());
469 using x_value_type =
typename std::decay<
decltype (*x.data()) >::type;
470 using y_value_type =
typename std::decay<
decltype (*y.data()) >::type;
471 const x_value_type *__restrict__ x_ptr(x.data());
472 y_value_type *__restrict__ y_ptr(y.data());
474#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
477 for (IndexType i = 0; i < span; ++i)
482template<
class VecType1,
486 class IndexType = int,
487 bool is_contiguous =
false,
488 class BlkLayoutType =
typename BlkType::array_layout>
490 static KOKKOS_INLINE_FUNCTION
void
491 run (
const CoeffType& alpha,
496 static_assert (VecType1::rank == 1,
"GEMV: VecType1 must have rank 1.");
497 static_assert (BlkType::rank == 2,
"GEMV: BlkType must have rank 2.");
498 static_assert (VecType2::rank == 1,
"GEMV: VecType2 must have rank 1.");
500 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
501 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
504 for (IndexType i = 0; i < numRows; ++i)
505 for (IndexType j = 0; j < numCols; ++j)
506 y(i) += alpha * A(i,j) * x(j);
510template<
class VecType1,
515struct GEMV<VecType1,BlkType,VecType2,CoeffType,IndexType,true,Kokkos::LayoutLeft> {
516 static KOKKOS_INLINE_FUNCTION
void
517 run (
const CoeffType& alpha,
522 static_assert (VecType1::rank == 1,
"GEMV: VecType1 must have rank 1.");
523 static_assert (BlkType::rank == 2,
"GEMV: BlkType must have rank 2.");
524 static_assert (VecType2::rank == 1,
"GEMV: VecType2 must have rank 1.");
526 using A_value_type =
typename std::decay<
decltype (A(0,0)) >::type;
527 using x_value_type =
typename std::decay<
decltype (x(0)) >::type;
528 using y_value_type =
typename std::decay<
decltype (y(0)) >::type;
530 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
531 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
533 const A_value_type *__restrict__ A_ptr(A.data());
const IndexType as1(A.stride(1));
534 const x_value_type *__restrict__ x_ptr(x.data());
535 y_value_type *__restrict__ y_ptr(y.data());
537#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
540 for (IndexType j=0;j<numCols;++j) {
541 const x_value_type x_at_j = alpha*x_ptr[j];
542 const A_value_type *__restrict__ A_at_j = A_ptr + j*as1;
543#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
546 for (IndexType i=0;i<numRows;++i)
547 y_ptr[i] += A_at_j[i] * x_at_j;
552template<
class VecType1,
557struct GEMV<VecType1,BlkType,VecType2,CoeffType,IndexType,true,Kokkos::LayoutRight> {
558 static KOKKOS_INLINE_FUNCTION
void
559 run (
const CoeffType& alpha,
564 static_assert (VecType1::rank == 1,
"GEMV: VecType1 must have rank 1.");
565 static_assert (BlkType::rank == 2,
"GEMV: BlkType must have rank 2.");
566 static_assert (VecType2::rank == 1,
"GEMV: VecType2 must have rank 1.");
568 using A_value_type =
typename std::decay<
decltype (A(0,0)) >::type;
569 using x_value_type =
typename std::decay<
decltype (x(0)) >::type;
570 using y_value_type =
typename std::decay<
decltype (y(0)) >::type;
572 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
573 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
575 const A_value_type *__restrict__ A_ptr(A.data());
const IndexType as0(A.stride(0));
576 const x_value_type *__restrict__ x_ptr(x.data());
577 y_value_type *__restrict__ y_ptr(y.data());
579#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
582 for (IndexType i=0;i<numRows;++i) {
583 y_value_type y_at_i(0);
584 const auto A_at_i = A_ptr + i*as0;
585#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
588 for (IndexType j=0;j<numCols;++j)
589 y_at_i += A_at_i[j] * x_ptr[j];
590 y_ptr[i] += alpha*y_at_i;
599template<
class ViewType,
600 class CoefficientType,
601 class IndexType = int,
602 const int rank = ViewType::rank>
603KOKKOS_INLINE_FUNCTION
void
604SCAL (
const CoefficientType& alpha,
const ViewType& x)
606 using LayoutType =
typename ViewType::array_layout;
607 constexpr bool is_contiguous = (std::is_same<LayoutType,Kokkos::LayoutLeft>::value ||
608 std::is_same<LayoutType,Kokkos::LayoutRight>::value);
613template<
class ViewType,
615 class IndexType = int,
616 const int rank = ViewType::rank>
617KOKKOS_INLINE_FUNCTION
void
618FILL (
const ViewType& x,
const InputType& val)
620 using LayoutType =
typename ViewType::array_layout;
621 constexpr bool is_contiguous = (std::is_same<LayoutType,Kokkos::LayoutLeft>::value ||
622 std::is_same<LayoutType,Kokkos::LayoutRight>::value);
631template<
class CoefficientType,
634 class IndexType = int,
635 const int rank = ViewType1::rank>
636KOKKOS_INLINE_FUNCTION
void
637AXPY (
const CoefficientType& alpha,
641 static_assert (
static_cast<int> (ViewType1::rank) ==
642 static_cast<int> (ViewType2::rank),
643 "AXPY: x and y must have the same rank.");
644 using LayoutType1 =
typename ViewType1::array_layout;
645 using LayoutType2 =
typename ViewType2::array_layout;
646 constexpr bool is_layout_same = std::is_same<LayoutType1,LayoutType2>::value;
647 constexpr bool is_x_contiguous = (std::is_same<LayoutType1,Kokkos::LayoutLeft>::value ||
648 std::is_same<LayoutType1,Kokkos::LayoutRight>::value);
649 constexpr bool is_contiguous = is_layout_same && is_x_contiguous;
661template<
class ViewType1,
663 class IndexType = int,
664 const int rank = ViewType1::rank>
665KOKKOS_INLINE_FUNCTION
void
666COPY (
const ViewType1& x,
const ViewType2& y)
668 static_assert (
static_cast<int> (ViewType1::rank) ==
669 static_cast<int> (ViewType2::rank),
670 "COPY: x and y must have the same rank.");
671 using LayoutType1 =
typename ViewType1::array_layout;
672 using LayoutType2 =
typename ViewType2::array_layout;
673 constexpr bool is_layout_same = std::is_same<LayoutType1,LayoutType2>::value;
674 constexpr bool is_x_contiguous = (std::is_same<LayoutType1,Kokkos::LayoutLeft>::value ||
675 std::is_same<LayoutType1,Kokkos::LayoutRight>::value);
676 constexpr bool is_contiguous = is_layout_same && is_x_contiguous;
691template<
class VecType1,
695 class IndexType =
int>
696KOKKOS_INLINE_FUNCTION
void
702 constexpr bool is_A_contiguous = (std::is_same<typename BlkType::array_layout, Kokkos::LayoutLeft>::value ||
703 std::is_same<typename BlkType::array_layout, Kokkos::LayoutRight>::value);
704 constexpr bool is_x_contiguous = (std::is_same<typename VecType1::array_layout, Kokkos::LayoutLeft>::value ||
705 std::is_same<typename VecType1::array_layout, Kokkos::LayoutRight>::value);
706 constexpr bool is_y_contiguous = (std::is_same<typename VecType2::array_layout, Kokkos::LayoutLeft>::value ||
707 std::is_same<typename VecType2::array_layout, Kokkos::LayoutRight>::value);
708 constexpr bool is_contiguous = is_A_contiguous && is_x_contiguous && is_y_contiguous;
709 Impl::GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType, is_contiguous>::run (alpha, A, x, y);
719template<
class ViewType1,
722 class CoefficientType,
723 class IndexType =
int>
724KOKKOS_INLINE_FUNCTION
void
727 const CoefficientType& alpha,
730 const CoefficientType& beta,
734 static_assert (ViewType1::rank == 2,
"GEMM: A must have rank 2 (be a matrix).");
735 static_assert (ViewType2::rank == 2,
"GEMM: B must have rank 2 (be a matrix).");
736 static_assert (ViewType3::rank == 2,
"GEMM: C must have rank 2 (be a matrix).");
738 typedef typename std::remove_reference<
decltype (A(0,0))>::type Scalar;
739 typedef Kokkos::Details::ArithTraits<Scalar> STS;
740 const Scalar
ZERO = STS::zero();
741 const Scalar ONE = STS::one();
745 if(transA[0] ==
'N' || transA[0] ==
'n') {
746 m =
static_cast<IndexType
> (A.extent (0));
747 n =
static_cast<IndexType
> (A.extent (1));
750 m =
static_cast<IndexType
> (A.extent (1));
751 n =
static_cast<IndexType
> (A.extent (0));
753 k =
static_cast<IndexType
> (C.extent (1));
756 if(alpha ==
ZERO && beta == ONE)
762 for(IndexType i=0; i<m; i++) {
763 for(IndexType j=0; j<k; j++) {
769 for(IndexType i=0; i<m; i++) {
770 for(IndexType j=0; j<k; j++) {
771 C(i,j) = beta*C(i,j);
778 if(transB[0] ==
'n' || transB[0] ==
'N') {
779 if(transA[0] ==
'n' || transA[0] ==
'N') {
781 for(IndexType j=0; j<n; j++) {
783 for(IndexType i=0; i<m; i++) {
787 else if(beta != ONE) {
788 for(IndexType i=0; i<m; i++) {
789 C(i,j) = beta*C(i,j);
792 for(IndexType l=0; l<k; l++) {
793 Scalar temp = alpha*B(l,j);
794 for(IndexType i=0; i<m; i++) {
795 C(i,j) = C(i,j) + temp*A(i,l);
802 for(IndexType j=0; j<n; j++) {
803 for(IndexType i=0; i<m; i++) {
805 for(IndexType l=0; l<k; l++) {
806 temp = temp + A(l,i)*B(l,j);
812 C(i,j) = alpha*temp + beta*C(i,j);
819 if(transA[0] ==
'n' || transA[0] ==
'N') {
821 for(IndexType j=0; j<n; j++) {
823 for(IndexType i=0; i<m; i++) {
827 else if(beta != ONE) {
828 for(IndexType i=0; i<m; i++) {
829 C(i,j) = beta*C(i,j);
832 for(IndexType l=0; l<k; l++) {
833 Scalar temp = alpha*B(j,l);
834 for(IndexType i=0; i<m; i++) {
835 C(i,j) = C(i,j) + temp*A(i,l);
842 for(IndexType j=0; j<n; j++) {
843 for(IndexType i=0; i<m; i++) {
845 for(IndexType l=0; l<k; l++) {
846 temp = temp + A(l,i)*B(j,l);
852 C(i,j) = alpha*temp + beta*C(i,j);
861template<
class LittleBlockType,
862 class LittleVectorType>
863KOKKOS_INLINE_FUNCTION
void
864GETF2 (
const LittleBlockType& A,
const LittleVectorType& ipiv,
int& info)
867 typedef typename std::decay<
decltype (ipiv(0)) >::type IndexType;
868 static_assert (std::is_integral<IndexType>::value,
869 "GETF2: The type of each entry of ipiv must be an integer type.");
870 typedef typename std::remove_reference<
decltype (A(0,0))>::type Scalar;
871 static_assert (! std::is_const<Scalar>::value,
872 "GETF2: A must not be a const View (or LittleBlock).");
873 static_assert (! std::is_const<std::remove_reference<
decltype (ipiv(0))>>::value,
874 "GETF2: ipiv must not be a const View (or LittleBlock).");
875 static_assert (LittleBlockType::rank == 2,
"GETF2: A must have rank 2 (be a matrix).");
876 typedef Kokkos::Details::ArithTraits<Scalar> STS;
877 const Scalar
ZERO = STS::zero();
879 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
880 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
881 const IndexType pivDim =
static_cast<IndexType
> (ipiv.extent (0));
884 const IndexType minPivDim = (numRows < numCols) ? numRows : numCols;
885 if (pivDim < minPivDim) {
893 for(IndexType j=0; j < pivDim; j++)
897 for(IndexType i=j+1; i<numRows; i++)
899 if(STS::abs(A(i,j)) > STS::abs(A(jp,j))) {
910 for(IndexType i=0; i < numCols; i++)
912 Scalar temp = A(jp,i);
919 for(IndexType i=j+1; i<numRows; i++) {
920 A(i,j) = A(i,j) / A(j,j);
928 for(IndexType r=j+1; r < numRows; r++)
930 for(IndexType c=j+1; c < numCols; c++) {
931 A(r,c) = A(r,c) - A(r,j) * A(j,c);
942template<
class LittleBlockType,
943 class LittleIntVectorType,
944 class LittleScalarVectorType,
945 const int rank = LittleScalarVectorType::rank>
947 static KOKKOS_INLINE_FUNCTION
void
948 run (
const char mode[],
949 const LittleBlockType& A,
950 const LittleIntVectorType& ipiv,
951 const LittleScalarVectorType& B,
956template<
class LittleBlockType,
957 class LittleIntVectorType,
958 class LittleScalarVectorType>
959struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 1> {
960 static KOKKOS_INLINE_FUNCTION
void
961 run (
const char mode[],
962 const LittleBlockType& A,
963 const LittleIntVectorType& ipiv,
964 const LittleScalarVectorType& B,
968 typedef typename std::decay<
decltype (ipiv(0))>::type IndexType;
971 static_assert (std::is_integral<IndexType>::value &&
972 std::is_signed<IndexType>::value,
973 "GETRS: The type of each entry of ipiv must be a signed integer.");
974 typedef typename std::decay<
decltype (A(0,0))>::type Scalar;
975 static_assert (! std::is_const<std::remove_reference<
decltype (B(0))>>::value,
976 "GETRS: B must not be a const View (or LittleBlock).");
977 static_assert (LittleBlockType::rank == 2,
"GETRS: A must have rank 2 (be a matrix).");
978 static_assert (LittleIntVectorType::rank == 1,
"GETRS: ipiv must have rank 1.");
979 static_assert (LittleScalarVectorType::rank == 1,
"GETRS: For this specialization, B must have rank 1.");
981 typedef Kokkos::Details::ArithTraits<Scalar> STS;
982 const Scalar
ZERO = STS::zero();
983 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
984 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
985 const IndexType pivDim =
static_cast<IndexType
> (ipiv.extent (0));
990 if (numRows != numCols) {
996 if (pivDim < numRows) {
1002 if(mode[0] ==
'n' || mode[0] ==
'N') {
1004 for(IndexType i=0; i<numRows; i++) {
1005 if(ipiv(i) != i+1) {
1007 B(i) = B(ipiv(i)-1);
1008 B(ipiv(i)-1) = temp;
1013 for(IndexType r=1; r < numRows; r++) {
1014 for(IndexType c=0; c < r; c++) {
1015 B(r) = B(r) - A(r,c)*B(c);
1020 for(IndexType r=numRows-1; r >= 0; r--) {
1022 if(A(r,r) ==
ZERO) {
1027 for(IndexType c=r+1; c < numCols; c++) {
1028 B(r) = B(r) - A(r,c)*B(c);
1030 B(r) = B(r) / A(r,r);
1034 else if(mode[0] ==
't' || mode[0] ==
'T') {
1039 else if (mode[0] ==
'c' || mode[0] ==
'C') {
1052template<
class LittleBlockType,
1053 class LittleIntVectorType,
1054 class LittleScalarVectorType>
1055struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 2> {
1056 static KOKKOS_INLINE_FUNCTION
void
1057 run (
const char mode[],
1058 const LittleBlockType& A,
1059 const LittleIntVectorType& ipiv,
1060 const LittleScalarVectorType& B,
1064 typedef typename std::decay<
decltype (ipiv(0)) >::type IndexType;
1065 static_assert (std::is_integral<IndexType>::value,
1066 "GETRS: The type of each entry of ipiv must be an integer type.");
1067 static_assert (! std::is_const<std::remove_reference<
decltype (B(0)) > >::value,
1068 "GETRS: B must not be a const View (or LittleBlock).");
1069 static_assert (LittleBlockType::rank == 2,
"GETRS: A must have rank 2 (be a matrix).");
1070 static_assert (LittleIntVectorType::rank == 1,
"GETRS: ipiv must have rank 1.");
1071 static_assert (LittleScalarVectorType::rank == 2,
"GETRS: For this specialization, B must have rank 2.");
1076 const IndexType numRhs = B.extent (1);
1079 for (IndexType rhs = 0; rhs < numRhs; ++rhs) {
1080 auto B_cur = Kokkos::subview (B, Kokkos::ALL (), rhs);
1081 GETRS<LittleBlockType, LittleIntVectorType,
decltype (B_cur), 1>::run (mode, A, ipiv, B_cur, info);
1094template<
class LittleBlockType,
1095 class LittleIntVectorType,
1096 class LittleScalarVectorType>
1097KOKKOS_INLINE_FUNCTION
void
1099 const LittleBlockType& A,
1100 const LittleIntVectorType& ipiv,
1101 const LittleScalarVectorType& B,
1104 Impl::GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType,
1105 LittleScalarVectorType::rank>::run (mode, A, ipiv, B, info);
1123template<
class LittleBlockType,
1124 class LittleIntVectorType,
1125 class LittleScalarVectorType>
1126KOKKOS_INLINE_FUNCTION
void
1128 const LittleIntVectorType& ipiv,
1129 const LittleScalarVectorType& work,
1133 typedef typename std::decay<
decltype (ipiv(0))>::type IndexType;
1136 static_assert (std::is_integral<IndexType>::value &&
1137 std::is_signed<IndexType>::value,
1138 "GETRI: The type of each entry of ipiv must be a signed integer.");
1139 typedef typename std::remove_reference<
decltype (A(0,0))>::type Scalar;
1140 static_assert (! std::is_const<std::remove_reference<
decltype (A(0,0))>>::value,
1141 "GETRI: A must not be a const View (or LittleBlock).");
1142 static_assert (! std::is_const<std::remove_reference<
decltype (work(0))>>::value,
1143 "GETRI: work must not be a const View (or LittleBlock).");
1144 static_assert (LittleBlockType::rank == 2,
"GETRI: A must have rank 2 (be a matrix).");
1145 typedef Kokkos::Details::ArithTraits<Scalar> STS;
1146 const Scalar
ZERO = STS::zero();
1147 const Scalar ONE = STS::one();
1149 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
1150 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
1151 const IndexType pivDim =
static_cast<IndexType
> (ipiv.extent (0));
1152 const IndexType workDim =
static_cast<IndexType
> (work.extent (0));
1157 if (numRows != numCols) {
1163 if (pivDim < numRows) {
1169 if (workDim < numRows) {
1175 for(IndexType j=0; j < numRows; j++) {
1176 if(A(j,j) ==
ZERO) {
1181 A(j,j) = ONE / A(j,j);
1184 for(IndexType r=0; r < j; r++) {
1185 A(r,j) = A(r,r)*A(r,j);
1186 for(IndexType c=r+1; c < j; c++) {
1187 A(r,j) = A(r,j) + A(r,c)*A(c,j);
1190 for(IndexType r=0; r < j; r++) {
1191 A(r,j) = -A(j,j)*A(r,j);
1196 for(IndexType j = numCols-2; j >= 0; j--) {
1198 for(IndexType r=j+1; r < numRows; r++) {
1203 for(IndexType r=0; r < numRows; r++) {
1204 for(IndexType i=j+1; i < numRows; i++) {
1205 A(r,j) = A(r,j) - work(i)*A(r,i);
1211 for(IndexType j=numRows-1; j >= 0; j--) {
1212 IndexType jp = ipiv(j)-1;
1214 for(IndexType r=0; r < numRows; r++) {
1215 Scalar temp = A(r,j);
1228template<
class LittleBlockType,
1229 class LittleVectorTypeX,
1230 class LittleVectorTypeY,
1231 class CoefficientType,
1232 class IndexType =
int>
1233KOKKOS_INLINE_FUNCTION
void
1234GEMV (
const char trans,
1235 const CoefficientType& alpha,
1236 const LittleBlockType& A,
1237 const LittleVectorTypeX& x,
1238 const CoefficientType& beta,
1239 const LittleVectorTypeY& y)
1245 typedef typename std::remove_reference<
decltype (y(0)) >::type y_value_type;
1246 const IndexType numRows =
static_cast<IndexType
> (A.extent (0));
1247 const IndexType numCols =
static_cast<IndexType
> (A.extent (1));
1251 for (IndexType i = 0; i < numRows; ++i) {
1256 for (IndexType i = 0; i < numRows; ++i) {
1257 y_value_type y_i = 0.0;
1258 for (IndexType j = 0; j < numCols; ++j) {
1259 y_i += A(i,j) * x(j);
1268 for (IndexType i = 0; i < numRows; ++i) {
1273 for (IndexType i = 0; i < numRows; ++i) {
1279 for (IndexType i = 0; i < numRows; ++i) {
1280 y_value_type y_i = beta * y(i);
1281 for (IndexType j = 0; j < numCols; ++j) {
1282 y_i += alpha * A(i,j) * x(j);
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void GETF2(const LittleBlockType &A, const LittleVectorType &ipiv, int &info)
Computes A = P*L*U.
KOKKOS_INLINE_FUNCTION void GETRI(const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &work, int &info)
Compute inverse of A, using result of GETRF or GETF2.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
KOKKOS_INLINE_FUNCTION void GETRS(const char mode[], const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &B, int &info)
Solve the linear system(s) AX=B, using the result of GETRF or GETF2.
KOKKOS_INLINE_FUNCTION void GEMM(const char transA[], const char transB[], const CoefficientType &alpha, const ViewType1 &A, const ViewType2 &B, const CoefficientType &beta, const ViewType3 &C)
Small dense matrix-matrix multiply: C := alpha*A*B + beta*C
@ ZERO
Replace old values with zero.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha*x (rank-1 x and y, i.e., vectors)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
Implementation of Tpetra::AXPY function.
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i) := max(abs((*this)(i)), abs(X(i))) for all i.
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i,j) := max(abs((*this)(i,j)), abs(X(i,j))) for all (i,j).
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix,...
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &x, const ViewType2 &y)
y := x (rank-1 x and y, i.e., vectors)
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
Implementation of Tpetra::COPY function.
Implementation of Tpetra::FILL function.
Computes the solution to Ax=b.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &x)
x := alpha*x (rank-1 x, i.e., a vector)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
Implementation of Tpetra::SCAL function.