Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
Stokhos_KokkosCrsMatrixMPVectorUnitTest_Cuda.cpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Stokhos Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38//
39// ***********************************************************************
40// @HEADER
41
42#include "Teuchos_UnitTestHarness.hpp"
43#include "Teuchos_UnitTestRepository.hpp"
44#include "Teuchos_GlobalMPISession.hpp"
45
47
48// Instantiate test for Cuda device
49using Kokkos::Cuda;
51
52template <typename Storage, typename Ordinal, typename MultiplyOp,
53 Ordinal NumPerThread, Ordinal ThreadsPerVector>
55 Ordinal num_vec_threads,
56 Ordinal num_row_threads,
57 Teuchos::FancyOStream& out) {
58 typedef Kokkos::Cuda Device;
59
60 const Ordinal VectorSize = NumPerThread * ThreadsPerVector;
61 typedef typename Storage::template apply_N<VectorSize>::type storage_type;
63
64 const Ordinal nGrid = 5;
65 KokkosSparse::DeviceConfig dev_config(num_blocks, num_vec_threads, num_row_threads);
66
67 bool success = test_embedded_vector<Vector>(
68 nGrid, VectorSize, dev_config, MultiplyOp(), out);
69
70 return success;
71}
72
73// Test default configuration
75 Kokkos_CrsMatrix_MP, Multiply_Default, Storage, MultiplyOp )
76{
77 typedef typename Storage::ordinal_type Ordinal;
78 const Ordinal NumPerThread = 1;
79 const Ordinal ThreadsPerVector = 16;
80
81 const Ordinal num_blocks = 0;
82 const Ordinal num_vec_threads = 0;
83 const Ordinal num_row_threads = 0;
84
85 success =
86 test_cuda_embedded_vector<Storage,Ordinal,MultiplyOp,NumPerThread,ThreadsPerVector>(num_blocks, num_vec_threads, num_row_threads, out);
87}
88
90 Kokkos_CrsMatrix_MP, Multiply_1, Storage, MultiplyOp )
91{
92 typedef typename Storage::ordinal_type Ordinal;
93 const Ordinal NumPerThread = 1;
94 const Ordinal ThreadsPerVector = 16;
95
96 const Ordinal num_blocks = 10;
97 const Ordinal num_vec_threads = ThreadsPerVector;
98 const Ordinal num_row_threads = 4;
99
100 success =
101 test_cuda_embedded_vector<Storage,Ordinal,MultiplyOp,NumPerThread,ThreadsPerVector>(num_blocks, num_vec_threads, num_row_threads, out);
102}
103
105 Kokkos_CrsMatrix_MP, Multiply_2, Storage, MultiplyOp )
106{
107 typedef typename Storage::ordinal_type Ordinal;
108 const Ordinal NumPerThread = 2;
109 const Ordinal ThreadsPerVector = 16;
110
111 const Ordinal num_blocks = 10;
112 const Ordinal num_vec_threads = ThreadsPerVector;
113 const Ordinal num_row_threads = 4;
114
115 success =
116 test_cuda_embedded_vector<Storage,Ordinal,MultiplyOp,NumPerThread,ThreadsPerVector>(num_blocks, num_vec_threads, num_row_threads, out);
117}
118
120 Kokkos_CrsMatrix_MP, Multiply_3, Storage, MultiplyOp )
121{
122 typedef typename Storage::ordinal_type Ordinal;
123 const Ordinal NumPerThread = 3;
124 const Ordinal ThreadsPerVector = 16;
125
126 const Ordinal num_blocks = 10;
127 const Ordinal num_vec_threads = ThreadsPerVector;
128 const Ordinal num_row_threads = 4;
129
130 success =
131 test_cuda_embedded_vector<Storage,Ordinal,MultiplyOp,NumPerThread,ThreadsPerVector>(num_blocks, num_vec_threads, num_row_threads, out);
132}
133
135 Kokkos_CrsMatrix_MP, Multiply_4, Storage, MultiplyOp )
136{
137 typedef typename Storage::ordinal_type Ordinal;
138 const Ordinal NumPerThread = 4;
139 const Ordinal ThreadsPerVector = 16;
140
141 const Ordinal num_blocks = 10;
142 const Ordinal num_vec_threads = ThreadsPerVector;
143 const Ordinal num_row_threads = 4;
144
145 success =
146 test_cuda_embedded_vector<Storage,Ordinal,MultiplyOp,NumPerThread,ThreadsPerVector>(num_blocks, num_vec_threads, num_row_threads, out);
147}
148
// Instantiates the five Kokkos_CrsMatrix_MP multiply tests (Multiply_Default,
// Multiply_1, Multiply_2, Multiply_3 and Multiply_4) for a single pair of
// template arguments.
//   STORAGE - forwarded as the first template argument of every test.
//   OP      - forwarded as the second template argument of every test.
149#define CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( STORAGE, OP ) \
150 TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( \
151 Kokkos_CrsMatrix_MP, Multiply_Default, STORAGE, OP ) \
152 TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( \
153 Kokkos_CrsMatrix_MP, Multiply_1, STORAGE, OP ) \
154 TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( \
155 Kokkos_CrsMatrix_MP, Multiply_2, STORAGE, OP ) \
156 TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( \
157 Kokkos_CrsMatrix_MP, Multiply_3, STORAGE, OP ) \
158 TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( \
159 Kokkos_CrsMatrix_MP, Multiply_4, STORAGE, OP )
160
161// Notes: SFS and DS are defined in the main test header. The test names
162// deliberately omit the ordinal/scalar/device types, on the assumption
163// that only one such combination is ever instantiated.
//
// The expansion below does not reference ORDINAL, SCALAR or DEVICE. It
// instantiates the multiply tests four times: SFS and DS, each paired with
// DefaultMultiply and KokkosMultiply.
164#define CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_ORDINAL_SCALAR_DEVICE( ORDINAL, SCALAR, DEVICE ) \
165 CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( SFS, DefaultMultiply ) \
166 CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( SFS, KokkosMultiply ) \
167 CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( DS, DefaultMultiply ) \
168 CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_STORAGE_OP( DS, KokkosMultiply )
169
171
// Test driver entry point.
//
// Sets up an MPI session and Kokkos, runs every registered Teuchos unit test,
// then shuts Kokkos down.
//
// argc, argv - command-line arguments; passed to the MPI session and to the
//              unit-test runner.
// Returns the value produced by
// Teuchos::UnitTestRepository::runUnitTestsFromMain.
172int main( int argc, char* argv[] ) {
173 Teuchos::GlobalMPISession mpiSession(&argc, &argv);
174
175 // Initialize Kokkos, explicitly requesting device id 0
176 Kokkos::InitializationSettings init_args;
177 init_args.set_device_id(0);
178 Kokkos::initialize( init_args );
 // Print the Kokkos configuration to stdout so it appears in the test log
179 Kokkos::print_configuration(std::cout);
180
181 // Run all registered unit tests; argc/argv are forwarded to the runner
182 int ret = Teuchos::UnitTestRepository::runUnitTestsFromMain(argc, argv);
183
184 // Finalize Kokkos before returning the test result
185 Kokkos::finalize();
186
187 return ret;
188}
#define CRSMATRIX_MP_VECTOR_TESTS_DEVICE(DEVICE)
TEUCHOS_UNIT_TEST_TEMPLATE_2_DECL(Kokkos_CrsMatrix_MP, Multiply_Default, Storage, MultiplyOp)
int main(int argc, char *argv[])
bool test_cuda_embedded_vector(Ordinal num_blocks, Ordinal num_vec_threads, Ordinal num_row_threads, Teuchos::FancyOStream &out)
#define CRS_MATRIX_MP_VECTOR_MULTIPLY_TESTS_ORDINAL_SCALAR_DEVICE(ORDINAL, SCALAR, DEVICE)
Stokhos::StandardStorage< int, double > storage_type
Stokhos::StandardStorage< int, double > Storage