Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
TestStochastic.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Stokhos Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38//
39// ***********************************************************************
40// @HEADER
41#include <utility>
42#include <cmath>
43#include <iostream>
44
45#include "Kokkos_Core.hpp"
46#include "Kokkos_Timer.hpp"
47
48#include "Stokhos_Update.hpp"
49#include "Stokhos_CrsMatrix.hpp"
61
62#if defined(HAVE_MPI) && 0
63#include "Epetra_MpiComm.h"
64#else
65#include "Epetra_SerialComm.h"
66#endif
74
75#ifdef HAVE_STOKHOS_KOKKOSLINALG
76#include "KokkosSparse_CrsMatrix.hpp"
77#include "KokkosSparse_spmv.hpp"
78#include "KokkosBlas1_update.hpp"
79#endif
80
81namespace unit_test {
82
83template< typename IntType >
84inline
85IntType map_fem_graph_coord( const IntType & N ,
86 const IntType & i ,
87 const IntType & j ,
88 const IntType & k )
89{
90 return k + N * ( j + N * i );
91}
92
93inline
94size_t generate_fem_graph( size_t N ,
95 std::vector< std::vector<size_t> > & graph )
96{
97 graph.resize( N * N * N , std::vector<size_t>() );
98
99 size_t total = 0 ;
100
101 for ( int i = 0 ; i < (int) N ; ++i ) {
102 for ( int j = 0 ; j < (int) N ; ++j ) {
103 for ( int k = 0 ; k < (int) N ; ++k ) {
104
105 const size_t row = map_fem_graph_coord((int)N,i,j,k);
106
107 graph[row].reserve(27);
108
109 for ( int ii = -1 ; ii < 2 ; ++ii ) {
110 for ( int jj = -1 ; jj < 2 ; ++jj ) {
111 for ( int kk = -1 ; kk < 2 ; ++kk ) {
112 if ( 0 <= i + ii && i + ii < (int) N &&
113 0 <= j + jj && j + jj < (int) N &&
114 0 <= k + kk && k + kk < (int) N ) {
115 size_t col = map_fem_graph_coord((int)N,i+ii,j+jj,k+kk);
116
117 graph[row].push_back(col);
118 }
119 }}}
120 total += graph[row].size();
121 }}}
122
123 return total ;
124}
125
126template <typename Scalar> struct ScalarTolerances {};
127
128template <> struct ScalarTolerances<float> {
129 typedef float scalar_type;
130 static scalar_type sparse_cijk_tol() { return 1e-5; }
131};
132
133template <> struct ScalarTolerances<double> {
134 typedef double scalar_type;
135 static scalar_type sparse_cijk_tol() { return 1e-12; }
136};
137
138template< typename ScalarType , typename TensorType, class Device >
139std::vector<double>
141 const std::vector<int> & var_degree ,
142 const int nGrid ,
143 const int iterCount ,
144 const bool symmetric )
145{
146 typedef ScalarType value_type ;
147 typedef Kokkos::View< value_type** ,
148 Kokkos::LayoutLeft ,
149 Device > block_vector_type ;
150
152
154 typedef typename matrix_type::graph_type graph_type ;
155
156 typedef ScalarType value_type ;
157 typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
162
163 using Teuchos::rcp;
164 using Teuchos::RCP;
165 using Teuchos::Array;
166
167 // Create Stochastic Galerkin basis and expansion
168 const size_t num_KL = var_degree.size();
169 Array< RCP<const abstract_basis_type> > bases(num_KL);
170 for (size_t i=0; i<num_KL; i++) {
171 if (symmetric)
172 bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
173 else
174 bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
175 }
176 RCP<const product_basis_type> basis =
177 rcp(new product_basis_type(
179 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
180
181 //------------------------------
182 // Generate graph for "FEM" box structure:
183
184 std::vector< std::vector<size_t> > graph ;
185
186 const size_t outer_length = nGrid * nGrid * nGrid ;
187 const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
188
189 //------------------------------
190 // Generate CRS block-tensor matrix:
191
192 matrix_type matrix ;
193
194 matrix.block =
195 Stokhos::create_stochastic_product_tensor< TensorType >( *basis,
196 *Cijk );
197 matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
198
199 const size_t inner_length = matrix.block.dimension();
200 const size_t inner_length_aligned = matrix.block.aligned_dimension();
201
202 matrix.values =
203 block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length_aligned , graph_length );
204
205 block_vector_type x =
206 block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length_aligned , outer_length );
207 block_vector_type y =
208 block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length_aligned , outer_length );
209
210 Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
211
212 //------------------------------
213 // Generate input multivector:
214
215 Kokkos::deep_copy( x , ScalarType(1.0) );
216 block_vector_type x0 =
217 block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"),
218 inner_length_aligned , outer_length );
219 Kokkos::deep_copy( x0 , ScalarType(1.0) );
220
221 //------------------------------
222
223 Device().fence();
224 Kokkos::Timer clock ;
225 for ( int iter = 0 ; iter < iterCount ; ++iter ) {
226 Kokkos::deep_copy( x, x0 ); // akin to import
227 Stokhos::multiply( matrix , x , y );
228 }
229 Device().fence();
230
231 const double seconds_per_iter = clock.seconds() / ((double) iterCount );
232 const double flops_per_block = matrix.block.tensor().num_flops();
233 const double flops = 1.0e-9*graph_length*flops_per_block;
234
235 std::vector<double> perf(6) ;
236
237 perf[0] = outer_length * inner_length ;
238 perf[1] = seconds_per_iter ;
239 perf[2] = flops / seconds_per_iter;
240 perf[3] = matrix.block.tensor().entry_count();
241 perf[4] = inner_length ;
242 perf[5] = flops_per_block;
243
244 return perf ;
245}
246
247template< typename ScalarType , class Device >
248std::vector<double>
250 const std::vector<int> & var_degree ,
251 const int nGrid ,
252 const int iterCount ,
253 const bool symmetric )
254{
255 typedef ScalarType value_type ;
256 typedef Kokkos::View< value_type**,
257 Kokkos::LayoutLeft ,
258 Device > block_vector_type ;
259
260 //------------------------------
261
263 value_type , Device > matrix_type ;
264
265 typedef typename matrix_type::graph_type graph_type ;
266
267 typedef ScalarType value_type ;
268 typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
273
274 using Teuchos::rcp;
275 using Teuchos::RCP;
276 using Teuchos::Array;
277
278 // Create Stochastic Galerkin basis and expansion
279 const size_t num_KL = var_degree.size();
280 Array< RCP<const abstract_basis_type> > bases(num_KL);
281 for (size_t i=0; i<num_KL; i++) {
282 if (symmetric)
283 bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
284 else
285 bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
286 }
287 RCP<const product_basis_type> basis =
288 rcp(new product_basis_type(
290 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
291
292 //------------------------------
293 // Generate FEM graph:
294
295 std::vector< std::vector<size_t> > fem_graph ;
296
297 const size_t fem_length = nGrid * nGrid * nGrid ;
298 const size_t fem_graph_length = unit_test::generate_fem_graph( nGrid , fem_graph );
299
300 const size_t stoch_length = basis->size();
301
302 //------------------------------
303
304 block_vector_type x = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), stoch_length , fem_length );
305 block_vector_type y = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), stoch_length , fem_length );
306
307 Kokkos::deep_copy( x , ScalarType(1.0) );
308
309 //------------------------------
310 // Generate CRS matrix of blocks with symmetric diagonal storage
311
312 matrix_type matrix ;
313
314 matrix.block = Stokhos::SymmetricDiagonalSpec< Device >( stoch_length );
315 matrix.graph = Kokkos::create_staticcrsgraph<graph_type>(
316 std::string("test product tensor graph") , fem_graph );
317 matrix.values = block_vector_type(
318 Kokkos::ViewAllocateWithoutInitializing("matrix"), matrix.block.matrix_size() , fem_graph_length );
319
320 Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
321
322 //------------------------------
323
324 Device().fence();
325 Kokkos::Timer clock ;
326 for ( int iter = 0 ; iter < iterCount ; ++iter ) {
327 Stokhos::multiply( matrix , x , y );
328 }
329 Device().fence();
330
331 const double seconds_per_iter = clock.seconds() / ((double) iterCount );
332 const double flops_per_block = 2.0*stoch_length*stoch_length;
333 const double flops = 1e-9*fem_graph_length*flops_per_block;
334
335 std::vector<double> perf(6);
336 perf[0] = fem_length * stoch_length ;
337 perf[1] = seconds_per_iter;
338 perf[2] = flops / seconds_per_iter;
339 perf[3] = Cijk->num_entries();
340 perf[4] = stoch_length;
341 perf[5] = flops_per_block;
342 return perf;
343}
344
345//----------------------------------------------------------------------------
346// Flatten to a plain CRS matrix
347//
348// Outer DOF == fem
349// Inner DOF == stochastic
350
351template< typename ScalarType , class Device >
352std::vector<double>
354 const std::vector<int> & var_degree ,
355 const int nGrid ,
356 const int iterCount ,
357 const bool symmetric )
358{
359 typedef ScalarType value_type ;
360 typedef Kokkos::View< value_type* , Device > vector_type ;
361
362 //------------------------------
363
364 typedef Stokhos::CrsMatrix<value_type,Device> matrix_type;
365 typedef typename matrix_type::values_type matrix_values_type;
366 typedef typename matrix_type::graph_type matrix_graph_type;
367
368 typedef ScalarType value_type ;
369 typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
374
375 using Teuchos::rcp;
376 using Teuchos::RCP;
377 using Teuchos::Array;
378
379 // Create Stochastic Galerkin basis and expansion
380 const size_t num_KL = var_degree.size();
381 Array< RCP<const abstract_basis_type> > bases(num_KL);
382 for (size_t i=0; i<num_KL; i++) {
383 if (symmetric)
384 bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
385 else
386 bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
387 }
388 RCP<const product_basis_type> basis =
389 rcp(new product_basis_type(
391 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
392
393 //------------------------------
394 // Generate FEM graph:
395
396 std::vector< std::vector<size_t> > fem_graph ;
397
398 const size_t fem_length = nGrid * nGrid * nGrid ;
399
400 unit_test::generate_fem_graph( nGrid , fem_graph );
401
402 //------------------------------
403 // Stochastic graph
404
405 const size_t stoch_length = basis->size();
406 std::vector< std::vector< int > > stoch_graph( stoch_length );
407#if defined(HAVE_MPI) && 0
408 Epetra_MpiComm comm(MPI_COMM_WORLD);
409#else
411#endif
412 Teuchos::RCP<Epetra_CrsGraph> cijk_graph = Stokhos::sparse3Tensor2CrsGraph(
413 *basis, *Cijk, comm);
414 for ( size_t i = 0 ; i < stoch_length ; ++i ) {
415 int len = cijk_graph->NumGlobalIndices(i);
416 stoch_graph[i].resize(len);
417 int len2;
418 cijk_graph->ExtractGlobalRowCopy(i, len, len2, &stoch_graph[i][0]);
419 }
420
421 //------------------------------
422 // Generate flattened graph with FEM outer and stochastic inner
423
424 const size_t flat_length = fem_length * stoch_length ;
425
426 std::vector< std::vector<size_t> > flat_graph( flat_length );
427
428 for ( size_t iOuterRow = 0 ; iOuterRow < fem_length ; ++iOuterRow ) {
429
430 const size_t iOuterRowNZ = fem_graph[iOuterRow].size();
431
432 for ( size_t iInnerRow = 0 ; iInnerRow < stoch_length ; ++iInnerRow ) {
433
434 const size_t iInnerRowNZ = stoch_graph[ iInnerRow ].size(); ;
435 const size_t iFlatRowNZ = iOuterRowNZ * iInnerRowNZ ;
436 const size_t iFlatRow = iInnerRow + iOuterRow * stoch_length ;
437
438 flat_graph[iFlatRow].resize( iFlatRowNZ );
439
440 size_t iFlatEntry = 0 ;
441
442 for ( size_t iOuterEntry = 0 ; iOuterEntry < iOuterRowNZ ; ++iOuterEntry ) {
443
444 const size_t iOuterCol = fem_graph[iOuterRow][iOuterEntry];
445
446 for ( size_t iInnerEntry = 0 ; iInnerEntry < iInnerRowNZ ; ++iInnerEntry ) {
447
448 const size_t iInnerCol = stoch_graph[iInnerRow][iInnerEntry] ;
449 const size_t iFlatColumn = iInnerCol + iOuterCol * stoch_length ;
450
451 flat_graph[iFlatRow][iFlatEntry] = iFlatColumn ;
452
453 ++iFlatEntry ;
454 }
455 }
456 }
457 }
458
459 //------------------------------
460
461 vector_type x = vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), flat_length );
462 vector_type y = vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), flat_length );
463
464 Kokkos::deep_copy( x , ScalarType(1.0) );
465
466 //------------------------------
467
468 matrix_type matrix ;
469
470 matrix.graph = Kokkos::create_staticcrsgraph<matrix_graph_type>(
471 std::string("testing") , flat_graph );
472
473 const size_t flat_graph_length = matrix.graph.entries.extent(0);
474
475 matrix.values = matrix_values_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), flat_graph_length );
476
477 Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
478
479 //Kokkos::write_matrix_market(matrix, "flat_commuted.mm");
480
481 //------------------------------
482
483 Device().fence();
484 Kokkos::Timer clock ;
485 for ( int iter = 0 ; iter < iterCount ; ++iter ) {
486 Stokhos::multiply( matrix , x , y );
487 }
488 Device().fence();
489
490 const double seconds_per_iter = clock.seconds() / ((double) iterCount );
491 const double flops = 2.0*1e-9*flat_graph_length / seconds_per_iter;
492
493 std::vector<double> perf(4);
494 perf[0] = flat_length ;
495 perf[1] = seconds_per_iter;
496 perf[2] = flops;
497 perf[3] = flat_graph_length ;
498 return perf;
499}
500
501//----------------------------------------------------------------------------
502//----------------------------------------------------------------------------
503// Flatten to a plain CRS matrix
504//
505// Outer DOF == stochastic
506// Inner DOF == fem
507
508template< typename ScalarType , class Device >
509std::vector<double>
511 const std::vector<int> & var_degree ,
512 const int nGrid ,
513 const int iterCount ,
514 const bool symmetric )
515{
516 typedef ScalarType value_type ;
517 typedef Kokkos::View< value_type* , Device > vector_type ;
518
519 //------------------------------
520
521 typedef Stokhos::CrsMatrix<value_type,Device> matrix_type;
522 typedef typename matrix_type::values_type matrix_values_type;
523 typedef typename matrix_type::graph_type matrix_graph_type;
524
525 typedef ScalarType value_type ;
526 typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
531
532 using Teuchos::rcp;
533 using Teuchos::RCP;
534 using Teuchos::Array;
535
536 // Create Stochastic Galerkin basis and expansion
537 const size_t num_KL = var_degree.size();
538 Array< RCP<const abstract_basis_type> > bases(num_KL);
539 for (size_t i=0; i<num_KL; i++) {
540 if (symmetric)
541 bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
542 else
543 bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
544 }
545 RCP<const product_basis_type> basis =
546 rcp(new product_basis_type(
548 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
549
550 //------------------------------
551 // Generate FEM graph:
552
553 std::vector< std::vector<size_t> > fem_graph ;
554
555 const size_t fem_length = nGrid * nGrid * nGrid ;
556
557 unit_test::generate_fem_graph( nGrid , fem_graph );
558
559 //------------------------------
560 // Stochastic graph
561
562 const size_t stoch_length = basis->size();
563 std::vector< std::vector< int > > stoch_graph( stoch_length );
564#if defined(HAVE_MPI) && 0
565 Epetra_MpiComm comm(MPI_COMM_WORLD);
566#else
568#endif
569 Teuchos::RCP<Epetra_CrsGraph> cijk_graph = Stokhos::sparse3Tensor2CrsGraph(
570 *basis, *Cijk, comm);
571 for ( size_t i = 0 ; i < stoch_length ; ++i ) {
572 int len = cijk_graph->NumGlobalIndices(i);
573 stoch_graph[i].resize(len);
574 int len2;
575 cijk_graph->ExtractGlobalRowCopy(i, len, len2, &stoch_graph[i][0]);
576 }
577
578 //------------------------------
579 // Generate flattened graph with stochastic outer and FEM inner
580
581 const size_t flat_length = fem_length * stoch_length ;
582
583 std::vector< std::vector<size_t> > flat_graph( flat_length );
584
585 for ( size_t iOuterRow = 0 ; iOuterRow < stoch_length ; ++iOuterRow ) {
586
587 const size_t iOuterRowNZ = stoch_graph[iOuterRow].size();
588
589 for ( size_t iInnerRow = 0 ; iInnerRow < fem_length ; ++iInnerRow ) {
590
591 const size_t iInnerRowNZ = fem_graph[iInnerRow].size();
592 const size_t iFlatRowNZ = iOuterRowNZ * iInnerRowNZ ;
593 const size_t iFlatRow = iInnerRow + iOuterRow * fem_length ;
594
595 flat_graph[iFlatRow].resize( iFlatRowNZ );
596
597 size_t iFlatEntry = 0 ;
598
599 for ( size_t iOuterEntry = 0 ; iOuterEntry < iOuterRowNZ ; ++iOuterEntry ) {
600
601 const size_t iOuterCol = stoch_graph[ iOuterRow ][ iOuterEntry ];
602
603 for ( size_t iInnerEntry = 0 ; iInnerEntry < iInnerRowNZ ; ++iInnerEntry ) {
604
605 const size_t iInnerCol = fem_graph[ iInnerRow][iInnerEntry];
606 const size_t iFlatColumn = iInnerCol + iOuterCol * fem_length ;
607
608 flat_graph[iFlatRow][iFlatEntry] = iFlatColumn ;
609 ++iFlatEntry ;
610 }
611 }
612 }
613 }
614
615 //------------------------------
616
617 vector_type x = vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), flat_length );
618 vector_type y = vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), flat_length );
619
620 Kokkos::deep_copy( x , ScalarType(1.0) );
621
622 //------------------------------
623
624 matrix_type matrix ;
625
626 matrix.graph = Kokkos::create_staticcrsgraph<matrix_graph_type>( std::string("testing") , flat_graph );
627
628 const size_t flat_graph_length = matrix.graph.entries.extent(0);
629
630 matrix.values = matrix_values_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), flat_graph_length );
631
632 Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
633
634 //Kokkos::write_matrix_market(matrix, "flat_original.mm");
635
636 //------------------------------
637
638 Device().fence();
639 Kokkos::Timer clock ;
640 for ( int iter = 0 ; iter < iterCount ; ++iter ) {
641 Stokhos::multiply( matrix , x , y );
642 }
643 Device().fence();
644
645 const double seconds_per_iter = clock.seconds() / ((double) iterCount );
646 const double flops = 2.0*1e-9*flat_graph_length / seconds_per_iter;
647
648 std::vector<double> perf(4);
649 perf[0] = flat_length ;
650 perf[1] = seconds_per_iter;
651 perf[2] = flops;
652 perf[3] = flat_graph_length ;
653 return perf;
654}
655
656template< typename ScalarType , class Device >
657std::vector<double>
659 const std::vector<int> & var_degree ,
660 const int nGrid ,
661 const int iterCount ,
662 const bool symmetric )
663{
664 typedef ScalarType value_type ;
665 typedef Kokkos::View< value_type** ,
666 Kokkos::LayoutLeft ,
667 Device > block_vector_type ;
668
671
673 typedef typename matrix_type::graph_type graph_type ;
674
675 typedef ScalarType value_type ;
676 typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
681
682 using Teuchos::rcp;
683 using Teuchos::RCP;
684 using Teuchos::Array;
685
686 // Create Stochastic Galerkin basis and expansion
687 const size_t num_KL = var_degree.size();
688 Array< RCP<const abstract_basis_type> > bases(num_KL);
689 for (size_t i=0; i<num_KL; i++) {
690 if (symmetric)
691 bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
692 else
693 bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
694 }
695 RCP<const product_basis_type> basis =
696 rcp(new product_basis_type(
698 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
699
700 //------------------------------
701 // Generate graph for "FEM" box structure:
702
703 std::vector< std::vector<size_t> > graph ;
704
705 const size_t outer_length = nGrid * nGrid * nGrid ;
706 const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
707
708 //------------------------------
709 // Generate CRS block-tensor matrix:
710
711 matrix_type matrix ;
712
713 Teuchos::ParameterList params;
714 params.set("Tile Size", 128);
715 params.set("Max Tiles", 10000);
716 matrix.block =
717 Stokhos::create_stochastic_product_tensor< TensorType >( *basis, *Cijk,
718 params);
719 matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
720
721 const size_t inner_length = matrix.block.dimension();
722 const size_t inner_length_aligned = matrix.block.aligned_dimension();
723
724 matrix.values =
725 block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length_aligned , graph_length );
726
727 block_vector_type x =
728 block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length_aligned , outer_length );
729 block_vector_type y =
730 block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length_aligned , outer_length );
731
732 Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
733
734 //------------------------------
735 // Generate input multivector:
736
737 Kokkos::deep_copy( x , ScalarType(1.0) );
738
739 //------------------------------
740
741 Device().fence();
742 Kokkos::Timer clock ;
743 for ( int iter = 0 ; iter < iterCount ; ++iter ) {
744 Stokhos::multiply( matrix , x , y );
745 }
746 Device().fence();
747
748 const double seconds_per_iter = clock.seconds() / ((double) iterCount );
749 const double flops_per_block = matrix.block.tensor().num_flops();
750 const double flops = 1.0e-9*graph_length*flops_per_block;
751
752 // std::cout << "tensor: flops = " << flops
753 // << " time = " << seconds_per_iter << std::endl;
754
755 std::vector<double> perf(6) ;
756
757 perf[0] = outer_length * inner_length ;
758 perf[1] = seconds_per_iter ;
759 perf[2] = flops / seconds_per_iter;
760 perf[3] = matrix.block.tensor().entry_count();
761 perf[4] = inner_length ;
762 perf[5] = flops_per_block;
763
764 return perf ;
765}
766
767template< typename ScalarType , class Device >
768std::vector<double>
770 const std::vector<int> & var_degree ,
771 const int nGrid ,
772 const int iterCount ,
773 const bool symmetric )
774{
775 typedef ScalarType value_type ;
776 typedef Kokkos::View< value_type** ,
777 Kokkos::LayoutLeft ,
778 Device > block_vector_type ;
779
782
784 typedef typename matrix_type::graph_type graph_type ;
785
786 typedef ScalarType value_type ;
787 typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
792
793 using Teuchos::rcp;
794 using Teuchos::RCP;
795 using Teuchos::Array;
796
797 // Create Stochastic Galerkin basis and expansion
798 const size_t num_KL = var_degree.size();
799 Array< RCP<const abstract_basis_type> > bases(num_KL);
800 for (size_t i=0; i<num_KL; i++) {
801 if (symmetric)
802 bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
803 else
804 bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
805 }
806 RCP<const product_basis_type> basis =
807 rcp(new product_basis_type(
809 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
810
811 //------------------------------
812 // Generate graph for "FEM" box structure:
813
814 std::vector< std::vector<size_t> > graph ;
815
816 const size_t outer_length = nGrid * nGrid * nGrid ;
817 const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
818
819 //------------------------------
820 // Generate CRS block-tensor matrix:
821
822 matrix_type matrix ;
823
824 Teuchos::ParameterList params;
825 params.set("Tile Size", 128);
826 matrix.block =
827 Stokhos::create_stochastic_product_tensor< TensorType >( *basis, *Cijk,
828 params);
829 matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
830
831 const size_t inner_length = matrix.block.dimension();
832 const size_t inner_length_aligned = matrix.block.aligned_dimension();
833
834 matrix.values =
835 block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length_aligned , graph_length );
836
837 block_vector_type x =
838 block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length_aligned , outer_length );
839 block_vector_type y =
840 block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length_aligned , outer_length );
841
842 Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
843
844 //------------------------------
845 // Generate input multivector:
846
847 Kokkos::deep_copy( x , ScalarType(1.0) );
848
849 //------------------------------
850
851 Device().fence();
852 Kokkos::Timer clock ;
853 for ( int iter = 0 ; iter < iterCount ; ++iter ) {
854 Stokhos::multiply( matrix , x , y );
855 }
856 Device().fence();
857
858 const double seconds_per_iter = clock.seconds() / ((double) iterCount );
859 const double flops_per_block = matrix.block.tensor().num_flops();
860 const double flops = 1.0e-9*graph_length*flops_per_block;
861
862 // std::cout << "tensor: flops = " << flops
863 // << " time = " << seconds_per_iter << std::endl;
864
865 std::vector<double> perf(6) ;
866
867 perf[0] = outer_length * inner_length ;
868 perf[1] = seconds_per_iter ;
869 perf[2] = flops / seconds_per_iter;
870 perf[3] = matrix.block.tensor().entry_count();
871 perf[4] = inner_length ;
872 perf[5] = flops_per_block;
873
874 return perf ;
875}
876
877template< typename ScalarType , class Device >
878std::vector<double>
880 const std::vector<int> & var_degree ,
881 const int nGrid ,
882 const int iterCount ,
883 const bool symmetric )
884{
885 typedef ScalarType value_type ;
886 typedef Kokkos::View< value_type** ,
887 Kokkos::LayoutLeft ,
888 Device > block_vector_type ;
889
892
894 typedef typename matrix_type::graph_type graph_type ;
895
896 typedef ScalarType value_type ;
897 typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
902
903 using Teuchos::rcp;
904 using Teuchos::RCP;
905 using Teuchos::Array;
906
907 // Create Stochastic Galerkin basis and expansion
908 const size_t num_KL = var_degree.size();
909 Array< RCP<const abstract_basis_type> > bases(num_KL);
910 for (size_t i=0; i<num_KL; i++) {
911 if (symmetric)
912 bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
913 else
914 bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
915 }
916 RCP<const product_basis_type> basis =
917 rcp(new product_basis_type(
919 RCP<Cijk_type> Cijk =
921
922 //------------------------------
923 // Generate graph for "FEM" box structure:
924
925 std::vector< std::vector<size_t> > graph ;
926
927 const size_t outer_length = nGrid * nGrid * nGrid ;
928 const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
929
930 //------------------------------
931 // Generate CRS block-tensor matrix:
932
933 matrix_type matrix ;
934
935 matrix.block =
936 Stokhos::create_stochastic_product_tensor< TensorType >( *basis,
937 *Cijk );
938 matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
939
940 const size_t inner_length = matrix.block.dimension();
941
942 matrix.values = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length , graph_length );
943
944 block_vector_type x = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length , outer_length );
945 block_vector_type y = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length , outer_length );
946
947 Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
948
949 //------------------------------
950 // Generate input multivector:
951
952 Kokkos::deep_copy( x , ScalarType(1.0) );
953
954 //------------------------------
955
956 Device().fence();
957 Kokkos::Timer clock ;
958 for ( int iter = 0 ; iter < iterCount ; ++iter ) {
959 Stokhos::multiply( matrix , x , y );
960 }
961 Device().fence();
962
963 const double seconds_per_iter = clock.seconds() / ((double) iterCount );
964 const double flops_per_block = matrix.block.tensor().num_flops();
965 const double flops = 1.0e-9*graph_length*flops_per_block;
966
967 // std::cout << "tensor: flops = " << flops
968 // << " time = " << seconds_per_iter << std::endl;
969
970 std::vector<double> perf(6) ;
971
972 perf[0] = outer_length * inner_length ;
973 perf[1] = seconds_per_iter ;
974 perf[2] = flops / seconds_per_iter;
975 perf[3] = matrix.block.tensor().num_non_zeros();
976 perf[4] = inner_length ;
977 perf[5] = flops_per_block;
978
979 return perf ;
980}
981
982template< typename ScalarType , class Device >
983std::vector<double>
985 const std::vector<int> & var_degree ,
986 const int nGrid ,
987 const int iterCount ,
988 const bool symmetric )
989{
990 typedef ScalarType value_type ;
991 typedef Kokkos::View< value_type** ,
992 Kokkos::LayoutLeft ,
993 Device > block_vector_type ;
994
997
999 typedef typename matrix_type::graph_type graph_type ;
1000
1001 typedef ScalarType value_type ;
1002 typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
1005 typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
1007
1008 using Teuchos::rcp;
1009 using Teuchos::RCP;
1010 using Teuchos::Array;
1011
1012 // Create Stochastic Galerkin basis and expansion
1013 const size_t num_KL = var_degree.size();
1014 Array< RCP<const abstract_basis_type> > bases(num_KL);
1015 for (size_t i=0; i<num_KL; i++) {
1016 if (symmetric)
1017 bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
1018 else
1019 bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
1020 }
1021 RCP<const product_basis_type> basis =
1022 rcp(new product_basis_type(
1024 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1025
1026 //------------------------------
1027 // Generate graph for "FEM" box structure:
1028
1029 std::vector< std::vector<size_t> > graph ;
1030
1031 const size_t outer_length = nGrid * nGrid * nGrid ;
1032 const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
1033
1034 //------------------------------
1035 // Generate CRS block-tensor matrix:
1036
1037 matrix_type matrix ;
1038
1039 Teuchos::ParameterList params;
1040 params.set("Symmetric", symmetric);
1041 matrix.block =
1042 Stokhos::create_stochastic_product_tensor< TensorType >( *basis,
1043 *Cijk,
1044 params );
1045 matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
1046
1047 const size_t inner_length = matrix.block.tensor().dimension();
1048 const size_t inner_length_aligned = matrix.block.tensor().aligned_dimension();
1049
1050 matrix.values = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length_aligned , graph_length );
1051
1052 block_vector_type x = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length_aligned , outer_length );
1053 block_vector_type y = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length_aligned , outer_length );
1054
1055 Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
1056
1057 //------------------------------
1058 // Generate input multivector:
1059
1060 Kokkos::deep_copy( x , ScalarType(1.0) );
1061
1062 //------------------------------
1063
1064 Device().fence();
1065 Kokkos::Timer clock ;
1066 for ( int iter = 0 ; iter < iterCount ; ++iter ) {
1067 Stokhos::multiply( matrix , x , y );
1068 }
1069 Device().fence();
1070
1071 const double seconds_per_iter = clock.seconds() / ((double) iterCount );
1072 const double flops_per_block = matrix.block.tensor().num_flops();
1073 const double flops = 1.0e-9*graph_length*flops_per_block;
1074
1075 // std::cout << "tensor: flops = " << flops
1076 // << " time = " << seconds_per_iter << std::endl;
1077
1078 std::vector<double> perf(6) ;
1079
1080 perf[0] = outer_length * inner_length ;
1081 perf[1] = seconds_per_iter ;
1082 perf[2] = flops / seconds_per_iter;
1083 perf[3] = matrix.block.tensor().num_non_zeros();
1084 perf[4] = inner_length ;
1085 perf[5] = flops_per_block;
1086
1087 return perf ;
1088}
1089
1090template< typename ScalarType , class Device , class SparseMatOps >
1091std::vector<double>
1093 const std::vector<int> & var_degree ,
1094 const int nGrid ,
1095 const int iterCount ,
1096 const bool test_block ,
1097 const bool symmetric )
1098{
1099 typedef ScalarType value_type ;
1100 typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
1103 typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
1105
1106 using Teuchos::rcp;
1107 using Teuchos::RCP;
1108 using Teuchos::Array;
1109
1110 // Create Stochastic Galerkin basis and expansion
1111 const size_t num_KL = var_degree.size();
1112 Array< RCP<const abstract_basis_type> > bases(num_KL);
1113 for (size_t i=0; i<num_KL; i++) {
1114 if (symmetric)
1115 bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
1116 else
1117 bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
1118 }
1119 RCP<const product_basis_type> basis =
1120 rcp(new product_basis_type(
1122 const size_t outer_length = basis->size();
1123 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1124
1125 //------------------------------
1126
1127 typedef Stokhos::CrsMatrix<value_type,Device> matrix_type;
1128 typedef typename matrix_type::values_type matrix_values_type;
1129 typedef typename matrix_type::graph_type matrix_graph_type;
1130
1131 //------------------------------
1132 // Generate FEM graph:
1133
1134 std::vector< std::vector<size_t> > fem_graph ;
1135
1136 const size_t inner_length = nGrid * nGrid * nGrid ;
1137 const size_t graph_length =
1138 unit_test::generate_fem_graph( nGrid , fem_graph );
1139
1140 //------------------------------
1141
1142 typedef Kokkos::View<value_type*,Device> vec_type ;
1143
1144 std::vector<matrix_type> matrix( outer_length ) ;
1145 std::vector<vec_type> x( outer_length ) ;
1146 std::vector<vec_type> y( outer_length ) ;
1147 std::vector<vec_type> tmp( outer_length ) ;
1148
1149 for (size_t block=0; block<outer_length; ++block) {
1150 matrix[block].graph = Kokkos::create_staticcrsgraph<matrix_graph_type>( std::string("testing") , fem_graph );
1151
1152 matrix[block].values = matrix_values_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length );
1153
1154 x[block] = vec_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length );
1155 y[block] = vec_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length );
1156 tmp[block] = vec_type( Kokkos::ViewAllocateWithoutInitializing("tmp"), inner_length );
1157
1158 Kokkos::deep_copy( matrix[block].values , ScalarType(1.0) );
1159
1160 Kokkos::deep_copy( x[block] , ScalarType(1.0) );
1161 Kokkos::deep_copy( y[block] , ScalarType(1.0) );
1162 }
1163
1164 Device().fence();
1165 SparseMatOps smo;
1166 Kokkos::Timer clock ;
1167 int n_apply = 0;
1168 int n_add = 0;
1169 for ( int iter = 0 ; iter < iterCount ; ++iter ) {
1170
1171 // Original matrix-free multiply algorithm using a block apply
1172 n_apply = 0;
1173 n_add = 0;
1174 typename Cijk_type::k_iterator k_begin = Cijk->k_begin();
1175 typename Cijk_type::k_iterator k_end = Cijk->k_end();
1176 for (typename Cijk_type::k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
1177 int nj = Cijk->num_j(k_it);
1178 if (nj > 0) {
1179 int k = index(k_it);
1180 typename Cijk_type::kj_iterator j_begin = Cijk->j_begin(k_it);
1181 typename Cijk_type::kj_iterator j_end = Cijk->j_end(k_it);
1182 std::vector<vec_type> xx(nj), yy(nj);
1183 int jdx = 0;
1184 for (typename Cijk_type::kj_iterator j_it = j_begin; j_it != j_end;
1185 ++j_it) {
1186 int j = index(j_it);
1187 xx[jdx] = x[j];
1188 yy[jdx] = tmp[j];
1189 jdx++;
1190 }
1191 Stokhos::multiply( matrix[k] , xx , yy, smo );
1192 n_apply += nj;
1193 jdx = 0;
1194 for (typename Cijk_type::kj_iterator j_it = j_begin; j_it != j_end;
1195 ++j_it) {
1196 typename Cijk_type::kji_iterator i_begin = Cijk->i_begin(j_it);
1197 typename Cijk_type::kji_iterator i_end = Cijk->i_end(j_it);
1198 for (typename Cijk_type::kji_iterator i_it = i_begin; i_it != i_end;
1199 ++i_it) {
1200 int i = index(i_it);
1201 value_type c = value(i_it);
1202 Stokhos::update( value_type(1.0) , y[i] , c , yy[jdx] );
1203 ++n_add;
1204 }
1205 jdx++;
1206 }
1207 }
1208 }
1209
1210 }
1211 Device().fence();
1212
1213 const double seconds_per_iter = clock.seconds() / ((double) iterCount );
1214 const double flops = 1.0e-9*(2.0*static_cast<double>(n_apply)*graph_length+
1215 static_cast<double>(n_add)*inner_length);
1216
1217 // std::cout << "mat-free: flops = " << flops
1218 // << " time = " << seconds_per_iter << std::endl;
1219
1220 std::vector<double> perf(4);
1221 perf[0] = outer_length * inner_length;
1222 perf[1] = seconds_per_iter ;
1223 perf[2] = flops/seconds_per_iter;
1224 perf[3] = flops;
1225
1226 return perf;
1227}
1228
1229template< typename ScalarType , class Device , class SparseMatOps >
1230std::vector<double>
1232 const std::vector<int> & var_degree ,
1233 const int nGrid ,
1234 const int iterCount ,
1235 const bool test_block ,
1236 const bool symmetric )
1237{
1238 typedef ScalarType value_type ;
1239 typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
1242 typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
1244
1245 using Teuchos::rcp;
1246 using Teuchos::RCP;
1247 using Teuchos::Array;
1248
1249 // Create Stochastic Galerkin basis and expansion
1250 const size_t num_KL = var_degree.size();
1251 Array< RCP<const abstract_basis_type> > bases(num_KL);
1252 for (size_t i=0; i<num_KL; i++) {
1253 if (symmetric)
1254 bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
1255 else
1256 bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
1257 }
1258 RCP<const product_basis_type> basis =
1259 rcp(new product_basis_type(
1261 const size_t outer_length = basis->size();
1262 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1263
1264 //------------------------------
1265
1266 typedef Stokhos::CrsMatrix<value_type,Device> matrix_type;
1267 typedef typename matrix_type::values_type matrix_values_type;
1268 typedef typename matrix_type::graph_type matrix_graph_type;
1269
1270 //------------------------------
1271 // Generate FEM graph:
1272
1273 std::vector< std::vector<size_t> > fem_graph ;
1274
1275 const size_t inner_length = nGrid * nGrid * nGrid ;
1276 const size_t graph_length =
1277 unit_test::generate_fem_graph( nGrid , fem_graph );
1278
1279 //------------------------------
1280
1281 typedef Kokkos::View<value_type*, Kokkos::LayoutLeft, Device, Kokkos::MemoryUnmanaged> vec_type ;
1282 typedef Kokkos::View<value_type**, Kokkos::LayoutLeft, Device> multi_vec_type ;
1283
1284 std::vector<matrix_type> matrix( outer_length ) ;
1285 multi_vec_type x( Kokkos::ViewAllocateWithoutInitializing("x"),
1286 inner_length, outer_length ) ;
1287 multi_vec_type y("y", inner_length, outer_length ) ;
1288 multi_vec_type tmp_x( "tmp_x", inner_length, outer_length ) ;
1289 multi_vec_type tmp_y( "tmp_y", inner_length, outer_length ) ;
1290
1291 Kokkos::deep_copy( x , ScalarType(1.0) );
1292
1293 for (size_t block=0; block<outer_length; ++block) {
1294 matrix[block].graph = Kokkos::create_staticcrsgraph<matrix_graph_type>(
1295 std::string("testing") , fem_graph );
1296
1297 matrix[block].values = matrix_values_type( "matrix" , graph_length );
1298
1299 Kokkos::deep_copy( matrix[block].values , ScalarType(1.0) );
1300 }
1301
1302 Device().fence();
1303 SparseMatOps smo;
1304 Kokkos::Timer clock ;
1305 int n_apply = 0;
1306 int n_add = 0;
1307 for ( int iter = 0 ; iter < iterCount ; ++iter ) {
1308
1309 // Original matrix-free multiply algorithm using a block apply
1310 typedef typename Cijk_type::k_iterator k_iterator;
1311 typedef typename Cijk_type::kj_iterator kj_iterator;
1312 typedef typename Cijk_type::kji_iterator kji_iterator;
1313 n_apply = 0;
1314 n_add = 0;
1315 k_iterator k_begin = Cijk->k_begin();
1316 k_iterator k_end = Cijk->k_end();
1317 for (k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
1318 unsigned nj = Cijk->num_j(k_it);
1319 if (nj > 0) {
1320 int k = index(k_it);
1321 kj_iterator j_begin = Cijk->j_begin(k_it);
1322 kj_iterator j_end = Cijk->j_end(k_it);
1323 std::vector<int> j_indices(nj);
1324 unsigned jdx = 0;
1325 for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1326 int j = index(j_it);
1327 vec_type xx = Kokkos::subview( x, Kokkos::ALL(), j );
1328 vec_type tt = Kokkos::subview( tmp_x, Kokkos::ALL(), jdx++ );
1329 Kokkos::deep_copy(tt, xx);
1330 }
1331 multi_vec_type tmp_x_view =
1332 Kokkos::subview( tmp_x, Kokkos::ALL(),
1333 std::make_pair(0u,nj));
1334 multi_vec_type tmp_y_view =
1335 Kokkos::subview( tmp_y, Kokkos::ALL(),
1336 std::make_pair(0u,nj));
1337 Stokhos::multiply( matrix[k] , tmp_x_view , tmp_y_view, smo );
1338 n_apply += nj;
1339 jdx = 0;
1340 for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1341 vec_type tmp_y_view =
1342 Kokkos::subview( tmp_y, Kokkos::ALL(), jdx++ );
1343 kji_iterator i_begin = Cijk->i_begin(j_it);
1344 kji_iterator i_end = Cijk->i_end(j_it);
1345 for (kji_iterator i_it = i_begin; i_it != i_end; ++i_it) {
1346 int i = index(i_it);
1347 value_type c = value(i_it);
1348 vec_type y_view = Kokkos::subview( y, Kokkos::ALL(), i );
1349 Stokhos::update( value_type(1.0) , y_view , c , tmp_y_view );
1350 ++n_add;
1351 }
1352 }
1353 }
1354 }
1355
1356 }
1357 Device().fence();
1358
1359 const double seconds_per_iter = clock.seconds() / ((double) iterCount );
1360 const double flops = 1.0e-9*(2.0*static_cast<double>(n_apply)*graph_length+
1361 static_cast<double>(n_add)*inner_length);
1362
1363 // std::cout << "mat-free: flops = " << flops
1364 // << " time = " << seconds_per_iter << std::endl;
1365
1366 std::vector<double> perf(4);
1367 perf[0] = outer_length * inner_length;
1368 perf[1] = seconds_per_iter ;
1369 perf[2] = flops/seconds_per_iter;
1370 perf[3] = flops;
1371
1372 return perf;
1373}
1374
1375#ifdef HAVE_STOKHOS_KOKKOSLINALG
1376template< typename ScalarType , class Device >
1377std::vector<double>
1378test_original_matrix_free_kokkos(
1379 const std::vector<int> & var_degree ,
1380 const int nGrid ,
1381 const int iterCount ,
1382 const bool test_block ,
1383 const bool symmetric )
1384{
1385 typedef ScalarType value_type ;
1386 typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
1389 typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
1391
1392 using Teuchos::rcp;
1393 using Teuchos::RCP;
1394 using Teuchos::Array;
1395
1396 // Create Stochastic Galerkin basis and expansion
1397 const size_t num_KL = var_degree.size();
1398 Array< RCP<const abstract_basis_type> > bases(num_KL);
1399 for (size_t i=0; i<num_KL; i++) {
1400 if (symmetric)
1401 bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
1402 else
1403 bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
1404 }
1405 RCP<const product_basis_type> basis =
1406 rcp(new product_basis_type(
1407 bases, ScalarTolerances<value_type>::sparse_cijk_tol()));
1408 const size_t outer_length = basis->size();
1409 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1410
1411 //------------------------------
1412
1413 typedef int ordinal_type;
1414 typedef KokkosSparse::CrsMatrix<value_type,ordinal_type,Device> matrix_type;
1415 typedef typename matrix_type::values_type matrix_values_type;
1416 typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
1417
1418 //------------------------------
1419 // Generate FEM graph:
1420
1421 std::vector< std::vector<size_t> > fem_graph ;
1422
1423 const size_t inner_length = nGrid * nGrid * nGrid ;
1424 const size_t graph_length =
1425 unit_test::generate_fem_graph( nGrid , fem_graph );
1426
1427 //------------------------------
1428
1429 typedef Kokkos::View<value_type*,Kokkos::LayoutLeft,Device, Kokkos::MemoryUnmanaged> vec_type ;
1430 typedef Kokkos::View<value_type**, Kokkos::LayoutLeft, Device> multi_vec_type;
1431
1432 std::vector<matrix_type> matrix( outer_length ) ;
1433 multi_vec_type x( Kokkos::ViewAllocateWithoutInitializing("x"),
1434 inner_length, outer_length ) ;
1435 multi_vec_type y( "y", inner_length, outer_length ) ;
1436 multi_vec_type tmp_x( "tmp_x", inner_length, outer_length ) ;
1437 multi_vec_type tmp_y( "tmp_y", inner_length, outer_length ) ;
1438
1439 Kokkos::deep_copy( x , ScalarType(1.0) );
1440
1441 for (size_t block=0; block<outer_length; ++block) {
1442 matrix_graph_type matrix_graph =
1443 Kokkos::create_staticcrsgraph<matrix_graph_type>(
1444 std::string("test crs graph") , fem_graph );
1445
1446 matrix_values_type matrix_values = matrix_values_type(
1447 Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length );
1448 Kokkos::deep_copy(matrix_values , ScalarType(1.0) );
1449 matrix[block] = matrix_type("matrix", outer_length, matrix_values,
1450 matrix_graph);
1451 }
1452
1453 Device().fence();
1454 Kokkos::Timer clock ;
1455 int n_apply = 0;
1456 int n_add = 0;
1457 for ( int iter = 0 ; iter < iterCount ; ++iter ) {
1458
1459 // Original matrix-free multiply algorithm using a block apply
1460 typedef typename Cijk_type::k_iterator k_iterator;
1461 typedef typename Cijk_type::kj_iterator kj_iterator;
1462 typedef typename Cijk_type::kji_iterator kji_iterator;
1463 n_apply = 0;
1464 n_add = 0;
1465 k_iterator k_begin = Cijk->k_begin();
1466 k_iterator k_end = Cijk->k_end();
1467 for (k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
1468 unsigned nj = Cijk->num_j(k_it);
1469 if (nj > 0) {
1470 int k = index(k_it);
1471 kj_iterator j_begin = Cijk->j_begin(k_it);
1472 kj_iterator j_end = Cijk->j_end(k_it);
1473 unsigned jdx = 0;
1474 for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1475 int j = index(j_it);
1476 vec_type xx = Kokkos::subview( x, Kokkos::ALL(), j );
1477 vec_type tt = Kokkos::subview( tmp_x, Kokkos::ALL(), jdx++ );
1478 Kokkos::deep_copy(tt, xx);
1479 }
1480 multi_vec_type tmp_x_view =
1481 Kokkos::subview( tmp_x, Kokkos::ALL(),
1482 std::make_pair(0u,nj));
1483 multi_vec_type tmp_y_view =
1484 Kokkos::subview( tmp_y, Kokkos::ALL(),
1485 std::make_pair(0u,nj));
1486 KokkosSparse::spmv( "N" , value_type(1.0) , matrix[k] , tmp_x_view , value_type(0.0) , tmp_y_view );
1487 n_apply += nj;
1488 jdx = 0;
1489 for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1490 vec_type tmp_y_view =
1491 Kokkos::subview( tmp_y, Kokkos::ALL(), jdx++ );
1492 kji_iterator i_begin = Cijk->i_begin(j_it);
1493 kji_iterator i_end = Cijk->i_end(j_it);
1494 for (kji_iterator i_it = i_begin; i_it != i_end; ++i_it) {
1495 int i = index(i_it);
1496 value_type c = value(i_it);
1497 vec_type y_view = Kokkos::subview( y, Kokkos::ALL(), i );
1498 //Stokhos::update( value_type(1.0) , y_view , c , tmp_y_view );
1499 KokkosBlas::update(value_type(1.0) , y_view, c, tmp_y_view, value_type(0.0), y_view);
1500 ++n_add;
1501 }
1502 }
1503 }
1504 }
1505
1506 }
1507 Device().fence();
1508
1509 const double seconds_per_iter = clock.seconds() / ((double) iterCount );
1510 const double flops = 1.0e-9*(2.0*static_cast<double>(n_apply)*graph_length+
1511 static_cast<double>(n_add)*inner_length);
1512
1513 // std::cout << "mat-free: flops = " << flops
1514 // << " time = " << seconds_per_iter << std::endl;
1515
1516 std::vector<double> perf(4);
1517 perf[0] = outer_length * inner_length;
1518 perf[1] = seconds_per_iter ;
1519 perf[2] = flops/seconds_per_iter;
1520 perf[3] = flops;
1521
1522 return perf;
1523}
1524#endif
1525
1526template< class Scalar, class Device >
1527void performance_test_driver_all( const int pdeg ,
1528 const int minvar ,
1529 const int maxvar ,
1530 const int nGrid ,
1531 const int nIter ,
1532 const bool test_block ,
1533 const bool symmetric )
1534{
1535 //------------------------------
1536 // Generate FEM graph:
1537
1538 std::vector< std::vector<size_t> > fem_graph ;
1539
1540 const size_t fem_nonzeros =
1541 unit_test::generate_fem_graph( nGrid , fem_graph );
1542
1543 //------------------------------
1544
1545 std::cout.precision(8);
1546
1547 //------------------------------
1548
1549 std::cout << std::endl << "\"FEM NNZ = " << fem_nonzeros << "\"" << std::endl;
1550
1551 std::cout << std::endl
1552 << "\"#nGrid\" , "
1553 << "\"#Variable\" , "
1554 << "\"PolyDegree\" , "
1555 << "\"#Bases\" , "
1556 << "\"#TensorEntry\" , "
1557 << "\"VectorSize\" , "
1558 << "\"Original-Flat MXV-Time\" , "
1559 << "\"Original-Flat MXV-Speedup\" , "
1560 << "\"Original-Flat MXV-GFLOPS\" , "
1561 << "\"Commuted-Flat MXV-Speedup\" , "
1562 << "\"Commuted-Flat MXV-GFLOPS\" , "
1563 << "\"Block-Diagonal MXV-Speedup\" , "
1564 << "\"Block-Diagonal MXV-GFLOPS\" , "
1565 << "\"Block-Crs-Tensor MXV-Speedup\" , "
1566 << "\"Block-Crs-Tensor MXV-GFLOPS\" , "
1567 << std::endl ;
1568
1569 for ( int nvar = minvar ; nvar <= maxvar ; ++nvar ) {
1570
1571 std::vector<int> var_degree( nvar , pdeg );
1572
1573 //------------------------------
1574
1575 const std::vector<double> perf_flat_original =
1576 test_product_flat_original_matrix<Scalar,Device>(
1577 var_degree , nGrid , nIter , symmetric );
1578
1579 const std::vector<double> perf_flat_commuted =
1580 test_product_flat_commuted_matrix<Scalar,Device>(
1581 var_degree , nGrid , nIter , symmetric );
1582
1583 const std::vector<double> perf_matrix =
1584 test_product_tensor_diagonal_matrix<Scalar,Device>(
1585 var_degree , nGrid , nIter , symmetric );
1586
1587 const std::vector<double> perf_crs_tensor =
1588 test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1589 var_degree , nGrid , nIter , symmetric );
1590
1591 if ( perf_flat_commuted[0] != perf_flat_original[0] ||
1592 perf_flat_commuted[3] != perf_flat_original[3] ) {
1593 std::cout << "ERROR: Original and commuted matrix sizes do not match"
1594 << std::endl
1595 << " original size = " << perf_flat_original[0]
1596 << " , nonzero = " << perf_flat_original[3]
1597 << std::endl
1598 << " commuted size = " << perf_flat_commuted[0]
1599 << " , nonzero = " << perf_flat_commuted[3]
1600 << std::endl ;
1601 }
1602
1603 std::cout << nGrid << " , "
1604 << nvar << " , "
1605 << pdeg << " , "
1606 << perf_crs_tensor[4] << " , "
1607 << perf_crs_tensor[3] << " , "
1608 << perf_flat_original[0] << " , "
1609 << perf_flat_original[1] << " , "
1610 << perf_flat_original[1] / perf_flat_original[1] << " , "
1611 << perf_flat_original[2] << " , "
1612 << perf_flat_original[1] / perf_flat_commuted[1] << " , "
1613 << perf_flat_commuted[2] << " , "
1614 << perf_flat_original[1] / perf_matrix[1] << " , "
1615 << perf_matrix[2] << " , "
1616 << perf_flat_original[1] / perf_crs_tensor[1] << " , "
1617 << perf_crs_tensor[2] << " , "
1618 << std::endl ;
1619 }
1620
1621 //------------------------------
1622}
1623
1624template< class Scalar, class Device , class SparseMatOps >
1625void performance_test_driver_poly( const int pdeg ,
1626 const int minvar ,
1627 const int maxvar ,
1628 const int nGrid ,
1629 const int nIter ,
1630 const bool test_block ,
1631 const bool symmetric )
1632{
1633 std::cout.precision(8);
1634
1635 //------------------------------
1636
1637 std::vector< std::vector<size_t> > fem_graph ;
1638 const size_t graph_length =
1639 unit_test::generate_fem_graph( nGrid , fem_graph );
1640 std::cout << std::endl << "\"FEM NNZ = " << graph_length << "\"" << std::endl;
1641
1642 std::cout << std::endl
1643 << "\"#nGrid\" , "
1644 << "\"#Variable\" , "
1645 << "\"PolyDegree\" , "
1646 << "\"#Bases\" , "
1647 << "\"#TensorEntry\" , "
1648 << "\"VectorSize\" , "
1649 << "\"Original-Matrix-Free-Block-MXV-Time\" , "
1650 << "\"Original-Matrix-Free-Block-MXV-Speedup\" , "
1651 << "\"Original-Matrix-Free-Block-MXV-GFLOPS\" , "
1652 << "\"Block-Crs-Tensor MXV-Speedup\" , "
1653 << "\"Block-Crs-Tensor MXV-GFLOPS\" , "
1654 // << "\"Block-Coo-Tensor MXV-Speedup\" , "
1655 // << "\"Block-Coo-Tensor MXV-GFLOPS\" , "
1656 // << "\"Block-Tiled Crs-Tensor MXV-Speedup\" , "
1657 // << "\"Block-Tiled Crs-3-Tensor MXV-GFLOPS\" , "
1658 << std::endl ;
1659
1660 for ( int nvar = minvar ; nvar <= maxvar ; ++nvar ) {
1661 std::vector<int> var_degree( nvar , pdeg );
1662
1663 const std::vector<double> perf_crs_tensor =
1664 test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1665 var_degree , nGrid , nIter , symmetric );
1666
1667 // const bool Pack = std::is_same<Device,Kokkos::Cuda>::value;
1668 // const std::vector<double> perf_coo_tensor =
1669 // test_product_tensor_matrix<Scalar,Stokhos::CooProductTensor<Scalar,Device,Pack>,Device>(
1670 // var_degree , nGrid , nIter , symmetric );
1671
1672 // const std::vector<double> perf_tiled_crs_tensor =
1673 // test_simple_tiled_product_tensor_matrix<Scalar,Device>(
1674 // var_degree , nGrid , nIter , symmetric );
1675
1676 std::vector<double> perf_original_mat_free_block;
1677#if defined(HAVE_STOKHOS_KOKKOSLINALG)
1678#if defined( KOKKOS_ENABLE_CUDA )
1679 enum { is_cuda = std::is_same<Device,Kokkos::Cuda>::value };
1680#else
1681 enum { is_cuda = false };
1682#endif
1683 if ( is_cuda )
1684 perf_original_mat_free_block =
1685 test_original_matrix_free_kokkos<Scalar,Device>(
1686 var_degree , nGrid , nIter , test_block , symmetric );
1687 else
1688 perf_original_mat_free_block =
1689 test_original_matrix_free_view<Scalar,Device,SparseMatOps>(
1690 var_degree , nGrid , nIter , test_block , symmetric );
1691#else
1692 perf_original_mat_free_block =
1693 test_original_matrix_free_view<Scalar,Device,SparseMatOps>(
1694 var_degree , nGrid , nIter , test_block , symmetric );
1695#endif
1696
1697 std::cout << nGrid << " , "
1698 << nvar << " , "
1699 << pdeg << " , "
1700 << perf_crs_tensor[4] << " , "
1701 << perf_crs_tensor[3] << " , "
1702 << perf_original_mat_free_block[0] << " , "
1703 << perf_original_mat_free_block[1] << " , "
1704 << perf_original_mat_free_block[1] /
1705 perf_original_mat_free_block[1] << " , "
1706 << perf_original_mat_free_block[2] << " , "
1707 << perf_original_mat_free_block[1] / perf_crs_tensor[1] << " , "
1708 << perf_crs_tensor[2] << " , "
1709 // << perf_original_mat_free_block[1] / perf_coo_tensor[1] << " , "
1710 // << perf_coo_tensor[2] << " , "
1711 // << perf_original_mat_free_block[1] / perf_tiled_crs_tensor[1] << " , "
1712 // << perf_tiled_crs_tensor[2]
1713 << std::endl ;
1714 }
1715
1716 //------------------------------
1717}
1718
1719template< class Scalar, class Device , class SparseMatOps >
1721 const int minp ,
1722 const int maxp ,
1723 const int nGrid ,
1724 const int nIter ,
1725 const bool test_block ,
1726 const bool symmetric )
1727{
1728 bool do_flat_sparse =
1729 std::is_same<typename Device::memory_space,Kokkos::HostSpace>::value ;
1730
1731 std::cout.precision(8);
1732
1733 //------------------------------
1734
1735 std::vector< std::vector<size_t> > fem_graph ;
1736 const size_t graph_length =
1737 unit_test::generate_fem_graph( nGrid , fem_graph );
1738 std::cout << std::endl << "\"FEM NNZ = " << graph_length << "\"" << std::endl;
1739
1740 std::cout << std::endl
1741 << "\"#nGrid\" , "
1742 << "\"#Variable\" , "
1743 << "\"PolyDegree\" , "
1744 << "\"#Bases\" , "
1745 << "\"#TensorEntry\" , "
1746 << "\"VectorSize\" , "
1747 << "\"Original-Matrix-Free-Block-MXV-Time\" , "
1748 << "\"Original-Matrix-Free-Block-MXV-Speedup\" , "
1749 << "\"Original-Matrix-Free-Block-MXV-GFLOPS\" , "
1750 << "\"Block-Crs-Tensor MXV-Speedup\" , "
1751 << "\"Block-Crs-Tensor MXV-GFLOPS\" , ";
1752 if (do_flat_sparse)
1753 std::cout << "\"Block-Lexicographic-Sparse-3-Tensor MXV-Speedup\" , "
1754 << "\"Block-Lexicographic-Sparse-3-Tensor MXV-GFLOPS\" , "
1755 << "\"Lexicographic FLOPS / Crs FLOPS\" , ";
1756 std::cout << std::endl ;
1757
1758 for ( int p = minp ; p <= maxp ; ++p ) {
1759 std::vector<int> var_degree( nvar , p );
1760
1761 const std::vector<double> perf_crs_tensor =
1762 test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1763 var_degree , nGrid , nIter , symmetric );
1764
1765 std::vector<double> perf_lexo_sparse_3_tensor;
1766 if (do_flat_sparse) {
1767 perf_lexo_sparse_3_tensor =
1768 test_lexo_block_tensor<Scalar,Device>( var_degree , nGrid , nIter , symmetric );
1769 }
1770
1771 const std::vector<double> perf_original_mat_free_block =
1772 test_original_matrix_free_vec<Scalar,Device,SparseMatOps>(
1773 var_degree , nGrid , nIter , test_block , symmetric );
1774
1775 std::cout << nGrid << " , "
1776 << nvar << " , "
1777 << p << " , "
1778 << perf_crs_tensor[4] << " , "
1779 << perf_crs_tensor[3] << " , "
1780 << perf_original_mat_free_block[0] << " , "
1781 << perf_original_mat_free_block[1] << " , "
1782 << perf_original_mat_free_block[1] / perf_original_mat_free_block[1] << " , "
1783 << perf_original_mat_free_block[2] << " , "
1784 << perf_original_mat_free_block[1] / perf_crs_tensor[1] << " , "
1785 << perf_crs_tensor[2] << " , ";
1786 if (do_flat_sparse) {
1787 std::cout << perf_original_mat_free_block[1] / perf_lexo_sparse_3_tensor[1] << " , "
1788 << perf_lexo_sparse_3_tensor[2] << " , "
1789 << perf_lexo_sparse_3_tensor[5] / perf_crs_tensor[5];
1790 }
1791
1792
1793 std::cout << std::endl ;
1794 }
1795
1796 //------------------------------
1797}
1798
1799template< class Scalar, class Device , class SparseMatOps >
1800void performance_test_driver_linear( const int minvar ,
1801 const int maxvar ,
1802 const int varinc ,
1803 const int nGrid ,
1804 const int nIter ,
1805 const bool test_block ,
1806 const bool symmetric )
1807{
1808 std::cout.precision(8);
1809
1810 //------------------------------
1811
1812 std::vector< std::vector<size_t> > fem_graph ;
1813 const size_t graph_length =
1814 unit_test::generate_fem_graph( nGrid , fem_graph );
1815 std::cout << std::endl << "\"FEM NNZ = " << graph_length << "\"" << std::endl;
1816
1817 std::cout << std::endl
1818 << "\"#nGrid\" , "
1819 << "\"#Variable\" , "
1820 << "\"PolyDegree\" , "
1821 << "\"#Bases\" , "
1822 << "\"#TensorEntry\" , "
1823 << "\"VectorSize\" , "
1824 << "\"Original-Matrix-Free-Block-MXV-Time\" , "
1825 << "\"Original-Matrix-Free-Block-MXV-Speedup\" , "
1826 << "\"Original-Matrix-Free-Block-MXV-GFLOPS\" , "
1827 << "\"Block-Crs-Tensor MXV-Speedup\" , "
1828 << "\"Block-Crs-Tensor MXV-GFLOPS\" , "
1829 << "\"Linear-Sparse-3-Tensor MXV-Speedup\" , "
1830 << "\"Linear-Sparse-3-Tensor MXV-GFLOPS\" , "
1831 << std::endl ;
1832
1833 for ( int nvar = minvar ; nvar <= maxvar ; nvar+=varinc ) {
1834 std::vector<int> var_degree( nvar , 1 );
1835
1836 const std::vector<double> perf_crs_tensor =
1837 test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1838 var_degree , nGrid , nIter , symmetric );
1839
1840 const std::vector<double> perf_linear_sparse_3_tensor =
1841 test_linear_tensor<Scalar,Device>( var_degree , nGrid , nIter , symmetric );
1842
1843 const std::vector<double> perf_original_mat_free_block =
1844 test_original_matrix_free_vec<Scalar,Device,SparseMatOps>(
1845 var_degree , nGrid , nIter , test_block , symmetric );
1846
1847 std::cout << nGrid << " , "
1848 << nvar << " , "
1849 << 1 << " , "
1850 << perf_crs_tensor[4] << " , "
1851 << perf_crs_tensor[3] << " , "
1852 << perf_original_mat_free_block[0] << " , "
1853 << perf_original_mat_free_block[1] << " , "
1854 << perf_original_mat_free_block[1] / perf_original_mat_free_block[1] << " , "
1855 << perf_original_mat_free_block[2] << " , "
1856 << perf_original_mat_free_block[1] / perf_crs_tensor[1] << " , "
1857 << perf_crs_tensor[2] << " , "
1858 << perf_original_mat_free_block[1] / perf_linear_sparse_3_tensor[1] << " , "
1859 << perf_linear_sparse_3_tensor[2] << " , "
1860 << std::endl ;
1861 }
1862
1863 //------------------------------
1864}
1865
1866template< class Scalar, class Device >
1868
1869//----------------------------------------------------------------------------
1870
1871}
void performance_test_driver(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
CRS matrix of dense blocks.
Jacobi polynomial basis.
Data structure storing a sparse 3-tensor C(i,j,k) in a a tree-based format for lexicographically orde...
Sparse product tensor with replicated entries to provide subsets with a given coordinate.
A comparison functor implementing a strict weak ordering based lexographic ordering.
Sparse product tensor with replicated entries to provide subsets with a given coordinate.
Abstract base class for 1-D orthogonal polynomials.
Data structure storing a sparse 3-tensor C(i,j,k) in a a compressed format.
Bases defined by combinatorial product of polynomial bases.
Symmetric diagonal storage for a dense matrix.
Multivariate orthogonal polynomial basis generated from a total order tensor product of univariate po...
Stokhos::LegendreBasis< int, double > basis_type
Sacado::MP::Vector< storage_type > vec_type
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< XD, XP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< YD, YP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< ZD, ZP... > >::value >::type update(const typename Kokkos::View< XD, XP... >::array_type::non_const_value_type &alpha, const Kokkos::View< XD, XP... > &x, const typename Kokkos::View< YD, YP... >::array_type::non_const_value_type &beta, const Kokkos::View< YD, YP... > &y, const typename Kokkos::View< ZD, ZP... >::array_type::non_const_value_type &gamma, const Kokkos::View< ZD, ZP... > &z)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
Teuchos::RCP< LTBSparse3Tensor< ordinal_type, value_type > > computeTripleProductTensorLTBBlockLeaf(const TotalOrderBasis< ordinal_type, value_type, LexographicLess< MultiIndex< ordinal_type > > > &product_basis, bool symmetric=false)
Teuchos::RCP< Epetra_CrsGraph > sparse3Tensor2CrsGraph(const Stokhos::OrthogPolyBasis< ordinal_type, value_type > &basis, const Stokhos::Sparse3Tensor< ordinal_type, value_type > &Cijk, const Epetra_Comm &comm)
Build an Epetra_CrsGraph from a sparse 3 tensor.
void update(const ValueType &alpha, VectorType &x, const ValueType &beta, const VectorType &y)
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
Definition: csr_vector.h:265
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y
Definition: csr_vector.h:267
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
void performance_test_driver_linear(const int minvar, const int maxvar, const int varinc, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
void performance_test_driver_poly_deg(const int nvar, const int minp, const int maxp, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
size_t generate_fem_graph(size_t N, std::vector< std::vector< size_t > > &graph)
std::vector< double > test_product_flat_original_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
std::vector< double > test_product_tensor_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
void performance_test_driver_poly(const int pdeg, const int minvar, const int maxvar, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
std::vector< double > test_product_tensor_diagonal_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
std::vector< double > test_simple_tiled_product_tensor_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
std::vector< double > test_linear_tensor(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
std::vector< double > test_lexo_block_tensor(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
std::vector< double > test_tiled_product_tensor_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
std::vector< double > test_original_matrix_free_view(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool test_block, const bool symmetric)
std::vector< double > test_original_matrix_free_vec(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool test_block, const bool symmetric)
void performance_test_driver_all(const int pdeg, const int minvar, const int maxvar, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
std::vector< double > test_product_flat_commuted_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)