Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
FadMPAssembly/fenl_functors.hpp
Go to the documentation of this file.
1/*
2//@HEADER
3// ************************************************************************
4//
5// Kokkos: Manycore Performance-Portable Multidimensional Arrays
6// Copyright (2012) Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
39//
40// ************************************************************************
41//@HEADER
42*/
43
44#ifndef KOKKOS_EXAMPLE_FENLFUNCTORS_HPP
45#define KOKKOS_EXAMPLE_FENLFUNCTORS_HPP
46
47#include <stdio.h>
48
49#include <iostream>
50#include <fstream>
51#include <iomanip>
52#include <cstdlib>
53#include <cmath>
54#include <limits>
55
56#include <Kokkos_Pair.hpp>
57#include <Kokkos_UnorderedMap.hpp>
58
59#include <Kokkos_Timer.hpp>
60
61#include <BoxElemFixture.hpp>
62#include <HexElement.hpp>
63
65#include "Sacado.hpp"
66//#include "Fad/Sacado_Fad_SFad_MP_Vector.hpp"
68//#include "Fad/Sacado_Fad_DFad_MP_Vector.hpp"
69
70//----------------------------------------------------------------------------
71//----------------------------------------------------------------------------
72
73namespace Kokkos {
74namespace Example {
75namespace FENL {
76
78 struct Dim3 {
79 size_t x, y, z;
80 Dim3(const size_t x_, const size_t y_ = 1, const size_t z_ = 1) :
81 x(x_), y(y_), z(z_) {}
82 };
83
85 size_t num_blocks;
87
88 DeviceConfig(const size_t num_blocks_ = 0,
89 const size_t threads_per_block_x_ = 0,
90 const size_t threads_per_block_y_ = 0,
91 const size_t threads_per_block_z_ = 1) :
92 block_dim(threads_per_block_x_,threads_per_block_y_,threads_per_block_z_),
93 num_blocks(num_blocks_),
95 {}
96};
97
98template< typename ValueType , class Space >
99struct CrsMatrix {
100#ifdef KOKKOS_ENABLE_DEPRECATED_CODE // Don't remove this until Kokkos has removed the deprecated code path probably around September 2018
101 typedef Kokkos::StaticCrsGraph< unsigned , Space , void , unsigned > StaticCrsGraphType ;
102#else
103 typedef Kokkos::StaticCrsGraph< unsigned , Space , void , void , unsigned > StaticCrsGraphType ;
104#endif
105 typedef View< ValueType * , Space > values_type ;
106
109
111
112 CrsMatrix( const StaticCrsGraphType & arg_graph )
113 : graph( arg_graph )
114 , values( "crs_matrix_values" , arg_graph.entries.extent(0) )
115 {}
116};
117
118// Traits class for creating strided local views for embedded ensemble-based,
119// specialized for ensemble UQ scalar type
120template <typename ViewType, typename Enabled = void>
122 typedef ViewType view_type;
123 // typedef Kokkos::View<typename view_type::data_type,
124 // typename view_type::array_layout,
125 // typename view_type::execution_space,
126 // Kokkos::MemoryUnmanaged> local_view_type;
128 typedef typename view_type::value_type local_value_type;
129 static const bool use_team = false;
130 KOKKOS_INLINE_FUNCTION
132 const unsigned local_rank)
133 { return v; }
134};
135
136#if defined( KOKKOS_ENABLE_CUDA )
137
138template <typename ViewType>
139struct LocalViewTraits<
140 ViewType,
141 typename std::enable_if< std::is_same<typename ViewType::execution_space,
142 Kokkos::Cuda>::value &&
143 Kokkos::is_view_mp_vector<ViewType>::value
144 >::type > {
145 typedef ViewType view_type;
147 typedef typename local_view_type::value_type local_value_type;
148 static const bool use_team = true;
149
150 KOKKOS_INLINE_FUNCTION
152 const unsigned local_rank)
153 {
154 return Kokkos::partition<1>(v, local_rank);
155 }
156};
157
158#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
159
160// Compute DeviceConfig struct's based on scalar type
161template <typename ScalarType>
163 static void eval( Kokkos::Example::FENL::DeviceConfig& dev_config_elem,
164 Kokkos::Example::FENL::DeviceConfig& dev_config_bc ) {
165 dev_config_elem = Kokkos::Example::FENL::DeviceConfig( 0 , 1 , 1 );
166 dev_config_bc = Kokkos::Example::FENL::DeviceConfig( 0 , 1 , 1 );
167 }
168};
169
170// Compute DeviceConfig struct's based on scalar type
171template <typename StorageType>
172struct CreateDeviceConfigs< Sacado::MP::Vector<StorageType> > {
173 typedef typename StorageType::execution_space execution_space;
174 static void eval( Kokkos::Example::FENL::DeviceConfig& dev_config_elem,
175 Kokkos::Example::FENL::DeviceConfig& dev_config_bc ) {
176 static const unsigned VectorSize = StorageType::static_size;
177#if defined( KOKKOS_ENABLE_CUDA )
178 enum { is_cuda = std::is_same< execution_space, Kokkos::Cuda >::value };
179#else
180 enum { is_cuda = false };
181#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
182 if ( is_cuda ) {
183 dev_config_elem = Kokkos::Example::FENL::DeviceConfig( 0 , VectorSize , 64/VectorSize );
184 dev_config_bc = Kokkos::Example::FENL::DeviceConfig( 0 , VectorSize , 256/VectorSize );
185 }
186 else {
187 dev_config_elem = Kokkos::Example::FENL::DeviceConfig( 0 , 1 , 1 );
188 dev_config_bc = Kokkos::Example::FENL::DeviceConfig( 0 , 1 , 1 );
189 }
190 }
191};
192
193template< class ElemNodeIdView , class CrsGraphType , unsigned ElemNode >
195public:
196
197 typedef typename ElemNodeIdView::execution_space execution_space ;
198 typedef pair<unsigned,unsigned> key_type ;
199
200 typedef Kokkos::UnorderedMap< key_type, void , execution_space > SetType ;
201 typedef typename CrsGraphType::row_map_type::non_const_type RowMapType ;
202 typedef Kokkos::View< unsigned , execution_space > UnsignedValue ;
203
204 // Static dimensions of 0 generate compiler warnings or errors.
205 typedef Kokkos::View< unsigned*[ElemNode][ElemNode] , execution_space >
207
208private:
209
215
216 const unsigned node_count ;
217 const ElemNodeIdView elem_node_id ;
223
224public:
225
226 CrsGraphType graph ;
228
229 struct Times
230 {
231 double ratio;
237 };
238
239 NodeNodeGraph( const ElemNodeIdView & arg_elem_node_id ,
240 const unsigned arg_node_count,
241 Times & results
242 )
243 : node_count(arg_node_count)
244 , elem_node_id( arg_elem_node_id )
245 , row_total( "row_total" )
246 , row_count(Kokkos::ViewAllocateWithoutInitializing("row_count") , node_count ) // will deep_copy to 0 inside loop
247 , row_map( "graph_row_map" , node_count + 1 )
248 , node_node_set()
250 , graph()
251 , elem_graph()
252 {
253 //--------------------------------
254 // Guess at capacity required for the map:
255
256 Kokkos::Timer wall_clock ;
257
258 wall_clock.reset();
260
261 // upper bound on the capacity
262 size_t set_capacity = (((28ull * node_count) / 2ull)*4ull)/3ull;
263
264
265 // Increase capacity until the (node,node) map is successfully filled.
266 {
267 // Zero the row count to restart the fill
269
270 node_node_set = SetType( set_capacity );
271
272 // May be larger that requested:
273 set_capacity = node_node_set.capacity();
274
275 Kokkos::parallel_for( elem_node_id.extent(0) , *this );
276 }
277
278 execution_space().fence();
279 results.ratio = (double)node_node_set.size() / (double)node_node_set.capacity();
280 results.fill_node_set = wall_clock.seconds();
281 //--------------------------------
282
283 wall_clock.reset();
285
286 // Exclusive scan of row_count into row_map
287 // including the final total in the 'node_count + 1' position.
288 // Zero the 'row_count' values.
289 Kokkos::parallel_scan( node_count , *this );
290
291 // Zero the row count for the fill:
293
294 unsigned graph_entry_count = 0 ;
295
296 Kokkos::deep_copy( graph_entry_count , row_total );
297
298 // Assign graph's row_map and allocate graph's entries
299 graph.row_map = row_map ;
300 graph.entries = typename CrsGraphType::entries_type( "graph_entries" , graph_entry_count );
301
302 //--------------------------------
303 // Fill graph's entries from the (node,node) set.
304
305 execution_space().fence();
306 results.scan_node_count = wall_clock.seconds();
307
308 wall_clock.reset();
310 Kokkos::parallel_for( node_node_set.capacity() , *this );
311
312 execution_space().fence();
313 results.fill_graph_entries = wall_clock.seconds();
314
315 //--------------------------------
316 // Done with the temporary sets and arrays
317 wall_clock.reset();
319
323 node_node_set.clear();
324
325 //--------------------------------
326
327 Kokkos::parallel_for( node_count , *this );
328
329 execution_space().fence();
330 results.sort_graph_entries = wall_clock.seconds();
331
332 //--------------------------------
333 // Element-to-graph mapping:
334 wall_clock.reset();
336 elem_graph = ElemGraphType("elem_graph", elem_node_id.extent(0) );
337 Kokkos::parallel_for( elem_node_id.extent(0) , *this );
338
339 execution_space().fence();
340 results.fill_element_graph = wall_clock.seconds();
341 }
342
343 //------------------------------------
344 // parallel_for: create map and count row length
345
346 KOKKOS_INLINE_FUNCTION
347 void fill_set( const unsigned ielem ) const
348 {
349 // Loop over element's (row_local_node,col_local_node) pairs:
350 for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.extent(1) ; ++row_local_node ) {
351
352 const unsigned row_node = elem_node_id( ielem , row_local_node );
353
354 for ( unsigned col_local_node = row_local_node ; col_local_node < elem_node_id.extent(1) ; ++col_local_node ) {
355
356 const unsigned col_node = elem_node_id( ielem , col_local_node );
357
358 // If either node is locally owned then insert the pair into the unordered map:
359
360 if ( row_node < row_count.extent(0) || col_node < row_count.extent(0) ) {
361
362 const key_type key = (row_node < col_node) ? make_pair( row_node, col_node ) : make_pair( col_node, row_node ) ;
363
364 const typename SetType::insert_result result = node_node_set.insert( key );
365
366 if ( result.success() ) {
367 if ( row_node < row_count.extent(0) ) { atomic_fetch_add( & row_count( row_node ) , (typename RowMapType::value_type)1 ); }
368 if ( col_node < row_count.extent(0) && col_node != row_node ) { atomic_fetch_add( & row_count( col_node ) , (typename RowMapType::value_type)1 ); }
369 }
370 }
371 }
372 }
373 }
374
375 KOKKOS_INLINE_FUNCTION
376 void fill_graph_entries( const unsigned iset ) const
377 {
378 if ( node_node_set.valid_at(iset) ) {
379 const key_type key = node_node_set.key_at(iset) ;
380 const unsigned row_node = key.first ;
381 const unsigned col_node = key.second ;
382
383 if ( row_node < row_count.extent(0) ) {
384 const unsigned offset = graph.row_map( row_node ) + atomic_fetch_add( & row_count( row_node ) , (typename RowMapType::value_type)1 );
385 graph.entries( offset ) = col_node ;
386 }
387
388 if ( col_node < row_count.extent(0) && col_node != row_node ) {
389 const unsigned offset = graph.row_map( col_node ) + atomic_fetch_add( & row_count( col_node ) , (typename RowMapType::value_type)1 );
390 graph.entries( offset ) = row_node ;
391 }
392 }
393 }
394
395 KOKKOS_INLINE_FUNCTION
396 void sort_graph_entries( const unsigned irow ) const
397 {
398 typedef typename CrsGraphType::size_type size_type;
399 const size_type row_beg = graph.row_map( irow );
400 const size_type row_end = graph.row_map( irow + 1 );
401 for ( size_type i = row_beg + 1 ; i < row_end ; ++i ) {
402 const typename CrsGraphType::data_type col = graph.entries(i);
403 size_type j = i ;
404 for ( ; row_beg < j && col < graph.entries(j-1) ; --j ) {
405 graph.entries(j) = graph.entries(j-1);
406 }
407 graph.entries(j) = col ;
408 }
409 }
410
411 KOKKOS_INLINE_FUNCTION
412 void fill_elem_graph_map( const unsigned ielem ) const
413 {
414 typedef typename CrsGraphType::data_type entry_type;
415 for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.extent(1) ; ++row_local_node ) {
416
417 const unsigned row_node = elem_node_id( ielem , row_local_node );
418
419 for ( unsigned col_local_node = 0 ; col_local_node < elem_node_id.extent(1) ; ++col_local_node ) {
420
421 const unsigned col_node = elem_node_id( ielem , col_local_node );
422
423 entry_type entry = 0 ;
424
425 if ( row_node + 1 < graph.row_map.extent(0) ) {
426
427 const entry_type entry_end = static_cast<entry_type> (graph.row_map( row_node + 1 ));
428
429 entry = graph.row_map( row_node );
430
431 for ( ; entry < entry_end && graph.entries(entry) != static_cast<entry_type> (col_node) ; ++entry );
432
433 if ( entry == entry_end ) entry = ~0u ;
434 }
435
436 elem_graph( ielem , row_local_node , col_local_node ) = entry ;
437 }
438 }
439 }
440
441 KOKKOS_INLINE_FUNCTION
442 void operator()( const unsigned iwork ) const
443 {
444 if ( phase == FILL_NODE_SET ) {
445 fill_set( iwork );
446 }
447 else if ( phase == FILL_GRAPH_ENTRIES ) {
448 fill_graph_entries( iwork );
449 }
450 else if ( phase == SORT_GRAPH_ENTRIES ) {
451 sort_graph_entries( iwork );
452 }
453 else if ( phase == FILL_ELEMENT_GRAPH ) {
454 fill_elem_graph_map( iwork );
455 }
456 }
457
458 //------------------------------------
459 // parallel_scan: row offsets
460
461 typedef unsigned value_type ;
462
463 KOKKOS_INLINE_FUNCTION
464 void operator()( const unsigned irow , unsigned & update , const bool final ) const
465 {
466 // exclusive scan
467 if ( final ) { row_map( irow ) = update ; }
468
469 update += row_count( irow );
470
471 if ( final ) {
472 if ( irow + 1 == row_count.extent(0) ) {
473 row_map( irow + 1 ) = update ;
474 row_total() = update ;
475 }
476 }
477 }
478
479 KOKKOS_INLINE_FUNCTION
480 void init( unsigned & update ) const { update = 0 ; }
481
482 KOKKOS_INLINE_FUNCTION
483 void join( unsigned & update , const unsigned & input ) const { update += input ; }
484
485 //------------------------------------
486};
487
488} /* namespace FENL */
489} /* namespace Example */
490} /* namespace Kokkos */
491
492//----------------------------------------------------------------------------
493//----------------------------------------------------------------------------
494
495namespace Kokkos {
496namespace Example {
497namespace FENL {
498
500 enum { is_constant = false };
501
502 const double coeff_k ;
503
504 KOKKOS_INLINE_FUNCTION
505 double operator()( double pt[], unsigned ensemble_rank) const
506 { return coeff_k * std::sin(pt[0]) * std::sin(pt[1]) * std::sin(pt[2]); }
507
509 : coeff_k( val ) {}
510
512 : coeff_k( rhs.coeff_k ) {}
513};
514
515// Exponential KL from Stokhos
516template < typename Scalar, typename MeshScalar, typename Device >
518public:
519
520 // Turn into a meta-function class usable with Sacado::mpl::apply
521 template <typename T1, typename T2 = MeshScalar, typename T3 = Device>
522 struct apply {
524 };
525
526 enum { is_constant = false };
527 typedef Kokkos::View<Scalar*, Kokkos::LayoutLeft, Device> RandomVariableView;
528 typedef typename RandomVariableView::size_type size_type;
529
534
535 rf_type m_rf; // Exponential random field
536 const MeshScalar m_mean; // Mean of random field
537 const MeshScalar m_variance; // Variance of random field
538 const MeshScalar m_corr_len; // Correlation length of random field
539 const size_type m_num_rv; // Number of random variables
540 RandomVariableView m_rv; // KL random variables
541
542public:
543
545 const MeshScalar mean ,
546 const MeshScalar variance ,
547 const MeshScalar correlation_length ,
548 const size_type num_rv ) :
549 m_mean( mean ),
550 m_variance( variance ),
551 m_corr_len( correlation_length ),
552 m_num_rv( num_rv ),
553 m_rv( "KL Random Variables", m_num_rv )
554 {
555 Teuchos::ParameterList solverParams;
556 solverParams.set("Number of KL Terms", int(num_rv));
557 solverParams.set("Mean", mean);
558 solverParams.set("Standard Deviation", std::sqrt(variance));
559 int ndim = 3;
560 Teuchos::Array<double> domain_upper(ndim, 1.0), domain_lower(ndim, 0.0),
561 correlation_lengths(ndim, correlation_length);
562 solverParams.set("Domain Upper Bounds", domain_upper);
563 solverParams.set("Domain Lower Bounds", domain_lower);
564 solverParams.set("Correlation Lengths", correlation_lengths);
565
566 m_rf = rf_type(solverParams);
567 }
568
570 m_rf( rhs.m_rf ) ,
571 m_mean( rhs.m_mean ) ,
572 m_variance( rhs.m_variance ) ,
573 m_corr_len( rhs.m_corr_len ) ,
574 m_num_rv( rhs.m_num_rv ) ,
575 m_rv( rhs.m_rv ) {}
576
577 KOKKOS_INLINE_FUNCTION
578 void setRandomVariables( const RandomVariableView& rv) { m_rv = rv; }
579
580 KOKKOS_INLINE_FUNCTION
582
583 KOKKOS_INLINE_FUNCTION
584 local_scalar_type operator() ( const MeshScalar point[],
585 const size_type ensemble_rank ) const
586 {
587 local_rv_view_type local_rv =
589
590 local_scalar_type val = m_rf.evaluate(point, local_rv);
591
592 return val;
593 }
594};
595
596template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
597 class CoordinateMap , typename ScalarType >
599{
600public:
601
604
605 //------------------------------------
606
607 typedef ExecutionSpace execution_space ;
608 typedef ScalarType scalar_type ;
609
612 typedef Kokkos::View< scalar_type* , Kokkos::LayoutLeft, execution_space > vector_type ;
613
614 //------------------------------------
615
617 static const unsigned TensorDim = SpatialDim * SpatialDim ;
621
622 //------------------------------------
623
626 typedef Kokkos::View< scalar_type*[FunctionCount][FunctionCount] , execution_space > elem_matrices_type ;
627 typedef Kokkos::View< scalar_type*[FunctionCount] , execution_space > elem_vectors_type ;
628
630
631 //------------------------------------
632
633
634 //------------------------------------
635 // Computational data:
636
646
648 : elem_data()
650 , node_coords( rhs.node_coords )
651 , elem_graph( rhs.elem_graph )
654 , solution( rhs.solution )
655 , residual( rhs.residual )
656 , jacobian( rhs.jacobian )
657 {}
658
660 const vector_type & arg_solution ,
661 const elem_graph_type & arg_elem_graph ,
662 const sparse_matrix_type & arg_jacobian ,
663 const vector_type & arg_residual )
664 : elem_data()
665 , elem_node_ids( arg_mesh.elem_node() )
666 , node_coords( arg_mesh.node_coord() )
667 , elem_graph( arg_elem_graph )
670 , solution( arg_solution )
671 , residual( arg_residual )
672 , jacobian( arg_jacobian )
673 {}
674
675 //------------------------------------
676
677 KOKKOS_INLINE_FUNCTION
679 const double grad[][ FunctionCount ] , // Gradient of bases master element
680 const double x[] ,
681 const double y[] ,
682 const double z[] ,
683 double dpsidx[] ,
684 double dpsidy[] ,
685 double dpsidz[] ) const
686 {
687 enum { j11 = 0 , j12 = 1 , j13 = 2 ,
688 j21 = 3 , j22 = 4 , j23 = 5 ,
689 j31 = 6 , j32 = 7 , j33 = 8 };
690
691 // Jacobian accumulation:
692
693 double J[ TensorDim ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
694
695 for( unsigned i = 0; i < FunctionCount ; ++i ) {
696 const double x1 = x[i] ;
697 const double x2 = y[i] ;
698 const double x3 = z[i] ;
699
700 const double g1 = grad[0][i] ;
701 const double g2 = grad[1][i] ;
702 const double g3 = grad[2][i] ;
703
704 J[j11] += g1 * x1 ;
705 J[j12] += g1 * x2 ;
706 J[j13] += g1 * x3 ;
707
708 J[j21] += g2 * x1 ;
709 J[j22] += g2 * x2 ;
710 J[j23] += g2 * x3 ;
711
712 J[j31] += g3 * x1 ;
713 J[j32] += g3 * x2 ;
714 J[j33] += g3 * x3 ;
715 }
716
717 // Inverse jacobian:
718
719 double invJ[ TensorDim ] = {
720 static_cast<double>( J[j22] * J[j33] - J[j23] * J[j32] ) ,
721 static_cast<double>( J[j13] * J[j32] - J[j12] * J[j33] ) ,
722 static_cast<double>( J[j12] * J[j23] - J[j13] * J[j22] ) ,
723
724 static_cast<double>( J[j23] * J[j31] - J[j21] * J[j33] ) ,
725 static_cast<double>( J[j11] * J[j33] - J[j13] * J[j31] ) ,
726 static_cast<double>( J[j13] * J[j21] - J[j11] * J[j23] ) ,
727
728 static_cast<double>( J[j21] * J[j32] - J[j22] * J[j31] ) ,
729 static_cast<double>( J[j12] * J[j31] - J[j11] * J[j32] ) ,
730 static_cast<double>( J[j11] * J[j22] - J[j12] * J[j21] ) };
731
732 const double detJ = J[j11] * invJ[j11] +
733 J[j21] * invJ[j12] +
734 J[j31] * invJ[j13] ;
735
736 const double detJinv = 1.0 / detJ ;
737
738 for ( unsigned i = 0 ; i < TensorDim ; ++i ) { invJ[i] *= detJinv ; }
739
740 // Transform gradients:
741
742 for( unsigned i = 0; i < FunctionCount ; ++i ) {
743 const double g0 = grad[0][i];
744 const double g1 = grad[1][i];
745 const double g2 = grad[2][i];
746
747 dpsidx[i] = g0 * invJ[j11] + g1 * invJ[j12] + g2 * invJ[j13];
748 dpsidy[i] = g0 * invJ[j21] + g1 * invJ[j22] + g2 * invJ[j23];
749 dpsidz[i] = g0 * invJ[j31] + g1 * invJ[j32] + g2 * invJ[j33];
750 }
751
752 return detJ ;
753 }
754
755};
756
763
764template< class FiniteElementMeshType ,
765 class SparseMatrixType ,
766 AssemblyMethod Method ,
767 class CoeffFunctionType = ElementComputationConstantCoefficient
768 >
770
771template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
772 class CoordinateMap , typename ScalarType , class CoeffFunctionType >
774 < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
775 CrsMatrix< ScalarType , ExecutionSpace > ,
776 Analytic , CoeffFunctionType > :
777 public ElementComputationBase<ExecutionSpace, Order, CoordinateMap,
778 ScalarType> {
779public:
780
781 typedef ElementComputationBase<ExecutionSpace, Order, CoordinateMap,
782 ScalarType> base_type;
783
786
787 static const unsigned FunctionCount = base_type::FunctionCount;
788 static const unsigned IntegrationCount = base_type::IntegrationCount;
789 static const unsigned ElemNodeCount = base_type::ElemNodeCount;
790
791 const CoeffFunctionType coeff_function ;
793
795 base_type(rhs) ,
796 coeff_function( rhs.coeff_function ) ,
797 dev_config( rhs.dev_config ) {}
798
800 const typename base_type::mesh_type & arg_mesh ,
801 const CoeffFunctionType & arg_coeff_function ,
802 const typename base_type::vector_type & arg_solution ,
803 const typename base_type::elem_graph_type & arg_elem_graph ,
804 const typename base_type::sparse_matrix_type & arg_jacobian ,
805 const typename base_type::vector_type & arg_residual ,
806 const Kokkos::Example::FENL::DeviceConfig arg_dev_config ) :
807 base_type(arg_mesh, arg_solution, arg_elem_graph, arg_jacobian,
808 arg_residual),
809 coeff_function( arg_coeff_function ) ,
810 dev_config( arg_dev_config ) {}
811
812 //------------------------------------
813
814 void apply() const
815 {
816 const size_t nelem = this->elem_node_ids.extent(0);
817 parallel_for( nelem , *this );
818 }
819
820 KOKKOS_INLINE_FUNCTION
821 void gatherSolution(const unsigned ielem,
823 unsigned node_index[],
824 double x[], double y[], double z[],
825 scalar_type res[],
826 scalar_type mat[][FunctionCount]) const
827 {
828 for ( unsigned i = 0 ; i < ElemNodeCount ; ++i ) {
829 const unsigned ni = this->elem_node_ids( ielem , i );
830
831 node_index[i] = ni ;
832
833 x[i] = this->node_coords( ni , 0 );
834 y[i] = this->node_coords( ni , 1 );
835 z[i] = this->node_coords( ni , 2 );
836
837 val[i] = this->solution( ni ) ;
838 res[i] = 0 ;
839
840 for( unsigned j = 0; j < FunctionCount ; j++){
841 mat[i][j] = 0 ;
842 }
843 }
844 }
845
846 KOKKOS_INLINE_FUNCTION
847 void scatterResidual(const unsigned ielem,
848 const unsigned node_index[],
849 const scalar_type res[],
850 const scalar_type mat[][FunctionCount]) const
851 {
852 for( unsigned i = 0 ; i < FunctionCount ; i++ ) {
853 const unsigned row = node_index[i] ;
854 if ( row < this->residual.extent(0) ) {
855 atomic_add( & this->residual( row ) , res[i] );
856
857 for( unsigned j = 0 ; j < FunctionCount ; j++ ) {
858 const unsigned entry = this->elem_graph( ielem , i , j );
859 if ( entry != ~0u ) {
860 atomic_add( & this->jacobian.values( entry ) , mat[i][j] );
861 }
862 }
863 }
864 }
865 }
866
867 KOKKOS_INLINE_FUNCTION
869 const scalar_type dof_values[] ,
870 const double x[],
871 const double y[],
872 const double z[],
873 scalar_type elem_res[] ,
874 scalar_type elem_mat[][FunctionCount] ) const
875 {
876 scalar_type coeff_k;
877 double coeff_src = 1.234;
878 double advection[] = { 1.1, 1.2, 1.3 };
879 double dpsidx[ FunctionCount ] ;
880 double dpsidy[ FunctionCount ] ;
881 double dpsidz[ FunctionCount ] ;
882 double pt[] = {0.0, 0.0, 0.0};
883 for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
884
885 // If function is not constant
886 // then compute physical coordinates of integration point
887 if ( ! coeff_function.is_constant ) {
888 for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
889 pt[0] += x[j] * this->elem_data.values[i][j] ;
890 pt[1] += y[j] * this->elem_data.values[i][j] ;
891 pt[2] += z[j] * this->elem_data.values[i][j] ;
892 }
893 }
894 coeff_k = coeff_function(pt, 0);
895
896 const double integ_weight = this->elem_data.weights[i];
897 const double* bases_vals = this->elem_data.values[i];
898 const double detJ =
899 this->transform_gradients( this->elem_data.gradients[i] ,
900 x , y , z ,
901 dpsidx , dpsidy , dpsidz );
902 const double detJ_weight = detJ * integ_weight;
903 const scalar_type detJ_weight_coeff_k = detJ_weight * coeff_k;
904
905 scalar_type value_at_pt = 0 ;
906 scalar_type gradx_at_pt = 0 ;
907 scalar_type grady_at_pt = 0 ;
908 scalar_type gradz_at_pt = 0 ;
909 for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
910 value_at_pt += dof_values[m] * bases_vals[m] ;
911 gradx_at_pt += dof_values[m] * dpsidx[m] ;
912 grady_at_pt += dof_values[m] * dpsidy[m] ;
913 gradz_at_pt += dof_values[m] * dpsidz[m] ;
914 }
915
916 const scalar_type source_term =
917 coeff_src * value_at_pt * value_at_pt ;
918 const scalar_type source_deriv =
919 2.0 * coeff_src * value_at_pt ;
920
921 const scalar_type advection_x = advection[0];
922 const scalar_type advection_y = advection[1];
923 const scalar_type advection_z = advection[2];
924
925 const scalar_type advection_term =
926 advection_x*gradx_at_pt +
927 advection_y*grady_at_pt +
928 advection_z*gradz_at_pt ;
929
930 for ( unsigned m = 0; m < FunctionCount; ++m) {
931 scalar_type * const mat = elem_mat[m] ;
932 const double bases_val_m = bases_vals[m] * detJ_weight ;
933 const double dpsidx_m = dpsidx[m] ;
934 const double dpsidy_m = dpsidy[m] ;
935 const double dpsidz_m = dpsidz[m] ;
936
937 elem_res[m] +=
938 detJ_weight_coeff_k * ( dpsidx_m * gradx_at_pt +
939 dpsidy_m * grady_at_pt +
940 dpsidz_m * gradz_at_pt ) +
941 bases_val_m * ( advection_term + source_term ) ;
942
943 for( unsigned n = 0; n < FunctionCount; n++) {
944 const double dpsidx_n = dpsidx[n] ;
945 const double dpsidy_n = dpsidy[n] ;
946 const double dpsidz_n = dpsidz[n] ;
947 mat[n] +=
948 detJ_weight_coeff_k * ( dpsidx_m * dpsidx_n +
949 dpsidy_m * dpsidy_n +
950 dpsidz_m * dpsidz_n ) +
951 bases_val_m * ( advection_x * dpsidx_n +
952 advection_y * dpsidy_n +
953 advection_z * dpsidz_n +
954 source_deriv * bases_vals[n] ) ;
955 }
956 }
957 }
958 }
959
960 KOKKOS_INLINE_FUNCTION
961 void operator()( const unsigned ielem ) const
962 {
963 double x[ FunctionCount ] ;
964 double y[ FunctionCount ] ;
965 double z[ FunctionCount ] ;
966 unsigned node_index[ ElemNodeCount ];
967
968 scalar_type val[ FunctionCount ] ;
969 scalar_type elem_res[ FunctionCount ] ;
970 scalar_type elem_mat[ FunctionCount ][ FunctionCount ] ;
971
972 // Gather nodal coordinates and solution vector:
973 gatherSolution(ielem, val, node_index, x, y, z, elem_res, elem_mat);
974
975 // Compute nodal element residual vector and Jacobian matrix
976 computeElementResidualJacobian( val, x, y, z, elem_res , elem_mat );
977
978 // Scatter nodal element residual and Jacobian in global vector and matrix:
979 scatterResidual( ielem, node_index, elem_res, elem_mat );
980 }
981}; /* ElementComputation */
982
983template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
984 class CoordinateMap , typename ScalarType , class CoeffFunctionType >
986 < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
987 CrsMatrix< ScalarType , ExecutionSpace > ,
988 FadElement , CoeffFunctionType > :
989 public ElementComputationBase<ExecutionSpace, Order, CoordinateMap,
990 ScalarType> {
991public:
992
993 typedef ElementComputationBase<ExecutionSpace, Order, CoordinateMap,
994 ScalarType> base_type;
995
998
999 static const unsigned FunctionCount = base_type::FunctionCount;
1000 static const unsigned IntegrationCount = base_type::IntegrationCount;
1001 static const unsigned ElemNodeCount = base_type::ElemNodeCount;
1002
1003 typedef Sacado::Fad::SLFad<scalar_type,FunctionCount> fad_scalar_type;
1004 //typedef Sacado::Fad::DFad<scalar_type> fad_scalar_type;
1005
1006 const CoeffFunctionType coeff_function ;
1008
1010 base_type(rhs) ,
1011 coeff_function( rhs.coeff_function ) ,
1012 dev_config( rhs.dev_config ) {}
1013
1015 const typename base_type::mesh_type & arg_mesh ,
1016 const CoeffFunctionType & arg_coeff_function ,
1017 const typename base_type::vector_type & arg_solution ,
1018 const typename base_type::elem_graph_type & arg_elem_graph ,
1019 const typename base_type::sparse_matrix_type & arg_jacobian ,
1020 const typename base_type::vector_type & arg_residual ,
1021 const Kokkos::Example::FENL::DeviceConfig arg_dev_config ) :
1022 base_type(arg_mesh, arg_solution, arg_elem_graph,
1023 arg_jacobian, arg_residual),
1024 coeff_function( arg_coeff_function ) ,
1025 dev_config( arg_dev_config ) {}
1026
1027 //------------------------------------
1028
1029 void apply() const
1030 {
1031 const size_t nelem = this->elem_node_ids.extent(0);
1032 parallel_for( nelem , *this );
1033 }
1034
1035 KOKKOS_INLINE_FUNCTION
1036 void gatherSolution(const unsigned ielem,
1038 unsigned node_index[],
1039 double x[], double y[], double z[],
1040 fad_scalar_type res[]) const
1041 {
1042 for ( unsigned i = 0 ; i < ElemNodeCount ; ++i ) {
1043 const unsigned ni = this->elem_node_ids( ielem , i );
1044
1045 node_index[i] = ni ;
1046
1047 x[i] = this->node_coords( ni , 0 );
1048 y[i] = this->node_coords( ni , 1 );
1049 z[i] = this->node_coords( ni , 2 );
1050
1051 val[i].val() = this->solution( ni );
1052 val[i].diff( i, FunctionCount );
1053 }
1054 }
1055
1056 KOKKOS_INLINE_FUNCTION
1057 void scatterResidual(const unsigned ielem,
1058 const unsigned node_index[],
1059 fad_scalar_type res[]) const
1060 {
1061 for( unsigned i = 0 ; i < FunctionCount ; i++ ) {
1062 const unsigned row = node_index[i] ;
1063 if ( row < this->residual.extent(0) ) {
1064 atomic_add( & this->residual( row ) , res[i].val() );
1065
1066 for( unsigned j = 0 ; j < FunctionCount ; j++ ) {
1067 const unsigned entry = this->elem_graph( ielem , i , j );
1068 if ( entry != ~0u ) {
1069 atomic_add( & this->jacobian.values( entry ) ,
1070 res[i].fastAccessDx(j) );
1071 }
1072 }
1073 }
1074 }
1075 }
1076
1077 template <typename local_scalar_type>
1078 KOKKOS_INLINE_FUNCTION
1079 void computeElementResidual(const local_scalar_type dof_values[] ,
1080 const double x[],
1081 const double y[],
1082 const double z[],
1083 local_scalar_type elem_res[] ) const
1084 {
1085 scalar_type coeff_k;
1086 double coeff_src = 1.234;
1087 double advection[] = { 1.1, 1.2, 1.3 };
1088 double dpsidx[ FunctionCount ] ;
1089 double dpsidy[ FunctionCount ] ;
1090 double dpsidz[ FunctionCount ] ;
1091 double pt[] = {0.0, 0.0, 0.0};
1092 for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
1093
1094 // If function is not constant
1095 // then compute physical coordinates of integration point
1096 if ( ! coeff_function.is_constant ) {
1097 for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
1098 pt[0] += x[j] * this->elem_data.values[i][j] ;
1099 pt[1] += y[j] * this->elem_data.values[i][j] ;
1100 pt[2] += z[j] * this->elem_data.values[i][j] ;
1101 }
1102 }
1103 coeff_k = coeff_function(pt, 0);
1104
1105 const double integ_weight = this->elem_data.weights[i];
1106 const double* bases_vals = this->elem_data.values[i];
1107 const double detJ =
1108 this->transform_gradients( this->elem_data.gradients[i] ,
1109 x , y , z ,
1110 dpsidx , dpsidy , dpsidz );
1111 const double detJ_weight = detJ * integ_weight;
1112 const scalar_type detJ_weight_coeff_k = detJ_weight * coeff_k;
1113
1114 local_scalar_type value_at_pt = 0 ;
1115 local_scalar_type gradx_at_pt = 0 ;
1116 local_scalar_type grady_at_pt = 0 ;
1117 local_scalar_type gradz_at_pt = 0 ;
1118 for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
1119 value_at_pt += dof_values[m] * bases_vals[m] ;
1120 gradx_at_pt += dof_values[m] * dpsidx[m] ;
1121 grady_at_pt += dof_values[m] * dpsidy[m] ;
1122 gradz_at_pt += dof_values[m] * dpsidz[m] ;
1123 }
1124
1125 const local_scalar_type source_term =
1126 coeff_src * value_at_pt * value_at_pt ;
1127
1128 const local_scalar_type advection_term =
1129 advection[0]*gradx_at_pt +
1130 advection[1]*grady_at_pt +
1131 advection[2]*gradz_at_pt;
1132
1133 for ( unsigned m = 0; m < FunctionCount; ++m) {
1134 const double bases_val_m = bases_vals[m] * detJ_weight ;
1135 const double dpsidx_m = dpsidx[m] ;
1136 const double dpsidy_m = dpsidy[m] ;
1137 const double dpsidz_m = dpsidz[m] ;
1138
1139 elem_res[m] +=
1140 detJ_weight_coeff_k * ( dpsidx_m * gradx_at_pt +
1141 dpsidy_m * grady_at_pt +
1142 dpsidz_m * gradz_at_pt ) +
1143 bases_val_m * ( advection_term + source_term ) ;
1144 }
1145 }
1146 }
1147
1148 KOKKOS_INLINE_FUNCTION
1149 void operator()( const unsigned ielem ) const
1150 {
1151 double x[ FunctionCount ] ;
1152 double y[ FunctionCount ] ;
1153 double z[ FunctionCount ] ;
1154 unsigned node_index[ ElemNodeCount ];
1155
1156 fad_scalar_type val[ FunctionCount ] ;
1157 fad_scalar_type elem_res[ FunctionCount ] ; // this zeros elem_res
1158
1159 // Gather nodal coordinates and solution vector:
1160 gatherSolution( ielem, val, node_index, x, y, z, elem_res );
1161
1162 // Compute nodal element residual vector:
1163 computeElementResidual( val, x, y, z, elem_res );
1164
1165 // Scatter nodal element residual in global vector:
1166 scatterResidual( ielem, node_index, elem_res );
1167 }
1168}; /* ElementComputation */
1169
1170template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
1171 class CoordinateMap , typename ScalarType , class CoeffFunctionType >
1173 < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1174 CrsMatrix< ScalarType , ExecutionSpace > ,
1175 FadElementOptimized , CoeffFunctionType > :
1176 public ElementComputation< Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1177 CrsMatrix< ScalarType , ExecutionSpace > ,
1178 FadElement, CoeffFunctionType > {
1179public:
1180
1183 FadElement , CoeffFunctionType > base_type;
1184
1187
1188 static const unsigned FunctionCount = base_type::FunctionCount;
1189 static const unsigned IntegrationCount = base_type::IntegrationCount;
1190 static const unsigned ElemNodeCount = base_type::ElemNodeCount;
1191
1192 typedef Sacado::Fad::SLFad<scalar_type,FunctionCount> fad_scalar_type;
1193 //typedef Sacado::Fad::DFad<scalar_type> fad_scalar_type;
1194
1196
1198 const typename base_type::mesh_type & arg_mesh ,
1199 const CoeffFunctionType & arg_coeff_function ,
1200 const typename base_type::vector_type & arg_solution ,
1201 const typename base_type::elem_graph_type & arg_elem_graph ,
1202 const typename base_type::sparse_matrix_type & arg_jacobian ,
1203 const typename base_type::vector_type & arg_residual ,
1204 const Kokkos::Example::FENL::DeviceConfig arg_dev_config ) :
1205 base_type(arg_mesh, arg_coeff_function, arg_solution, arg_elem_graph,
1206 arg_jacobian, arg_residual, arg_dev_config) {}
1207
1208 //------------------------------------
1209
1210 void apply() const
1211 {
1212 const size_t nelem = this->elem_node_ids.extent(0);
1213 parallel_for( nelem , *this );
1214 }
1215
1216 KOKKOS_INLINE_FUNCTION
1217 void gatherSolution(const unsigned ielem,
1218 scalar_type val[],
1219 unsigned node_index[],
1220 double x[], double y[], double z[],
1221 fad_scalar_type res[]) const
1222 {
1223 for ( unsigned i = 0 ; i < ElemNodeCount ; ++i ) {
1224 const unsigned ni = this->elem_node_ids( ielem , i );
1225
1226 node_index[i] = ni ;
1227
1228 x[i] = this->node_coords( ni , 0 );
1229 y[i] = this->node_coords( ni , 1 );
1230 z[i] = this->node_coords( ni , 2 );
1231
1232 val[i] = this->solution( ni );
1233 }
1234 }
1235
1236 template <typename local_scalar_type>
1237 KOKKOS_INLINE_FUNCTION
1238 void computeElementResidual(const scalar_type dof_values[] ,
1239 const double x[],
1240 const double y[],
1241 const double z[],
1242 local_scalar_type elem_res[] ) const
1243 {
1244 scalar_type coeff_k;
1245 double coeff_src = 1.234;
1246 double advection[] = { 1.1, 1.2, 1.3 };
1247 double dpsidx[ FunctionCount ] ;
1248 double dpsidy[ FunctionCount ] ;
1249 double dpsidz[ FunctionCount ] ;
1250 double pt[] = {0.0, 0.0, 0.0};
1251 for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
1252
1253 // If function is not constant
1254 // then compute physical coordinates of integration point
1255 if ( ! this->coeff_function.is_constant ) {
1256 for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
1257 pt[0] += x[j] * this->elem_data.values[i][j] ;
1258 pt[1] += y[j] * this->elem_data.values[i][j] ;
1259 pt[2] += z[j] * this->elem_data.values[i][j] ;
1260 }
1261 }
1262 coeff_k = this->coeff_function(pt, 0);
1263
1264 const double integ_weight = this->elem_data.weights[i];
1265 const double* bases_vals = this->elem_data.values[i];
1266 const double detJ =
1267 this->transform_gradients( this->elem_data.gradients[i] ,
1268 x , y , z ,
1269 dpsidx , dpsidy , dpsidz );
1270 const double detJ_weight = detJ * integ_weight;
1271 const scalar_type detJ_weight_coeff_k = detJ_weight * coeff_k;
1272
1273 local_scalar_type value_at_pt(FunctionCount, scalar_type(0.0), Sacado::NoInitDerivArray) ;
1274 local_scalar_type gradx_at_pt(FunctionCount, scalar_type(0.0), Sacado::NoInitDerivArray) ;
1275 local_scalar_type grady_at_pt(FunctionCount, scalar_type(0.0), Sacado::NoInitDerivArray) ;
1276 local_scalar_type gradz_at_pt(FunctionCount, scalar_type(0.0), Sacado::NoInitDerivArray) ;
1277 for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
1278 value_at_pt.val() += dof_values[m] * bases_vals[m] ;
1279 value_at_pt.fastAccessDx(m) = bases_vals[m] ;
1280
1281 gradx_at_pt.val() += dof_values[m] * dpsidx[m] ;
1282 gradx_at_pt.fastAccessDx(m) = dpsidx[m] ;
1283
1284 grady_at_pt.val() += dof_values[m] * dpsidy[m] ;
1285 grady_at_pt.fastAccessDx(m) = dpsidy[m] ;
1286
1287 gradz_at_pt.val() += dof_values[m] * dpsidz[m] ;
1288 gradz_at_pt.fastAccessDx(m) = dpsidz[m] ;
1289 }
1290
1291 const local_scalar_type source_term =
1292 coeff_src * value_at_pt * value_at_pt ;
1293
1294 const local_scalar_type advection_term =
1295 advection[0]*gradx_at_pt +
1296 advection[1]*grady_at_pt +
1297 advection[2]*gradz_at_pt;
1298
1299 for ( unsigned m = 0; m < FunctionCount; ++m) {
1300 const double bases_val_m = bases_vals[m] * detJ_weight ;
1301 const double dpsidx_m = dpsidx[m] ;
1302 const double dpsidy_m = dpsidy[m] ;
1303 const double dpsidz_m = dpsidz[m] ;
1304
1305 elem_res[m] +=
1306 detJ_weight_coeff_k * ( dpsidx_m * gradx_at_pt +
1307 dpsidy_m * grady_at_pt +
1308 dpsidz_m * gradz_at_pt ) +
1309 bases_val_m * ( advection_term + source_term ) ;
1310 }
1311 }
1312 }
1313
1314 KOKKOS_INLINE_FUNCTION
1315 void operator()( const unsigned ielem ) const
1316 {
1317 double x[ FunctionCount ] ;
1318 double y[ FunctionCount ] ;
1319 double z[ FunctionCount ] ;
1320 unsigned node_index[ ElemNodeCount ];
1321
1322 scalar_type val[ FunctionCount ] ;
1323 fad_scalar_type elem_res[ FunctionCount ] ;
1324
1325 // Gather nodal coordinates and solution vector:
1326 gatherSolution( ielem, val, node_index, x, y, z, elem_res );
1327
1328 // Compute nodal element residual vector:
1329 computeElementResidual( val, x, y, z, elem_res );
1330
1331 // Scatter nodal element residual in global vector:
1332 this->scatterResidual( ielem, node_index, elem_res );
1333 }
1334}; /* ElementComputation */
1335
1336template< class ExecutionSpace , BoxElemPart::ElemOrder Order ,
1337 class CoordinateMap , typename ScalarType , class CoeffFunctionType >
1339 < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1340 CrsMatrix< ScalarType , ExecutionSpace > ,
1341 FadQuadPoint , CoeffFunctionType > :
1342 public ElementComputation< Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1343 CrsMatrix< ScalarType , ExecutionSpace > ,
1344 Analytic , CoeffFunctionType > {
1345public:
1346
1349 Analytic , CoeffFunctionType > base_type;
1350
1353
1354 static const unsigned FunctionCount = base_type::FunctionCount;
1355 static const unsigned IntegrationCount = base_type::IntegrationCount;
1356 static const unsigned ElemNodeCount = base_type::ElemNodeCount;
1357
1358 typedef Sacado::Fad::SLFad<scalar_type,4> fad_scalar_type;
1359 //typedef Sacado::Fad::DFad<scalar_type> fad_scalar_type;
1360
1362
1364 const typename base_type::mesh_type & arg_mesh ,
1365 const CoeffFunctionType & arg_coeff_function ,
1366 const typename base_type::vector_type & arg_solution ,
1367 const typename base_type::elem_graph_type & arg_elem_graph ,
1368 const typename base_type::sparse_matrix_type & arg_jacobian ,
1369 const typename base_type::vector_type & arg_residual ,
1370 const Kokkos::Example::FENL::DeviceConfig arg_dev_config ) :
1371 base_type(arg_mesh, arg_coeff_function, arg_solution, arg_elem_graph,
1372 arg_jacobian, arg_residual, arg_dev_config) {}
1373
1374 //------------------------------------
1375
1376 void apply() const
1377 {
1378 const size_t nelem = this->elem_node_ids.extent(0);
1379 parallel_for( nelem , *this );
1380 }
1381
1382 KOKKOS_INLINE_FUNCTION
1384 const scalar_type dof_values[] ,
1385 const double x[],
1386 const double y[],
1387 const double z[],
1388 scalar_type elem_res[] ,
1389 scalar_type elem_mat[][FunctionCount] ) const
1390 {
1391 scalar_type coeff_k;
1392 double coeff_src = 1.234;
1393 double advection[] = { 1.1, 1.2, 1.3 };
1394 double dpsidx[ FunctionCount ] ;
1395 double dpsidy[ FunctionCount ] ;
1396 double dpsidz[ FunctionCount ] ;
1397 double pt[] = {0.0, 0.0, 0.0};
1398
1399 fad_scalar_type value_at_pt(4, 0, 0.0) ;
1400 fad_scalar_type gradx_at_pt(4, 1, 0.0) ;
1401 fad_scalar_type grady_at_pt(4, 2, 0.0) ;
1402 fad_scalar_type gradz_at_pt(4, 3, 0.0) ;
1403 for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
1404
1405 // If function is not constant
1406 // then compute physical coordinates of integration point
1407 if ( ! this->coeff_function.is_constant ) {
1408 for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
1409 pt[0] += x[j] * this->elem_data.values[i][j] ;
1410 pt[1] += y[j] * this->elem_data.values[i][j] ;
1411 pt[2] += z[j] * this->elem_data.values[i][j] ;
1412 }
1413 }
1414 coeff_k = this->coeff_function(pt, 0);
1415
1416 const double integ_weight = this->elem_data.weights[i];
1417 const double* bases_vals = this->elem_data.values[i];
1418 const double detJ =
1419 this->transform_gradients( this->elem_data.gradients[i] ,
1420 x , y , z ,
1421 dpsidx , dpsidy , dpsidz );
1422 const double detJ_weight = detJ * integ_weight;
1423 const scalar_type detJ_weight_coeff_k = detJ_weight * coeff_k;
1424
1425 value_at_pt.val() = 0.0 ;
1426 gradx_at_pt.val() = 0.0 ;
1427 grady_at_pt.val() = 0.0 ;
1428 gradz_at_pt.val() = 0.0 ;
1429 for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
1430 value_at_pt.val() += dof_values[m] * bases_vals[m] ;
1431 gradx_at_pt.val() += dof_values[m] * dpsidx[m] ;
1432 grady_at_pt.val() += dof_values[m] * dpsidy[m] ;
1433 gradz_at_pt.val() += dof_values[m] * dpsidz[m] ;
1434 }
1435
1436 const fad_scalar_type source_term =
1437 coeff_src * value_at_pt * value_at_pt ;
1438
1439 const fad_scalar_type advection_term =
1440 advection[0]*gradx_at_pt +
1441 advection[1]*grady_at_pt +
1442 advection[2]*gradz_at_pt;
1443
1444 for ( unsigned m = 0; m < FunctionCount; ++m) {
1445 const double bases_val_m = bases_vals[m] * detJ_weight ;
1446 fad_scalar_type res =
1447 detJ_weight_coeff_k * ( dpsidx[m] * gradx_at_pt +
1448 dpsidy[m] * grady_at_pt +
1449 dpsidz[m] * gradz_at_pt ) +
1450 bases_val_m * ( advection_term + source_term ) ;
1451
1452 elem_res[m] += res.val();
1453
1454 scalar_type * const mat = elem_mat[m] ;
1455 for( unsigned n = 0; n < FunctionCount; n++) {
1456 mat[n] += res.fastAccessDx(0) * bases_vals[n] +
1457 res.fastAccessDx(1) * dpsidx[n] +
1458 res.fastAccessDx(2) * dpsidy[n] +
1459 res.fastAccessDx(3) * dpsidz[n];
1460 }
1461 }
1462 }
1463 }
1464
1465 KOKKOS_INLINE_FUNCTION
1466 void operator()( const unsigned ielem ) const
1467 {
1468 double x[ FunctionCount ] ;
1469 double y[ FunctionCount ] ;
1470 double z[ FunctionCount ] ;
1471 unsigned node_index[ ElemNodeCount ];
1472
1473 scalar_type val[ FunctionCount ] ;
1474 scalar_type elem_res[ FunctionCount ] ;
1475 scalar_type elem_mat[ FunctionCount ][ FunctionCount ] ;
1476
1477 // Gather nodal coordinates and solution vector:
1478 this->gatherSolution( ielem, val, node_index, x, y, z, elem_res, elem_mat );
1479
1480 // Compute nodal element residual vector and Jacobian matrix:
1481 computeElementResidualJacobian( val, x, y, z, elem_res, elem_mat );
1482
1483 // Scatter nodal element residual and Jacobian in global vector and matrix:
1484 this->scatterResidual( ielem, node_index, elem_res, elem_mat );
1485 }
1486}; /* ElementComputation */
1487
1488#if 0
1489template< class FiniteElementMeshType , class SparseMatrixType
1490 , class CoeffFunctionType = ElementComputationConstantCoefficient
1491 >
1492class ElementComputation ;
1493
1494
1495template< class ExecutionSpace , BoxElemPart::ElemOrder Order , class CoordinateMap ,
1496 typename ScalarType , class CoeffFunctionType >
1497class ElementComputation
1498 < Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap >
1499 , Kokkos::Example::FENL::CrsMatrix< ScalarType , ExecutionSpace >
1500 , CoeffFunctionType >
1501{
1502public:
1503
1506
1507 //------------------------------------
1508
1509 typedef ExecutionSpace execution_space ;
1510 typedef ScalarType scalar_type ;
1511
1513 typedef typename sparse_matrix_type::StaticCrsGraphType sparse_graph_type ;
1514 typedef typename sparse_matrix_type::values_type matrix_values_type ;
1515 typedef Kokkos::View< scalar_type* , Kokkos::LayoutLeft, execution_space > vector_type ;
1516
1517 //------------------------------------
1518
1519 typedef LocalViewTraits< vector_type > local_vector_view_traits;
1520 typedef LocalViewTraits< matrix_values_type> local_matrix_view_traits;
1521 typedef typename local_vector_view_traits::local_view_type local_vector_type;
1522 typedef typename local_matrix_view_traits::local_view_type local_matrix_type;
1523 typedef typename local_vector_view_traits::local_value_type local_scalar_type;
1524 static const bool use_team = local_vector_view_traits::use_team;
1525
1526 static const unsigned SpatialDim = element_data_type::spatial_dimension ;
1527 static const unsigned TensorDim = SpatialDim * SpatialDim ;
1528 static const unsigned ElemNodeCount = element_data_type::element_node_count ;
1529 static const unsigned FunctionCount = element_data_type::function_count ;
1530 static const unsigned IntegrationCount = element_data_type::integration_count ;
1531
1532 //------------------------------------
1533
1534 typedef typename mesh_type::node_coord_type node_coord_type ;
1535 typedef typename mesh_type::elem_node_type elem_node_type ;
1536 typedef Kokkos::View< scalar_type*[FunctionCount][FunctionCount] , execution_space > elem_matrices_type ;
1537 typedef Kokkos::View< scalar_type*[FunctionCount] , execution_space > elem_vectors_type ;
1538
1539 typedef LocalViewTraits< elem_matrices_type > local_elem_matrices_traits;
1540 typedef LocalViewTraits< elem_vectors_type > local_elem_vectors_traits;
1541 typedef typename local_elem_matrices_traits::local_view_type local_elem_matrices_type;
1542 typedef typename local_elem_vectors_traits::local_view_type local_elem_vectors_type;
1543
1545
1546 //------------------------------------
1547
1548
1549 //------------------------------------
1550 // Computational data:
1551
1552 const element_data_type elem_data ;
1553 const elem_node_type elem_node_ids ;
1554 const node_coord_type node_coords ;
1555 const elem_graph_type elem_graph ;
1556 const elem_matrices_type elem_jacobians ;
1557 const elem_vectors_type elem_residuals ;
1558 const vector_type solution ;
1559 const vector_type residual ;
1560 const sparse_matrix_type jacobian ;
1561 const CoeffFunctionType coeff_function ;
1562 const Kokkos::Example::FENL::DeviceConfig dev_config ;
1563
1564 ElementComputation( const ElementComputation & rhs )
1565 : elem_data()
1566 , elem_node_ids( rhs.elem_node_ids )
1567 , node_coords( rhs.node_coords )
1568 , elem_graph( rhs.elem_graph )
1569 , elem_jacobians( rhs.elem_jacobians )
1570 , elem_residuals( rhs.elem_residuals )
1571 , solution( rhs.solution )
1572 , residual( rhs.residual )
1573 , jacobian( rhs.jacobian )
1574 , coeff_function( rhs.coeff_function )
1575 , dev_config( rhs.dev_config )
1576 {}
1577
1578 // If the element->sparse_matrix graph is provided then perform atomic updates
1579 // Otherwise fill per-element contributions for subequent gather-add into a residual and jacobian.
1580 ElementComputation( const mesh_type & arg_mesh ,
1581 const CoeffFunctionType & arg_coeff_function ,
1582 const vector_type & arg_solution ,
1583 const elem_graph_type & arg_elem_graph ,
1584 const sparse_matrix_type & arg_jacobian ,
1585 const vector_type & arg_residual ,
1586 const Kokkos::Example::FENL::DeviceConfig arg_dev_config )
1587 : elem_data()
1588 , elem_node_ids( arg_mesh.elem_node() )
1589 , node_coords( arg_mesh.node_coord() )
1590 , elem_graph( arg_elem_graph )
1591 , elem_jacobians()
1592 , elem_residuals()
1593 , solution( arg_solution )
1594 , residual( arg_residual )
1595 , jacobian( arg_jacobian )
1596 , coeff_function( arg_coeff_function )
1597 , dev_config( arg_dev_config )
1598 {}
1599
1600 //------------------------------------
1601
1602 void apply() const
1603 {
1604 const size_t nelem = elem_node_ids.extent(0);
1605 if ( use_team ) {
1606 const size_t team_size = dev_config.block_dim.x * dev_config.block_dim.y;
1607 const size_t league_size =
1608 (nelem + dev_config.block_dim.y-1) / dev_config.block_dim.y;
1609 Kokkos::TeamPolicy< execution_space > config( league_size, team_size );
1610 parallel_for( config , *this );
1611 }
1612 else {
1613 parallel_for( nelem , *this );
1614 }
1615 }
1616
1617 //------------------------------------
1618
1619 static const unsigned FLOPS_transform_gradients =
1620 /* Jacobian */ FunctionCount * TensorDim * 2 +
1621 /* Inverse jacobian */ TensorDim * 6 + 6 +
1622 /* Gradient transform */ FunctionCount * 15 ;
1623
1624 KOKKOS_INLINE_FUNCTION
1625 double transform_gradients(
1626 const double grad[][ FunctionCount ] , // Gradient of bases master element
1627 const double x[] ,
1628 const double y[] ,
1629 const double z[] ,
1630 double dpsidx[] ,
1631 double dpsidy[] ,
1632 double dpsidz[] ) const
1633 {
1634 enum { j11 = 0 , j12 = 1 , j13 = 2 ,
1635 j21 = 3 , j22 = 4 , j23 = 5 ,
1636 j31 = 6 , j32 = 7 , j33 = 8 };
1637
1638 // Jacobian accumulation:
1639
1640 double J[ TensorDim ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1641
1642 for( unsigned i = 0; i < FunctionCount ; ++i ) {
1643 const double x1 = x[i] ;
1644 const double x2 = y[i] ;
1645 const double x3 = z[i] ;
1646
1647 const double g1 = grad[0][i] ;
1648 const double g2 = grad[1][i] ;
1649 const double g3 = grad[2][i] ;
1650
1651 J[j11] += g1 * x1 ;
1652 J[j12] += g1 * x2 ;
1653 J[j13] += g1 * x3 ;
1654
1655 J[j21] += g2 * x1 ;
1656 J[j22] += g2 * x2 ;
1657 J[j23] += g2 * x3 ;
1658
1659 J[j31] += g3 * x1 ;
1660 J[j32] += g3 * x2 ;
1661 J[j33] += g3 * x3 ;
1662 }
1663
1664 // Inverse jacobian:
1665
1666 double invJ[ TensorDim ] = {
1667 static_cast<double>( J[j22] * J[j33] - J[j23] * J[j32] ) ,
1668 static_cast<double>( J[j13] * J[j32] - J[j12] * J[j33] ) ,
1669 static_cast<double>( J[j12] * J[j23] - J[j13] * J[j22] ) ,
1670
1671 static_cast<double>( J[j23] * J[j31] - J[j21] * J[j33] ) ,
1672 static_cast<double>( J[j11] * J[j33] - J[j13] * J[j31] ) ,
1673 static_cast<double>( J[j13] * J[j21] - J[j11] * J[j23] ) ,
1674
1675 static_cast<double>( J[j21] * J[j32] - J[j22] * J[j31] ) ,
1676 static_cast<double>( J[j12] * J[j31] - J[j11] * J[j32] ) ,
1677 static_cast<double>( J[j11] * J[j22] - J[j12] * J[j21] ) };
1678
1679 const double detJ = J[j11] * invJ[j11] +
1680 J[j21] * invJ[j12] +
1681 J[j31] * invJ[j13] ;
1682
1683 const double detJinv = 1.0 / detJ ;
1684
1685 for ( unsigned i = 0 ; i < TensorDim ; ++i ) { invJ[i] *= detJinv ; }
1686
1687 // Transform gradients:
1688
1689 for( unsigned i = 0; i < FunctionCount ; ++i ) {
1690 const double g0 = grad[0][i];
1691 const double g1 = grad[1][i];
1692 const double g2 = grad[2][i];
1693
1694 dpsidx[i] = g0 * invJ[j11] + g1 * invJ[j12] + g2 * invJ[j13];
1695 dpsidy[i] = g0 * invJ[j21] + g1 * invJ[j22] + g2 * invJ[j23];
1696 dpsidz[i] = g0 * invJ[j31] + g1 * invJ[j32] + g2 * invJ[j33];
1697 }
1698
1699 return detJ ;
1700 }
1701
1702 KOKKOS_INLINE_FUNCTION
1703 void contributeResidualJacobian(
1704 const local_scalar_type dof_values[] ,
1705 const double dpsidx[] ,
1706 const double dpsidy[] ,
1707 const double dpsidz[] ,
1708 const double detJ ,
1709 const local_scalar_type coeff_k ,
1710 const double integ_weight ,
1711 const double bases_vals[] ,
1712 local_scalar_type elem_res[] ,
1713 local_scalar_type elem_mat[][ FunctionCount ] ) const
1714 {
1715 local_scalar_type value_at_pt = 0 ;
1716 local_scalar_type gradx_at_pt = 0 ;
1717 local_scalar_type grady_at_pt = 0 ;
1718 local_scalar_type gradz_at_pt = 0 ;
1719
1720 for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
1721 value_at_pt += dof_values[m] * bases_vals[m] ;
1722 gradx_at_pt += dof_values[m] * dpsidx[m] ;
1723 grady_at_pt += dof_values[m] * dpsidy[m] ;
1724 gradz_at_pt += dof_values[m] * dpsidz[m] ;
1725 }
1726
1727 const local_scalar_type k_detJ_weight = coeff_k * detJ * integ_weight ;
1728 const local_scalar_type res_val = value_at_pt * value_at_pt * detJ * integ_weight ;
1729 const local_scalar_type mat_val = 2.0 * value_at_pt * detJ * integ_weight ;
1730
1731 // $$ R_i = \int_{\Omega} \nabla \phi_i \cdot (k \nabla T) + \phi_i T^2 d \Omega $$
1732 // $$ J_{i,j} = \frac{\partial R_i}{\partial T_j} = \int_{\Omega} k \nabla \phi_i \cdot \nabla \phi_j + 2 \phi_i \phi_j T d \Omega $$
1733
1734 for ( unsigned m = 0; m < FunctionCount; ++m) {
1735 local_scalar_type * const mat = elem_mat[m] ;
1736 const double bases_val_m = bases_vals[m];
1737 const double dpsidx_m = dpsidx[m] ;
1738 const double dpsidy_m = dpsidy[m] ;
1739 const double dpsidz_m = dpsidz[m] ;
1740
1741 elem_res[m] += k_detJ_weight * ( dpsidx_m * gradx_at_pt +
1742 dpsidy_m * grady_at_pt +
1743 dpsidz_m * gradz_at_pt ) +
1744 res_val * bases_val_m ;
1745
1746 for( unsigned n = 0; n < FunctionCount; n++) {
1747
1748 mat[n] += k_detJ_weight * ( dpsidx_m * dpsidx[n] +
1749 dpsidy_m * dpsidy[n] +
1750 dpsidz_m * dpsidz[n] ) +
1751 mat_val * bases_val_m * bases_vals[n];
1752 }
1753 }
1754 }
1755
1756 KOKKOS_INLINE_FUNCTION
1757 void operator()( const typename TeamPolicy< execution_space >::member_type & dev ) const
1758 {
1759
1760 const unsigned num_ensemble_threads = dev_config.block_dim.x ;
1761 const unsigned num_element_threads = dev_config.block_dim.y ;
1762 const unsigned element_rank = dev.team_rank() / num_ensemble_threads ;
1763 const unsigned ensemble_rank = dev.team_rank() % num_ensemble_threads ;
1764
1765 const unsigned ielem =
1766 dev.league_rank() * num_element_threads + element_rank;
1767
1768 if (ielem >= elem_node_ids.extent(0))
1769 return;
1770
1771 (*this)( ielem, ensemble_rank );
1772
1773 }
1774
1775 KOKKOS_INLINE_FUNCTION
1776 void operator()( const unsigned ielem ,
1777 const unsigned ensemble_rank = 0 ) const
1778 {
1779 local_vector_type local_solution =
1780 local_vector_view_traits::create_local_view(solution,
1781 ensemble_rank);
1782 local_vector_type local_residual =
1783 local_vector_view_traits::create_local_view(residual,
1784 ensemble_rank);
1785 local_matrix_type local_jacobian_values =
1786 local_matrix_view_traits::create_local_view(jacobian.values,
1787 ensemble_rank);
1788
1789 // Gather nodal coordinates and solution vector:
1790
1791 double x[ FunctionCount ] ;
1792 double y[ FunctionCount ] ;
1793 double z[ FunctionCount ] ;
1794 local_scalar_type val[ FunctionCount ] ;
1795 unsigned node_index[ ElemNodeCount ];
1796
1797 for ( unsigned i = 0 ; i < ElemNodeCount ; ++i ) {
1798 const unsigned ni = elem_node_ids( ielem , i );
1799
1800 node_index[i] = ni ;
1801
1802 x[i] = node_coords( ni , 0 );
1803 y[i] = node_coords( ni , 1 );
1804 z[i] = node_coords( ni , 2 );
1805
1806 val[i] = local_solution( ni );
1807 }
1808
1809
1810 local_scalar_type elem_vec[ FunctionCount ] ;
1811 local_scalar_type elem_mat[ FunctionCount ][ FunctionCount ] ;
1812
1813 for( unsigned i = 0; i < FunctionCount ; i++ ) {
1814 elem_vec[i] = 0 ;
1815 for( unsigned j = 0; j < FunctionCount ; j++){
1816 elem_mat[i][j] = 0 ;
1817 }
1818 }
1819
1820
1821 for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
1822 double dpsidx[ FunctionCount ] ;
1823 double dpsidy[ FunctionCount ] ;
1824 double dpsidz[ FunctionCount ] ;
1825
1826 local_scalar_type coeff_k = 0 ;
1827
1828 {
1829 double pt[] = {0.0, 0.0, 0.0};
1830
1831 // If function is not constant
1832 // then compute physical coordinates of integration point
1833 if ( ! coeff_function.is_constant ) {
1834 for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
1835 pt[0] += x[j] * elem_data.values[i][j] ;
1836 pt[1] += y[j] * elem_data.values[i][j] ;
1837 pt[2] += z[j] * elem_data.values[i][j] ;
1838 }
1839 }
1840
1841 // Need to fix this for local_scalar_type!!!!!!
1842 coeff_k = coeff_function(pt, ensemble_rank);
1843 }
1844
1845 const double detJ =
1846 transform_gradients( elem_data.gradients[i] , x , y , z ,
1847 dpsidx , dpsidy , dpsidz );
1848
1849 contributeResidualJacobian( val , dpsidx , dpsidy , dpsidz ,
1850 detJ , coeff_k ,
1851 elem_data.weights[i] ,
1852 elem_data.values[i] ,
1853 elem_vec , elem_mat );
1854 }
1855
1856 for( unsigned i = 0 ; i < FunctionCount ; i++ ) {
1857 const unsigned row = node_index[i] ;
1858 if ( row < residual.extent(0) ) {
1859 atomic_add( & local_residual( row ) , elem_vec[i] );
1860
1861 for( unsigned j = 0 ; j < FunctionCount ; j++ ) {
1862 const unsigned entry = elem_graph( ielem , i , j );
1863 if ( entry != ~0u ) {
1864 atomic_add( & local_jacobian_values( entry ) , elem_mat[i][j] );
1865 }
1866 }
1867 }
1868 }
1869 }
1870}; /* ElementComputation */
1871#endif
1872
1873//----------------------------------------------------------------------------
1874
1875template< class FixtureType , class SparseMatrixType >
1877
1878template< class ExecutionSpace , BoxElemPart::ElemOrder Order , class CoordinateMap ,
1879 typename ScalarType >
1881 Kokkos::Example::BoxElemFixture< ExecutionSpace , Order , CoordinateMap > ,
1882 Kokkos::Example::FENL::CrsMatrix< ScalarType , ExecutionSpace > >
1883{
1884public:
1885
1888 typedef typename node_coord_type::value_type scalar_coord_type ;
1889
1890 typedef ExecutionSpace execution_space ;
1891 typedef ScalarType scalar_type ;
1892
1896 typedef Kokkos::View< scalar_type* , execution_space > vector_type ;
1897
1898 //------------------------------------
1899
1904 static const bool use_team = local_vector_view_traits::use_team;
1905
1906 typedef double bc_scalar_type ;
1907
1908 //------------------------------------
1909 // Computational data:
1910
1919 const unsigned bc_plane ;
1920 const unsigned node_count ;
1921 bool init ;
1923
1924
1926 const vector_type & arg_solution ,
1927 const sparse_matrix_type & arg_jacobian ,
1928 const vector_type & arg_residual ,
1929 const unsigned arg_bc_plane ,
1930 const bc_scalar_type arg_bc_lower_value ,
1931 const bc_scalar_type arg_bc_upper_value ,
1932 const Kokkos::Example::FENL::DeviceConfig arg_dev_config )
1933 : node_coords( arg_mesh.node_coord() )
1934 , solution( arg_solution )
1935 , jacobian( arg_jacobian )
1936 , residual( arg_residual )
1937 , bc_lower_value( arg_bc_lower_value )
1938 , bc_upper_value( arg_bc_upper_value )
1939 , bc_lower_limit( std::numeric_limits<scalar_coord_type>::epsilon() )
1940 , bc_upper_limit( scalar_coord_type(1) - std::numeric_limits<scalar_coord_type>::epsilon() )
1941 , bc_plane( arg_bc_plane )
1942 , node_count( arg_mesh.node_count_owned() )
1943 , init( false )
1944 , dev_config( arg_dev_config )
1945 {
1946 parallel_for( node_count , *this );
1947 init = true ;
1948 }
1949
1950 void apply() const
1951 {
1952 if ( use_team ) {
1953 const size_t team_size = dev_config.block_dim.x * dev_config.block_dim.y;
1954 const size_t league_size =
1955 (node_count + dev_config.block_dim.y-1) / dev_config.block_dim.y;
1956 Kokkos::TeamPolicy< execution_space > config( league_size, team_size );
1957 parallel_for( config , *this );
1958 }
1959 else
1960 parallel_for( node_count , *this );
1961 }
1962
1963 //------------------------------------
1964
1965 KOKKOS_INLINE_FUNCTION
1966 void operator()( const typename TeamPolicy< execution_space >::member_type & dev ) const
1967 {
1968
1969 const unsigned num_ensemble_threads = dev_config.block_dim.x ;
1970 const unsigned num_node_threads = dev_config.block_dim.y ;
1971 const unsigned node_rank = dev.team_rank() / num_ensemble_threads ;
1972 const unsigned ensemble_rank = dev.team_rank() % num_ensemble_threads ;
1973
1974 const unsigned inode = dev.league_rank() * num_node_threads + node_rank;
1975
1976 if (inode >= node_count)
1977 return;
1978
1979 (*this)( inode, ensemble_rank );
1980
1981 }
1982
1983 KOKKOS_INLINE_FUNCTION
1984 void operator()( const unsigned inode ,
1985 const unsigned ensemble_rank = 0) const
1986 {
1987 local_vector_type local_residual =
1988 local_vector_view_traits::create_local_view(residual,
1989 ensemble_rank);
1990 local_matrix_type local_jacobian_values =
1991 local_matrix_view_traits::create_local_view(jacobian.values,
1992 ensemble_rank);
1993
1994 // Apply dirichlet boundary condition on the Solution and Residual vectors.
1995 // To maintain the symmetry of the original global stiffness matrix,
1996 // zero out the columns that correspond to boundary conditions, and
1997 // update the residual vector accordingly
1998
1999 const unsigned iBeg = jacobian.graph.row_map[inode];
2000 const unsigned iEnd = jacobian.graph.row_map[inode+1];
2001
2002 const scalar_coord_type c = node_coords(inode,bc_plane);
2003 const bool bc_lower = c <= bc_lower_limit ;
2004 const bool bc_upper = bc_upper_limit <= c ;
2005
2006 if ( ! init ) {
2007 solution(inode) = bc_lower ? bc_lower_value : (
2008 bc_upper ? bc_upper_value : 0 );
2009 }
2010 else {
2011 if ( bc_lower || bc_upper ) {
2012
2013 local_residual(inode) = 0 ;
2014
2015 // zero each value on the row, and leave a one
2016 // on the diagonal
2017
2018 for( unsigned i = iBeg ; i < iEnd ; ++i ) {
2019 local_jacobian_values(i) = int(inode) == int(jacobian.graph.entries(i)) ? 1 : 0 ;
2020 }
2021 }
2022 else {
2023
2024 // Find any columns that are boundary conditions.
2025 // Clear them and adjust the residual vector
2026
2027 for( unsigned i = iBeg ; i < iEnd ; ++i ) {
2028 const unsigned cnode = jacobian.graph.entries(i) ;
2029 const scalar_coord_type cc = node_coords(cnode,bc_plane);
2030
2031 if ( ( cc <= bc_lower_limit ) || ( bc_upper_limit <= cc ) ) {
2032 local_jacobian_values(i) = 0 ;
2033 }
2034 }
2035 }
2036 }
2037 }
2038};
2039
2040} /* namespace FENL */
2041} /* namespace Example */
2042} /* namespace Kokkos */
2043
2044//----------------------------------------------------------------------------
2045
2046/* A Cuda-specific specialization for the element computation functor. */
2047#if defined( __CUDACC__ )
2048// #include <NonlinearElement_Cuda.hpp>
2049#endif
2050
2051//----------------------------------------------------------------------------
2052
2053#endif /* #ifndef KOKKOS_EXAMPLE_FENLFUNCTORS_HPP */
expr val()
Kokkos::DefaultExecutionSpace execution_space
Generate a distributed unstructured finite element mesh from a partitioned NX*NY*NZ box of elements.
Kokkos::View< const double *[SpaceDim], Device > node_coord_type
Kokkos::View< const unsigned *[ElemNode], Device > elem_node_type
DirichletComputation(const mesh_type &arg_mesh, const vector_type &arg_solution, const sparse_matrix_type &arg_jacobian, const vector_type &arg_residual, const unsigned arg_bc_plane, const bc_scalar_type arg_bc_lower_value, const bc_scalar_type arg_bc_upper_value, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
sparse_matrix_type::StaticCrsGraphType sparse_graph_type
ElementComputationBase(const mesh_type &arg_mesh, const vector_type &arg_solution, const elem_graph_type &arg_elem_graph, const sparse_matrix_type &arg_jacobian, const vector_type &arg_residual)
Kokkos::Example::BoxElemFixture< ExecutionSpace, Order, CoordinateMap > mesh_type
NodeNodeGraph< elem_node_type, sparse_graph_type, ElemNodeCount >::ElemGraphType elem_graph_type
Kokkos::View< scalar_type *[FunctionCount], execution_space > elem_vectors_type
Kokkos::View< scalar_type *[FunctionCount][FunctionCount], execution_space > elem_matrices_type
KOKKOS_INLINE_FUNCTION double transform_gradients(const double grad[][FunctionCount], const double x[], const double y[], const double z[], double dpsidx[], double dpsidy[], double dpsidz[]) const
CrsMatrix< ScalarType, ExecutionSpace > sparse_matrix_type
Kokkos::Example::HexElement_Data< mesh_type::ElemNode > element_data_type
Kokkos::View< scalar_type *, Kokkos::LayoutLeft, execution_space > vector_type
KOKKOS_INLINE_FUNCTION void gatherSolution(const unsigned ielem, scalar_type val[], unsigned node_index[], double x[], double y[], double z[], fad_scalar_type res[]) const
ElementComputation(const typename base_type::mesh_type &arg_mesh, const CoeffFunctionType &arg_coeff_function, const typename base_type::vector_type &arg_solution, const typename base_type::elem_graph_type &arg_elem_graph, const typename base_type::sparse_matrix_type &arg_jacobian, const typename base_type::vector_type &arg_residual, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
ElementComputation< Kokkos::Example::BoxElemFixture< ExecutionSpace, Order, CoordinateMap >, CrsMatrix< ScalarType, ExecutionSpace >, FadElement, CoeffFunctionType > base_type
KOKKOS_INLINE_FUNCTION void computeElementResidual(const scalar_type dof_values[], const double x[], const double y[], const double z[], local_scalar_type elem_res[]) const
KOKKOS_INLINE_FUNCTION void scatterResidual(const unsigned ielem, const unsigned node_index[], const scalar_type res[], const scalar_type mat[][FunctionCount]) const
KOKKOS_INLINE_FUNCTION void computeElementResidualJacobian(const scalar_type dof_values[], const double x[], const double y[], const double z[], scalar_type elem_res[], scalar_type elem_mat[][FunctionCount]) const
KOKKOS_INLINE_FUNCTION void gatherSolution(const unsigned ielem, scalar_type val[], unsigned node_index[], double x[], double y[], double z[], scalar_type res[], scalar_type mat[][FunctionCount]) const
ElementComputation(const typename base_type::mesh_type &arg_mesh, const CoeffFunctionType &arg_coeff_function, const typename base_type::vector_type &arg_solution, const typename base_type::elem_graph_type &arg_elem_graph, const typename base_type::sparse_matrix_type &arg_jacobian, const typename base_type::vector_type &arg_residual, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
KOKKOS_INLINE_FUNCTION void computeElementResidual(const local_scalar_type dof_values[], const double x[], const double y[], const double z[], local_scalar_type elem_res[]) const
ElementComputation(const typename base_type::mesh_type &arg_mesh, const CoeffFunctionType &arg_coeff_function, const typename base_type::vector_type &arg_solution, const typename base_type::elem_graph_type &arg_elem_graph, const typename base_type::sparse_matrix_type &arg_jacobian, const typename base_type::vector_type &arg_residual, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
KOKKOS_INLINE_FUNCTION void gatherSolution(const unsigned ielem, fad_scalar_type val[], unsigned node_index[], double x[], double y[], double z[], fad_scalar_type res[]) const
KOKKOS_INLINE_FUNCTION void computeElementResidualJacobian(const scalar_type dof_values[], const double x[], const double y[], const double z[], scalar_type elem_res[], scalar_type elem_mat[][FunctionCount]) const
ElementComputation< Kokkos::Example::BoxElemFixture< ExecutionSpace, Order, CoordinateMap >, CrsMatrix< ScalarType, ExecutionSpace >, Analytic, CoeffFunctionType > base_type
ElementComputation(const typename base_type::mesh_type &arg_mesh, const CoeffFunctionType &arg_coeff_function, const typename base_type::vector_type &arg_solution, const typename base_type::elem_graph_type &arg_elem_graph, const typename base_type::sparse_matrix_type &arg_jacobian, const typename base_type::vector_type &arg_residual, const Kokkos::Example::FENL::DeviceConfig arg_dev_config)
local_rv_view_traits::local_view_type local_rv_view_type
KOKKOS_INLINE_FUNCTION void setRandomVariables(const RandomVariableView &rv)
LocalViewTraits< RandomVariableView > local_rv_view_traits
Stokhos::KL::ExponentialRandomField< MeshScalar, Device > rf_type
Kokkos::View< Scalar *, Kokkos::LayoutLeft, Device > RandomVariableView
KOKKOS_INLINE_FUNCTION RandomVariableView getRandomVariables() const
ExponentialKLCoefficient(const MeshScalar mean, const MeshScalar variance, const MeshScalar correlation_length, const size_type num_rv)
KOKKOS_INLINE_FUNCTION local_scalar_type operator()(const MeshScalar point[], const size_type ensemble_rank) const
ExponentialKLCoefficient(const ExponentialKLCoefficient &rhs)
local_rv_view_traits::local_value_type local_scalar_type
KOKKOS_INLINE_FUNCTION void operator()(const unsigned irow, unsigned &update, const bool final) const
KOKKOS_INLINE_FUNCTION void sort_graph_entries(const unsigned irow) const
KOKKOS_INLINE_FUNCTION void fill_elem_graph_map(const unsigned ielem) const
KOKKOS_INLINE_FUNCTION void fill_graph_entries(const unsigned iset) const
KOKKOS_INLINE_FUNCTION void init(unsigned &update) const
KOKKOS_INLINE_FUNCTION void operator()(const unsigned iwork) const
Kokkos::UnorderedMap< key_type, void, execution_space > SetType
Kokkos::View< unsigned *[ElemNode][ElemNode], execution_space > ElemGraphType
KOKKOS_INLINE_FUNCTION void fill_set(const unsigned ielem) const
ElemNodeIdView::execution_space execution_space
Kokkos::View< unsigned, execution_space > UnsignedValue
KOKKOS_INLINE_FUNCTION void join(unsigned &update, const unsigned &input) const
NodeNodeGraph(const ElemNodeIdView &arg_elem_node_id, const unsigned arg_node_count, Times &results)
CrsGraphType::row_map_type::non_const_type RowMapType
Class representing a KL expansion of an exponential random field.
KOKKOS_INLINE_FUNCTION Teuchos::PromotionTraits< typenamerv_type::value_type, value_type >::promote evaluate(const point_type &point, const rv_type &random_variables) const
Evaluate random field at a point.
KOKKOS_INLINE_FUNCTION void atomic_add(volatile Sacado::UQ::PCE< Storage > *const dest, const Sacado::UQ::PCE< Storage > &src)
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
Definition: csr_vector.h:265
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y
Definition: csr_vector.h:267
static void eval(Kokkos::Example::FENL::DeviceConfig &dev_config_elem, Kokkos::Example::FENL::DeviceConfig &dev_config_bc)
static void eval(Kokkos::Example::FENL::DeviceConfig &dev_config_elem, Kokkos::Example::FENL::DeviceConfig &dev_config_bc)
Kokkos::StaticCrsGraph< unsigned, Space, void, void, unsigned > StaticCrsGraphType
CrsMatrix(const StaticCrsGraphType &arg_graph)
Dim3(const size_t x_, const size_t y_=1, const size_t z_=1)
DeviceConfig(const size_t num_blocks_=0, const size_t threads_per_block_x_=0, const size_t threads_per_block_y_=0, const size_t threads_per_block_z_=1)
ElementComputationConstantCoefficient(const ElementComputationConstantCoefficient &rhs)
KOKKOS_INLINE_FUNCTION double operator()(double pt[], unsigned ensemble_rank) const
static KOKKOS_INLINE_FUNCTION local_view_type create_local_view(const view_type &v, const unsigned local_rank)