Sacado Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
advection/advection.cpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Sacado Package
5// Copyright (2006) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// This library is free software; you can redistribute it and/or modify
11// it under the terms of the GNU Lesser General Public License as
12// published by the Free Software Foundation; either version 2.1 of the
13// License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful, but
16// WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23// USA
24// Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
25// (etphipp@sandia.gov).
26//
27// ***********************************************************************
28// @HEADER
29
30#include "Sacado.hpp"
31#include "advection.hpp"
32#include "common.hpp"
33
34#include "Kokkos_Timer.hpp"
35
36template<typename FluxView, typename WgbView, typename SrcView,
37 typename WbsView, typename ResidualView>
38void run_fad_flat(const FluxView& flux, const WgbView& wgb,
39 const SrcView& src, const WbsView& wbs,
40 const ResidualView& residual)
41{
42 typedef typename ResidualView::execution_space execution_space;
43 typedef typename ResidualView::non_const_value_type scalar_type;
44
45 const size_t num_cells = wgb.extent(0);
46 const int num_basis = wgb.extent(1);
47 const int num_points = wgb.extent(2);
48 const int num_dim = wgb.extent(3);
49
50 Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>( 0,num_cells ),
51 KOKKOS_LAMBDA (const size_t cell)
52 {
53 scalar_type value, value2;
54 for (int basis=0; basis<num_basis; ++basis) {
55 value = 0.0;
56 value2 = 0.0;
57 for (int qp=0; qp<num_points; ++qp) {
58 for (int dim=0; dim<num_dim; ++dim)
59 value += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
60 value2 += src(cell,qp)*wbs(cell,basis,qp);
61 }
62 residual(cell,basis) = value+value2;
63 }
64 });
65}
66
67template<typename FluxView, typename WgbView, typename SrcView,
68 typename WbsView, typename ResidualView>
69void run_fad_scratch(const FluxView& flux, const WgbView& wgb,
70 const SrcView& src, const WbsView& wbs,
71 const ResidualView& residual)
72{
73 typedef typename ResidualView::execution_space execution_space;
74 typedef typename ResidualView::non_const_value_type scalar_type;
75 typedef Kokkos::TeamPolicy<execution_space> policy_type;
76 typedef typename policy_type::member_type team_member;
77 typedef Kokkos::View<scalar_type*, typename execution_space::scratch_memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > tmp_scratch_type;
78
79 const size_t num_cells = wgb.extent(0);
80 const int num_basis = wgb.extent(1);
81 const int num_points = wgb.extent(2);
82 const int num_dim = wgb.extent(3);
83
84 const int vector_size = 1;
85 const int team_size = is_cuda_space<execution_space>::value ? 32 : 1;
86 const int fad_size = Kokkos::dimension_scalar(residual);
87 const size_t range = (num_cells+team_size-1)/team_size;
88 const size_t bytes = 2*tmp_scratch_type::shmem_size(team_size,fad_size);
89 policy_type policy(range,team_size,vector_size);
90
91 Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerTeam(bytes)),
92 KOKKOS_LAMBDA (const team_member& team)
93 {
94 const int team_rank = team.team_rank();
95 tmp_scratch_type value(team.team_scratch(0), team_size, fad_size);
96 tmp_scratch_type value2(team.team_scratch(0), team_size, fad_size);
97 const size_t cell = team.league_rank()*team_size + team_rank;
98 if (cell < num_cells) {
99 for (int basis=0; basis<num_basis; ++basis) {
100 value(team_rank) = 0.0;
101 value2(team_rank) = 0.0;
102 for (int qp=0; qp<num_points; ++qp) {
103 for (int dim=0; dim<num_dim; ++dim)
104 value(team_rank) += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
105 value2(team_rank) += src(cell,qp)*wbs(cell,basis,qp);
106 }
107 residual(cell,basis) = value(team_rank)+value2(team_rank);
108 }
109 }
110 });
111}
112
113template<int N, typename FluxView, typename WgbView, typename SrcView,
114 typename WbsView, typename ResidualView>
115void run_analytic_flat(const FluxView& flux, const WgbView& wgb,
116 const SrcView& src, const WbsView& wbs,
117 const ResidualView& residual)
118{
119 typedef typename ResidualView::execution_space execution_space;
120 typedef typename ResidualView::non_const_value_type scalar_type;
121
122 const size_t num_cells = wgb.extent(0);
123 const int num_basis = wgb.extent(1);
124 const int num_points = wgb.extent(2);
125 const int num_dim = wgb.extent(3);
126
127 Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>( 0,num_cells ),
128 KOKKOS_LAMBDA (const size_t cell)
129 {
130 scalar_type value[N+1],value2[N+1];
131 for (int basis=0; basis<num_basis; ++basis) {
132 for (int k=0; k<N+1; ++k) {
133 value[k] = 0.0;
134 value2[k] = 0.0;
135 }
136 for (int qp=0; qp<num_points; ++qp) {
137 for (int dim=0; dim<num_dim; ++dim) {
138 const scalar_type flux_val = flux(cell,qp,dim,N);
139 const scalar_type wgb_val = wgb(cell,basis,qp,dim,N);
140 value[N] += flux_val*wgb_val;
141 for(int k=0; k<N; k++)
142 value[k] +=
143 flux_val*wgb(cell,basis,qp,dim,k)+flux(cell,qp,dim,k)*wgb_val;
144 }
145 const scalar_type src_val = src(cell,qp,N);
146 const scalar_type wbs_val = wbs(cell,basis,qp,N);
147 value2[N] += src_val*wbs_val;
148 for(int k=0; k<N; k++)
149 value2[k] += src_val*wbs(cell,basis,qp,k)+src(cell,qp,k)*wbs_val;
150 }
151 for(int k=0; k<=N; k++)
152 residual(cell,basis,k) = value[k]+value2[k];
153 }
154 });
155}
156
157template<int N, typename FluxView, typename WgbView, typename SrcView,
158 typename WbsView, typename ResidualView>
159void run_analytic_team(const FluxView& flux, const WgbView& wgb,
160 const SrcView& src, const WbsView& wbs,
161 const ResidualView& residual)
162{
163 typedef typename ResidualView::execution_space execution_space;
164 typedef typename ResidualView::non_const_value_type scalar_type;
165 typedef Kokkos::TeamPolicy<execution_space> policy_type;
166 typedef typename policy_type::member_type team_member;
167 typedef Kokkos::View<scalar_type[N+1], typename execution_space::scratch_memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > tmp_scratch_type;
168
169 const size_t num_cells = wgb.extent(0);
170 const int num_basis = wgb.extent(1);
171 /*const*/ int num_points = wgb.extent(2);
172 /*const*/ int num_dim = wgb.extent(3);
173
174 const size_t bytes = 2*tmp_scratch_type::shmem_size();
175 policy_type policy(num_cells,num_basis,32);
176 Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerThread(bytes)),
177 KOKKOS_LAMBDA (const team_member& team)
178 {
179 tmp_scratch_type value(team.thread_scratch(0));
180 tmp_scratch_type value2(team.thread_scratch(0));
181 const size_t cell = team.league_rank();
182 Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,num_basis),
183 [&] (const int& basis)
184 {
185 Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,N+1),
186 [&] (const int& k)
187 {
188 value(k) = 0;
189 value2(k) = 0;
190 });
191 for (int qp=0; qp<num_points; ++qp) {
192 for (int dim=0; dim<num_dim; ++dim) {
193 const scalar_type flux_val = flux(cell,qp,dim,N);
194 const scalar_type wgb_val = wgb(cell,basis,qp,dim,N);
195 Kokkos::single(Kokkos::PerThread(team), [&] () {
196 value[N] += flux_val*wgb_val;
197 });
198 Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,N),
199 [&] (const int& k)
200 {
201 value[k] +=
202 flux_val*wgb(cell,basis,qp,dim,k)+flux(cell,qp,dim,k)*wgb_val;
203 });
204 }
205 const scalar_type src_val = src(cell,qp,N);
206 const scalar_type wbs_val = wbs(cell,basis,qp,N);
207 Kokkos::single(Kokkos::PerThread(team), [&] () {
208 value2[N] += src_val*wbs_val;
209 });
210 Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,N),
211 [&] (const int& k)
212 {
213 value2[k] += src_val*wbs(cell,basis,qp,k)+src(cell,qp,k)*wbs_val;
214 });
215 }
216 Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,N+1),
217 [&] (const int& k)
218 {
219 residual(cell,basis,k) = value[k]+value2[k];
220 });
221 });
222 });
223}
224
225template <typename FadType, int N, typename ExecSpace>
226double time_fad_flat(int ncells, int num_basis, int num_points, int ndim,
227 int ntrial, bool check)
228{
229 typedef Kokkos::View<FadType****,ExecSpace> t_4DView;
230 typedef Kokkos::View<FadType***,ExecSpace> t_3DView;
231 typedef Kokkos::View<FadType**,ExecSpace> t_2DView;
232
233 t_4DView wgb("",ncells,num_basis,num_points,ndim,N+1);
234 t_3DView flux("",ncells,num_points,ndim,N+1);
235 t_3DView wbs("",ncells,num_basis,num_points,N+1);
236 t_2DView src("",ncells,num_points,N+1);
237 t_2DView residual("",ncells,num_basis,N+1);
238 init_fad(wgb, wbs, flux, src, residual);
239
240 // Run once to warm up, complete any UVM transfers
241 run_fad_flat(flux, wgb, src, wbs, residual);
242
243 // Time execution
244 Kokkos::fence();
245 Kokkos::Timer timer;
246 for (int i=0; i<ntrial; ++i)
247 run_fad_flat(flux, wgb, src, wbs, residual);
248 Kokkos::fence();
249 double time = timer.seconds() / ntrial / ncells;
250
251 // Check result
252 if (check)
253 check_residual(flux, wgb, src, wbs, residual);
254
255 return time;
256}
257
258template <typename FadType, int N, typename ExecSpace>
259double time_fad_scratch(int ncells, int num_basis, int num_points, int ndim,
260 int ntrial, bool check)
261{
262 typedef Kokkos::View<FadType****,ExecSpace> t_4DView;
263 typedef Kokkos::View<FadType***,ExecSpace> t_3DView;
264 typedef Kokkos::View<FadType**,ExecSpace> t_2DView;
265
266 t_4DView wgb("",ncells,num_basis,num_points,ndim,N+1);
267 t_3DView flux("",ncells,num_points,ndim,N+1);
268 t_3DView wbs("",ncells,num_basis,num_points,N+1);
269 t_2DView src("",ncells,num_points,N+1);
270 t_2DView residual("",ncells,num_basis,N+1);
271 init_fad(wgb, wbs, flux, src, residual);
272
273 // Run once to warm up, complete any UVM transfers
274 run_fad_scratch(flux, wgb, src, wbs, residual);
275
276 // Time execution
277 Kokkos::fence();
278 Kokkos::Timer timer;
279 for (int i=0; i<ntrial; ++i)
280 run_fad_scratch(flux, wgb, src, wbs, residual);
281 Kokkos::fence();
282 double time = timer.seconds() / ntrial / ncells;
283
284 // Check result
285 if (check)
286 check_residual(flux, wgb, src, wbs, residual);
287
288 return time;
289}
290
291template <int N, typename ExecSpace>
292double time_analytic_flat(int ncells, int num_basis, int num_points, int ndim,
293 int ntrial, bool check)
294{
295 typedef Kokkos::View<double****[N+1],ExecSpace> t_4DView;
296 typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
297 typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
298
299 t_4DView wgb("",ncells,num_basis,num_points,ndim);
300 t_3DView flux("",ncells,num_points,ndim);
301 t_3DView wbs("",ncells,num_basis,num_points);
302 t_2DView src("",ncells,num_points);
303 t_2DView residual("",ncells,num_basis);
304 init_array(wgb, wbs, flux, src, residual);
305
306 // Run once to warm up, complete any UVM transfers
307 run_analytic_flat<N>(flux, wgb, src, wbs, residual);
308
309 // Time execution
310 Kokkos::fence();
311 Kokkos::Timer timer;
312 for (int i=0; i<ntrial; ++i)
313 run_analytic_flat<N>(flux, wgb, src, wbs, residual);
314 Kokkos::fence();
315 double time = timer.seconds() / ntrial / ncells;
316
317 // Check result
318 if (check)
319 check_residual(flux, wgb, src, wbs, residual);
320
321 return time;
322}
323
324template <int N, typename ExecSpace>
325double time_analytic_const(int ncells, int num_basis, int num_points, int ndim,
326 int ntrial, bool check)
327{
328 typedef Kokkos::View<double****[N+1],ExecSpace> t_4DView;
329 typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
330 typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
331 typedef Kokkos::View<const double***[N+1],ExecSpace,Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_3DView_const;
332
333 t_4DView wgb("",ncells,num_basis,num_points,ndim);
334 t_3DView flux("",ncells,num_points,ndim);
335 t_3DView wbs("",ncells,num_basis,num_points);
336 t_2DView src("",ncells,num_points);
337 t_2DView residual("",ncells,num_basis);
338 init_array(wgb, wbs, flux, src, residual);
339
340 t_3DView_const flux_const = flux;
341
342 // Run once to warm up, complete any UVM transfers
343 run_analytic_flat<N>(flux_const, wgb, src, wbs, residual);
344
345 // Time execution
346 Kokkos::fence();
347 Kokkos::Timer timer;
348 for (int i=0; i<ntrial; ++i)
349 run_analytic_flat<N>(flux_const, wgb, src, wbs, residual);
350 Kokkos::fence();
351 double time = timer.seconds() / ntrial / ncells;
352
353 // Check result
354 if (check)
355 check_residual(flux, wgb, src, wbs, residual);
356
357 return time;
358}
359
360template <int N, typename ExecSpace>
361double time_analytic_team(int ncells, int num_basis, int num_points, int ndim,
362 int ntrial, bool check)
363{
364 typedef Kokkos::View<double****[N+1],ExecSpace> t_4DView;
365 typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
366 typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
367 typedef Kokkos::View<const double***[N+1],ExecSpace,Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_3DView_const;
368
369 t_4DView wgb("",ncells,num_basis,num_points,ndim);
370 t_3DView flux("",ncells,num_points,ndim);
371 t_3DView wbs("",ncells,num_basis,num_points);
372 t_2DView src("",ncells,num_points);
373 t_2DView residual("",ncells,num_basis);
374 init_array(wgb, wbs, flux, src, residual);
375
376 t_3DView_const flux_const = flux;
377
378 // Run once to warm up, complete any UVM transfers
379 run_analytic_team<N>(flux_const, wgb, src, wbs, residual);
380
381 // Time execution
382 Kokkos::fence();
383 Kokkos::Timer timer;
384 for (int i=0; i<ntrial; ++i)
385 run_analytic_team<N>(flux_const, wgb, src, wbs, residual);
386 Kokkos::fence();
387 double time = timer.seconds() / ntrial / ncells;
388
389 // Check result
390 if (check)
391 check_residual(flux, wgb, src, wbs, residual);
392
393 return time;
394}
395
396#define INST_FUNC_FAD_N_DEV(FAD,N,DEV) \
397 template double time_fad_flat< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
398 template double time_fad_scratch< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check);
399
400#define INST_FUNC_N_DEV(N,DEV) \
401 INST_FUNC_FAD_N_DEV(SFadType,N,DEV) \
402 INST_FUNC_FAD_N_DEV(SLFadType,N,DEV) \
403 INST_FUNC_FAD_N_DEV(DFadType,N,DEV) \
404 template double time_analytic_flat< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
405 template double time_analytic_const< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
406 template double time_analytic_team< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check);
407
408#define INST_FUNC_DEV(DEV) \
409 INST_FUNC_N_DEV( fad_dim, DEV )
410
411#ifdef KOKKOS_ENABLE_SERIAL
412INST_FUNC_DEV(Kokkos::Serial)
413#endif
414
415#ifdef KOKKOS_ENABLE_OPENMP
416INST_FUNC_DEV(Kokkos::OpenMP)
417#endif
418
419#ifdef KOKKOS_ENABLE_THREADS
420INST_FUNC_DEV(Kokkos::Threads)
421#endif
422
423#ifdef KOKKOS_ENABLE_CUDA
424INST_FUNC_DEV(Kokkos::Cuda)
425#endif
const int N
void run_fad_scratch(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
#define INST_FUNC_DEV(DEV)
double time_analytic_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
void run_fad_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
double time_analytic_team(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
void run_analytic_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
double time_fad_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
double time_analytic_const(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
double time_fad_scratch(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
void run_analytic_team(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
void init_fad(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
void check_residual(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
void init_array(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
int check(Epetra_CrsGraph &A, int NumMyRows1, int NumGlobalRows1, int NumMyNonzeros1, int NumGlobalNonzeros1, int *MyGlobalElements, bool verbose)
int value