Kokkos Core Kernels Package Version of the Day
Loading...
Searching...
No Matches
Kokkos_Cuda.hpp
1//@HEADER
2// ************************************************************************
3//
4// Kokkos v. 4.0
5// Copyright (2022) National Technology & Engineering
6// Solutions of Sandia, LLC (NTESS).
7//
8// Under the terms of Contract DE-NA0003525 with NTESS,
9// the U.S. Government retains certain rights in this software.
10//
11// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12// See https://kokkos.org/LICENSE for license information.
13// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14//
15//@HEADER
16
17#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18#include <Kokkos_Macros.hpp>
19static_assert(false,
20 "Including non-public Kokkos header files is not allowed.");
21#endif
22#ifndef KOKKOS_CUDA_HPP
23#define KOKKOS_CUDA_HPP
24
25#include <Kokkos_Macros.hpp>
26#if defined(KOKKOS_ENABLE_CUDA)
27
28#include <Kokkos_Core_fwd.hpp>
29
30#include <iosfwd>
31#include <vector>
32
33#include <impl/Kokkos_AnalyzePolicy.hpp>
34#include <Kokkos_CudaSpace.hpp>
35#include <Cuda/Kokkos_Cuda_Error.hpp> // CUDA_SAFE_CALL
36
37#include <Kokkos_Parallel.hpp>
38#include <Kokkos_TaskScheduler.hpp>
39#include <Kokkos_Layout.hpp>
40#include <Kokkos_ScratchSpace.hpp>
41#include <Kokkos_MemoryTraits.hpp>
42#include <impl/Kokkos_HostSharedPtr.hpp>
43#include <impl/Kokkos_InitializationSettings.hpp>
44
45/*--------------------------------------------------------------------------*/
46
// Forward declarations of the CUDA backend implementation classes. Only the
// names are introduced here; the definitions live elsewhere in the backend.
namespace Kokkos {
namespace Impl {
class CudaInternal;
class CudaExec;
}  // namespace Impl
}  // namespace Kokkos
53
54/*--------------------------------------------------------------------------*/
55
56namespace Kokkos {
57
58namespace Impl {
namespace Experimental {

/// Bit flags naming the mechanisms that can be used to launch a CUDA kernel.
/// The non-zero enumerators occupy distinct bits so that they can be combined
/// and tested with the bitwise operators defined below.
enum class CudaLaunchMechanism : unsigned {
  Default        = 0,
  ConstantMemory = 1,
  GlobalMemory   = 2,
  LocalMemory    = 4
};

/// Bitwise OR of two launch-mechanism flag sets.
constexpr CudaLaunchMechanism operator|(CudaLaunchMechanism p1,
                                        CudaLaunchMechanism p2) {
  using bits_type = unsigned;  // matches the enum's underlying type
  return static_cast<CudaLaunchMechanism>(static_cast<bits_type>(p1) |
                                          static_cast<bits_type>(p2));
}

/// Bitwise AND of two launch-mechanism flag sets.
constexpr CudaLaunchMechanism operator&(CudaLaunchMechanism p1,
                                        CudaLaunchMechanism p2) {
  using bits_type = unsigned;  // matches the enum's underlying type
  return static_cast<CudaLaunchMechanism>(static_cast<bits_type>(p1) &
                                          static_cast<bits_type>(p2));
}

/// Property bundle that carries a launch mechanism chosen at compile time.
/// The member is initialized from the template argument.
template <CudaLaunchMechanism Mechanism>
struct CudaDispatchProperties {
  CudaLaunchMechanism launch_mechanism = Mechanism;
};

}  // namespace Experimental
83} // namespace Impl
94class Cuda {
95 public:
97
98
100 using execution_space = Cuda;
101
102#if defined(KOKKOS_ENABLE_CUDA_UVM)
104 using memory_space = CudaUVMSpace;
105#else
107 using memory_space = CudaSpace;
108#endif
109
111 using device_type = Kokkos::Device<execution_space, memory_space>;
112
114 using size_type = memory_space::size_type;
115
117 using array_layout = LayoutLeft;
118
120 using scratch_memory_space = ScratchMemorySpace<Cuda>;
121
123 //--------------------------------------------------
125
126
129 KOKKOS_INLINE_FUNCTION static int in_parallel() {
130#if defined(__CUDA_ARCH__)
131 return true;
132#else
133 return false;
134#endif
135 }
136
148 static bool sleep();
149
155 static bool wake();
156
163 static void impl_static_fence(const std::string& name);
164
165 void fence(const std::string& name =
166 "Kokkos::Cuda::fence(): Unnamed Instance Fence") const;
167
169#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
170 static int concurrency();
171#else
172 int concurrency() const;
173#endif
174
176 void print_configuration(std::ostream& os, bool verbose = false) const;
177
179 //--------------------------------------------------
181
182 Cuda();
183
184 Cuda(cudaStream_t stream, bool manage_stream = false);
185
186 //--------------------------------------------------------------------------
188 static void impl_finalize();
189
191 static int impl_is_initialized();
192
194 static void impl_initialize(InitializationSettings const&);
195
199 static size_type device_arch();
200
202 static size_type detect_device_count();
203
207 static std::vector<unsigned> detect_device_arch();
208
209 cudaStream_t cuda_stream() const;
210 int cuda_device() const;
211 const cudaDeviceProp& cuda_device_prop() const;
212
214 //--------------------------------------------------------------------------
215
216 static const char* name();
217
218 inline Impl::CudaInternal* impl_internal_space_instance() const {
219 return m_space_instance.get();
220 }
221 uint32_t impl_instance_id() const noexcept;
222
223 private:
224 friend bool operator==(Cuda const& lhs, Cuda const& rhs) {
225 return lhs.impl_internal_space_instance() ==
226 rhs.impl_internal_space_instance();
227 }
228 friend bool operator!=(Cuda const& lhs, Cuda const& rhs) {
229 return !(lhs == rhs);
230 }
231 Kokkos::Impl::HostSharedPtr<Impl::CudaInternal> m_space_instance;
232};
233
234namespace Tools {
235namespace Experimental {
236template <>
237struct DeviceTypeTraits<Cuda> {
239 static constexpr DeviceType id = DeviceType::Cuda;
240 static int device_id(const Cuda& exec) { return exec.cuda_device(); }
241};
242} // namespace Experimental
243} // namespace Tools
244
245namespace Impl {
246
247template <class DT, class... DP>
248struct ZeroMemset<Kokkos::Cuda, DT, DP...> {
249 ZeroMemset(const Kokkos::Cuda& exec_space_instance,
250 const View<DT, DP...>& dst,
251 typename View<DT, DP...>::const_value_type&) {
252 KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemsetAsync(
253 dst.data(), 0,
254 dst.size() * sizeof(typename View<DT, DP...>::value_type),
255 exec_space_instance.cuda_stream()));
256 }
257
258 ZeroMemset(const View<DT, DP...>& dst,
259 typename View<DT, DP...>::const_value_type&) {
260 KOKKOS_IMPL_CUDA_SAFE_CALL(
261 cudaMemset(dst.data(), 0,
262 dst.size() * sizeof(typename View<DT, DP...>::value_type)));
263 }
264};
265} // namespace Impl
266} // namespace Kokkos
267
268/*--------------------------------------------------------------------------*/
269/*--------------------------------------------------------------------------*/
270
271namespace Kokkos {
272namespace Impl {
273
274template <>
275struct MemorySpaceAccess<Kokkos::CudaSpace,
276 Kokkos::Cuda::scratch_memory_space> {
277 enum : bool { assignable = false };
278 enum : bool { accessible = true };
279 enum : bool { deepcopy = false };
280};
281
282#if defined(KOKKOS_ENABLE_CUDA_UVM)
283
// If use of UVM is forced everywhere,
// then we must assume that CudaUVMSpace
// can be a stand-in for CudaSpace.
// This will fail when a strange host-side execution space
// defines CudaUVMSpace as its preferred memory space.
289
290template <>
291struct MemorySpaceAccess<Kokkos::CudaUVMSpace,
292 Kokkos::Cuda::scratch_memory_space> {
293 enum : bool { assignable = false };
294 enum : bool { accessible = true };
295 enum : bool { deepcopy = false };
296};
297
298#endif
299
300} // namespace Impl
301} // namespace Kokkos
302
303#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
304#endif /* #ifndef KOKKOS_CUDA_HPP */
Declaration of various MemoryLayout options.
Declaration of parallel operators.