83 lines
3.3 KiB
Plaintext
83 lines
3.3 KiB
Plaintext
/*===---- complex - CUDA wrapper for <complex> ------------------------------===
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
* THE SOFTWARE.
|
|
*
|
|
*===-----------------------------------------------------------------------===
|
|
*/
|
|
|
|
#ifndef __CLANG_CUDA_WRAPPERS_COMPLEX
|
|
#define __CLANG_CUDA_WRAPPERS_COMPLEX
|
|
|
|
// Wrapper around <complex> that forces its functions to be __host__
|
|
// __device__.
|
|
|
|
// First, include host-only headers we think are likely to be included by
|
|
// <complex>, so that the pragma below only applies to <complex> itself.
|
|
#if __cplusplus >= 201103L
|
|
#include <type_traits>
|
|
#endif
|
|
#include <stdexcept>
|
|
#include <cmath>
|
|
#include <sstream>
|
|
|
|
// Next, include our <algorithm> wrapper, to ensure that device overloads of
|
|
// std::min/max are available.
|
|
#include <algorithm>
|
|
|
|
#pragma clang force_cuda_host_device begin
|
|
|
|
// When compiling for device, ask libstdc++ to use its own implements of
|
|
// complex functions, rather than calling builtins (which resolve to library
|
|
// functions that don't exist when compiling CUDA device code).
|
|
//
|
|
// This is a little dicey, because it causes libstdc++ to define a different
|
|
// set of overloads on host and device.
|
|
//
|
|
// // Present only when compiling for host.
|
|
// __host__ __device__ void complex<float> sin(const complex<float>& x) {
|
|
// return __builtin_csinf(x);
|
|
// }
|
|
//
|
|
// // Present when compiling for host and for device.
|
|
// template <typename T>
|
|
// void __host__ __device__ complex<T> sin(const complex<T>& x) {
|
|
// return complex<T>(sin(x.real()) * cosh(x.imag()),
|
|
// cos(x.real()), sinh(x.imag()));
|
|
// }
|
|
//
|
|
// This is safe because when compiling for device, all function calls in
|
|
// __host__ code to sin() will still resolve to *something*, even if they don't
|
|
// resolve to the same function as they resolve to when compiling for host. We
|
|
// don't care that they don't resolve to the right function because we won't
|
|
// codegen this host code when compiling for device.
|
|
|
|
#pragma push_macro("_GLIBCXX_USE_C99_COMPLEX")
|
|
#pragma push_macro("_GLIBCXX_USE_C99_COMPLEX_TR1")
|
|
#define _GLIBCXX_USE_C99_COMPLEX 0
|
|
#define _GLIBCXX_USE_C99_COMPLEX_TR1 0
|
|
|
|
#include_next <complex>
|
|
|
|
#pragma pop_macro("_GLIBCXX_USE_C99_COMPLEX_TR1")
|
|
#pragma pop_macro("_GLIBCXX_USE_C99_COMPLEX")
|
|
|
|
#pragma clang force_cuda_host_device end
|
|
|
|
#endif // include guard
|