diff --git a/libcudacxx/include/cuda/annotated_ptr b/libcudacxx/include/cuda/annotated_ptr index bd9f26ad591..f5e04e56623 100644 --- a/libcudacxx/include/cuda/annotated_ptr +++ b/libcudacxx/include/cuda/annotated_ptr @@ -3,50 +3,128 @@ * * NVIDIA SOFTWARE LICENSE * - * This license is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of the NVIDIA/CUDA C++ Library software and materials provided hereunder (“SOFTWARE”). + * This license is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of the + * NVIDIA/CUDA C++ Library software and materials provided hereunder (“SOFTWARE”). * - * This license can be accepted only by an adult of legal age of majority in the country in which the SOFTWARE is used. If you are under the legal age of majority, you must ask your parent or legal guardian to consent to this license. By taking delivery of the SOFTWARE, you affirm that you have reached the legal age of majority, you accept the terms of this license, and you take legal and financial responsibility for the actions of your permitted users. + * This license can be accepted only by an adult of legal age of majority in the country in which the SOFTWARE is used. + * If you are under the legal age of majority, you must ask your parent or legal guardian to consent to this license. By + * taking delivery of the SOFTWARE, you affirm that you have reached the legal age of majority, you accept the terms of + * this license, and you take legal and financial responsibility for the actions of your permitted users. * - * You agree to use the SOFTWARE only for purposes that are permitted by (a) this license, and (b) any applicable law, regulation or generally accepted practices or guidelines in the relevant jurisdictions. + * You agree to use the SOFTWARE only for purposes that are permitted by (a) this license, and (b) any applicable law, + * regulation or generally accepted practices or guidelines in the relevant jurisdictions. * - * 1. LICENSE. Subject to the terms of this license, NVIDIA grants you a non-exclusive limited license to: (a) install and use the SOFTWARE, and (b) distribute the SOFTWARE subject to the distribution requirements described in this license. NVIDIA reserves all rights, title and interest in and to the SOFTWARE not expressly granted to you under this license. + * 1. LICENSE. Subject to the terms of this license, NVIDIA grants you a non-exclusive limited license to: (a) install + * and use the SOFTWARE, and (b) distribute the SOFTWARE subject to the distribution requirements described in this + * license. NVIDIA reserves all rights, title and interest in and to the SOFTWARE not expressly granted to you under + * this license. * * 2. DISTRIBUTION REQUIREMENTS. These are the distribution requirements for you to exercise the distribution grant: - * a. The terms under which you distribute the SOFTWARE must be consistent with the terms of this license, including (without limitation) terms relating to the license grant and license restrictions and protection of NVIDIA’s intellectual property rights. - * b. You agree to notify NVIDIA in writing of any known or suspected distribution or use of the SOFTWARE not in compliance with the requirements of this license, and to enforce the terms of your agreements with respect to distributed SOFTWARE. + * a. 
The terms under which you distribute the SOFTWARE must be consistent with the terms of this license, + * including (without limitation) terms relating to the license grant and license restrictions and protection of + * NVIDIA’s intellectual property rights. b. You agree to notify NVIDIA in writing of any known or suspected + * distribution or use of the SOFTWARE not in compliance with the requirements of this license, and to enforce the terms + * of your agreements with respect to distributed SOFTWARE. * * 3. LIMITATIONS. Your license to use the SOFTWARE is restricted as follows: * a. The SOFTWARE is licensed for you to develop applications only for use in systems with NVIDIA GPUs. - * b. You may not reverse engineer, decompile or disassemble, or remove copyright or other proprietary notices from any portion of the SOFTWARE or copies of the SOFTWARE. - * c. You may not modify or create derivative works of any portion of the SOFTWARE. - * d. You may not bypass, disable, or circumvent any technical measure, encryption, security, digital rights management or authentication mechanism in the SOFTWARE. - * e. You may not use the SOFTWARE in any manner that would cause it to become subject to an open source software license. As examples, licenses that require as a condition of use, modification, and/or distribution that the SOFTWARE be (i) disclosed or distributed in source code form; (ii) licensed for the purpose of making derivative works; or (iii) redistributable at no charge. - * f. Unless you have an agreement with NVIDIA for this purpose, you may not use the SOFTWARE with any system or application where the use or failure of the system or application can reasonably be expected to threaten or result in personal injury, death, or catastrophic loss. Examples include use in avionics, navigation, military, medical, life support or other life critical applications. NVIDIA does not design, test or manufacture the SOFTWARE for these critical uses and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or damages arising from such uses. - * g. You agree to defend, indemnify and hold harmless NVIDIA and its affiliates, and their respective employees, contractors, agents, officers and directors, from and against any and all claims, damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited to attorney’s fees and costs incident to establishing the right of indemnification) arising out of or related to use of the SOFTWARE outside of the scope of this Agreement, or not in compliance with its terms. + * b. You may not reverse engineer, decompile or disassemble, or remove copyright or other proprietary notices from + * any portion of the SOFTWARE or copies of the SOFTWARE. c. You may not modify or create derivative works of any + * portion of the SOFTWARE. d. You may not bypass, disable, or circumvent any technical measure, encryption, + * security, digital rights management or authentication mechanism in the SOFTWARE. e. You may not use the SOFTWARE + * in any manner that would cause it to become subject to an open source software license. As examples, licenses that + * require as a condition of use, modification, and/or distribution that the SOFTWARE be (i) disclosed or distributed in + * source code form; (ii) licensed for the purpose of making derivative works; or (iii) redistributable at no charge. f. 
+ * Unless you have an agreement with NVIDIA for this purpose, you may not use the SOFTWARE with any system or + * application where the use or failure of the system or application can reasonably be expected to threaten or result in + * personal injury, death, or catastrophic loss. Examples include use in avionics, navigation, military, medical, life + * support or other life critical applications. NVIDIA does not design, test or manufacture the SOFTWARE for these + * critical uses and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or + * damages arising from such uses. g. You agree to defend, indemnify and hold harmless NVIDIA and its affiliates, + * and their respective employees, contractors, agents, officers and directors, from and against any and all claims, + * damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited + * to attorney’s fees and costs incident to establishing the right of indemnification) arising out of or related to use + * of the SOFTWARE outside of the scope of this Agreement, or not in compliance with its terms. * - * 4. PRE-RELEASE. SOFTWARE versions identified as alpha, beta, preview, early access or otherwise as pre-release may not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, availability, and reliability standards relative to commercial versions of NVIDIA software and materials. You may use a pre-release SOFTWARE version at your own risk, understanding that these versions are not intended for use in production or business-critical systems. + * 4. PRE-RELEASE. SOFTWARE versions identified as alpha, beta, preview, early access or otherwise as pre-release may + * not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, + * availability, and reliability standards relative to commercial versions of NVIDIA software and materials. You may use + * a pre-release SOFTWARE version at your own risk, understanding that these versions are not intended for use in + * production or business-critical systems. * - * 5. OWNERSHIP. The SOFTWARE and the related intellectual property rights therein are and will remain the sole and exclusive property of NVIDIA or its licensors. The SOFTWARE is copyrighted and protected by the laws of the United States and other countries, and international treaty provisions. NVIDIA may make changes to the SOFTWARE, at any time without notice, but is not obligated to support or update the SOFTWARE. + * 5. OWNERSHIP. The SOFTWARE and the related intellectual property rights therein are and will remain the sole and + * exclusive property of NVIDIA or its licensors. The SOFTWARE is copyrighted and protected by the laws of the United + * States and other countries, and international treaty provisions. NVIDIA may make changes to the SOFTWARE, at any time + * without notice, but is not obligated to support or update the SOFTWARE. * - * 6. COMPONENTS UNDER OTHER LICENSES. The SOFTWARE may include NVIDIA or third-party components with separate legal notices or terms as may be described in proprietary notices accompanying the SOFTWARE. If and to the extent there is a conflict between the terms in this license and the license terms associated with a component, the license terms associated with the components control only to the extent necessary to resolve the conflict. + * 6. COMPONENTS UNDER OTHER LICENSES. 
The SOFTWARE may include NVIDIA or third-party components with separate legal + * notices or terms as may be described in proprietary notices accompanying the SOFTWARE. If and to the extent there is + * a conflict between the terms in this license and the license terms associated with a component, the license terms + * associated with the components control only to the extent necessary to resolve the conflict. * - * 7. FEEDBACK. You may, but don’t have to, provide to NVIDIA any Feedback. “Feedback” means any suggestions, bug fixes, enhancements, modifications, feature requests or other feedback regarding the SOFTWARE. For any Feedback that you voluntarily provide, you hereby grant NVIDIA and its affiliates a perpetual, non-exclusive, worldwide, irrevocable license to use, reproduce, modify, license, sublicense (through multiple tiers of sublicensees), and distribute (through multiple tiers of distributors) the Feedback without the payment of any royalties or fees to you. NVIDIA will use Feedback at its choice. + * 7. FEEDBACK. You may, but don’t have to, provide to NVIDIA any Feedback. “Feedback” means any suggestions, bug fixes, + * enhancements, modifications, feature requests or other feedback regarding the SOFTWARE. For any Feedback that you + * voluntarily provide, you hereby grant NVIDIA and its affiliates a perpetual, non-exclusive, worldwide, irrevocable + * license to use, reproduce, modify, license, sublicense (through multiple tiers of sublicensees), and distribute + * (through multiple tiers of distributors) the Feedback without the payment of any royalties or fees to you. NVIDIA + * will use Feedback at its choice. * - * 8. NO WARRANTIES. THE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING, BUT NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. NVIDIA DOES NOT WARRANT THAT THE SOFTWARE WILL MEET YOUR REQUIREMENTS OR THAT THE OPERATION THEREOF WILL BE UNINTERRUPTED OR ERROR-FREE, OR THAT ALL ERRORS WILL BE CORRECTED. + * 8. NO WARRANTIES. THE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING, BUT + * NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. NVIDIA DOES NOT + * WARRANT THAT THE SOFTWARE WILL MEET YOUR REQUIREMENTS OR THAT THE OPERATION THEREOF WILL BE UNINTERRUPTED OR + * ERROR-FREE, OR THAT ALL ERRORS WILL BE CORRECTED. * - * 9. LIMITATIONS OF LIABILITY. TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, PROJECT DELAYS, LOSS OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH THIS LICENSE OR THE USE OR PERFORMANCE OF THE SOFTWARE, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF LIABILITY, EVEN IF NVIDIA HAS PREVIOUSLY BEEN ADVISED OF, OR COULD REASONABLY HAVE FORESEEN, THE POSSIBILITY OF SUCH DAMAGES. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS LICENSE EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS LIMIT. + * 9. LIMITATIONS OF LIABILITY. 
TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS AFFILIATES SHALL NOT BE LIABLE + * FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, PROJECT DELAYS, LOSS OF USE, + * LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH + * THIS LICENSE OR THE USE OR PERFORMANCE OF THE SOFTWARE, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON + * BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION + * OR THEORY OF LIABILITY, EVEN IF NVIDIA HAS PREVIOUSLY BEEN ADVISED OF, OR COULD REASONABLY HAVE FORESEEN, THE + * POSSIBILITY OF SUCH DAMAGES. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES TOTAL CUMULATIVE LIABILITY UNDER OR ARISING + * OUT OF THIS LICENSE EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE + * OR EXTEND THIS LIMIT. * - * 10. TERMINATION. Your rights under this license will terminate automatically without notice from NVIDIA if you fail to comply with any term and condition of this license or if you commence or participate in any legal proceeding against NVIDIA with respect to the SOFTWARE. NVIDIA may terminate this license with advance written notice to you if NVIDIA decides to no longer provide the SOFTWARE in a country or, in NVIDIA’s sole discretion, the continued use of it is no longer commercially viable. Upon any termination of this license, you agree to promptly discontinue use of the SOFTWARE and destroy all copies in your possession or control. Your prior distributions in accordance with this license are not affected by the termination of this license. All provisions of this license will survive termination, except for the license granted to you. + * 10. TERMINATION. Your rights under this license will terminate automatically without notice from NVIDIA if you fail + * to comply with any term and condition of this license or if you commence or participate in any legal proceeding + * against NVIDIA with respect to the SOFTWARE. NVIDIA may terminate this license with advance written notice to you if + * NVIDIA decides to no longer provide the SOFTWARE in a country or, in NVIDIA’s sole discretion, the continued use of + * it is no longer commercially viable. Upon any termination of this license, you agree to promptly discontinue use of + * the SOFTWARE and destroy all copies in your possession or control. Your prior distributions in accordance with this + * license are not affected by the termination of this license. All provisions of this license will survive termination, + * except for the license granted to you. * - * 11. APPLICABLE LAW. This license will be governed in all respects by the laws of the United States and of the State of Delaware as those laws are applied to contracts entered into and performed entirely within Delaware by Delaware residents, without regard to the conflicts of laws principles. The United Nations Convention on Contracts for the International Sale of Goods is specifically disclaimed. You agree to all terms of this Agreement in the English language. The state or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction over any dispute or claim arising out of this license. Notwithstanding this, you agree that NVIDIA shall still be allowed to apply for injunctive remedies or an equivalent type of urgent legal relief in any jurisdiction. + * 11. APPLICABLE LAW. 
This license will be governed in all respects by the laws of the United States and of the State + * of Delaware as those laws are applied to contracts entered into and performed entirely within Delaware by Delaware + * residents, without regard to the conflicts of laws principles. The United Nations Convention on Contracts for the + * International Sale of Goods is specifically disclaimed. You agree to all terms of this Agreement in the English + * language. The state or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction + * over any dispute or claim arising out of this license. Notwithstanding this, you agree that NVIDIA shall still be + * allowed to apply for injunctive remedies or an equivalent type of urgent legal relief in any jurisdiction. * - * 12. NO ASSIGNMENT. This license and your rights and obligations thereunder may not be assigned by you by any means or operation of law without NVIDIA’s permission. Any attempted assignment not approved by NVIDIA in writing shall be void and of no effect. + * 12. NO ASSIGNMENT. This license and your rights and obligations thereunder may not be assigned by you by any means or + * operation of law without NVIDIA’s permission. Any attempted assignment not approved by NVIDIA in writing shall be + * void and of no effect. * - * 13. EXPORT. The SOFTWARE is subject to United States export laws and regulations. You agree that you will not ship, transfer or export the SOFTWARE into any country, or use the SOFTWARE in any manner, prohibited by the United States Bureau of Industry and Security or economic sanctions regulations administered by the U.S. Department of Treasury’s Office of Foreign Assets Control (OFAC), or any applicable export laws, restrictions or regulations. These laws include restrictions on destinations, end users and end use. By accepting this license, you confirm that you are not a resident or citizen of any country currently embargoed by the U.S. and that you are not otherwise prohibited from receiving the SOFTWARE. + * 13. EXPORT. The SOFTWARE is subject to United States export laws and regulations. You agree that you will not ship, + * transfer or export the SOFTWARE into any country, or use the SOFTWARE in any manner, prohibited by the United States + * Bureau of Industry and Security or economic sanctions regulations administered by the U.S. Department of Treasury’s + * Office of Foreign Assets Control (OFAC), or any applicable export laws, restrictions or regulations. These laws + * include restrictions on destinations, end users and end use. By accepting this license, you confirm that you are not + * a resident or citizen of any country currently embargoed by the U.S. and that you are not otherwise prohibited from + * receiving the SOFTWARE. * - * 14. GOVERNMENT USE. The SOFTWARE has been developed entirely at private expense and is “commercial items” consisting of “commercial computer software” and “commercial computer software documentation” provided with RESTRICTED RIGHTS. Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the restrictions in this license pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051. + * 14. GOVERNMENT USE. 
The SOFTWARE has been developed entirely at private expense and is “commercial items” consisting + * of “commercial computer software” and “commercial computer software documentation” provided with RESTRICTED RIGHTS. + * Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the + * restrictions in this license pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the + * Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is + * NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051. * - * 15. ENTIRE AGREEMENT. This license is the final, complete and exclusive agreement between the parties relating to the subject matter of this license and supersedes all prior or contemporaneous understandings and agreements relating to this subject matter, whether oral or written. If any court of competent jurisdiction determines that any provision of this license is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. This license may only be modified in a writing signed by an authorized representative of each party. + * 15. ENTIRE AGREEMENT. This license is the final, complete and exclusive agreement between the parties relating to the + * subject matter of this license and supersedes all prior or contemporaneous understandings and agreements relating to + * this subject matter, whether oral or written. If any court of competent jurisdiction determines that any provision of + * this license is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. + * This license may only be modified in a writing signed by an authorized representative of each party. * * (v. 
August 20, 2021) */ @@ -71,56 +149,96 @@ _LIBCUDACXX_BEGIN_NAMESPACE_CUDA -class access_property { - private: - std::uint64_t __descriptor = 0; - - public: - struct shared {}; - struct global {}; - struct persisting { - _CCCL_HOST_DEVICE constexpr operator cudaAccessProperty() const noexcept { - return cudaAccessProperty::cudaAccessPropertyPersisting; - } - }; - struct streaming { - _CCCL_HOST_DEVICE constexpr operator cudaAccessProperty() const noexcept { - return cudaAccessProperty::cudaAccessPropertyStreaming; - } - }; - struct normal { - _CCCL_HOST_DEVICE constexpr operator cudaAccessProperty() const noexcept { - return cudaAccessProperty::cudaAccessPropertyNormal; - } - }; - - _CCCL_HOST_DEVICE constexpr access_property(global) noexcept : __descriptor(__detail_ap::__sm_80::__interleave_normal()) {} - _CCCL_HOST_DEVICE constexpr access_property() noexcept : __descriptor(__detail_ap::__sm_80::__interleave_normal()) {} - constexpr access_property(access_property const&) noexcept = default; - access_property& operator=(const access_property& other) noexcept = default; - - _CCCL_HOST_DEVICE constexpr access_property(normal, float __fraction) : __descriptor(__detail_ap::__interleave(normal{}, __fraction)) {} - _CCCL_HOST_DEVICE constexpr access_property(streaming, float __fraction) : __descriptor(__detail_ap::__interleave(streaming{}, __fraction)) {} - _CCCL_HOST_DEVICE constexpr access_property(persisting, float __fraction) : __descriptor(__detail_ap::__interleave(persisting{}, __fraction)) {} - _CCCL_HOST_DEVICE constexpr access_property(normal, float __fraction, streaming) : __descriptor(__detail_ap::__interleave(normal{}, __fraction, streaming{})) {} - _CCCL_HOST_DEVICE constexpr access_property(persisting, float __fraction, streaming) : __descriptor(__detail_ap::__interleave(persisting{}, __fraction, streaming{})) {} - - _CCCL_HOST_DEVICE constexpr access_property(normal) noexcept : access_property(normal{}, 1.0) {} - _CCCL_HOST_DEVICE constexpr access_property(streaming) noexcept : access_property(streaming{}, 1.0) {} - _CCCL_HOST_DEVICE constexpr access_property(persisting) noexcept : access_property(persisting{}, 1.0) {} - - _CCCL_HOST_DEVICE constexpr access_property(void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, normal) - : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, normal{})) {} - _CCCL_HOST_DEVICE constexpr access_property(void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, streaming) - : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, streaming{})) {} - _CCCL_HOST_DEVICE constexpr access_property(void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, persisting) - : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, persisting{})) {} - _CCCL_HOST_DEVICE constexpr access_property(void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, normal, streaming) - : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, normal{}, streaming{})) {} - _CCCL_HOST_DEVICE constexpr access_property(void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, persisting, streaming) - : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, persisting{}, streaming{})) {} - - _CCCL_HOST_DEVICE constexpr explicit operator std::uint64_t() const noexcept { return __descriptor; } +class access_property +{ +private: + std::uint64_t __descriptor = 0; + +public: + struct shared + {}; + struct global + {}; + struct persisting + { + _CCCL_HOST_DEVICE constexpr 
operator cudaAccessProperty() const noexcept + { + return cudaAccessProperty::cudaAccessPropertyPersisting; + } + }; + struct streaming + { + _CCCL_HOST_DEVICE constexpr operator cudaAccessProperty() const noexcept + { + return cudaAccessProperty::cudaAccessPropertyStreaming; + } + }; + struct normal + { + _CCCL_HOST_DEVICE constexpr operator cudaAccessProperty() const noexcept + { + return cudaAccessProperty::cudaAccessPropertyNormal; + } + }; + + _CCCL_HOST_DEVICE constexpr access_property(global) noexcept + : __descriptor(__detail_ap::__sm_80::__interleave_normal()) + {} + _CCCL_HOST_DEVICE constexpr access_property() noexcept + : __descriptor(__detail_ap::__sm_80::__interleave_normal()) + {} + constexpr access_property(access_property const&) noexcept = default; + access_property& operator=(const access_property& other) noexcept = default; + + _CCCL_HOST_DEVICE constexpr access_property(normal, float __fraction) + : __descriptor(__detail_ap::__interleave(normal{}, __fraction)) + {} + _CCCL_HOST_DEVICE constexpr access_property(streaming, float __fraction) + : __descriptor(__detail_ap::__interleave(streaming{}, __fraction)) + {} + _CCCL_HOST_DEVICE constexpr access_property(persisting, float __fraction) + : __descriptor(__detail_ap::__interleave(persisting{}, __fraction)) + {} + _CCCL_HOST_DEVICE constexpr access_property(normal, float __fraction, streaming) + : __descriptor(__detail_ap::__interleave(normal{}, __fraction, streaming{})) + {} + _CCCL_HOST_DEVICE constexpr access_property(persisting, float __fraction, streaming) + : __descriptor(__detail_ap::__interleave(persisting{}, __fraction, streaming{})) + {} + + _CCCL_HOST_DEVICE constexpr access_property(normal) noexcept + : access_property(normal{}, 1.0) + {} + _CCCL_HOST_DEVICE constexpr access_property(streaming) noexcept + : access_property(streaming{}, 1.0) + {} + _CCCL_HOST_DEVICE constexpr access_property(persisting) noexcept + : access_property(persisting{}, 1.0) + {} + + _CCCL_HOST_DEVICE constexpr access_property(void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, normal) + : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, normal{})) + {} + _CCCL_HOST_DEVICE constexpr access_property(void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, streaming) + : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, streaming{})) + {} + _CCCL_HOST_DEVICE constexpr access_property( + void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, persisting) + : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, persisting{})) + {} + _CCCL_HOST_DEVICE constexpr access_property( + void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, normal, streaming) + : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, normal{}, streaming{})) + {} + _CCCL_HOST_DEVICE constexpr access_property( + void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, persisting, streaming) + : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, persisting{}, streaming{})) + {} + + _CCCL_HOST_DEVICE constexpr explicit operator std::uint64_t() const noexcept + { + return __descriptor; + } }; _LIBCUDACXX_END_NAMESPACE_CUDA @@ -130,195 +248,201 @@ _LIBCUDACXX_END_NAMESPACE_CUDA _LIBCUDACXX_BEGIN_NAMESPACE_CUDA template -_CCCL_HOST_DEVICE -_Tp* associate_access_property(_Tp* __ptr, _Property __prop) { +_CCCL_HOST_DEVICE _Tp* associate_access_property(_Tp* __ptr, _Property __prop) +{ static_assert( - std::is_same<_Property, 
access_property>::value || - std::is_same<_Property, access_property::persisting>::value || - std::is_same<_Property, access_property::streaming>::value || - std::is_same<_Property, access_property::normal>::value || - std::is_same<_Property, access_property::global>::value || - std::is_same<_Property, access_property::shared>::value - , "property is not convertible to cuda::access_property"); + std::is_same<_Property, access_property>::value || std::is_same<_Property, access_property::persisting>::value + || std::is_same<_Property, access_property::streaming>::value + || std::is_same<_Property, access_property::normal>::value + || std::is_same<_Property, access_property::global>::value + || std::is_same<_Property, access_property::shared>::value, + "property is not convertible to cuda::access_property"); return __detail_ap::__associate(__ptr, __prop); } template -_CCCL_HOST_DEVICE -void apply_access_property(const volatile void* __ptr, const _Shape __shape, access_property::persisting __prop) noexcept { - NV_IF_TARGET(NV_PROVIDES_SM_80,( - if (!__isGlobal((void*)__ptr)) return; - - char* __p = reinterpret_cast(const_cast(__ptr)); - static constexpr std::size_t _LINE_SIZE = 128; - std::size_t __nbytes = static_cast(__shape); - std::size_t __end = ((std::uintptr_t)(__p + __nbytes) % _LINE_SIZE) ? __nbytes + _LINE_SIZE : __nbytes; - __end /= _LINE_SIZE; - - //Apply to all 128 bytes aligned cache lines inclusive of __p - for (std::size_t __i = 0; __i < __end; __i += _LINE_SIZE) { - asm volatile ("prefetch.global.L2::evict_last [%0];" ::"l"(__p + (__i * _LINE_SIZE)) :); - } - )) +_CCCL_HOST_DEVICE void +apply_access_property(const volatile void* __ptr, const _Shape __shape, access_property::persisting __prop) noexcept +{ + NV_IF_TARGET( + NV_PROVIDES_SM_80, + (if (!__isGlobal((void*) __ptr)) return; + + char* __p = reinterpret_cast(const_cast(__ptr)); + static constexpr std::size_t _LINE_SIZE = 128; + std::size_t __nbytes = static_cast(__shape); + std::size_t __end = ((std::uintptr_t)(__p + __nbytes) % _LINE_SIZE) ? __nbytes + _LINE_SIZE : __nbytes; + __end /= _LINE_SIZE; + + // Apply to all 128 bytes aligned cache lines inclusive of __p + for (std::size_t __i = 0; __i < __end; __i += _LINE_SIZE) { + asm volatile("prefetch.global.L2::evict_last [%0];" ::"l"(__p + (__i * _LINE_SIZE)) :); + })) } template -_CCCL_HOST_DEVICE -void apply_access_property(const volatile void* __ptr, const _Shape __shape, access_property::normal __prop) noexcept { - NV_IF_TARGET(NV_PROVIDES_SM_80,( - if (!__isGlobal((void*)__ptr)) return; - - char* __p = reinterpret_cast(const_cast(__ptr)); - static constexpr std::size_t _LINE_SIZE = 128; - std::size_t __nbytes = static_cast(__shape); - std::size_t __end = ((std::uintptr_t)(__p + __nbytes) % _LINE_SIZE) ? __nbytes + _LINE_SIZE : __nbytes; - __end /= _LINE_SIZE; - - //Apply to all 128 bytes aligned cache lines inclusive of __p - for (std::size_t __i = 0; __i < __end; __i += _LINE_SIZE) { - asm volatile ("prefetch.global.L2::evict_normal [%0];" ::"l"(__p + (__i * _LINE_SIZE)) :); - } - )) +_CCCL_HOST_DEVICE void +apply_access_property(const volatile void* __ptr, const _Shape __shape, access_property::normal __prop) noexcept +{ + NV_IF_TARGET( + NV_PROVIDES_SM_80, + (if (!__isGlobal((void*) __ptr)) return; + + char* __p = reinterpret_cast(const_cast(__ptr)); + static constexpr std::size_t _LINE_SIZE = 128; + std::size_t __nbytes = static_cast(__shape); + std::size_t __end = ((std::uintptr_t)(__p + __nbytes) % _LINE_SIZE) ? 
__nbytes + _LINE_SIZE : __nbytes; + __end /= _LINE_SIZE; + + // Apply to all 128 bytes aligned cache lines inclusive of __p + for (std::size_t __i = 0; __i < __end; __i += _LINE_SIZE) { + asm volatile("prefetch.global.L2::evict_normal [%0];" ::"l"(__p + (__i * _LINE_SIZE)) :); + })) } -template -class annotated_ptr: public __detail_ap::__annotated_ptr_base<_Property> { - public: - using value_type = _Tp; - using size_type = std::size_t; - using reference = value_type&; - using pointer = value_type*; - using const_pointer = value_type const*; - using difference_type = std::ptrdiff_t; - - private: - using __self = annotated_ptr<_Tp, _Property>; - - // Converting from a 64-bit to 32-bit shared pointer and maybe back just for storage might or might not be profitable. - pointer __repr = (pointer)((size_type)nullptr); - - _CCCL_HOST_DEVICE pointer __get(bool __skip_prop = false, difference_type __n = 0) const { - NV_IF_TARGET(NV_IS_DEVICE,( - if (!__skip_prop) { - return static_cast(this->__apply_prop(const_cast(static_cast(__repr + __n)))); - } - )) - return __repr + __n; - } - _CCCL_HOST_DEVICE pointer __offset(difference_type __n, bool __skip_prop = false) const { - return __get(__skip_prop, __n); - } - - public: - _CCCL_HOST_DEVICE pointer operator->() const { - return __get(); - } - - _CCCL_HOST_DEVICE reference operator*() const { - return *__get(); - } - - _CCCL_HOST_DEVICE reference operator[](difference_type __n) const { - return *__offset(__n); - } - - _CCCL_HOST_DEVICE constexpr difference_type operator-(annotated_ptr o) const { - return __repr - o.__repr; - } - - constexpr annotated_ptr() noexcept = default; - constexpr annotated_ptr(annotated_ptr const&) noexcept = default; - // No constexpr for c11 as the method can't be const - _CCCL_CONSTEXPR_CXX14 annotated_ptr& operator=(annotated_ptr const& other) noexcept = default; - - _CCCL_HOST_DEVICE explicit annotated_ptr(pointer __p) +template +class annotated_ptr : public __detail_ap::__annotated_ptr_base<_Property> +{ +public: + using value_type = _Tp; + using size_type = std::size_t; + using reference = value_type&; + using pointer = value_type*; + using const_pointer = value_type const*; + using difference_type = std::ptrdiff_t; + +private: + using __self = annotated_ptr<_Tp, _Property>; + + // Converting from a 64-bit to 32-bit shared pointer and maybe back just for storage might or might not be profitable. 
+ pointer __repr = (pointer) ((size_type) nullptr); + + _CCCL_HOST_DEVICE pointer __get(bool __skip_prop = false, difference_type __n = 0) const + { + NV_IF_TARGET(NV_IS_DEVICE, (if (!__skip_prop) { + return static_cast( + this->__apply_prop(const_cast(static_cast(__repr + __n)))); + })) + return __repr + __n; + } + _CCCL_HOST_DEVICE pointer __offset(difference_type __n, bool __skip_prop = false) const + { + return __get(__skip_prop, __n); + } + +public: + _CCCL_HOST_DEVICE pointer operator->() const + { + return __get(); + } + + _CCCL_HOST_DEVICE reference operator*() const + { + return *__get(); + } + + _CCCL_HOST_DEVICE reference operator[](difference_type __n) const + { + return *__offset(__n); + } + + _CCCL_HOST_DEVICE constexpr difference_type operator-(annotated_ptr o) const + { + return __repr - o.__repr; + } + + constexpr annotated_ptr() noexcept = default; + constexpr annotated_ptr(annotated_ptr const&) noexcept = default; + // No constexpr for c11 as the method can't be const + _CCCL_CONSTEXPR_CXX14 annotated_ptr& operator=(annotated_ptr const& other) noexcept = default; + + _CCCL_HOST_DEVICE explicit annotated_ptr(pointer __p) : __repr(__p) - { - NV_IF_TARGET(NV_IS_DEVICE,( - _LIBCUDACXX_DEBUG_ASSERT((std::is_same<_Property, shared>::value && __isShared(__p) || __isGlobal(__p)), ""); - )) - } - - template - _CCCL_HOST_DEVICE annotated_ptr(pointer __p, _RuntimeProperty __prop) - : __detail_ap::__annotated_ptr_base<_Property>(static_cast(access_property(__prop))), __repr(__p) - { - static_assert(std::is_same<_Property, access_property>::value, - "This method requires annotated_ptr"); - static_assert(std::is_same<_RuntimeProperty, access_property::global>::value || - std::is_same<_RuntimeProperty, access_property::normal>::value || - std::is_same<_RuntimeProperty, access_property::streaming>::value || - std::is_same<_RuntimeProperty, access_property::persisting>::value || - std::is_same<_RuntimeProperty, access_property>::value, - "This method requires RuntimeProperty=global|normal|streaming|persisting|access_property"); - NV_IF_TARGET(NV_IS_DEVICE,( - _LIBCUDACXX_DEBUG_ASSERT((__isGlobal(__p) == true), ""); - )) - } - - template - _CCCL_HOST_DEVICE annotated_ptr(const annotated_ptr<_TTp,_Prop>& __other); - - _CCCL_HOST_DEVICE constexpr explicit operator bool() const noexcept { - return __repr != nullptr; - } - - _CCCL_HOST_DEVICE pointer get() const noexcept { - constexpr bool __is_shared = std::is_same<_Property, access_property::shared>::value; - return __is_shared ? 
__repr : &(*annotated_ptr(__repr)); - } - - _CCCL_HOST_DEVICE _Property __property() const noexcept { - return this->__get_property(); - } + { + NV_IF_TARGET( + NV_IS_DEVICE, + (_LIBCUDACXX_DEBUG_ASSERT((std::is_same<_Property, shared>::value && __isShared(__p) || __isGlobal(__p)), "");)) + } + + template + _CCCL_HOST_DEVICE annotated_ptr(pointer __p, _RuntimeProperty __prop) + : __detail_ap::__annotated_ptr_base<_Property>(static_cast(access_property(__prop))) + , __repr(__p) + { + static_assert(std::is_same<_Property, access_property>::value, + "This method requires annotated_ptr"); + static_assert( + std::is_same<_RuntimeProperty, access_property::global>::value + || std::is_same<_RuntimeProperty, access_property::normal>::value + || std::is_same<_RuntimeProperty, access_property::streaming>::value + || std::is_same<_RuntimeProperty, access_property::persisting>::value + || std::is_same<_RuntimeProperty, access_property>::value, + "This method requires RuntimeProperty=global|normal|streaming|persisting|access_property"); + NV_IF_TARGET(NV_IS_DEVICE, (_LIBCUDACXX_DEBUG_ASSERT((__isGlobal(__p) == true), "");)) + } + + template + _CCCL_HOST_DEVICE annotated_ptr(const annotated_ptr<_TTp, _Prop>& __other); + + _CCCL_HOST_DEVICE constexpr explicit operator bool() const noexcept + { + return __repr != nullptr; + } + + _CCCL_HOST_DEVICE pointer get() const noexcept + { + constexpr bool __is_shared = std::is_same<_Property, access_property::shared>::value; + return __is_shared ? __repr : &(*annotated_ptr(__repr)); + } + + _CCCL_HOST_DEVICE _Property __property() const noexcept + { + return this->__get_property(); + } }; - -template -template -_CCCL_HOST_DEVICE annotated_ptr<_Tp, _Property>::annotated_ptr(const annotated_ptr<_TTp,_Prop>& __other) - : __detail_ap::__annotated_ptr_base<_Property>(__other.__property()), __repr(__other.get()) +template +template +_CCCL_HOST_DEVICE annotated_ptr<_Tp, _Property>::annotated_ptr(const annotated_ptr<_TTp, _Prop>& __other) + : __detail_ap::__annotated_ptr_base<_Property>(__other.__property()) + , __repr(__other.get()) { static_assert(std::is_assignable::value, "pointer must be assignable from other pointer"); - static_assert((std::is_same<_Property, access_property>::value && !std::is_same<_Prop, access_property::shared>::value) || - std::is_same<_Property, _Prop>::value, "Property must be either access_property or other property, and both properties must have same address space"); + static_assert( + (std::is_same<_Property, access_property>::value && !std::is_same<_Prop, access_property::shared>::value) + || std::is_same<_Property, _Prop>::value, + "Property must be either access_property or other property, and both properties must have same address space"); // note: precondition "__other.__rep must be compatible with _Property" currently always holds } -template -_CCCL_HOST_DEVICE -void memcpy_async(_Dst* __dst, - annotated_ptr<_Src,_SrcProperty> __src, - _Shape __shape, _Sync & __sync) { +template +_CCCL_HOST_DEVICE void memcpy_async(_Dst* __dst, annotated_ptr<_Src, _SrcProperty> __src, _Shape __shape, _Sync& __sync) +{ memcpy_async(__dst, &(*__src), __shape, __sync); } -template -_CCCL_HOST_DEVICE -void memcpy_async(annotated_ptr<_Dst,_DstProperty> __dst, - annotated_ptr<_Src,_SrcProperty> __src, - _Shape __shape, _Sync & __sync){ +template +_CCCL_HOST_DEVICE void memcpy_async( + annotated_ptr<_Dst, _DstProperty> __dst, annotated_ptr<_Src, _SrcProperty> __src, _Shape __shape, _Sync& __sync) +{ memcpy_async(&(*__dst), &(*__src), __shape, __sync); } 
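[Reviewer note, not part of the patch] The annotated_ptr hunks above are formatting-only; the public surface of cuda::access_property and cuda::annotated_ptr, and the annotated_ptr overloads of cuda::memcpy_async, are unchanged. A minimal usage sketch of the declarations being reformatted, for reviewers unfamiliar with this header — the kernel and buffer names are hypothetical and the residence hints purely illustrative:

  #include <cuda/annotated_ptr>

  __global__ void scale(const float* in, float* out, size_t n)
  {
    // Compile-time residence hints: reads through `in_p` request a persisting
    // L2 policy, writes through `out_p` a streaming policy.
    cuda::annotated_ptr<const float, cuda::access_property::persisting> in_p{in};
    cuda::annotated_ptr<float, cuda::access_property::streaming> out_p{out};

    // A property chosen at run time can instead be carried by
    // annotated_ptr<T, cuda::access_property>, e.g.
    //   cuda::access_property ap{cuda::access_property::persisting{}, 0.5f};
    //   cuda::annotated_ptr<float, cuda::access_property> out_rt{out, ap};

    for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += size_t{gridDim.x} * blockDim.x)
    {
      out_p[i] = 2.0f * in_p[i]; // operator[] applies the hint on each access
    }
  }
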
-template -_CCCL_HOST_DEVICE -void memcpy_async(const _Group & __group, - _Dst * __dst, - annotated_ptr<_Src,_SrcProperty> __src, - _Shape __shape, _Sync & __sync) { +template +_CCCL_HOST_DEVICE void +memcpy_async(const _Group& __group, _Dst* __dst, annotated_ptr<_Src, _SrcProperty> __src, _Shape __shape, _Sync& __sync) +{ memcpy_async(__group, __dst, &(*__src), __shape, __sync); } -template -_CCCL_HOST_DEVICE -void memcpy_async(const _Group & __group, - annotated_ptr<_Dst,_DstProperty> __dst, - annotated_ptr<_Src,_SrcProperty> __src, - _Shape __shape, _Sync & __sync) { +template +_CCCL_HOST_DEVICE void memcpy_async( + const _Group& __group, + annotated_ptr<_Dst, _DstProperty> __dst, + annotated_ptr<_Src, _SrcProperty> __src, + _Shape __shape, + _Sync& __sync) +{ memcpy_async(__group, &(*__dst), &(*__src), __shape, __sync); } diff --git a/libcudacxx/include/cuda/barrier b/libcudacxx/include/cuda/barrier index e19684cfece..99117dde90b 100644 --- a/libcudacxx/include/cuda/barrier +++ b/libcudacxx/include/cuda/barrier @@ -21,8 +21,8 @@ # pragma system_header #endif // no system header -#include #include +#include // Forward-declare CUtensorMap for use in cp_async_bulk_tensor_* PTX wrapping // functions. These functions take a pointer to CUtensorMap, so do not need to @@ -54,175 +54,185 @@ _LIBCUDACXX_BEGIN_NAMESPACE_CUDA_DEVICE_EXPERIMENTAL #ifdef __cccl_lib_experimental_ctk12_cp_async_exposure // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk -inline _CCCL_DEVICE -void cp_async_bulk_global_to_shared(void *__dest, const void *__src, _CUDA_VSTD::uint32_t __size, ::cuda::barrier<::cuda::thread_scope_block> &__bar) +inline _CCCL_DEVICE void cp_async_bulk_global_to_shared( + void* __dest, const void* __src, _CUDA_VSTD::uint32_t __size, ::cuda::barrier<::cuda::thread_scope_block>& __bar) { - _LIBCUDACXX_DEBUG_ASSERT(__size % 16 == 0, "Size must be multiple of 16."); - _LIBCUDACXX_DEBUG_ASSERT(__isShared(__dest), "Destination must be shared memory address."); - _LIBCUDACXX_DEBUG_ASSERT(__isGlobal(__src), "Source must be global memory address."); - - _CUDA_VPTX::cp_async_bulk( - _CUDA_VPTX::space_cluster, _CUDA_VPTX::space_global, - __dest, __src, __size, - ::cuda::device::barrier_native_handle(__bar)); + _LIBCUDACXX_DEBUG_ASSERT(__size % 16 == 0, "Size must be multiple of 16."); + _LIBCUDACXX_DEBUG_ASSERT(__isShared(__dest), "Destination must be shared memory address."); + _LIBCUDACXX_DEBUG_ASSERT(__isGlobal(__src), "Source must be global memory address."); + + _CUDA_VPTX::cp_async_bulk( + _CUDA_VPTX::space_cluster, + _CUDA_VPTX::space_global, + __dest, + __src, + __size, + ::cuda::device::barrier_native_handle(__bar)); } - // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk -inline _CCCL_DEVICE -void cp_async_bulk_shared_to_global(void *__dest, const void * __src, _CUDA_VSTD::uint32_t __size) +inline _CCCL_DEVICE void cp_async_bulk_shared_to_global(void* __dest, const void* __src, _CUDA_VSTD::uint32_t __size) { - _LIBCUDACXX_DEBUG_ASSERT(__size % 16 == 0, "Size must be multiple of 16."); - _LIBCUDACXX_DEBUG_ASSERT(__isGlobal(__dest), "Destination must be global memory address."); - _LIBCUDACXX_DEBUG_ASSERT(__isShared(__src), "Source must be shared memory address."); + _LIBCUDACXX_DEBUG_ASSERT(__size % 16 == 0, "Size must be multiple of 16."); + _LIBCUDACXX_DEBUG_ASSERT(__isGlobal(__dest), "Destination must be global memory address."); + 
_LIBCUDACXX_DEBUG_ASSERT(__isShared(__src), "Source must be shared memory address."); - _CUDA_VPTX::cp_async_bulk( - _CUDA_VPTX::space_global, _CUDA_VPTX::space_shared, - __dest, __src, __size); + _CUDA_VPTX::cp_async_bulk(_CUDA_VPTX::space_global, _CUDA_VPTX::space_shared, __dest, __src, __size); } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor -inline _CCCL_DEVICE -void cp_async_bulk_tensor_1d_global_to_shared( - void *__dest, const CUtensorMap *__tensor_map , int __c0, ::cuda::barrier<::cuda::thread_scope_block> &__bar) +inline _CCCL_DEVICE void cp_async_bulk_tensor_1d_global_to_shared( + void* __dest, const CUtensorMap* __tensor_map, int __c0, ::cuda::barrier<::cuda::thread_scope_block>& __bar) { - const _CUDA_VSTD::int32_t __coords[]{__c0}; - - _CUDA_VPTX::cp_async_bulk_tensor( - _CUDA_VPTX::space_cluster, _CUDA_VPTX::space_global, - __dest, __tensor_map, __coords, - ::cuda::device::barrier_native_handle(__bar)); + const _CUDA_VSTD::int32_t __coords[]{__c0}; + + _CUDA_VPTX::cp_async_bulk_tensor( + _CUDA_VPTX::space_cluster, + _CUDA_VPTX::space_global, + __dest, + __tensor_map, + __coords, + ::cuda::device::barrier_native_handle(__bar)); } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor -inline _CCCL_DEVICE -void cp_async_bulk_tensor_2d_global_to_shared( - void *__dest, const CUtensorMap *__tensor_map , int __c0, int __c1, ::cuda::barrier<::cuda::thread_scope_block> &__bar) +inline _CCCL_DEVICE void cp_async_bulk_tensor_2d_global_to_shared( + void* __dest, const CUtensorMap* __tensor_map, int __c0, int __c1, ::cuda::barrier<::cuda::thread_scope_block>& __bar) { - const _CUDA_VSTD::int32_t __coords[]{__c0, __c1}; - - _CUDA_VPTX::cp_async_bulk_tensor( - _CUDA_VPTX::space_cluster, _CUDA_VPTX::space_global, - __dest, __tensor_map, __coords, - ::cuda::device::barrier_native_handle(__bar)); + const _CUDA_VSTD::int32_t __coords[]{__c0, __c1}; + + _CUDA_VPTX::cp_async_bulk_tensor( + _CUDA_VPTX::space_cluster, + _CUDA_VPTX::space_global, + __dest, + __tensor_map, + __coords, + ::cuda::device::barrier_native_handle(__bar)); } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor -inline _CCCL_DEVICE -void cp_async_bulk_tensor_3d_global_to_shared( - void *__dest, const CUtensorMap *__tensor_map, int __c0, int __c1, int __c2, ::cuda::barrier<::cuda::thread_scope_block> &__bar) +inline _CCCL_DEVICE void cp_async_bulk_tensor_3d_global_to_shared( + void* __dest, + const CUtensorMap* __tensor_map, + int __c0, + int __c1, + int __c2, + ::cuda::barrier<::cuda::thread_scope_block>& __bar) { - const _CUDA_VSTD::int32_t __coords[]{__c0, __c1, __c2}; - - _CUDA_VPTX::cp_async_bulk_tensor( - _CUDA_VPTX::space_cluster, _CUDA_VPTX::space_global, - __dest, __tensor_map, __coords, - ::cuda::device::barrier_native_handle(__bar)); + const _CUDA_VSTD::int32_t __coords[]{__c0, __c1, __c2}; + + _CUDA_VPTX::cp_async_bulk_tensor( + _CUDA_VPTX::space_cluster, + _CUDA_VPTX::space_global, + __dest, + __tensor_map, + __coords, + ::cuda::device::barrier_native_handle(__bar)); } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor -inline _CCCL_DEVICE -void cp_async_bulk_tensor_4d_global_to_shared( - void *__dest, const CUtensorMap *__tensor_map , int __c0, int __c1, int __c2, int __c3, 
::cuda::barrier<::cuda::thread_scope_block> &__bar) +inline _CCCL_DEVICE void cp_async_bulk_tensor_4d_global_to_shared( + void* __dest, + const CUtensorMap* __tensor_map, + int __c0, + int __c1, + int __c2, + int __c3, + ::cuda::barrier<::cuda::thread_scope_block>& __bar) { - const _CUDA_VSTD::int32_t __coords[]{__c0, __c1, __c2, __c3}; - - _CUDA_VPTX::cp_async_bulk_tensor( - _CUDA_VPTX::space_cluster, _CUDA_VPTX::space_global, - __dest, __tensor_map, __coords, - ::cuda::device::barrier_native_handle(__bar)); + const _CUDA_VSTD::int32_t __coords[]{__c0, __c1, __c2, __c3}; + + _CUDA_VPTX::cp_async_bulk_tensor( + _CUDA_VPTX::space_cluster, + _CUDA_VPTX::space_global, + __dest, + __tensor_map, + __coords, + ::cuda::device::barrier_native_handle(__bar)); } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor -inline _CCCL_DEVICE -void cp_async_bulk_tensor_5d_global_to_shared( - void *__dest, const CUtensorMap *__tensor_map , int __c0, int __c1, int __c2, int __c3, int __c4, ::cuda::barrier<::cuda::thread_scope_block> &__bar) +inline _CCCL_DEVICE void cp_async_bulk_tensor_5d_global_to_shared( + void* __dest, + const CUtensorMap* __tensor_map, + int __c0, + int __c1, + int __c2, + int __c3, + int __c4, + ::cuda::barrier<::cuda::thread_scope_block>& __bar) { - const _CUDA_VSTD::int32_t __coords[]{__c0, __c1, __c2, __c3, __c4}; - - _CUDA_VPTX::cp_async_bulk_tensor( - _CUDA_VPTX::space_cluster, _CUDA_VPTX::space_global, - __dest, __tensor_map, __coords, - ::cuda::device::barrier_native_handle(__bar)); + const _CUDA_VSTD::int32_t __coords[]{__c0, __c1, __c2, __c3, __c4}; + + _CUDA_VPTX::cp_async_bulk_tensor( + _CUDA_VPTX::space_cluster, + _CUDA_VPTX::space_global, + __dest, + __tensor_map, + __coords, + ::cuda::device::barrier_native_handle(__bar)); } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor -inline _CCCL_DEVICE -void cp_async_bulk_tensor_1d_shared_to_global( - const CUtensorMap *__tensor_map, int __c0, const void *__src) +inline _CCCL_DEVICE void +cp_async_bulk_tensor_1d_shared_to_global(const CUtensorMap* __tensor_map, int __c0, const void* __src) { - const _CUDA_VSTD::int32_t __coords[]{__c0}; + const _CUDA_VSTD::int32_t __coords[]{__c0}; - _CUDA_VPTX::cp_async_bulk_tensor( - _CUDA_VPTX::space_global, _CUDA_VPTX::space_shared, - __tensor_map, __coords, __src); + _CUDA_VPTX::cp_async_bulk_tensor(_CUDA_VPTX::space_global, _CUDA_VPTX::space_shared, __tensor_map, __coords, __src); } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor -inline _CCCL_DEVICE -void cp_async_bulk_tensor_2d_shared_to_global( - const CUtensorMap *__tensor_map, int __c0, int __c1, const void *__src) +inline _CCCL_DEVICE void +cp_async_bulk_tensor_2d_shared_to_global(const CUtensorMap* __tensor_map, int __c0, int __c1, const void* __src) { - const _CUDA_VSTD::int32_t __coords[]{__c0, __c1}; + const _CUDA_VSTD::int32_t __coords[]{__c0, __c1}; - _CUDA_VPTX::cp_async_bulk_tensor( - _CUDA_VPTX::space_global, _CUDA_VPTX::space_shared, - __tensor_map, __coords, __src); + _CUDA_VPTX::cp_async_bulk_tensor(_CUDA_VPTX::space_global, _CUDA_VPTX::space_shared, __tensor_map, __coords, __src); } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor -inline _CCCL_DEVICE -void cp_async_bulk_tensor_3d_shared_to_global( - 
const CUtensorMap *__tensor_map, int __c0, int __c1, int __c2, const void *__src) +inline _CCCL_DEVICE void cp_async_bulk_tensor_3d_shared_to_global( + const CUtensorMap* __tensor_map, int __c0, int __c1, int __c2, const void* __src) { - const _CUDA_VSTD::int32_t __coords[]{__c0, __c1, __c2}; + const _CUDA_VSTD::int32_t __coords[]{__c0, __c1, __c2}; - _CUDA_VPTX::cp_async_bulk_tensor( - _CUDA_VPTX::space_global, _CUDA_VPTX::space_shared, - __tensor_map, __coords, __src); + _CUDA_VPTX::cp_async_bulk_tensor(_CUDA_VPTX::space_global, _CUDA_VPTX::space_shared, __tensor_map, __coords, __src); } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor -inline _CCCL_DEVICE -void cp_async_bulk_tensor_4d_shared_to_global( - const CUtensorMap *__tensor_map, int __c0, int __c1, int __c2, int __c3, const void *__src) +inline _CCCL_DEVICE void cp_async_bulk_tensor_4d_shared_to_global( + const CUtensorMap* __tensor_map, int __c0, int __c1, int __c2, int __c3, const void* __src) { - const _CUDA_VSTD::int32_t __coords[]{__c0, __c1, __c2, __c3}; + const _CUDA_VSTD::int32_t __coords[]{__c0, __c1, __c2, __c3}; - _CUDA_VPTX::cp_async_bulk_tensor( - _CUDA_VPTX::space_global, _CUDA_VPTX::space_shared, - __tensor_map, __coords, __src); + _CUDA_VPTX::cp_async_bulk_tensor(_CUDA_VPTX::space_global, _CUDA_VPTX::space_shared, __tensor_map, __coords, __src); } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor -inline _CCCL_DEVICE -void cp_async_bulk_tensor_5d_shared_to_global( - const CUtensorMap *__tensor_map, int __c0, int __c1, int __c2, int __c3, int __c4, const void *__src) +inline _CCCL_DEVICE void cp_async_bulk_tensor_5d_shared_to_global( + const CUtensorMap* __tensor_map, int __c0, int __c1, int __c2, int __c3, int __c4, const void* __src) { - const _CUDA_VSTD::int32_t __coords[]{__c0, __c1, __c2, __c3, __c4}; + const _CUDA_VSTD::int32_t __coords[]{__c0, __c1, __c2, __c3, __c4}; - _CUDA_VPTX::cp_async_bulk_tensor( - _CUDA_VPTX::space_global, _CUDA_VPTX::space_shared, - __tensor_map, __coords, __src); + _CUDA_VPTX::cp_async_bulk_tensor(_CUDA_VPTX::space_global, _CUDA_VPTX::space_shared, __tensor_map, __coords, __src); } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar -inline _CCCL_DEVICE -void fence_proxy_async_shared_cta() { - _CUDA_VPTX::fence_proxy_async(_CUDA_VPTX::space_shared); +inline _CCCL_DEVICE void fence_proxy_async_shared_cta() +{ + _CUDA_VPTX::fence_proxy_async(_CUDA_VPTX::space_shared); } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-commit-group -inline _CCCL_DEVICE -void cp_async_bulk_commit_group() +inline _CCCL_DEVICE void cp_async_bulk_commit_group() { - _CUDA_VPTX::cp_async_bulk_commit_group(); + _CUDA_VPTX::cp_async_bulk_commit_group(); } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-wait-group template -inline _CCCL_DEVICE -void cp_async_bulk_wait_group_read() +inline _CCCL_DEVICE void cp_async_bulk_wait_group_read() { static_assert(__n_prior <= 63, "cp_async_bulk_wait_group_read: waiting for more than 63 groups is not supported."); _CUDA_VPTX::cp_async_bulk_wait_group_read(_CUDA_VPTX::n32_t<__n_prior>{}); diff --git a/libcudacxx/include/cuda/discard_memory b/libcudacxx/include/cuda/discard_memory 
index 5893bf6108e..d6c772d57a2 100644 --- a/libcudacxx/include/cuda/discard_memory +++ b/libcudacxx/include/cuda/discard_memory @@ -21,8 +21,8 @@ # pragma system_header #endif // no system header -#include #include +#include _LIBCUDACXX_BEGIN_NAMESPACE_CUDA @@ -37,14 +37,14 @@ inline _CCCL_HOST_DEVICE void discard_memory(volatile void* __ptr, size_t __nbyt NV_PROVIDES_SM_80, (if (!__isGlobal((void*) __ptr)) return; - char* __p = reinterpret_cast(const_cast(__ptr)); - char* const __end_p = __p + __nbytes; + char* __p = reinterpret_cast(const_cast(__ptr)); + char* const __end_p = __p + __nbytes; static constexpr size_t _LINE_SIZE = 128; // Trim the first block and last block if they're not 128 bytes aligned - size_t __misalignment = reinterpret_cast(__p) % _LINE_SIZE; - char* __start_aligned = __misalignment == 0 ? __p : __p + (_LINE_SIZE - __misalignment); - char* const __end_aligned = __end_p - (reinterpret_cast(__end_p) % _LINE_SIZE); + size_t __misalignment = reinterpret_cast(__p) % _LINE_SIZE; + char* __start_aligned = __misalignment == 0 ? __p : __p + (_LINE_SIZE - __misalignment); + char* const __end_aligned = __end_p - (reinterpret_cast(__end_p) % _LINE_SIZE); while (__start_aligned < __end_aligned) { asm volatile("discard.global.L2 [%0], 128;" ::"l"(__start_aligned) :); diff --git a/libcudacxx/include/cuda/functional b/libcudacxx/include/cuda/functional index 955631e23a5..f8aaef4f0a9 100644 --- a/libcudacxx/include/cuda/functional +++ b/libcudacxx/include/cuda/functional @@ -4,50 +4,128 @@ * * NVIDIA SOFTWARE LICENSE * - * This license is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of the NVIDIA/CUDA C++ Library software and materials provided hereunder (“SOFTWARE”). + * This license is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of the + * NVIDIA/CUDA C++ Library software and materials provided hereunder (“SOFTWARE”). * - * This license can be accepted only by an adult of legal age of majority in the country in which the SOFTWARE is used. If you are under the legal age of majority, you must ask your parent or legal guardian to consent to this license. By taking delivery of the SOFTWARE, you affirm that you have reached the legal age of majority, you accept the terms of this license, and you take legal and financial responsibility for the actions of your permitted users. + * This license can be accepted only by an adult of legal age of majority in the country in which the SOFTWARE is used. + * If you are under the legal age of majority, you must ask your parent or legal guardian to consent to this license. By + * taking delivery of the SOFTWARE, you affirm that you have reached the legal age of majority, you accept the terms of + * this license, and you take legal and financial responsibility for the actions of your permitted users. * - * You agree to use the SOFTWARE only for purposes that are permitted by (a) this license, and (b) any applicable law, regulation or generally accepted practices or guidelines in the relevant jurisdictions. + * You agree to use the SOFTWARE only for purposes that are permitted by (a) this license, and (b) any applicable law, + * regulation or generally accepted practices or guidelines in the relevant jurisdictions. * - * 1. LICENSE. Subject to the terms of this license, NVIDIA grants you a non-exclusive limited license to: (a) install and use the SOFTWARE, and (b) distribute the SOFTWARE subject to the distribution requirements described in this license. 
NVIDIA reserves all rights, title and interest in and to the SOFTWARE not expressly granted to you under this license. + * 1. LICENSE. Subject to the terms of this license, NVIDIA grants you a non-exclusive limited license to: (a) install + * and use the SOFTWARE, and (b) distribute the SOFTWARE subject to the distribution requirements described in this + * license. NVIDIA reserves all rights, title and interest in and to the SOFTWARE not expressly granted to you under + * this license. * * 2. DISTRIBUTION REQUIREMENTS. These are the distribution requirements for you to exercise the distribution grant: - * a. The terms under which you distribute the SOFTWARE must be consistent with the terms of this license, including (without limitation) terms relating to the license grant and license restrictions and protection of NVIDIA’s intellectual property rights. - * b. You agree to notify NVIDIA in writing of any known or suspected distribution or use of the SOFTWARE not in compliance with the requirements of this license, and to enforce the terms of your agreements with respect to distributed SOFTWARE. + * a. The terms under which you distribute the SOFTWARE must be consistent with the terms of this license, + * including (without limitation) terms relating to the license grant and license restrictions and protection of + * NVIDIA’s intellectual property rights. b. You agree to notify NVIDIA in writing of any known or suspected + * distribution or use of the SOFTWARE not in compliance with the requirements of this license, and to enforce the terms + * of your agreements with respect to distributed SOFTWARE. * * 3. LIMITATIONS. Your license to use the SOFTWARE is restricted as follows: * a. The SOFTWARE is licensed for you to develop applications only for use in systems with NVIDIA GPUs. - * b. You may not reverse engineer, decompile or disassemble, or remove copyright or other proprietary notices from any portion of the SOFTWARE or copies of the SOFTWARE. - * c. You may not modify or create derivative works of any portion of the SOFTWARE. - * d. You may not bypass, disable, or circumvent any technical measure, encryption, security, digital rights management or authentication mechanism in the SOFTWARE. - * e. You may not use the SOFTWARE in any manner that would cause it to become subject to an open source software license. As examples, licenses that require as a condition of use, modification, and/or distribution that the SOFTWARE be (i) disclosed or distributed in source code form; (ii) licensed for the purpose of making derivative works; or (iii) redistributable at no charge. - * f. Unless you have an agreement with NVIDIA for this purpose, you may not use the SOFTWARE with any system or application where the use or failure of the system or application can reasonably be expected to threaten or result in personal injury, death, or catastrophic loss. Examples include use in avionics, navigation, military, medical, life support or other life critical applications. NVIDIA does not design, test or manufacture the SOFTWARE for these critical uses and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or damages arising from such uses. - * g. 
You agree to defend, indemnify and hold harmless NVIDIA and its affiliates, and their respective employees, contractors, agents, officers and directors, from and against any and all claims, damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited to attorney’s fees and costs incident to establishing the right of indemnification) arising out of or related to use of the SOFTWARE outside of the scope of this Agreement, or not in compliance with its terms. - * - * 4. PRE-RELEASE. SOFTWARE versions identified as alpha, beta, preview, early access or otherwise as pre-release may not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, availability, and reliability standards relative to commercial versions of NVIDIA software and materials. You may use a pre-release SOFTWARE version at your own risk, understanding that these versions are not intended for use in production or business-critical systems. - * - * 5. OWNERSHIP. The SOFTWARE and the related intellectual property rights therein are and will remain the sole and exclusive property of NVIDIA or its licensors. The SOFTWARE is copyrighted and protected by the laws of the United States and other countries, and international treaty provisions. NVIDIA may make changes to the SOFTWARE, at any time without notice, but is not obligated to support or update the SOFTWARE. - * - * 6. COMPONENTS UNDER OTHER LICENSES. The SOFTWARE may include NVIDIA or third-party components with separate legal notices or terms as may be described in proprietary notices accompanying the SOFTWARE. If and to the extent there is a conflict between the terms in this license and the license terms associated with a component, the license terms associated with the components control only to the extent necessary to resolve the conflict. - * - * 7. FEEDBACK. You may, but don’t have to, provide to NVIDIA any Feedback. “Feedback” means any suggestions, bug fixes, enhancements, modifications, feature requests or other feedback regarding the SOFTWARE. For any Feedback that you voluntarily provide, you hereby grant NVIDIA and its affiliates a perpetual, non-exclusive, worldwide, irrevocable license to use, reproduce, modify, license, sublicense (through multiple tiers of sublicensees), and distribute (through multiple tiers of distributors) the Feedback without the payment of any royalties or fees to you. NVIDIA will use Feedback at its choice. - * - * 8. NO WARRANTIES. THE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING, BUT NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. NVIDIA DOES NOT WARRANT THAT THE SOFTWARE WILL MEET YOUR REQUIREMENTS OR THAT THE OPERATION THEREOF WILL BE UNINTERRUPTED OR ERROR-FREE, OR THAT ALL ERRORS WILL BE CORRECTED. - * - * 9. LIMITATIONS OF LIABILITY. 
TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, PROJECT DELAYS, LOSS OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH THIS LICENSE OR THE USE OR PERFORMANCE OF THE SOFTWARE, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF LIABILITY, EVEN IF NVIDIA HAS PREVIOUSLY BEEN ADVISED OF, OR COULD REASONABLY HAVE FORESEEN, THE POSSIBILITY OF SUCH DAMAGES. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS LICENSE EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS LIMIT. - * - * 10. TERMINATION. Your rights under this license will terminate automatically without notice from NVIDIA if you fail to comply with any term and condition of this license or if you commence or participate in any legal proceeding against NVIDIA with respect to the SOFTWARE. NVIDIA may terminate this license with advance written notice to you if NVIDIA decides to no longer provide the SOFTWARE in a country or, in NVIDIA’s sole discretion, the continued use of it is no longer commercially viable. Upon any termination of this license, you agree to promptly discontinue use of the SOFTWARE and destroy all copies in your possession or control. Your prior distributions in accordance with this license are not affected by the termination of this license. All provisions of this license will survive termination, except for the license granted to you. - * - * 11. APPLICABLE LAW. This license will be governed in all respects by the laws of the United States and of the State of Delaware as those laws are applied to contracts entered into and performed entirely within Delaware by Delaware residents, without regard to the conflicts of laws principles. The United Nations Convention on Contracts for the International Sale of Goods is specifically disclaimed. You agree to all terms of this Agreement in the English language. The state or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction over any dispute or claim arising out of this license. Notwithstanding this, you agree that NVIDIA shall still be allowed to apply for injunctive remedies or an equivalent type of urgent legal relief in any jurisdiction. - * - * 12. NO ASSIGNMENT. This license and your rights and obligations thereunder may not be assigned by you by any means or operation of law without NVIDIA’s permission. Any attempted assignment not approved by NVIDIA in writing shall be void and of no effect. - * - * 13. EXPORT. The SOFTWARE is subject to United States export laws and regulations. You agree that you will not ship, transfer or export the SOFTWARE into any country, or use the SOFTWARE in any manner, prohibited by the United States Bureau of Industry and Security or economic sanctions regulations administered by the U.S. Department of Treasury’s Office of Foreign Assets Control (OFAC), or any applicable export laws, restrictions or regulations. These laws include restrictions on destinations, end users and end use. By accepting this license, you confirm that you are not a resident or citizen of any country currently embargoed by the U.S. 
and that you are not otherwise prohibited from receiving the SOFTWARE. - * - * 14. GOVERNMENT USE. The SOFTWARE has been developed entirely at private expense and is “commercial items” consisting of “commercial computer software” and “commercial computer software documentation” provided with RESTRICTED RIGHTS. Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the restrictions in this license pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051. - * - * 15. ENTIRE AGREEMENT. This license is the final, complete and exclusive agreement between the parties relating to the subject matter of this license and supersedes all prior or contemporaneous understandings and agreements relating to this subject matter, whether oral or written. If any court of competent jurisdiction determines that any provision of this license is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. This license may only be modified in a writing signed by an authorized representative of each party. + * b. You may not reverse engineer, decompile or disassemble, or remove copyright or other proprietary notices from + * any portion of the SOFTWARE or copies of the SOFTWARE. c. You may not modify or create derivative works of any + * portion of the SOFTWARE. d. You may not bypass, disable, or circumvent any technical measure, encryption, + * security, digital rights management or authentication mechanism in the SOFTWARE. e. You may not use the SOFTWARE + * in any manner that would cause it to become subject to an open source software license. As examples, licenses that + * require as a condition of use, modification, and/or distribution that the SOFTWARE be (i) disclosed or distributed in + * source code form; (ii) licensed for the purpose of making derivative works; or (iii) redistributable at no charge. f. + * Unless you have an agreement with NVIDIA for this purpose, you may not use the SOFTWARE with any system or + * application where the use or failure of the system or application can reasonably be expected to threaten or result in + * personal injury, death, or catastrophic loss. Examples include use in avionics, navigation, military, medical, life + * support or other life critical applications. NVIDIA does not design, test or manufacture the SOFTWARE for these + * critical uses and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or + * damages arising from such uses. g. You agree to defend, indemnify and hold harmless NVIDIA and its affiliates, + * and their respective employees, contractors, agents, officers and directors, from and against any and all claims, + * damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited + * to attorney’s fees and costs incident to establishing the right of indemnification) arising out of or related to use + * of the SOFTWARE outside of the scope of this Agreement, or not in compliance with its terms. + * + * 4. PRE-RELEASE. 
SOFTWARE versions identified as alpha, beta, preview, early access or otherwise as pre-release may + * not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, + * availability, and reliability standards relative to commercial versions of NVIDIA software and materials. You may use + * a pre-release SOFTWARE version at your own risk, understanding that these versions are not intended for use in + * production or business-critical systems. + * + * 5. OWNERSHIP. The SOFTWARE and the related intellectual property rights therein are and will remain the sole and + * exclusive property of NVIDIA or its licensors. The SOFTWARE is copyrighted and protected by the laws of the United + * States and other countries, and international treaty provisions. NVIDIA may make changes to the SOFTWARE, at any time + * without notice, but is not obligated to support or update the SOFTWARE. + * + * 6. COMPONENTS UNDER OTHER LICENSES. The SOFTWARE may include NVIDIA or third-party components with separate legal + * notices or terms as may be described in proprietary notices accompanying the SOFTWARE. If and to the extent there is + * a conflict between the terms in this license and the license terms associated with a component, the license terms + * associated with the components control only to the extent necessary to resolve the conflict. + * + * 7. FEEDBACK. You may, but don’t have to, provide to NVIDIA any Feedback. “Feedback” means any suggestions, bug fixes, + * enhancements, modifications, feature requests or other feedback regarding the SOFTWARE. For any Feedback that you + * voluntarily provide, you hereby grant NVIDIA and its affiliates a perpetual, non-exclusive, worldwide, irrevocable + * license to use, reproduce, modify, license, sublicense (through multiple tiers of sublicensees), and distribute + * (through multiple tiers of distributors) the Feedback without the payment of any royalties or fees to you. NVIDIA + * will use Feedback at its choice. + * + * 8. NO WARRANTIES. THE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING, BUT + * NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. NVIDIA DOES NOT + * WARRANT THAT THE SOFTWARE WILL MEET YOUR REQUIREMENTS OR THAT THE OPERATION THEREOF WILL BE UNINTERRUPTED OR + * ERROR-FREE, OR THAT ALL ERRORS WILL BE CORRECTED. + * + * 9. LIMITATIONS OF LIABILITY. TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS AFFILIATES SHALL NOT BE LIABLE + * FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, PROJECT DELAYS, LOSS OF USE, + * LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH + * THIS LICENSE OR THE USE OR PERFORMANCE OF THE SOFTWARE, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON + * BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION + * OR THEORY OF LIABILITY, EVEN IF NVIDIA HAS PREVIOUSLY BEEN ADVISED OF, OR COULD REASONABLY HAVE FORESEEN, THE + * POSSIBILITY OF SUCH DAMAGES. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES TOTAL CUMULATIVE LIABILITY UNDER OR ARISING + * OUT OF THIS LICENSE EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE + * OR EXTEND THIS LIMIT. + * + * 10. TERMINATION. 
Your rights under this license will terminate automatically without notice from NVIDIA if you fail + * to comply with any term and condition of this license or if you commence or participate in any legal proceeding + * against NVIDIA with respect to the SOFTWARE. NVIDIA may terminate this license with advance written notice to you if + * NVIDIA decides to no longer provide the SOFTWARE in a country or, in NVIDIA’s sole discretion, the continued use of + * it is no longer commercially viable. Upon any termination of this license, you agree to promptly discontinue use of + * the SOFTWARE and destroy all copies in your possession or control. Your prior distributions in accordance with this + * license are not affected by the termination of this license. All provisions of this license will survive termination, + * except for the license granted to you. + * + * 11. APPLICABLE LAW. This license will be governed in all respects by the laws of the United States and of the State + * of Delaware as those laws are applied to contracts entered into and performed entirely within Delaware by Delaware + * residents, without regard to the conflicts of laws principles. The United Nations Convention on Contracts for the + * International Sale of Goods is specifically disclaimed. You agree to all terms of this Agreement in the English + * language. The state or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction + * over any dispute or claim arising out of this license. Notwithstanding this, you agree that NVIDIA shall still be + * allowed to apply for injunctive remedies or an equivalent type of urgent legal relief in any jurisdiction. + * + * 12. NO ASSIGNMENT. This license and your rights and obligations thereunder may not be assigned by you by any means or + * operation of law without NVIDIA’s permission. Any attempted assignment not approved by NVIDIA in writing shall be + * void and of no effect. + * + * 13. EXPORT. The SOFTWARE is subject to United States export laws and regulations. You agree that you will not ship, + * transfer or export the SOFTWARE into any country, or use the SOFTWARE in any manner, prohibited by the United States + * Bureau of Industry and Security or economic sanctions regulations administered by the U.S. Department of Treasury’s + * Office of Foreign Assets Control (OFAC), or any applicable export laws, restrictions or regulations. These laws + * include restrictions on destinations, end users and end use. By accepting this license, you confirm that you are not + * a resident or citizen of any country currently embargoed by the U.S. and that you are not otherwise prohibited from + * receiving the SOFTWARE. + * + * 14. GOVERNMENT USE. The SOFTWARE has been developed entirely at private expense and is “commercial items” consisting + * of “commercial computer software” and “commercial computer software documentation” provided with RESTRICTED RIGHTS. + * Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the + * restrictions in this license pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the + * Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is + * NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051. + * + * 15. ENTIRE AGREEMENT. 
This license is the final, complete and exclusive agreement between the parties relating to the + * subject matter of this license and supersedes all prior or contemporaneous understandings and agreements relating to + * this subject matter, whether oral or written. If any court of competent jurisdiction determines that any provision of + * this license is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. + * This license may only be modified in a writing signed by an authorized representative of each party. * * (v. August 20, 2021) */ @@ -65,8 +143,8 @@ # pragma system_header #endif // no system header -#include #include +#include #include _LIBCUDACXX_BEGIN_NAMESPACE_CUDA @@ -74,90 +152,72 @@ namespace __detail { template -class __return_type_wrapper { - private: +class __return_type_wrapper +{ +private: _DecayFn __fn_; - public: +public: __return_type_wrapper() = delete; template , _DecayFn>::value>> - _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 - explicit __return_type_wrapper(_Fn &&__fn) noexcept - : __fn_(_CUDA_VSTD::forward<_Fn>(__fn)) {} + _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 explicit __return_type_wrapper(_Fn&& __fn) noexcept + : __fn_(_CUDA_VSTD::forward<_Fn>(__fn)) + {} template - _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 - _Ret operator()(_As&&... __as) & noexcept { + _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 _Ret operator()(_As&&... __as) & noexcept + { #if !defined(__NVCC__) || defined(__CUDA_ARCH__) - static_assert( - _CUDA_VSTD::is_same< - _Ret, - typename _CUDA_VSTD::__invoke_of<_DecayFn&, _As...>::type - >::value, - "Return type shall match the proclaimed one exactly"); + static_assert(_CUDA_VSTD::is_same<_Ret, typename _CUDA_VSTD::__invoke_of<_DecayFn&, _As...>::type>::value, + "Return type shall match the proclaimed one exactly"); #endif return _CUDA_VSTD::__invoke(__fn_, _CUDA_VSTD::forward<_As>(__as)...); } template - _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 - _Ret operator()(_As&&... __as) && noexcept { + _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 _Ret operator()(_As&&... __as) && noexcept + { #if !defined(__NVCC__) || defined(__CUDA_ARCH__) - static_assert( - _CUDA_VSTD::is_same< - _Ret, - typename _CUDA_VSTD::__invoke_of<_DecayFn, _As...>::type - >::value, - "Return type shall match the proclaimed one exactly"); + static_assert(_CUDA_VSTD::is_same<_Ret, typename _CUDA_VSTD::__invoke_of<_DecayFn, _As...>::type>::value, + "Return type shall match the proclaimed one exactly"); #endif - return _CUDA_VSTD::__invoke(_CUDA_VSTD::move(__fn_), - _CUDA_VSTD::forward<_As>(__as)...); + return _CUDA_VSTD::__invoke(_CUDA_VSTD::move(__fn_), _CUDA_VSTD::forward<_As>(__as)...); } template - _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 - _Ret operator()(_As&&... __as) const& noexcept { + _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 _Ret operator()(_As&&... __as) const& noexcept + { #if !defined(__NVCC__) || defined(__CUDA_ARCH__) - static_assert( - _CUDA_VSTD::is_same< - _Ret, - typename _CUDA_VSTD::__invoke_of::type - >::value, - "Return type shall match the proclaimed one exactly"); + static_assert(_CUDA_VSTD::is_same<_Ret, typename _CUDA_VSTD::__invoke_of::type>::value, + "Return type shall match the proclaimed one exactly"); #endif return _CUDA_VSTD::__invoke(__fn_, _CUDA_VSTD::forward<_As>(__as)...); } template - _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 - _Ret operator()(_As&&... 
__as) const&& noexcept { + _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 _Ret operator()(_As&&... __as) const&& noexcept + { #if !defined(__NVCC__) || defined(__CUDA_ARCH__) - static_assert( - _CUDA_VSTD::is_same< - _Ret, - typename _CUDA_VSTD::__invoke_of::type - >::value, - "Return type shall match the proclaimed one exactly"); + static_assert(_CUDA_VSTD::is_same<_Ret, typename _CUDA_VSTD::__invoke_of::type>::value, + "Return type shall match the proclaimed one exactly"); #endif - return _CUDA_VSTD::__invoke(_CUDA_VSTD::move(__fn_), - _CUDA_VSTD::forward<_As>(__as)...); + return _CUDA_VSTD::__invoke(_CUDA_VSTD::move(__fn_), _CUDA_VSTD::forward<_As>(__as)...); } }; -} // __detail +} // namespace __detail template -inline _LIBCUDACXX_INLINE_VISIBILITY -__detail::__return_type_wrapper<_Ret, _CUDA_VSTD::__decay_t<_Fn>> -proclaim_return_type(_Fn&& __fn) noexcept { - return __detail::__return_type_wrapper<_Ret, _CUDA_VSTD::__decay_t<_Fn>>( - _CUDA_VSTD::forward<_Fn>(__fn)); +inline _LIBCUDACXX_INLINE_VISIBILITY __detail::__return_type_wrapper<_Ret, _CUDA_VSTD::__decay_t<_Fn>> +proclaim_return_type(_Fn&& __fn) noexcept +{ + return __detail::__return_type_wrapper<_Ret, _CUDA_VSTD::__decay_t<_Fn>>(_CUDA_VSTD::forward<_Fn>(__fn)); } _LIBCUDACXX_END_NAMESPACE_CUDA diff --git a/libcudacxx/include/cuda/memory_resource b/libcudacxx/include/cuda/memory_resource index a138995aa5f..894fd9eb2dd 100644 --- a/libcudacxx/include/cuda/memory_resource +++ b/libcudacxx/include/cuda/memory_resource @@ -80,17 +80,17 @@ class resource_ref { */ // clang-format on -# include // cuda_runtime_api needs to come first +#include // cuda_runtime_api needs to come first -# include "__cccl_config" +#include "__cccl_config" -# if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) -# pragma GCC system_header -# elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) -# pragma clang system_header -# elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) -# pragma system_header -# endif // no system header +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header #include #include diff --git a/libcudacxx/include/cuda/pipeline b/libcudacxx/include/cuda/pipeline index 509dfd65cbe..583a6fb6c72 100644 --- a/libcudacxx/include/cuda/pipeline +++ b/libcudacxx/include/cuda/pipeline @@ -3,50 +3,128 @@ * * NVIDIA SOFTWARE LICENSE * - * This license is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of the NVIDIA/CUDA C++ Library software and materials provided hereunder (“SOFTWARE”). + * This license is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of the + * NVIDIA/CUDA C++ Library software and materials provided hereunder (“SOFTWARE”). * - * This license can be accepted only by an adult of legal age of majority in the country in which the SOFTWARE is used. If you are under the legal age of majority, you must ask your parent or legal guardian to consent to this license. By taking delivery of the SOFTWARE, you affirm that you have reached the legal age of majority, you accept the terms of this license, and you take legal and financial responsibility for the actions of your permitted users. + * This license can be accepted only by an adult of legal age of majority in the country in which the SOFTWARE is used. 
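The cuda/functional hunk above reflows __return_type_wrapper and cuda::proclaim_return_type without changing behaviour: the wrapper still static_asserts that the deduced result of the invocation matches the proclaimed return type. A brief usage sketch, assuming nvcc's extended lambdas are enabled (--extended-lambda); the kernel and helper names are hypothetical and not part of this patch:

#include <cuda/functional>

// Host code cannot invoke a __device__ lambda, so its return type cannot be
// deduced there; proclaim_return_type<int> supplies it up front, and the
// wrapper's static_assert (seen in the hunk above) re-checks it in device code.
template <class F>
__global__ void apply_one(F f, int* out)
{
  *out = f(21); // device-side call through the wrapper
}

void launch_example(int* d_out) // hypothetical host-side helper
{
  auto twice = cuda::proclaim_return_type<int>([] __device__(int x) { return 2 * x; });
  apply_one<<<1, 1>>>(twice, d_out);
}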
+ * If you are under the legal age of majority, you must ask your parent or legal guardian to consent to this license. By + * taking delivery of the SOFTWARE, you affirm that you have reached the legal age of majority, you accept the terms of + * this license, and you take legal and financial responsibility for the actions of your permitted users. * - * You agree to use the SOFTWARE only for purposes that are permitted by (a) this license, and (b) any applicable law, regulation or generally accepted practices or guidelines in the relevant jurisdictions. + * You agree to use the SOFTWARE only for purposes that are permitted by (a) this license, and (b) any applicable law, + * regulation or generally accepted practices or guidelines in the relevant jurisdictions. * - * 1. LICENSE. Subject to the terms of this license, NVIDIA grants you a non-exclusive limited license to: (a) install and use the SOFTWARE, and (b) distribute the SOFTWARE subject to the distribution requirements described in this license. NVIDIA reserves all rights, title and interest in and to the SOFTWARE not expressly granted to you under this license. + * 1. LICENSE. Subject to the terms of this license, NVIDIA grants you a non-exclusive limited license to: (a) install + * and use the SOFTWARE, and (b) distribute the SOFTWARE subject to the distribution requirements described in this + * license. NVIDIA reserves all rights, title and interest in and to the SOFTWARE not expressly granted to you under + * this license. * * 2. DISTRIBUTION REQUIREMENTS. These are the distribution requirements for you to exercise the distribution grant: - * a. The terms under which you distribute the SOFTWARE must be consistent with the terms of this license, including (without limitation) terms relating to the license grant and license restrictions and protection of NVIDIA’s intellectual property rights. - * b. You agree to notify NVIDIA in writing of any known or suspected distribution or use of the SOFTWARE not in compliance with the requirements of this license, and to enforce the terms of your agreements with respect to distributed SOFTWARE. + * a. The terms under which you distribute the SOFTWARE must be consistent with the terms of this license, + * including (without limitation) terms relating to the license grant and license restrictions and protection of + * NVIDIA’s intellectual property rights. b. You agree to notify NVIDIA in writing of any known or suspected + * distribution or use of the SOFTWARE not in compliance with the requirements of this license, and to enforce the terms + * of your agreements with respect to distributed SOFTWARE. * * 3. LIMITATIONS. Your license to use the SOFTWARE is restricted as follows: * a. The SOFTWARE is licensed for you to develop applications only for use in systems with NVIDIA GPUs. - * b. You may not reverse engineer, decompile or disassemble, or remove copyright or other proprietary notices from any portion of the SOFTWARE or copies of the SOFTWARE. - * c. You may not modify or create derivative works of any portion of the SOFTWARE. - * d. You may not bypass, disable, or circumvent any technical measure, encryption, security, digital rights management or authentication mechanism in the SOFTWARE. - * e. You may not use the SOFTWARE in any manner that would cause it to become subject to an open source software license. 
As examples, licenses that require as a condition of use, modification, and/or distribution that the SOFTWARE be (i) disclosed or distributed in source code form; (ii) licensed for the purpose of making derivative works; or (iii) redistributable at no charge. - * f. Unless you have an agreement with NVIDIA for this purpose, you may not use the SOFTWARE with any system or application where the use or failure of the system or application can reasonably be expected to threaten or result in personal injury, death, or catastrophic loss. Examples include use in avionics, navigation, military, medical, life support or other life critical applications. NVIDIA does not design, test or manufacture the SOFTWARE for these critical uses and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or damages arising from such uses. - * g. You agree to defend, indemnify and hold harmless NVIDIA and its affiliates, and their respective employees, contractors, agents, officers and directors, from and against any and all claims, damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited to attorney’s fees and costs incident to establishing the right of indemnification) arising out of or related to use of the SOFTWARE outside of the scope of this Agreement, or not in compliance with its terms. + * b. You may not reverse engineer, decompile or disassemble, or remove copyright or other proprietary notices from + * any portion of the SOFTWARE or copies of the SOFTWARE. c. You may not modify or create derivative works of any + * portion of the SOFTWARE. d. You may not bypass, disable, or circumvent any technical measure, encryption, + * security, digital rights management or authentication mechanism in the SOFTWARE. e. You may not use the SOFTWARE + * in any manner that would cause it to become subject to an open source software license. As examples, licenses that + * require as a condition of use, modification, and/or distribution that the SOFTWARE be (i) disclosed or distributed in + * source code form; (ii) licensed for the purpose of making derivative works; or (iii) redistributable at no charge. f. + * Unless you have an agreement with NVIDIA for this purpose, you may not use the SOFTWARE with any system or + * application where the use or failure of the system or application can reasonably be expected to threaten or result in + * personal injury, death, or catastrophic loss. Examples include use in avionics, navigation, military, medical, life + * support or other life critical applications. NVIDIA does not design, test or manufacture the SOFTWARE for these + * critical uses and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or + * damages arising from such uses. g. You agree to defend, indemnify and hold harmless NVIDIA and its affiliates, + * and their respective employees, contractors, agents, officers and directors, from and against any and all claims, + * damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited + * to attorney’s fees and costs incident to establishing the right of indemnification) arising out of or related to use + * of the SOFTWARE outside of the scope of this Agreement, or not in compliance with its terms. * - * 4. PRE-RELEASE. 
SOFTWARE versions identified as alpha, beta, preview, early access or otherwise as pre-release may not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, availability, and reliability standards relative to commercial versions of NVIDIA software and materials. You may use a pre-release SOFTWARE version at your own risk, understanding that these versions are not intended for use in production or business-critical systems. + * 4. PRE-RELEASE. SOFTWARE versions identified as alpha, beta, preview, early access or otherwise as pre-release may + * not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, + * availability, and reliability standards relative to commercial versions of NVIDIA software and materials. You may use + * a pre-release SOFTWARE version at your own risk, understanding that these versions are not intended for use in + * production or business-critical systems. * - * 5. OWNERSHIP. The SOFTWARE and the related intellectual property rights therein are and will remain the sole and exclusive property of NVIDIA or its licensors. The SOFTWARE is copyrighted and protected by the laws of the United States and other countries, and international treaty provisions. NVIDIA may make changes to the SOFTWARE, at any time without notice, but is not obligated to support or update the SOFTWARE. + * 5. OWNERSHIP. The SOFTWARE and the related intellectual property rights therein are and will remain the sole and + * exclusive property of NVIDIA or its licensors. The SOFTWARE is copyrighted and protected by the laws of the United + * States and other countries, and international treaty provisions. NVIDIA may make changes to the SOFTWARE, at any time + * without notice, but is not obligated to support or update the SOFTWARE. * - * 6. COMPONENTS UNDER OTHER LICENSES. The SOFTWARE may include NVIDIA or third-party components with separate legal notices or terms as may be described in proprietary notices accompanying the SOFTWARE. If and to the extent there is a conflict between the terms in this license and the license terms associated with a component, the license terms associated with the components control only to the extent necessary to resolve the conflict. + * 6. COMPONENTS UNDER OTHER LICENSES. The SOFTWARE may include NVIDIA or third-party components with separate legal + * notices or terms as may be described in proprietary notices accompanying the SOFTWARE. If and to the extent there is + * a conflict between the terms in this license and the license terms associated with a component, the license terms + * associated with the components control only to the extent necessary to resolve the conflict. * - * 7. FEEDBACK. You may, but don’t have to, provide to NVIDIA any Feedback. “Feedback” means any suggestions, bug fixes, enhancements, modifications, feature requests or other feedback regarding the SOFTWARE. For any Feedback that you voluntarily provide, you hereby grant NVIDIA and its affiliates a perpetual, non-exclusive, worldwide, irrevocable license to use, reproduce, modify, license, sublicense (through multiple tiers of sublicensees), and distribute (through multiple tiers of distributors) the Feedback without the payment of any royalties or fees to you. NVIDIA will use Feedback at its choice. + * 7. FEEDBACK. You may, but don’t have to, provide to NVIDIA any Feedback. 
“Feedback” means any suggestions, bug fixes, + * enhancements, modifications, feature requests or other feedback regarding the SOFTWARE. For any Feedback that you + * voluntarily provide, you hereby grant NVIDIA and its affiliates a perpetual, non-exclusive, worldwide, irrevocable + * license to use, reproduce, modify, license, sublicense (through multiple tiers of sublicensees), and distribute + * (through multiple tiers of distributors) the Feedback without the payment of any royalties or fees to you. NVIDIA + * will use Feedback at its choice. * - * 8. NO WARRANTIES. THE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING, BUT NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. NVIDIA DOES NOT WARRANT THAT THE SOFTWARE WILL MEET YOUR REQUIREMENTS OR THAT THE OPERATION THEREOF WILL BE UNINTERRUPTED OR ERROR-FREE, OR THAT ALL ERRORS WILL BE CORRECTED. + * 8. NO WARRANTIES. THE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING, BUT + * NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. NVIDIA DOES NOT + * WARRANT THAT THE SOFTWARE WILL MEET YOUR REQUIREMENTS OR THAT THE OPERATION THEREOF WILL BE UNINTERRUPTED OR + * ERROR-FREE, OR THAT ALL ERRORS WILL BE CORRECTED. * - * 9. LIMITATIONS OF LIABILITY. TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, PROJECT DELAYS, LOSS OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH THIS LICENSE OR THE USE OR PERFORMANCE OF THE SOFTWARE, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF LIABILITY, EVEN IF NVIDIA HAS PREVIOUSLY BEEN ADVISED OF, OR COULD REASONABLY HAVE FORESEEN, THE POSSIBILITY OF SUCH DAMAGES. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS LICENSE EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS LIMIT. + * 9. LIMITATIONS OF LIABILITY. TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS AFFILIATES SHALL NOT BE LIABLE + * FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, PROJECT DELAYS, LOSS OF USE, + * LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH + * THIS LICENSE OR THE USE OR PERFORMANCE OF THE SOFTWARE, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON + * BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION + * OR THEORY OF LIABILITY, EVEN IF NVIDIA HAS PREVIOUSLY BEEN ADVISED OF, OR COULD REASONABLY HAVE FORESEEN, THE + * POSSIBILITY OF SUCH DAMAGES. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES TOTAL CUMULATIVE LIABILITY UNDER OR ARISING + * OUT OF THIS LICENSE EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE + * OR EXTEND THIS LIMIT. * - * 10. TERMINATION. 
Your rights under this license will terminate automatically without notice from NVIDIA if you fail to comply with any term and condition of this license or if you commence or participate in any legal proceeding against NVIDIA with respect to the SOFTWARE. NVIDIA may terminate this license with advance written notice to you if NVIDIA decides to no longer provide the SOFTWARE in a country or, in NVIDIA’s sole discretion, the continued use of it is no longer commercially viable. Upon any termination of this license, you agree to promptly discontinue use of the SOFTWARE and destroy all copies in your possession or control. Your prior distributions in accordance with this license are not affected by the termination of this license. All provisions of this license will survive termination, except for the license granted to you. + * 10. TERMINATION. Your rights under this license will terminate automatically without notice from NVIDIA if you fail + * to comply with any term and condition of this license or if you commence or participate in any legal proceeding + * against NVIDIA with respect to the SOFTWARE. NVIDIA may terminate this license with advance written notice to you if + * NVIDIA decides to no longer provide the SOFTWARE in a country or, in NVIDIA’s sole discretion, the continued use of + * it is no longer commercially viable. Upon any termination of this license, you agree to promptly discontinue use of + * the SOFTWARE and destroy all copies in your possession or control. Your prior distributions in accordance with this + * license are not affected by the termination of this license. All provisions of this license will survive termination, + * except for the license granted to you. * - * 11. APPLICABLE LAW. This license will be governed in all respects by the laws of the United States and of the State of Delaware as those laws are applied to contracts entered into and performed entirely within Delaware by Delaware residents, without regard to the conflicts of laws principles. The United Nations Convention on Contracts for the International Sale of Goods is specifically disclaimed. You agree to all terms of this Agreement in the English language. The state or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction over any dispute or claim arising out of this license. Notwithstanding this, you agree that NVIDIA shall still be allowed to apply for injunctive remedies or an equivalent type of urgent legal relief in any jurisdiction. + * 11. APPLICABLE LAW. This license will be governed in all respects by the laws of the United States and of the State + * of Delaware as those laws are applied to contracts entered into and performed entirely within Delaware by Delaware + * residents, without regard to the conflicts of laws principles. The United Nations Convention on Contracts for the + * International Sale of Goods is specifically disclaimed. You agree to all terms of this Agreement in the English + * language. The state or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction + * over any dispute or claim arising out of this license. Notwithstanding this, you agree that NVIDIA shall still be + * allowed to apply for injunctive remedies or an equivalent type of urgent legal relief in any jurisdiction. * - * 12. NO ASSIGNMENT. This license and your rights and obligations thereunder may not be assigned by you by any means or operation of law without NVIDIA’s permission. 
Any attempted assignment not approved by NVIDIA in writing shall be void and of no effect. + * 12. NO ASSIGNMENT. This license and your rights and obligations thereunder may not be assigned by you by any means or + * operation of law without NVIDIA’s permission. Any attempted assignment not approved by NVIDIA in writing shall be + * void and of no effect. * - * 13. EXPORT. The SOFTWARE is subject to United States export laws and regulations. You agree that you will not ship, transfer or export the SOFTWARE into any country, or use the SOFTWARE in any manner, prohibited by the United States Bureau of Industry and Security or economic sanctions regulations administered by the U.S. Department of Treasury’s Office of Foreign Assets Control (OFAC), or any applicable export laws, restrictions or regulations. These laws include restrictions on destinations, end users and end use. By accepting this license, you confirm that you are not a resident or citizen of any country currently embargoed by the U.S. and that you are not otherwise prohibited from receiving the SOFTWARE. + * 13. EXPORT. The SOFTWARE is subject to United States export laws and regulations. You agree that you will not ship, + * transfer or export the SOFTWARE into any country, or use the SOFTWARE in any manner, prohibited by the United States + * Bureau of Industry and Security or economic sanctions regulations administered by the U.S. Department of Treasury’s + * Office of Foreign Assets Control (OFAC), or any applicable export laws, restrictions or regulations. These laws + * include restrictions on destinations, end users and end use. By accepting this license, you confirm that you are not + * a resident or citizen of any country currently embargoed by the U.S. and that you are not otherwise prohibited from + * receiving the SOFTWARE. * - * 14. GOVERNMENT USE. The SOFTWARE has been developed entirely at private expense and is “commercial items” consisting of “commercial computer software” and “commercial computer software documentation” provided with RESTRICTED RIGHTS. Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the restrictions in this license pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051. + * 14. GOVERNMENT USE. The SOFTWARE has been developed entirely at private expense and is “commercial items” consisting + * of “commercial computer software” and “commercial computer software documentation” provided with RESTRICTED RIGHTS. + * Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the + * restrictions in this license pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the + * Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is + * NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051. * - * 15. ENTIRE AGREEMENT. This license is the final, complete and exclusive agreement between the parties relating to the subject matter of this license and supersedes all prior or contemporaneous understandings and agreements relating to this subject matter, whether oral or written. 
If any court of competent jurisdiction determines that any provision of this license is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. This license may only be modified in a writing signed by an authorized representative of each party. + * 15. ENTIRE AGREEMENT. This license is the final, complete and exclusive agreement between the parties relating to the + * subject matter of this license and supersedes all prior or contemporaneous understandings and agreements relating to + * this subject matter, whether oral or written. If any court of competent jurisdiction determines that any provision of + * this license is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. + * This license may only be modified in a writing signed by an authorized representative of each party. * * (v. August 20, 2021) */ @@ -63,532 +141,563 @@ # pragma system_header #endif // no system header -#include #include +#include #include _LIBCUDACXX_BEGIN_NAMESPACE_CUDA - // Forward declaration in barrier of pipeline - enum class pipeline_role { - producer, - consumer - }; - - template - struct __pipeline_stage { - barrier<_Scope> __produced; - barrier<_Scope> __consumed; - }; - - template - class pipeline_shared_state { - public: - pipeline_shared_state() = default; - pipeline_shared_state(const pipeline_shared_state &) = delete; - pipeline_shared_state(pipeline_shared_state &&) = delete; - pipeline_shared_state & operator=(pipeline_shared_state &&) = delete; - pipeline_shared_state & operator=(const pipeline_shared_state &) = delete; - - private: - __pipeline_stage<_Scope> __stages[_Stages_count]; - atomic __refcount; - - template - friend class pipeline; - - template - friend _LIBCUDACXX_INLINE_VISIBILITY - pipeline<_Pipeline_scope> make_pipeline(const _Group & __group, pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count> * __shared_state); - - template - friend _LIBCUDACXX_INLINE_VISIBILITY - pipeline<_Pipeline_scope> make_pipeline(const _Group & __group, pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count> * __shared_state, size_t __producer_count); - - template - friend _LIBCUDACXX_INLINE_VISIBILITY - pipeline<_Pipeline_scope> make_pipeline(const _Group & __group, pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count> * __shared_state, pipeline_role __role); - }; - - struct __pipeline_asm_helper { - _CCCL_DEVICE - static inline uint32_t __lane_id() - { - NV_IF_ELSE_TARGET( - NV_IS_DEVICE, - ( - uint32_t __lane_id; - asm volatile ("mov.u32 %0, %%laneid;" : "=r"(__lane_id)); - return __lane_id; - ), - ( - return 0; - ) - ) - } - }; - - template - class pipeline { - public: - pipeline(pipeline &&) = default; - pipeline(const pipeline &) = delete; - pipeline & operator=(pipeline &&) = delete; - pipeline & operator=(const pipeline &) = delete; - - _LIBCUDACXX_INLINE_VISIBILITY - ~pipeline() - { - if (__active) { - (void)quit(); - } - } - - _LIBCUDACXX_INLINE_VISIBILITY - bool quit() - { - bool __elected; - uint32_t __sub_count; -NV_IF_TARGET(NV_IS_DEVICE, - const uint32_t __match_mask = __match_any_sync(__activemask(), reinterpret_cast(__shared_state_get_refcount())); - const uint32_t __elected_id = __ffs(__match_mask) - 1; - __elected = (__pipeline_asm_helper::__lane_id() == __elected_id); - __sub_count = __popc(__match_mask); -, - __elected = true; - __sub_count = 1; -) - bool __released = false; - if (__elected) { - const uint32_t __old = __shared_state_get_refcount()->fetch_sub(__sub_count); - 
const bool __last = (__old == __sub_count); - if (__last) { - for (uint8_t __stage = 0; __stage < __stages_count; ++__stage) { - __shared_state_get_stage(__stage)->__produced.~barrier(); - __shared_state_get_stage(__stage)->__consumed.~barrier(); - } - __released = true; - } - } - __active = false; - return __released; - } - - _LIBCUDACXX_INLINE_VISIBILITY - void producer_acquire() - { - barrier<_Scope> & __stage_barrier = __shared_state_get_stage(__head)->__consumed; - __stage_barrier.wait_parity(__consumed_phase_parity); - } - - _LIBCUDACXX_INLINE_VISIBILITY - void producer_commit() - { - barrier<_Scope> & __stage_barrier = __shared_state_get_stage(__head)->__produced; - (void)__memcpy_completion_impl::__defer(__completion_mechanism::__async_group, __single_thread_group{}, 0, __stage_barrier); - (void)__stage_barrier.arrive(); - if (++__head == __stages_count) { - __head = 0; - __consumed_phase_parity = !__consumed_phase_parity; - } - } - - _LIBCUDACXX_INLINE_VISIBILITY - void consumer_wait() - { - barrier<_Scope> & __stage_barrier = __shared_state_get_stage(__tail)->__produced; - __stage_barrier.wait_parity(__produced_phase_parity); - } - - _LIBCUDACXX_INLINE_VISIBILITY - void consumer_release() - { - (void)__shared_state_get_stage(__tail)->__consumed.arrive(); - if (++__tail == __stages_count) { - __tail = 0; - __produced_phase_parity = !__produced_phase_parity; - } - } - - template - _LIBCUDACXX_INLINE_VISIBILITY - bool consumer_wait_for(const _CUDA_VSTD::chrono::duration<_Rep, _Period> & __duration) - { - barrier<_Scope> & __stage_barrier = __shared_state_get_stage(__tail)->__produced; - return _CUDA_VSTD::__libcpp_thread_poll_with_backoff( - _CUDA_VSTD::__barrier_poll_tester_parity>( - &__stage_barrier, - __produced_phase_parity), - _CUDA_VSTD::chrono::duration_cast<_CUDA_VSTD::chrono::nanoseconds>(__duration) - ); - } - - template - _LIBCUDACXX_INLINE_VISIBILITY - bool consumer_wait_until(const _CUDA_VSTD::chrono::time_point<_Clock, _Duration> & __time_point) - { - return consumer_wait_for(__time_point - _Clock::now()); - } - - private: - uint8_t __head : 8; - uint8_t __tail : 8; - const uint8_t __stages_count : 8; - bool __consumed_phase_parity : 1; - bool __produced_phase_parity : 1; - bool __active : 1; - // TODO: Remove partitioned on next ABI break - const bool __partitioned : 1; - char * const __shared_state; - - - _LIBCUDACXX_INLINE_VISIBILITY - pipeline(char * __shared_state, uint8_t __stages_count, bool __partitioned) - : __head(0) - , __tail(0) - , __stages_count(__stages_count) - , __consumed_phase_parity(true) - , __produced_phase_parity(false) - , __active(true) - , __partitioned(__partitioned) - , __shared_state(__shared_state) - {} - - _LIBCUDACXX_INLINE_VISIBILITY - __pipeline_stage<_Scope> * __shared_state_get_stage(uint8_t __stage) - { - ptrdiff_t __stage_offset = __stage * sizeof(__pipeline_stage<_Scope>); - return reinterpret_cast<__pipeline_stage<_Scope>*>(__shared_state + __stage_offset); - } - - _LIBCUDACXX_INLINE_VISIBILITY - atomic * __shared_state_get_refcount() +// Forward declaration in barrier of pipeline +enum class pipeline_role +{ + producer, + consumer +}; + +template +struct __pipeline_stage +{ + barrier<_Scope> __produced; + barrier<_Scope> __consumed; +}; + +template +class pipeline_shared_state +{ +public: + pipeline_shared_state() = default; + pipeline_shared_state(const pipeline_shared_state&) = delete; + pipeline_shared_state(pipeline_shared_state&&) = delete; + pipeline_shared_state& operator=(pipeline_shared_state&&) = delete; + 
pipeline_shared_state& operator=(const pipeline_shared_state&) = delete; + +private: + __pipeline_stage<_Scope> __stages[_Stages_count]; + atomic __refcount; + + template + friend class pipeline; + + template + friend _LIBCUDACXX_INLINE_VISIBILITY pipeline<_Pipeline_scope> + make_pipeline(const _Group& __group, pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count>* __shared_state); + + template + friend _LIBCUDACXX_INLINE_VISIBILITY pipeline<_Pipeline_scope> + make_pipeline(const _Group& __group, + pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count>* __shared_state, + size_t __producer_count); + + template + friend _LIBCUDACXX_INLINE_VISIBILITY pipeline<_Pipeline_scope> + make_pipeline(const _Group& __group, + pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count>* __shared_state, + pipeline_role __role); +}; + +struct __pipeline_asm_helper +{ + _CCCL_DEVICE static inline uint32_t __lane_id() + { + NV_IF_ELSE_TARGET( + NV_IS_DEVICE, + (uint32_t __lane_id; asm volatile("mov.u32 %0, %%laneid;" + : "=r"(__lane_id)); + return __lane_id;), + (return 0;)) + } +}; + +template +class pipeline +{ +public: + pipeline(pipeline&&) = default; + pipeline(const pipeline&) = delete; + pipeline& operator=(pipeline&&) = delete; + pipeline& operator=(const pipeline&) = delete; + + _LIBCUDACXX_INLINE_VISIBILITY ~pipeline() + { + if (__active) + { + (void) quit(); + } + } + + _LIBCUDACXX_INLINE_VISIBILITY bool quit() + { + bool __elected; + uint32_t __sub_count; + NV_IF_TARGET( + NV_IS_DEVICE, + const uint32_t __match_mask = + __match_any_sync(__activemask(), reinterpret_cast(__shared_state_get_refcount())); + const uint32_t __elected_id = __ffs(__match_mask) - 1; + __elected = (__pipeline_asm_helper::__lane_id() == __elected_id); + __sub_count = __popc(__match_mask); + , __elected = true; + __sub_count = 1;) + bool __released = false; + if (__elected) + { + const uint32_t __old = __shared_state_get_refcount()->fetch_sub(__sub_count); + const bool __last = (__old == __sub_count); + if (__last) + { + for (uint8_t __stage = 0; __stage < __stages_count; ++__stage) { - ptrdiff_t __refcount_offset = __stages_count * sizeof(__pipeline_stage<_Scope>); - return reinterpret_cast*>(__shared_state + __refcount_offset); + __shared_state_get_stage(__stage)->__produced.~barrier(); + __shared_state_get_stage(__stage)->__consumed.~barrier(); } - - template - friend _LIBCUDACXX_INLINE_VISIBILITY - pipeline<_Pipeline_scope> make_pipeline(const _Group & __group, pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count> * __shared_state); - - template - friend _LIBCUDACXX_INLINE_VISIBILITY - pipeline<_Pipeline_scope> make_pipeline(const _Group & __group, pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count> * __shared_state, size_t __producer_count); - - template - friend _LIBCUDACXX_INLINE_VISIBILITY - pipeline<_Pipeline_scope> make_pipeline(const _Group & __group, pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count> * __shared_state, pipeline_role __role); - }; - - template - _LIBCUDACXX_INLINE_VISIBILITY - pipeline<_Scope> make_pipeline(const _Group & __group, pipeline_shared_state<_Scope, _Stages_count> * __shared_state) + __released = true; + } + } + __active = false; + return __released; + } + + _LIBCUDACXX_INLINE_VISIBILITY void producer_acquire() + { + barrier<_Scope>& __stage_barrier = __shared_state_get_stage(__head)->__consumed; + __stage_barrier.wait_parity(__consumed_phase_parity); + } + + _LIBCUDACXX_INLINE_VISIBILITY void producer_commit() + { + barrier<_Scope>& 
__stage_barrier = __shared_state_get_stage(__head)->__produced; + (void) __memcpy_completion_impl::__defer( + __completion_mechanism::__async_group, __single_thread_group{}, 0, __stage_barrier); + (void) __stage_barrier.arrive(); + if (++__head == __stages_count) { - const uint32_t __group_size = static_cast(__group.size()); - const uint32_t __thread_rank = static_cast(__group.thread_rank()); - - if (__thread_rank == 0) { - for (uint8_t __stage = 0; __stage < _Stages_count; ++__stage) { - init(&__shared_state->__stages[__stage].__consumed, __group_size); - init(&__shared_state->__stages[__stage].__produced, __group_size); - } - __shared_state->__refcount.store(__group_size, std::memory_order_relaxed); - } - __group.sync(); - - return pipeline<_Scope>(reinterpret_cast(__shared_state->__stages), _Stages_count, false); + __head = 0; + __consumed_phase_parity = !__consumed_phase_parity; } - - template - _LIBCUDACXX_INLINE_VISIBILITY - pipeline<_Scope> make_pipeline(const _Group & __group, pipeline_shared_state<_Scope, _Stages_count> * __shared_state, size_t __producer_count) + } + + _LIBCUDACXX_INLINE_VISIBILITY void consumer_wait() + { + barrier<_Scope>& __stage_barrier = __shared_state_get_stage(__tail)->__produced; + __stage_barrier.wait_parity(__produced_phase_parity); + } + + _LIBCUDACXX_INLINE_VISIBILITY void consumer_release() + { + (void) __shared_state_get_stage(__tail)->__consumed.arrive(); + if (++__tail == __stages_count) { - const uint32_t __group_size = static_cast(__group.size()); - const uint32_t __thread_rank = static_cast(__group.thread_rank()); - - if (__thread_rank == 0) { - const size_t __consumer_count = __group_size - __producer_count; - for (uint8_t __stage = 0; __stage < _Stages_count; ++__stage) { - init(&__shared_state->__stages[__stage].__consumed, __consumer_count); - init(&__shared_state->__stages[__stage].__produced, __producer_count); - } - __shared_state->__refcount.store(__group_size, std::memory_order_relaxed); - } - __group.sync(); - - return pipeline<_Scope>(reinterpret_cast(__shared_state->__stages), _Stages_count, true); + __tail = 0; + __produced_phase_parity = !__produced_phase_parity; } - - template - _LIBCUDACXX_INLINE_VISIBILITY - pipeline<_Scope> make_pipeline(const _Group & __group, pipeline_shared_state<_Scope, _Stages_count> * __shared_state, pipeline_role __role) + } + + template + _LIBCUDACXX_INLINE_VISIBILITY bool consumer_wait_for(const _CUDA_VSTD::chrono::duration<_Rep, _Period>& __duration) + { + barrier<_Scope>& __stage_barrier = __shared_state_get_stage(__tail)->__produced; + return _CUDA_VSTD::__libcpp_thread_poll_with_backoff( + _CUDA_VSTD::__barrier_poll_tester_parity>(&__stage_barrier, __produced_phase_parity), + _CUDA_VSTD::chrono::duration_cast<_CUDA_VSTD::chrono::nanoseconds>(__duration)); + } + + template + _LIBCUDACXX_INLINE_VISIBILITY bool + consumer_wait_until(const _CUDA_VSTD::chrono::time_point<_Clock, _Duration>& __time_point) + { + return consumer_wait_for(__time_point - _Clock::now()); + } + +private: + uint8_t __head : 8; + uint8_t __tail : 8; + const uint8_t __stages_count : 8; + bool __consumed_phase_parity : 1; + bool __produced_phase_parity : 1; + bool __active : 1; + // TODO: Remove partitioned on next ABI break + const bool __partitioned : 1; + char* const __shared_state; + + _LIBCUDACXX_INLINE_VISIBILITY pipeline(char* __shared_state, uint8_t __stages_count, bool __partitioned) + : __head(0) + , __tail(0) + , __stages_count(__stages_count) + , __consumed_phase_parity(true) + , __produced_phase_parity(false) + , 
__active(true) + , __partitioned(__partitioned) + , __shared_state(__shared_state) + {} + + _LIBCUDACXX_INLINE_VISIBILITY __pipeline_stage<_Scope>* __shared_state_get_stage(uint8_t __stage) + { + ptrdiff_t __stage_offset = __stage * sizeof(__pipeline_stage<_Scope>); + return reinterpret_cast<__pipeline_stage<_Scope>*>(__shared_state + __stage_offset); + } + + _LIBCUDACXX_INLINE_VISIBILITY atomic* __shared_state_get_refcount() + { + ptrdiff_t __refcount_offset = __stages_count * sizeof(__pipeline_stage<_Scope>); + return reinterpret_cast*>(__shared_state + __refcount_offset); + } + + template + friend _LIBCUDACXX_INLINE_VISIBILITY pipeline<_Pipeline_scope> + make_pipeline(const _Group& __group, pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count>* __shared_state); + + template + friend _LIBCUDACXX_INLINE_VISIBILITY pipeline<_Pipeline_scope> + make_pipeline(const _Group& __group, + pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count>* __shared_state, + size_t __producer_count); + + template + friend _LIBCUDACXX_INLINE_VISIBILITY pipeline<_Pipeline_scope> + make_pipeline(const _Group& __group, + pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count>* __shared_state, + pipeline_role __role); +}; + +template +_LIBCUDACXX_INLINE_VISIBILITY pipeline<_Scope> +make_pipeline(const _Group& __group, pipeline_shared_state<_Scope, _Stages_count>* __shared_state) +{ + const uint32_t __group_size = static_cast(__group.size()); + const uint32_t __thread_rank = static_cast(__group.thread_rank()); + + if (__thread_rank == 0) + { + for (uint8_t __stage = 0; __stage < _Stages_count; ++__stage) { - const uint32_t __group_size = static_cast(__group.size()); - const uint32_t __thread_rank = static_cast(__group.thread_rank()); - - if (__thread_rank == 0) { - __shared_state->__refcount.store(0, std::memory_order_relaxed); - } - __group.sync(); - - if (__role == pipeline_role::producer) { - bool __elected; - uint32_t __add_count; -NV_IF_TARGET(NV_IS_DEVICE, - const uint32_t __match_mask = __match_any_sync(__activemask(), reinterpret_cast(&__shared_state->__refcount)); - const uint32_t __elected_id = __ffs(__match_mask) - 1; - __elected = (__pipeline_asm_helper::__lane_id() == __elected_id); - __add_count = __popc(__match_mask); -, - __elected = true; - __add_count = 1; -) - if (__elected) { - (void)__shared_state->__refcount.fetch_add(__add_count, std::memory_order_relaxed); - } - } - __group.sync(); - - if (__thread_rank == 0) { - const uint32_t __producer_count = __shared_state->__refcount.load(std::memory_order_relaxed); - const uint32_t __consumer_count = __group_size - __producer_count; - for (uint8_t __stage = 0; __stage < _Stages_count; ++__stage) { - init(&__shared_state->__stages[__stage].__consumed, __consumer_count); - init(&__shared_state->__stages[__stage].__produced, __producer_count); - } - __shared_state->__refcount.store(__group_size, std::memory_order_relaxed); - } - __group.sync(); - - return pipeline<_Scope>(reinterpret_cast(__shared_state->__stages), _Stages_count, true); + init(&__shared_state->__stages[__stage].__consumed, __group_size); + init(&__shared_state->__stages[__stage].__produced, __group_size); + } + __shared_state->__refcount.store(__group_size, std::memory_order_relaxed); + } + __group.sync(); + + return pipeline<_Scope>(reinterpret_cast(__shared_state->__stages), _Stages_count, false); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY pipeline<_Scope> make_pipeline( + const _Group& __group, pipeline_shared_state<_Scope, _Stages_count>* __shared_state, 
size_t __producer_count) +{ + const uint32_t __group_size = static_cast(__group.size()); + const uint32_t __thread_rank = static_cast(__group.thread_rank()); + + if (__thread_rank == 0) + { + const size_t __consumer_count = __group_size - __producer_count; + for (uint8_t __stage = 0; __stage < _Stages_count; ++__stage) + { + init(&__shared_state->__stages[__stage].__consumed, __consumer_count); + init(&__shared_state->__stages[__stage].__produced, __producer_count); + } + __shared_state->__refcount.store(__group_size, std::memory_order_relaxed); + } + __group.sync(); + + return pipeline<_Scope>(reinterpret_cast(__shared_state->__stages), _Stages_count, true); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY pipeline<_Scope> +make_pipeline(const _Group& __group, pipeline_shared_state<_Scope, _Stages_count>* __shared_state, pipeline_role __role) +{ + const uint32_t __group_size = static_cast(__group.size()); + const uint32_t __thread_rank = static_cast(__group.thread_rank()); + + if (__thread_rank == 0) + { + __shared_state->__refcount.store(0, std::memory_order_relaxed); + } + __group.sync(); + + if (__role == pipeline_role::producer) + { + bool __elected; + uint32_t __add_count; + NV_IF_TARGET( + NV_IS_DEVICE, + const uint32_t __match_mask = + __match_any_sync(__activemask(), reinterpret_cast(&__shared_state->__refcount)); + const uint32_t __elected_id = __ffs(__match_mask) - 1; + __elected = (__pipeline_asm_helper::__lane_id() == __elected_id); + __add_count = __popc(__match_mask); + , __elected = true; + __add_count = 1;) + if (__elected) + { + (void) __shared_state->__refcount.fetch_add(__add_count, std::memory_order_relaxed); } + } + __group.sync(); + + if (__thread_rank == 0) + { + const uint32_t __producer_count = __shared_state->__refcount.load(std::memory_order_relaxed); + const uint32_t __consumer_count = __group_size - __producer_count; + for (uint8_t __stage = 0; __stage < _Stages_count; ++__stage) + { + init(&__shared_state->__stages[__stage].__consumed, __consumer_count); + init(&__shared_state->__stages[__stage].__produced, __producer_count); + } + __shared_state->__refcount.store(__group_size, std::memory_order_relaxed); + } + __group.sync(); + + return pipeline<_Scope>(reinterpret_cast(__shared_state->__stages), _Stages_count, true); +} _LIBCUDACXX_END_NAMESPACE_CUDA _LIBCUDACXX_BEGIN_NAMESPACE_CUDA_DEVICE - template - _CCCL_DEVICE - void __pipeline_consumer_wait(pipeline & __pipeline); +template +_CCCL_DEVICE void __pipeline_consumer_wait(pipeline& __pipeline); - _CCCL_DEVICE - inline void __pipeline_consumer_wait(pipeline & __pipeline, uint8_t __prior); +_CCCL_DEVICE inline void __pipeline_consumer_wait(pipeline& __pipeline, uint8_t __prior); _LIBCUDACXX_END_NAMESPACE_CUDA_DEVICE _LIBCUDACXX_BEGIN_NAMESPACE_CUDA - template<> - class pipeline { - public: - pipeline(pipeline &&) = default; - pipeline(const pipeline &) = delete; - pipeline & operator=(pipeline &&) = delete; - pipeline & operator=(const pipeline &) = delete; - - _LIBCUDACXX_INLINE_VISIBILITY - ~pipeline() {} - - _LIBCUDACXX_INLINE_VISIBILITY - bool quit() - { - return true; - } - - _LIBCUDACXX_INLINE_VISIBILITY - void producer_acquire() {} - - _LIBCUDACXX_INLINE_VISIBILITY - void producer_commit() - { -NV_IF_TARGET(NV_PROVIDES_SM_80, - asm volatile ("cp.async.commit_group;"); - ++__head; -) - } - - _LIBCUDACXX_INLINE_VISIBILITY - void consumer_wait() - { -NV_IF_TARGET(NV_PROVIDES_SM_80, - if (__head == __tail) { - return; - } - - const uint8_t __prior = __head - __tail - 1; - 
device::__pipeline_consumer_wait(*this, __prior); - ++__tail; -) - } - - _LIBCUDACXX_INLINE_VISIBILITY - void consumer_release() {} - - template - _LIBCUDACXX_INLINE_VISIBILITY - bool consumer_wait_for(const _CUDA_VSTD::chrono::duration<_Rep, _Period> & __duration) - { - (void)__duration; - consumer_wait(); - return true; - } - - template - _LIBCUDACXX_INLINE_VISIBILITY - bool consumer_wait_until(const _CUDA_VSTD::chrono::time_point<_Clock, _Duration> & __time_point) - { - (void)__time_point; - consumer_wait(); - return true; - } - - private: - uint8_t __head; - uint8_t __tail; - - _LIBCUDACXX_INLINE_VISIBILITY - pipeline() - : __head(0) - , __tail(0) - {} - - friend _LIBCUDACXX_INLINE_VISIBILITY inline pipeline make_pipeline(); - - template - friend _LIBCUDACXX_INLINE_VISIBILITY - void pipeline_consumer_wait_prior(pipeline & __pipeline); - - template - friend _LIBCUDACXX_INLINE_VISIBILITY - pipeline<_Pipeline_scope> __make_pipeline(const _Group & __group, pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count> * __shared_state); - }; +template <> +class pipeline +{ +public: + pipeline(pipeline&&) = default; + pipeline(const pipeline&) = delete; + pipeline& operator=(pipeline&&) = delete; + pipeline& operator=(const pipeline&) = delete; + + _LIBCUDACXX_INLINE_VISIBILITY ~pipeline() {} + + _LIBCUDACXX_INLINE_VISIBILITY bool quit() + { + return true; + } + + _LIBCUDACXX_INLINE_VISIBILITY void producer_acquire() {} + + _LIBCUDACXX_INLINE_VISIBILITY void producer_commit() + { + NV_IF_TARGET(NV_PROVIDES_SM_80, asm volatile("cp.async.commit_group;"); ++__head;) + } + + _LIBCUDACXX_INLINE_VISIBILITY void consumer_wait() + { + NV_IF_TARGET( + NV_PROVIDES_SM_80, + if (__head == __tail) { return; } + + const uint8_t __prior = __head - __tail - 1; + device::__pipeline_consumer_wait(*this, __prior); + ++__tail;) + } + + _LIBCUDACXX_INLINE_VISIBILITY void consumer_release() {} + + template + _LIBCUDACXX_INLINE_VISIBILITY bool consumer_wait_for(const _CUDA_VSTD::chrono::duration<_Rep, _Period>& __duration) + { + (void) __duration; + consumer_wait(); + return true; + } + + template + _LIBCUDACXX_INLINE_VISIBILITY bool + consumer_wait_until(const _CUDA_VSTD::chrono::time_point<_Clock, _Duration>& __time_point) + { + (void) __time_point; + consumer_wait(); + return true; + } + +private: + uint8_t __head; + uint8_t __tail; + + _LIBCUDACXX_INLINE_VISIBILITY pipeline() + : __head(0) + , __tail(0) + {} + + friend _LIBCUDACXX_INLINE_VISIBILITY inline pipeline make_pipeline(); + + template + friend _LIBCUDACXX_INLINE_VISIBILITY void pipeline_consumer_wait_prior(pipeline& __pipeline); + + template + friend _LIBCUDACXX_INLINE_VISIBILITY pipeline<_Pipeline_scope> __make_pipeline( + const _Group& __group, pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count>* __shared_state); +}; _LIBCUDACXX_END_NAMESPACE_CUDA _LIBCUDACXX_BEGIN_NAMESPACE_CUDA_DEVICE - template - _CCCL_DEVICE - void __pipeline_consumer_wait(pipeline & __pipeline) - { - (void)__pipeline; -NV_IF_TARGET(NV_PROVIDES_SM_80, - constexpr uint8_t __max_prior = 8; - - asm volatile ("cp.async.wait_group %0;" - : - : "n"(_Prior < __max_prior ? 
_Prior : __max_prior)); -) - } - - _CCCL_DEVICE - inline void __pipeline_consumer_wait(pipeline & __pipeline, uint8_t __prior) - { - switch (__prior) { - case 0: device::__pipeline_consumer_wait<0>(__pipeline); break; - case 1: device::__pipeline_consumer_wait<1>(__pipeline); break; - case 2: device::__pipeline_consumer_wait<2>(__pipeline); break; - case 3: device::__pipeline_consumer_wait<3>(__pipeline); break; - case 4: device::__pipeline_consumer_wait<4>(__pipeline); break; - case 5: device::__pipeline_consumer_wait<5>(__pipeline); break; - case 6: device::__pipeline_consumer_wait<6>(__pipeline); break; - case 7: device::__pipeline_consumer_wait<7>(__pipeline); break; - default: device::__pipeline_consumer_wait<8>(__pipeline); break; - } - } +template +_CCCL_DEVICE void __pipeline_consumer_wait(pipeline& __pipeline) +{ + (void) __pipeline; + NV_IF_TARGET(NV_PROVIDES_SM_80, constexpr uint8_t __max_prior = 8; + + asm volatile("cp.async.wait_group %0;" + : + : "n"(_Prior < __max_prior ? _Prior : __max_prior));) +} + +_CCCL_DEVICE inline void __pipeline_consumer_wait(pipeline& __pipeline, uint8_t __prior) +{ + switch (__prior) + { + case 0: + device::__pipeline_consumer_wait<0>(__pipeline); + break; + case 1: + device::__pipeline_consumer_wait<1>(__pipeline); + break; + case 2: + device::__pipeline_consumer_wait<2>(__pipeline); + break; + case 3: + device::__pipeline_consumer_wait<3>(__pipeline); + break; + case 4: + device::__pipeline_consumer_wait<4>(__pipeline); + break; + case 5: + device::__pipeline_consumer_wait<5>(__pipeline); + break; + case 6: + device::__pipeline_consumer_wait<6>(__pipeline); + break; + case 7: + device::__pipeline_consumer_wait<7>(__pipeline); + break; + default: + device::__pipeline_consumer_wait<8>(__pipeline); + break; + } +} _LIBCUDACXX_END_NAMESPACE_CUDA_DEVICE _LIBCUDACXX_BEGIN_NAMESPACE_CUDA - _LIBCUDACXX_INLINE_VISIBILITY - inline pipeline make_pipeline() - { - return pipeline(); - } - - template - _LIBCUDACXX_INLINE_VISIBILITY - void pipeline_consumer_wait_prior(pipeline & __pipeline) - { - NV_IF_TARGET(NV_PROVIDES_SM_80, - device::__pipeline_consumer_wait<_Prior>(__pipeline); - __pipeline.__tail = __pipeline.__head - _Prior; - ) - } - - template - _LIBCUDACXX_INLINE_VISIBILITY - void pipeline_producer_commit(pipeline & __pipeline, barrier<_Scope> & __barrier) - { - (void)__pipeline; - NV_IF_TARGET(NV_PROVIDES_SM_80,( - (void)__memcpy_completion_impl::__defer(__completion_mechanism::__async_group, __single_thread_group{}, 0, __barrier); - )); - } - - template - _LIBCUDACXX_INLINE_VISIBILITY - async_contract_fulfillment __memcpy_async_pipeline(_Group const & __group, _Tp * __destination, _Tp const * __source, _Size __size, pipeline<_Scope> & __pipeline) { - // 1. Set the completion mechanisms that can be used. - // - // Do not (yet) allow async_bulk_group completion. Do not allow - // mbarrier_complete_tx completion, even though it may be possible if - // the pipeline has stage barriers in shared memory. - _CUDA_VSTD::uint32_t __allowed_completions = _CUDA_VSTD::uint32_t(__completion_mechanism::__async_group); - - // Alignment: Use the maximum of the alignment of _Tp and that of a possible cuda::aligned_size_t. - constexpr _CUDA_VSTD::size_t __size_align = __get_size_align<_Size>::align; - constexpr _CUDA_VSTD::size_t __align = (alignof(_Tp) < __size_align) ? __size_align : alignof(_Tp); - // Cast to char pointers. We don't need the type for alignment anymore and - // erasing the types reduces the number of instantiations of down-stream - // functions. 
- char * __dest_char = reinterpret_cast(__destination); - char const * __src_char = reinterpret_cast(__source); - - // 2. Issue actual copy instructions. - auto __cm = __dispatch_memcpy_async<__align>(__group, __dest_char, __src_char, __size, __allowed_completions); - - // 3. No need to synchronize with copy instructions. - return __memcpy_completion_impl::__defer(__cm, __group, __size, __pipeline); - } - - template - _LIBCUDACXX_INLINE_VISIBILITY - async_contract_fulfillment memcpy_async(_Group const & __group, _Type * __destination, _Type const * __source, std::size_t __size, pipeline<_Scope> & __pipeline) { - return __memcpy_async_pipeline(__group, __destination, __source, __size, __pipeline); - } - - template _Alignment) ? alignof(_Type) : _Alignment> - _LIBCUDACXX_INLINE_VISIBILITY - async_contract_fulfillment memcpy_async(_Group const & __group, _Type * __destination, _Type const * __source, aligned_size_t<_Alignment> __size, pipeline<_Scope> & __pipeline) { - return __memcpy_async_pipeline(__group, __destination, __source, __size, __pipeline); - } - - template - _LIBCUDACXX_INLINE_VISIBILITY - async_contract_fulfillment memcpy_async(_Type * __destination, _Type const * __source, _Size __size, pipeline<_Scope> & __pipeline) { - return __memcpy_async_pipeline(__single_thread_group{}, __destination, __source, __size, __pipeline); - } - - template - _LIBCUDACXX_INLINE_VISIBILITY - async_contract_fulfillment memcpy_async(_Group const & __group, void * __destination, void const * __source, std::size_t __size, pipeline<_Scope> & __pipeline) { - return __memcpy_async_pipeline(__group, reinterpret_cast(__destination), reinterpret_cast(__source), __size, __pipeline); - } - - template - _LIBCUDACXX_INLINE_VISIBILITY - async_contract_fulfillment memcpy_async(_Group const & __group, void * __destination, void const * __source, aligned_size_t<_Alignment> __size, pipeline<_Scope> & __pipeline) { - return __memcpy_async_pipeline(__group, reinterpret_cast(__destination), reinterpret_cast(__source), __size, __pipeline); - } - - template - _LIBCUDACXX_INLINE_VISIBILITY - async_contract_fulfillment memcpy_async(void * __destination, void const * __source, _Size __size, pipeline<_Scope> & __pipeline) { - return __memcpy_async_pipeline(__single_thread_group{}, reinterpret_cast(__destination), reinterpret_cast(__source), __size, __pipeline); - } +_LIBCUDACXX_INLINE_VISIBILITY inline pipeline make_pipeline() +{ + return pipeline(); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY void pipeline_consumer_wait_prior(pipeline& __pipeline) +{ + NV_IF_TARGET(NV_PROVIDES_SM_80, device::__pipeline_consumer_wait<_Prior>(__pipeline); + __pipeline.__tail = __pipeline.__head - _Prior;) +} + +template +_LIBCUDACXX_INLINE_VISIBILITY void +pipeline_producer_commit(pipeline& __pipeline, barrier<_Scope>& __barrier) +{ + (void) __pipeline; + NV_IF_TARGET(NV_PROVIDES_SM_80, + ((void) __memcpy_completion_impl::__defer( + __completion_mechanism::__async_group, __single_thread_group{}, 0, __barrier);)); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY async_contract_fulfillment __memcpy_async_pipeline( + _Group const& __group, _Tp* __destination, _Tp const* __source, _Size __size, pipeline<_Scope>& __pipeline) +{ + // 1. Set the completion mechanisms that can be used. + // + // Do not (yet) allow async_bulk_group completion. Do not allow + // mbarrier_complete_tx completion, even though it may be possible if + // the pipeline has stage barriers in shared memory. 
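+ // (Editorial note, not part of the patch: __allowed_completions is a bitmask of
+ // __completion_mechanism values. With only __async_group allowed, the dispatch in
+ // step 2 may issue cp.async copies whose completion is handed off to the pipeline
+ // in step 3, or it may fall back to a plain synchronous copy.)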
+ _CUDA_VSTD::uint32_t __allowed_completions = _CUDA_VSTD::uint32_t(__completion_mechanism::__async_group); + + // Alignment: Use the maximum of the alignment of _Tp and that of a possible cuda::aligned_size_t. + constexpr _CUDA_VSTD::size_t __size_align = __get_size_align<_Size>::align; + constexpr _CUDA_VSTD::size_t __align = (alignof(_Tp) < __size_align) ? __size_align : alignof(_Tp); + // Cast to char pointers. We don't need the type for alignment anymore and + // erasing the types reduces the number of instantiations of down-stream + // functions. + char* __dest_char = reinterpret_cast(__destination); + char const* __src_char = reinterpret_cast(__source); + + // 2. Issue actual copy instructions. + auto __cm = __dispatch_memcpy_async<__align>(__group, __dest_char, __src_char, __size, __allowed_completions); + + // 3. No need to synchronize with copy instructions. + return __memcpy_completion_impl::__defer(__cm, __group, __size, __pipeline); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY async_contract_fulfillment memcpy_async( + _Group const& __group, _Type* __destination, _Type const* __source, std::size_t __size, pipeline<_Scope>& __pipeline) +{ + return __memcpy_async_pipeline(__group, __destination, __source, __size, __pipeline); +} + +template _Alignment) ? alignof(_Type) : _Alignment> +_LIBCUDACXX_INLINE_VISIBILITY async_contract_fulfillment memcpy_async( + _Group const& __group, + _Type* __destination, + _Type const* __source, + aligned_size_t<_Alignment> __size, + pipeline<_Scope>& __pipeline) +{ + return __memcpy_async_pipeline(__group, __destination, __source, __size, __pipeline); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY async_contract_fulfillment +memcpy_async(_Type* __destination, _Type const* __source, _Size __size, pipeline<_Scope>& __pipeline) +{ + return __memcpy_async_pipeline(__single_thread_group{}, __destination, __source, __size, __pipeline); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY async_contract_fulfillment memcpy_async( + _Group const& __group, void* __destination, void const* __source, std::size_t __size, pipeline<_Scope>& __pipeline) +{ + return __memcpy_async_pipeline( + __group, reinterpret_cast(__destination), reinterpret_cast(__source), __size, __pipeline); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY async_contract_fulfillment memcpy_async( + _Group const& __group, + void* __destination, + void const* __source, + aligned_size_t<_Alignment> __size, + pipeline<_Scope>& __pipeline) +{ + return __memcpy_async_pipeline( + __group, reinterpret_cast(__destination), reinterpret_cast(__source), __size, __pipeline); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY async_contract_fulfillment +memcpy_async(void* __destination, void const* __source, _Size __size, pipeline<_Scope>& __pipeline) +{ + return __memcpy_async_pipeline( + __single_thread_group{}, + reinterpret_cast(__destination), + reinterpret_cast(__source), + __size, + __pipeline); +} _LIBCUDACXX_END_NAMESPACE_CUDA diff --git a/libcudacxx/include/cuda/std/__algorithm_ b/libcudacxx/include/cuda/std/__algorithm_ index 91c4160a8b5..a1762b79ae5 100644 --- a/libcudacxx/include/cuda/std/__algorithm_ +++ b/libcudacxx/include/cuda/std/__algorithm_ @@ -13,10 +13,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_ALGORITHM diff --git a/libcudacxx/include/cuda/std/__exception_ b/libcudacxx/include/cuda/std/__exception_ index e5aedc1d49d..c9b2b855f5c 100644 --- a/libcudacxx/include/cuda/std/__exception_ +++ b/libcudacxx/include/cuda/std/__exception_ @@ -12,11 +12,8 @@ #define 
_CUDA_STD_NEW #include "detail/__config" - +#include "detail/__pragma_pop" #include "detail/__pragma_push" - #include "detail/libcxx/include/exception" -#include "detail/__pragma_pop" - #endif // _CUDA_STD_NEW diff --git a/libcudacxx/include/cuda/std/__memory_ b/libcudacxx/include/cuda/std/__memory_ index 1bff78d6773..077c795e1ed 100644 --- a/libcudacxx/include/cuda/std/__memory_ +++ b/libcudacxx/include/cuda/std/__memory_ @@ -12,11 +12,8 @@ #define _CUDA_STD_MEMORY #include "detail/__config" - +#include "detail/__pragma_pop" #include "detail/__pragma_push" - #include "detail/libcxx/include/memory" -#include "detail/__pragma_pop" - #endif // _CUDA_STD_MEMORY diff --git a/libcudacxx/include/cuda/std/__new_ b/libcudacxx/include/cuda/std/__new_ index 3e8aefcdb6f..daaf0f48084 100644 --- a/libcudacxx/include/cuda/std/__new_ +++ b/libcudacxx/include/cuda/std/__new_ @@ -12,11 +12,8 @@ #define _CUDA_STD_NEW #include "detail/__config" - +#include "detail/__pragma_pop" #include "detail/__pragma_push" - #include "detail/libcxx/include/new" -#include "detail/__pragma_pop" - #endif // _CUDA_STD_NEW diff --git a/libcudacxx/include/cuda/std/array b/libcudacxx/include/cuda/std/array index f0bd5785600..4dd41a43020 100644 --- a/libcudacxx/include/cuda/std/array +++ b/libcudacxx/include/cuda/std/array @@ -12,10 +12,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_ARRAY diff --git a/libcudacxx/include/cuda/std/atomic b/libcudacxx/include/cuda/std/atomic index 0daab5f2cb5..7908a2274ea 100644 --- a/libcudacxx/include/cuda/std/atomic +++ b/libcudacxx/include/cuda/std/atomic @@ -13,10 +13,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_ATOMIC diff --git a/libcudacxx/include/cuda/std/barrier b/libcudacxx/include/cuda/std/barrier index 415c3f80acf..94ab6e65df4 100644 --- a/libcudacxx/include/cuda/std/barrier +++ b/libcudacxx/include/cuda/std/barrier @@ -17,10 +17,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_BARRIER diff --git a/libcudacxx/include/cuda/std/bit b/libcudacxx/include/cuda/std/bit index 491b346c576..a80f1d5d1df 100644 --- a/libcudacxx/include/cuda/std/bit +++ b/libcudacxx/include/cuda/std/bit @@ -13,10 +13,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_BIT diff --git a/libcudacxx/include/cuda/std/cassert b/libcudacxx/include/cuda/std/cassert index af8af80e43d..b6400ae2694 100644 --- a/libcudacxx/include/cuda/std/cassert +++ b/libcudacxx/include/cuda/std/cassert @@ -13,10 +13,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_CASSERT diff --git a/libcudacxx/include/cuda/std/cfloat b/libcudacxx/include/cuda/std/cfloat index 31a9f8e4e61..13f64607bf3 100644 --- a/libcudacxx/include/cuda/std/cfloat +++ b/libcudacxx/include/cuda/std/cfloat @@ -13,10 +13,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_CFLOAT diff --git a/libcudacxx/include/cuda/std/chrono b/libcudacxx/include/cuda/std/chrono index f8d62efb4f6..38eff65fb16 100644 --- a/libcudacxx/include/cuda/std/chrono +++ b/libcudacxx/include/cuda/std/chrono @@ -13,10 +13,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_CHRONO diff --git a/libcudacxx/include/cuda/std/climits b/libcudacxx/include/cuda/std/climits index f7934b665a9..fa981537469 100644 --- a/libcudacxx/include/cuda/std/climits +++ b/libcudacxx/include/cuda/std/climits @@ -13,10 +13,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_CLIMITS diff --git 
a/libcudacxx/include/cuda/std/cmath b/libcudacxx/include/cuda/std/cmath index a6a05ef2430..68524be4bad 100644 --- a/libcudacxx/include/cuda/std/cmath +++ b/libcudacxx/include/cuda/std/cmath @@ -12,10 +12,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_CMATH diff --git a/libcudacxx/include/cuda/std/complex b/libcudacxx/include/cuda/std/complex index 7c8ea6b5b46..4940f7cb2bc 100644 --- a/libcudacxx/include/cuda/std/complex +++ b/libcudacxx/include/cuda/std/complex @@ -12,10 +12,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_COMPLEX diff --git a/libcudacxx/include/cuda/std/concepts b/libcudacxx/include/cuda/std/concepts index d3f9eb25dde..eee16d9b100 100644 --- a/libcudacxx/include/cuda/std/concepts +++ b/libcudacxx/include/cuda/std/concepts @@ -12,10 +12,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_CONCEPTS diff --git a/libcudacxx/include/cuda/std/cstddef b/libcudacxx/include/cuda/std/cstddef index 95aae77de22..5fe32da86d8 100644 --- a/libcudacxx/include/cuda/std/cstddef +++ b/libcudacxx/include/cuda/std/cstddef @@ -13,10 +13,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_CSTDDEF diff --git a/libcudacxx/include/cuda/std/cstdint b/libcudacxx/include/cuda/std/cstdint index 22c0754e481..f62a90d93ee 100644 --- a/libcudacxx/include/cuda/std/cstdint +++ b/libcudacxx/include/cuda/std/cstdint @@ -13,10 +13,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_CSTDINT diff --git a/libcudacxx/include/cuda/std/cstdlib b/libcudacxx/include/cuda/std/cstdlib index af85815be27..36c3d976657 100644 --- a/libcudacxx/include/cuda/std/cstdlib +++ b/libcudacxx/include/cuda/std/cstdlib @@ -12,11 +12,8 @@ #define _CUDA_STD_CSTDLIB #include "detail/__config" - +#include "detail/__pragma_pop" #include "detail/__pragma_push" - #include "detail/libcxx/include/cstdlib" -#include "detail/__pragma_pop" - #endif // _CUDA_STD_CSTDLIB diff --git a/libcudacxx/include/cuda/std/ctime b/libcudacxx/include/cuda/std/ctime index d610c831077..72275a6bdf3 100644 --- a/libcudacxx/include/cuda/std/ctime +++ b/libcudacxx/include/cuda/std/ctime @@ -13,10 +13,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_CTIME diff --git a/libcudacxx/include/cuda/std/detail/__access_property b/libcudacxx/include/cuda/std/detail/__access_property index 7d9718503e9..c63ec342df9 100644 --- a/libcudacxx/include/cuda/std/detail/__access_property +++ b/libcudacxx/include/cuda/std/detail/__access_property @@ -3,325 +3,445 @@ * * NVIDIA SOFTWARE LICENSE * - * This license is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of the NVIDIA/CUDA C++ Library software and materials provided hereunder (“SOFTWARE”). + * This license is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of the + * NVIDIA/CUDA C++ Library software and materials provided hereunder (“SOFTWARE”). * - * This license can be accepted only by an adult of legal age of majority in the country in which the SOFTWARE is used. If you are under the legal age of majority, you must ask your parent or legal guardian to consent to this license. By taking delivery of the SOFTWARE, you affirm that you have reached the legal age of majority, you accept the terms of this license, and you take legal and financial responsibility for the actions of your permitted users. 
+ * This license can be accepted only by an adult of legal age of majority in the country in which the SOFTWARE is used. + * If you are under the legal age of majority, you must ask your parent or legal guardian to consent to this license. By + * taking delivery of the SOFTWARE, you affirm that you have reached the legal age of majority, you accept the terms of + * this license, and you take legal and financial responsibility for the actions of your permitted users. * - * You agree to use the SOFTWARE only for purposes that are permitted by (a) this license, and (b) any applicable law, regulation or generally accepted practices or guidelines in the relevant jurisdictions. + * You agree to use the SOFTWARE only for purposes that are permitted by (a) this license, and (b) any applicable law, + * regulation or generally accepted practices or guidelines in the relevant jurisdictions. * - * 1. LICENSE. Subject to the terms of this license, NVIDIA grants you a non-exclusive limited license to: (a) install and use the SOFTWARE, and (b) distribute the SOFTWARE subject to the distribution requirements described in this license. NVIDIA reserves all rights, title and interest in and to the SOFTWARE not expressly granted to you under this license. + * 1. LICENSE. Subject to the terms of this license, NVIDIA grants you a non-exclusive limited license to: (a) install + * and use the SOFTWARE, and (b) distribute the SOFTWARE subject to the distribution requirements described in this + * license. NVIDIA reserves all rights, title and interest in and to the SOFTWARE not expressly granted to you under + * this license. * * 2. DISTRIBUTION REQUIREMENTS. These are the distribution requirements for you to exercise the distribution grant: - * a. The terms under which you distribute the SOFTWARE must be consistent with the terms of this license, including (without limitation) terms relating to the license grant and license restrictions and protection of NVIDIA’s intellectual property rights. - * b. You agree to notify NVIDIA in writing of any known or suspected distribution or use of the SOFTWARE not in compliance with the requirements of this license, and to enforce the terms of your agreements with respect to distributed SOFTWARE. + * a. The terms under which you distribute the SOFTWARE must be consistent with the terms of this license, + * including (without limitation) terms relating to the license grant and license restrictions and protection of + * NVIDIA’s intellectual property rights. b. You agree to notify NVIDIA in writing of any known or suspected + * distribution or use of the SOFTWARE not in compliance with the requirements of this license, and to enforce the terms + * of your agreements with respect to distributed SOFTWARE. * * 3. LIMITATIONS. Your license to use the SOFTWARE is restricted as follows: * a. The SOFTWARE is licensed for you to develop applications only for use in systems with NVIDIA GPUs. - * b. You may not reverse engineer, decompile or disassemble, or remove copyright or other proprietary notices from any portion of the SOFTWARE or copies of the SOFTWARE. - * c. You may not modify or create derivative works of any portion of the SOFTWARE. - * d. You may not bypass, disable, or circumvent any technical measure, encryption, security, digital rights management or authentication mechanism in the SOFTWARE. - * e. You may not use the SOFTWARE in any manner that would cause it to become subject to an open source software license. 
As examples, licenses that require as a condition of use, modification, and/or distribution that the SOFTWARE be (i) disclosed or distributed in source code form; (ii) licensed for the purpose of making derivative works; or (iii) redistributable at no charge. - * f. Unless you have an agreement with NVIDIA for this purpose, you may not use the SOFTWARE with any system or application where the use or failure of the system or application can reasonably be expected to threaten or result in personal injury, death, or catastrophic loss. Examples include use in avionics, navigation, military, medical, life support or other life critical applications. NVIDIA does not design, test or manufacture the SOFTWARE for these critical uses and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or damages arising from such uses. - * g. You agree to defend, indemnify and hold harmless NVIDIA and its affiliates, and their respective employees, contractors, agents, officers and directors, from and against any and all claims, damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited to attorney’s fees and costs incident to establishing the right of indemnification) arising out of or related to use of the SOFTWARE outside of the scope of this Agreement, or not in compliance with its terms. + * b. You may not reverse engineer, decompile or disassemble, or remove copyright or other proprietary notices from + * any portion of the SOFTWARE or copies of the SOFTWARE. c. You may not modify or create derivative works of any + * portion of the SOFTWARE. d. You may not bypass, disable, or circumvent any technical measure, encryption, + * security, digital rights management or authentication mechanism in the SOFTWARE. e. You may not use the SOFTWARE + * in any manner that would cause it to become subject to an open source software license. As examples, licenses that + * require as a condition of use, modification, and/or distribution that the SOFTWARE be (i) disclosed or distributed in + * source code form; (ii) licensed for the purpose of making derivative works; or (iii) redistributable at no charge. f. + * Unless you have an agreement with NVIDIA for this purpose, you may not use the SOFTWARE with any system or + * application where the use or failure of the system or application can reasonably be expected to threaten or result in + * personal injury, death, or catastrophic loss. Examples include use in avionics, navigation, military, medical, life + * support or other life critical applications. NVIDIA does not design, test or manufacture the SOFTWARE for these + * critical uses and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or + * damages arising from such uses. g. You agree to defend, indemnify and hold harmless NVIDIA and its affiliates, + * and their respective employees, contractors, agents, officers and directors, from and against any and all claims, + * damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited + * to attorney’s fees and costs incident to establishing the right of indemnification) arising out of or related to use + * of the SOFTWARE outside of the scope of this Agreement, or not in compliance with its terms. * - * 4. PRE-RELEASE. 
SOFTWARE versions identified as alpha, beta, preview, early access or otherwise as pre-release may not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, availability, and reliability standards relative to commercial versions of NVIDIA software and materials. You may use a pre-release SOFTWARE version at your own risk, understanding that these versions are not intended for use in production or business-critical systems. + * 4. PRE-RELEASE. SOFTWARE versions identified as alpha, beta, preview, early access or otherwise as pre-release may + * not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, + * availability, and reliability standards relative to commercial versions of NVIDIA software and materials. You may use + * a pre-release SOFTWARE version at your own risk, understanding that these versions are not intended for use in + * production or business-critical systems. * - * 5. OWNERSHIP. The SOFTWARE and the related intellectual property rights therein are and will remain the sole and exclusive property of NVIDIA or its licensors. The SOFTWARE is copyrighted and protected by the laws of the United States and other countries, and international treaty provisions. NVIDIA may make changes to the SOFTWARE, at any time without notice, but is not obligated to support or update the SOFTWARE. + * 5. OWNERSHIP. The SOFTWARE and the related intellectual property rights therein are and will remain the sole and + * exclusive property of NVIDIA or its licensors. The SOFTWARE is copyrighted and protected by the laws of the United + * States and other countries, and international treaty provisions. NVIDIA may make changes to the SOFTWARE, at any time + * without notice, but is not obligated to support or update the SOFTWARE. * - * 6. COMPONENTS UNDER OTHER LICENSES. The SOFTWARE may include NVIDIA or third-party components with separate legal notices or terms as may be described in proprietary notices accompanying the SOFTWARE. If and to the extent there is a conflict between the terms in this license and the license terms associated with a component, the license terms associated with the components control only to the extent necessary to resolve the conflict. + * 6. COMPONENTS UNDER OTHER LICENSES. The SOFTWARE may include NVIDIA or third-party components with separate legal + * notices or terms as may be described in proprietary notices accompanying the SOFTWARE. If and to the extent there is + * a conflict between the terms in this license and the license terms associated with a component, the license terms + * associated with the components control only to the extent necessary to resolve the conflict. * - * 7. FEEDBACK. You may, but don’t have to, provide to NVIDIA any Feedback. “Feedback” means any suggestions, bug fixes, enhancements, modifications, feature requests or other feedback regarding the SOFTWARE. For any Feedback that you voluntarily provide, you hereby grant NVIDIA and its affiliates a perpetual, non-exclusive, worldwide, irrevocable license to use, reproduce, modify, license, sublicense (through multiple tiers of sublicensees), and distribute (through multiple tiers of distributors) the Feedback without the payment of any royalties or fees to you. NVIDIA will use Feedback at its choice. + * 7. FEEDBACK. You may, but don’t have to, provide to NVIDIA any Feedback. 
“Feedback” means any suggestions, bug fixes, + * enhancements, modifications, feature requests or other feedback regarding the SOFTWARE. For any Feedback that you + * voluntarily provide, you hereby grant NVIDIA and its affiliates a perpetual, non-exclusive, worldwide, irrevocable + * license to use, reproduce, modify, license, sublicense (through multiple tiers of sublicensees), and distribute + * (through multiple tiers of distributors) the Feedback without the payment of any royalties or fees to you. NVIDIA + * will use Feedback at its choice. * - * 8. NO WARRANTIES. THE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING, BUT NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. NVIDIA DOES NOT WARRANT THAT THE SOFTWARE WILL MEET YOUR REQUIREMENTS OR THAT THE OPERATION THEREOF WILL BE UNINTERRUPTED OR ERROR-FREE, OR THAT ALL ERRORS WILL BE CORRECTED. + * 8. NO WARRANTIES. THE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING, BUT + * NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. NVIDIA DOES NOT + * WARRANT THAT THE SOFTWARE WILL MEET YOUR REQUIREMENTS OR THAT THE OPERATION THEREOF WILL BE UNINTERRUPTED OR + * ERROR-FREE, OR THAT ALL ERRORS WILL BE CORRECTED. * - * 9. LIMITATIONS OF LIABILITY. TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, PROJECT DELAYS, LOSS OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH THIS LICENSE OR THE USE OR PERFORMANCE OF THE SOFTWARE, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF LIABILITY, EVEN IF NVIDIA HAS PREVIOUSLY BEEN ADVISED OF, OR COULD REASONABLY HAVE FORESEEN, THE POSSIBILITY OF SUCH DAMAGES. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS LICENSE EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS LIMIT. + * 9. LIMITATIONS OF LIABILITY. TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS AFFILIATES SHALL NOT BE LIABLE + * FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, PROJECT DELAYS, LOSS OF USE, + * LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH + * THIS LICENSE OR THE USE OR PERFORMANCE OF THE SOFTWARE, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON + * BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION + * OR THEORY OF LIABILITY, EVEN IF NVIDIA HAS PREVIOUSLY BEEN ADVISED OF, OR COULD REASONABLY HAVE FORESEEN, THE + * POSSIBILITY OF SUCH DAMAGES. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES TOTAL CUMULATIVE LIABILITY UNDER OR ARISING + * OUT OF THIS LICENSE EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE + * OR EXTEND THIS LIMIT. * - * 10. TERMINATION. 
Your rights under this license will terminate automatically without notice from NVIDIA if you fail to comply with any term and condition of this license or if you commence or participate in any legal proceeding against NVIDIA with respect to the SOFTWARE. NVIDIA may terminate this license with advance written notice to you if NVIDIA decides to no longer provide the SOFTWARE in a country or, in NVIDIA’s sole discretion, the continued use of it is no longer commercially viable. Upon any termination of this license, you agree to promptly discontinue use of the SOFTWARE and destroy all copies in your possession or control. Your prior distributions in accordance with this license are not affected by the termination of this license. All provisions of this license will survive termination, except for the license granted to you. + * 10. TERMINATION. Your rights under this license will terminate automatically without notice from NVIDIA if you fail + * to comply with any term and condition of this license or if you commence or participate in any legal proceeding + * against NVIDIA with respect to the SOFTWARE. NVIDIA may terminate this license with advance written notice to you if + * NVIDIA decides to no longer provide the SOFTWARE in a country or, in NVIDIA’s sole discretion, the continued use of + * it is no longer commercially viable. Upon any termination of this license, you agree to promptly discontinue use of + * the SOFTWARE and destroy all copies in your possession or control. Your prior distributions in accordance with this + * license are not affected by the termination of this license. All provisions of this license will survive termination, + * except for the license granted to you. * - * 11. APPLICABLE LAW. This license will be governed in all respects by the laws of the United States and of the State of Delaware as those laws are applied to contracts entered into and performed entirely within Delaware by Delaware residents, without regard to the conflicts of laws principles. The United Nations Convention on Contracts for the International Sale of Goods is specifically disclaimed. You agree to all terms of this Agreement in the English language. The state or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction over any dispute or claim arising out of this license. Notwithstanding this, you agree that NVIDIA shall still be allowed to apply for injunctive remedies or an equivalent type of urgent legal relief in any jurisdiction. + * 11. APPLICABLE LAW. This license will be governed in all respects by the laws of the United States and of the State + * of Delaware as those laws are applied to contracts entered into and performed entirely within Delaware by Delaware + * residents, without regard to the conflicts of laws principles. The United Nations Convention on Contracts for the + * International Sale of Goods is specifically disclaimed. You agree to all terms of this Agreement in the English + * language. The state or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction + * over any dispute or claim arising out of this license. Notwithstanding this, you agree that NVIDIA shall still be + * allowed to apply for injunctive remedies or an equivalent type of urgent legal relief in any jurisdiction. * - * 12. NO ASSIGNMENT. This license and your rights and obligations thereunder may not be assigned by you by any means or operation of law without NVIDIA’s permission. 
Any attempted assignment not approved by NVIDIA in writing shall be void and of no effect. + * 12. NO ASSIGNMENT. This license and your rights and obligations thereunder may not be assigned by you by any means or + * operation of law without NVIDIA’s permission. Any attempted assignment not approved by NVIDIA in writing shall be + * void and of no effect. * - * 13. EXPORT. The SOFTWARE is subject to United States export laws and regulations. You agree that you will not ship, transfer or export the SOFTWARE into any country, or use the SOFTWARE in any manner, prohibited by the United States Bureau of Industry and Security or economic sanctions regulations administered by the U.S. Department of Treasury’s Office of Foreign Assets Control (OFAC), or any applicable export laws, restrictions or regulations. These laws include restrictions on destinations, end users and end use. By accepting this license, you confirm that you are not a resident or citizen of any country currently embargoed by the U.S. and that you are not otherwise prohibited from receiving the SOFTWARE. + * 13. EXPORT. The SOFTWARE is subject to United States export laws and regulations. You agree that you will not ship, + * transfer or export the SOFTWARE into any country, or use the SOFTWARE in any manner, prohibited by the United States + * Bureau of Industry and Security or economic sanctions regulations administered by the U.S. Department of Treasury’s + * Office of Foreign Assets Control (OFAC), or any applicable export laws, restrictions or regulations. These laws + * include restrictions on destinations, end users and end use. By accepting this license, you confirm that you are not + * a resident or citizen of any country currently embargoed by the U.S. and that you are not otherwise prohibited from + * receiving the SOFTWARE. * - * 14. GOVERNMENT USE. The SOFTWARE has been developed entirely at private expense and is “commercial items” consisting of “commercial computer software” and “commercial computer software documentation” provided with RESTRICTED RIGHTS. Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the restrictions in this license pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051. + * 14. GOVERNMENT USE. The SOFTWARE has been developed entirely at private expense and is “commercial items” consisting + * of “commercial computer software” and “commercial computer software documentation” provided with RESTRICTED RIGHTS. + * Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the + * restrictions in this license pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the + * Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is + * NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051. * - * 15. ENTIRE AGREEMENT. This license is the final, complete and exclusive agreement between the parties relating to the subject matter of this license and supersedes all prior or contemporaneous understandings and agreements relating to this subject matter, whether oral or written. 
If any court of competent jurisdiction determines that any provision of this license is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. This license may only be modified in a writing signed by an authorized representative of each party. + * 15. ENTIRE AGREEMENT. This license is the final, complete and exclusive agreement between the parties relating to the + * subject matter of this license and supersedes all prior or contemporaneous understandings and agreements relating to + * this subject matter, whether oral or written. If any court of competent jurisdiction determines that any provision of + * this license is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. + * This license may only be modified in a writing signed by an authorized representative of each party. * * (v. August 20, 2021) */ _LIBCUDACXX_BEGIN_NAMESPACE_CUDA -namespace __detail_ap { +namespace __detail_ap +{ - _CCCL_HOST_DEVICE - constexpr uint32_t __ap_floor_log2(uint32_t __x) { - return (__x == 1 | __x == 0) ? 0 : 1 + __ap_floor_log2(__x >> 1); - } +_CCCL_HOST_DEVICE constexpr uint32_t __ap_floor_log2(uint32_t __x) +{ + return (__x == 1 | __x == 0) ? 0 : 1 + __ap_floor_log2(__x >> 1); +} - _CCCL_HOST_DEVICE - constexpr uint32_t __ap_ceil_log2(uint32_t __x) { - return (__x == 1 | __x == 0) ? 0 : __ap_floor_log2(__x - 1) + 1; - } +_CCCL_HOST_DEVICE constexpr uint32_t __ap_ceil_log2(uint32_t __x) +{ + return (__x == 1 | __x == 0) ? 0 : __ap_floor_log2(__x - 1) + 1; +} - _CCCL_HOST_DEVICE - constexpr uint32_t __ap_min(uint32_t __a, uint32_t __b) noexcept { - return (__a < __b) ? __a : __b; - } +_CCCL_HOST_DEVICE constexpr uint32_t __ap_min(uint32_t __a, uint32_t __b) noexcept +{ + return (__a < __b) ? __a : __b; +} - _CCCL_HOST_DEVICE - constexpr uint32_t __ap_max(uint32_t __a, uint32_t __b) noexcept { - return (__a > __b) ? __a : __b; - } +_CCCL_HOST_DEVICE constexpr uint32_t __ap_max(uint32_t __a, uint32_t __b) noexcept +{ + return (__a > __b) ? __a : __b; +} // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61414 // Specifically search for 8.4 and 9.3 and above to guarantee uint64_t enum. 
-#if defined(_CCCL_COMPILER_GCC) && ( \ - ((_GNUC_VER < 804)) || \ - ((_GNUC_VER < 903)) \ - ) -# define _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION +#if defined(_CCCL_COMPILER_GCC) && (((_GNUC_VER < 804)) || ((_GNUC_VER < 903))) +# define _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION #else -# define _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION : uint64_t +# define _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION : uint64_t #endif - namespace __sm_80 { - namespace __off { - enum __l2_cop_off_t _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION { - _L2_EVICT_NORMAL = 0, - _L2_EVICT_FIRST = 1, - }; - } // namespace __off - - namespace __on { - enum __l2_cop_on_t _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION { - _L2_EVICT_NORMAL = 0, - _L2_EVICT_FIRST = 1, - _L2_EVICT_LAST = 2, - _L2_EVICT_NORMAL_DEMOTE = 3, - }; - } // namespace __on - - enum __l2_descriptor_mode_t _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION { - _DESC_IMPLICIT = 0, - _DESC_INTERLEAVED = 2, - _DESC_BLOCK_TYPE = 3, - }; - - enum __l2_eviction_max_way_t _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION { - _CUDA_AMPERE_MAX_L2_WAYS = std::uint32_t{16}, - }; - - enum __block_size_t _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION { - _BLOCKSIZE_4K = 0, - _BLOCKSIZE_8K = 1, - _BLOCKSIZE_16K = 2, - _BLOCKSIZE_32K = 3, - _BLOCKSIZE_64K = 4, - _BLOCKSIZE_128K = 5, - _BLOCKSIZE_256K = 6, - _BLOCKSIZE_512K = 7, - _BLOCKSIZE_1M = 8, - _BLOCKSIZE_2M = 9, - _BLOCKSIZE_4M = 10, - _BLOCKSIZE_8M = 11, - _BLOCKSIZE_16M = 12, - _BLOCKSIZE_32M = 13, - }; - - struct __block_desc_t { - uint64_t __ap_reserved : 37; - uint64_t __block_count: 7; - uint64_t __block_start: 7; - uint64_t __ap_reserved2 : 1; - __block_size_t __block_size : 4; - __off::__l2_cop_off_t __l2_cop_off : 1; - __on::__l2_cop_on_t __l2_cop_on : 2; - __l2_descriptor_mode_t __l2_descriptor_mode : 2; - uint64_t __l1_inv_dont_allocate : 1; - uint64_t __l2_sector_promote_256B : 1; - uint64_t __ap_reserved3 : 1; - - _CCCL_HOST_DEVICE - constexpr std::uint64_t __get_descriptor_cexpr() const noexcept { - return - std::uint64_t(__ap_reserved) << 0 | - std::uint64_t(__block_count) << 37 | - std::uint64_t(__block_start) << 44 | - std::uint64_t(__ap_reserved2) << 51 | - std::uint64_t(__block_size) << 52 | - std::uint64_t(__l2_cop_off) << 56 | - std::uint64_t(__l2_cop_on) << 57 | - std::uint64_t(__l2_descriptor_mode) << 59 | - std::uint64_t(__l1_inv_dont_allocate) << 61 | - std::uint64_t(__l2_sector_promote_256B) << 62 | - std::uint64_t(__ap_reserved3) << 63; - } - - inline - _CCCL_HOST_DEVICE - std::uint64_t __get_descriptor_non_cexpr() const noexcept { return *reinterpret_cast(this); } - - _CCCL_HOST_DEVICE - constexpr std::uint64_t __get_descriptor() const noexcept { +namespace __sm_80 +{ +namespace __off +{ +enum __l2_cop_off_t _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION +{ + _L2_EVICT_NORMAL = 0, + _L2_EVICT_FIRST = 1, +}; +} // namespace __off + +namespace __on +{ +enum __l2_cop_on_t _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION +{ + _L2_EVICT_NORMAL = 0, + _L2_EVICT_FIRST = 1, + _L2_EVICT_LAST = 2, + _L2_EVICT_NORMAL_DEMOTE = 3, +}; +} // namespace __on + +enum __l2_descriptor_mode_t _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION +{ + _DESC_IMPLICIT = 0, + _DESC_INTERLEAVED = 2, + _DESC_BLOCK_TYPE = 3, +}; + +enum __l2_eviction_max_way_t _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION +{ + _CUDA_AMPERE_MAX_L2_WAYS = std::uint32_t{16}, +}; + +enum __block_size_t _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION +{ + _BLOCKSIZE_4K = 0, + _BLOCKSIZE_8K = 1, + _BLOCKSIZE_16K = 2, + _BLOCKSIZE_32K = 3, + _BLOCKSIZE_64K = 4, + _BLOCKSIZE_128K = 5, + _BLOCKSIZE_256K = 6, + _BLOCKSIZE_512K = 7, + _BLOCKSIZE_1M = 8, + _BLOCKSIZE_2M = 9, + 
_BLOCKSIZE_4M = 10, + _BLOCKSIZE_8M = 11, + _BLOCKSIZE_16M = 12, + _BLOCKSIZE_32M = 13, +}; + +struct __block_desc_t +{ + uint64_t __ap_reserved : 37; + uint64_t __block_count : 7; + uint64_t __block_start : 7; + uint64_t __ap_reserved2 : 1; + __block_size_t __block_size : 4; + __off::__l2_cop_off_t __l2_cop_off : 1; + __on::__l2_cop_on_t __l2_cop_on : 2; + __l2_descriptor_mode_t __l2_descriptor_mode : 2; + uint64_t __l1_inv_dont_allocate : 1; + uint64_t __l2_sector_promote_256B : 1; + uint64_t __ap_reserved3 : 1; + + _CCCL_HOST_DEVICE constexpr std::uint64_t __get_descriptor_cexpr() const noexcept + { + return std::uint64_t(__ap_reserved) << 0 | std::uint64_t(__block_count) << 37 | std::uint64_t(__block_start) << 44 + | std::uint64_t(__ap_reserved2) << 51 | std::uint64_t(__block_size) << 52 | std::uint64_t(__l2_cop_off) << 56 + | std::uint64_t(__l2_cop_on) << 57 | std::uint64_t(__l2_descriptor_mode) << 59 + | std::uint64_t(__l1_inv_dont_allocate) << 61 | std::uint64_t(__l2_sector_promote_256B) << 62 + | std::uint64_t(__ap_reserved3) << 63; + } + + inline _CCCL_HOST_DEVICE std::uint64_t __get_descriptor_non_cexpr() const noexcept + { + return *reinterpret_cast(this); + } + + _CCCL_HOST_DEVICE constexpr std::uint64_t __get_descriptor() const noexcept + { #if defined(_LIBCUDACXX_IS_CONSTANT_EVALUATED) - return cuda::std::is_constant_evaluated() ? - __get_descriptor_cexpr() : - __get_descriptor_non_cexpr(); + return cuda::std::is_constant_evaluated() ? __get_descriptor_cexpr() : __get_descriptor_non_cexpr(); #else - return __get_descriptor_cexpr(); + return __get_descriptor_cexpr(); #endif - } - }; - static_assert(sizeof(__block_desc_t) == 8, "__block_desc_t should be 8 bytes"); - static_assert(sizeof(__block_desc_t) == sizeof(std::uint64_t), ""); - static_assert( - __block_desc_t{(uint64_t)1, (uint64_t)1, (uint64_t)1, (uint64_t)1, __block_size_t::_BLOCKSIZE_8K, __off::_L2_EVICT_FIRST, __on::_L2_EVICT_FIRST, __l2_descriptor_mode_t::_DESC_INTERLEAVED, (uint64_t)1, (uint64_t)1, (uint64_t)1}.__get_descriptor() - == 0xF318102000000001, ""); - - /* Factory like struct to build a __block_desc_t due to constexpr C++11 - */ - struct __block_descriptor_builder { //variable declaration order matters == usage order - std::uint32_t __offset; - __block_size_t __block_size; - std::uint32_t __block_start, __end_hit; - std::uint32_t __block_count; - __off::__l2_cop_off_t __l2_cop_off; - __on::__l2_cop_on_t __l2_cop_on; - __l2_descriptor_mode_t __l2_descriptor_mode; - bool __l1_inv_dont_allocate, __l2_sector_promote_256B; - - _CCCL_HOST_DEVICE static constexpr std::uint32_t __calc_offset(std::size_t __total_bytes) { - return __ap_max(std::uint32_t{12}, static_cast(__ap_ceil_log2(static_cast(__total_bytes))) - std::uint32_t{7}); - } - - _CCCL_HOST_DEVICE static constexpr std::uint32_t __calc_block_start(std::uintptr_t __ptr, std::size_t __total_bytes) { - return static_cast(__ptr >> __calc_offset(static_cast(__total_bytes))); - } - - _CCCL_HOST_DEVICE static constexpr std::uint32_t __calc_end_hit(std::uintptr_t __ptr, std::size_t __hit_bytes, std::size_t __total_bytes) { - return static_cast((__ptr + __hit_bytes + (std::uintptr_t{1} << (__calc_offset(static_cast(__total_bytes)))) - 1) >> __calc_offset(static_cast(__total_bytes))); - } - - _CCCL_HOST_DEVICE constexpr __block_descriptor_builder(std::uintptr_t __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, __on::__l2_cop_on_t __hit_prop, __off::__l2_cop_off_t __miss_prop) - : __offset(__calc_offset(__total_bytes)) - , 
__block_size(static_cast<__block_size_t>(__calc_offset(__total_bytes) - std::uint32_t{12})) - , __block_start(__calc_block_start(__ptr, __total_bytes)) - , __end_hit(__calc_end_hit(__ptr, __hit_bytes, __total_bytes)) - , __block_count(__calc_end_hit(__ptr, __hit_bytes, __total_bytes) - __calc_block_start(__ptr, __total_bytes)) - , __l2_cop_off(__miss_prop) - , __l2_cop_on(__hit_prop) - , __l2_descriptor_mode(_DESC_BLOCK_TYPE) - , __l1_inv_dont_allocate(false) - , __l2_sector_promote_256B(false) - {} - - _CCCL_HOST_DEVICE - constexpr __block_desc_t __get_block() const noexcept { - return __block_desc_t { 0, __ap_min(std::uint32_t{0x7f}, __block_count), (__block_start & std::uint32_t{0x7f}), 0, __block_size, __l2_cop_off, __l2_cop_on, _DESC_BLOCK_TYPE, false, false, 0 }; - } - }; - static_assert(sizeof(std::uintptr_t) > 4, "std::uintptr_t needs at least 5 bytes for this code to work"); - - struct __interleave_descriptor_t { - uint64_t __ap_reserved : 52; - uint64_t __fraction : 4; - __off::__l2_cop_off_t __l2_cop_off : 1; - __on::__l2_cop_on_t __l2_cop_on : 2; - __l2_descriptor_mode_t __l2_descriptor_mode : 2; - uint64_t __l1_inv_dont_allocate : 1; - uint64_t __l2_sector_promote_256B : 1; - uint64_t __ap_reserved2 : 1; - - _CCCL_HOST_DEVICE - constexpr __interleave_descriptor_t( - __on::__l2_cop_on_t __hit_prop, - std::uint32_t __hit_ratio, - __off::__l2_cop_off_t __miss_prop) noexcept - : __ap_reserved(0x0), - __fraction(__hit_ratio), - __l2_cop_off(__miss_prop), - __l2_cop_on(__hit_prop), - __l2_descriptor_mode(_DESC_INTERLEAVED), - __l1_inv_dont_allocate(0x0), - __l2_sector_promote_256B(0x0), - __ap_reserved2(0x0) {} - - _CCCL_HOST_DEVICE - constexpr std::uint64_t __get_descriptor_cexpr() const { - return - std::uint64_t(__ap_reserved) << 0 | - std::uint64_t(__fraction) << 52 | - std::uint64_t(__l2_cop_off) << 56 | - std::uint64_t(__l2_cop_on) << 57 | - std::uint64_t(__l2_descriptor_mode) << 59 | - std::uint64_t(__l1_inv_dont_allocate) << 61 | - std::uint64_t(__l2_sector_promote_256B) << 62 | - std::uint64_t(__ap_reserved2) << 63; - } - - inline - _CCCL_HOST_DEVICE - std::uint64_t __get_descriptor_non_cexpr() const noexcept { return *reinterpret_cast(this); } - - - _CCCL_HOST_DEVICE - constexpr std::uint64_t __get_descriptor() const noexcept { + } +}; +static_assert(sizeof(__block_desc_t) == 8, "__block_desc_t should be 8 bytes"); +static_assert(sizeof(__block_desc_t) == sizeof(std::uint64_t), ""); +static_assert( + __block_desc_t{ + (uint64_t) 1, + (uint64_t) 1, + (uint64_t) 1, + (uint64_t) 1, + __block_size_t::_BLOCKSIZE_8K, + __off::_L2_EVICT_FIRST, + __on::_L2_EVICT_FIRST, + __l2_descriptor_mode_t::_DESC_INTERLEAVED, + (uint64_t) 1, + (uint64_t) 1, + (uint64_t) 1} + .__get_descriptor() + == 0xF318102000000001, + ""); + +/* Factory like struct to build a __block_desc_t due to constexpr C++11 + */ +struct __block_descriptor_builder +{ // variable declaration order matters == usage order + std::uint32_t __offset; + __block_size_t __block_size; + std::uint32_t __block_start, __end_hit; + std::uint32_t __block_count; + __off::__l2_cop_off_t __l2_cop_off; + __on::__l2_cop_on_t __l2_cop_on; + __l2_descriptor_mode_t __l2_descriptor_mode; + bool __l1_inv_dont_allocate, __l2_sector_promote_256B; + + _CCCL_HOST_DEVICE static constexpr std::uint32_t __calc_offset(std::size_t __total_bytes) + { + return __ap_max( + std::uint32_t{12}, + static_cast(__ap_ceil_log2(static_cast(__total_bytes))) - std::uint32_t{7}); + } + + _CCCL_HOST_DEVICE static constexpr std::uint32_t 
__calc_block_start(std::uintptr_t __ptr, std::size_t __total_bytes) + { + return static_cast(__ptr >> __calc_offset(static_cast(__total_bytes))); + } + + _CCCL_HOST_DEVICE static constexpr std::uint32_t + __calc_end_hit(std::uintptr_t __ptr, std::size_t __hit_bytes, std::size_t __total_bytes) + { + return static_cast( + (__ptr + __hit_bytes + (std::uintptr_t{1} << (__calc_offset(static_cast(__total_bytes)))) - 1) + >> __calc_offset(static_cast(__total_bytes))); + } + + _CCCL_HOST_DEVICE constexpr __block_descriptor_builder( + std::uintptr_t __ptr, + std::size_t __hit_bytes, + std::size_t __total_bytes, + __on::__l2_cop_on_t __hit_prop, + __off::__l2_cop_off_t __miss_prop) + : __offset(__calc_offset(__total_bytes)) + , __block_size(static_cast<__block_size_t>(__calc_offset(__total_bytes) - std::uint32_t{12})) + , __block_start(__calc_block_start(__ptr, __total_bytes)) + , __end_hit(__calc_end_hit(__ptr, __hit_bytes, __total_bytes)) + , __block_count(__calc_end_hit(__ptr, __hit_bytes, __total_bytes) - __calc_block_start(__ptr, __total_bytes)) + , __l2_cop_off(__miss_prop) + , __l2_cop_on(__hit_prop) + , __l2_descriptor_mode(_DESC_BLOCK_TYPE) + , __l1_inv_dont_allocate(false) + , __l2_sector_promote_256B(false) + {} + + _CCCL_HOST_DEVICE constexpr __block_desc_t __get_block() const noexcept + { + return __block_desc_t{ + 0, + __ap_min(std::uint32_t{0x7f}, __block_count), + (__block_start & std::uint32_t{0x7f}), + 0, + __block_size, + __l2_cop_off, + __l2_cop_on, + _DESC_BLOCK_TYPE, + false, + false, + 0}; + } +}; +static_assert(sizeof(std::uintptr_t) > 4, "std::uintptr_t needs at least 5 bytes for this code to work"); + +struct __interleave_descriptor_t +{ + uint64_t __ap_reserved : 52; + uint64_t __fraction : 4; + __off::__l2_cop_off_t __l2_cop_off : 1; + __on::__l2_cop_on_t __l2_cop_on : 2; + __l2_descriptor_mode_t __l2_descriptor_mode : 2; + uint64_t __l1_inv_dont_allocate : 1; + uint64_t __l2_sector_promote_256B : 1; + uint64_t __ap_reserved2 : 1; + + _CCCL_HOST_DEVICE constexpr __interleave_descriptor_t( + __on::__l2_cop_on_t __hit_prop, std::uint32_t __hit_ratio, __off::__l2_cop_off_t __miss_prop) noexcept + : __ap_reserved(0x0) + , __fraction(__hit_ratio) + , __l2_cop_off(__miss_prop) + , __l2_cop_on(__hit_prop) + , __l2_descriptor_mode(_DESC_INTERLEAVED) + , __l1_inv_dont_allocate(0x0) + , __l2_sector_promote_256B(0x0) + , __ap_reserved2(0x0) + {} + + _CCCL_HOST_DEVICE constexpr std::uint64_t __get_descriptor_cexpr() const + { + return std::uint64_t(__ap_reserved) << 0 | std::uint64_t(__fraction) << 52 | std::uint64_t(__l2_cop_off) << 56 + | std::uint64_t(__l2_cop_on) << 57 | std::uint64_t(__l2_descriptor_mode) << 59 + | std::uint64_t(__l1_inv_dont_allocate) << 61 | std::uint64_t(__l2_sector_promote_256B) << 62 + | std::uint64_t(__ap_reserved2) << 63; + } + + inline _CCCL_HOST_DEVICE std::uint64_t __get_descriptor_non_cexpr() const noexcept + { + return *reinterpret_cast(this); + } + + _CCCL_HOST_DEVICE constexpr std::uint64_t __get_descriptor() const noexcept + { #if defined(_LIBCUDACXX_IS_CONSTANT_EVALUATED) - return cuda::std::is_constant_evaluated() ? - __get_descriptor_cexpr() : - __get_descriptor_non_cexpr(); + return cuda::std::is_constant_evaluated() ? 
__get_descriptor_cexpr() : __get_descriptor_non_cexpr(); #else - return __get_descriptor_cexpr(); + return __get_descriptor_cexpr(); #endif - } - }; - static_assert(sizeof(__interleave_descriptor_t) == 8, "__interleave_descriptor_t should be 8 bytes"); - static_assert(sizeof(__interleave_descriptor_t) == sizeof(std::uint64_t), ""); - - _CCCL_HOST_DEVICE - static constexpr std::uint64_t __interleave_normal() noexcept { - return 0x10F0000000000000; - } - - _CCCL_HOST_DEVICE - static constexpr std::uint64_t __interleave_streaming() noexcept { - return 0x12F0000000000000; - } - - _CCCL_HOST_DEVICE - static constexpr std::uint64_t __interleave_persisting() noexcept { - return 0x14F0000000000000; - } - - _CCCL_HOST_DEVICE - static constexpr std::uint64_t __interleave_normal_demote() noexcept { - return 0x16F0000000000000; - } - - } // namespace __sm_80 - - _CCCL_HOST_DEVICE - constexpr std::uint64_t __interleave(cudaAccessProperty __hit_prop, float __hit_ratio, cudaAccessProperty __miss_prop = cudaAccessPropertyNormal) { - return __sm_80::__interleave_descriptor_t( - ((__hit_prop == cudaAccessPropertyNormal) ? __sm_80::__on::__l2_cop_on_t::_L2_EVICT_NORMAL_DEMOTE : static_cast<__sm_80::__on::__l2_cop_on_t>(__hit_prop)), - __ap_min((static_cast(__hit_ratio) * __sm_80::__l2_eviction_max_way_t::_CUDA_AMPERE_MAX_L2_WAYS), static_cast(__sm_80::__l2_eviction_max_way_t::_CUDA_AMPERE_MAX_L2_WAYS - 1)), - static_cast<__sm_80::__off::__l2_cop_off_t>(__miss_prop) - ).__get_descriptor(); - } - - _CCCL_HOST_DEVICE - constexpr std::uint64_t __block(void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, cudaAccessProperty __hit_prop, cudaAccessProperty __miss_prop = cudaAccessPropertyNormal) { - return (__total_bytes <= (size_t{0xFFFFFFFF}) & __total_bytes != 0 & __hit_bytes <= __total_bytes) ? __sm_80::__block_descriptor_builder( - reinterpret_cast(__ptr), - __hit_bytes, - __total_bytes, - (__hit_prop == cudaAccessPropertyNormal) ? __sm_80::__on::_L2_EVICT_NORMAL_DEMOTE : static_cast<__sm_80::__on::__l2_cop_on_t>(__hit_prop), - static_cast<__sm_80::__off::__l2_cop_off_t>(__miss_prop) - ).__get_block().__get_descriptor() - : __sm_80::__interleave_normal(); } +}; +static_assert(sizeof(__interleave_descriptor_t) == 8, "__interleave_descriptor_t should be 8 bytes"); +static_assert(sizeof(__interleave_descriptor_t) == sizeof(std::uint64_t), ""); + +_CCCL_HOST_DEVICE static constexpr std::uint64_t __interleave_normal() noexcept +{ + return 0x10F0000000000000; +} + +_CCCL_HOST_DEVICE static constexpr std::uint64_t __interleave_streaming() noexcept +{ + return 0x12F0000000000000; +} + +_CCCL_HOST_DEVICE static constexpr std::uint64_t __interleave_persisting() noexcept +{ + return 0x14F0000000000000; +} + +_CCCL_HOST_DEVICE static constexpr std::uint64_t __interleave_normal_demote() noexcept +{ + return 0x16F0000000000000; +} + +} // namespace __sm_80 + +_CCCL_HOST_DEVICE constexpr std::uint64_t __interleave( + cudaAccessProperty __hit_prop, float __hit_ratio, cudaAccessProperty __miss_prop = cudaAccessPropertyNormal) +{ + return __sm_80::__interleave_descriptor_t( + ((__hit_prop == cudaAccessPropertyNormal) ? 
__sm_80::__on::__l2_cop_on_t::_L2_EVICT_NORMAL_DEMOTE + : static_cast<__sm_80::__on::__l2_cop_on_t>(__hit_prop)), + __ap_min( + (static_cast(__hit_ratio) * __sm_80::__l2_eviction_max_way_t::_CUDA_AMPERE_MAX_L2_WAYS), + static_cast(__sm_80::__l2_eviction_max_way_t::_CUDA_AMPERE_MAX_L2_WAYS - 1)), + static_cast<__sm_80::__off::__l2_cop_off_t>(__miss_prop)) + .__get_descriptor(); +} + +_CCCL_HOST_DEVICE constexpr std::uint64_t __block( + void* __ptr, + std::size_t __hit_bytes, + std::size_t __total_bytes, + cudaAccessProperty __hit_prop, + cudaAccessProperty __miss_prop = cudaAccessPropertyNormal) +{ + return (__total_bytes <= (size_t{0xFFFFFFFF}) & __total_bytes != 0 & __hit_bytes <= __total_bytes) + ? __sm_80::__block_descriptor_builder( + reinterpret_cast(__ptr), + __hit_bytes, + __total_bytes, + (__hit_prop == cudaAccessPropertyNormal) + ? __sm_80::__on::_L2_EVICT_NORMAL_DEMOTE + : static_cast<__sm_80::__on::__l2_cop_on_t>(__hit_prop), + static_cast<__sm_80::__off::__l2_cop_off_t>(__miss_prop)) + .__get_block() + .__get_descriptor() + : __sm_80::__interleave_normal(); +} } // namespace __detail_ap _LIBCUDACXX_END_NAMESPACE_CUDA diff --git a/libcudacxx/include/cuda/std/detail/__annotated_ptr b/libcudacxx/include/cuda/std/detail/__annotated_ptr index f1d4b166b6e..eb84a309f45 100644 --- a/libcudacxx/include/cuda/std/detail/__annotated_ptr +++ b/libcudacxx/include/cuda/std/detail/__annotated_ptr @@ -3,229 +3,327 @@ * * NVIDIA SOFTWARE LICENSE * - * This license is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of the NVIDIA/CUDA C++ Library software and materials provided hereunder (“SOFTWARE”). + * This license is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of the + * NVIDIA/CUDA C++ Library software and materials provided hereunder (“SOFTWARE”). * - * This license can be accepted only by an adult of legal age of majority in the country in which the SOFTWARE is used. If you are under the legal age of majority, you must ask your parent or legal guardian to consent to this license. By taking delivery of the SOFTWARE, you affirm that you have reached the legal age of majority, you accept the terms of this license, and you take legal and financial responsibility for the actions of your permitted users. + * This license can be accepted only by an adult of legal age of majority in the country in which the SOFTWARE is used. + * If you are under the legal age of majority, you must ask your parent or legal guardian to consent to this license. By + * taking delivery of the SOFTWARE, you affirm that you have reached the legal age of majority, you accept the terms of + * this license, and you take legal and financial responsibility for the actions of your permitted users. * - * You agree to use the SOFTWARE only for purposes that are permitted by (a) this license, and (b) any applicable law, regulation or generally accepted practices or guidelines in the relevant jurisdictions. + * You agree to use the SOFTWARE only for purposes that are permitted by (a) this license, and (b) any applicable law, + * regulation or generally accepted practices or guidelines in the relevant jurisdictions. * - * 1. LICENSE. Subject to the terms of this license, NVIDIA grants you a non-exclusive limited license to: (a) install and use the SOFTWARE, and (b) distribute the SOFTWARE subject to the distribution requirements described in this license. 
NVIDIA reserves all rights, title and interest in and to the SOFTWARE not expressly granted to you under this license. + * 1. LICENSE. Subject to the terms of this license, NVIDIA grants you a non-exclusive limited license to: (a) install + * and use the SOFTWARE, and (b) distribute the SOFTWARE subject to the distribution requirements described in this + * license. NVIDIA reserves all rights, title and interest in and to the SOFTWARE not expressly granted to you under + * this license. * * 2. DISTRIBUTION REQUIREMENTS. These are the distribution requirements for you to exercise the distribution grant: - * a. The terms under which you distribute the SOFTWARE must be consistent with the terms of this license, including (without limitation) terms relating to the license grant and license restrictions and protection of NVIDIA’s intellectual property rights. - * b. You agree to notify NVIDIA in writing of any known or suspected distribution or use of the SOFTWARE not in compliance with the requirements of this license, and to enforce the terms of your agreements with respect to distributed SOFTWARE. + * a. The terms under which you distribute the SOFTWARE must be consistent with the terms of this license, + * including (without limitation) terms relating to the license grant and license restrictions and protection of + * NVIDIA’s intellectual property rights. b. You agree to notify NVIDIA in writing of any known or suspected + * distribution or use of the SOFTWARE not in compliance with the requirements of this license, and to enforce the terms + * of your agreements with respect to distributed SOFTWARE. * * 3. LIMITATIONS. Your license to use the SOFTWARE is restricted as follows: * a. The SOFTWARE is licensed for you to develop applications only for use in systems with NVIDIA GPUs. - * b. You may not reverse engineer, decompile or disassemble, or remove copyright or other proprietary notices from any portion of the SOFTWARE or copies of the SOFTWARE. - * c. You may not modify or create derivative works of any portion of the SOFTWARE. - * d. You may not bypass, disable, or circumvent any technical measure, encryption, security, digital rights management or authentication mechanism in the SOFTWARE. - * e. You may not use the SOFTWARE in any manner that would cause it to become subject to an open source software license. As examples, licenses that require as a condition of use, modification, and/or distribution that the SOFTWARE be (i) disclosed or distributed in source code form; (ii) licensed for the purpose of making derivative works; or (iii) redistributable at no charge. - * f. Unless you have an agreement with NVIDIA for this purpose, you may not use the SOFTWARE with any system or application where the use or failure of the system or application can reasonably be expected to threaten or result in personal injury, death, or catastrophic loss. Examples include use in avionics, navigation, military, medical, life support or other life critical applications. NVIDIA does not design, test or manufacture the SOFTWARE for these critical uses and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or damages arising from such uses. - * g. 
You agree to defend, indemnify and hold harmless NVIDIA and its affiliates, and their respective employees, contractors, agents, officers and directors, from and against any and all claims, damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited to attorney’s fees and costs incident to establishing the right of indemnification) arising out of or related to use of the SOFTWARE outside of the scope of this Agreement, or not in compliance with its terms. + * b. You may not reverse engineer, decompile or disassemble, or remove copyright or other proprietary notices from + * any portion of the SOFTWARE or copies of the SOFTWARE. c. You may not modify or create derivative works of any + * portion of the SOFTWARE. d. You may not bypass, disable, or circumvent any technical measure, encryption, + * security, digital rights management or authentication mechanism in the SOFTWARE. e. You may not use the SOFTWARE + * in any manner that would cause it to become subject to an open source software license. As examples, licenses that + * require as a condition of use, modification, and/or distribution that the SOFTWARE be (i) disclosed or distributed in + * source code form; (ii) licensed for the purpose of making derivative works; or (iii) redistributable at no charge. f. + * Unless you have an agreement with NVIDIA for this purpose, you may not use the SOFTWARE with any system or + * application where the use or failure of the system or application can reasonably be expected to threaten or result in + * personal injury, death, or catastrophic loss. Examples include use in avionics, navigation, military, medical, life + * support or other life critical applications. NVIDIA does not design, test or manufacture the SOFTWARE for these + * critical uses and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or + * damages arising from such uses. g. You agree to defend, indemnify and hold harmless NVIDIA and its affiliates, + * and their respective employees, contractors, agents, officers and directors, from and against any and all claims, + * damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited + * to attorney’s fees and costs incident to establishing the right of indemnification) arising out of or related to use + * of the SOFTWARE outside of the scope of this Agreement, or not in compliance with its terms. * - * 4. PRE-RELEASE. SOFTWARE versions identified as alpha, beta, preview, early access or otherwise as pre-release may not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, availability, and reliability standards relative to commercial versions of NVIDIA software and materials. You may use a pre-release SOFTWARE version at your own risk, understanding that these versions are not intended for use in production or business-critical systems. + * 4. PRE-RELEASE. SOFTWARE versions identified as alpha, beta, preview, early access or otherwise as pre-release may + * not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, + * availability, and reliability standards relative to commercial versions of NVIDIA software and materials. You may use + * a pre-release SOFTWARE version at your own risk, understanding that these versions are not intended for use in + * production or business-critical systems. * - * 5. OWNERSHIP. 
The SOFTWARE and the related intellectual property rights therein are and will remain the sole and exclusive property of NVIDIA or its licensors. The SOFTWARE is copyrighted and protected by the laws of the United States and other countries, and international treaty provisions. NVIDIA may make changes to the SOFTWARE, at any time without notice, but is not obligated to support or update the SOFTWARE. + * 5. OWNERSHIP. The SOFTWARE and the related intellectual property rights therein are and will remain the sole and + * exclusive property of NVIDIA or its licensors. The SOFTWARE is copyrighted and protected by the laws of the United + * States and other countries, and international treaty provisions. NVIDIA may make changes to the SOFTWARE, at any time + * without notice, but is not obligated to support or update the SOFTWARE. * - * 6. COMPONENTS UNDER OTHER LICENSES. The SOFTWARE may include NVIDIA or third-party components with separate legal notices or terms as may be described in proprietary notices accompanying the SOFTWARE. If and to the extent there is a conflict between the terms in this license and the license terms associated with a component, the license terms associated with the components control only to the extent necessary to resolve the conflict. + * 6. COMPONENTS UNDER OTHER LICENSES. The SOFTWARE may include NVIDIA or third-party components with separate legal + * notices or terms as may be described in proprietary notices accompanying the SOFTWARE. If and to the extent there is + * a conflict between the terms in this license and the license terms associated with a component, the license terms + * associated with the components control only to the extent necessary to resolve the conflict. * - * 7. FEEDBACK. You may, but don’t have to, provide to NVIDIA any Feedback. “Feedback” means any suggestions, bug fixes, enhancements, modifications, feature requests or other feedback regarding the SOFTWARE. For any Feedback that you voluntarily provide, you hereby grant NVIDIA and its affiliates a perpetual, non-exclusive, worldwide, irrevocable license to use, reproduce, modify, license, sublicense (through multiple tiers of sublicensees), and distribute (through multiple tiers of distributors) the Feedback without the payment of any royalties or fees to you. NVIDIA will use Feedback at its choice. + * 7. FEEDBACK. You may, but don’t have to, provide to NVIDIA any Feedback. “Feedback” means any suggestions, bug fixes, + * enhancements, modifications, feature requests or other feedback regarding the SOFTWARE. For any Feedback that you + * voluntarily provide, you hereby grant NVIDIA and its affiliates a perpetual, non-exclusive, worldwide, irrevocable + * license to use, reproduce, modify, license, sublicense (through multiple tiers of sublicensees), and distribute + * (through multiple tiers of distributors) the Feedback without the payment of any royalties or fees to you. NVIDIA + * will use Feedback at its choice. * - * 8. NO WARRANTIES. THE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING, BUT NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. NVIDIA DOES NOT WARRANT THAT THE SOFTWARE WILL MEET YOUR REQUIREMENTS OR THAT THE OPERATION THEREOF WILL BE UNINTERRUPTED OR ERROR-FREE, OR THAT ALL ERRORS WILL BE CORRECTED. + * 8. NO WARRANTIES. 
THE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING, BUT + * NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. NVIDIA DOES NOT + * WARRANT THAT THE SOFTWARE WILL MEET YOUR REQUIREMENTS OR THAT THE OPERATION THEREOF WILL BE UNINTERRUPTED OR + * ERROR-FREE, OR THAT ALL ERRORS WILL BE CORRECTED. * - * 9. LIMITATIONS OF LIABILITY. TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, PROJECT DELAYS, LOSS OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH THIS LICENSE OR THE USE OR PERFORMANCE OF THE SOFTWARE, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF LIABILITY, EVEN IF NVIDIA HAS PREVIOUSLY BEEN ADVISED OF, OR COULD REASONABLY HAVE FORESEEN, THE POSSIBILITY OF SUCH DAMAGES. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS LICENSE EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS LIMIT. + * 9. LIMITATIONS OF LIABILITY. TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS AFFILIATES SHALL NOT BE LIABLE + * FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, PROJECT DELAYS, LOSS OF USE, + * LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH + * THIS LICENSE OR THE USE OR PERFORMANCE OF THE SOFTWARE, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON + * BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION + * OR THEORY OF LIABILITY, EVEN IF NVIDIA HAS PREVIOUSLY BEEN ADVISED OF, OR COULD REASONABLY HAVE FORESEEN, THE + * POSSIBILITY OF SUCH DAMAGES. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES TOTAL CUMULATIVE LIABILITY UNDER OR ARISING + * OUT OF THIS LICENSE EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE + * OR EXTEND THIS LIMIT. * - * 10. TERMINATION. Your rights under this license will terminate automatically without notice from NVIDIA if you fail to comply with any term and condition of this license or if you commence or participate in any legal proceeding against NVIDIA with respect to the SOFTWARE. NVIDIA may terminate this license with advance written notice to you if NVIDIA decides to no longer provide the SOFTWARE in a country or, in NVIDIA’s sole discretion, the continued use of it is no longer commercially viable. Upon any termination of this license, you agree to promptly discontinue use of the SOFTWARE and destroy all copies in your possession or control. Your prior distributions in accordance with this license are not affected by the termination of this license. All provisions of this license will survive termination, except for the license granted to you. + * 10. TERMINATION. Your rights under this license will terminate automatically without notice from NVIDIA if you fail + * to comply with any term and condition of this license or if you commence or participate in any legal proceeding + * against NVIDIA with respect to the SOFTWARE. 
NVIDIA may terminate this license with advance written notice to you if + * NVIDIA decides to no longer provide the SOFTWARE in a country or, in NVIDIA’s sole discretion, the continued use of + * it is no longer commercially viable. Upon any termination of this license, you agree to promptly discontinue use of + * the SOFTWARE and destroy all copies in your possession or control. Your prior distributions in accordance with this + * license are not affected by the termination of this license. All provisions of this license will survive termination, + * except for the license granted to you. * - * 11. APPLICABLE LAW. This license will be governed in all respects by the laws of the United States and of the State of Delaware as those laws are applied to contracts entered into and performed entirely within Delaware by Delaware residents, without regard to the conflicts of laws principles. The United Nations Convention on Contracts for the International Sale of Goods is specifically disclaimed. You agree to all terms of this Agreement in the English language. The state or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction over any dispute or claim arising out of this license. Notwithstanding this, you agree that NVIDIA shall still be allowed to apply for injunctive remedies or an equivalent type of urgent legal relief in any jurisdiction. + * 11. APPLICABLE LAW. This license will be governed in all respects by the laws of the United States and of the State + * of Delaware as those laws are applied to contracts entered into and performed entirely within Delaware by Delaware + * residents, without regard to the conflicts of laws principles. The United Nations Convention on Contracts for the + * International Sale of Goods is specifically disclaimed. You agree to all terms of this Agreement in the English + * language. The state or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction + * over any dispute or claim arising out of this license. Notwithstanding this, you agree that NVIDIA shall still be + * allowed to apply for injunctive remedies or an equivalent type of urgent legal relief in any jurisdiction. * - * 12. NO ASSIGNMENT. This license and your rights and obligations thereunder may not be assigned by you by any means or operation of law without NVIDIA’s permission. Any attempted assignment not approved by NVIDIA in writing shall be void and of no effect. + * 12. NO ASSIGNMENT. This license and your rights and obligations thereunder may not be assigned by you by any means or + * operation of law without NVIDIA’s permission. Any attempted assignment not approved by NVIDIA in writing shall be + * void and of no effect. * - * 13. EXPORT. The SOFTWARE is subject to United States export laws and regulations. You agree that you will not ship, transfer or export the SOFTWARE into any country, or use the SOFTWARE in any manner, prohibited by the United States Bureau of Industry and Security or economic sanctions regulations administered by the U.S. Department of Treasury’s Office of Foreign Assets Control (OFAC), or any applicable export laws, restrictions or regulations. These laws include restrictions on destinations, end users and end use. By accepting this license, you confirm that you are not a resident or citizen of any country currently embargoed by the U.S. and that you are not otherwise prohibited from receiving the SOFTWARE. + * 13. EXPORT. The SOFTWARE is subject to United States export laws and regulations. 
You agree that you will not ship, + * transfer or export the SOFTWARE into any country, or use the SOFTWARE in any manner, prohibited by the United States + * Bureau of Industry and Security or economic sanctions regulations administered by the U.S. Department of Treasury’s + * Office of Foreign Assets Control (OFAC), or any applicable export laws, restrictions or regulations. These laws + * include restrictions on destinations, end users and end use. By accepting this license, you confirm that you are not + * a resident or citizen of any country currently embargoed by the U.S. and that you are not otherwise prohibited from + * receiving the SOFTWARE. * - * 14. GOVERNMENT USE. The SOFTWARE has been developed entirely at private expense and is “commercial items” consisting of “commercial computer software” and “commercial computer software documentation” provided with RESTRICTED RIGHTS. Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the restrictions in this license pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051. + * 14. GOVERNMENT USE. The SOFTWARE has been developed entirely at private expense and is “commercial items” consisting + * of “commercial computer software” and “commercial computer software documentation” provided with RESTRICTED RIGHTS. + * Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the + * restrictions in this license pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the + * Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is + * NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051. * - * 15. ENTIRE AGREEMENT. This license is the final, complete and exclusive agreement between the parties relating to the subject matter of this license and supersedes all prior or contemporaneous understandings and agreements relating to this subject matter, whether oral or written. If any court of competent jurisdiction determines that any provision of this license is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. This license may only be modified in a writing signed by an authorized representative of each party. + * 15. ENTIRE AGREEMENT. This license is the final, complete and exclusive agreement between the parties relating to the + * subject matter of this license and supersedes all prior or contemporaneous understandings and agreements relating to + * this subject matter, whether oral or written. If any court of competent jurisdiction determines that any provision of + * this license is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. + * This license may only be modified in a writing signed by an authorized representative of each party. * * (v. 
August 20, 2021) */ _LIBCUDACXX_BEGIN_NAMESPACE_CUDA -namespace __detail_ap { +namespace __detail_ap +{ - template - _CCCL_DEVICE - void* __associate_address_space(void* __ptr, _Property __prop) { - if (std::is_same<_Property, access_property::shared>::value == true) { - bool __b = __isShared(__ptr); - _LIBCUDACXX_ASSERT(__b, ""); +template +_CCCL_DEVICE void* __associate_address_space(void* __ptr, _Property __prop) +{ + if (std::is_same<_Property, access_property::shared>::value == true) + { + bool __b = __isShared(__ptr); + _LIBCUDACXX_ASSERT(__b, ""); #if !defined(_CCCL_CUDACC_BELOW_11_2) - __builtin_assume(__b); + __builtin_assume(__b); #else // ^^^ !_CCCL_CUDACC_BELOW_11_2 ^^^ / vvv _CCCL_CUDACC_BELOW_11_2 vvv - (void)__b; + (void) __b; #endif // _CCCL_CUDACC_BELOW_11_2 - } else if (std::is_same<_Property, access_property::global>::value == true || - std::is_same<_Property, access_property::normal>::value == true || - std::is_same<_Property, access_property::persisting>::value == true || - std::is_same<_Property, access_property::streaming>::value == true || - std::is_same<_Property, access_property>::value) { - bool __b = __isGlobal(__ptr); - _LIBCUDACXX_ASSERT(__b, ""); + } + else if (std::is_same<_Property, access_property::global>::value == true + || std::is_same<_Property, access_property::normal>::value == true + || std::is_same<_Property, access_property::persisting>::value == true + || std::is_same<_Property, access_property::streaming>::value == true + || std::is_same<_Property, access_property>::value) + { + bool __b = __isGlobal(__ptr); + _LIBCUDACXX_ASSERT(__b, ""); #if !defined(_CCCL_CUDACC_BELOW_11_2) - __builtin_assume(__b); + __builtin_assume(__b); #else // ^^^ !_CCCL_CUDACC_BELOW_11_2 ^^^ / vvv _CCCL_CUDACC_BELOW_11_2 vvv - (void)__b; + (void) __b; #endif // _CCCL_CUDACC_BELOW_11_2 - } + } + + return __ptr; +} + +template +_CCCL_DEVICE void* __associate_descriptor(void* __ptr, __Prop __prop) +{ + return __associate_descriptor(__ptr, static_cast(access_property(__prop))); +} + +template <> +inline _CCCL_DEVICE void* __associate_descriptor(void* __ptr, std::uint64_t __prop) +{ + NV_IF_ELSE_TARGET(NV_PROVIDES_SM_80, (return __nv_associate_access_property(__ptr, __prop);), (return __ptr;)) +} - return __ptr; +template <> +inline _CCCL_DEVICE void* __associate_descriptor(void* __ptr, access_property::shared) +{ + return __ptr; +} + +template +_CCCL_HOST_DEVICE _Type* __associate(_Type* __ptr, _Property __prop) +{ + NV_IF_ELSE_TARGET(NV_IS_DEVICE, + (return static_cast<_Type*>(__associate_descriptor( + __associate_address_space(const_cast(static_cast(__ptr)), __prop), __prop));), + (return __ptr;)) +} + +template +class __annotated_ptr_base +{ + using __error = typename _Property::__unknown_access_property_type; +}; + +template <> +class __annotated_ptr_base +{ +protected: + static constexpr std::uint64_t __prop = 0; + + constexpr __annotated_ptr_base() noexcept = default; + constexpr __annotated_ptr_base(__annotated_ptr_base const&) = default; + _CCCL_CONSTEXPR_CXX14 __annotated_ptr_base& operator=(const __annotated_ptr_base&) = default; + _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(access_property::shared) noexcept {} + inline _CCCL_DEVICE void* __apply_prop(void* __p) const + { + return __associate(__p, access_property::shared{}); + } + _CCCL_HOST_DEVICE constexpr access_property::shared __get_property() const noexcept + { + return access_property::shared{}; } +}; - template - _CCCL_DEVICE - void* __associate_descriptor(void* __ptr, __Prop __prop) { - return 
__associate_descriptor(__ptr, static_cast(access_property(__prop))); +template <> +class __annotated_ptr_base +{ +protected: + static constexpr std::uint64_t __prop = __sm_80::__interleave_normal(); + + constexpr __annotated_ptr_base() noexcept = default; + constexpr __annotated_ptr_base(__annotated_ptr_base const&) = default; + _CCCL_CONSTEXPR_CXX14 __annotated_ptr_base& operator=(const __annotated_ptr_base&) = default; + _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(access_property::global) noexcept {} + inline _CCCL_DEVICE void* __apply_prop(void* __p) const + { + return __associate(__p, access_property::global{}); + } + _CCCL_HOST_DEVICE constexpr access_property::global __get_property() const noexcept + { + return access_property::global{}; } +}; + +template <> +class __annotated_ptr_base +{ +protected: + static constexpr std::uint64_t __prop = __sm_80::__interleave_normal_demote(); - template <> - inline _CCCL_DEVICE - void* __associate_descriptor(void* __ptr, std::uint64_t __prop) { - NV_IF_ELSE_TARGET(NV_PROVIDES_SM_80,( - return __nv_associate_access_property(__ptr, __prop); - ),( - return __ptr; - )) + constexpr __annotated_ptr_base() noexcept = default; + constexpr __annotated_ptr_base(__annotated_ptr_base const&) = default; + _CCCL_CONSTEXPR_CXX14 __annotated_ptr_base& operator=(const __annotated_ptr_base&) = default; + _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(access_property::normal) noexcept {} + inline _CCCL_DEVICE void* __apply_prop(void* __p) const + { + return __associate(__p, access_property::normal{}); } + _CCCL_HOST_DEVICE constexpr access_property::normal __get_property() const noexcept + { + return access_property::normal{}; + } +}; + +template <> +class __annotated_ptr_base +{ +protected: + static constexpr std::uint64_t __prop = __sm_80::__interleave_persisting(); - template<> - inline _CCCL_DEVICE - void* __associate_descriptor(void* __ptr, access_property::shared) { - return __ptr; + constexpr __annotated_ptr_base() noexcept = default; + constexpr __annotated_ptr_base(__annotated_ptr_base const&) = default; + _CCCL_CONSTEXPR_CXX14 __annotated_ptr_base& operator=(const __annotated_ptr_base&) = default; + _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(access_property::persisting) noexcept {} + inline _CCCL_DEVICE void* __apply_prop(void* __p) const + { + return __associate(__p, access_property::persisting{}); } + _CCCL_HOST_DEVICE constexpr access_property::persisting __get_property() const noexcept + { + return access_property::persisting{}; + } +}; + +template <> +class __annotated_ptr_base +{ +protected: + static constexpr std::uint64_t __prop = __sm_80::__interleave_streaming(); - template - _CCCL_HOST_DEVICE - _Type* __associate(_Type* __ptr, _Property __prop) { - NV_IF_ELSE_TARGET(NV_IS_DEVICE,( - return static_cast<_Type*>(__associate_descriptor( - __associate_address_space(const_cast(static_cast(__ptr)), __prop), - __prop)); - ),( - return __ptr; - )) + constexpr __annotated_ptr_base() noexcept = default; + constexpr __annotated_ptr_base(__annotated_ptr_base const&) = default; + _CCCL_CONSTEXPR_CXX14 __annotated_ptr_base& operator=(const __annotated_ptr_base&) = default; + _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(access_property::streaming) noexcept {} + inline _CCCL_DEVICE void* __apply_prop(void* __p) const + { + return __associate(__p, access_property::streaming{}); + } + _CCCL_HOST_DEVICE constexpr access_property::streaming __get_property() const noexcept + { + return access_property::streaming{}; } +}; +template <> +class 
__annotated_ptr_base +{ +protected: + std::uint64_t __prop; - template - class __annotated_ptr_base { - using __error = typename _Property::__unknown_access_property_type; - }; - - template<> - class __annotated_ptr_base { - protected: - static constexpr std::uint64_t __prop = 0; - - constexpr __annotated_ptr_base() noexcept = default; - constexpr __annotated_ptr_base(__annotated_ptr_base const&) = default; - _CCCL_CONSTEXPR_CXX14 __annotated_ptr_base& operator=(const __annotated_ptr_base&) = default; - _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(access_property::shared) noexcept {} - inline _CCCL_DEVICE void* __apply_prop(void* __p) const { - return __associate(__p, access_property::shared{}); - } - _CCCL_HOST_DEVICE constexpr access_property::shared __get_property() const noexcept { - return access_property::shared{}; - } - }; - - template<> - class __annotated_ptr_base { - protected: - static constexpr std::uint64_t __prop = __sm_80::__interleave_normal(); - - constexpr __annotated_ptr_base() noexcept = default; - constexpr __annotated_ptr_base(__annotated_ptr_base const&) = default; - _CCCL_CONSTEXPR_CXX14 __annotated_ptr_base& operator=(const __annotated_ptr_base&) = default; - _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(access_property::global) noexcept {} - inline _CCCL_DEVICE void* __apply_prop(void* __p) const { - return __associate(__p, access_property::global{}); - } - _CCCL_HOST_DEVICE constexpr access_property::global __get_property() const noexcept { - return access_property::global{}; - } - }; - - template<> - class __annotated_ptr_base { - protected: - static constexpr std::uint64_t __prop = __sm_80::__interleave_normal_demote(); - - constexpr __annotated_ptr_base() noexcept = default; - constexpr __annotated_ptr_base(__annotated_ptr_base const&) = default; - _CCCL_CONSTEXPR_CXX14 __annotated_ptr_base& operator=(const __annotated_ptr_base&) = default; - _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(access_property::normal) noexcept {} - inline _CCCL_DEVICE void* __apply_prop(void* __p) const { - return __associate(__p, access_property::normal{}); - } - _CCCL_HOST_DEVICE constexpr access_property::normal __get_property() const noexcept { - return access_property::normal{}; - } - }; - - template<> - class __annotated_ptr_base { - protected: - static constexpr std::uint64_t __prop = __sm_80::__interleave_persisting(); - - constexpr __annotated_ptr_base() noexcept = default; - constexpr __annotated_ptr_base(__annotated_ptr_base const&) = default; - _CCCL_CONSTEXPR_CXX14 __annotated_ptr_base& operator=(const __annotated_ptr_base&) = default; - _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(access_property::persisting) noexcept {} - inline _CCCL_DEVICE void* __apply_prop(void* __p) const { - return __associate(__p, access_property::persisting{}); - } - _CCCL_HOST_DEVICE constexpr access_property::persisting __get_property() const noexcept { - return access_property::persisting{}; - } - }; - - template<> - class __annotated_ptr_base { - protected: - static constexpr std::uint64_t __prop = __sm_80::__interleave_streaming(); - - constexpr __annotated_ptr_base() noexcept = default; - constexpr __annotated_ptr_base(__annotated_ptr_base const&) = default; - _CCCL_CONSTEXPR_CXX14 __annotated_ptr_base& operator=(const __annotated_ptr_base&) = default; - _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(access_property::streaming) noexcept {} - inline _CCCL_DEVICE void* __apply_prop(void* __p) const { - return __associate(__p, access_property::streaming{}); - } - 
_CCCL_HOST_DEVICE constexpr access_property::streaming __get_property() const noexcept { - return access_property::streaming{}; - } - }; - - template<> - class __annotated_ptr_base { - protected: - std::uint64_t __prop; - - _CCCL_HOST_DEVICE constexpr __annotated_ptr_base() noexcept : __prop(access_property()) {} - _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(std::uint64_t __property) noexcept : __prop(__property) {} - _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(access_property __property) noexcept - : __annotated_ptr_base(static_cast(__property)) {} - constexpr __annotated_ptr_base(__annotated_ptr_base const&) = default; - _CCCL_CONSTEXPR_CXX14 __annotated_ptr_base& operator=(const __annotated_ptr_base&) = default; - inline _CCCL_DEVICE void* __apply_prop(void* __p) const { - return __associate(__p, __prop); - } - _CCCL_HOST_DEVICE access_property __get_property() const noexcept { - return reinterpret_cast(const_cast(__prop)); - } - }; + _CCCL_HOST_DEVICE constexpr __annotated_ptr_base() noexcept + : __prop(access_property()) + {} + _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(std::uint64_t __property) noexcept + : __prop(__property) + {} + _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(access_property __property) noexcept + : __annotated_ptr_base(static_cast(__property)) + {} + constexpr __annotated_ptr_base(__annotated_ptr_base const&) = default; + _CCCL_CONSTEXPR_CXX14 __annotated_ptr_base& operator=(const __annotated_ptr_base&) = default; + inline _CCCL_DEVICE void* __apply_prop(void* __p) const + { + return __associate(__p, __prop); + } + _CCCL_HOST_DEVICE access_property __get_property() const noexcept + { + return reinterpret_cast(const_cast(__prop)); + } +}; } // namespace __detail_ap _LIBCUDACXX_END_NAMESPACE_CUDA diff --git a/libcudacxx/include/cuda/std/detail/__config b/libcudacxx/include/cuda/std/detail/__config index f4fba1f24d6..aaa22d7cf6d 100644 --- a/libcudacxx/include/cuda/std/detail/__config +++ b/libcudacxx/include/cuda/std/detail/__config @@ -13,7 +13,7 @@ #include -#define _LIBCUDACXX_CUDA_API_VERSION CCCL_VERSION +#define _LIBCUDACXX_CUDA_API_VERSION CCCL_VERSION #define _LIBCUDACXX_CUDA_API_VERSION_MAJOR CCCL_MAJOR_VERSION #define _LIBCUDACXX_CUDA_API_VERSION_MINOR CCCL_MINOR_VERSION #define _LIBCUDACXX_CUDA_API_VERSION_PATCH CCCL_PATCH_VERSION diff --git a/libcudacxx/include/cuda/std/detail/__pragma_push b/libcudacxx/include/cuda/std/detail/__pragma_push index 5042010790d..d7decfb316a 100644 --- a/libcudacxx/include/cuda/std/detail/__pragma_push +++ b/libcudacxx/include/cuda/std/detail/__pragma_push @@ -8,5 +8,6 @@ // //===----------------------------------------------------------------------===// -#include #include + +#include diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/__assert b/libcudacxx/include/cuda/std/detail/libcxx/include/__assert index ad54f46dfd6..3568b3b746f 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/__assert +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/__assert @@ -27,28 +27,28 @@ // assertions through the Debug mode previously. // TODO: In LLVM 16, make it an error to define _LIBCUDACXX_DEBUG #if defined(_LIBCUDACXX_DEBUG) -# ifndef _LIBCUDACXX_ENABLE_ASSERTIONS -# define _LIBCUDACXX_ENABLE_ASSERTIONS 1 -# endif +# ifndef _LIBCUDACXX_ENABLE_ASSERTIONS +# define _LIBCUDACXX_ENABLE_ASSERTIONS 1 +# endif #endif // Automatically enable assertions when the debug mode is enabled. 
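Not part of the patch: a standalone check of the interleave-descriptor bit layout that the __annotated_ptr_base specializations above bake in through __sm_80::__interleave_normal() and friends. It assumes only the field shifts already visible in __interleave_descriptor_t::__get_descriptor_cexpr earlier in this diff (hit fraction at bit 52, miss policy at bit 56, hit policy at bit 57, descriptor mode at bit 59); the helper name is invented for illustration.

#include <cstdint>

// Reassemble an interleaved L2 descriptor from its fields, following the
// shifts used by __interleave_descriptor_t::__get_descriptor_cexpr.
constexpr std::uint64_t interleave_descriptor(
  std::uint64_t fraction, // 4-bit hit fraction; 0xF is what __interleave produces for a hit ratio near 1.0
  std::uint64_t cop_off,  // miss policy: 0 = evict-normal, 1 = evict-first
  std::uint64_t cop_on,   // hit policy: 0 = normal, 1 = first, 2 = last, 3 = normal-demote
  std::uint64_t mode)     // 2 = _DESC_INTERLEAVED
{
  return fraction << 52 | cop_off << 56 | cop_on << 57 | mode << 59;
}

// The four canned constants defined above fall out of this layout directly.
static_assert(interleave_descriptor(0xF, 0, 0, 2) == 0x10F0000000000000, "__interleave_normal");
static_assert(interleave_descriptor(0xF, 0, 1, 2) == 0x12F0000000000000, "__interleave_streaming");
static_assert(interleave_descriptor(0xF, 0, 2, 2) == 0x14F0000000000000, "__interleave_persisting");
static_assert(interleave_descriptor(0xF, 0, 3, 2) == 0x16F0000000000000, "__interleave_normal_demote");

int main()
{
  return 0;
}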
#if defined(_LIBCUDACXX_ENABLE_DEBUG_MODE) -# ifndef _LIBCUDACXX_ENABLE_ASSERTIONS -# define _LIBCUDACXX_ENABLE_ASSERTIONS 1 -# endif +# ifndef _LIBCUDACXX_ENABLE_ASSERTIONS +# define _LIBCUDACXX_ENABLE_ASSERTIONS 1 +# endif #endif #ifndef _LIBCUDACXX_ENABLE_ASSERTIONS -# define _LIBCUDACXX_ENABLE_ASSERTIONS _LIBCUDACXX_ENABLE_ASSERTIONS_DEFAULT +# define _LIBCUDACXX_ENABLE_ASSERTIONS _LIBCUDACXX_ENABLE_ASSERTIONS_DEFAULT #endif #if _LIBCUDACXX_ENABLE_ASSERTIONS != 0 && _LIBCUDACXX_ENABLE_ASSERTIONS != 1 -# error "_LIBCUDACXX_ENABLE_ASSERTIONS must be set to 0 or 1" +# error "_LIBCUDACXX_ENABLE_ASSERTIONS must be set to 0 or 1" #endif #if _LIBCUDACXX_ENABLE_ASSERTIONS -# define _LIBCUDACXX_ASSERT(expression, message) \ +# define _LIBCUDACXX_ASSERT(expression, message) \ (_CCCL_DIAG_PUSH \ _CCCL_DIAG_SUPPRESS_CLANG("-Wassume") \ __builtin_expect(static_cast(expression), 1) ? \ @@ -56,13 +56,11 @@ ::_CUDA_VSTD::__libcpp_verbose_abort("%s:%d: assertion %s failed: %s", __FILE__, __LINE__, #expression, message) _CCCL_DIAG_POP) #elif 0 // !defined(_LIBCUDACXX_ASSERTIONS_DISABLE_ASSUME) && __has_builtin(__builtin_assume) -# define _LIBCUDACXX_ASSERT(expression, message) \ - (_CCCL_DIAG_PUSH \ - _CCCL_DIAG_SUPPRESS_CLANG("-Wassume") \ - __builtin_assume(static_cast(expression)) \ - _CCCL_DIAG_POP) +# define _LIBCUDACXX_ASSERT(expression, message) \ + (_CCCL_DIAG_PUSH _CCCL_DIAG_SUPPRESS_CLANG("-Wassume") __builtin_assume(static_cast(expression)) \ + _CCCL_DIAG_POP) #else -# define _LIBCUDACXX_ASSERT(expression, message) ((void)0) +# define _LIBCUDACXX_ASSERT(expression, message) ((void) 0) #endif #endif // _LIBCUDACXX___ASSERT diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/__availability b/libcudacxx/include/cuda/std/detail/libcxx/include/__availability index 37ac58934ea..f89d2abf1a0 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/__availability +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/__availability @@ -63,226 +63,230 @@ // // [1]: https://clang.llvm.org/docs/AttributeReference.html#availability - // For backwards compatibility, allow users to define _LIBCUDACXX_DISABLE_AVAILABILITY // for a while. #if defined(_LIBCUDACXX_DISABLE_AVAILABILITY) -# if !defined(_LIBCUDACXX_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) -# define _LIBCUDACXX_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS -# endif +# if !defined(_LIBCUDACXX_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) +# define _LIBCUDACXX_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS +# endif #endif // Availability markup is disabled when building the library, or when the compiler // doesn't support the proper attributes. 
-#if defined(_LIBCUDACXX_BUILDING_LIBRARY) || \ - defined(_LIBCXXABI_BUILDING_LIBRARY) || \ - !__has_feature(attribute_availability_with_strict) || \ - !__has_feature(attribute_availability_in_templates) || \ - !__has_extension(pragma_clang_attribute_external_declaration) -# if !defined(_LIBCUDACXX_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) -# define _LIBCUDACXX_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS -# endif +#if defined(_LIBCUDACXX_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) \ + || !__has_feature(attribute_availability_with_strict) || !__has_feature(attribute_availability_in_templates) \ + || !__has_extension(pragma_clang_attribute_external_declaration) +# if !defined(_LIBCUDACXX_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) +# define _LIBCUDACXX_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS +# endif #endif #if defined(_LIBCUDACXX_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) - // This controls the availability of std::shared_mutex and std::shared_timed_mutex, - // which were added to the dylib later. -# define _LIBCUDACXX_AVAILABILITY_SHARED_MUTEX +// This controls the availability of std::shared_mutex and std::shared_timed_mutex, +// which were added to the dylib later. +# define _LIBCUDACXX_AVAILABILITY_SHARED_MUTEX // # define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex // # define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex - // These macros control the availability of std::bad_optional_access and - // other exception types. These were put in the shared library to prevent - // code bloat from every user program defining the vtable for these exception - // types. - // - // Note that when exceptions are disabled, the methods that normally throw - // these exceptions can be used even on older deployment targets, but those - // methods will abort instead of throwing. -# define _LIBCUDACXX_AVAILABILITY_BAD_OPTIONAL_ACCESS -# define _LIBCUDACXX_AVAILABILITY_BAD_VARIANT_ACCESS -# define _LIBCUDACXX_AVAILABILITY_BAD_ANY_CAST - - // This controls the availability of std::uncaught_exceptions(). -# define _LIBCUDACXX_AVAILABILITY_UNCAUGHT_EXCEPTIONS - - // This controls the availability of the sized version of ::operator delete, - // ::operator delete[], and their align_val_t variants, which were all added - // in C++17, and hence not present in early dylibs. -# define _LIBCUDACXX_AVAILABILITY_SIZED_NEW_DELETE - - // This controls the availability of the std::future_error exception. - // - // Note that when exceptions are disabled, the methods that normally throw - // std::future_error can be used even on older deployment targets, but those - // methods will abort instead of throwing. -# define _LIBCUDACXX_AVAILABILITY_FUTURE_ERROR - - // This controls the availability of std::type_info's vtable. - // I can't imagine how using std::type_info can work at all if - // this isn't supported. -# define _LIBCUDACXX_AVAILABILITY_TYPEINFO_VTABLE - - // This controls the availability of std::locale::category members - // (e.g. std::locale::collate), which are defined in the dylib. -# define _LIBCUDACXX_AVAILABILITY_LOCALE_CATEGORY - - // This controls the availability of atomic operations on std::shared_ptr - // (e.g. `std::atomic_store(std::shared_ptr)`), which require a shared - // lock table located in the dylib. -# define _LIBCUDACXX_AVAILABILITY_ATOMIC_SHARED_PTR - - // These macros control the availability of all parts of that - // depend on something in the dylib. 
-# define _LIBCUDACXX_AVAILABILITY_FILESYSTEM -# define _LIBCUDACXX_AVAILABILITY_FILESYSTEM_PUSH -# define _LIBCUDACXX_AVAILABILITY_FILESYSTEM_POP +// These macros control the availability of std::bad_optional_access and +// other exception types. These were put in the shared library to prevent +// code bloat from every user program defining the vtable for these exception +// types. +// +// Note that when exceptions are disabled, the methods that normally throw +// these exceptions can be used even on older deployment targets, but those +// methods will abort instead of throwing. +# define _LIBCUDACXX_AVAILABILITY_BAD_OPTIONAL_ACCESS +# define _LIBCUDACXX_AVAILABILITY_BAD_VARIANT_ACCESS +# define _LIBCUDACXX_AVAILABILITY_BAD_ANY_CAST + +// This controls the availability of std::uncaught_exceptions(). +# define _LIBCUDACXX_AVAILABILITY_UNCAUGHT_EXCEPTIONS + +// This controls the availability of the sized version of ::operator delete, +// ::operator delete[], and their align_val_t variants, which were all added +// in C++17, and hence not present in early dylibs. +# define _LIBCUDACXX_AVAILABILITY_SIZED_NEW_DELETE + +// This controls the availability of the std::future_error exception. +// +// Note that when exceptions are disabled, the methods that normally throw +// std::future_error can be used even on older deployment targets, but those +// methods will abort instead of throwing. +# define _LIBCUDACXX_AVAILABILITY_FUTURE_ERROR + +// This controls the availability of std::type_info's vtable. +// I can't imagine how using std::type_info can work at all if +// this isn't supported. +# define _LIBCUDACXX_AVAILABILITY_TYPEINFO_VTABLE + +// This controls the availability of std::locale::category members +// (e.g. std::locale::collate), which are defined in the dylib. +# define _LIBCUDACXX_AVAILABILITY_LOCALE_CATEGORY + +// This controls the availability of atomic operations on std::shared_ptr +// (e.g. `std::atomic_store(std::shared_ptr)`), which require a shared +// lock table located in the dylib. +# define _LIBCUDACXX_AVAILABILITY_ATOMIC_SHARED_PTR + +// These macros control the availability of all parts of <filesystem> that +// depend on something in the dylib. +# define _LIBCUDACXX_AVAILABILITY_FILESYSTEM +# define _LIBCUDACXX_AVAILABILITY_FILESYSTEM_PUSH +# define _LIBCUDACXX_AVAILABILITY_FILESYSTEM_POP // # define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem - // This controls the availability of floating-point std::to_chars functions. - // These overloads were added later than the integer overloads. -# define _LIBCUDACXX_AVAILABILITY_TO_CHARS_FLOATING_POINT +// This controls the availability of floating-point std::to_chars functions. +// These overloads were added later than the integer overloads. +# define _LIBCUDACXX_AVAILABILITY_TO_CHARS_FLOATING_POINT - // This controls the availability of the C++20 synchronization library, - // which requires shared library support for various operations - // (see libcxx/src/atomic.cpp). This includes <barrier>, <latch>, - // <semaphore>, and notification functions on std::atomic. -# define _LIBCUDACXX_AVAILABILITY_SYNC +// This controls the availability of the C++20 synchronization library, +// which requires shared library support for various operations +// (see libcxx/src/atomic.cpp). This includes <barrier>, <latch>, +// <semaphore>, and notification functions on std::atomic.
+# define _LIBCUDACXX_AVAILABILITY_SYNC // # define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait // # define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier // # define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_latch // # define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore - // This controls the availability of the C++20 format library. - // The library is in development and not ABI stable yet. P2216 is - // retroactively accepted in C++20. This paper contains ABI breaking - // changes. -# define _LIBCUDACXX_AVAILABILITY_FORMAT +// This controls the availability of the C++20 format library. +// The library is in development and not ABI stable yet. P2216 is +// retroactively accepted in C++20. This paper contains ABI breaking +// changes. +# define _LIBCUDACXX_AVAILABILITY_FORMAT // # define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_format - // This controls whether the default verbose termination function is - // provided by the library. - // - // Note that when users provide their own custom function, it doesn't - // matter whether the dylib provides a default function, and the - // availability markup can actually give a false positive diagnostic - // (it will think that no function is provided, when in reality the - // user has provided their own). - // - // Users can pass -D_LIBCUDACXX_AVAILABILITY_CUSTOM_VERBOSE_ABORT_PROVIDED - // to the compiler to tell the library not to define its own verbose abort. - // Note that defining this macro but failing to define a custom function - // will lead to a load-time error on back-deployment targets, so it should - // be avoided. +// This controls whether the default verbose termination function is +// provided by the library. +// +// Note that when users provide their own custom function, it doesn't +// matter whether the dylib provides a default function, and the +// availability markup can actually give a false positive diagnostic +// (it will think that no function is provided, when in reality the +// user has provided their own). +// +// Users can pass -D_LIBCUDACXX_AVAILABILITY_CUSTOM_VERBOSE_ABORT_PROVIDED +// to the compiler to tell the library not to define its own verbose abort. +// Note that defining this macro but failing to define a custom function +// will lead to a load-time error on back-deployment targets, so it should +// be avoided. 
// # define _LIBCUDACXX_HAS_NO_VERBOSE_ABORT_IN_LIBRARY #elif defined(__APPLE__) -# define _LIBCUDACXX_AVAILABILITY_SHARED_MUTEX \ - __attribute__((availability(macos,strict,introduced=10.12))) \ - __attribute__((availability(ios,strict,introduced=10.0))) \ - __attribute__((availability(tvos,strict,introduced=10.0))) \ - __attribute__((availability(watchos,strict,introduced=3.0))) -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101200) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 100000) || \ - (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 100000) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 30000) -# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex -# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex -# endif - - // Note: bad_optional_access & friends were not introduced in the matching - // macOS and iOS versions, so the version mismatch between macOS and others - // is intended. -# define _LIBCUDACXX_AVAILABILITY_BAD_OPTIONAL_ACCESS \ - __attribute__((availability(macos,strict,introduced=10.13))) \ - __attribute__((availability(ios,strict,introduced=12.0))) \ - __attribute__((availability(tvos,strict,introduced=12.0))) \ - __attribute__((availability(watchos,strict,introduced=5.0))) -# define _LIBCUDACXX_AVAILABILITY_BAD_VARIANT_ACCESS \ - _LIBCUDACXX_AVAILABILITY_BAD_OPTIONAL_ACCESS -# define _LIBCUDACXX_AVAILABILITY_BAD_ANY_CAST \ - _LIBCUDACXX_AVAILABILITY_BAD_OPTIONAL_ACCESS - -# define _LIBCUDACXX_AVAILABILITY_UNCAUGHT_EXCEPTIONS \ - __attribute__((availability(macos,strict,introduced=10.12))) \ - __attribute__((availability(ios,strict,introduced=10.0))) \ - __attribute__((availability(tvos,strict,introduced=10.0))) \ - __attribute__((availability(watchos,strict,introduced=3.0))) - -# define _LIBCUDACXX_AVAILABILITY_SIZED_NEW_DELETE \ - __attribute__((availability(macos,strict,introduced=10.12))) \ - __attribute__((availability(ios,strict,introduced=10.0))) \ - __attribute__((availability(tvos,strict,introduced=10.0))) \ - __attribute__((availability(watchos,strict,introduced=3.0))) - -# define _LIBCUDACXX_AVAILABILITY_FUTURE_ERROR \ - __attribute__((availability(ios,strict,introduced=6.0))) - -# define _LIBCUDACXX_AVAILABILITY_TYPEINFO_VTABLE \ - __attribute__((availability(macos,strict,introduced=10.9))) \ - __attribute__((availability(ios,strict,introduced=7.0))) - -# define _LIBCUDACXX_AVAILABILITY_LOCALE_CATEGORY \ - __attribute__((availability(macos,strict,introduced=10.9))) \ - __attribute__((availability(ios,strict,introduced=7.0))) - -# define _LIBCUDACXX_AVAILABILITY_ATOMIC_SHARED_PTR \ - __attribute__((availability(macos,strict,introduced=10.9))) \ - __attribute__((availability(ios,strict,introduced=7.0))) - -# define _LIBCUDACXX_AVAILABILITY_FILESYSTEM \ - __attribute__((availability(macos,strict,introduced=10.15))) \ - __attribute__((availability(ios,strict,introduced=13.0))) \ - __attribute__((availability(tvos,strict,introduced=13.0))) \ - __attribute__((availability(watchos,strict,introduced=6.0))) -# define _LIBCUDACXX_AVAILABILITY_FILESYSTEM_PUSH \ - _Pragma("clang attribute push(__attribute__((availability(macos,strict,introduced=10.15))), apply_to=any(function,record))") \ - _Pragma("clang attribute push(__attribute__((availability(ios,strict,introduced=13.0))), 
apply_to=any(function,record))") \ - _Pragma("clang attribute push(__attribute__((availability(tvos,strict,introduced=13.0))), apply_to=any(function,record))") \ - _Pragma("clang attribute push(__attribute__((availability(watchos,strict,introduced=6.0))), apply_to=any(function,record))") -# define _LIBCUDACXX_AVAILABILITY_FILESYSTEM_POP \ - _Pragma("clang attribute pop") \ - _Pragma("clang attribute pop") \ - _Pragma("clang attribute pop") \ - _Pragma("clang attribute pop") -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101500) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 130000) || \ - (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 130000) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 60000) -# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem -# endif - -# define _LIBCUDACXX_AVAILABILITY_TO_CHARS_FLOATING_POINT \ - __attribute__((unavailable)) - -# define _LIBCUDACXX_AVAILABILITY_SYNC \ - __attribute__((availability(macos,strict,introduced=11.0))) \ - __attribute__((availability(ios,strict,introduced=14.0))) \ - __attribute__((availability(tvos,strict,introduced=14.0))) \ - __attribute__((availability(watchos,strict,introduced=7.0))) -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 110000) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 140000) || \ - (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 140000) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 70000) -# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait -# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier -# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_latch -# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore -# endif - -# define _LIBCUDACXX_AVAILABILITY_FORMAT \ - __attribute__((unavailable)) -# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_format - -# define _LIBCUDACXX_HAS_NO_VERBOSE_ABORT_IN_LIBRARY +# define _LIBCUDACXX_AVAILABILITY_SHARED_MUTEX \ + __attribute__((availability(macos, strict, introduced = 10.12))) \ + __attribute__((availability(ios, strict, introduced = 10.0))) \ + __attribute__((availability(tvos, strict, introduced = 10.0))) \ + __attribute__((availability(watchos, strict, introduced = 3.0))) +# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) \ + && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101200) \ + || (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) \ + && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 100000) \ + || (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 100000) \ + || (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) \ + && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 30000) +# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex +# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex +# endif + +// Note: bad_optional_access & friends were not introduced in the matching +// macOS and iOS versions, so the version mismatch between macOS and others +// is intended. 
+# define _LIBCUDACXX_AVAILABILITY_BAD_OPTIONAL_ACCESS \ + __attribute__((availability(macos, strict, introduced = 10.13))) \ + __attribute__((availability(ios, strict, introduced = 12.0))) \ + __attribute__((availability(tvos, strict, introduced = 12.0))) \ + __attribute__((availability(watchos, strict, introduced = 5.0))) +# define _LIBCUDACXX_AVAILABILITY_BAD_VARIANT_ACCESS _LIBCUDACXX_AVAILABILITY_BAD_OPTIONAL_ACCESS +# define _LIBCUDACXX_AVAILABILITY_BAD_ANY_CAST _LIBCUDACXX_AVAILABILITY_BAD_OPTIONAL_ACCESS + +# define _LIBCUDACXX_AVAILABILITY_UNCAUGHT_EXCEPTIONS \ + __attribute__((availability(macos, strict, introduced = 10.12))) \ + __attribute__((availability(ios, strict, introduced = 10.0))) \ + __attribute__((availability(tvos, strict, introduced = 10.0))) \ + __attribute__((availability(watchos, strict, introduced = 3.0))) + +# define _LIBCUDACXX_AVAILABILITY_SIZED_NEW_DELETE \ + __attribute__((availability(macos, strict, introduced = 10.12))) \ + __attribute__((availability(ios, strict, introduced = 10.0))) \ + __attribute__((availability(tvos, strict, introduced = 10.0))) \ + __attribute__((availability(watchos, strict, introduced = 3.0))) + +# define _LIBCUDACXX_AVAILABILITY_FUTURE_ERROR __attribute__((availability(ios, strict, introduced = 6.0))) + +# define _LIBCUDACXX_AVAILABILITY_TYPEINFO_VTABLE \ + __attribute__((availability(macos, strict, introduced = 10.9))) \ + __attribute__((availability(ios, strict, introduced = 7.0))) + +# define _LIBCUDACXX_AVAILABILITY_LOCALE_CATEGORY \ + __attribute__((availability(macos, strict, introduced = 10.9))) \ + __attribute__((availability(ios, strict, introduced = 7.0))) + +# define _LIBCUDACXX_AVAILABILITY_ATOMIC_SHARED_PTR \ + __attribute__((availability(macos, strict, introduced = 10.9))) \ + __attribute__((availability(ios, strict, introduced = 7.0))) + +# define _LIBCUDACXX_AVAILABILITY_FILESYSTEM \ + __attribute__((availability(macos, strict, introduced = 10.15))) \ + __attribute__((availability(ios, strict, introduced = 13.0))) \ + __attribute__((availability(tvos, strict, introduced = 13.0))) \ + __attribute__((availability(watchos, strict, introduced = 6.0))) +# define _LIBCUDACXX_AVAILABILITY_FILESYSTEM_PUSH \ + _Pragma("clang attribute push(__attribute__((availability(macos,strict,introduced=10.15))), " \ + "apply_to=any(function,record))") \ + _Pragma("clang attribute push(__attribute__((availability(ios,strict,introduced=13.0))), " \ + "apply_to=any(function,record))") \ + _Pragma("clang attribute push(__attribute__((availability(tvos,strict,introduced=13.0))), " \ + "apply_to=any(function,record))") \ + _Pragma("clang attribute push(__attribute__((availability(watchos,strict,introduced=6.0))), " \ + "apply_to=any(function,record))") +# define _LIBCUDACXX_AVAILABILITY_FILESYSTEM_POP \ + _Pragma("clang attribute pop") _Pragma("clang attribute pop") _Pragma("clang attribute pop") \ + _Pragma("clang attribute pop") +# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) \ + && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101500) \ + || (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) \ + && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 130000) \ + || (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 130000) \ + || (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) \ + && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 60000) +# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem +# endif + +# define 
_LIBCUDACXX_AVAILABILITY_TO_CHARS_FLOATING_POINT __attribute__((unavailable)) + +# define _LIBCUDACXX_AVAILABILITY_SYNC \ + __attribute__((availability(macos, strict, introduced = 11.0))) \ + __attribute__((availability(ios, strict, introduced = 14.0))) \ + __attribute__((availability(tvos, strict, introduced = 14.0))) \ + __attribute__((availability(watchos, strict, introduced = 7.0))) +# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) \ + && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 110000) \ + || (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) \ + && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 140000) \ + || (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 140000) \ + || (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) \ + && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 70000) +# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait +# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier +# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_latch +# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore +# endif + +# define _LIBCUDACXX_AVAILABILITY_FORMAT __attribute__((unavailable)) +# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_format + +# define _LIBCUDACXX_HAS_NO_VERBOSE_ABORT_IN_LIBRARY #else // ...New vendors can add availability markup here... -# error "It looks like you're trying to enable vendor availability markup, but you haven't defined the corresponding macros yet!" +# error \ + "It looks like you're trying to enable vendor availability markup, but you haven't defined the corresponding macros yet!" #endif @@ -290,15 +294,15 @@ // Those are defined in terms of the availability attributes above, and // should not be vendor-specific. 
#if defined(_LIBCUDACXX_NO_EXCEPTIONS) -# define _LIBCUDACXX_AVAILABILITY_FUTURE -# define _LIBCUDACXX_AVAILABILITY_THROW_BAD_ANY_CAST -# define _LIBCUDACXX_AVAILABILITY_THROW_BAD_OPTIONAL_ACCESS -# define _LIBCUDACXX_AVAILABILITY_THROW_BAD_VARIANT_ACCESS +# define _LIBCUDACXX_AVAILABILITY_FUTURE +# define _LIBCUDACXX_AVAILABILITY_THROW_BAD_ANY_CAST +# define _LIBCUDACXX_AVAILABILITY_THROW_BAD_OPTIONAL_ACCESS +# define _LIBCUDACXX_AVAILABILITY_THROW_BAD_VARIANT_ACCESS #else -# define _LIBCUDACXX_AVAILABILITY_FUTURE _LIBCUDACXX_AVAILABILITY_FUTURE_ERROR -# define _LIBCUDACXX_AVAILABILITY_THROW_BAD_ANY_CAST _LIBCUDACXX_AVAILABILITY_BAD_ANY_CAST -# define _LIBCUDACXX_AVAILABILITY_THROW_BAD_OPTIONAL_ACCESS _LIBCUDACXX_AVAILABILITY_BAD_OPTIONAL_ACCESS -# define _LIBCUDACXX_AVAILABILITY_THROW_BAD_VARIANT_ACCESS _LIBCUDACXX_AVAILABILITY_BAD_VARIANT_ACCESS +# define _LIBCUDACXX_AVAILABILITY_FUTURE _LIBCUDACXX_AVAILABILITY_FUTURE_ERROR +# define _LIBCUDACXX_AVAILABILITY_THROW_BAD_ANY_CAST _LIBCUDACXX_AVAILABILITY_BAD_ANY_CAST +# define _LIBCUDACXX_AVAILABILITY_THROW_BAD_OPTIONAL_ACCESS _LIBCUDACXX_AVAILABILITY_BAD_OPTIONAL_ACCESS +# define _LIBCUDACXX_AVAILABILITY_THROW_BAD_VARIANT_ACCESS _LIBCUDACXX_AVAILABILITY_BAD_VARIANT_ACCESS #endif #endif // _LIBCUDACXX___AVAILABILITY diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/__bit_reference b/libcudacxx/include/cuda/std/detail/libcxx/include/__bit_reference index 4ce42eb4c6a..88325c3d5c9 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/__bit_reference +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/__bit_reference @@ -10,9 +10,9 @@ #ifndef _LIBCUDACXX___BIT_REFERENCE #define _LIBCUDACXX___BIT_REFERENCE -##include -#include +##include #include +#include #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) # pragma GCC system_header @@ -22,229 +22,259 @@ # pragma system_header #endif // no system header -_LIBCUDACXX_PUSH_MACROS + _LIBCUDACXX_PUSH_MACROS #include <__undef_macros> + _LIBCUDACXX_BEGIN_NAMESPACE_STD -_LIBCUDACXX_BEGIN_NAMESPACE_STD - -template class __bit_iterator; -template class __bit_const_reference; +template +class __bit_iterator; +template +class __bit_const_reference; template struct __has_storage_type { - static const bool value = false; + static const bool value = false; }; template ::value> class __bit_reference { - typedef typename _Cp::__storage_type __storage_type; - typedef typename _Cp::__storage_pointer __storage_pointer; - - __storage_pointer __seg_; - __storage_type __mask_; + typedef typename _Cp::__storage_type __storage_type; + typedef typename _Cp::__storage_pointer __storage_pointer; - friend typename _Cp::__self; + __storage_pointer __seg_; + __storage_type __mask_; - friend class __bit_const_reference<_Cp>; - friend class __bit_iterator<_Cp, false>; -public: - _LIBCUDACXX_INLINE_VISIBILITY - __bit_reference(const __bit_reference&) = default; + friend typename _Cp::__self; - _LIBCUDACXX_INLINE_VISIBILITY operator bool() const noexcept - {return static_cast(*__seg_ & __mask_);} - _LIBCUDACXX_INLINE_VISIBILITY bool operator ~() const noexcept - {return !static_cast(*this);} + friend class __bit_const_reference<_Cp>; + friend class __bit_iterator<_Cp, false>; - _LIBCUDACXX_INLINE_VISIBILITY - __bit_reference& operator=(bool __x) noexcept +public: + _LIBCUDACXX_INLINE_VISIBILITY __bit_reference(const __bit_reference&) = default; + + _LIBCUDACXX_INLINE_VISIBILITY operator bool() const noexcept + { + return static_cast(*__seg_ & __mask_); + } + _LIBCUDACXX_INLINE_VISIBILITY bool operator~() 
const noexcept + { + return !static_cast(*this); + } + + _LIBCUDACXX_INLINE_VISIBILITY __bit_reference& operator=(bool __x) noexcept + { + if (__x) { - if (__x) - *__seg_ |= __mask_; - else - *__seg_ &= ~__mask_; - return *this; + *__seg_ |= __mask_; } + else + { + *__seg_ &= ~__mask_; + } + return *this; + } + + _LIBCUDACXX_INLINE_VISIBILITY __bit_reference& operator=(const __bit_reference& __x) noexcept + { + return operator=(static_cast(__x)); + } + + _LIBCUDACXX_INLINE_VISIBILITY void flip() noexcept + { + *__seg_ ^= __mask_; + } + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator<_Cp, false> operator&() const noexcept + { + return __bit_iterator<_Cp, false>(__seg_, static_cast(__libcpp_ctz(__mask_))); + } - _LIBCUDACXX_INLINE_VISIBILITY - __bit_reference& operator=(const __bit_reference& __x) noexcept - {return operator=(static_cast(__x));} - - _LIBCUDACXX_INLINE_VISIBILITY void flip() noexcept {*__seg_ ^= __mask_;} - _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator<_Cp, false> operator&() const noexcept - {return __bit_iterator<_Cp, false>(__seg_, static_cast(__libcpp_ctz(__mask_)));} private: - _LIBCUDACXX_INLINE_VISIBILITY - __bit_reference(__storage_pointer __s, __storage_type __m) noexcept - : __seg_(__s), __mask_(__m) {} + _LIBCUDACXX_INLINE_VISIBILITY __bit_reference(__storage_pointer __s, __storage_type __m) noexcept + : __seg_(__s) + , __mask_(__m) + {} }; template class __bit_reference<_Cp, false> -{ -}; +{}; template -inline _LIBCUDACXX_INLINE_VISIBILITY -void -swap(__bit_reference<_Cp> __x, __bit_reference<_Cp> __y) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void swap(__bit_reference<_Cp> __x, __bit_reference<_Cp> __y) noexcept { - bool __t = __x; - __x = __y; - __y = __t; + bool __t = __x; + __x = __y; + __y = __t; } template -inline _LIBCUDACXX_INLINE_VISIBILITY -void -swap(__bit_reference<_Cp> __x, __bit_reference<_Dp> __y) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void swap(__bit_reference<_Cp> __x, __bit_reference<_Dp> __y) noexcept { - bool __t = __x; - __x = __y; - __y = __t; + bool __t = __x; + __x = __y; + __y = __t; } template -inline _LIBCUDACXX_INLINE_VISIBILITY -void -swap(__bit_reference<_Cp> __x, bool& __y) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void swap(__bit_reference<_Cp> __x, bool& __y) noexcept { - bool __t = __x; - __x = __y; - __y = __t; + bool __t = __x; + __x = __y; + __y = __t; } template -inline _LIBCUDACXX_INLINE_VISIBILITY -void -swap(bool& __x, __bit_reference<_Cp> __y) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void swap(bool& __x, __bit_reference<_Cp> __y) noexcept { - bool __t = __x; - __x = __y; - __y = __t; + bool __t = __x; + __x = __y; + __y = __t; } template class __bit_const_reference { - typedef typename _Cp::__storage_type __storage_type; - typedef typename _Cp::__const_storage_pointer __storage_pointer; + typedef typename _Cp::__storage_type __storage_type; + typedef typename _Cp::__const_storage_pointer __storage_pointer; + + __storage_pointer __seg_; + __storage_type __mask_; - __storage_pointer __seg_; - __storage_type __mask_; + friend typename _Cp::__self; + friend class __bit_iterator<_Cp, true>; - friend typename _Cp::__self; - friend class __bit_iterator<_Cp, true>; public: - _LIBCUDACXX_INLINE_VISIBILITY - __bit_const_reference(const __bit_const_reference&) = default; + _LIBCUDACXX_INLINE_VISIBILITY __bit_const_reference(const __bit_const_reference&) = default; + + _LIBCUDACXX_INLINE_VISIBILITY __bit_const_reference(const __bit_reference<_Cp>& __x) noexcept + : __seg_(__x.__seg_) + , __mask_(__x.__mask_) + {} - 
_LIBCUDACXX_INLINE_VISIBILITY - __bit_const_reference(const __bit_reference<_Cp>& __x) noexcept - : __seg_(__x.__seg_), __mask_(__x.__mask_) {} + _LIBCUDACXX_INLINE_VISIBILITY constexpr operator bool() const noexcept + { + return static_cast(*__seg_ & __mask_); + } - _LIBCUDACXX_INLINE_VISIBILITY constexpr operator bool() const noexcept - {return static_cast(*__seg_ & __mask_);} + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator<_Cp, true> operator&() const noexcept + { + return __bit_iterator<_Cp, true>(__seg_, static_cast(__libcpp_ctz(__mask_))); + } - _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator<_Cp, true> operator&() const noexcept - {return __bit_iterator<_Cp, true>(__seg_, static_cast(__libcpp_ctz(__mask_)));} private: - _LIBCUDACXX_INLINE_VISIBILITY - constexpr - __bit_const_reference(__storage_pointer __s, __storage_type __m) noexcept - : __seg_(__s), __mask_(__m) {} + _LIBCUDACXX_INLINE_VISIBILITY constexpr __bit_const_reference(__storage_pointer __s, __storage_type __m) noexcept + : __seg_(__s) + , __mask_(__m) + {} - __bit_const_reference& operator=(const __bit_const_reference&) = delete; + __bit_const_reference& operator=(const __bit_const_reference&) = delete; }; // find template -__bit_iterator<_Cp, _IsConst> -__find_bool_true(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) +__bit_iterator<_Cp, _IsConst> __find_bool_true(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) { - typedef __bit_iterator<_Cp, _IsConst> _It; - typedef typename _It::__storage_type __storage_type; - static const int __bits_per_word = _It::__bits_per_word; - // do first partial word - if (__first.__ctz_ != 0) + typedef __bit_iterator<_Cp, _IsConst> _It; + typedef typename _It::__storage_type __storage_type; + static const int __bits_per_word = _It::__bits_per_word; + // do first partial word + if (__first.__ctz_ != 0) + { + __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); + __storage_type __dn = _CUDA_VSTD::min(__clz_f, __n); + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); + __storage_type __b = *__first.__seg_ & __m; + if (__b) { - __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); - __storage_type __dn = _CUDA_VSTD::min(__clz_f, __n); - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __storage_type __b = *__first.__seg_ & __m; - if (__b) - return _It(__first.__seg_, static_cast(_CUDA_VSTD::__libcpp_ctz(__b))); - if (__n == __dn) - return __first + __n; - __n -= __dn; - ++__first.__seg_; + return _It(__first.__seg_, static_cast(_CUDA_VSTD::__libcpp_ctz(__b))); } - // do middle whole words - for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) - if (*__first.__seg_) - return _It(__first.__seg_, static_cast(_CUDA_VSTD::__libcpp_ctz(*__first.__seg_))); - // do last partial word - if (__n > 0) + if (__n == __dn) { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - __storage_type __b = *__first.__seg_ & __m; - if (__b) - return _It(__first.__seg_, static_cast(_CUDA_VSTD::__libcpp_ctz(__b))); + return __first + __n; } - return _It(__first.__seg_, static_cast(__n)); + __n -= __dn; + ++__first.__seg_; + } + // do middle whole words + for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) + { + if (*__first.__seg_) + { + return _It(__first.__seg_, static_cast(_CUDA_VSTD::__libcpp_ctz(*__first.__seg_))); + } + } + // do last partial 
word + if (__n > 0) + { + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + __storage_type __b = *__first.__seg_ & __m; + if (__b) + { + return _It(__first.__seg_, static_cast(_CUDA_VSTD::__libcpp_ctz(__b))); + } + } + return _It(__first.__seg_, static_cast(__n)); } template -__bit_iterator<_Cp, _IsConst> -__find_bool_false(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) +__bit_iterator<_Cp, _IsConst> __find_bool_false(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) { - typedef __bit_iterator<_Cp, _IsConst> _It; - typedef typename _It::__storage_type __storage_type; - const int __bits_per_word = _It::__bits_per_word; - // do first partial word - if (__first.__ctz_ != 0) + typedef __bit_iterator<_Cp, _IsConst> _It; + typedef typename _It::__storage_type __storage_type; + const int __bits_per_word = _It::__bits_per_word; + // do first partial word + if (__first.__ctz_ != 0) + { + __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); + __storage_type __dn = _CUDA_VSTD::min(__clz_f, __n); + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); + __storage_type __b = ~*__first.__seg_ & __m; + if (__b) { - __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); - __storage_type __dn = _CUDA_VSTD::min(__clz_f, __n); - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __storage_type __b = ~*__first.__seg_ & __m; - if (__b) - return _It(__first.__seg_, static_cast(_CUDA_VSTD::__libcpp_ctz(__b))); - if (__n == __dn) - return __first + __n; - __n -= __dn; - ++__first.__seg_; + return _It(__first.__seg_, static_cast(_CUDA_VSTD::__libcpp_ctz(__b))); } - // do middle whole words - for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) + if (__n == __dn) { - __storage_type __b = ~*__first.__seg_; - if (__b) - return _It(__first.__seg_, static_cast(_CUDA_VSTD::__libcpp_ctz(__b))); + return __first + __n; } - // do last partial word - if (__n > 0) + __n -= __dn; + ++__first.__seg_; + } + // do middle whole words + for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) + { + __storage_type __b = ~*__first.__seg_; + if (__b) { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - __storage_type __b = ~*__first.__seg_ & __m; - if (__b) - return _It(__first.__seg_, static_cast(_CUDA_VSTD::__libcpp_ctz(__b))); + return _It(__first.__seg_, static_cast(_CUDA_VSTD::__libcpp_ctz(__b))); } - return _It(__first.__seg_, static_cast(__n)); + } + // do last partial word + if (__n > 0) + { + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + __storage_type __b = ~*__first.__seg_ & __m; + if (__b) + { + return _It(__first.__seg_, static_cast(_CUDA_VSTD::__libcpp_ctz(__b))); + } + } + return _It(__first.__seg_, static_cast(__n)); } template -inline _LIBCUDACXX_INLINE_VISIBILITY -__bit_iterator<_Cp, _IsConst> +inline _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator<_Cp, _IsConst> find(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value_) { - if (static_cast(__value_)) - return __find_bool_true(__first, static_cast(__last - __first)); - return __find_bool_false(__first, static_cast(__last - __first)); + if (static_cast(__value_)) + { + return __find_bool_true(__first, static_cast(__last - __first)); + } + return __find_bool_false(__first, static_cast(__last - __first)); } // count @@ 
-253,627 +283,633 @@ template typename __bit_iterator<_Cp, _IsConst>::difference_type __count_bool_true(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) { - typedef __bit_iterator<_Cp, _IsConst> _It; - typedef typename _It::__storage_type __storage_type; - typedef typename _It::difference_type difference_type; - const int __bits_per_word = _It::__bits_per_word; - difference_type __r = 0; - // do first partial word - if (__first.__ctz_ != 0) - { - __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); - __storage_type __dn = _CUDA_VSTD::min(__clz_f, __n); - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __r = _CUDA_VSTD::__libcpp_popcount(*__first.__seg_ & __m); - __n -= __dn; - ++__first.__seg_; - } - // do middle whole words - for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) - __r += _CUDA_VSTD::__libcpp_popcount(*__first.__seg_); - // do last partial word - if (__n > 0) - { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - __r += _CUDA_VSTD::__libcpp_popcount(*__first.__seg_ & __m); - } - return __r; + typedef __bit_iterator<_Cp, _IsConst> _It; + typedef typename _It::__storage_type __storage_type; + typedef typename _It::difference_type difference_type; + const int __bits_per_word = _It::__bits_per_word; + difference_type __r = 0; + // do first partial word + if (__first.__ctz_ != 0) + { + __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); + __storage_type __dn = _CUDA_VSTD::min(__clz_f, __n); + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); + __r = _CUDA_VSTD::__libcpp_popcount(*__first.__seg_ & __m); + __n -= __dn; + ++__first.__seg_; + } + // do middle whole words + for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) + { + __r += _CUDA_VSTD::__libcpp_popcount(*__first.__seg_); + } + // do last partial word + if (__n > 0) + { + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + __r += _CUDA_VSTD::__libcpp_popcount(*__first.__seg_ & __m); + } + return __r; } template typename __bit_iterator<_Cp, _IsConst>::difference_type __count_bool_false(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) { - typedef __bit_iterator<_Cp, _IsConst> _It; - typedef typename _It::__storage_type __storage_type; - typedef typename _It::difference_type difference_type; - const int __bits_per_word = _It::__bits_per_word; - difference_type __r = 0; - // do first partial word - if (__first.__ctz_ != 0) - { - __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); - __storage_type __dn = _CUDA_VSTD::min(__clz_f, __n); - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __r = _CUDA_VSTD::__libcpp_popcount(~*__first.__seg_ & __m); - __n -= __dn; - ++__first.__seg_; - } - // do middle whole words - for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) - __r += _CUDA_VSTD::__libcpp_popcount(~*__first.__seg_); - // do last partial word - if (__n > 0) - { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - __r += _CUDA_VSTD::__libcpp_popcount(~*__first.__seg_ & __m); - } - return __r; + typedef __bit_iterator<_Cp, _IsConst> _It; + typedef typename _It::__storage_type __storage_type; + typedef typename _It::difference_type difference_type; + const int __bits_per_word = _It::__bits_per_word; + 
difference_type __r = 0; + // do first partial word + if (__first.__ctz_ != 0) + { + __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); + __storage_type __dn = _CUDA_VSTD::min(__clz_f, __n); + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); + __r = _CUDA_VSTD::__libcpp_popcount(~*__first.__seg_ & __m); + __n -= __dn; + ++__first.__seg_; + } + // do middle whole words + for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) + { + __r += _CUDA_VSTD::__libcpp_popcount(~*__first.__seg_); + } + // do last partial word + if (__n > 0) + { + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + __r += _CUDA_VSTD::__libcpp_popcount(~*__first.__seg_ & __m); + } + return __r; } template -inline _LIBCUDACXX_INLINE_VISIBILITY -typename __bit_iterator<_Cp, _IsConst>::difference_type +inline _LIBCUDACXX_INLINE_VISIBILITY typename __bit_iterator<_Cp, _IsConst>::difference_type count(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value_) { - if (static_cast(__value_)) - return __count_bool_true(__first, static_cast(__last - __first)); - return __count_bool_false(__first, static_cast(__last - __first)); + if (static_cast(__value_)) + { + return __count_bool_true(__first, static_cast(__last - __first)); + } + return __count_bool_false(__first, static_cast(__last - __first)); } // fill_n template -void -__fill_n_false(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n) +void __fill_n_false(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n) { - typedef __bit_iterator<_Cp, false> _It; - typedef typename _It::__storage_type __storage_type; - const int __bits_per_word = _It::__bits_per_word; - // do first partial word - if (__first.__ctz_ != 0) - { - __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); - __storage_type __dn = _CUDA_VSTD::min(__clz_f, __n); - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - *__first.__seg_ &= ~__m; - __n -= __dn; - ++__first.__seg_; - } - // do middle whole words - __storage_type __nw = __n / __bits_per_word; - _CUDA_VSTD::memset(_CUDA_VSTD::__to_raw_pointer(__first.__seg_), 0, __nw * sizeof(__storage_type)); - __n -= __nw * __bits_per_word; - // do last partial word - if (__n > 0) - { - __first.__seg_ += __nw; - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - *__first.__seg_ &= ~__m; - } + typedef __bit_iterator<_Cp, false> _It; + typedef typename _It::__storage_type __storage_type; + const int __bits_per_word = _It::__bits_per_word; + // do first partial word + if (__first.__ctz_ != 0) + { + __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); + __storage_type __dn = _CUDA_VSTD::min(__clz_f, __n); + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); + *__first.__seg_ &= ~__m; + __n -= __dn; + ++__first.__seg_; + } + // do middle whole words + __storage_type __nw = __n / __bits_per_word; + _CUDA_VSTD::memset(_CUDA_VSTD::__to_raw_pointer(__first.__seg_), 0, __nw * sizeof(__storage_type)); + __n -= __nw * __bits_per_word; + // do last partial word + if (__n > 0) + { + __first.__seg_ += __nw; + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + *__first.__seg_ &= ~__m; + } } template -void -__fill_n_true(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n) 
+void __fill_n_true(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n) { - typedef __bit_iterator<_Cp, false> _It; - typedef typename _It::__storage_type __storage_type; - const int __bits_per_word = _It::__bits_per_word; - // do first partial word - if (__first.__ctz_ != 0) - { - __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); - __storage_type __dn = _CUDA_VSTD::min(__clz_f, __n); - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - *__first.__seg_ |= __m; - __n -= __dn; - ++__first.__seg_; - } - // do middle whole words - __storage_type __nw = __n / __bits_per_word; - _CUDA_VSTD::memset(_CUDA_VSTD::__to_raw_pointer(__first.__seg_), -1, __nw * sizeof(__storage_type)); - __n -= __nw * __bits_per_word; - // do last partial word - if (__n > 0) - { - __first.__seg_ += __nw; - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - *__first.__seg_ |= __m; - } + typedef __bit_iterator<_Cp, false> _It; + typedef typename _It::__storage_type __storage_type; + const int __bits_per_word = _It::__bits_per_word; + // do first partial word + if (__first.__ctz_ != 0) + { + __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); + __storage_type __dn = _CUDA_VSTD::min(__clz_f, __n); + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); + *__first.__seg_ |= __m; + __n -= __dn; + ++__first.__seg_; + } + // do middle whole words + __storage_type __nw = __n / __bits_per_word; + _CUDA_VSTD::memset(_CUDA_VSTD::__to_raw_pointer(__first.__seg_), -1, __nw * sizeof(__storage_type)); + __n -= __nw * __bits_per_word; + // do last partial word + if (__n > 0) + { + __first.__seg_ += __nw; + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + *__first.__seg_ |= __m; + } } template -inline _LIBCUDACXX_INLINE_VISIBILITY -void +inline _LIBCUDACXX_INLINE_VISIBILITY void fill_n(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n, bool __value_) { - if (__n > 0) + if (__n > 0) + { + if (__value_) { - if (__value_) - __fill_n_true(__first, __n); - else - __fill_n_false(__first, __n); + __fill_n_true(__first, __n); } + else + { + __fill_n_false(__first, __n); + } + } } // fill template -inline _LIBCUDACXX_INLINE_VISIBILITY -void +inline _LIBCUDACXX_INLINE_VISIBILITY void fill(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __last, bool __value_) { - _CUDA_VSTD::fill_n(__first, static_cast(__last - __first), __value_); + _CUDA_VSTD::fill_n(__first, static_cast(__last - __first), __value_); } // copy template -__bit_iterator<_Cp, false> -__copy_aligned(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, - __bit_iterator<_Cp, false> __result) +__bit_iterator<_Cp, false> __copy_aligned( + __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - typedef __bit_iterator<_Cp, _IsConst> _In; - typedef typename _In::difference_type difference_type; - typedef typename _In::__storage_type __storage_type; - const int __bits_per_word = _In::__bits_per_word; - difference_type __n = __last - __first; + typedef __bit_iterator<_Cp, _IsConst> _In; + typedef typename _In::difference_type difference_type; + typedef typename _In::__storage_type __storage_type; + const int __bits_per_word = _In::__bits_per_word; + difference_type __n = __last - __first; + if (__n > 0) + { + // do first word + if (__first.__ctz_ != 0) 
+ { + unsigned __clz = __bits_per_word - __first.__ctz_; + difference_type __dn = _CUDA_VSTD::min(static_cast(__clz), __n); + __n -= __dn; + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); + __storage_type __b = *__first.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b; + __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; + __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); + ++__first.__seg_; + // __first.__ctz_ = 0; + } + // __first.__ctz_ == 0; + // do middle words + __storage_type __nw = __n / __bits_per_word; + _CUDA_VSTD::memmove(_CUDA_VSTD::__to_raw_pointer(__result.__seg_), + _CUDA_VSTD::__to_raw_pointer(__first.__seg_), + __nw * sizeof(__storage_type)); + __n -= __nw * __bits_per_word; + __result.__seg_ += __nw; + // do last word if (__n > 0) { - // do first word - if (__first.__ctz_ != 0) - { - unsigned __clz = __bits_per_word - __first.__ctz_; - difference_type __dn = _CUDA_VSTD::min(static_cast(__clz), __n); - __n -= __dn; - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); - __storage_type __b = *__first.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b; - __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; - __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); - ++__first.__seg_; - // __first.__ctz_ = 0; - } - // __first.__ctz_ == 0; - // do middle words - __storage_type __nw = __n / __bits_per_word; - _CUDA_VSTD::memmove(_CUDA_VSTD::__to_raw_pointer(__result.__seg_), - _CUDA_VSTD::__to_raw_pointer(__first.__seg_), - __nw * sizeof(__storage_type)); - __n -= __nw * __bits_per_word; - __result.__seg_ += __nw; - // do last word - if (__n > 0) - { - __first.__seg_ += __nw; - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - __storage_type __b = *__first.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b; - __result.__ctz_ = static_cast(__n); - } + __first.__seg_ += __nw; + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + __storage_type __b = *__first.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b; + __result.__ctz_ = static_cast(__n); } - return __result; + } + return __result; } template -__bit_iterator<_Cp, false> -__copy_unaligned(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, - __bit_iterator<_Cp, false> __result) +__bit_iterator<_Cp, false> __copy_unaligned( + __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - typedef __bit_iterator<_Cp, _IsConst> _In; - typedef typename _In::difference_type difference_type; - typedef typename _In::__storage_type __storage_type; - static const int __bits_per_word = _In::__bits_per_word; - difference_type __n = __last - __first; + typedef __bit_iterator<_Cp, _IsConst> _In; + typedef typename _In::difference_type difference_type; + typedef typename _In::__storage_type __storage_type; + static const int __bits_per_word = _In::__bits_per_word; + difference_type __n = __last - __first; + if (__n > 0) + { + // do first word + if (__first.__ctz_ != 0) + { + unsigned __clz_f = __bits_per_word - __first.__ctz_; + difference_type __dn = _CUDA_VSTD::min(static_cast(__clz_f), __n); + __n -= __dn; + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); + __storage_type __b = *__first.__seg_ & __m; + unsigned __clz_r = 
__bits_per_word - __result.__ctz_; + __storage_type __ddn = _CUDA_VSTD::min<__storage_type>(__dn, __clz_r); + __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); + *__result.__seg_ &= ~__m; + if (__result.__ctz_ > __first.__ctz_) + { + *__result.__seg_ |= __b << (__result.__ctz_ - __first.__ctz_); + } + else + { + *__result.__seg_ |= __b >> (__first.__ctz_ - __result.__ctz_); + } + __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word; + __result.__ctz_ = static_cast((__ddn + __result.__ctz_) % __bits_per_word); + __dn -= __ddn; + if (__dn > 0) + { + __m = ~__storage_type(0) >> (__bits_per_word - __dn); + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b >> (__first.__ctz_ + __ddn); + __result.__ctz_ = static_cast(__dn); + } + ++__first.__seg_; + // __first.__ctz_ = 0; + } + // __first.__ctz_ == 0; + // do middle words + unsigned __clz_r = __bits_per_word - __result.__ctz_; + __storage_type __m = ~__storage_type(0) << __result.__ctz_; + for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) + { + __storage_type __b = *__first.__seg_; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b << __result.__ctz_; + ++__result.__seg_; + *__result.__seg_ &= __m; + *__result.__seg_ |= __b >> __clz_r; + } + // do last word if (__n > 0) { - // do first word - if (__first.__ctz_ != 0) - { - unsigned __clz_f = __bits_per_word - __first.__ctz_; - difference_type __dn = _CUDA_VSTD::min(static_cast(__clz_f), __n); - __n -= __dn; - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __storage_type __b = *__first.__seg_ & __m; - unsigned __clz_r = __bits_per_word - __result.__ctz_; - __storage_type __ddn = _CUDA_VSTD::min<__storage_type>(__dn, __clz_r); - __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); - *__result.__seg_ &= ~__m; - if (__result.__ctz_ > __first.__ctz_) - *__result.__seg_ |= __b << (__result.__ctz_ - __first.__ctz_); - else - *__result.__seg_ |= __b >> (__first.__ctz_ - __result.__ctz_); - __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word; - __result.__ctz_ = static_cast((__ddn + __result.__ctz_) % __bits_per_word); - __dn -= __ddn; - if (__dn > 0) - { - __m = ~__storage_type(0) >> (__bits_per_word - __dn); - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b >> (__first.__ctz_ + __ddn); - __result.__ctz_ = static_cast(__dn); - } - ++__first.__seg_; - // __first.__ctz_ = 0; - } - // __first.__ctz_ == 0; - // do middle words - unsigned __clz_r = __bits_per_word - __result.__ctz_; - __storage_type __m = ~__storage_type(0) << __result.__ctz_; - for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) - { - __storage_type __b = *__first.__seg_; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b << __result.__ctz_; - ++__result.__seg_; - *__result.__seg_ &= __m; - *__result.__seg_ |= __b >> __clz_r; - } - // do last word - if (__n > 0) - { - __m = ~__storage_type(0) >> (__bits_per_word - __n); - __storage_type __b = *__first.__seg_ & __m; - __storage_type __dn = _CUDA_VSTD::min(__n, static_cast(__clz_r)); - __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b << __result.__ctz_; - __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; - __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); - __n -= __dn; - if (__n > 0) - { - __m = ~__storage_type(0) >> (__bits_per_word - __n); - 
*__result.__seg_ &= ~__m; - *__result.__seg_ |= __b >> __dn; - __result.__ctz_ = static_cast(__n); - } - } + __m = ~__storage_type(0) >> (__bits_per_word - __n); + __storage_type __b = *__first.__seg_ & __m; + __storage_type __dn = _CUDA_VSTD::min(__n, static_cast(__clz_r)); + __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b << __result.__ctz_; + __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; + __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); + __n -= __dn; + if (__n > 0) + { + __m = ~__storage_type(0) >> (__bits_per_word - __n); + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b >> __dn; + __result.__ctz_ = static_cast(__n); + } } - return __result; + } + return __result; } template -inline _LIBCUDACXX_INLINE_VISIBILITY -__bit_iterator<_Cp, false> +inline _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator<_Cp, false> copy(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - if (__first.__ctz_ == __result.__ctz_) - return __copy_aligned(__first, __last, __result); - return __copy_unaligned(__first, __last, __result); + if (__first.__ctz_ == __result.__ctz_) + { + return __copy_aligned(__first, __last, __result); + } + return __copy_unaligned(__first, __last, __result); } // copy_backward template -__bit_iterator<_Cp, false> -__copy_backward_aligned(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, - __bit_iterator<_Cp, false> __result) +__bit_iterator<_Cp, false> __copy_backward_aligned( + __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - typedef __bit_iterator<_Cp, _IsConst> _In; - typedef typename _In::difference_type difference_type; - typedef typename _In::__storage_type __storage_type; - const int __bits_per_word = _In::__bits_per_word; - difference_type __n = __last - __first; + typedef __bit_iterator<_Cp, _IsConst> _In; + typedef typename _In::difference_type difference_type; + typedef typename _In::__storage_type __storage_type; + const int __bits_per_word = _In::__bits_per_word; + difference_type __n = __last - __first; + if (__n > 0) + { + // do first word + if (__last.__ctz_ != 0) + { + difference_type __dn = _CUDA_VSTD::min(static_cast(__last.__ctz_), __n); + __n -= __dn; + unsigned __clz = __bits_per_word - __last.__ctz_; + __storage_type __m = (~__storage_type(0) << (__last.__ctz_ - __dn)) & (~__storage_type(0) >> __clz); + __storage_type __b = *__last.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b; + __result.__ctz_ = static_cast(((-__dn & (__bits_per_word - 1)) + __result.__ctz_) % __bits_per_word); + // __last.__ctz_ = 0 + } + // __last.__ctz_ == 0 || __n == 0 + // __result.__ctz_ == 0 || __n == 0 + // do middle words + __storage_type __nw = __n / __bits_per_word; + __result.__seg_ -= __nw; + __last.__seg_ -= __nw; + _CUDA_VSTD::memmove(_CUDA_VSTD::__to_raw_pointer(__result.__seg_), + _CUDA_VSTD::__to_raw_pointer(__last.__seg_), + __nw * sizeof(__storage_type)); + __n -= __nw * __bits_per_word; + // do last word if (__n > 0) { - // do first word - if (__last.__ctz_ != 0) - { - difference_type __dn = _CUDA_VSTD::min(static_cast(__last.__ctz_), __n); - __n -= __dn; - unsigned __clz = __bits_per_word - __last.__ctz_; - __storage_type __m = (~__storage_type(0) << (__last.__ctz_ - __dn)) & (~__storage_type(0) >> __clz); - __storage_type __b = *__last.__seg_ & __m; - 
*__result.__seg_ &= ~__m; - *__result.__seg_ |= __b; - __result.__ctz_ = static_cast(((-__dn & (__bits_per_word - 1)) + - __result.__ctz_) % __bits_per_word); - // __last.__ctz_ = 0 - } - // __last.__ctz_ == 0 || __n == 0 - // __result.__ctz_ == 0 || __n == 0 - // do middle words - __storage_type __nw = __n / __bits_per_word; - __result.__seg_ -= __nw; - __last.__seg_ -= __nw; - _CUDA_VSTD::memmove(_CUDA_VSTD::__to_raw_pointer(__result.__seg_), - _CUDA_VSTD::__to_raw_pointer(__last.__seg_), - __nw * sizeof(__storage_type)); - __n -= __nw * __bits_per_word; - // do last word - if (__n > 0) - { - __storage_type __m = ~__storage_type(0) << (__bits_per_word - __n); - __storage_type __b = *--__last.__seg_ & __m; - *--__result.__seg_ &= ~__m; - *__result.__seg_ |= __b; - __result.__ctz_ = static_cast(-__n & (__bits_per_word - 1)); - } + __storage_type __m = ~__storage_type(0) << (__bits_per_word - __n); + __storage_type __b = *--__last.__seg_ & __m; + *--__result.__seg_ &= ~__m; + *__result.__seg_ |= __b; + __result.__ctz_ = static_cast(-__n & (__bits_per_word - 1)); } - return __result; + } + return __result; } template -__bit_iterator<_Cp, false> -__copy_backward_unaligned(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, - __bit_iterator<_Cp, false> __result) +__bit_iterator<_Cp, false> __copy_backward_unaligned( + __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - typedef __bit_iterator<_Cp, _IsConst> _In; - typedef typename _In::difference_type difference_type; - typedef typename _In::__storage_type __storage_type; - const int __bits_per_word = _In::__bits_per_word; - difference_type __n = __last - __first; - if (__n > 0) + typedef __bit_iterator<_Cp, _IsConst> _In; + typedef typename _In::difference_type difference_type; + typedef typename _In::__storage_type __storage_type; + const int __bits_per_word = _In::__bits_per_word; + difference_type __n = __last - __first; + if (__n > 0) + { + // do first word + if (__last.__ctz_ != 0) { - // do first word - if (__last.__ctz_ != 0) - { - difference_type __dn = _CUDA_VSTD::min(static_cast(__last.__ctz_), __n); - __n -= __dn; - unsigned __clz_l = __bits_per_word - __last.__ctz_; - __storage_type __m = (~__storage_type(0) << (__last.__ctz_ - __dn)) & (~__storage_type(0) >> __clz_l); - __storage_type __b = *__last.__seg_ & __m; - unsigned __clz_r = __bits_per_word - __result.__ctz_; - __storage_type __ddn = _CUDA_VSTD::min(__dn, static_cast(__result.__ctz_)); - if (__ddn > 0) - { - __m = (~__storage_type(0) << (__result.__ctz_ - __ddn)) & (~__storage_type(0) >> __clz_r); - *__result.__seg_ &= ~__m; - if (__result.__ctz_ > __last.__ctz_) - *__result.__seg_ |= __b << (__result.__ctz_ - __last.__ctz_); - else - *__result.__seg_ |= __b >> (__last.__ctz_ - __result.__ctz_); - __result.__ctz_ = static_cast(((-__ddn & (__bits_per_word - 1)) + - __result.__ctz_) % __bits_per_word); - __dn -= __ddn; - } - if (__dn > 0) - { - // __result.__ctz_ == 0 - --__result.__seg_; - __result.__ctz_ = static_cast(-__dn & (__bits_per_word - 1)); - __m = ~__storage_type(0) << __result.__ctz_; - *__result.__seg_ &= ~__m; - __last.__ctz_ -= __dn + __ddn; - *__result.__seg_ |= __b << (__result.__ctz_ - __last.__ctz_); - } - // __last.__ctz_ = 0 - } - // __last.__ctz_ == 0 || __n == 0 - // __result.__ctz_ != 0 || __n == 0 - // do middle words - unsigned __clz_r = __bits_per_word - __result.__ctz_; - __storage_type __m = ~__storage_type(0) >> __clz_r; - for (; __n >= __bits_per_word; 
__n -= __bits_per_word) + difference_type __dn = _CUDA_VSTD::min(static_cast(__last.__ctz_), __n); + __n -= __dn; + unsigned __clz_l = __bits_per_word - __last.__ctz_; + __storage_type __m = (~__storage_type(0) << (__last.__ctz_ - __dn)) & (~__storage_type(0) >> __clz_l); + __storage_type __b = *__last.__seg_ & __m; + unsigned __clz_r = __bits_per_word - __result.__ctz_; + __storage_type __ddn = _CUDA_VSTD::min(__dn, static_cast(__result.__ctz_)); + if (__ddn > 0) + { + __m = (~__storage_type(0) << (__result.__ctz_ - __ddn)) & (~__storage_type(0) >> __clz_r); + *__result.__seg_ &= ~__m; + if (__result.__ctz_ > __last.__ctz_) { - __storage_type __b = *--__last.__seg_; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b >> __clz_r; - *--__result.__seg_ &= __m; - *__result.__seg_ |= __b << __result.__ctz_; + *__result.__seg_ |= __b << (__result.__ctz_ - __last.__ctz_); } - // do last word - if (__n > 0) + else { - __m = ~__storage_type(0) << (__bits_per_word - __n); - __storage_type __b = *--__last.__seg_ & __m; - __clz_r = __bits_per_word - __result.__ctz_; - __storage_type __dn = _CUDA_VSTD::min(__n, static_cast(__result.__ctz_)); - __m = (~__storage_type(0) << (__result.__ctz_ - __dn)) & (~__storage_type(0) >> __clz_r); - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b >> (__bits_per_word - __result.__ctz_); - __result.__ctz_ = static_cast(((-__dn & (__bits_per_word - 1)) + - __result.__ctz_) % __bits_per_word); - __n -= __dn; - if (__n > 0) - { - // __result.__ctz_ == 0 - --__result.__seg_; - __result.__ctz_ = static_cast(-__n & (__bits_per_word - 1)); - __m = ~__storage_type(0) << __result.__ctz_; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b << (__result.__ctz_ - (__bits_per_word - __n - __dn)); - } + *__result.__seg_ |= __b >> (__last.__ctz_ - __result.__ctz_); } + __result.__ctz_ = static_cast(((-__ddn & (__bits_per_word - 1)) + __result.__ctz_) % __bits_per_word); + __dn -= __ddn; + } + if (__dn > 0) + { + // __result.__ctz_ == 0 + --__result.__seg_; + __result.__ctz_ = static_cast(-__dn & (__bits_per_word - 1)); + __m = ~__storage_type(0) << __result.__ctz_; + *__result.__seg_ &= ~__m; + __last.__ctz_ -= __dn + __ddn; + *__result.__seg_ |= __b << (__result.__ctz_ - __last.__ctz_); + } + // __last.__ctz_ = 0 + } + // __last.__ctz_ == 0 || __n == 0 + // __result.__ctz_ != 0 || __n == 0 + // do middle words + unsigned __clz_r = __bits_per_word - __result.__ctz_; + __storage_type __m = ~__storage_type(0) >> __clz_r; + for (; __n >= __bits_per_word; __n -= __bits_per_word) + { + __storage_type __b = *--__last.__seg_; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b >> __clz_r; + *--__result.__seg_ &= __m; + *__result.__seg_ |= __b << __result.__ctz_; } - return __result; + // do last word + if (__n > 0) + { + __m = ~__storage_type(0) << (__bits_per_word - __n); + __storage_type __b = *--__last.__seg_ & __m; + __clz_r = __bits_per_word - __result.__ctz_; + __storage_type __dn = _CUDA_VSTD::min(__n, static_cast(__result.__ctz_)); + __m = (~__storage_type(0) << (__result.__ctz_ - __dn)) & (~__storage_type(0) >> __clz_r); + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b >> (__bits_per_word - __result.__ctz_); + __result.__ctz_ = static_cast(((-__dn & (__bits_per_word - 1)) + __result.__ctz_) % __bits_per_word); + __n -= __dn; + if (__n > 0) + { + // __result.__ctz_ == 0 + --__result.__seg_; + __result.__ctz_ = static_cast(-__n & (__bits_per_word - 1)); + __m = ~__storage_type(0) << __result.__ctz_; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b << 
(__result.__ctz_ - (__bits_per_word - __n - __dn)); + } + } + } + return __result; } template -inline _LIBCUDACXX_INLINE_VISIBILITY -__bit_iterator<_Cp, false> -copy_backward(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) +inline _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator<_Cp, false> copy_backward( + __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - if (__last.__ctz_ == __result.__ctz_) - return __copy_backward_aligned(__first, __last, __result); - return __copy_backward_unaligned(__first, __last, __result); + if (__last.__ctz_ == __result.__ctz_) + { + return __copy_backward_aligned(__first, __last, __result); + } + return __copy_backward_unaligned(__first, __last, __result); } // move template -inline _LIBCUDACXX_INLINE_VISIBILITY -__bit_iterator<_Cp, false> +inline _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator<_Cp, false> move(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - return _CUDA_VSTD::copy(__first, __last, __result); + return _CUDA_VSTD::copy(__first, __last, __result); } // move_backward template -inline _LIBCUDACXX_INLINE_VISIBILITY -__bit_iterator<_Cp, false> -move_backward(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) +inline _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator<_Cp, false> move_backward( + __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - return _CUDA_VSTD::copy_backward(__first, __last, __result); + return _CUDA_VSTD::copy_backward(__first, __last, __result); } // swap_ranges template -__bit_iterator<__C2, false> -__swap_ranges_aligned(__bit_iterator<__C1, false> __first, __bit_iterator<__C1, false> __last, - __bit_iterator<__C2, false> __result) +__bit_iterator<__C2, false> __swap_ranges_aligned( + __bit_iterator<__C1, false> __first, __bit_iterator<__C1, false> __last, __bit_iterator<__C2, false> __result) { - typedef __bit_iterator<__C1, false> _I1; - typedef typename _I1::difference_type difference_type; - typedef typename _I1::__storage_type __storage_type; - const int __bits_per_word = _I1::__bits_per_word; - difference_type __n = __last - __first; + typedef __bit_iterator<__C1, false> _I1; + typedef typename _I1::difference_type difference_type; + typedef typename _I1::__storage_type __storage_type; + const int __bits_per_word = _I1::__bits_per_word; + difference_type __n = __last - __first; + if (__n > 0) + { + // do first word + if (__first.__ctz_ != 0) + { + unsigned __clz = __bits_per_word - __first.__ctz_; + difference_type __dn = _CUDA_VSTD::min(static_cast(__clz), __n); + __n -= __dn; + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); + __storage_type __b1 = *__first.__seg_ & __m; + *__first.__seg_ &= ~__m; + __storage_type __b2 = *__result.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b1; + *__first.__seg_ |= __b2; + __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; + __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); + ++__first.__seg_; + // __first.__ctz_ = 0; + } + // __first.__ctz_ == 0; + // do middle words + for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_, ++__result.__seg_) + { + swap(*__first.__seg_, *__result.__seg_); + } + // do last word if (__n > 0) { - // do first word - 
if (__first.__ctz_ != 0) - { - unsigned __clz = __bits_per_word - __first.__ctz_; - difference_type __dn = _CUDA_VSTD::min(static_cast(__clz), __n); - __n -= __dn; - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); - __storage_type __b1 = *__first.__seg_ & __m; - *__first.__seg_ &= ~__m; - __storage_type __b2 = *__result.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b1; - *__first.__seg_ |= __b2; - __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; - __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); - ++__first.__seg_; - // __first.__ctz_ = 0; - } - // __first.__ctz_ == 0; - // do middle words - for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_, ++__result.__seg_) - swap(*__first.__seg_, *__result.__seg_); - // do last word - if (__n > 0) - { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - __storage_type __b1 = *__first.__seg_ & __m; - *__first.__seg_ &= ~__m; - __storage_type __b2 = *__result.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b1; - *__first.__seg_ |= __b2; - __result.__ctz_ = static_cast(__n); - } + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + __storage_type __b1 = *__first.__seg_ & __m; + *__first.__seg_ &= ~__m; + __storage_type __b2 = *__result.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b1; + *__first.__seg_ |= __b2; + __result.__ctz_ = static_cast(__n); } - return __result; + } + return __result; } template -__bit_iterator<__C2, false> -__swap_ranges_unaligned(__bit_iterator<__C1, false> __first, __bit_iterator<__C1, false> __last, - __bit_iterator<__C2, false> __result) +__bit_iterator<__C2, false> __swap_ranges_unaligned( + __bit_iterator<__C1, false> __first, __bit_iterator<__C1, false> __last, __bit_iterator<__C2, false> __result) { - typedef __bit_iterator<__C1, false> _I1; - typedef typename _I1::difference_type difference_type; - typedef typename _I1::__storage_type __storage_type; - const int __bits_per_word = _I1::__bits_per_word; - difference_type __n = __last - __first; + typedef __bit_iterator<__C1, false> _I1; + typedef typename _I1::difference_type difference_type; + typedef typename _I1::__storage_type __storage_type; + const int __bits_per_word = _I1::__bits_per_word; + difference_type __n = __last - __first; + if (__n > 0) + { + // do first word + if (__first.__ctz_ != 0) + { + unsigned __clz_f = __bits_per_word - __first.__ctz_; + difference_type __dn = _CUDA_VSTD::min(static_cast(__clz_f), __n); + __n -= __dn; + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); + __storage_type __b1 = *__first.__seg_ & __m; + *__first.__seg_ &= ~__m; + unsigned __clz_r = __bits_per_word - __result.__ctz_; + __storage_type __ddn = _CUDA_VSTD::min<__storage_type>(__dn, __clz_r); + __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); + __storage_type __b2 = *__result.__seg_ & __m; + *__result.__seg_ &= ~__m; + if (__result.__ctz_ > __first.__ctz_) + { + unsigned __s = __result.__ctz_ - __first.__ctz_; + *__result.__seg_ |= __b1 << __s; + *__first.__seg_ |= __b2 >> __s; + } + else + { + unsigned __s = __first.__ctz_ - __result.__ctz_; + *__result.__seg_ |= __b1 >> __s; + *__first.__seg_ |= __b2 << __s; + } + __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word; + __result.__ctz_ = static_cast((__ddn + __result.__ctz_) % __bits_per_word); + __dn -= __ddn; 
+ if (__dn > 0) + { + __m = ~__storage_type(0) >> (__bits_per_word - __dn); + __b2 = *__result.__seg_ & __m; + *__result.__seg_ &= ~__m; + unsigned __s = __first.__ctz_ + __ddn; + *__result.__seg_ |= __b1 >> __s; + *__first.__seg_ |= __b2 << __s; + __result.__ctz_ = static_cast(__dn); + } + ++__first.__seg_; + // __first.__ctz_ = 0; + } + // __first.__ctz_ == 0; + // do middle words + __storage_type __m = ~__storage_type(0) << __result.__ctz_; + unsigned __clz_r = __bits_per_word - __result.__ctz_; + for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) + { + __storage_type __b1 = *__first.__seg_; + __storage_type __b2 = *__result.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b1 << __result.__ctz_; + *__first.__seg_ = __b2 >> __result.__ctz_; + ++__result.__seg_; + __b2 = *__result.__seg_ & ~__m; + *__result.__seg_ &= __m; + *__result.__seg_ |= __b1 >> __clz_r; + *__first.__seg_ |= __b2 << __clz_r; + } + // do last word if (__n > 0) { - // do first word - if (__first.__ctz_ != 0) - { - unsigned __clz_f = __bits_per_word - __first.__ctz_; - difference_type __dn = _CUDA_VSTD::min(static_cast(__clz_f), __n); - __n -= __dn; - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __storage_type __b1 = *__first.__seg_ & __m; - *__first.__seg_ &= ~__m; - unsigned __clz_r = __bits_per_word - __result.__ctz_; - __storage_type __ddn = _CUDA_VSTD::min<__storage_type>(__dn, __clz_r); - __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); - __storage_type __b2 = *__result.__seg_ & __m; - *__result.__seg_ &= ~__m; - if (__result.__ctz_ > __first.__ctz_) - { - unsigned __s = __result.__ctz_ - __first.__ctz_; - *__result.__seg_ |= __b1 << __s; - *__first.__seg_ |= __b2 >> __s; - } - else - { - unsigned __s = __first.__ctz_ - __result.__ctz_; - *__result.__seg_ |= __b1 >> __s; - *__first.__seg_ |= __b2 << __s; - } - __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word; - __result.__ctz_ = static_cast((__ddn + __result.__ctz_) % __bits_per_word); - __dn -= __ddn; - if (__dn > 0) - { - __m = ~__storage_type(0) >> (__bits_per_word - __dn); - __b2 = *__result.__seg_ & __m; - *__result.__seg_ &= ~__m; - unsigned __s = __first.__ctz_ + __ddn; - *__result.__seg_ |= __b1 >> __s; - *__first.__seg_ |= __b2 << __s; - __result.__ctz_ = static_cast(__dn); - } - ++__first.__seg_; - // __first.__ctz_ = 0; - } - // __first.__ctz_ == 0; - // do middle words - __storage_type __m = ~__storage_type(0) << __result.__ctz_; - unsigned __clz_r = __bits_per_word - __result.__ctz_; - for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) - { - __storage_type __b1 = *__first.__seg_; - __storage_type __b2 = *__result.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b1 << __result.__ctz_; - *__first.__seg_ = __b2 >> __result.__ctz_; - ++__result.__seg_; - __b2 = *__result.__seg_ & ~__m; - *__result.__seg_ &= __m; - *__result.__seg_ |= __b1 >> __clz_r; - *__first.__seg_ |= __b2 << __clz_r; - } - // do last word - if (__n > 0) - { - __m = ~__storage_type(0) >> (__bits_per_word - __n); - __storage_type __b1 = *__first.__seg_ & __m; - *__first.__seg_ &= ~__m; - __storage_type __dn = _CUDA_VSTD::min<__storage_type>(__n, __clz_r); - __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); - __storage_type __b2 = *__result.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b1 << __result.__ctz_; - *__first.__seg_ |= __b2 >> 
__result.__ctz_; - __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; - __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); - __n -= __dn; - if (__n > 0) - { - __m = ~__storage_type(0) >> (__bits_per_word - __n); - __b2 = *__result.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b1 >> __dn; - *__first.__seg_ |= __b2 << __dn; - __result.__ctz_ = static_cast(__n); - } - } + __m = ~__storage_type(0) >> (__bits_per_word - __n); + __storage_type __b1 = *__first.__seg_ & __m; + *__first.__seg_ &= ~__m; + __storage_type __dn = _CUDA_VSTD::min<__storage_type>(__n, __clz_r); + __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); + __storage_type __b2 = *__result.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b1 << __result.__ctz_; + *__first.__seg_ |= __b2 >> __result.__ctz_; + __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; + __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); + __n -= __dn; + if (__n > 0) + { + __m = ~__storage_type(0) >> (__bits_per_word - __n); + __b2 = *__result.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b1 >> __dn; + *__first.__seg_ |= __b2 << __dn; + __result.__ctz_ = static_cast(__n); + } } - return __result; + } + return __result; } template -inline _LIBCUDACXX_INLINE_VISIBILITY -__bit_iterator<__C2, false> -swap_ranges(__bit_iterator<__C1, false> __first1, __bit_iterator<__C1, false> __last1, - __bit_iterator<__C2, false> __first2) +inline _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator<__C2, false> swap_ranges( + __bit_iterator<__C1, false> __first1, __bit_iterator<__C1, false> __last1, __bit_iterator<__C2, false> __first2) { - if (__first1.__ctz_ == __first2.__ctz_) - return __swap_ranges_aligned(__first1, __last1, __first2); - return __swap_ranges_unaligned(__first1, __last1, __first2); + if (__first1.__ctz_ == __first2.__ctz_) + { + return __swap_ranges_aligned(__first1, __last1, __first2); + } + return __swap_ranges_unaligned(__first1, __last1, __first2); } // rotate @@ -881,413 +917,469 @@ swap_ranges(__bit_iterator<__C1, false> __first1, __bit_iterator<__C1, false> __ template struct __bit_array { - typedef typename _Cp::difference_type difference_type; - typedef typename _Cp::__storage_type __storage_type; - typedef typename _Cp::__storage_pointer __storage_pointer; - typedef typename _Cp::iterator iterator; - static const unsigned __bits_per_word = _Cp::__bits_per_word; - static const unsigned _Np = 4; - - difference_type __size_; - __storage_type __word_[_Np]; - - _LIBCUDACXX_INLINE_VISIBILITY static difference_type capacity() - {return static_cast(_Np * __bits_per_word);} - _LIBCUDACXX_INLINE_VISIBILITY explicit __bit_array(difference_type __s) : __size_(__s) {} - _LIBCUDACXX_INLINE_VISIBILITY iterator begin() - { - return iterator(pointer_traits<__storage_pointer>::pointer_to(__word_[0]), 0); - } - _LIBCUDACXX_INLINE_VISIBILITY iterator end() - { - return iterator(pointer_traits<__storage_pointer>::pointer_to(__word_[0]) + __size_ / __bits_per_word, - static_cast(__size_ % __bits_per_word)); - } + typedef typename _Cp::difference_type difference_type; + typedef typename _Cp::__storage_type __storage_type; + typedef typename _Cp::__storage_pointer __storage_pointer; + typedef typename _Cp::iterator iterator; + static const unsigned __bits_per_word = _Cp::__bits_per_word; + static const unsigned _Np = 4; + + difference_type __size_; + __storage_type __word_[_Np]; + + _LIBCUDACXX_INLINE_VISIBILITY 
static difference_type capacity() + { + return static_cast(_Np * __bits_per_word); + } + _LIBCUDACXX_INLINE_VISIBILITY explicit __bit_array(difference_type __s) + : __size_(__s) + {} + _LIBCUDACXX_INLINE_VISIBILITY iterator begin() + { + return iterator(pointer_traits<__storage_pointer>::pointer_to(__word_[0]), 0); + } + _LIBCUDACXX_INLINE_VISIBILITY iterator end() + { + return iterator(pointer_traits<__storage_pointer>::pointer_to(__word_[0]) + __size_ / __bits_per_word, + static_cast(__size_ % __bits_per_word)); + } }; template __bit_iterator<_Cp, false> rotate(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __middle, __bit_iterator<_Cp, false> __last) { - typedef __bit_iterator<_Cp, false> _I1; - typedef typename _I1::difference_type difference_type; - difference_type __d1 = __middle - __first; - difference_type __d2 = __last - __middle; - _I1 __r = __first + __d2; - while (__d1 != 0 && __d2 != 0) + typedef __bit_iterator<_Cp, false> _I1; + typedef typename _I1::difference_type difference_type; + difference_type __d1 = __middle - __first; + difference_type __d2 = __last - __middle; + _I1 __r = __first + __d2; + while (__d1 != 0 && __d2 != 0) + { + if (__d1 <= __d2) { - if (__d1 <= __d2) - { - if (__d1 <= __bit_array<_Cp>::capacity()) - { - __bit_array<_Cp> __b(__d1); - _CUDA_VSTD::copy(__first, __middle, __b.begin()); - _CUDA_VSTD::copy(__b.begin(), __b.end(), _CUDA_VSTD::copy(__middle, __last, __first)); - break; - } - else - { - __bit_iterator<_Cp, false> __mp = _CUDA_VSTD::swap_ranges(__first, __middle, __middle); - __first = __middle; - __middle = __mp; - __d2 -= __d1; - } - } - else - { - if (__d2 <= __bit_array<_Cp>::capacity()) - { - __bit_array<_Cp> __b(__d2); - _CUDA_VSTD::copy(__middle, __last, __b.begin()); - _CUDA_VSTD::copy_backward(__b.begin(), __b.end(), _CUDA_VSTD::copy_backward(__first, __middle, __last)); - break; - } - else - { - __bit_iterator<_Cp, false> __mp = __first + __d2; - _CUDA_VSTD::swap_ranges(__first, __mp, __middle); - __first = __mp; - __d1 -= __d2; - } - } + if (__d1 <= __bit_array<_Cp>::capacity()) + { + __bit_array<_Cp> __b(__d1); + _CUDA_VSTD::copy(__first, __middle, __b.begin()); + _CUDA_VSTD::copy(__b.begin(), __b.end(), _CUDA_VSTD::copy(__middle, __last, __first)); + break; + } + else + { + __bit_iterator<_Cp, false> __mp = _CUDA_VSTD::swap_ranges(__first, __middle, __middle); + __first = __middle; + __middle = __mp; + __d2 -= __d1; + } + } + else + { + if (__d2 <= __bit_array<_Cp>::capacity()) + { + __bit_array<_Cp> __b(__d2); + _CUDA_VSTD::copy(__middle, __last, __b.begin()); + _CUDA_VSTD::copy_backward(__b.begin(), __b.end(), _CUDA_VSTD::copy_backward(__first, __middle, __last)); + break; + } + else + { + __bit_iterator<_Cp, false> __mp = __first + __d2; + _CUDA_VSTD::swap_ranges(__first, __mp, __middle); + __first = __mp; + __d1 -= __d2; + } } - return __r; + } + return __r; } // equal template -bool -__equal_unaligned(__bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, - __bit_iterator<_Cp, _IC2> __first2) +bool __equal_unaligned( + __bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) { - typedef __bit_iterator<_Cp, _IC1> _It; - typedef typename _It::difference_type difference_type; - typedef typename _It::__storage_type __storage_type; - static const int __bits_per_word = _It::__bits_per_word; - difference_type __n = __last1 - __first1; - if (__n > 0) + typedef __bit_iterator<_Cp, _IC1> _It; + typedef typename _It::difference_type difference_type; + 
typedef typename _It::__storage_type __storage_type; + static const int __bits_per_word = _It::__bits_per_word; + difference_type __n = __last1 - __first1; + if (__n > 0) + { + // do first word + if (__first1.__ctz_ != 0) { - // do first word - if (__first1.__ctz_ != 0) + unsigned __clz_f = __bits_per_word - __first1.__ctz_; + difference_type __dn = _CUDA_VSTD::min(static_cast(__clz_f), __n); + __n -= __dn; + __storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); + __storage_type __b = *__first1.__seg_ & __m; + unsigned __clz_r = __bits_per_word - __first2.__ctz_; + __storage_type __ddn = _CUDA_VSTD::min<__storage_type>(__dn, __clz_r); + __m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); + if (__first2.__ctz_ > __first1.__ctz_) + { + if ((*__first2.__seg_ & __m) != (__b << (__first2.__ctz_ - __first1.__ctz_))) { - unsigned __clz_f = __bits_per_word - __first1.__ctz_; - difference_type __dn = _CUDA_VSTD::min(static_cast(__clz_f), __n); - __n -= __dn; - __storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __storage_type __b = *__first1.__seg_ & __m; - unsigned __clz_r = __bits_per_word - __first2.__ctz_; - __storage_type __ddn = _CUDA_VSTD::min<__storage_type>(__dn, __clz_r); - __m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); - if (__first2.__ctz_ > __first1.__ctz_) - { - if ((*__first2.__seg_ & __m) != (__b << (__first2.__ctz_ - __first1.__ctz_))) - return false; - } - else - { - if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ - __first2.__ctz_))) - return false; - } - __first2.__seg_ += (__ddn + __first2.__ctz_) / __bits_per_word; - __first2.__ctz_ = static_cast((__ddn + __first2.__ctz_) % __bits_per_word); - __dn -= __ddn; - if (__dn > 0) - { - __m = ~__storage_type(0) >> (__bits_per_word - __dn); - if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ + __ddn))) - return false; - __first2.__ctz_ = static_cast(__dn); - } - ++__first1.__seg_; - // __first1.__ctz_ = 0; + return false; } - // __first1.__ctz_ == 0; - // do middle words - unsigned __clz_r = __bits_per_word - __first2.__ctz_; - __storage_type __m = ~__storage_type(0) << __first2.__ctz_; - for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_) + } + else + { + if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ - __first2.__ctz_))) { - __storage_type __b = *__first1.__seg_; - if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_)) - return false; - ++__first2.__seg_; - if ((*__first2.__seg_ & ~__m) != (__b >> __clz_r)) - return false; + return false; } - // do last word - if (__n > 0) + } + __first2.__seg_ += (__ddn + __first2.__ctz_) / __bits_per_word; + __first2.__ctz_ = static_cast((__ddn + __first2.__ctz_) % __bits_per_word); + __dn -= __ddn; + if (__dn > 0) + { + __m = ~__storage_type(0) >> (__bits_per_word - __dn); + if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ + __ddn))) { - __m = ~__storage_type(0) >> (__bits_per_word - __n); - __storage_type __b = *__first1.__seg_ & __m; - __storage_type __dn = _CUDA_VSTD::min(__n, static_cast(__clz_r)); - __m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); - if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_)) - return false; - __first2.__seg_ += (__dn + __first2.__ctz_) / __bits_per_word; - __first2.__ctz_ = static_cast((__dn + __first2.__ctz_) % __bits_per_word); - __n -= __dn; - if (__n > 0) - { - __m = 
~__storage_type(0) >> (__bits_per_word - __n); - if ((*__first2.__seg_ & __m) != (__b >> __dn)) - return false; - } + return false; } + __first2.__ctz_ = static_cast(__dn); + } + ++__first1.__seg_; + // __first1.__ctz_ = 0; } - return true; + // __first1.__ctz_ == 0; + // do middle words + unsigned __clz_r = __bits_per_word - __first2.__ctz_; + __storage_type __m = ~__storage_type(0) << __first2.__ctz_; + for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_) + { + __storage_type __b = *__first1.__seg_; + if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_)) + { + return false; + } + ++__first2.__seg_; + if ((*__first2.__seg_ & ~__m) != (__b >> __clz_r)) + { + return false; + } + } + // do last word + if (__n > 0) + { + __m = ~__storage_type(0) >> (__bits_per_word - __n); + __storage_type __b = *__first1.__seg_ & __m; + __storage_type __dn = _CUDA_VSTD::min(__n, static_cast(__clz_r)); + __m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); + if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_)) + { + return false; + } + __first2.__seg_ += (__dn + __first2.__ctz_) / __bits_per_word; + __first2.__ctz_ = static_cast((__dn + __first2.__ctz_) % __bits_per_word); + __n -= __dn; + if (__n > 0) + { + __m = ~__storage_type(0) >> (__bits_per_word - __n); + if ((*__first2.__seg_ & __m) != (__b >> __dn)) + { + return false; + } + } + } + } + return true; } template -bool -__equal_aligned(__bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, - __bit_iterator<_Cp, _IC2> __first2) +bool __equal_aligned( + __bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) { - typedef __bit_iterator<_Cp, _IC1> _It; - typedef typename _It::difference_type difference_type; - typedef typename _It::__storage_type __storage_type; - static const int __bits_per_word = _It::__bits_per_word; - difference_type __n = __last1 - __first1; + typedef __bit_iterator<_Cp, _IC1> _It; + typedef typename _It::difference_type difference_type; + typedef typename _It::__storage_type __storage_type; + static const int __bits_per_word = _It::__bits_per_word; + difference_type __n = __last1 - __first1; + if (__n > 0) + { + // do first word + if (__first1.__ctz_ != 0) + { + unsigned __clz = __bits_per_word - __first1.__ctz_; + difference_type __dn = _CUDA_VSTD::min(static_cast(__clz), __n); + __n -= __dn; + __storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); + if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m)) + { + return false; + } + ++__first2.__seg_; + ++__first1.__seg_; + // __first1.__ctz_ = 0; + // __first2.__ctz_ = 0; + } + // __first1.__ctz_ == 0; + // __first2.__ctz_ == 0; + // do middle words + for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_, ++__first2.__seg_) + { + if (*__first2.__seg_ != *__first1.__seg_) + { + return false; + } + } + // do last word if (__n > 0) { - // do first word - if (__first1.__ctz_ != 0) - { - unsigned __clz = __bits_per_word - __first1.__ctz_; - difference_type __dn = _CUDA_VSTD::min(static_cast(__clz), __n); - __n -= __dn; - __storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); - if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m)) - return false; - ++__first2.__seg_; - ++__first1.__seg_; - // __first1.__ctz_ = 0; - // __first2.__ctz_ = 0; - } - // __first1.__ctz_ == 0; - // __first2.__ctz_ == 0; - // do middle words - for (; __n >= 
__bits_per_word; __n -= __bits_per_word, ++__first1.__seg_, ++__first2.__seg_) - if (*__first2.__seg_ != *__first1.__seg_) - return false; - // do last word - if (__n > 0) - { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m)) - return false; - } + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m)) + { + return false; + } } - return true; + } + return true; } template -inline _LIBCUDACXX_INLINE_VISIBILITY -bool +inline _LIBCUDACXX_INLINE_VISIBILITY bool equal(__bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) { - if (__first1.__ctz_ == __first2.__ctz_) - return __equal_aligned(__first1, __last1, __first2); - return __equal_unaligned(__first1, __last1, __first2); + if (__first1.__ctz_ == __first2.__ctz_) + { + return __equal_aligned(__first1, __last1, __first2); + } + return __equal_unaligned(__first1, __last1, __first2); } -template +template class __bit_iterator { public: - typedef typename _Cp::difference_type difference_type; - typedef bool value_type; - typedef __bit_iterator pointer; - typedef typename conditional<_IsConst, __bit_const_reference<_Cp>, __bit_reference<_Cp> >::type reference; - typedef random_access_iterator_tag iterator_category; + typedef typename _Cp::difference_type difference_type; + typedef bool value_type; + typedef __bit_iterator pointer; + typedef typename conditional<_IsConst, __bit_const_reference<_Cp>, __bit_reference<_Cp>>::type reference; + typedef random_access_iterator_tag iterator_category; private: - typedef typename _Cp::__storage_type __storage_type; - typedef typename conditional<_IsConst, typename _Cp::__const_storage_pointer, - typename _Cp::__storage_pointer>::type __storage_pointer; - static const unsigned __bits_per_word = _Cp::__bits_per_word; + typedef typename _Cp::__storage_type __storage_type; + typedef typename conditional<_IsConst, typename _Cp::__const_storage_pointer, typename _Cp::__storage_pointer>::type + __storage_pointer; + static const unsigned __bits_per_word = _Cp::__bits_per_word; - __storage_pointer __seg_; - unsigned __ctz_; + __storage_pointer __seg_; + unsigned __ctz_; public: - _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator() noexcept + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator() noexcept #if _CCCL_STD_VER > 2011 - : __seg_(nullptr), __ctz_(0) + : __seg_(nullptr) + , __ctz_(0) #endif - {} - // avoid re-declaring a copy constructor for the non-const version. - using __type_for_copy_to_const = - _If<_IsConst, __bit_iterator<_Cp, false>, struct __private_nat>; - - _LIBCUDACXX_INLINE_VISIBILITY - __bit_iterator(const __type_for_copy_to_const& __it) noexcept - : __seg_(__it.__seg_), __ctz_(__it.__ctz_) {} - - _LIBCUDACXX_INLINE_VISIBILITY reference operator*() const noexcept - {return reference(__seg_, __storage_type(1) << __ctz_);} - - _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator& operator++() - { - if (__ctz_ != __bits_per_word-1) - ++__ctz_; - else - { - __ctz_ = 0; - ++__seg_; - } - return *this; - } - - _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator operator++(int) + {} + // avoid re-declaring a copy constructor for the non-const version. 
+ using __type_for_copy_to_const = _If<_IsConst, __bit_iterator<_Cp, false>, struct __private_nat>; + + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator(const __type_for_copy_to_const& __it) noexcept + : __seg_(__it.__seg_) + , __ctz_(__it.__ctz_) + {} + + _LIBCUDACXX_INLINE_VISIBILITY reference operator*() const noexcept + { + return reference(__seg_, __storage_type(1) << __ctz_); + } + + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator& operator++() + { + if (__ctz_ != __bits_per_word - 1) { - __bit_iterator __tmp = *this; - ++(*this); - return __tmp; + ++__ctz_; } - - _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator& operator--() + else { - if (__ctz_ != 0) - --__ctz_; - else - { - __ctz_ = __bits_per_word - 1; - --__seg_; - } - return *this; + __ctz_ = 0; + ++__seg_; } - - _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator operator--(int) + return *this; + } + + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator operator++(int) + { + __bit_iterator __tmp = *this; + ++(*this); + return __tmp; + } + + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator& operator--() + { + if (__ctz_ != 0) { - __bit_iterator __tmp = *this; - --(*this); - return __tmp; + --__ctz_; } - - _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator& operator+=(difference_type __n) - { - if (__n >= 0) - __seg_ += (__n + __ctz_) / __bits_per_word; - else - __seg_ += static_cast(__n - __bits_per_word + __ctz_ + 1) - / static_cast(__bits_per_word); - __n &= (__bits_per_word - 1); - __ctz_ = static_cast((__n + __ctz_) % __bits_per_word); - return *this; - } - - _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator& operator-=(difference_type __n) + else { - return *this += -__n; + __ctz_ = __bits_per_word - 1; + --__seg_; } - - _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator operator+(difference_type __n) const + return *this; + } + + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator operator--(int) + { + __bit_iterator __tmp = *this; + --(*this); + return __tmp; + } + + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator& operator+=(difference_type __n) + { + if (__n >= 0) { - __bit_iterator __t(*this); - __t += __n; - return __t; + __seg_ += (__n + __ctz_) / __bits_per_word; } - - _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator operator-(difference_type __n) const + else { - __bit_iterator __t(*this); - __t -= __n; - return __t; + __seg_ += static_cast(__n - __bits_per_word + __ctz_ + 1) + / static_cast(__bits_per_word); } - - _LIBCUDACXX_INLINE_VISIBILITY - friend __bit_iterator operator+(difference_type __n, const __bit_iterator& __it) {return __it + __n;} - - _LIBCUDACXX_INLINE_VISIBILITY - friend difference_type operator-(const __bit_iterator& __x, const __bit_iterator& __y) - {return (__x.__seg_ - __y.__seg_) * __bits_per_word + __x.__ctz_ - __y.__ctz_;} - - _LIBCUDACXX_INLINE_VISIBILITY reference operator[](difference_type __n) const {return *(*this + __n);} - - _LIBCUDACXX_INLINE_VISIBILITY friend bool operator==(const __bit_iterator& __x, const __bit_iterator& __y) - {return __x.__seg_ == __y.__seg_ && __x.__ctz_ == __y.__ctz_;} - - _LIBCUDACXX_INLINE_VISIBILITY friend bool operator!=(const __bit_iterator& __x, const __bit_iterator& __y) - {return !(__x == __y);} - - _LIBCUDACXX_INLINE_VISIBILITY friend bool operator<(const __bit_iterator& __x, const __bit_iterator& __y) - {return __x.__seg_ < __y.__seg_ || (__x.__seg_ == __y.__seg_ && __x.__ctz_ < __y.__ctz_);} - - _LIBCUDACXX_INLINE_VISIBILITY friend bool operator>(const __bit_iterator& __x, const __bit_iterator& __y) - {return __y < __x;} - - _LIBCUDACXX_INLINE_VISIBILITY friend bool operator<=(const __bit_iterator& __x, 
const __bit_iterator& __y) - {return !(__y < __x);} - - _LIBCUDACXX_INLINE_VISIBILITY friend bool operator>=(const __bit_iterator& __x, const __bit_iterator& __y) - {return !(__x < __y);} + __n &= (__bits_per_word - 1); + __ctz_ = static_cast<unsigned>((__n + __ctz_) % __bits_per_word); + return *this; + } + + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator& operator-=(difference_type __n) + { + return *this += -__n; + } + + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator operator+(difference_type __n) const + { + __bit_iterator __t(*this); + __t += __n; + return __t; + } + + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator operator-(difference_type __n) const + { + __bit_iterator __t(*this); + __t -= __n; + return __t; + } + + _LIBCUDACXX_INLINE_VISIBILITY friend __bit_iterator operator+(difference_type __n, const __bit_iterator& __it) + { + return __it + __n; + } + + _LIBCUDACXX_INLINE_VISIBILITY friend difference_type operator-(const __bit_iterator& __x, const __bit_iterator& __y) + { + return (__x.__seg_ - __y.__seg_) * __bits_per_word + __x.__ctz_ - __y.__ctz_; + } + + _LIBCUDACXX_INLINE_VISIBILITY reference operator[](difference_type __n) const + { + return *(*this + __n); + } + + _LIBCUDACXX_INLINE_VISIBILITY friend bool operator==(const __bit_iterator& __x, const __bit_iterator& __y) + { + return __x.__seg_ == __y.__seg_ && __x.__ctz_ == __y.__ctz_; + } + + _LIBCUDACXX_INLINE_VISIBILITY friend bool operator!=(const __bit_iterator& __x, const __bit_iterator& __y) + { + return !(__x == __y); + } + + _LIBCUDACXX_INLINE_VISIBILITY friend bool operator<(const __bit_iterator& __x, const __bit_iterator& __y) + { + return __x.__seg_ < __y.__seg_ || (__x.__seg_ == __y.__seg_ && __x.__ctz_ < __y.__ctz_); + } + + _LIBCUDACXX_INLINE_VISIBILITY friend bool operator>(const __bit_iterator& __x, const __bit_iterator& __y) + { + return __y < __x; + } + + _LIBCUDACXX_INLINE_VISIBILITY friend bool operator<=(const __bit_iterator& __x, const __bit_iterator& __y) + { + return !(__y < __x); + } + + _LIBCUDACXX_INLINE_VISIBILITY friend bool operator>=(const __bit_iterator& __x, const __bit_iterator& __y) + { + return !(__x < __y); + } private: - _LIBCUDACXX_INLINE_VISIBILITY - __bit_iterator(__storage_pointer __s, unsigned __ctz) noexcept - : __seg_(__s), __ctz_(__ctz) {} - - friend typename _Cp::__self; - - friend class __bit_reference<_Cp>; - friend class __bit_const_reference<_Cp>; - friend class __bit_iterator<_Cp, true>; - template <class _Dp> friend struct __bit_array; - template <class _Dp> friend void __fill_n_false(__bit_iterator<_Dp, false> __first, typename _Dp::size_type __n); - template <class _Dp> friend void __fill_n_true(__bit_iterator<_Dp, false> __first, typename _Dp::size_type __n); - template <class _Dp, bool _IC> friend __bit_iterator<_Dp, false> __copy_aligned(__bit_iterator<_Dp, _IC> __first, - __bit_iterator<_Dp, _IC> __last, - __bit_iterator<_Dp, false> __result); - template <class _Dp, bool _IC> friend __bit_iterator<_Dp, false> __copy_unaligned(__bit_iterator<_Dp, _IC> __first, - __bit_iterator<_Dp, _IC> __last, - __bit_iterator<_Dp, false> __result); - template <class _Dp, bool _IC> friend __bit_iterator<_Dp, false> copy(__bit_iterator<_Dp, _IC> __first, - __bit_iterator<_Dp, _IC> __last, - __bit_iterator<_Dp, false> __result); - template <class _Dp, bool _IC> friend __bit_iterator<_Dp, false> __copy_backward_aligned(__bit_iterator<_Dp, _IC> __first, - __bit_iterator<_Dp, _IC> __last, - __bit_iterator<_Dp, false> __result); - template <class _Dp, bool _IC> friend __bit_iterator<_Dp, false> __copy_backward_unaligned(__bit_iterator<_Dp, _IC> __first, - __bit_iterator<_Dp, _IC> __last, - __bit_iterator<_Dp, false> __result); - template <class _Dp, bool _IC> friend 
__bit_iterator<_Dp, false> copy_backward(__bit_iterator<_Dp, _IC> __first, - __bit_iterator<_Dp, _IC> __last, - __bit_iterator<_Dp, false> __result); - template <class __C1, class __C2> friend __bit_iterator<__C2, false> __swap_ranges_aligned(__bit_iterator<__C1, false>, - __bit_iterator<__C1, false>, - __bit_iterator<__C2, false>); - template <class __C1, class __C2> friend __bit_iterator<__C2, false> __swap_ranges_unaligned(__bit_iterator<__C1, false>, - __bit_iterator<__C1, false>, - __bit_iterator<__C2, false>); - template <class __C1, class __C2> friend __bit_iterator<__C2, false> swap_ranges(__bit_iterator<__C1, false>, - __bit_iterator<__C1, false>, - __bit_iterator<__C2, false>); - template <class _Dp> friend __bit_iterator<_Dp, false> rotate(__bit_iterator<_Dp, false>, - __bit_iterator<_Dp, false>, - __bit_iterator<_Dp, false>); - template <class _Dp, bool _IC1, bool _IC2> friend bool __equal_aligned(__bit_iterator<_Dp, _IC1>, - __bit_iterator<_Dp, _IC1>, - __bit_iterator<_Dp, _IC2>); - template <class _Dp, bool _IC1, bool _IC2> friend bool __equal_unaligned(__bit_iterator<_Dp, _IC1>, - __bit_iterator<_Dp, _IC1>, - __bit_iterator<_Dp, _IC2>); - template <class _Dp, bool _IC1, bool _IC2> friend bool equal(__bit_iterator<_Dp, _IC1>, - __bit_iterator<_Dp, _IC1>, - __bit_iterator<_Dp, _IC2>); - template <class _Dp, bool _IC> friend __bit_iterator<_Dp, _IC> __find_bool_true(__bit_iterator<_Dp, _IC>, - typename _Dp::size_type); - template <class _Dp, bool _IC> friend __bit_iterator<_Dp, _IC> __find_bool_false(__bit_iterator<_Dp, _IC>, - typename _Dp::size_type); - template <class _Dp, bool _IC> friend typename __bit_iterator<_Dp, _IC>::difference_type - __count_bool_true(__bit_iterator<_Dp, _IC>, typename _Dp::size_type); - template <class _Dp, bool _IC> friend typename __bit_iterator<_Dp, _IC>::difference_type - __count_bool_false(__bit_iterator<_Dp, _IC>, typename _Dp::size_type); + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator(__storage_pointer __s, unsigned __ctz) noexcept + : __seg_(__s) + , __ctz_(__ctz) + {} + + friend typename _Cp::__self; + + friend class __bit_reference<_Cp>; + friend class __bit_const_reference<_Cp>; + friend class __bit_iterator<_Cp, true>; + template <class _Dp> + friend struct __bit_array; + template <class _Dp> + friend void __fill_n_false(__bit_iterator<_Dp, false> __first, typename _Dp::size_type __n); + template <class _Dp> + friend void __fill_n_true(__bit_iterator<_Dp, false> __first, typename _Dp::size_type __n); + template <class _Dp, bool _IC> + friend __bit_iterator<_Dp, false> __copy_aligned( + __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); + template <class _Dp, bool _IC> + friend __bit_iterator<_Dp, false> __copy_unaligned( + __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); + template <class _Dp, bool _IC> + friend __bit_iterator<_Dp, false> + copy(__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); + template <class _Dp, bool _IC> + friend __bit_iterator<_Dp, false> __copy_backward_aligned( + __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); + template <class _Dp, bool _IC> + friend __bit_iterator<_Dp, false> __copy_backward_unaligned( + __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); + template <class _Dp, bool _IC> + friend __bit_iterator<_Dp, false> + copy_backward(__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); + template <class __C1, class __C2> + friend __bit_iterator<__C2, false> + __swap_ranges_aligned(__bit_iterator<__C1, false>, __bit_iterator<__C1, false>, __bit_iterator<__C2, false>); + template <class __C1, class __C2> + friend __bit_iterator<__C2, false> + __swap_ranges_unaligned(__bit_iterator<__C1, false>, __bit_iterator<__C1, false>, __bit_iterator<__C2, false>); + template <class __C1, class __C2> + friend 
__bit_iterator<__C2, false> + swap_ranges(__bit_iterator<__C1, false>, __bit_iterator<__C1, false>, __bit_iterator<__C2, false>); + template <class _Dp> + friend __bit_iterator<_Dp, false> + rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>); + template <class _Dp, bool _IC1, bool _IC2> + friend bool __equal_aligned(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>); + template <class _Dp, bool _IC1, bool _IC2> + friend bool __equal_unaligned(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>); + template <class _Dp, bool _IC1, bool _IC2> + friend bool equal(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>); + template <class _Dp, bool _IC> + friend __bit_iterator<_Dp, _IC> __find_bool_true(__bit_iterator<_Dp, _IC>, typename _Dp::size_type); + template <class _Dp, bool _IC> + friend __bit_iterator<_Dp, _IC> __find_bool_false(__bit_iterator<_Dp, _IC>, typename _Dp::size_type); + template <class _Dp, bool _IC> + friend typename __bit_iterator<_Dp, _IC>::difference_type + __count_bool_true(__bit_iterator<_Dp, _IC>, typename _Dp::size_type); + template <class _Dp, bool _IC> + friend typename __bit_iterator<_Dp, _IC>::difference_type + __count_bool_false(__bit_iterator<_Dp, _IC>, typename _Dp::size_type); }; _LIBCUDACXX_END_NAMESPACE_STD _LIBCUDACXX_POP_MACROS -#endif // _LIBCUDACXX___BIT_REFERENCE +#endif // _LIBCUDACXX___BIT_REFERENCE diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/__config b/libcudacxx/include/cuda/std/detail/libcxx/include/__config index b7e8bcc3118..274d7e020b4 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/__config +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/__config @@ -43,13 +43,13 @@ #endif #if defined(_CCCL_COMPILER_MSVC) -#if _MSC_VER < 1917 -#define _LIBCUDACXX_COMPILER_MSVC_2017 -#elif _MSC_VER < 1930 -#define _LIBCUDACXX_COMPILER_MSVC_2019 -#else -#define _LIBCUDACXX_COMPILER_MSVC_2022 -#endif +# if _MSC_VER < 1917 +# define _LIBCUDACXX_COMPILER_MSVC_2017 +# elif _MSC_VER < 1930 +# define _LIBCUDACXX_COMPILER_MSVC_2019 +# else +# define _LIBCUDACXX_COMPILER_MSVC_2022 +# endif #endif // defined(_LIBCUDACXX_COMPILER_MSVC) #if defined(_CCCL_CUDA_COMPILER_NVCC) @@ -80,372 +80,368 @@ // __config may be included in `extern "C"` contexts, switch back to include extern "C++" { -#include +# include } -#ifdef __GNUC__ -# define _GNUC_VER (__GNUC__ * 100 + __GNUC_MINOR__) -#else -# define _GNUC_VER 0 -#endif +# ifdef __GNUC__ +# define _GNUC_VER (__GNUC__ * 100 + __GNUC_MINOR__) +# else +# define _GNUC_VER 0 +# endif -#define _LIBCUDACXX_VERSION 10000 +# define _LIBCUDACXX_VERSION 10000 -#ifndef _LIBCUDACXX_ABI_VERSION -# define _LIBCUDACXX_ABI_VERSION 1 -#endif +# ifndef _LIBCUDACXX_ABI_VERSION +# define _LIBCUDACXX_ABI_VERSION 1 +# endif -#define _LIBCUDACXX_STD_VER _CCCL_STD_VER +# define _LIBCUDACXX_STD_VER _CCCL_STD_VER -#if _CCCL_STD_VER < 2011 -# error libcu++ requires C++11 or later -#endif +# if _CCCL_STD_VER < 2011 +# error libcu++ requires C++11 or later +# endif -#if (defined(_CCCL_COMPILER_NVHPC) && defined(__linux__)) \ - || defined(_CCCL_COMPILER_NVRTC) - #define __ELF__ -#endif +# if (defined(_CCCL_COMPILER_NVHPC) && defined(__linux__)) || defined(_CCCL_COMPILER_NVRTC) +# define __ELF__ +# endif -#if defined(__ELF__) -# define _LIBCUDACXX_OBJECT_FORMAT_ELF 1 -#elif 
defined(__MACH__) +# define _LIBCUDACXX_OBJECT_FORMAT_MACHO 1 +# elif defined(_WIN32) +# define _LIBCUDACXX_OBJECT_FORMAT_COFF 1 +# elif defined(__wasm__) +# define _LIBCUDACXX_OBJECT_FORMAT_WASM 1 +# else +# error Unknown object file format +# endif -#if defined(_LIBCUDACXX_ABI_UNSTABLE) || _LIBCUDACXX_ABI_VERSION >= 2 || defined(__cuda_std__) +# if defined(_LIBCUDACXX_ABI_UNSTABLE) || _LIBCUDACXX_ABI_VERSION >= 2 || defined(__cuda_std__) // Change short string representation so that string data starts at offset 0, // improving its alignment in some cases. -# define _LIBCUDACXX_ABI_ALTERNATE_STRING_LAYOUT +# define _LIBCUDACXX_ABI_ALTERNATE_STRING_LAYOUT // Fix deque iterator type in order to support incomplete types. -# define _LIBCUDACXX_ABI_INCOMPLETE_TYPES_IN_DEQUE +# define _LIBCUDACXX_ABI_INCOMPLETE_TYPES_IN_DEQUE // Fix undefined behavior in how std::list stores its linked nodes. -# define _LIBCUDACXX_ABI_LIST_REMOVE_NODE_POINTER_UB +# define _LIBCUDACXX_ABI_LIST_REMOVE_NODE_POINTER_UB // Fix undefined behavior in how __tree stores its end and parent nodes. -# define _LIBCUDACXX_ABI_TREE_REMOVE_NODE_POINTER_UB +# define _LIBCUDACXX_ABI_TREE_REMOVE_NODE_POINTER_UB // Fix undefined behavior in how __hash_table stores its pointer types. -# define _LIBCUDACXX_ABI_FIX_UNORDERED_NODE_POINTER_UB -# define _LIBCUDACXX_ABI_FORWARD_LIST_REMOVE_NODE_POINTER_UB -# define _LIBCUDACXX_ABI_FIX_UNORDERED_CONTAINER_SIZE_TYPE +# define _LIBCUDACXX_ABI_FIX_UNORDERED_NODE_POINTER_UB +# define _LIBCUDACXX_ABI_FORWARD_LIST_REMOVE_NODE_POINTER_UB +# define _LIBCUDACXX_ABI_FIX_UNORDERED_CONTAINER_SIZE_TYPE // Don't use a nullptr_t simulation type in C++03 instead using C++11 nullptr // provided under the alternate keyword __nullptr, which changes the mangling // of nullptr_t. This option is ABI incompatible with GCC in C++03 mode. -# define _LIBCUDACXX_ABI_ALWAYS_USE_CXX11_NULLPTR +# define _LIBCUDACXX_ABI_ALWAYS_USE_CXX11_NULLPTR // Define the `pointer_safety` enum as a C++11 strongly typed enumeration // instead of as a class simulating an enum. If this option is enabled // `pointer_safety` and `get_pointer_safety()` will no longer be available // in C++03. -# define _LIBCUDACXX_ABI_POINTER_SAFETY_ENUM_TYPE +# define _LIBCUDACXX_ABI_POINTER_SAFETY_ENUM_TYPE // Define a key function for `bad_function_call` in the library, to centralize // its vtable and typeinfo to libc++ rather than having all other libraries // using that class define their own copies. -# define _LIBCUDACXX_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION +# define _LIBCUDACXX_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION // Enable optimized version of __do_get_(un)signed which avoids redundant copies. -# define _LIBCUDACXX_ABI_OPTIMIZED_LOCALE_NUM_GET +# define _LIBCUDACXX_ABI_OPTIMIZED_LOCALE_NUM_GET // Use the smallest possible integer type to represent the index of the variant. // Previously libc++ used "unsigned int" exclusively. -# define _LIBCUDACXX_ABI_VARIANT_INDEX_TYPE_OPTIMIZATION +# define _LIBCUDACXX_ABI_VARIANT_INDEX_TYPE_OPTIMIZATION // Unstable attempt to provide a more optimized std::function -# define _LIBCUDACXX_ABI_OPTIMIZED_FUNCTION +# define _LIBCUDACXX_ABI_OPTIMIZED_FUNCTION // All the regex constants must be distinct and nonzero. 
-# define _LIBCUDACXX_ABI_REGEX_CONSTANTS_NONZERO -#elif _LIBCUDACXX_ABI_VERSION == 1 -# if !defined(_LIBCUDACXX_OBJECT_FORMAT_COFF) +# define _LIBCUDACXX_ABI_REGEX_CONSTANTS_NONZERO +# elif _LIBCUDACXX_ABI_VERSION == 1 +# if !defined(_LIBCUDACXX_OBJECT_FORMAT_COFF) // Enable compiling copies of now inline methods into the dylib to support // applications compiled against older libraries. This is unnecessary with // COFF dllexport semantics, since dllexport forces a non-inline definition // of inline functions to be emitted anyway. Our own non-inline copy would // conflict with the dllexport-emitted copy, so we disable it. -# define _LIBCUDACXX_DEPRECATED_ABI_LEGACY_LIBRARY_DEFINITIONS_FOR_INLINE_FUNCTIONS +# define _LIBCUDACXX_DEPRECATED_ABI_LEGACY_LIBRARY_DEFINITIONS_FOR_INLINE_FUNCTIONS +# endif # endif -#endif -#ifndef __has_attribute -#define __has_attribute(__x) 0 -#endif +# ifndef __has_attribute +# define __has_attribute(__x) 0 +# endif -#ifndef __has_builtin -#define __has_builtin(__x) 0 -#endif +# ifndef __has_builtin +# define __has_builtin(__x) 0 +# endif -#ifndef __has_extension -#define __has_extension(__x) 0 -#endif +# ifndef __has_extension +# define __has_extension(__x) 0 +# endif -#ifndef __has_feature -#define __has_feature(__x) 0 -#endif +# ifndef __has_feature +# define __has_feature(__x) 0 +# endif -#ifndef __has_cpp_attribute -#define __has_cpp_attribute(__x) 0 -#endif +# ifndef __has_cpp_attribute +# define __has_cpp_attribute(__x) 0 +# endif // '__is_identifier' returns '0' if '__x' is a reserved identifier provided by // the compiler and '1' otherwise. -#ifndef __is_identifier -#define __is_identifier(__x) 1 -#endif +# ifndef __is_identifier +# define __is_identifier(__x) 1 +# endif -#ifndef __has_declspec_attribute -#define __has_declspec_attribute(__x) 0 -#endif +# ifndef __has_declspec_attribute +# define __has_declspec_attribute(__x) 0 +# endif -#define __has_keyword(__x) !(__is_identifier(__x)) +# define __has_keyword(__x) !(__is_identifier(__x)) -#ifndef __has_include -#define __has_include(...) 0 -#endif +# ifndef __has_include +# define __has_include(...) 0 +# endif -#if !defined(_CCCL_CUDA_COMPILER_NVCC) && !defined(_CCCL_COMPILER_NVRTC) +# if !defined(_CCCL_CUDA_COMPILER_NVCC) && !defined(_CCCL_COMPILER_NVRTC) // If NVCC is not being used can safely use `long double` without warnings -# define _LIBCUDACXX_HAS_COMPLEX_LONG_DOUBLE +# define _LIBCUDACXX_HAS_COMPLEX_LONG_DOUBLE // NVCC does not have a way of silencing non '_' prefixed UDLs -# define _LIBCUDACXX_HAS_STL_LITERALS -#endif +# define _LIBCUDACXX_HAS_STL_LITERALS +# endif -#if defined(_CCCL_COMPILER_GCC) && __cplusplus < 201103L -#error "libc++ does not support using GCC with C++03. Please enable C++11" -#endif +# if defined(_CCCL_COMPILER_GCC) && __cplusplus < 201103L +# error "libc++ does not support using GCC with C++03. Please enable C++11" +# endif // FIXME: ABI detection should be done via compiler builtin macros. This // is just a placeholder until Clang implements such macros. For now assume // that Windows compilers pretending to be MSVC++ target the Microsoft ABI, // and allow the user to explicitly specify the ABI to handle cases where this // heuristic falls short. 
-#if defined(_LIBCUDACXX_ABI_FORCE_ITANIUM) && defined(_LIBCUDACXX_ABI_FORCE_MICROSOFT) -# error "Only one of _LIBCUDACXX_ABI_FORCE_ITANIUM and _LIBCUDACXX_ABI_FORCE_MICROSOFT can be defined" -#elif defined(_LIBCUDACXX_ABI_FORCE_ITANIUM) -# define _LIBCUDACXX_ABI_ITANIUM -#elif defined(_LIBCUDACXX_ABI_FORCE_MICROSOFT) -# define _LIBCUDACXX_ABI_MICROSOFT -#else -# if defined(_WIN32) && defined(_CCCL_COMPILER_MSVC) +# if defined(_LIBCUDACXX_ABI_FORCE_ITANIUM) && defined(_LIBCUDACXX_ABI_FORCE_MICROSOFT) +# error "Only one of _LIBCUDACXX_ABI_FORCE_ITANIUM and _LIBCUDACXX_ABI_FORCE_MICROSOFT can be defined" +# elif defined(_LIBCUDACXX_ABI_FORCE_ITANIUM) +# define _LIBCUDACXX_ABI_ITANIUM +# elif defined(_LIBCUDACXX_ABI_FORCE_MICROSOFT) # define _LIBCUDACXX_ABI_MICROSOFT # else -# define _LIBCUDACXX_ABI_ITANIUM +# if defined(_WIN32) && defined(_CCCL_COMPILER_MSVC) +# define _LIBCUDACXX_ABI_MICROSOFT +# else +# define _LIBCUDACXX_ABI_ITANIUM +# endif # endif -#endif -#if defined(_LIBCUDACXX_ABI_MICROSOFT) && !defined(_LIBCUDACXX_NO_VCRUNTIME) -# define _LIBCUDACXX_ABI_VCRUNTIME -#endif +# if defined(_LIBCUDACXX_ABI_MICROSOFT) && !defined(_LIBCUDACXX_NO_VCRUNTIME) +# define _LIBCUDACXX_ABI_VCRUNTIME +# endif // Need to detect which libc we're using if we're on Linux. -#if defined(__linux__) -# include -# if defined(__GLIBC_PREREQ) -# define _LIBCUDACXX_GLIBC_PREREQ(a, b) __GLIBC_PREREQ(a, b) -# else -# define _LIBCUDACXX_GLIBC_PREREQ(a, b) 0 -# endif // defined(__GLIBC_PREREQ) -#endif // defined(__linux__) - -#ifdef __LITTLE_ENDIAN__ -# if __LITTLE_ENDIAN__ -# define _LIBCUDACXX_LITTLE_ENDIAN -# endif // __LITTLE_ENDIAN__ -#endif // __LITTLE_ENDIAN__ - -#ifdef __BIG_ENDIAN__ -# if __BIG_ENDIAN__ -# define _LIBCUDACXX_BIG_ENDIAN -# endif // __BIG_ENDIAN__ -#endif // __BIG_ENDIAN__ +# if defined(__linux__) +# include +# if defined(__GLIBC_PREREQ) +# define _LIBCUDACXX_GLIBC_PREREQ(a, b) __GLIBC_PREREQ(a, b) +# else +# define _LIBCUDACXX_GLIBC_PREREQ(a, b) 0 +# endif // defined(__GLIBC_PREREQ) +# endif // defined(__linux__) + +# ifdef __LITTLE_ENDIAN__ +# if __LITTLE_ENDIAN__ +# define _LIBCUDACXX_LITTLE_ENDIAN +# endif // __LITTLE_ENDIAN__ +# endif // __LITTLE_ENDIAN__ + +# ifdef __BIG_ENDIAN__ +# if __BIG_ENDIAN__ +# define _LIBCUDACXX_BIG_ENDIAN +# endif // __BIG_ENDIAN__ +# endif // __BIG_ENDIAN__ + +# ifdef __BYTE_ORDER__ +# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define _LIBCUDACXX_LITTLE_ENDIAN +# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +# define _LIBCUDACXX_BIG_ENDIAN +# endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +# endif // __BYTE_ORDER__ + +# ifdef __FreeBSD__ +# include +# if _BYTE_ORDER == _LITTLE_ENDIAN +# define _LIBCUDACXX_LITTLE_ENDIAN +# else // _BYTE_ORDER == _LITTLE_ENDIAN +# define _LIBCUDACXX_BIG_ENDIAN +# endif // _BYTE_ORDER == _LITTLE_ENDIAN +# ifndef __LONG_LONG_SUPPORTED +# define _LIBCUDACXX_HAS_NO_LONG_LONG +# endif // __LONG_LONG_SUPPORTED +# endif // __FreeBSD__ + +# ifdef __NetBSD__ +# include +# if _BYTE_ORDER == _LITTLE_ENDIAN +# define _LIBCUDACXX_LITTLE_ENDIAN +# else // _BYTE_ORDER == _LITTLE_ENDIAN +# define _LIBCUDACXX_BIG_ENDIAN +# endif // _BYTE_ORDER == _LITTLE_ENDIAN +# define _LIBCUDACXX_HAS_QUICK_EXIT +# endif // __NetBSD__ -#ifdef __BYTE_ORDER__ -# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -# define _LIBCUDACXX_LITTLE_ENDIAN -# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -# define _LIBCUDACXX_BIG_ENDIAN -# endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#endif // __BYTE_ORDER__ - -#ifdef __FreeBSD__ -# include -# if _BYTE_ORDER == 
_LITTLE_ENDIAN +# if defined(_WIN32) +# define _LIBCUDACXX_WIN32API # define _LIBCUDACXX_LITTLE_ENDIAN -# else // _BYTE_ORDER == _LITTLE_ENDIAN -# define _LIBCUDACXX_BIG_ENDIAN -# endif // _BYTE_ORDER == _LITTLE_ENDIAN -# ifndef __LONG_LONG_SUPPORTED -# define _LIBCUDACXX_HAS_NO_LONG_LONG -# endif // __LONG_LONG_SUPPORTED -#endif // __FreeBSD__ - -#ifdef __NetBSD__ -# include -# if _BYTE_ORDER == _LITTLE_ENDIAN -# define _LIBCUDACXX_LITTLE_ENDIAN -# else // _BYTE_ORDER == _LITTLE_ENDIAN -# define _LIBCUDACXX_BIG_ENDIAN -# endif // _BYTE_ORDER == _LITTLE_ENDIAN -# define _LIBCUDACXX_HAS_QUICK_EXIT -#endif // __NetBSD__ - -#if defined(_WIN32) -# define _LIBCUDACXX_WIN32API -# define _LIBCUDACXX_LITTLE_ENDIAN -# define _LIBCUDACXX_SHORT_WCHAR 1 +# define _LIBCUDACXX_SHORT_WCHAR 1 // Both MinGW and native MSVC provide a "MSVC"-like environment -# define _LIBCUDACXX_MSVCRT_LIKE +# define _LIBCUDACXX_MSVCRT_LIKE // If mingw not explicitly detected, assume using MS C runtime only if // a MS compatibility version is specified. -# if defined(_CCCL_COMPILER_MSVC) && !defined(__MINGW32__) -# define _LIBCUDACXX_MSVCRT // Using Microsoft's C Runtime library -# endif -# if (defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_ARM) || defined(__arm__)) -# define _LIBCUDACXX_HAS_BITSCAN64 -# endif -# define _LIBCUDACXX_HAS_OPEN_WITH_WCHAR -# if defined(_LIBCUDACXX_MSVCRT) -# define _LIBCUDACXX_HAS_QUICK_EXIT -# endif +# if defined(_CCCL_COMPILER_MSVC) && !defined(__MINGW32__) +# define _LIBCUDACXX_MSVCRT // Using Microsoft's C Runtime library +# endif +# if (defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_ARM) || defined(__arm__)) +# define _LIBCUDACXX_HAS_BITSCAN64 +# endif +# define _LIBCUDACXX_HAS_OPEN_WITH_WCHAR +# if defined(_LIBCUDACXX_MSVCRT) +# define _LIBCUDACXX_HAS_QUICK_EXIT +# endif // Some CRT APIs are unavailable to store apps -# if defined(WINAPI_FAMILY) -# include -# if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) && \ - (!defined(WINAPI_PARTITION_SYSTEM) || \ - !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_SYSTEM)) -# define _LIBCUDACXX_WINDOWS_STORE_APP +# if defined(WINAPI_FAMILY) +# include +# if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) \ + && (!defined(WINAPI_PARTITION_SYSTEM) || !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_SYSTEM)) +# define _LIBCUDACXX_WINDOWS_STORE_APP +# endif # endif -# endif -#endif // defined(_WIN32) +# endif // defined(_WIN32) -#ifdef __sun__ -# include -# ifdef _LITTLE_ENDIAN -# define _LIBCUDACXX_LITTLE_ENDIAN +# ifdef __sun__ +# include +# ifdef _LITTLE_ENDIAN +# define _LIBCUDACXX_LITTLE_ENDIAN +# else +# define _LIBCUDACXX_BIG_ENDIAN +# endif +# endif // __sun__ + +# if defined(__CloudABI__) +// Certain architectures provide arc4random(). Prefer using +// arc4random() over /dev/{u,}random to make it possible to obtain +// random data even when using sandboxing mechanisms such as chroots, +// Capsicum, etc. +# define _LIBCUDACXX_USING_ARC4_RANDOM +# elif defined(__Fuchsia__) || defined(__wasi__) +# define _LIBCUDACXX_USING_GETENTROPY +# elif defined(__native_client__) +// NaCl's sandbox (which PNaCl also runs in) doesn't allow filesystem access, +// including accesses to the special files under /dev. C++11's +// std::random_device is instead exposed through a NaCl syscall. +# define _LIBCUDACXX_USING_NACL_RANDOM +# elif defined(_LIBCUDACXX_WIN32API) +# define _LIBCUDACXX_USING_WIN32_RANDOM # else -# define _LIBCUDACXX_BIG_ENDIAN -# endif -#endif // __sun__ - -#if defined(__CloudABI__) - // Certain architectures provide arc4random(). 
Prefer using - // arc4random() over /dev/{u,}random to make it possible to obtain - // random data even when using sandboxing mechanisms such as chroots, - // Capsicum, etc. -# define _LIBCUDACXX_USING_ARC4_RANDOM -#elif defined(__Fuchsia__) || defined(__wasi__) -# define _LIBCUDACXX_USING_GETENTROPY -#elif defined(__native_client__) - // NaCl's sandbox (which PNaCl also runs in) doesn't allow filesystem access, - // including accesses to the special files under /dev. C++11's - // std::random_device is instead exposed through a NaCl syscall. -# define _LIBCUDACXX_USING_NACL_RANDOM -#elif defined(_LIBCUDACXX_WIN32API) -# define _LIBCUDACXX_USING_WIN32_RANDOM -#else -# define _LIBCUDACXX_USING_DEV_RANDOM -#endif +# define _LIBCUDACXX_USING_DEV_RANDOM +# endif -#ifndef _LIBCUDACXX_LITTLE_ENDIAN -#if defined(_CCCL_COMPILER_NVRTC) -# define _LIBCUDACXX_LITTLE_ENDIAN -#endif -#endif // _LIBCUDACXX_LITTLE_ENDIAN +# ifndef _LIBCUDACXX_LITTLE_ENDIAN +# if defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_LITTLE_ENDIAN +# endif +# endif // _LIBCUDACXX_LITTLE_ENDIAN + +# if !defined(_LIBCUDACXX_LITTLE_ENDIAN) && !defined(_LIBCUDACXX_BIG_ENDIAN) +# include +# if __BYTE_ORDER == __LITTLE_ENDIAN +# define _LIBCUDACXX_LITTLE_ENDIAN +# elif __BYTE_ORDER == __BIG_ENDIAN +# define _LIBCUDACXX_BIG_ENDIAN +# else // __BYTE_ORDER == __BIG_ENDIAN +# error unable to determine endian +# endif +# endif // !defined(_LIBCUDACXX_LITTLE_ENDIAN) && !defined(_LIBCUDACXX_BIG_ENDIAN) -#if !defined(_LIBCUDACXX_LITTLE_ENDIAN) && !defined(_LIBCUDACXX_BIG_ENDIAN) -# include -# if __BYTE_ORDER == __LITTLE_ENDIAN -# define _LIBCUDACXX_LITTLE_ENDIAN -# elif __BYTE_ORDER == __BIG_ENDIAN -# define _LIBCUDACXX_BIG_ENDIAN -# else // __BYTE_ORDER == __BIG_ENDIAN -# error unable to determine endian +# if __has_attribute(__no_sanitize__) && !defined(_CCCL_COMPILER_GCC) +# define _LIBCUDACXX_NO_CFI __attribute__((__no_sanitize__("cfi"))) +# else +# define _LIBCUDACXX_NO_CFI # endif -#endif // !defined(_LIBCUDACXX_LITTLE_ENDIAN) && !defined(_LIBCUDACXX_BIG_ENDIAN) - -#if __has_attribute(__no_sanitize__) && !defined(_CCCL_COMPILER_GCC) -# define _LIBCUDACXX_NO_CFI __attribute__((__no_sanitize__("cfi"))) -#else -# define _LIBCUDACXX_NO_CFI -#endif -#if (defined(__ISO_C_VISIBLE) && __ISO_C_VISIBLE >= 2011) || __cplusplus >= 201103L -# if defined(__FreeBSD__) -# define _LIBCUDACXX_HAS_QUICK_EXIT -# define _LIBCUDACXX_HAS_C11_FEATURES -# elif defined(__Fuchsia__) || defined(__wasi__) -# define _LIBCUDACXX_HAS_QUICK_EXIT -# define _LIBCUDACXX_HAS_TIMESPEC_GET -# define _LIBCUDACXX_HAS_C11_FEATURES -# elif defined(__linux__) -# if !defined(_LIBCUDACXX_HAS_MUSL_LIBC) -# if _LIBCUDACXX_GLIBC_PREREQ(2, 15) || defined(__BIONIC__) -# define _LIBCUDACXX_HAS_QUICK_EXIT -# endif -# if _LIBCUDACXX_GLIBC_PREREQ(2, 17) -# define _LIBCUDACXX_HAS_C11_FEATURES -# define _LIBCUDACXX_HAS_TIMESPEC_GET -# endif -# else // defined(_LIBCUDACXX_HAS_MUSL_LIBC) +# if (defined(__ISO_C_VISIBLE) && __ISO_C_VISIBLE >= 2011) || __cplusplus >= 201103L +# if defined(__FreeBSD__) +# define _LIBCUDACXX_HAS_QUICK_EXIT +# define _LIBCUDACXX_HAS_C11_FEATURES +# elif defined(__Fuchsia__) || defined(__wasi__) # define _LIBCUDACXX_HAS_QUICK_EXIT # define _LIBCUDACXX_HAS_TIMESPEC_GET # define _LIBCUDACXX_HAS_C11_FEATURES -# endif -# endif // __linux__ -#endif +# elif defined(__linux__) +# if !defined(_LIBCUDACXX_HAS_MUSL_LIBC) +# if _LIBCUDACXX_GLIBC_PREREQ(2, 15) || defined(__BIONIC__) +# define _LIBCUDACXX_HAS_QUICK_EXIT +# endif +# if _LIBCUDACXX_GLIBC_PREREQ(2, 17) +# define 
_LIBCUDACXX_HAS_C11_FEATURES +# define _LIBCUDACXX_HAS_TIMESPEC_GET +# endif +# else // defined(_LIBCUDACXX_HAS_MUSL_LIBC) +# define _LIBCUDACXX_HAS_QUICK_EXIT +# define _LIBCUDACXX_HAS_TIMESPEC_GET +# define _LIBCUDACXX_HAS_C11_FEATURES +# endif +# endif // __linux__ +# endif -#if defined(_CCCL_COMPILER_NVRTC) -# define __alignof(x) alignof(x) -#endif // _CCCL_COMPILER_NVRTC +# if defined(_CCCL_COMPILER_NVRTC) +# define __alignof(x) alignof(x) +# endif // _CCCL_COMPILER_NVRTC -#if defined(_CCCL_COMPILER_MSVC) -# define __alignof__ __alignof -#endif +# if defined(_CCCL_COMPILER_MSVC) +# define __alignof__ __alignof +# endif -#define _LIBCUDACXX_ALIGNOF(_Tp) alignof(_Tp) -#define _LIBCUDACXX_PREFERRED_ALIGNOF(_Tp) __alignof(_Tp) +# define _LIBCUDACXX_ALIGNOF(_Tp) alignof(_Tp) +# define _LIBCUDACXX_PREFERRED_ALIGNOF(_Tp) __alignof(_Tp) -#if defined(_CCCL_COMPILER_MSVC) -# define _CCCL_ALIGNAS_TYPE(x) alignas(x) -# define _CCCL_ALIGNAS(x) __declspec(align(x)) -#elif __has_feature(cxx_alignas) -# define _CCCL_ALIGNAS_TYPE(x) alignas(x) -# define _CCCL_ALIGNAS(x) alignas(x) -#else -# define _CCCL_ALIGNAS_TYPE(x) __attribute__((__aligned__(_LIBCUDACXX_ALIGNOF(x)))) -# define _CCCL_ALIGNAS(x) __attribute__((__aligned__(x))) -#endif // !_CCCL_COMPILER_MSVC && !__has_feature(cxx_alignas) - -#define _LIBCUDACXX_TOSTRING2(_STR) #_STR -#define _LIBCUDACXX_TOSTRING(_STR) _LIBCUDACXX_TOSTRING2(_STR) +# if defined(_CCCL_COMPILER_MSVC) +# define _CCCL_ALIGNAS_TYPE(x) alignas(x) +# define _CCCL_ALIGNAS(x) __declspec(align(x)) +# elif __has_feature(cxx_alignas) +# define _CCCL_ALIGNAS_TYPE(x) alignas(x) +# define _CCCL_ALIGNAS(x) alignas(x) +# else +# define _CCCL_ALIGNAS_TYPE(x) __attribute__((__aligned__(_LIBCUDACXX_ALIGNOF(x)))) +# define _CCCL_ALIGNAS(x) __attribute__((__aligned__(x))) +# endif // !_CCCL_COMPILER_MSVC && !__has_feature(cxx_alignas) + +# define _LIBCUDACXX_TOSTRING2(_STR) #_STR +# define _LIBCUDACXX_TOSTRING(_STR) _LIBCUDACXX_TOSTRING2(_STR) // This is wrapped in __CUDA_ARCH__ to prevent error: "ignoring '#pragma unroll' // [-Werror=unknown-pragmas]" -#if defined(__CUDA_ARCH__) -#if defined(_CCCL_COMPILER_MSVC) -# define _LIBCUDACXX_PRAGMA_UNROLL(_N) __pragma(_LIBCUDACXX_TOSTRING(unroll _N)) -#else // ^^^ _CCCL_COMPILER_MSVC ^^^ / vvv !_CCCL_COMPILER_MSVC vvv -# define _LIBCUDACXX_PRAGMA_UNROLL(_N) _Pragma(_LIBCUDACXX_TOSTRING(unroll _N)) -#endif // !_CCCL_COMPILER_MSVC -#else // ^^^ __CUDA_ARCH__ ^^^ / vvv !__CUDA_ARCH__ vvv -# define _LIBCUDACXX_PRAGMA_UNROLL(_N) -#endif // !__CUDA_ARCH__ +# if defined(__CUDA_ARCH__) +# if defined(_CCCL_COMPILER_MSVC) +# define _LIBCUDACXX_PRAGMA_UNROLL(_N) __pragma(_LIBCUDACXX_TOSTRING(unroll _N)) +# else // ^^^ _CCCL_COMPILER_MSVC ^^^ / vvv !_CCCL_COMPILER_MSVC vvv +# define _LIBCUDACXX_PRAGMA_UNROLL(_N) _Pragma(_LIBCUDACXX_TOSTRING(unroll _N)) +# endif // !_CCCL_COMPILER_MSVC +# else // ^^^ __CUDA_ARCH__ ^^^ / vvv !__CUDA_ARCH__ vvv +# define _LIBCUDACXX_PRAGMA_UNROLL(_N) +# endif // !__CUDA_ARCH__ -#if defined(_CCCL_COMPILER_MSVC) -#define _LIBCUDACXX_ALWAYS_INLINE __forceinline -#else -#define _LIBCUDACXX_ALWAYS_INLINE __attribute__ ((__always_inline__)) -#endif // !_CCCL_COMPILER_MSVC +# if defined(_CCCL_COMPILER_MSVC) +# define _LIBCUDACXX_ALWAYS_INLINE __forceinline +# else +# define _LIBCUDACXX_ALWAYS_INLINE __attribute__((__always_inline__)) +# endif // !_CCCL_COMPILER_MSVC -#if defined(__cuda_std__) -#define _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(size, ptr) (size <= 8) -#elif defined(_CCCL_COMPILER_CLANG) || defined(_CCCL_COMPILER_GCC) -#define 
_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(...) __atomic_always_lock_free(__VA_ARGS__) -#endif // __cuda_std__ +# if defined(__cuda_std__) +# define _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(size, ptr) (size <= 8) +# elif defined(_CCCL_COMPILER_CLANG) || defined(_CCCL_COMPILER_GCC) +# define _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(...) __atomic_always_lock_free(__VA_ARGS__) +# endif // __cuda_std__ // https://bugs.llvm.org/show_bug.cgi?id=44517 -#define __check_builtin(__x) (__has_builtin(__##__x) || \ - __has_keyword(__##__x) || \ - __has_feature(__x)) +# define __check_builtin(__x) (__has_builtin(__##__x) || __has_keyword(__##__x) || __has_feature(__x)) // We work around old clang versions (before clang-10) not supporting __has_builtin via __check_builtin // We work around old intel versions (before 2021.3) not supporting __has_builtin via __check_builtin @@ -453,486 +449,422 @@ extern "C++" { // MSVC needs manual handling, has no real way of checking builtins so all is manual // GCC needs manual handling, before gcc-10 as that finally supports __has_builtin -#if __check_builtin(array_rank) -#define _LIBCUDACXX_ARRAY_RANK(...) __array_rank(__VA_ARGS__) -#endif // __check_builtin(array_rank) +# if __check_builtin(array_rank) +# define _LIBCUDACXX_ARRAY_RANK(...) __array_rank(__VA_ARGS__) +# endif // __check_builtin(array_rank) // nvhpc has a bug where it supports __builtin_addressof but does not mark it via __check_builtin -#if __check_builtin(builtin_addressof) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 700) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVHPC) -#define _LIBCUDACXX_ADDRESSOF(...) __builtin_addressof(__VA_ARGS__) -#endif // __check_builtin(builtin_addressof) - -#if __check_builtin(builtin_bit_cast) \ - || (defined(_CCCL_COMPILER_MSVC) && _MSC_VER > 1925) -#define _LIBCUDACXX_BIT_CAST(...) __builtin_bit_cast(__VA_ARGS__) -#endif // __check_builtin(builtin_bit_cast) - -#if __check_builtin(builtin_is_constant_evaluated) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 900) \ - || (defined(_CCCL_COMPILER_MSVC) && _MSC_VER > 1924 && !defined(_CCCL_CUDACC_BELOW_11_3)) -#define _LIBCUDACXX_IS_CONSTANT_EVALUATED(...) __builtin_is_constant_evaluated(__VA_ARGS__) -#endif // __check_builtin(builtin_is_constant_evaluated) +# if __check_builtin(builtin_addressof) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 700) \ + || defined(_CCCL_COMPILER_MSVC) || defined(_CCCL_COMPILER_NVHPC) +# define _LIBCUDACXX_ADDRESSOF(...) __builtin_addressof(__VA_ARGS__) +# endif // __check_builtin(builtin_addressof) + +# if __check_builtin(builtin_bit_cast) || (defined(_CCCL_COMPILER_MSVC) && _MSC_VER > 1925) +# define _LIBCUDACXX_BIT_CAST(...) __builtin_bit_cast(__VA_ARGS__) +# endif // __check_builtin(builtin_bit_cast) + +# if __check_builtin(builtin_is_constant_evaluated) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 900) \ + || (defined(_CCCL_COMPILER_MSVC) && _MSC_VER > 1924 && !defined(_CCCL_CUDACC_BELOW_11_3)) +# define _LIBCUDACXX_IS_CONSTANT_EVALUATED(...) __builtin_is_constant_evaluated(__VA_ARGS__) +# endif // __check_builtin(builtin_is_constant_evaluated) // NVCC and NVRTC in C++11 mode freaks out about `__builtin_is_constant_evaluated`. 
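// A minimal illustrative sketch (the helper name below is invented for illustration and does
// not appear in this header): downstream code only checks whether the detection macro is
// defined and falls back to a conservative answer otherwise, which is why the hunk below can
// simply #undef _LIBCUDACXX_IS_CONSTANT_EVALUATED for the compilers mentioned above.
constexpr bool __example_is_constant_evaluated() noexcept
{
#if defined(_LIBCUDACXX_IS_CONSTANT_EVALUATED)
  return _LIBCUDACXX_IS_CONSTANT_EVALUATED(); // expands to __builtin_is_constant_evaluated()
#else
  return false; // assume runtime evaluation when the builtin cannot be detected
#endif
}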
-#if _CCCL_STD_VER < 2014 \ - && (defined(_CCCL_CUDA_COMPILER_NVCC) \ - || defined(_CCCL_COMPILER_NVRTC) \ - || defined(_CCCL_COMPILER_NVHPC)) -#undef _LIBCUDACXX_IS_CONSTANT_EVALUATED -#endif // _CCCL_STD_VER < 2014 && _CCCL_CUDA_COMPILER_NVCC - -#if __check_builtin(builtin_launder) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 700) -#define _LIBCUDACXX_LAUNDER(...) __builtin_launder(__VA_ARGS__) -#endif // __check_builtin(builtin_launder) +# if _CCCL_STD_VER < 2014 \ + && (defined(_CCCL_CUDA_COMPILER_NVCC) || defined(_CCCL_COMPILER_NVRTC) || defined(_CCCL_COMPILER_NVHPC)) +# undef _LIBCUDACXX_IS_CONSTANT_EVALUATED +# endif // _CCCL_STD_VER < 2014 && _CCCL_CUDA_COMPILER_NVCC + +# if __check_builtin(builtin_launder) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 700) +# define _LIBCUDACXX_LAUNDER(...) __builtin_launder(__VA_ARGS__) +# endif // __check_builtin(builtin_launder) // Disabled due to libstdc++ conflict -#if 0 // __check_builtin(decay) -#define _LIBCUDACXX_DECAY(...) __decay(__VA_ARGS__) -#endif // __check_builtin(decay) - -#if __check_builtin(has_nothrow_assign) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_HAS_NOTHROW_ASSIGN(...) __has_nothrow_assign(__VA_ARGS__) -#endif // __check_builtin(has_nothrow_assign) - -#if __check_builtin(has_nothrow_constructor) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_HAS_NOTHROW_CONSTRUCTOR(...) __has_nothrow_constructor(__VA_ARGS__) -#endif // __check_builtin(has_nothrow_constructor) - -#if __check_builtin(has_nothrow_copy) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_HAS_NOTHROW_COPY(...) __has_nothrow_copy(__VA_ARGS__) -#endif // __check_builtin(has_nothrow_copy) - -#if __check_builtin(has_trivial_constructor) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_HAS_TRIVIAL_CONSTRUCTOR(...) __has_trivial_constructor(__VA_ARGS__) -#endif // __check_builtin(has_trivial_constructor) - -#if __check_builtin(has_trivial_destructor) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_HAS_TRIVIAL_DESTRUCTOR(...) __has_trivial_destructor(__VA_ARGS__) -#endif // __check_builtin(has_trivial_destructor) - -#if __check_builtin(has_unique_object_representations) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 700) -#define _LIBCUDACXX_HAS_UNIQUE_OBJECT_REPRESENTATIONS(...) __has_unique_object_representations(__VA_ARGS__) -#endif // __check_builtin(has_unique_object_representations) - -#if __check_builtin(has_virtual_destructor) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_HAS_VIRTUAL_DESTRUCTOR(...) __has_virtual_destructor(__VA_ARGS__) -#endif // __check_builtin(has_virtual_destructor) - -#if __check_builtin(is_aggregate) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 700) \ - || (defined(_CCCL_COMPILER_MSVC) && _MSC_VER > 1914) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_IS_AGGREGATE(...) 
__is_aggregate(__VA_ARGS__) -#endif // __check_builtin(is_aggregate) - -#if __check_builtin(is_array) -#define _LIBCUDACXX_IS_ARRAY(...) __is_array(__VA_ARGS__) -#endif // __check_builtin(is_array) +# if 0 // __check_builtin(decay) +# define _LIBCUDACXX_DECAY(...) __decay(__VA_ARGS__) +# endif // __check_builtin(decay) + +# if __check_builtin(has_nothrow_assign) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) \ + || defined(_CCCL_COMPILER_MSVC) || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_HAS_NOTHROW_ASSIGN(...) __has_nothrow_assign(__VA_ARGS__) +# endif // __check_builtin(has_nothrow_assign) + +# if __check_builtin(has_nothrow_constructor) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) \ + || defined(_CCCL_COMPILER_MSVC) || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_HAS_NOTHROW_CONSTRUCTOR(...) __has_nothrow_constructor(__VA_ARGS__) +# endif // __check_builtin(has_nothrow_constructor) + +# if __check_builtin(has_nothrow_copy) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) \ + || defined(_CCCL_COMPILER_MSVC) || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_HAS_NOTHROW_COPY(...) __has_nothrow_copy(__VA_ARGS__) +# endif // __check_builtin(has_nothrow_copy) + +# if __check_builtin(has_trivial_constructor) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) \ + || defined(_CCCL_COMPILER_MSVC) || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_HAS_TRIVIAL_CONSTRUCTOR(...) __has_trivial_constructor(__VA_ARGS__) +# endif // __check_builtin(has_trivial_constructor) + +# if __check_builtin(has_trivial_destructor) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) \ + || defined(_CCCL_COMPILER_MSVC) || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_HAS_TRIVIAL_DESTRUCTOR(...) __has_trivial_destructor(__VA_ARGS__) +# endif // __check_builtin(has_trivial_destructor) + +# if __check_builtin(has_unique_object_representations) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 700) +# define _LIBCUDACXX_HAS_UNIQUE_OBJECT_REPRESENTATIONS(...) __has_unique_object_representations(__VA_ARGS__) +# endif // __check_builtin(has_unique_object_representations) + +# if __check_builtin(has_virtual_destructor) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) \ + || defined(_CCCL_COMPILER_MSVC) || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_HAS_VIRTUAL_DESTRUCTOR(...) __has_virtual_destructor(__VA_ARGS__) +# endif // __check_builtin(has_virtual_destructor) + +# if __check_builtin(is_aggregate) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 700) \ + || (defined(_CCCL_COMPILER_MSVC) && _MSC_VER > 1914) || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_IS_AGGREGATE(...) __is_aggregate(__VA_ARGS__) +# endif // __check_builtin(is_aggregate) + +# if __check_builtin(is_array) +# define _LIBCUDACXX_IS_ARRAY(...) __is_array(__VA_ARGS__) +# endif // __check_builtin(is_array) // TODO: Clang incorrectly reports that __is_array is true for T[0]. // Re-enable the branch once https://llvm.org/PR54705 is fixed. -#ifndef _LIBCUDACXX_USE_IS_ARRAY_FALLBACK -#if defined(_CCCL_COMPILER_CLANG) -#define _LIBCUDACXX_USE_IS_ARRAY_FALLBACK -#endif // _CCCL_COMPILER_CLANG -#endif // !_LIBCUDACXX_USE_IS_ARRAY_FALLBACK - -#if __check_builtin(is_assignable) \ - || defined(_CCCL_COMPILER_MSVC) -#define _LIBCUDACXX_IS_ASSIGNABLE(...) 
__is_assignable(__VA_ARGS__) -#endif // __check_builtin(is_assignable) - -#if __check_builtin(is_base_of) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_IS_BASE_OF(...) __is_base_of(__VA_ARGS__) -#endif // __check_builtin(is_base_of) - -#if __check_builtin(is_class) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_IS_CLASS(...) __is_class(__VA_ARGS__) -#endif // __check_builtin(is_class) - -#if __check_builtin(is_constructible) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 800) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_IS_CONSTRUCTIBLE(...) __is_constructible(__VA_ARGS__) -#endif // __check_builtin(is_constructible) - -#if __check_builtin(is_convertible_to) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_IS_CONVERTIBLE_TO(...) __is_convertible_to(__VA_ARGS__) -#endif // __check_builtin(is_convertible_to) - -#if __check_builtin(is_destructible) \ - || defined(_CCCL_COMPILER_MSVC) -#define _LIBCUDACXX_IS_DESTRUCTIBLE(...) __is_destructible(__VA_ARGS__) -#endif // __check_builtin(is_destructible) - -#if __check_builtin(is_empty) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_IS_EMPTY(...) __is_empty(__VA_ARGS__) -#endif // __check_builtin(is_empty) - -#if __check_builtin(is_enum) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_IS_ENUM(...) __is_enum(__VA_ARGS__) -#endif // __check_builtin(is_enum) - -#if __check_builtin(is_final) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 407) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_IS_FINAL(...) __is_final(__VA_ARGS__) -#endif // __check_builtin(is_final) - -#if __check_builtin(is_function) \ - && !defined(_CCCL_CUDA_COMPILER_NVCC) -#define _LIBCUDACXX_IS_FUNCTION(...) __is_function(__VA_ARGS__) -#endif // __check_builtin(is_function) - -#if __check_builtin(is_literal_type) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 406) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_IS_LITERAL(...) __is_literal_type(__VA_ARGS__) -#endif // __check_builtin(is_literal_type) - -#if __check_builtin(is_lvalue_reference) -#define _LIBCUDACXX_IS_LVALUE_REFERENCE(...) __is_lvalue_reference(__VA_ARGS__) -#endif // __check_builtin(is_lvalue_reference) - -#ifndef _LIBCUDACXX_USE_IS_LVALUE_REFERENCE_FALLBACK -#if defined(_CCCL_CUDACC_BELOW_11_3) -#define _LIBCUDACXX_USE_IS_LVALUE_REFERENCE_FALLBACK -#endif // nvcc < 11.3 -#endif // !_LIBCUDACXX_USE_IS_LVALUE_REFERENCE_FALLBACK - -#if __check_builtin(is_nothrow_assignable) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_IS_NOTHROW_ASSIGNABLE(...) __is_nothrow_assignable(__VA_ARGS__) -#endif // __check_builtin(is_nothrow_assignable) - -#if __check_builtin(is_nothrow_constructible) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_IS_NOTHROW_CONSTRUCTIBLE(...) 
__is_nothrow_constructible(__VA_ARGS__) -#endif // __check_builtin(is_nothrow_constructible) - -#if __check_builtin(is_nothrow_destructible) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_IS_NOTHROW_DESTRUCTIBLE(...) __is_nothrow_destructible(__VA_ARGS__) -#endif // __check_builtin(is_nothrow_destructible) - -#if __check_builtin(is_object) -#define _LIBCUDACXX_IS_OBJECT(...) __is_object(__VA_ARGS__) -#endif // __check_builtin(is_object) - -#ifndef _LIBCUDACXX_USE_IS_OBJECT_FALLBACK -#if defined(_CCCL_CUDACC_BELOW_11_3) -#define _LIBCUDACXX_USE_IS_OBJECT_FALLBACK -#endif // nvcc < 11.3 -#endif // !_LIBCUDACXX_USE_IS_OBJECT_FALLBACK - -#if __check_builtin(is_pod) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_IS_POD(...) __is_pod(__VA_ARGS__) -#endif // __check_builtin(is_pod) +# ifndef _LIBCUDACXX_USE_IS_ARRAY_FALLBACK +# if defined(_CCCL_COMPILER_CLANG) +# define _LIBCUDACXX_USE_IS_ARRAY_FALLBACK +# endif // _CCCL_COMPILER_CLANG +# endif // !_LIBCUDACXX_USE_IS_ARRAY_FALLBACK + +# if __check_builtin(is_assignable) || defined(_CCCL_COMPILER_MSVC) +# define _LIBCUDACXX_IS_ASSIGNABLE(...) __is_assignable(__VA_ARGS__) +# endif // __check_builtin(is_assignable) + +# if __check_builtin(is_base_of) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) || defined(_CCCL_COMPILER_MSVC) \ + || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_IS_BASE_OF(...) __is_base_of(__VA_ARGS__) +# endif // __check_builtin(is_base_of) + +# if __check_builtin(is_class) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) || defined(_CCCL_COMPILER_MSVC) \ + || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_IS_CLASS(...) __is_class(__VA_ARGS__) +# endif // __check_builtin(is_class) + +# if __check_builtin(is_constructible) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 800) \ + || defined(_CCCL_COMPILER_MSVC) || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_IS_CONSTRUCTIBLE(...) __is_constructible(__VA_ARGS__) +# endif // __check_builtin(is_constructible) + +# if __check_builtin(is_convertible_to) || defined(_CCCL_COMPILER_MSVC) || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_IS_CONVERTIBLE_TO(...) __is_convertible_to(__VA_ARGS__) +# endif // __check_builtin(is_convertible_to) + +# if __check_builtin(is_destructible) || defined(_CCCL_COMPILER_MSVC) +# define _LIBCUDACXX_IS_DESTRUCTIBLE(...) __is_destructible(__VA_ARGS__) +# endif // __check_builtin(is_destructible) + +# if __check_builtin(is_empty) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) || defined(_CCCL_COMPILER_MSVC) \ + || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_IS_EMPTY(...) __is_empty(__VA_ARGS__) +# endif // __check_builtin(is_empty) + +# if __check_builtin(is_enum) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) || defined(_CCCL_COMPILER_MSVC) \ + || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_IS_ENUM(...) __is_enum(__VA_ARGS__) +# endif // __check_builtin(is_enum) + +# if __check_builtin(is_final) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 407) || defined(_CCCL_COMPILER_MSVC) \ + || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_IS_FINAL(...) __is_final(__VA_ARGS__) +# endif // __check_builtin(is_final) + +# if __check_builtin(is_function) && !defined(_CCCL_CUDA_COMPILER_NVCC) +# define _LIBCUDACXX_IS_FUNCTION(...) 
__is_function(__VA_ARGS__) +# endif // __check_builtin(is_function) + +# if __check_builtin(is_literal_type) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 406) \ + || defined(_CCCL_COMPILER_MSVC) || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_IS_LITERAL(...) __is_literal_type(__VA_ARGS__) +# endif // __check_builtin(is_literal_type) + +# if __check_builtin(is_lvalue_reference) +# define _LIBCUDACXX_IS_LVALUE_REFERENCE(...) __is_lvalue_reference(__VA_ARGS__) +# endif // __check_builtin(is_lvalue_reference) + +# ifndef _LIBCUDACXX_USE_IS_LVALUE_REFERENCE_FALLBACK +# if defined(_CCCL_CUDACC_BELOW_11_3) +# define _LIBCUDACXX_USE_IS_LVALUE_REFERENCE_FALLBACK +# endif // nvcc < 11.3 +# endif // !_LIBCUDACXX_USE_IS_LVALUE_REFERENCE_FALLBACK + +# if __check_builtin(is_nothrow_assignable) || defined(_CCCL_COMPILER_MSVC) || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_IS_NOTHROW_ASSIGNABLE(...) __is_nothrow_assignable(__VA_ARGS__) +# endif // __check_builtin(is_nothrow_assignable) + +# if __check_builtin(is_nothrow_constructible) || defined(_CCCL_COMPILER_MSVC) || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_IS_NOTHROW_CONSTRUCTIBLE(...) __is_nothrow_constructible(__VA_ARGS__) +# endif // __check_builtin(is_nothrow_constructible) + +# if __check_builtin(is_nothrow_destructible) || defined(_CCCL_COMPILER_MSVC) || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_IS_NOTHROW_DESTRUCTIBLE(...) __is_nothrow_destructible(__VA_ARGS__) +# endif // __check_builtin(is_nothrow_destructible) + +# if __check_builtin(is_object) +# define _LIBCUDACXX_IS_OBJECT(...) __is_object(__VA_ARGS__) +# endif // __check_builtin(is_object) + +# ifndef _LIBCUDACXX_USE_IS_OBJECT_FALLBACK +# if defined(_CCCL_CUDACC_BELOW_11_3) +# define _LIBCUDACXX_USE_IS_OBJECT_FALLBACK +# endif // nvcc < 11.3 +# endif // !_LIBCUDACXX_USE_IS_OBJECT_FALLBACK + +# if __check_builtin(is_pod) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) || defined(_CCCL_COMPILER_MSVC) \ + || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_IS_POD(...) __is_pod(__VA_ARGS__) +# endif // __check_builtin(is_pod) // libstdc++ defines this as a function, breaking functionality -#if 0 // __check_builtin(is_pointer) -#define _LIBCUDACXX_IS_POINTER(...) __is_pointer(__VA_ARGS__) -#endif // __check_builtin(is_pointer) +# if 0 // __check_builtin(is_pointer) +# define _LIBCUDACXX_IS_POINTER(...) __is_pointer(__VA_ARGS__) +# endif // __check_builtin(is_pointer) -#if __check_builtin(is_polymorphic) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_IS_POLYMORPHIC(...) __is_polymorphic(__VA_ARGS__) -#endif // __check_builtin(is_polymorphic) +# if __check_builtin(is_polymorphic) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) \ + || defined(_CCCL_COMPILER_MSVC) || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_IS_POLYMORPHIC(...) __is_polymorphic(__VA_ARGS__) +# endif // __check_builtin(is_polymorphic) -#if __check_builtin(is_reference) -#define _LIBCUDACXX_IS_REFERENCE(...) __is_reference(__VA_ARGS__) -#endif // __check_builtin(is_reference) +# if __check_builtin(is_reference) +# define _LIBCUDACXX_IS_REFERENCE(...) __is_reference(__VA_ARGS__) +# endif // __check_builtin(is_reference) // Disabled due to libstdc++ conflict -#if 0 // __check_builtin(is_referenceable) -#define _LIBCUDACXX_IS_REFERENCEABLE(...) 
__is_referenceable(__VA_ARGS__) -#endif // __check_builtin(is_referenceable) +# if 0 // __check_builtin(is_referenceable) +# define _LIBCUDACXX_IS_REFERENCEABLE(...) __is_referenceable(__VA_ARGS__) +# endif // __check_builtin(is_referenceable) -#if __check_builtin(is_rvalue_reference) -#define _LIBCUDACXX_IS_RVALUE_REFERENCE(...) __is_rvalue_reference(__VA_ARGS__) -#endif // __check_builtin(is_rvalue_reference) +# if __check_builtin(is_rvalue_reference) +# define _LIBCUDACXX_IS_RVALUE_REFERENCE(...) __is_rvalue_reference(__VA_ARGS__) +# endif // __check_builtin(is_rvalue_reference) -#if __check_builtin(is_same) && !defined(_CCCL_CUDA_COMPILER_NVCC) -#define _LIBCUDACXX_IS_SAME(...) __is_same(__VA_ARGS__) -#endif // __check_builtin(is_same) +# if __check_builtin(is_same) && !defined(_CCCL_CUDA_COMPILER_NVCC) +# define _LIBCUDACXX_IS_SAME(...) __is_same(__VA_ARGS__) +# endif // __check_builtin(is_same) // libstdc++ defines this as a function, breaking functionality -#if 0 // __check_builtin(is_scalar) -#define _LIBCUDACXX_IS_SCALAR(...) __is_scalar(__VA_ARGS__) -#endif // __check_builtin(is_scalar) +# if 0 // __check_builtin(is_scalar) +# define _LIBCUDACXX_IS_SCALAR(...) __is_scalar(__VA_ARGS__) +# endif // __check_builtin(is_scalar) // libstdc++ defines this as a function, breaking functionality -#if 0 // __check_builtin(is_signed) -#define _LIBCUDACXX_IS_SIGNED(...) __is_signed(__VA_ARGS__) -#endif // __check_builtin(is_signed) - -#if __check_builtin(is_standard_layout) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 407) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_IS_STANDARD_LAYOUT(...) __is_standard_layout(__VA_ARGS__) -#endif // __check_builtin(is_standard_layout) - -#if __check_builtin(is_trivial) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 405) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_IS_TRIVIAL(...) __is_trivial(__VA_ARGS__) -#endif // __check_builtin(is_trivial) - -#if __check_builtin(is_trivially_assignable) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 501) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_IS_TRIVIALLY_ASSIGNABLE(...) __is_trivially_assignable(__VA_ARGS__) -#endif // __check_builtin(is_trivially_assignable) - -#if __check_builtin(is_trivially_constructible) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 501) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_IS_TRIVIALLY_CONSTRUCTIBLE(...) __is_trivially_constructible(__VA_ARGS__) -#endif // __check_builtin(is_trivially_constructible) - -#if __check_builtin(is_trivially_copyable) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 501) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_IS_TRIVIALLY_COPYABLE(...) __is_trivially_copyable(__VA_ARGS__) -#endif // __check_builtin(is_trivially_copyable) - -#if __check_builtin(is_trivially_destructible) \ - || defined(_CCCL_COMPILER_MSVC) -#define _LIBCUDACXX_IS_TRIVIALLY_DESTRUCTIBLE(...) __is_trivially_destructible(__VA_ARGS__) -#endif // __check_builtin(is_trivially_destructible) - -#if __check_builtin(is_union) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_IS_UNION(...) __is_union(__VA_ARGS__) -#endif // __check_builtin(is_union) - -#if __check_builtin(is_unsigned) -#define _LIBCUDACXX_IS_UNSIGNED(...) 
__is_unsigned(__VA_ARGS__) -#endif // __check_builtin(is_unsigned) - -#ifndef _LIBCUDACXX_USE_IS_UNSIGNED_FALLBACK -#if defined(_CCCL_CUDACC_BELOW_11_3) -#define _LIBCUDACXX_USE_IS_UNSIGNED_FALLBACK -#endif // nvcc < 11.3 -#endif // !_LIBCUDACXX_USE_IS_UNSIGNED_FALLBACK +# if 0 // __check_builtin(is_signed) +# define _LIBCUDACXX_IS_SIGNED(...) __is_signed(__VA_ARGS__) +# endif // __check_builtin(is_signed) + +# if __check_builtin(is_standard_layout) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 407) \ + || defined(_CCCL_COMPILER_MSVC) || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_IS_STANDARD_LAYOUT(...) __is_standard_layout(__VA_ARGS__) +# endif // __check_builtin(is_standard_layout) + +# if __check_builtin(is_trivial) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 405) || defined(_CCCL_COMPILER_MSVC) \ + || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_IS_TRIVIAL(...) __is_trivial(__VA_ARGS__) +# endif // __check_builtin(is_trivial) + +# if __check_builtin(is_trivially_assignable) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 501) \ + || defined(_CCCL_COMPILER_MSVC) || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_IS_TRIVIALLY_ASSIGNABLE(...) __is_trivially_assignable(__VA_ARGS__) +# endif // __check_builtin(is_trivially_assignable) + +# if __check_builtin(is_trivially_constructible) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 501) \ + || defined(_CCCL_COMPILER_MSVC) || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_IS_TRIVIALLY_CONSTRUCTIBLE(...) __is_trivially_constructible(__VA_ARGS__) +# endif // __check_builtin(is_trivially_constructible) + +# if __check_builtin(is_trivially_copyable) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 501) \ + || defined(_CCCL_COMPILER_MSVC) || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_IS_TRIVIALLY_COPYABLE(...) __is_trivially_copyable(__VA_ARGS__) +# endif // __check_builtin(is_trivially_copyable) + +# if __check_builtin(is_trivially_destructible) || defined(_CCCL_COMPILER_MSVC) +# define _LIBCUDACXX_IS_TRIVIALLY_DESTRUCTIBLE(...) __is_trivially_destructible(__VA_ARGS__) +# endif // __check_builtin(is_trivially_destructible) + +# if __check_builtin(is_union) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 403) || defined(_CCCL_COMPILER_MSVC) \ + || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_IS_UNION(...) __is_union(__VA_ARGS__) +# endif // __check_builtin(is_union) + +# if __check_builtin(is_unsigned) +# define _LIBCUDACXX_IS_UNSIGNED(...) __is_unsigned(__VA_ARGS__) +# endif // __check_builtin(is_unsigned) + +# ifndef _LIBCUDACXX_USE_IS_UNSIGNED_FALLBACK +# if defined(_CCCL_CUDACC_BELOW_11_3) +# define _LIBCUDACXX_USE_IS_UNSIGNED_FALLBACK +# endif // nvcc < 11.3 +# endif // !_LIBCUDACXX_USE_IS_UNSIGNED_FALLBACK // libstdc++ defines this as a function, breaking functionality -#if 0 // __check_builtin(is_void) -#define _LIBCUDACXX_IS_VOID(...) __is_void(__VA_ARGS__) -#endif // __check_builtin(is_void) +# if 0 // __check_builtin(is_void) +# define _LIBCUDACXX_IS_VOID(...) __is_void(__VA_ARGS__) +# endif // __check_builtin(is_void) // Disabled due to libstdc++ conflict -#if 0 // __check_builtin(make_signed) -#define _LIBCUDACXX_MAKE_SIGNED(...) __make_signed(__VA_ARGS__) -#endif // __check_builtin(make_signed) +# if 0 // __check_builtin(make_signed) +# define _LIBCUDACXX_MAKE_SIGNED(...) __make_signed(__VA_ARGS__) +# endif // __check_builtin(make_signed) // Disabled due to libstdc++ conflict -#if 0 // __check_builtin(make_unsigned) -#define _LIBCUDACXX_MAKE_UNSIGNED(...) 
__make_unsigned(__VA_ARGS__) -#endif // __check_builtin(make_unsigned) +# if 0 // __check_builtin(make_unsigned) +# define _LIBCUDACXX_MAKE_UNSIGNED(...) __make_unsigned(__VA_ARGS__) +# endif // __check_builtin(make_unsigned) // Disabled due to libstdc++ conflict -#if 0 // __check_builtin(remove_all_extents) -#define _LIBCUDACXX_REMOVE_ALL_EXTENTS(...) __remove_all_extents(__VA_ARGS__) -#endif // __check_builtin(remove_all_extents) +# if 0 // __check_builtin(remove_all_extents) +# define _LIBCUDACXX_REMOVE_ALL_EXTENTS(...) __remove_all_extents(__VA_ARGS__) +# endif // __check_builtin(remove_all_extents) // Disabled due to libstdc++ conflict -#if 0 // __check_builtin(remove_const) -#define _LIBCUDACXX_REMOVE_CONST(...) __remove_const(__VA_ARGS__) -#endif // __check_builtin(remove_const) +# if 0 // __check_builtin(remove_const) +# define _LIBCUDACXX_REMOVE_CONST(...) __remove_const(__VA_ARGS__) +# endif // __check_builtin(remove_const) // Disabled due to libstdc++ conflict -#if 0 // __check_builtin(remove_cv) -#define _LIBCUDACXX_REMOVE_CV(...) __remove_cv(__VA_ARGS__) -#endif // __check_builtin(remove_cv) +# if 0 // __check_builtin(remove_cv) +# define _LIBCUDACXX_REMOVE_CV(...) __remove_cv(__VA_ARGS__) +# endif // __check_builtin(remove_cv) // Disabled due to libstdc++ conflict -#if 0 // __check_builtin(remove_cvref) -#define _LIBCUDACXX_REMOVE_CVREF(...) __remove_cvref(__VA_ARGS__) -#endif // __check_builtin(remove_cvref) +# if 0 // __check_builtin(remove_cvref) +# define _LIBCUDACXX_REMOVE_CVREF(...) __remove_cvref(__VA_ARGS__) +# endif // __check_builtin(remove_cvref) // Disabled due to libstdc++ conflict -#if 0 // __check_builtin(remove_extent) -#define _LIBCUDACXX_REMOVE_EXTENT(...) __remove_extent(__VA_ARGS__) -#endif // __check_builtin(remove_extent) +# if 0 // __check_builtin(remove_extent) +# define _LIBCUDACXX_REMOVE_EXTENT(...) __remove_extent(__VA_ARGS__) +# endif // __check_builtin(remove_extent) // Disabled due to libstdc++ conflict -#if 0 // __check_builtin(remove_pointer) -#define _LIBCUDACXX_REMOVE_POINTER(...) __remove_pointer(__VA_ARGS__) -#endif // __check_builtin(remove_pointer) +# if 0 // __check_builtin(remove_pointer) +# define _LIBCUDACXX_REMOVE_POINTER(...) __remove_pointer(__VA_ARGS__) +# endif // __check_builtin(remove_pointer) // Disabled due to libstdc++ conflict -#if 0 // __check_builtin(remove_reference_t) -#define _LIBCUDACXX_REMOVE_REFERENCE_T(...) __remove_reference_t(__VA_ARGS__) -#endif // __check_builtin(remove_reference_t) +# if 0 // __check_builtin(remove_reference_t) +# define _LIBCUDACXX_REMOVE_REFERENCE_T(...) __remove_reference_t(__VA_ARGS__) +# endif // __check_builtin(remove_reference_t) // Disabled due to libstdc++ conflict -#if 0 // __check_builtin(remove_volatile) -#define _LIBCUDACXX_REMOVE_VOLATILE(...) __remove_volatile(__VA_ARGS__) -#endif // __check_builtin(remove_volatile) +# if 0 // __check_builtin(remove_volatile) +# define _LIBCUDACXX_REMOVE_VOLATILE(...) __remove_volatile(__VA_ARGS__) +# endif // __check_builtin(remove_volatile) -#if __check_builtin(underlying_type) \ - || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 407) \ - || defined(_CCCL_COMPILER_MSVC) \ - || defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_UNDERLYING_TYPE(...) __underlying_type(__VA_ARGS__) -#endif // __check_builtin(underlying_type) +# if __check_builtin(underlying_type) || (defined(_CCCL_COMPILER_GCC) && _GNUC_VER >= 407) \ + || defined(_CCCL_COMPILER_MSVC) || defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_UNDERLYING_TYPE(...) 
__underlying_type(__VA_ARGS__) +# endif // __check_builtin(underlying_type) -#if defined(_CCCL_COMPILER_CLANG) +# if defined(_CCCL_COMPILER_CLANG) // _LIBCUDACXX_ALTERNATE_STRING_LAYOUT is an old name for // _LIBCUDACXX_ABI_ALTERNATE_STRING_LAYOUT left here for backward compatibility. -#if defined(_LIBCUDACXX_ALTERNATE_STRING_LAYOUT) -#define _LIBCUDACXX_ABI_ALTERNATE_STRING_LAYOUT -#endif +# if defined(_LIBCUDACXX_ALTERNATE_STRING_LAYOUT) +# define _LIBCUDACXX_ABI_ALTERNATE_STRING_LAYOUT +# endif -#if __cplusplus < 201103L +# if __cplusplus < 201103L typedef __char16_t char16_t; typedef __char32_t char32_t; -#endif +# endif -#if !(__has_feature(cxx_strong_enums)) -#define _LIBCUDACXX_HAS_NO_STRONG_ENUMS -#endif +# if !(__has_feature(cxx_strong_enums)) +# define _LIBCUDACXX_HAS_NO_STRONG_ENUMS +# endif -#if !(__has_feature(cxx_lambdas)) -#define _LIBCUDACXX_HAS_NO_LAMBDAS -#endif +# if !(__has_feature(cxx_lambdas)) +# define _LIBCUDACXX_HAS_NO_LAMBDAS +# endif -#if !(__has_feature(cxx_nullptr)) -# if (__has_extension(cxx_nullptr) || __has_keyword(__nullptr)) && defined(_LIBCUDACXX_ABI_ALWAYS_USE_CXX11_NULLPTR) -# define nullptr __nullptr -# else -# define _LIBCUDACXX_HAS_NO_NULLPTR -# endif -#endif +# if !(__has_feature(cxx_nullptr)) +# if (__has_extension(cxx_nullptr) || __has_keyword(__nullptr)) \ + && defined(_LIBCUDACXX_ABI_ALWAYS_USE_CXX11_NULLPTR) +# define nullptr __nullptr +# else +# define _LIBCUDACXX_HAS_NO_NULLPTR +# endif +# endif -#if !(__has_feature(cxx_rvalue_references)) -#define _LIBCUDACXX_HAS_NO_RVALUE_REFERENCES -#endif +# if !(__has_feature(cxx_rvalue_references)) +# define _LIBCUDACXX_HAS_NO_RVALUE_REFERENCES +# endif -#if !(__has_feature(cxx_auto_type)) -#define _LIBCUDACXX_HAS_NO_AUTO_TYPE -#endif +# if !(__has_feature(cxx_auto_type)) +# define _LIBCUDACXX_HAS_NO_AUTO_TYPE +# endif -#if !(__has_feature(cxx_variadic_templates)) -#define _LIBCUDACXX_HAS_NO_VARIADICS -#endif +# if !(__has_feature(cxx_variadic_templates)) +# define _LIBCUDACXX_HAS_NO_VARIADICS +# endif -#if !(__has_feature(cxx_generalized_initializers)) -#define _LIBCUDACXX_HAS_NO_GENERALIZED_INITIALIZERS -#endif +# if !(__has_feature(cxx_generalized_initializers)) +# define _LIBCUDACXX_HAS_NO_GENERALIZED_INITIALIZERS +# endif // Objective-C++ features (opt-in) -#if __has_feature(objc_arc) -#define _LIBCUDACXX_HAS_OBJC_ARC -#endif +# if __has_feature(objc_arc) +# define _LIBCUDACXX_HAS_OBJC_ARC +# endif -#if __has_feature(objc_arc_weak) -#define _LIBCUDACXX_HAS_OBJC_ARC_WEAK -#endif +# if __has_feature(objc_arc_weak) +# define _LIBCUDACXX_HAS_OBJC_ARC_WEAK +# endif -#if !(__has_feature(cxx_variable_templates)) -#define _LIBCUDACXX_HAS_NO_VARIABLE_TEMPLATES -#endif +# if !(__has_feature(cxx_variable_templates)) +# define _LIBCUDACXX_HAS_NO_VARIABLE_TEMPLATES +# endif -#if !(__has_feature(cxx_noexcept)) -#define _LIBCUDACXX_HAS_NO_NOEXCEPT -#endif +# if !(__has_feature(cxx_noexcept)) +# define _LIBCUDACXX_HAS_NO_NOEXCEPT +# endif // Allow for build-time disabling of unsigned integer sanitization -#if !defined(_LIBCUDACXX_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK) && __has_attribute(no_sanitize) -#define _LIBCUDACXX_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK __attribute__((__no_sanitize__("unsigned-integer-overflow"))) -#endif +# if !defined(_LIBCUDACXX_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK) && __has_attribute(no_sanitize) +# define _LIBCUDACXX_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK \ + __attribute__((__no_sanitize__("unsigned-integer-overflow"))) +# endif -#define _LIBCUDACXX_DISABLE_EXTENSION_WARNING __extension__ +# 
define _LIBCUDACXX_DISABLE_EXTENSION_WARNING __extension__ -#elif defined(_CCCL_COMPILER_GCC) +# elif defined(_CCCL_COMPILER_GCC) -#ifndef _LIBCUDACXX_USE_IS_ASSIGNABLE_FALLBACK +# ifndef _LIBCUDACXX_USE_IS_ASSIGNABLE_FALLBACK // FIXME: GCC 8.0 supports this trait, but it has a bug. // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91592 // https://godbolt.org/z/IljfIw -#define _LIBCUDACXX_USE_IS_ASSIGNABLE_FALLBACK -#endif // _LIBCUDACXX_USE_IS_ASSIGNABLE_FALLBACK +# define _LIBCUDACXX_USE_IS_ASSIGNABLE_FALLBACK +# endif // _LIBCUDACXX_USE_IS_ASSIGNABLE_FALLBACK // GCC 5 supports variable templates -#if !defined(__cpp_variable_templates) || __cpp_variable_templates < 201304L -#define _LIBCUDACXX_HAS_NO_VARIABLE_TEMPLATES -#endif +# if !defined(__cpp_variable_templates) || __cpp_variable_templates < 201304L +# define _LIBCUDACXX_HAS_NO_VARIABLE_TEMPLATES +# endif -#if _GNUC_VER < 600 -#define _LIBCUDACXX_GCC_MATH_IN_STD -#endif +# if _GNUC_VER < 600 +# define _LIBCUDACXX_GCC_MATH_IN_STD +# endif // NVCC cannot properly handle some deductions occuring within NOEXCEPT // C++17 mode causes reference instatiation errors in tuple -#if (_GNUC_VER >= 702 && _GNUC_VER <= 805) -#if defined(_CCCL_CUDA_COMPILER_NVCC) && _CCCL_STD_VER == 2017 -#define _LIBCUDACXX_NO_TUPLE_NOEXCEPT -#endif -#endif +# if (_GNUC_VER >= 702 && _GNUC_VER <= 805) +# if defined(_CCCL_CUDA_COMPILER_NVCC) && _CCCL_STD_VER == 2017 +# define _LIBCUDACXX_NO_TUPLE_NOEXCEPT +# endif +# endif -#define _LIBCUDACXX_DISABLE_EXTENSION_WARNING __extension__ +# define _LIBCUDACXX_DISABLE_EXTENSION_WARNING __extension__ -#elif defined(_CCCL_COMPILER_MSVC) +# elif defined(_CCCL_COMPILER_MSVC) -#define _LIBCUDACXX_WARNING(x) __pragma(message(__FILE__ "(" _LIBCUDACXX_TOSTRING(__LINE__) ") : warning note: " x)) +# define _LIBCUDACXX_WARNING(x) __pragma(message(__FILE__ "(" _LIBCUDACXX_TOSTRING(__LINE__) ") : warning note: " x)) // https://github.com/microsoft/STL/blob/master/stl/inc/yvals_core.h#L353 // warning C4100: 'quack': unreferenced formal parameter @@ -946,143 +878,125 @@ typedef __char32_t char32_t; // warning C4668: 'meow' is not defined as a preprocessor macro, replacing with '0' for '#if/#elif' // warning C4800: 'boo': forcing value to bool 'true' or 'false' (performance warning) // warning C4996: 'meow': was declared deprecated -#define _LIBCUDACXX_MSVC_DISABLED_WARNINGS \ - 4100 \ - 4127 \ - 4180 \ - 4197 \ - 4296 \ - 4324 \ - 4455 \ - 4503 \ - 4522 \ - 4668 \ - 4800 \ - 4996 \ - /**/ - -#if _MSC_VER < 1900 -#error "MSVC versions prior to Visual Studio 2015 are not supported" -#endif +# define _LIBCUDACXX_MSVC_DISABLED_WARNINGS 4100 4127 4180 4197 4296 4324 4455 4503 4522 4668 4800 4996 /**/ + +# if _MSC_VER < 1900 +# error "MSVC versions prior to Visual Studio 2015 are not supported" +# endif // MSVC implemented P0030R1 in 15.7, only available under C++17 -#if _MSC_VER < 1914 -#define _LIBCUDACXX_NO_HOST_CPP17_HYPOT -#endif +# if _MSC_VER < 1914 +# define _LIBCUDACXX_NO_HOST_CPP17_HYPOT +# endif -#if _MSC_VER < 1920 -#define _LIBCUDACXX_HAS_NO_NOEXCEPT_SFINAE -#define _LIBCUDACXX_HAS_NO_LOGICAL_METAFUNCTION_ALIASES -#endif +# if _MSC_VER < 1920 +# define _LIBCUDACXX_HAS_NO_NOEXCEPT_SFINAE +# define _LIBCUDACXX_HAS_NO_LOGICAL_METAFUNCTION_ALIASES +# endif // MSVC exposed __iso_volatile intrinsics beginning on 1924 for x86 -#if _MSC_VER < 1924 - #define _LIBCUDACXX_MSVC_HAS_NO_ISO_INTRIN -#endif +# if _MSC_VER < 1924 +# define _LIBCUDACXX_MSVC_HAS_NO_ISO_INTRIN +# endif -#if _CCCL_STD_VER < 2014 -#define 
_LIBCUDACXX_HAS_NO_VARIABLE_TEMPLATES -#endif +# if _CCCL_STD_VER < 2014 +# define _LIBCUDACXX_HAS_NO_VARIABLE_TEMPLATES +# endif -#define _LIBCUDACXX_WEAK +# define _LIBCUDACXX_WEAK -#define _LIBCUDACXX_HAS_NO_VECTOR_EXTENSION +# define _LIBCUDACXX_HAS_NO_VECTOR_EXTENSION -#define _LIBCUDACXX_DISABLE_EXTENSION_WARNING +# define _LIBCUDACXX_DISABLE_EXTENSION_WARNING -#elif defined(_CCCL_COMPILER_IBM) +# elif defined(_CCCL_COMPILER_IBM) -#define _ATTRIBUTE(x) __attribute__((x)) +# define _ATTRIBUTE(x) __attribute__((x)) -#define _LIBCUDACXX_HAS_NO_UNICODE_CHARS -#define _LIBCUDACXX_HAS_NO_VARIABLE_TEMPLATES +# define _LIBCUDACXX_HAS_NO_UNICODE_CHARS +# define _LIBCUDACXX_HAS_NO_VARIABLE_TEMPLATES -#if defined(_AIX) -#define __MULTILOCALE_API -#endif +# if defined(_AIX) +# define __MULTILOCALE_API +# endif -#define _LIBCUDACXX_HAS_NO_VECTOR_EXTENSION +# define _LIBCUDACXX_HAS_NO_VECTOR_EXTENSION -#elif defined(_CCCL_COMPILER_NVRTC) || defined(_CCCL_COMPILER_NVHPC) +# elif defined(_CCCL_COMPILER_NVRTC) || defined(_CCCL_COMPILER_NVHPC) -#if !defined(__cpp_variable_templates) || __cpp_variable_templates < 201304L -#define _LIBCUDACXX_HAS_NO_VARIABLE_TEMPLATES -#endif +# if !defined(__cpp_variable_templates) || __cpp_variable_templates < 201304L +# define _LIBCUDACXX_HAS_NO_VARIABLE_TEMPLATES +# endif -#define _LIBCUDACXX_DISABLE_EXTENSION_WARNING +# define _LIBCUDACXX_DISABLE_EXTENSION_WARNING -#endif // _CCCL_COMPILER_[CLANG|GCC|MSVC|IBM|NVRTC] +# endif // _CCCL_COMPILER_[CLANG|GCC|MSVC|IBM|NVRTC] -#if defined(_CCCL_COMPILER_NVHPC) && !defined(__cuda_std__) +# if defined(_CCCL_COMPILER_NVHPC) && !defined(__cuda_std__) // Forcefully disable visibility controls when used as the standard library with NVC++. // TODO: reevaluate. -#define _LIBCUDACXX_HIDE_FROM_ABI -#ifndef _LIBCUDACXX_DISABLE_EXTERN_TEMPLATE -#define _LIBCUDACXX_DISABLE_EXTERN_TEMPLATE -#endif -#endif - -#ifndef _LIBCUDACXX_FREESTANDING -#if defined(__cuda_std__) \ - || !defined(__STDC_HOSTED__) -# define _LIBCUDACXX_FREESTANDING -#endif -#endif // !_LIBCUDACXX_FREESTANDING +# define _LIBCUDACXX_HIDE_FROM_ABI +# ifndef _LIBCUDACXX_DISABLE_EXTERN_TEMPLATE +# define _LIBCUDACXX_DISABLE_EXTERN_TEMPLATE +# endif +# endif -#ifndef _LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS -#if defined(_CCCL_COMPILER_NVRTC) \ - || (defined(_CCCL_COMPILER_NVHPC) && !defined(__cuda_std__)) -# define _LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS -#endif -#endif // _LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS +# ifndef _LIBCUDACXX_FREESTANDING +# if defined(__cuda_std__) || !defined(__STDC_HOSTED__) +# define _LIBCUDACXX_FREESTANDING +# endif +# endif // !_LIBCUDACXX_FREESTANDING -#ifndef _LIBCUDACXX_HAS_CUDA_ATOMIC_EXT -#if defined(__cuda_std__) -# define _LIBCUDACXX_HAS_CUDA_ATOMIC_EXT -#endif -#endif // _LIBCUDACXX_HAS_CUDA_ATOMIC_EXT +# ifndef _LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS +# if defined(_CCCL_COMPILER_NVRTC) || (defined(_CCCL_COMPILER_NVHPC) && !defined(__cuda_std__)) +# define _LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS +# endif +# endif // _LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS -#ifndef _LIBCUDACXX_HAS_EXTERNAL_ATOMIC_IMP -#if defined(__cuda_std__) -# define _LIBCUDACXX_HAS_EXTERNAL_ATOMIC_IMP -#endif -#endif // _LIBCUDACXX_HAS_EXTERNAL_ATOMIC_IMP +# ifndef _LIBCUDACXX_HAS_CUDA_ATOMIC_EXT +# if defined(__cuda_std__) +# define _LIBCUDACXX_HAS_CUDA_ATOMIC_EXT +# endif +# endif // _LIBCUDACXX_HAS_CUDA_ATOMIC_EXT -#ifndef _LIBCUDACXX_HAS_NO_ASAN -#if defined(_CCCL_COMPILER_GCC) -# if !defined(__SANITIZE_ADDRESS__) -# define 
_LIBCUDACXX_HAS_NO_ASAN -# endif // !__SANITIZE_ADDRESS__ -#elif defined(_CCCL_COMPILER_CLANG) -# if !__has_feature(address_sanitizer) -# define _LIBCUDACXX_HAS_NO_ASAN -# endif // !__has_feature(address_sanitizer) -#else -# define _LIBCUDACXX_HAS_NO_ASAN -#endif // _CCCL_COMPILER[MSVC|IBM|NVHPC|NVRTC] -#endif // _LIBCUDACXX_HAS_NO_ASAN - -#ifndef _LIBCUDACXX_HAS_NO_CXX20_CHRONO_LITERALS -#if defined(__cuda_std__) \ - || (defined(_CCCL_COMPILER_CLANG) && _LIBCUDACXX_CLANG_VER < 800) -# define _LIBCUDACXX_HAS_NO_CXX20_CHRONO_LITERALS -#endif // __cuda_std__ -#endif // _LIBCUDACXX_HAS_NO_CXX20_CHRONO_LITERALS - -#ifndef _LIBCUDACXX_HAS_NO_INT128 -#if defined(_CCCL_COMPILER_MSVC) \ - || (defined(_CCCL_COMPILER_NVRTC) && !defined(__CUDACC_RTC_INT128__)) \ - || (defined(_CCCL_CUDA_COMPILER_NVCC) && (_CCCL_CUDACC_VER < 1105000)) \ - || !defined(__SIZEOF_INT128__) -# define _LIBCUDACXX_HAS_NO_INT128 -#endif -#endif // !_LIBCUDACXX_HAS_NO_INT128 +# ifndef _LIBCUDACXX_HAS_EXTERNAL_ATOMIC_IMP +# if defined(__cuda_std__) +# define _LIBCUDACXX_HAS_EXTERNAL_ATOMIC_IMP +# endif +# endif // _LIBCUDACXX_HAS_EXTERNAL_ATOMIC_IMP + +# ifndef _LIBCUDACXX_HAS_NO_ASAN +# if defined(_CCCL_COMPILER_GCC) +# if !defined(__SANITIZE_ADDRESS__) +# define _LIBCUDACXX_HAS_NO_ASAN +# endif // !__SANITIZE_ADDRESS__ +# elif defined(_CCCL_COMPILER_CLANG) +# if !__has_feature(address_sanitizer) +# define _LIBCUDACXX_HAS_NO_ASAN +# endif // !__has_feature(address_sanitizer) +# else +# define _LIBCUDACXX_HAS_NO_ASAN +# endif // _CCCL_COMPILER[MSVC|IBM|NVHPC|NVRTC] +# endif // _LIBCUDACXX_HAS_NO_ASAN + +# ifndef _LIBCUDACXX_HAS_NO_CXX20_CHRONO_LITERALS +# if defined(__cuda_std__) || (defined(_CCCL_COMPILER_CLANG) && _LIBCUDACXX_CLANG_VER < 800) +# define _LIBCUDACXX_HAS_NO_CXX20_CHRONO_LITERALS +# endif // __cuda_std__ +# endif // _LIBCUDACXX_HAS_NO_CXX20_CHRONO_LITERALS + +# ifndef _LIBCUDACXX_HAS_NO_INT128 +# if defined(_CCCL_COMPILER_MSVC) || (defined(_CCCL_COMPILER_NVRTC) && !defined(__CUDACC_RTC_INT128__)) \ + || (defined(_CCCL_CUDA_COMPILER_NVCC) && (_CCCL_CUDACC_VER < 1105000)) || !defined(__SIZEOF_INT128__) +# define _LIBCUDACXX_HAS_NO_INT128 +# endif +# endif // !_LIBCUDACXX_HAS_NO_INT128 -#ifndef _LIBCUDACXX_HAS_NO_LONG_DOUBLE -#if defined(_CCCL_CUDACC) -# define _LIBCUDACXX_HAS_NO_LONG_DOUBLE -#endif -#endif // _LIBCUDACXX_HAS_NO_LONG_DOUBLE +# ifndef _LIBCUDACXX_HAS_NO_LONG_DOUBLE +# if defined(_CCCL_CUDACC) +# define _LIBCUDACXX_HAS_NO_LONG_DOUBLE +# endif +# endif // _LIBCUDACXX_HAS_NO_LONG_DOUBLE # ifndef _LIBCUDACXX_HAS_NVFP16 # if __has_include() \ @@ -1103,43 +1017,42 @@ typedef __char32_t char32_t; # endif # endif // !_LIBCUDACXX_HAS_NVBF16 -#ifndef _LIBCUDACXX_HAS_NO_MONOTONIC_CLOCK -#if defined(__cuda_std__) -# define _LIBCUDACXX_HAS_NO_MONOTONIC_CLOCK -#endif -#endif // _LIBCUDACXX_HAS_NO_MONOTONIC_CLOCK +# ifndef _LIBCUDACXX_HAS_NO_MONOTONIC_CLOCK +# if defined(__cuda_std__) +# define _LIBCUDACXX_HAS_NO_MONOTONIC_CLOCK +# endif +# endif // _LIBCUDACXX_HAS_NO_MONOTONIC_CLOCK -#ifndef _LIBCUDACXX_HAS_NO_PLATFORM_WAIT -#if defined(__cuda_std__) -# define _LIBCUDACXX_HAS_NO_PLATFORM_WAIT -#endif -#endif // _LIBCUDACXX_HAS_NO_PLATFORM_WAIT +# ifndef _LIBCUDACXX_HAS_NO_PLATFORM_WAIT +# if defined(__cuda_std__) +# define _LIBCUDACXX_HAS_NO_PLATFORM_WAIT +# endif +# endif // _LIBCUDACXX_HAS_NO_PLATFORM_WAIT -#ifndef _LIBCUDACXX_HAS_NO_PRAGMA_PUSH_POP_MACRO -#if (defined(_CCCL_COMPILER_MSVC) && _MSC_VER < 1920) \ - || defined(_CCCL_COMPILER_NVRTC) \ - || defined(_CCCL_COMPILER_IBM) -#define 
_LIBCUDACXX_HAS_NO_PRAGMA_PUSH_POP_MACRO -#endif -#endif // _LIBCUDACXX_HAS_NO_PRAGMA_PUSH_POP_MACRO +# ifndef _LIBCUDACXX_HAS_NO_PRAGMA_PUSH_POP_MACRO +# if (defined(_CCCL_COMPILER_MSVC) && _MSC_VER < 1920) || defined(_CCCL_COMPILER_NVRTC) \ + || defined(_CCCL_COMPILER_IBM) +# define _LIBCUDACXX_HAS_NO_PRAGMA_PUSH_POP_MACRO +# endif +# endif // _LIBCUDACXX_HAS_NO_PRAGMA_PUSH_POP_MACRO -#ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE -#if defined(__cuda_std__) -# define _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE -#endif -#endif // _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE +# ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE +# if defined(__cuda_std__) +# define _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE +# endif +# endif // _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE -#ifndef _LIBCUDACXX_HAS_NO_TREE_BARRIER -#if defined(__cuda_std__) -# define _LIBCUDACXX_HAS_NO_TREE_BARRIER -#endif -#endif // _LIBCUDACXX_HAS_NO_TREE_BARRIER +# ifndef _LIBCUDACXX_HAS_NO_TREE_BARRIER +# if defined(__cuda_std__) +# define _LIBCUDACXX_HAS_NO_TREE_BARRIER +# endif +# endif // _LIBCUDACXX_HAS_NO_TREE_BARRIER -#ifndef _LIBCUDACXX_HAS_NO_WCHAR_H -#if defined(__cuda_std__) -# define _LIBCUDACXX_HAS_NO_WCHAR_H -#endif -#endif // _LIBCUDACXX_HAS_NO_WCHAR_H +# ifndef _LIBCUDACXX_HAS_NO_WCHAR_H +# if defined(__cuda_std__) +# define _LIBCUDACXX_HAS_NO_WCHAR_H +# endif +# endif // _LIBCUDACXX_HAS_NO_WCHAR_H # ifndef _LIBCUDACXX_NO_EXCEPTIONS # if !defined(LIBCUDACXX_ENABLE_EXCEPTIONS) || (defined(_CCCL_COMPILER_MSVC) && _HAS_EXCEPTIONS == 0) \ @@ -1150,405 +1063,508 @@ typedef __char32_t char32_t; // Try to find out if RTTI is disabled. // g++ and cl.exe have RTTI on by default and define a macro when it is. -#ifndef _LIBCUDACXX_NO_RTTI -#if defined(__cuda_std__) \ - || (defined(_CCCL_COMPILER_CLANG) && !(__has_feature(cxx_rtti))) \ - || (defined(_CCCL_COMPILER_GCC) && !defined(__GXX_RTTI)) \ - || (defined(_CCCL_COMPILER_MSVC) && !defined(_CPPRTTI)) -# define _LIBCUDACXX_NO_RTTI -#endif -#endif // !_LIBCUDACXX_NO_RTTI - -#ifndef _LIBCUDACXX_NODEBUG_TYPE -#if defined(__cuda_std__) -# define _LIBCUDACXX_NODEBUG_TYPE -#elif __has_attribute(__nodebug__) \ - && (defined(_CCCL_COMPILER_CLANG) && _LIBCUDACXX_CLANG_VER >= 1210) -# define _LIBCUDACXX_NODEBUG_TYPE __attribute__((nodebug)) -#else -# define _LIBCUDACXX_NODEBUG_TYPE -#endif -#endif // !_LIBCUDACXX_NODEBUG_TYPE +# ifndef _LIBCUDACXX_NO_RTTI +# if defined(__cuda_std__) || (defined(_CCCL_COMPILER_CLANG) && !(__has_feature(cxx_rtti))) \ + || (defined(_CCCL_COMPILER_GCC) && !defined(__GXX_RTTI)) || (defined(_CCCL_COMPILER_MSVC) && !defined(_CPPRTTI)) +# define _LIBCUDACXX_NO_RTTI +# endif +# endif // !_LIBCUDACXX_NO_RTTI -#if defined(_LIBCUDACXX_OBJECT_FORMAT_COFF) +# ifndef _LIBCUDACXX_NODEBUG_TYPE +# if defined(__cuda_std__) +# define _LIBCUDACXX_NODEBUG_TYPE +# elif __has_attribute(__nodebug__) && (defined(_CCCL_COMPILER_CLANG) && _LIBCUDACXX_CLANG_VER >= 1210) +# define _LIBCUDACXX_NODEBUG_TYPE __attribute__((nodebug)) +# else +# define _LIBCUDACXX_NODEBUG_TYPE +# endif +# endif // !_LIBCUDACXX_NODEBUG_TYPE -#ifdef _DLL -# define _LIBCUDACXX_CRT_FUNC __declspec(dllimport) -#else -# define _LIBCUDACXX_CRT_FUNC -#endif +# if defined(_LIBCUDACXX_OBJECT_FORMAT_COFF) -#if defined(_LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS) -# define _LIBCUDACXX_DLL_VIS -# define _LIBCUDACXX_EXTERN_TEMPLATE_TYPE_VIS -# define _LIBCUDACXX_CLASS_TEMPLATE_INSTANTIATION_VIS -# define _LIBCUDACXX_OVERRIDABLE_FUNC_VIS -# define _LIBCUDACXX_EXPORTED_FROM_ABI -#elif 
defined(_LIBCUDACXX_BUILDING_LIBRARY) -# define _LIBCUDACXX_DLL_VIS __declspec(dllexport) -# if defined(__MINGW32__) -# define _LIBCUDACXX_EXTERN_TEMPLATE_TYPE_VIS _LIBCUDACXX_DLL_VIS -# define _LIBCUDACXX_CLASS_TEMPLATE_INSTANTIATION_VIS -# else -# define _LIBCUDACXX_EXTERN_TEMPLATE_TYPE_VIS -# define _LIBCUDACXX_CLASS_TEMPLATE_INSTANTIATION_VIS _LIBCUDACXX_DLL_VIS -# endif -# define _LIBCUDACXX_OVERRIDABLE_FUNC_VIS _LIBCUDACXX_DLL_VIS -# define _LIBCUDACXX_EXPORTED_FROM_ABI __declspec(dllexport) -#else -# define _LIBCUDACXX_DLL_VIS __declspec(dllimport) -# define _LIBCUDACXX_EXTERN_TEMPLATE_TYPE_VIS _LIBCUDACXX_DLL_VIS -# define _LIBCUDACXX_CLASS_TEMPLATE_INSTANTIATION_VIS -# define _LIBCUDACXX_OVERRIDABLE_FUNC_VIS -# define _LIBCUDACXX_EXPORTED_FROM_ABI __declspec(dllimport) -#endif - -#define _LIBCUDACXX_TYPE_VIS _LIBCUDACXX_DLL_VIS -#define _LIBCUDACXX_FUNC_VIS _LIBCUDACXX_DLL_VIS -#define _LIBCUDACXX_EXCEPTION_ABI _LIBCUDACXX_DLL_VIS -#define _LIBCUDACXX_HIDDEN -#define _LIBCUDACXX_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS -#define _LIBCUDACXX_TEMPLATE_VIS -#define _LIBCUDACXX_ENUM_VIS +# ifdef _DLL +# define _LIBCUDACXX_CRT_FUNC __declspec(dllimport) +# else +# define _LIBCUDACXX_CRT_FUNC +# endif -#endif // defined(_LIBCUDACXX_OBJECT_FORMAT_COFF) +# if defined(_LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS) +# define _LIBCUDACXX_DLL_VIS +# define _LIBCUDACXX_EXTERN_TEMPLATE_TYPE_VIS +# define _LIBCUDACXX_CLASS_TEMPLATE_INSTANTIATION_VIS +# define _LIBCUDACXX_OVERRIDABLE_FUNC_VIS +# define _LIBCUDACXX_EXPORTED_FROM_ABI +# elif defined(_LIBCUDACXX_BUILDING_LIBRARY) +# define _LIBCUDACXX_DLL_VIS __declspec(dllexport) +# if defined(__MINGW32__) +# define _LIBCUDACXX_EXTERN_TEMPLATE_TYPE_VIS _LIBCUDACXX_DLL_VIS +# define _LIBCUDACXX_CLASS_TEMPLATE_INSTANTIATION_VIS +# else +# define _LIBCUDACXX_EXTERN_TEMPLATE_TYPE_VIS +# define _LIBCUDACXX_CLASS_TEMPLATE_INSTANTIATION_VIS _LIBCUDACXX_DLL_VIS +# endif +# define _LIBCUDACXX_OVERRIDABLE_FUNC_VIS _LIBCUDACXX_DLL_VIS +# define _LIBCUDACXX_EXPORTED_FROM_ABI __declspec(dllexport) +# else +# define _LIBCUDACXX_DLL_VIS __declspec(dllimport) +# define _LIBCUDACXX_EXTERN_TEMPLATE_TYPE_VIS _LIBCUDACXX_DLL_VIS +# define _LIBCUDACXX_CLASS_TEMPLATE_INSTANTIATION_VIS +# define _LIBCUDACXX_OVERRIDABLE_FUNC_VIS +# define _LIBCUDACXX_EXPORTED_FROM_ABI __declspec(dllimport) +# endif -#ifndef _LIBCUDACXX_HIDDEN -# if !defined(_LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS) -# define _LIBCUDACXX_HIDDEN __attribute__ ((__visibility__("hidden"))) -# else +# define _LIBCUDACXX_TYPE_VIS _LIBCUDACXX_DLL_VIS +# define _LIBCUDACXX_FUNC_VIS _LIBCUDACXX_DLL_VIS +# define _LIBCUDACXX_EXCEPTION_ABI _LIBCUDACXX_DLL_VIS # define _LIBCUDACXX_HIDDEN +# define _LIBCUDACXX_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS +# define _LIBCUDACXX_TEMPLATE_VIS +# define _LIBCUDACXX_ENUM_VIS + +# endif // defined(_LIBCUDACXX_OBJECT_FORMAT_COFF) + +# ifndef _LIBCUDACXX_HIDDEN +# if !defined(_LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS) +# define _LIBCUDACXX_HIDDEN __attribute__((__visibility__("hidden"))) +# else +# define _LIBCUDACXX_HIDDEN +# endif # endif -#endif -#ifndef _LIBCUDACXX_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS -# if !defined(_LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS) +# ifndef _LIBCUDACXX_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS +# if !defined(_LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS) // The inline should be removed once PR32114 is resolved -# define _LIBCUDACXX_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS inline _LIBCUDACXX_HIDDEN -# else -# define 
_LIBCUDACXX_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS +# define _LIBCUDACXX_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS inline _LIBCUDACXX_HIDDEN +# else +# define _LIBCUDACXX_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS +# endif # endif -#endif -#ifndef _LIBCUDACXX_FUNC_VIS -# if !defined(_LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS) -# define _LIBCUDACXX_FUNC_VIS _CCCL_VISIBILITY_DEFAULT -# else -# define _LIBCUDACXX_FUNC_VIS +# ifndef _LIBCUDACXX_FUNC_VIS +# if !defined(_LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS) +# define _LIBCUDACXX_FUNC_VIS _CCCL_VISIBILITY_DEFAULT +# else +# define _LIBCUDACXX_FUNC_VIS +# endif # endif -#endif -#ifndef _LIBCUDACXX_TYPE_VIS -# if !defined(_LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS) -# define _LIBCUDACXX_TYPE_VIS _CCCL_VISIBILITY_DEFAULT -# else -# define _LIBCUDACXX_TYPE_VIS +# ifndef _LIBCUDACXX_TYPE_VIS +# if !defined(_LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS) +# define _LIBCUDACXX_TYPE_VIS _CCCL_VISIBILITY_DEFAULT +# else +# define _LIBCUDACXX_TYPE_VIS +# endif # endif -#endif -#ifndef _LIBCUDACXX_TEMPLATE_VIS -# if !defined(_LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS) -# if __has_attribute(__type_visibility__) -# define _LIBCUDACXX_TEMPLATE_VIS _CCCL_TYPE_VISIBILITY_DEFAULT +# ifndef _LIBCUDACXX_TEMPLATE_VIS +# if !defined(_LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS) +# if __has_attribute(__type_visibility__) +# define _LIBCUDACXX_TEMPLATE_VIS _CCCL_TYPE_VISIBILITY_DEFAULT +# else +# define _LIBCUDACXX_TEMPLATE_VIS _CCCL_VISIBILITY_DEFAULT +# endif # else -# define _LIBCUDACXX_TEMPLATE_VIS _CCCL_VISIBILITY_DEFAULT +# define _LIBCUDACXX_TEMPLATE_VIS # endif -# else -# define _LIBCUDACXX_TEMPLATE_VIS # endif -#endif -#ifndef _LIBCUDACXX_EXPORTED_FROM_ABI -# if !defined(_LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS) -# define _LIBCUDACXX_EXPORTED_FROM_ABI _CCCL_VISIBILITY_DEFAULT -# else -# define _LIBCUDACXX_EXPORTED_FROM_ABI +# ifndef _LIBCUDACXX_EXPORTED_FROM_ABI +# if !defined(_LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS) +# define _LIBCUDACXX_EXPORTED_FROM_ABI _CCCL_VISIBILITY_DEFAULT +# else +# define _LIBCUDACXX_EXPORTED_FROM_ABI +# endif # endif -#endif - -#ifndef _LIBCUDACXX_OVERRIDABLE_FUNC_VIS -#define _LIBCUDACXX_OVERRIDABLE_FUNC_VIS _LIBCUDACXX_FUNC_VIS -#endif -#ifndef _LIBCUDACXX_EXCEPTION_ABI -# if !defined(_LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS) -# define _LIBCUDACXX_EXCEPTION_ABI _CCCL_VISIBILITY_DEFAULT -# else -# define _LIBCUDACXX_EXCEPTION_ABI +# ifndef _LIBCUDACXX_OVERRIDABLE_FUNC_VIS +# define _LIBCUDACXX_OVERRIDABLE_FUNC_VIS _LIBCUDACXX_FUNC_VIS # endif -#endif -#ifndef _LIBCUDACXX_ENUM_VIS -# if !defined(_LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS) -# define _LIBCUDACXX_ENUM_VIS _CCCL_TYPE_VISIBILITY_DEFAULT -# else -# define _LIBCUDACXX_ENUM_VIS +# ifndef _LIBCUDACXX_EXCEPTION_ABI +# if !defined(_LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS) +# define _LIBCUDACXX_EXCEPTION_ABI _CCCL_VISIBILITY_DEFAULT +# else +# define _LIBCUDACXX_EXCEPTION_ABI +# endif # endif -#endif -#ifndef _LIBCUDACXX_EXTERN_TEMPLATE_TYPE_VIS -# if !defined(_LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS) && __has_attribute(__type_visibility__) -# define _LIBCUDACXX_EXTERN_TEMPLATE_TYPE_VIS _CCCL_VISIBILITY_DEFAULT -# else -# define _LIBCUDACXX_EXTERN_TEMPLATE_TYPE_VIS +# ifndef _LIBCUDACXX_ENUM_VIS +# if !defined(_LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS) +# define _LIBCUDACXX_ENUM_VIS _CCCL_TYPE_VISIBILITY_DEFAULT +# else +# define _LIBCUDACXX_ENUM_VIS +# endif # endif -#endif -#ifndef _LIBCUDACXX_CLASS_TEMPLATE_INSTANTIATION_VIS -#define 
_LIBCUDACXX_CLASS_TEMPLATE_INSTANTIATION_VIS -#endif +# ifndef _LIBCUDACXX_EXTERN_TEMPLATE_TYPE_VIS +# if !defined(_LIBCUDACXX_DISABLE_VISIBILITY_ANNOTATIONS) && __has_attribute(__type_visibility__) +# define _LIBCUDACXX_EXTERN_TEMPLATE_TYPE_VIS _CCCL_VISIBILITY_DEFAULT +# else +# define _LIBCUDACXX_EXTERN_TEMPLATE_TYPE_VIS +# endif +# endif -#if __has_attribute(internal_linkage) -# define _LIBCUDACXX_INTERNAL_LINKAGE __attribute__ ((internal_linkage)) -#else -# define _LIBCUDACXX_INTERNAL_LINKAGE _LIBCUDACXX_ALWAYS_INLINE -#endif +# ifndef _LIBCUDACXX_CLASS_TEMPLATE_INSTANTIATION_VIS +# define _LIBCUDACXX_CLASS_TEMPLATE_INSTANTIATION_VIS +# endif -#if __has_attribute(exclude_from_explicit_instantiation) -# define _LIBCUDACXX_EXCLUDE_FROM_EXPLICIT_INSTANTIATION __attribute__ ((__exclude_from_explicit_instantiation__)) -#else - // Try to approximate the effect of exclude_from_explicit_instantiation - // (which is that entities are not assumed to be provided by explicit - // template instantiations in the dylib) by always inlining those entities. -# define _LIBCUDACXX_EXCLUDE_FROM_EXPLICIT_INSTANTIATION _LIBCUDACXX_ALWAYS_INLINE -#endif +# if __has_attribute(internal_linkage) +# define _LIBCUDACXX_INTERNAL_LINKAGE __attribute__((internal_linkage)) +# else +# define _LIBCUDACXX_INTERNAL_LINKAGE _LIBCUDACXX_ALWAYS_INLINE +# endif -#ifndef _LIBCUDACXX_HIDE_FROM_ABI_PER_TU -# ifndef _LIBCUDACXX_HIDE_FROM_ABI_PER_TU_BY_DEFAULT -# define _LIBCUDACXX_HIDE_FROM_ABI_PER_TU 0 +# if __has_attribute(exclude_from_explicit_instantiation) +# define _LIBCUDACXX_EXCLUDE_FROM_EXPLICIT_INSTANTIATION __attribute__((__exclude_from_explicit_instantiation__)) # else -# define _LIBCUDACXX_HIDE_FROM_ABI_PER_TU 1 +// Try to approximate the effect of exclude_from_explicit_instantiation +// (which is that entities are not assumed to be provided by explicit +// template instantiations in the dylib) by always inlining those entities. +# define _LIBCUDACXX_EXCLUDE_FROM_EXPLICIT_INSTANTIATION _LIBCUDACXX_ALWAYS_INLINE # endif -#endif -#ifndef _LIBCUDACXX_HAS_MERGED_TYPEINFO_NAMES_DEFAULT -# ifdef _LIBCUDACXX_OBJECT_FORMAT_COFF // Windows binaries can't merge typeinfos. -# define _LIBCUDACXX_HAS_MERGED_TYPEINFO_NAMES_DEFAULT 0 -#else +# ifndef _LIBCUDACXX_HIDE_FROM_ABI_PER_TU +# ifndef _LIBCUDACXX_HIDE_FROM_ABI_PER_TU_BY_DEFAULT +# define _LIBCUDACXX_HIDE_FROM_ABI_PER_TU 0 +# else +# define _LIBCUDACXX_HIDE_FROM_ABI_PER_TU 1 +# endif +# endif + +# ifndef _LIBCUDACXX_HAS_MERGED_TYPEINFO_NAMES_DEFAULT +# ifdef _LIBCUDACXX_OBJECT_FORMAT_COFF // Windows binaries can't merge typeinfos. +# define _LIBCUDACXX_HAS_MERGED_TYPEINFO_NAMES_DEFAULT 0 +# else // TODO: This isn't strictly correct on ELF platforms due to llvm.org/PR37398 // And we should consider defaulting to OFF. 
-# define _LIBCUDACXX_HAS_MERGED_TYPEINFO_NAMES_DEFAULT 1 -#endif -#endif +# define _LIBCUDACXX_HAS_MERGED_TYPEINFO_NAMES_DEFAULT 1 +# endif +# endif -#ifndef _LIBCUDACXX_HIDE_FROM_ABI -# if _LIBCUDACXX_HIDE_FROM_ABI_PER_TU -# define _LIBCUDACXX_HIDE_FROM_ABI _LIBCUDACXX_HIDDEN _LIBCUDACXX_INTERNAL_LINKAGE -# else -# define _LIBCUDACXX_HIDE_FROM_ABI _LIBCUDACXX_HIDDEN _LIBCUDACXX_EXCLUDE_FROM_EXPLICIT_INSTANTIATION +# ifndef _LIBCUDACXX_HIDE_FROM_ABI +# if _LIBCUDACXX_HIDE_FROM_ABI_PER_TU +# define _LIBCUDACXX_HIDE_FROM_ABI _LIBCUDACXX_HIDDEN _LIBCUDACXX_INTERNAL_LINKAGE +# else +# define _LIBCUDACXX_HIDE_FROM_ABI _LIBCUDACXX_HIDDEN _LIBCUDACXX_EXCLUDE_FROM_EXPLICIT_INSTANTIATION +# endif # endif -#endif -#ifdef _LIBCUDACXX_BUILDING_LIBRARY -# if _LIBCUDACXX_ABI_VERSION > 1 -# define _LIBCUDACXX_HIDE_FROM_ABI_AFTER_V1 _LIBCUDACXX_HIDE_FROM_ABI +# ifdef _LIBCUDACXX_BUILDING_LIBRARY +# if _LIBCUDACXX_ABI_VERSION > 1 +# define _LIBCUDACXX_HIDE_FROM_ABI_AFTER_V1 _LIBCUDACXX_HIDE_FROM_ABI +# else +# define _LIBCUDACXX_HIDE_FROM_ABI_AFTER_V1 +# endif # else -# define _LIBCUDACXX_HIDE_FROM_ABI_AFTER_V1 +# define _LIBCUDACXX_HIDE_FROM_ABI_AFTER_V1 _LIBCUDACXX_HIDE_FROM_ABI # endif -#else -# define _LIBCUDACXX_HIDE_FROM_ABI_AFTER_V1 _LIBCUDACXX_HIDE_FROM_ABI -#endif // Just so we can migrate to the new macros gradually. -#ifdef __cuda_std__ -# define _LIBCUDACXX_INLINE_VISIBILITY _CCCL_HOST_DEVICE -#else -# define _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_HIDE_FROM_ABI -#endif // __cuda_std__ - -#define _LIBCUDACXX_CONCAT1(_LIBCUDACXX_X,_LIBCUDACXX_Y) _LIBCUDACXX_X##_LIBCUDACXX_Y -#define _LIBCUDACXX_CONCAT(_LIBCUDACXX_X,_LIBCUDACXX_Y) _LIBCUDACXX_CONCAT1(_LIBCUDACXX_X,_LIBCUDACXX_Y) - -#ifndef _LIBCUDACXX_ABI_NAMESPACE -#ifdef __cuda_std__ -# define _LIBCUDACXX_ABI_NAMESPACE _LIBCUDACXX_CONCAT(__,_LIBCUDACXX_CUDA_ABI_VERSION) -#else -# define _LIBCUDACXX_ABI_NAMESPACE _LIBCUDACXX_CONCAT(__,_LIBCUDACXX_ABI_VERSION) -#endif // __cuda_std__ -#endif // _LIBCUDACXX_ABI_NAMESPACE - -#ifdef __cuda_std__ -# define _LIBCUDACXX_BEGIN_NAMESPACE_STD_NOVERSION namespace cuda { namespace std { -# define _LIBCUDACXX_END_NAMESPACE_STD_NOVERSION } } -# define _CUDA_VSTD_NOVERSION ::cuda::std -# define _CUDA_VSTD ::cuda::std::_LIBCUDACXX_ABI_NAMESPACE -# define _CUDA_VRANGES ::cuda::std::ranges::_LIBCUDACXX_ABI_NAMESPACE -# define _CUDA_VIEWS ::cuda::std::ranges::views::_LIBCUDACXX_CUDA_ABI_NAMESPACE -# define _CUDA_VMR ::cuda::mr::_LIBCUDACXX_ABI_NAMESPACE -# define _CUDA_VPTX ::cuda::ptx::_LIBCUDACXX_ABI_NAMESPACE -#else -# define _LIBCUDACXX_BEGIN_NAMESPACE_STD_NOVERSION namespace std { -# define _LIBCUDACXX_END_NAMESPACE_STD_NOVERSION } -# define _CUDA_VSTD_NOVERSION ::std -# define _CUDA_VSTD ::std::_LIBCUDACXX_ABI_NAMESPACE -# define _CUDA_VRANGES ::std::ranges::_LIBCUDACXX_ABI_NAMESPACE -# define _CUDA_VIEWS ::std::ranges::views::_LIBCUDACXX_CUDA_ABI_NAMESPACE -#endif - -#ifdef __cuda_std__ -#define _LIBCUDACXX_BEGIN_NAMESPACE_CUDA namespace cuda { inline namespace _LIBCUDACXX_ABI_NAMESPACE { -#define _LIBCUDACXX_END_NAMESPACE_CUDA } } -#define _LIBCUDACXX_BEGIN_NAMESPACE_CUDA_MR namespace cuda { namespace mr { inline namespace _LIBCUDACXX_ABI_NAMESPACE { -#define _LIBCUDACXX_END_NAMESPACE_CUDA_MR } } } -#define _LIBCUDACXX_BEGIN_NAMESPACE_CUDA_DEVICE namespace cuda { namespace device { inline namespace _LIBCUDACXX_ABI_NAMESPACE { -#define _LIBCUDACXX_END_NAMESPACE_CUDA_DEVICE } } } -#define _LIBCUDACXX_BEGIN_NAMESPACE_CUDA_PTX namespace cuda { namespace ptx { inline namespace _LIBCUDACXX_ABI_NAMESPACE { 
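As a usage sketch (not part of the patch), this is how the namespace and visibility helpers being reformatted here are meant to combine inside the library's own headers when __cuda_std__ is defined; __example_identity is a hypothetical name used only for illustration.

_LIBCUDACXX_BEGIN_NAMESPACE_CUDA // expands to: namespace cuda { inline namespace _LIBCUDACXX_ABI_NAMESPACE {
template <class _Tp>
_LIBCUDACXX_INLINE_VISIBILITY constexpr _Tp __example_identity(_Tp __t) noexcept
{
  return __t; // under __cuda_std__, _LIBCUDACXX_INLINE_VISIBILITY marks this host/device via _CCCL_HOST_DEVICE
}
_LIBCUDACXX_END_NAMESPACE_CUDA // closes both namespaces

Because the ABI namespace is an inline namespace, callers spell this simply as cuda::__example_identity(42), while the mangled name stays tied to the ABI version macro.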
-#define _LIBCUDACXX_END_NAMESPACE_CUDA_PTX } } } -#define _LIBCUDACXX_BEGIN_NAMESPACE_CUDA_DEVICE_EXPERIMENTAL namespace cuda { namespace device { namespace experimental { inline namespace _LIBCUDACXX_ABI_NAMESPACE { -#define _LIBCUDACXX_END_NAMESPACE_CUDA_DEVICE_EXPERIMENTAL } } } } -#endif +# ifdef __cuda_std__ +# define _LIBCUDACXX_INLINE_VISIBILITY _CCCL_HOST_DEVICE +# else +# define _LIBCUDACXX_INLINE_VISIBILITY _LIBCUDACXX_HIDE_FROM_ABI +# endif // __cuda_std__ -// Inline namespaces are available in Clang/GCC/MSVC regardless of C++ dialect. -#define _LIBCUDACXX_BEGIN_NAMESPACE_STD _LIBCUDACXX_BEGIN_NAMESPACE_STD_NOVERSION inline namespace _LIBCUDACXX_ABI_NAMESPACE { -#define _LIBCUDACXX_END_NAMESPACE_STD } _LIBCUDACXX_END_NAMESPACE_STD_NOVERSION +# define _LIBCUDACXX_CONCAT1(_LIBCUDACXX_X, _LIBCUDACXX_Y) _LIBCUDACXX_X##_LIBCUDACXX_Y +# define _LIBCUDACXX_CONCAT(_LIBCUDACXX_X, _LIBCUDACXX_Y) _LIBCUDACXX_CONCAT1(_LIBCUDACXX_X, _LIBCUDACXX_Y) -#ifndef __cuda_std__ -_LIBCUDACXX_BEGIN_NAMESPACE_STD _LIBCUDACXX_END_NAMESPACE_STD -#endif +# ifndef _LIBCUDACXX_ABI_NAMESPACE +# ifdef __cuda_std__ +# define _LIBCUDACXX_ABI_NAMESPACE _LIBCUDACXX_CONCAT(__, _LIBCUDACXX_CUDA_ABI_VERSION) +# else +# define _LIBCUDACXX_ABI_NAMESPACE _LIBCUDACXX_CONCAT(__, _LIBCUDACXX_ABI_VERSION) +# endif // __cuda_std__ +# endif // _LIBCUDACXX_ABI_NAMESPACE -#define _LIBCUDACXX_BEGIN_NAMESPACE_RANGES _LIBCUDACXX_BEGIN_NAMESPACE_STD_NOVERSION namespace ranges { inline namespace _LIBCUDACXX_ABI_NAMESPACE { -#define _LIBCUDACXX_END_NAMESPACE_RANGES } } _LIBCUDACXX_END_NAMESPACE_STD_NOVERSION +# ifdef __cuda_std__ +# define _LIBCUDACXX_BEGIN_NAMESPACE_STD_NOVERSION \ + namespace cuda \ + { \ + namespace std \ + { +# define _LIBCUDACXX_END_NAMESPACE_STD_NOVERSION \ + } \ + } +# define _CUDA_VSTD_NOVERSION ::cuda::std +# define _CUDA_VSTD ::cuda::std::_LIBCUDACXX_ABI_NAMESPACE +# define _CUDA_VRANGES ::cuda::std::ranges::_LIBCUDACXX_ABI_NAMESPACE +# define _CUDA_VIEWS ::cuda::std::ranges::views::_LIBCUDACXX_CUDA_ABI_NAMESPACE +# define _CUDA_VMR ::cuda::mr::_LIBCUDACXX_ABI_NAMESPACE +# define _CUDA_VPTX ::cuda::ptx::_LIBCUDACXX_ABI_NAMESPACE +# else +# define _LIBCUDACXX_BEGIN_NAMESPACE_STD_NOVERSION \ + namespace std \ + { +# define _LIBCUDACXX_END_NAMESPACE_STD_NOVERSION } +# define _CUDA_VSTD_NOVERSION ::std +# define _CUDA_VSTD ::std::_LIBCUDACXX_ABI_NAMESPACE +# define _CUDA_VRANGES ::std::ranges::_LIBCUDACXX_ABI_NAMESPACE +# define _CUDA_VIEWS ::std::ranges::views::_LIBCUDACXX_CUDA_ABI_NAMESPACE +# endif -#if !defined(__cuda_std__) -_LIBCUDACXX_BEGIN_NAMESPACE_RANGES _LIBCUDACXX_END_NAMESPACE_RANGES -#endif +# ifdef __cuda_std__ +# define _LIBCUDACXX_BEGIN_NAMESPACE_CUDA \ + namespace cuda \ + { \ + inline namespace _LIBCUDACXX_ABI_NAMESPACE \ + { +# define _LIBCUDACXX_END_NAMESPACE_CUDA \ + } \ + } +# define _LIBCUDACXX_BEGIN_NAMESPACE_CUDA_MR \ + namespace cuda \ + { \ + namespace mr \ + { \ + inline namespace _LIBCUDACXX_ABI_NAMESPACE \ + { +# define _LIBCUDACXX_END_NAMESPACE_CUDA_MR \ + } \ + } \ + } +# define _LIBCUDACXX_BEGIN_NAMESPACE_CUDA_DEVICE \ + namespace cuda \ + { \ + namespace device \ + { \ + inline namespace _LIBCUDACXX_ABI_NAMESPACE \ + { +# define _LIBCUDACXX_END_NAMESPACE_CUDA_DEVICE \ + } \ + } \ + } +# define _LIBCUDACXX_BEGIN_NAMESPACE_CUDA_PTX \ + namespace cuda \ + { \ + namespace ptx \ + { \ + inline namespace _LIBCUDACXX_ABI_NAMESPACE \ + { +# define _LIBCUDACXX_END_NAMESPACE_CUDA_PTX \ + } \ + } \ + } +# define _LIBCUDACXX_BEGIN_NAMESPACE_CUDA_DEVICE_EXPERIMENTAL \ + namespace cuda \ + 
{ \ + namespace device \ + { \ + namespace experimental \ + { \ + inline namespace _LIBCUDACXX_ABI_NAMESPACE \ + { +# define _LIBCUDACXX_END_NAMESPACE_CUDA_DEVICE_EXPERIMENTAL \ + } \ + } \ + } \ + } +# endif -#define _LIBCUDACXX_BEGIN_NAMESPACE_VIEWS _LIBCUDACXX_BEGIN_NAMESPACE_STD_NOVERSION namespace ranges { namespace views { inline namespace _LIBCUDACXX_CUDA_ABI_NAMESPACE { -#define _LIBCUDACXX_END_NAMESPACE_VIEWS } } } _LIBCUDACXX_END_NAMESPACE_STD_NOVERSION -#if !defined(__cuda_std__) -_LIBCUDACXX_BEGIN_NAMESPACE_VIEWS _LIBCUDACXX_END_NAMESPACE_VIEWS -#endif +// Inline namespaces are available in Clang/GCC/MSVC regardless of C++ dialect. +# define _LIBCUDACXX_BEGIN_NAMESPACE_STD \ + _LIBCUDACXX_BEGIN_NAMESPACE_STD_NOVERSION \ + inline namespace _LIBCUDACXX_ABI_NAMESPACE \ + { +# define _LIBCUDACXX_END_NAMESPACE_STD \ + } \ + _LIBCUDACXX_END_NAMESPACE_STD_NOVERSION + +# ifndef __cuda_std__ +_LIBCUDACXX_BEGIN_NAMESPACE_STD +_LIBCUDACXX_END_NAMESPACE_STD +# endif -#if _CCCL_STD_VER > 2017 -#define _LIBCUDACXX_BEGIN_NAMESPACE_RANGES_ABI inline namespace __cxx20 { -#else -#define _LIBCUDACXX_BEGIN_NAMESPACE_RANGES_ABI inline namespace __cxx17 { -#endif -#define _LIBCUDACXX_END_NAMESPACE_RANGES_ABI } +# define _LIBCUDACXX_BEGIN_NAMESPACE_RANGES \ + _LIBCUDACXX_BEGIN_NAMESPACE_STD_NOVERSION \ + namespace ranges \ + { \ + inline namespace _LIBCUDACXX_ABI_NAMESPACE \ + { +# define _LIBCUDACXX_END_NAMESPACE_RANGES \ + } \ + } \ + _LIBCUDACXX_END_NAMESPACE_STD_NOVERSION + +# if !defined(__cuda_std__) +_LIBCUDACXX_BEGIN_NAMESPACE_RANGES +_LIBCUDACXX_END_NAMESPACE_RANGES +# endif -#define _LIBCUDACXX_BEGIN_NAMESPACE_CPO(_CPO) namespace _CPO { _LIBCUDACXX_BEGIN_NAMESPACE_RANGES_ABI -#define _LIBCUDACXX_END_NAMESPACE_CPO } } +# define _LIBCUDACXX_BEGIN_NAMESPACE_VIEWS \ + _LIBCUDACXX_BEGIN_NAMESPACE_STD_NOVERSION \ + namespace ranges \ + { \ + namespace views \ + { \ + inline namespace _LIBCUDACXX_CUDA_ABI_NAMESPACE \ + { +# define _LIBCUDACXX_END_NAMESPACE_VIEWS \ + } \ + } \ + } \ + _LIBCUDACXX_END_NAMESPACE_STD_NOVERSION +# if !defined(__cuda_std__) +_LIBCUDACXX_BEGIN_NAMESPACE_VIEWS +_LIBCUDACXX_END_NAMESPACE_VIEWS +# endif -#if _CCCL_STD_VER >= 2017 -#define _LIBCUDACXX_BEGIN_NAMESPACE_FILESYSTEM \ - _LIBCUDACXX_BEGIN_NAMESPACE_STD inline namespace __fs { namespace filesystem { -#else -#define _LIBCUDACXX_BEGIN_NAMESPACE_FILESYSTEM \ - _LIBCUDACXX_BEGIN_NAMESPACE_STD namespace __fs { namespace filesystem { -#endif +# if _CCCL_STD_VER > 2017 +# define _LIBCUDACXX_BEGIN_NAMESPACE_RANGES_ABI \ + inline namespace __cxx20 \ + { +# else +# define _LIBCUDACXX_BEGIN_NAMESPACE_RANGES_ABI \ + inline namespace __cxx17 \ + { +# endif +# define _LIBCUDACXX_END_NAMESPACE_RANGES_ABI } + +# define _LIBCUDACXX_BEGIN_NAMESPACE_CPO(_CPO) \ + namespace _CPO \ + { \ + _LIBCUDACXX_BEGIN_NAMESPACE_RANGES_ABI +# define _LIBCUDACXX_END_NAMESPACE_CPO \ + } \ + } + +# if _CCCL_STD_VER >= 2017 +# define _LIBCUDACXX_BEGIN_NAMESPACE_FILESYSTEM \ + _LIBCUDACXX_BEGIN_NAMESPACE_STD \ + inline namespace __fs \ + { \ + namespace filesystem \ + { +# else +# define _LIBCUDACXX_BEGIN_NAMESPACE_FILESYSTEM \ + _LIBCUDACXX_BEGIN_NAMESPACE_STD \ + namespace __fs \ + { \ + namespace filesystem \ + { +# endif -#define _LIBCUDACXX_END_NAMESPACE_FILESYSTEM \ - _LIBCUDACXX_END_NAMESPACE_STD } } +# define _LIBCUDACXX_END_NAMESPACE_FILESYSTEM \ + _LIBCUDACXX_END_NAMESPACE_STD \ + } \ + } -#define _CUDA_VSTD_FS _CUDA_VSTD::__fs::filesystem +# define _CUDA_VSTD_FS _CUDA_VSTD::__fs::filesystem -#ifndef _LIBCUDACXX_PREFERRED_OVERLOAD -# if 
__has_attribute(__enable_if__) -# define _LIBCUDACXX_PREFERRED_OVERLOAD __attribute__ ((__enable_if__(true, ""))) +# ifndef _LIBCUDACXX_PREFERRED_OVERLOAD +# if __has_attribute(__enable_if__) +# define _LIBCUDACXX_PREFERRED_OVERLOAD __attribute__((__enable_if__(true, ""))) +# endif # endif -#endif -#ifdef _LIBCUDACXX_HAS_NO_UNICODE_CHARS +# ifdef _LIBCUDACXX_HAS_NO_UNICODE_CHARS typedef unsigned short char16_t; -typedef unsigned int char32_t; -#endif // _LIBCUDACXX_HAS_NO_UNICODE_CHARS - -#if defined(_CCCL_COMPILER_GCC) \ - || defined(_CCCL_COMPILER_CLANG) -# define _LIBCUDACXX_NOALIAS __attribute__((__malloc__)) -#else -# define _LIBCUDACXX_NOALIAS -#endif +typedef unsigned int char32_t; +# endif // _LIBCUDACXX_HAS_NO_UNICODE_CHARS -#if __has_feature(cxx_explicit_conversions) \ - || defined(_CCCL_COMPILER_IBM) \ - || defined(_CCCL_COMPILER_GCC) \ - || defined(_CCCL_COMPILER_CLANG) -# define _LIBCUDACXX_EXPLICIT explicit -#else -# define _LIBCUDACXX_EXPLICIT -#endif - -#if !__has_builtin(__builtin_operator_new) || !__has_builtin(__builtin_operator_delete) -#define _LIBCUDACXX_HAS_NO_BUILTIN_OPERATOR_NEW_DELETE -#endif +# if defined(_CCCL_COMPILER_GCC) || defined(_CCCL_COMPILER_CLANG) +# define _LIBCUDACXX_NOALIAS __attribute__((__malloc__)) +# else +# define _LIBCUDACXX_NOALIAS +# endif -#ifdef _LIBCUDACXX_HAS_NO_STRONG_ENUMS -# define _LIBCUDACXX_DECLARE_STRONG_ENUM(x) struct _LIBCUDACXX_TYPE_VIS x { enum __lx -# define _LIBCUDACXX_DECLARE_STRONG_ENUM_EPILOG(x) \ - __lx __v_; \ - _LIBCUDACXX_INLINE_VISIBILITY x(__lx __v) : __v_(__v) {} \ - _LIBCUDACXX_INLINE_VISIBILITY explicit x(int __v) : __v_(static_cast<__lx>(__v)) {} \ - _LIBCUDACXX_INLINE_VISIBILITY operator int() const {return __v_;} \ - }; -#else // _LIBCUDACXX_HAS_NO_STRONG_ENUMS -# define _LIBCUDACXX_DECLARE_STRONG_ENUM(x) enum class _LIBCUDACXX_ENUM_VIS x -# define _LIBCUDACXX_DECLARE_STRONG_ENUM_EPILOG(x) -#endif // _LIBCUDACXX_HAS_NO_STRONG_ENUMS - -#ifdef _LIBCUDACXX_DEBUG -# if _LIBCUDACXX_DEBUG == 0 -# define _LIBCUDACXX_DEBUG_LEVEL 1 -# elif _LIBCUDACXX_DEBUG == 1 -# define _LIBCUDACXX_DEBUG_LEVEL 2 +# if __has_feature(cxx_explicit_conversions) || defined(_CCCL_COMPILER_IBM) || defined(_CCCL_COMPILER_GCC) \ + || defined(_CCCL_COMPILER_CLANG) +# define _LIBCUDACXX_EXPLICIT explicit # else -# error Supported values for _LIBCUDACXX_DEBUG are 0 and 1 +# define _LIBCUDACXX_EXPLICIT # endif -# if !defined(_LIBCUDACXX_BUILDING_LIBRARY) -# define _LIBCUDACXX_EXTERN_TEMPLATE(...) + +# if !__has_builtin(__builtin_operator_new) || !__has_builtin(__builtin_operator_delete) +# define _LIBCUDACXX_HAS_NO_BUILTIN_OPERATOR_NEW_DELETE # endif -#endif -#ifdef _LIBCUDACXX_DISABLE_EXTERN_TEMPLATE -#define _LIBCUDACXX_EXTERN_TEMPLATE(...) -#define _LIBCUDACXX_EXTERN_TEMPLATE2(...) 
-#endif +# ifdef _LIBCUDACXX_HAS_NO_STRONG_ENUMS +# define _LIBCUDACXX_DECLARE_STRONG_ENUM(x) \ + struct _LIBCUDACXX_TYPE_VIS x \ + { \ + enum __lx +# define _LIBCUDACXX_DECLARE_STRONG_ENUM_EPILOG(x) \ + __lx __v_; \ + _LIBCUDACXX_INLINE_VISIBILITY x(__lx __v) \ + : __v_(__v) \ + {} \ + _LIBCUDACXX_INLINE_VISIBILITY explicit x(int __v) \ + : __v_(static_cast<__lx>(__v)) \ + {} \ + _LIBCUDACXX_INLINE_VISIBILITY operator int() const \ + { \ + return __v_; \ + } \ + } \ + ; +# else // _LIBCUDACXX_HAS_NO_STRONG_ENUMS +# define _LIBCUDACXX_DECLARE_STRONG_ENUM(x) enum class _LIBCUDACXX_ENUM_VIS x +# define _LIBCUDACXX_DECLARE_STRONG_ENUM_EPILOG(x) +# endif // _LIBCUDACXX_HAS_NO_STRONG_ENUMS + +# ifdef _LIBCUDACXX_DEBUG +# if _LIBCUDACXX_DEBUG == 0 +# define _LIBCUDACXX_DEBUG_LEVEL 1 +# elif _LIBCUDACXX_DEBUG == 1 +# define _LIBCUDACXX_DEBUG_LEVEL 2 +# else +# error Supported values for _LIBCUDACXX_DEBUG are 0 and 1 +# endif +# if !defined(_LIBCUDACXX_BUILDING_LIBRARY) +# define _LIBCUDACXX_EXTERN_TEMPLATE(...) +# endif +# endif -#ifndef _LIBCUDACXX_EXTERN_TEMPLATE -#define _LIBCUDACXX_EXTERN_TEMPLATE(...) extern template __VA_ARGS__; -#endif +# ifdef _LIBCUDACXX_DISABLE_EXTERN_TEMPLATE +# define _LIBCUDACXX_EXTERN_TEMPLATE(...) +# define _LIBCUDACXX_EXTERN_TEMPLATE2(...) +# endif -#ifndef _LIBCUDACXX_EXTERN_TEMPLATE2 -#define _LIBCUDACXX_EXTERN_TEMPLATE2(...) extern template __VA_ARGS__; -#endif +# ifndef _LIBCUDACXX_EXTERN_TEMPLATE +# define _LIBCUDACXX_EXTERN_TEMPLATE(...) extern template __VA_ARGS__; +# endif -#if defined(__APPLE__) || defined(__FreeBSD__) || defined(_LIBCUDACXX_MSVCRT_LIKE) || \ - defined(__sun__) || defined(__NetBSD__) || defined(__CloudABI__) -#define _LIBCUDACXX_LOCALE__L_EXTENSIONS 1 -#endif +# ifndef _LIBCUDACXX_EXTERN_TEMPLATE2 +# define _LIBCUDACXX_EXTERN_TEMPLATE2(...) extern template __VA_ARGS__; +# endif + +# if defined(__APPLE__) || defined(__FreeBSD__) || defined(_LIBCUDACXX_MSVCRT_LIKE) || defined(__sun__) \ + || defined(__NetBSD__) || defined(__CloudABI__) +# define _LIBCUDACXX_LOCALE__L_EXTENSIONS 1 +# endif -#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) +# if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) // Most unix variants have catopen. These are the specific ones that don't. -# if !defined(__BIONIC__) && !defined(_NEWLIB_VERSION) -# define _LIBCUDACXX_HAS_CATOPEN 1 +# if !defined(__BIONIC__) && !defined(_NEWLIB_VERSION) +# define _LIBCUDACXX_HAS_CATOPEN 1 +# endif # endif -#endif -#ifdef __FreeBSD__ -#define _DECLARE_C99_LDBL_MATH 1 -#endif +# ifdef __FreeBSD__ +# define _DECLARE_C99_LDBL_MATH 1 +# endif -#if defined(_LIBCUDACXX_ABI_MICROSOFT) && !defined(_LIBCUDACXX_NO_VCRUNTIME) -# define _LIBCUDACXX_DEFER_NEW_TO_VCRUNTIME -#endif +# if defined(_LIBCUDACXX_ABI_MICROSOFT) && !defined(_LIBCUDACXX_NO_VCRUNTIME) +# define _LIBCUDACXX_DEFER_NEW_TO_VCRUNTIME +# endif // If we are getting operator new from the MSVC CRT, then allocation overloads // for align_val_t were added in 19.12, aka VS 2017 version 15.3. -#if defined(_LIBCUDACXX_MSVCRT) && defined(_CCCL_COMPILER_MSVC) && _MSC_VER < 1912 -# define _LIBCUDACXX_HAS_NO_LIBRARY_ALIGNED_ALLOCATION -#elif defined(_LIBCUDACXX_ABI_VCRUNTIME) && !defined(__cpp_aligned_new) - // We're deferring to Microsoft's STL to provide aligned new et al. We don't - // have it unless the language feature test macro is defined. 
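A usage sketch for the _LIBCUDACXX_DECLARE_STRONG_ENUM / _LIBCUDACXX_DECLARE_STRONG_ENUM_EPILOG pair reformatted above; the enum and its enumerators are made up for illustration and are not part of the patch.

_LIBCUDACXX_DECLARE_STRONG_ENUM(__example_mode)
{
  __read,
  __write
};
_LIBCUDACXX_DECLARE_STRONG_ENUM_EPILOG(__example_mode)
// With scoped enums available, this is just: enum class _LIBCUDACXX_ENUM_VIS __example_mode { __read, __write };
// and the epilog expands to nothing. On the fallback path, __example_mode becomes a struct that wraps an
// unscoped `enum __lx`, stores an __lx member, and the epilog supplies the constructors and operator int().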
-# define _LIBCUDACXX_HAS_NO_LIBRARY_ALIGNED_ALLOCATION -#endif - -#if defined(__APPLE__) -# if !defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && \ - defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) -# define __MAC_OS_X_VERSION_MIN_REQUIRED __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ +# if defined(_LIBCUDACXX_MSVCRT) && defined(_CCCL_COMPILER_MSVC) && _MSC_VER < 1912 +# define _LIBCUDACXX_HAS_NO_LIBRARY_ALIGNED_ALLOCATION +# elif defined(_LIBCUDACXX_ABI_VCRUNTIME) && !defined(__cpp_aligned_new) +// We're deferring to Microsoft's STL to provide aligned new et al. We don't +// have it unless the language feature test macro is defined. +# define _LIBCUDACXX_HAS_NO_LIBRARY_ALIGNED_ALLOCATION # endif -#endif // defined(__APPLE__) + +# if defined(__APPLE__) +# if !defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) +# define __MAC_OS_X_VERSION_MIN_REQUIRED __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ +# endif +# endif // defined(__APPLE__) # if !defined(_LIBCUDACXX_HAS_NO_ALIGNED_ALLOCATION) \ && (defined(_LIBCUDACXX_HAS_NO_LIBRARY_ALIGNED_ALLOCATION) \ @@ -1574,151 +1590,138 @@ typedef unsigned int char32_t; // Deprecations warnings are always enabled, except when users explicitly opt-out // by defining _LIBCUDACXX_DISABLE_DEPRECATION_WARNINGS. // NVCC 11.1 and 11.2 are broken with the deprecated attribute, so disable it -#if !defined(_LIBCUDACXX_DISABLE_DEPRECATION_WARNINGS) \ - && !defined(_CCCL_CUDACC_BELOW_11_3) -# if __has_attribute(deprecated) -# define _LIBCUDACXX_DEPRECATED __attribute__ ((deprecated)) -# elif _CCCL_STD_VER > 2011 -# define _LIBCUDACXX_DEPRECATED [[deprecated]] +# if !defined(_LIBCUDACXX_DISABLE_DEPRECATION_WARNINGS) && !defined(_CCCL_CUDACC_BELOW_11_3) +# if __has_attribute(deprecated) +# define _LIBCUDACXX_DEPRECATED __attribute__((deprecated)) +# elif _CCCL_STD_VER > 2011 +# define _LIBCUDACXX_DEPRECATED [[deprecated]] +# else +# define _LIBCUDACXX_DEPRECATED +# endif # else # define _LIBCUDACXX_DEPRECATED # endif -#else -# define _LIBCUDACXX_DEPRECATED -#endif -#define _LIBCUDACXX_DEPRECATED_IN_CXX11 _LIBCUDACXX_DEPRECATED +# define _LIBCUDACXX_DEPRECATED_IN_CXX11 _LIBCUDACXX_DEPRECATED -#if _CCCL_STD_VER >= 2014 -# define _LIBCUDACXX_DEPRECATED_IN_CXX14 _LIBCUDACXX_DEPRECATED -#else -# define _LIBCUDACXX_DEPRECATED_IN_CXX14 -#endif +# if _CCCL_STD_VER >= 2014 +# define _LIBCUDACXX_DEPRECATED_IN_CXX14 _LIBCUDACXX_DEPRECATED +# else +# define _LIBCUDACXX_DEPRECATED_IN_CXX14 +# endif -#if _CCCL_STD_VER >= 2017 -# define _LIBCUDACXX_DEPRECATED_IN_CXX17 _LIBCUDACXX_DEPRECATED -#else -# define _LIBCUDACXX_DEPRECATED_IN_CXX17 -#endif +# if _CCCL_STD_VER >= 2017 +# define _LIBCUDACXX_DEPRECATED_IN_CXX17 _LIBCUDACXX_DEPRECATED +# else +# define _LIBCUDACXX_DEPRECATED_IN_CXX17 +# endif -#if _CCCL_STD_VER >= 2020 -# define _LIBCUDACXX_DEPRECATED_IN_CXX20 _LIBCUDACXX_DEPRECATED -#else -# define _LIBCUDACXX_DEPRECATED_IN_CXX20 -#endif +# if _CCCL_STD_VER >= 2020 +# define _LIBCUDACXX_DEPRECATED_IN_CXX20 _LIBCUDACXX_DEPRECATED +# else +# define _LIBCUDACXX_DEPRECATED_IN_CXX20 +# endif -#if _CCCL_STD_VER <= 2011 -# define _LIBCUDACXX_EXPLICIT_AFTER_CXX11 -#else -# define _LIBCUDACXX_EXPLICIT_AFTER_CXX11 explicit -#endif +# if _CCCL_STD_VER <= 2011 +# define _LIBCUDACXX_EXPLICIT_AFTER_CXX11 +# else +# define _LIBCUDACXX_EXPLICIT_AFTER_CXX11 explicit +# endif -#if _CCCL_STD_VER > 2014 && defined(__cpp_inline_variables) && (__cpp_inline_variables >= 201606L) -# define _LIBCUDACXX_INLINE_VAR inline -#else -# define 
_LIBCUDACXX_INLINE_VAR -#endif +# if _CCCL_STD_VER > 2014 && defined(__cpp_inline_variables) && (__cpp_inline_variables >= 201606L) +# define _LIBCUDACXX_INLINE_VAR inline +# else +# define _LIBCUDACXX_INLINE_VAR +# endif -#ifdef _LIBCUDACXX_HAS_NO_RVALUE_REFERENCES -# define _LIBCUDACXX_EXPLICIT_MOVE(x) _CUDA_VSTD::move(x) -#else -# define _LIBCUDACXX_EXPLICIT_MOVE(x) (x) -#endif +# ifdef _LIBCUDACXX_HAS_NO_RVALUE_REFERENCES +# define _LIBCUDACXX_EXPLICIT_MOVE(x) _CUDA_VSTD::move(x) +# else +# define _LIBCUDACXX_EXPLICIT_MOVE(x) (x) +# endif -#if __has_attribute(no_destroy) -# define _LIBCUDACXX_NO_DESTROY __attribute__((__no_destroy__)) -#else -# define _LIBCUDACXX_NO_DESTROY -#endif +# if __has_attribute(no_destroy) +# define _LIBCUDACXX_NO_DESTROY __attribute__((__no_destroy__)) +# else +# define _LIBCUDACXX_NO_DESTROY +# endif -#ifndef _LIBCUDACXX_HAS_NO_ASAN -extern "C" _LIBCUDACXX_FUNC_VIS void __sanitizer_annotate_contiguous_container( - const void *, const void *, const void *, const void *); -#endif +# ifndef _LIBCUDACXX_HAS_NO_ASAN +extern "C" _LIBCUDACXX_FUNC_VIS void +__sanitizer_annotate_contiguous_container(const void*, const void*, const void*, const void*); +# endif -#ifndef _LIBCUDACXX_WEAK -#define _LIBCUDACXX_WEAK __attribute__((__weak__)) -#endif +# ifndef _LIBCUDACXX_WEAK +# define _LIBCUDACXX_WEAK __attribute__((__weak__)) +# endif // Redefine some macros for internal use -#if defined(__cuda_std__) -# undef _LIBCUDACXX_FUNC_VIS -# define _LIBCUDACXX_FUNC_VIS _LIBCUDACXX_INLINE_VISIBILITY -# undef _LIBCUDACXX_TYPE_VIS -# define _LIBCUDACXX_TYPE_VIS -#endif // __cuda_std__ +# if defined(__cuda_std__) +# undef _LIBCUDACXX_FUNC_VIS +# define _LIBCUDACXX_FUNC_VIS _LIBCUDACXX_INLINE_VISIBILITY +# undef _LIBCUDACXX_TYPE_VIS +# define _LIBCUDACXX_TYPE_VIS +# endif // __cuda_std__ // Thread API -#ifndef _LIBCUDACXX_HAS_THREAD_API_EXTERNAL -#if defined(_CCCL_COMPILER_NVRTC) \ - || defined(__EMSCRIPTEN__) -# define _LIBCUDACXX_HAS_THREAD_API_EXTERNAL -#endif -#endif // _LIBCUDACXX_HAS_THREAD_API_EXTERNAL - -#ifndef _LIBCUDACXX_HAS_THREAD_API_CUDA -#if defined(__cuda_std__) \ - && (defined(__CUDA_ARCH__) || defined(__EMSCRIPTEN__)) -# define _LIBCUDACXX_HAS_THREAD_API_CUDA -#endif // __cuda_std__ -#endif // _LIBCUDACXX_HAS_THREAD_API_CUDA - -#ifndef _LIBCUDACXX_HAS_THREAD_API_WIN32 -#if defined(_CCCL_COMPILER_MSVC) \ - && !defined(_LIBCUDACXX_HAS_THREAD_API_CUDA) -# define _LIBCUDACXX_HAS_THREAD_API_WIN32 -#endif -#endif // _LIBCUDACXX_HAS_THREAD_API_WIN32 - -#if !defined(_LIBCUDACXX_HAS_NO_THREADS) \ - && !defined(_LIBCUDACXX_HAS_THREAD_API_PTHREAD) \ - && !defined(_LIBCUDACXX_HAS_THREAD_API_WIN32) \ - && !defined(_LIBCUDACXX_HAS_THREAD_API_EXTERNAL) -# if defined(__FreeBSD__) || \ - defined(__Fuchsia__) || \ - defined(__wasi__) || \ - defined(__NetBSD__) || \ - defined(__linux__) || \ - defined(__GNU__) || \ - defined(__APPLE__) || \ - defined(__CloudABI__) || \ - defined(__sun__) || \ - (defined(__MINGW32__) && __has_include()) -# define _LIBCUDACXX_HAS_THREAD_API_PTHREAD -# elif defined(_LIBCUDACXX_WIN32API) -# define _LIBCUDACXX_HAS_THREAD_API_WIN32 -# else -# define _LIBCUDACXX_UNSUPPORTED_THREAD_API -# endif // _LIBCUDACXX_HAS_THREAD_API -#endif // _LIBCUDACXX_HAS_NO_THREADS - -#if defined(_LIBCUDACXX_HAS_THREAD_API_PTHREAD) -#if defined(__ANDROID__) && __ANDROID_API__ >= 30 -#define _LIBCUDACXX_HAS_COND_CLOCKWAIT -#elif defined(_LIBCUDACXX_GLIBC_PREREQ) -#if _LIBCUDACXX_GLIBC_PREREQ(2, 30) -#define _LIBCUDACXX_HAS_COND_CLOCKWAIT -#endif -#endif -#endif +# ifndef 
_LIBCUDACXX_HAS_THREAD_API_EXTERNAL +# if defined(_CCCL_COMPILER_NVRTC) || defined(__EMSCRIPTEN__) +# define _LIBCUDACXX_HAS_THREAD_API_EXTERNAL +# endif +# endif // _LIBCUDACXX_HAS_THREAD_API_EXTERNAL + +# ifndef _LIBCUDACXX_HAS_THREAD_API_CUDA +# if defined(__cuda_std__) && (defined(__CUDA_ARCH__) || defined(__EMSCRIPTEN__)) +# define _LIBCUDACXX_HAS_THREAD_API_CUDA +# endif // __cuda_std__ +# endif // _LIBCUDACXX_HAS_THREAD_API_CUDA -#if defined(_LIBCUDACXX_HAS_NO_THREADS) && defined(_LIBCUDACXX_HAS_THREAD_API_PTHREAD) -#error _LIBCUDACXX_HAS_THREAD_API_PTHREAD may only be defined when \ +# ifndef _LIBCUDACXX_HAS_THREAD_API_WIN32 +# if defined(_CCCL_COMPILER_MSVC) && !defined(_LIBCUDACXX_HAS_THREAD_API_CUDA) +# define _LIBCUDACXX_HAS_THREAD_API_WIN32 +# endif +# endif // _LIBCUDACXX_HAS_THREAD_API_WIN32 + +# if !defined(_LIBCUDACXX_HAS_NO_THREADS) && !defined(_LIBCUDACXX_HAS_THREAD_API_PTHREAD) \ + && !defined(_LIBCUDACXX_HAS_THREAD_API_WIN32) && !defined(_LIBCUDACXX_HAS_THREAD_API_EXTERNAL) +# if defined(__FreeBSD__) || defined(__Fuchsia__) || defined(__wasi__) || defined(__NetBSD__) || defined(__linux__) \ + || defined(__GNU__) || defined(__APPLE__) || defined(__CloudABI__) || defined(__sun__) \ + || (defined(__MINGW32__) && __has_include()) +# define _LIBCUDACXX_HAS_THREAD_API_PTHREAD +# elif defined(_LIBCUDACXX_WIN32API) +# define _LIBCUDACXX_HAS_THREAD_API_WIN32 +# else +# define _LIBCUDACXX_UNSUPPORTED_THREAD_API +# endif // _LIBCUDACXX_HAS_THREAD_API +# endif // _LIBCUDACXX_HAS_NO_THREADS + +# if defined(_LIBCUDACXX_HAS_THREAD_API_PTHREAD) +# if defined(__ANDROID__) && __ANDROID_API__ >= 30 +# define _LIBCUDACXX_HAS_COND_CLOCKWAIT +# elif defined(_LIBCUDACXX_GLIBC_PREREQ) +# if _LIBCUDACXX_GLIBC_PREREQ(2, 30) +# define _LIBCUDACXX_HAS_COND_CLOCKWAIT +# endif +# endif +# endif + +# if defined(_LIBCUDACXX_HAS_NO_THREADS) && defined(_LIBCUDACXX_HAS_THREAD_API_PTHREAD) +# error _LIBCUDACXX_HAS_THREAD_API_PTHREAD may only be defined when \ _LIBCUDACXX_HAS_NO_THREADS is not defined. -#endif +# endif -#if defined(_LIBCUDACXX_HAS_NO_THREADS) && defined(_LIBCUDACXX_HAS_THREAD_API_EXTERNAL) -#error _LIBCUDACXX_HAS_THREAD_API_EXTERNAL may not be defined when \ +# if defined(_LIBCUDACXX_HAS_NO_THREADS) && defined(_LIBCUDACXX_HAS_THREAD_API_EXTERNAL) +# error _LIBCUDACXX_HAS_THREAD_API_EXTERNAL may not be defined when \ _LIBCUDACXX_HAS_NO_THREADS is defined. -#endif +# endif -#if defined(__STDCPP_THREADS__) && defined(_LIBCUDACXX_HAS_NO_THREADS) -#error _LIBCUDACXX_HAS_NO_THREADS cannot be set when __STDCPP_THREADS__ is set. -#endif +# if defined(__STDCPP_THREADS__) && defined(_LIBCUDACXX_HAS_NO_THREADS) +# error _LIBCUDACXX_HAS_NO_THREADS cannot be set when __STDCPP_THREADS__ is set. +# endif -#if !defined(_LIBCUDACXX_HAS_NO_THREADS) && !defined(__STDCPP_THREADS__) -#define __STDCPP_THREADS__ 1 -#endif +# if !defined(_LIBCUDACXX_HAS_NO_THREADS) && !defined(__STDCPP_THREADS__) +# define __STDCPP_THREADS__ 1 +# endif // The glibc and Bionic implementation of pthreads implements // pthread_mutex_destroy as nop for regular mutexes. Additionally, Win32 @@ -1730,10 +1733,9 @@ extern "C" _LIBCUDACXX_FUNC_VIS void __sanitizer_annotate_contiguous_container( // // TODO(EricWF): Enable this optimization on Bionic after speaking to their // respective stakeholders. 
-#if (defined(_LIBCUDACXX_HAS_THREAD_API_PTHREAD) && defined(__GLIBC__)) \ - || defined(_LIBCUDACXX_HAS_THREAD_API_WIN32) -# define _LIBCUDACXX_HAS_TRIVIAL_MUTEX_DESTRUCTION -#endif +# if (defined(_LIBCUDACXX_HAS_THREAD_API_PTHREAD) && defined(__GLIBC__)) || defined(_LIBCUDACXX_HAS_THREAD_API_WIN32) +# define _LIBCUDACXX_HAS_TRIVIAL_MUTEX_DESTRUCTION +# endif // Destroying a condvar is a nop on Windows. // @@ -1743,123 +1745,121 @@ extern "C" _LIBCUDACXX_FUNC_VIS void __sanitizer_annotate_contiguous_container( // // TODO(EricWF): This is potentially true for some pthread implementations // as well. -#if defined(_LIBCUDACXX_HAS_THREAD_API_WIN32) -# define _LIBCUDACXX_HAS_TRIVIAL_CONDVAR_DESTRUCTION -#endif +# if defined(_LIBCUDACXX_HAS_THREAD_API_WIN32) +# define _LIBCUDACXX_HAS_TRIVIAL_CONDVAR_DESTRUCTION +# endif // Systems that use capability-based security (FreeBSD with Capsicum, // Nuxi CloudABI) may only provide local filesystem access (using *at()). // Functions like open(), rename(), unlink() and stat() should not be // used, as they attempt to access the global filesystem namespace. -#ifdef __CloudABI__ -#define _LIBCUDACXX_HAS_NO_GLOBAL_FILESYSTEM_NAMESPACE -#endif +# ifdef __CloudABI__ +# define _LIBCUDACXX_HAS_NO_GLOBAL_FILESYSTEM_NAMESPACE +# endif // CloudABI is intended for running networked services. Processes do not // have standard input and output channels. -#ifdef __CloudABI__ -#define _LIBCUDACXX_HAS_NO_STDIN -#define _LIBCUDACXX_HAS_NO_STDOUT -#endif +# ifdef __CloudABI__ +# define _LIBCUDACXX_HAS_NO_STDIN +# define _LIBCUDACXX_HAS_NO_STDOUT +# endif // Some systems do not provide gets() in their C library, for security reasons. -#ifndef _LIBCUDACXX_C_HAS_NO_GETS -# if defined(_LIBCUDACXX_MSVCRT) || (defined(__FreeBSD__) && __FreeBSD__ >= 13) -# define _LIBCUDACXX_C_HAS_NO_GETS +# ifndef _LIBCUDACXX_C_HAS_NO_GETS +# if defined(_LIBCUDACXX_MSVCRT) || (defined(__FreeBSD__) && __FreeBSD__ >= 13) +# define _LIBCUDACXX_C_HAS_NO_GETS +# endif # endif -#endif -#if defined(__BIONIC__) || defined(__CloudABI__) || \ - defined(__Fuchsia__) || defined(__wasi__) || defined(_LIBCUDACXX_HAS_MUSL_LIBC) -#define _LIBCUDACXX_PROVIDES_DEFAULT_RUNE_TABLE -#endif +# if defined(__BIONIC__) || defined(__CloudABI__) || defined(__Fuchsia__) || defined(__wasi__) \ + || defined(_LIBCUDACXX_HAS_MUSL_LIBC) +# define _LIBCUDACXX_PROVIDES_DEFAULT_RUNE_TABLE +# endif // Thread-unsafe functions such as strtok() and localtime() // are not available. -#ifdef __CloudABI__ -#define _LIBCUDACXX_HAS_NO_THREAD_UNSAFE_C_FUNCTIONS -#endif +# ifdef __CloudABI__ +# define _LIBCUDACXX_HAS_NO_THREAD_UNSAFE_C_FUNCTIONS +# endif // TODO: Support C11 Atomics? 
// #if __has_feature(cxx_atomic) || __has_extension(c_atomic) || __has_keyword(_Atomic) // # define _LIBCUDACXX_HAS_C_ATOMIC_IMP -#if defined(_CCCL_COMPILER_ICC) -# define _LIBCUDACXX_HAS_GCC_ATOMIC_IMP -#elif defined(_CCCL_COMPILER_CLANG) -# define _LIBCUDACXX_HAS_GCC_ATOMIC_IMP -#elif defined(_CCCL_COMPILER_GCC) -# define _LIBCUDACXX_HAS_GCC_ATOMIC_IMP -#elif defined(_CCCL_COMPILER_NVHPC) -# define _LIBCUDACXX_HAS_GCC_ATOMIC_IMP -#elif defined(_CCCL_COMPILER_MSVC) -# define _LIBCUDACXX_HAS_MSVC_ATOMIC_IMPL -#endif +# if defined(_CCCL_COMPILER_ICC) +# define _LIBCUDACXX_HAS_GCC_ATOMIC_IMP +# elif defined(_CCCL_COMPILER_CLANG) +# define _LIBCUDACXX_HAS_GCC_ATOMIC_IMP +# elif defined(_CCCL_COMPILER_GCC) +# define _LIBCUDACXX_HAS_GCC_ATOMIC_IMP +# elif defined(_CCCL_COMPILER_NVHPC) +# define _LIBCUDACXX_HAS_GCC_ATOMIC_IMP +# elif defined(_CCCL_COMPILER_MSVC) +# define _LIBCUDACXX_HAS_MSVC_ATOMIC_IMPL +# endif // CUDA Atomics supersede host atomics in order to insert the host/device dispatch layer -#if defined(_CCCL_CUDA_COMPILER_NVCC) || defined(_CCCL_COMPILER_NVRTC) || defined(_CCCL_COMPILER_NVHPC) || defined(_CCCL_CUDACC) -# define _LIBCUDACXX_HAS_CUDA_ATOMIC_IMPL -#endif - -#if (!defined(_LIBCUDACXX_HAS_C_ATOMIC_IMP) && \ - !defined(_LIBCUDACXX_HAS_GCC_ATOMIC_IMP) && \ - !defined(_LIBCUDACXX_HAS_EXTERNAL_ATOMIC_IMP)) \ - || defined(_LIBCUDACXX_HAS_NO_THREADS) -# define _LIBCUDACXX_HAS_NO_ATOMIC_HEADER -#else -# ifdef __cuda_std__ -# undef _LIBCUDACXX_ATOMIC_FLAG_TYPE -# define _LIBCUDACXX_ATOMIC_FLAG_TYPE int -# endif -# ifndef _LIBCUDACXX_ATOMIC_FLAG_TYPE -# define _LIBCUDACXX_ATOMIC_FLAG_TYPE bool +# if defined(_CCCL_CUDA_COMPILER_NVCC) || defined(_CCCL_COMPILER_NVRTC) || defined(_CCCL_COMPILER_NVHPC) \ + || defined(_CCCL_CUDACC) +# define _LIBCUDACXX_HAS_CUDA_ATOMIC_IMPL # endif -# ifdef _LIBCUDACXX_FREESTANDING -# define _LIBCUDACXX_ATOMIC_ONLY_USE_BUILTINS + +# if (!defined(_LIBCUDACXX_HAS_C_ATOMIC_IMP) && !defined(_LIBCUDACXX_HAS_GCC_ATOMIC_IMP) \ + && !defined(_LIBCUDACXX_HAS_EXTERNAL_ATOMIC_IMP)) \ + || defined(_LIBCUDACXX_HAS_NO_THREADS) +# define _LIBCUDACXX_HAS_NO_ATOMIC_HEADER +# else +# ifdef __cuda_std__ +# undef _LIBCUDACXX_ATOMIC_FLAG_TYPE +# define _LIBCUDACXX_ATOMIC_FLAG_TYPE int +# endif +# ifndef _LIBCUDACXX_ATOMIC_FLAG_TYPE +# define _LIBCUDACXX_ATOMIC_FLAG_TYPE bool +# endif +# ifdef _LIBCUDACXX_FREESTANDING +# define _LIBCUDACXX_ATOMIC_ONLY_USE_BUILTINS +# endif # endif -#endif -#ifndef _LIBCUDACXX_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK -#define _LIBCUDACXX_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK -#endif +# ifndef _LIBCUDACXX_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK +# define _LIBCUDACXX_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK +# endif -#if defined(_LIBCUDACXX_ENABLE_THREAD_SAFETY_ANNOTATIONS) -# if defined(_CCCL_COMPILER_CLANG) && __has_attribute(acquire_capability) +# if defined(_LIBCUDACXX_ENABLE_THREAD_SAFETY_ANNOTATIONS) +# if defined(_CCCL_COMPILER_CLANG) && __has_attribute(acquire_capability) // Work around the attribute handling in clang. When both __declspec and // __attribute__ are present, the processing goes awry preventing the definition // of the types. 
-# if !defined(_LIBCUDACXX_OBJECT_FORMAT_COFF) -# define _LIBCUDACXX_HAS_THREAD_SAFETY_ANNOTATIONS +# if !defined(_LIBCUDACXX_OBJECT_FORMAT_COFF) +# define _LIBCUDACXX_HAS_THREAD_SAFETY_ANNOTATIONS +# endif # endif # endif -#endif -#if __has_attribute(require_constant_initialization) -# define _LIBCUDACXX_SAFE_STATIC __attribute__((__require_constant_initialization__)) -#else -# define _LIBCUDACXX_SAFE_STATIC -#endif +# if __has_attribute(require_constant_initialization) +# define _LIBCUDACXX_SAFE_STATIC __attribute__((__require_constant_initialization__)) +# else +# define _LIBCUDACXX_SAFE_STATIC +# endif -#if !defined(_LIBCUDACXX_HAS_NO_OFF_T_FUNCTIONS) -# if defined(_LIBCUDACXX_MSVCRT) || defined(_NEWLIB_VERSION) -# define _LIBCUDACXX_HAS_NO_OFF_T_FUNCTIONS +# if !defined(_LIBCUDACXX_HAS_NO_OFF_T_FUNCTIONS) +# if defined(_LIBCUDACXX_MSVCRT) || defined(_NEWLIB_VERSION) +# define _LIBCUDACXX_HAS_NO_OFF_T_FUNCTIONS +# endif # endif -#endif -#if __has_attribute(diagnose_if) && !defined(_LIBCUDACXX_DISABLE_ADDITIONAL_DIAGNOSTICS) -# define _LIBCUDACXX_DIAGNOSE_WARNING(...) \ - __attribute__((diagnose_if(__VA_ARGS__, "warning"))) -# define _LIBCUDACXX_DIAGNOSE_ERROR(...) \ - __attribute__((diagnose_if(__VA_ARGS__, "error"))) -#else -# define _LIBCUDACXX_DIAGNOSE_WARNING(...) -# define _LIBCUDACXX_DIAGNOSE_ERROR(...) -#endif +# if __has_attribute(diagnose_if) && !defined(_LIBCUDACXX_DISABLE_ADDITIONAL_DIAGNOSTICS) +# define _LIBCUDACXX_DIAGNOSE_WARNING(...) __attribute__((diagnose_if(__VA_ARGS__, "warning"))) +# define _LIBCUDACXX_DIAGNOSE_ERROR(...) __attribute__((diagnose_if(__VA_ARGS__, "error"))) +# else +# define _LIBCUDACXX_DIAGNOSE_WARNING(...) +# define _LIBCUDACXX_DIAGNOSE_ERROR(...) +# endif -#if __has_attribute(__nodebug__) -#define _LIBCUDACXX_NODEBUG __attribute__((__nodebug__)) -#else -#define _LIBCUDACXX_NODEBUG -#endif +# if __has_attribute(__nodebug__) +# define _LIBCUDACXX_NODEBUG __attribute__((__nodebug__)) +# else +# define _LIBCUDACXX_NODEBUG +# endif # if __has_attribute(__preferred_name__) # define _LIBCUDACXX_PREFERRED_NAME(x) __attribute__((__preferred_name__(x))) @@ -1867,47 +1867,46 @@ extern "C" _LIBCUDACXX_FUNC_VIS void __sanitizer_annotate_contiguous_container( # define _LIBCUDACXX_PREFERRED_NAME(x) # endif -#if defined(_LIBCUDACXX_ABI_MICROSOFT) && \ - (defined(_CCCL_COMPILER_MSVC) || __has_declspec_attribute(empty_bases)) -# define _LIBCUDACXX_DECLSPEC_EMPTY_BASES __declspec(empty_bases) -#else -# define _LIBCUDACXX_DECLSPEC_EMPTY_BASES -#endif +# if defined(_LIBCUDACXX_ABI_MICROSOFT) && (defined(_CCCL_COMPILER_MSVC) || __has_declspec_attribute(empty_bases)) +# define _LIBCUDACXX_DECLSPEC_EMPTY_BASES __declspec(empty_bases) +# else +# define _LIBCUDACXX_DECLSPEC_EMPTY_BASES +# endif -#if defined(_LIBCUDACXX_ENABLE_CXX17_REMOVED_FEATURES) -#define _LIBCUDACXX_ENABLE_CXX17_REMOVED_AUTO_PTR -#define _LIBCUDACXX_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS -#define _LIBCUDACXX_ENABLE_CXX17_REMOVED_RANDOM_SHUFFLE -#define _LIBCUDACXX_ENABLE_CXX17_REMOVED_BINDERS -#endif // _LIBCUDACXX_ENABLE_CXX17_REMOVED_FEATURES +# if defined(_LIBCUDACXX_ENABLE_CXX17_REMOVED_FEATURES) +# define _LIBCUDACXX_ENABLE_CXX17_REMOVED_AUTO_PTR +# define _LIBCUDACXX_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS +# define _LIBCUDACXX_ENABLE_CXX17_REMOVED_RANDOM_SHUFFLE +# define _LIBCUDACXX_ENABLE_CXX17_REMOVED_BINDERS +# endif // _LIBCUDACXX_ENABLE_CXX17_REMOVED_FEATURES -#if !defined(__cpp_deduction_guides) || __cpp_deduction_guides < 201611 -#define _LIBCUDACXX_HAS_NO_DEDUCTION_GUIDES -#endif 
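The _LIBCUDACXX_DIAGNOSE_WARNING and _LIBCUDACXX_DIAGNOSE_ERROR helpers reformatted above wrap clang's diagnose_if attribute. A hedged sketch of how a declaration can use them follows; the function, condition, and message are invented for illustration. The atomic hunks later in this patch build _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER and friends on the same mechanism.

// Warns at compile time under clang when the condition is computable from the call arguments;
// on compilers without diagnose_if the macro expands to nothing and the declaration is plain.
void __example_store(int* __ptr, int __val, int __order)
  _LIBCUDACXX_DIAGNOSE_WARNING(__order < 0, "a negative order is never valid for this operation");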
+# if !defined(__cpp_deduction_guides) || __cpp_deduction_guides < 201611 +# define _LIBCUDACXX_HAS_NO_DEDUCTION_GUIDES +# endif -#if !defined(__cpp_coroutines) || __cpp_coroutines < 201703L -#define _LIBCUDACXX_HAS_NO_COROUTINES -#endif +# if !defined(__cpp_coroutines) || __cpp_coroutines < 201703L +# define _LIBCUDACXX_HAS_NO_COROUTINES +# endif // We need `is_constant_evaluated` for clang and gcc. MSVC also needs extensive rework -#if !defined(_LIBCUDACXX_IS_CONSTANT_EVALUATED) -#define _LIBCUDACXX_HAS_NO_CONSTEXPR_COMPLEX_OPERATIONS -#elif defined(_CCCL_COMPILER_NVRTC) -#define _LIBCUDACXX_HAS_NO_CONSTEXPR_COMPLEX_OPERATIONS -#elif defined(_CCCL_COMPILER_MSVC) -#define _LIBCUDACXX_HAS_NO_CONSTEXPR_COMPLEX_OPERATIONS -#elif defined(_CCCL_CUDACC_BELOW_11_8) -#define _LIBCUDACXX_HAS_NO_CONSTEXPR_COMPLEX_OPERATIONS -#elif defined(_CCCL_CUDA_COMPILER_CLANG) -#define _LIBCUDACXX_HAS_NO_CONSTEXPR_COMPLEX_OPERATIONS -#endif +# if !defined(_LIBCUDACXX_IS_CONSTANT_EVALUATED) +# define _LIBCUDACXX_HAS_NO_CONSTEXPR_COMPLEX_OPERATIONS +# elif defined(_CCCL_COMPILER_NVRTC) +# define _LIBCUDACXX_HAS_NO_CONSTEXPR_COMPLEX_OPERATIONS +# elif defined(_CCCL_COMPILER_MSVC) +# define _LIBCUDACXX_HAS_NO_CONSTEXPR_COMPLEX_OPERATIONS +# elif defined(_CCCL_CUDACC_BELOW_11_8) +# define _LIBCUDACXX_HAS_NO_CONSTEXPR_COMPLEX_OPERATIONS +# elif defined(_CCCL_CUDA_COMPILER_CLANG) +# define _LIBCUDACXX_HAS_NO_CONSTEXPR_COMPLEX_OPERATIONS +# endif // FIXME: Correct this macro when either (A) a feature test macro for the // spaceship operator is provided, or (B) a compiler provides a complete // implementation. -#define _LIBCUDACXX_HAS_NO_SPACESHIP_OPERATOR +# define _LIBCUDACXX_HAS_NO_SPACESHIP_OPERATOR -#define _LIBCUDACXX_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS +# define _LIBCUDACXX_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS // The stream API was dropped and re-added in the dylib shipped on macOS // and iOS. We can only assume the dylib to provide these definitions for @@ -1916,123 +1915,114 @@ extern "C" _LIBCUDACXX_FUNC_VIS void __sanitizer_annotate_contiguous_container( // declarations for streams exist conditionally to this; if we provide // an explicit instantiation declaration and we try to deploy to a dylib // that does not provide those symbols, we'll get a load-time error. -#if !defined(_LIBCUDACXX_BUILDING_LIBRARY) && \ - ((defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && \ - __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 1090) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && \ - __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 70000)) -# define _LIBCUDACXX_DO_NOT_ASSUME_STREAMS_EXPLICIT_INSTANTIATION_IN_DYLIB -#endif - -#if defined(_LIBCUDACXX_HAS_NO_PRAGMA_PUSH_POP_MACRO) -# define _LIBCUDACXX_PUSH_MACROS -# define _LIBCUDACXX_POP_MACROS -#else - // Don't warn about macro conflicts when we can restore them at the - // end of the header. 
-# ifndef _LIBCUDACXX_DISABLE_MACRO_CONFLICT_WARNINGS -# define _LIBCUDACXX_DISABLE_MACRO_CONFLICT_WARNINGS +# if !defined(_LIBCUDACXX_BUILDING_LIBRARY) \ + && ((defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) \ + && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 1090) \ + || (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) \ + && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 70000)) +# define _LIBCUDACXX_DO_NOT_ASSUME_STREAMS_EXPLICIT_INSTANTIATION_IN_DYLIB # endif -# if defined(_CCCL_COMPILER_MSVC) -# define _LIBCUDACXX_PUSH_MACROS \ - __pragma(push_macro("min")) \ - __pragma(push_macro("max")) -# define _LIBCUDACXX_POP_MACROS \ - __pragma(pop_macro("min")) \ - __pragma(pop_macro("max")) + +# if defined(_LIBCUDACXX_HAS_NO_PRAGMA_PUSH_POP_MACRO) +# define _LIBCUDACXX_PUSH_MACROS +# define _LIBCUDACXX_POP_MACROS # else -# define _LIBCUDACXX_PUSH_MACROS \ - _Pragma("push_macro(\"min\")") \ - _Pragma("push_macro(\"max\")") -# define _LIBCUDACXX_POP_MACROS \ - _Pragma("pop_macro(\"min\")") \ - _Pragma("pop_macro(\"max\")") -# endif -#endif // defined(_LIBCUDACXX_HAS_NO_PRAGMA_PUSH_POP_MACRO) - -#if !defined(_LIBCUDACXX_NO_AUTO_LINK) && !defined(__cuda_std__) -# if defined(_LIBCUDACXX_ABI_MICROSOFT) && !defined(_LIBCUDACXX_BUILDING_LIBRARY) -# if defined(_DLL) -# pragma comment(lib, "c++.lib") +// Don't warn about macro conflicts when we can restore them at the +// end of the header. +# ifndef _LIBCUDACXX_DISABLE_MACRO_CONFLICT_WARNINGS +# define _LIBCUDACXX_DISABLE_MACRO_CONFLICT_WARNINGS +# endif +# if defined(_CCCL_COMPILER_MSVC) +# define _LIBCUDACXX_PUSH_MACROS __pragma(push_macro("min")) __pragma(push_macro("max")) +# define _LIBCUDACXX_POP_MACROS __pragma(pop_macro("min")) __pragma(pop_macro("max")) # else -# pragma comment(lib, "libc++.lib") +# define _LIBCUDACXX_PUSH_MACROS _Pragma("push_macro(\"min\")") _Pragma("push_macro(\"max\")") +# define _LIBCUDACXX_POP_MACROS _Pragma("pop_macro(\"min\")") _Pragma("pop_macro(\"max\")") # endif -# endif // defined(_LIBCUDACXX_ABI_MICROSOFT) && !defined(_LIBCUDACXX_BUILDING_LIBRARY) -#endif // !defined(_LIBCUDACXX_NO_AUTO_LINK) +# endif // defined(_LIBCUDACXX_HAS_NO_PRAGMA_PUSH_POP_MACRO) + +# if !defined(_LIBCUDACXX_NO_AUTO_LINK) && !defined(__cuda_std__) +# if defined(_LIBCUDACXX_ABI_MICROSOFT) && !defined(_LIBCUDACXX_BUILDING_LIBRARY) +# if defined(_DLL) +# pragma comment(lib, "c++.lib") +# else +# pragma comment(lib, "libc++.lib") +# endif +# endif // defined(_LIBCUDACXX_ABI_MICROSOFT) && !defined(_LIBCUDACXX_BUILDING_LIBRARY) +# endif // !defined(_LIBCUDACXX_NO_AUTO_LINK) -#define _LIBCUDACXX_UNUSED_VAR(x) ((void)(x)) +# define _LIBCUDACXX_UNUSED_VAR(x) ((void) (x)) // Configures the fopen close-on-exec mode character, if any. This string will // be appended to any mode string used by fstream for fopen/fdopen. // // Not all platforms support this, but it helps avoid fd-leaks on platforms that // do. -#if defined(__BIONIC__) -# define _LIBCUDACXX_FOPEN_CLOEXEC_MODE "e" -#else -# define _LIBCUDACXX_FOPEN_CLOEXEC_MODE -#endif +# if defined(__BIONIC__) +# define _LIBCUDACXX_FOPEN_CLOEXEC_MODE "e" +# else +# define _LIBCUDACXX_FOPEN_CLOEXEC_MODE +# endif # if __has_attribute(__format__) // The attribute uses 1-based indices for ordinary and static member functions. // The attribute uses 2-based indices for non-static member functions. 
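A sketch of the pattern the _LIBCUDACXX_PUSH_MACROS / _LIBCUDACXX_POP_MACROS pair above exists for; the library's real headers route this through small helper includes, and the __pragma_pop hunk later in this patch is the restoring half, but the idea inlined looks like this (surrounding header content imaginary).

_LIBCUDACXX_PUSH_MACROS // remember any user-defined min/max macros (a no-op where push_macro is unavailable)
#undef min              // the header can now declare entities named min/max safely
#undef max
// ... header content ...
_LIBCUDACXX_POP_MACROS  // the user's macro definitions, if any, are back in effect from here on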
-# define _LIBCUDACXX_ATTRIBUTE_FORMAT(archetype, format_string_index, first_format_arg_index) \ +# define _LIBCUDACXX_ATTRIBUTE_FORMAT(archetype, format_string_index, first_format_arg_index) \ __attribute__((__format__(archetype, format_string_index, first_format_arg_index))) # else # define _LIBCUDACXX_ATTRIBUTE_FORMAT(archetype, format_string_index, first_format_arg_index) /* nothing */ # endif -#ifndef _LIBCUDACXX_SYS_CLOCK_DURATION -#if defined(__cuda_std__) -# define _LIBCUDACXX_SYS_CLOCK_DURATION nanoseconds -#else -# define _LIBCUDACXX_SYS_CLOCK_DURATION microseconds -#endif -#endif // _LIBCUDACXX_SYS_CLOCK_DURATION +# ifndef _LIBCUDACXX_SYS_CLOCK_DURATION +# if defined(__cuda_std__) +# define _LIBCUDACXX_SYS_CLOCK_DURATION nanoseconds +# else +# define _LIBCUDACXX_SYS_CLOCK_DURATION microseconds +# endif +# endif // _LIBCUDACXX_SYS_CLOCK_DURATION // There are a handful of public standard library types that are intended to // support CTAD but don't need any explicit deduction guides to do so. This // macro is used to mark them as such, which suppresses the // '-Wctad-maybe-unsupported' compiler warning when CTAD is used in user code // with these classes. -#if (!defined(_CCCL_COMPILER_GCC) || __GNUC__ > 6) \ - && _CCCL_STD_VER >= 2017 -# define _LIBCUDACXX_CTAD_SUPPORTED_FOR_TYPE(_ClassName) \ - template \ - _ClassName(typename _Tag::__allow_ctad...) -> _ClassName<_Tag...> -#else -# define _LIBCUDACXX_CTAD_SUPPORTED_FOR_TYPE(_ClassName) static_assert(true, "") -#endif +# if (!defined(_CCCL_COMPILER_GCC) || __GNUC__ > 6) && _CCCL_STD_VER >= 2017 +# define _LIBCUDACXX_CTAD_SUPPORTED_FOR_TYPE(_ClassName) \ + template \ + _ClassName(typename _Tag::__allow_ctad...)->_ClassName<_Tag...> +# else +# define _LIBCUDACXX_CTAD_SUPPORTED_FOR_TYPE(_ClassName) static_assert(true, "") +# endif -#if (defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ <= 11) \ - && (defined(__CUDACC_VER_MINOR__) && __CUDACC_VER_MINOR__ <= 2) -# define _LIBCUDACXX_CONSTEXPR_GLOBAL const -#else -# define _LIBCUDACXX_CONSTEXPR_GLOBAL constexpr -#endif +# if (defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ <= 11) \ + && (defined(__CUDACC_VER_MINOR__) && __CUDACC_VER_MINOR__ <= 2) +# define _LIBCUDACXX_CONSTEXPR_GLOBAL const +# else +# define _LIBCUDACXX_CONSTEXPR_GLOBAL constexpr +# endif -#if defined(__CUDA_ARCH__) -# define _LIBCUDACXX_CPO_ACCESSIBILITY _CCCL_DEVICE _LIBCUDACXX_CONSTEXPR_GLOBAL -#else -# define _LIBCUDACXX_CPO_ACCESSIBILITY _LIBCUDACXX_INLINE_VAR constexpr -#endif +# if defined(__CUDA_ARCH__) +# define _LIBCUDACXX_CPO_ACCESSIBILITY _CCCL_DEVICE _LIBCUDACXX_CONSTEXPR_GLOBAL +# else +# define _LIBCUDACXX_CPO_ACCESSIBILITY _LIBCUDACXX_INLINE_VAR constexpr +# endif -#if _CCCL_STD_VER > 2014 -# define _LIBCUDACXX_TRAIT(__TRAIT, ...) __TRAIT##_v<__VA_ARGS__> -#else -# define _LIBCUDACXX_TRAIT(__TRAIT, ...) __TRAIT<__VA_ARGS__>::value -#endif +# if _CCCL_STD_VER > 2014 +# define _LIBCUDACXX_TRAIT(__TRAIT, ...) __TRAIT##_v<__VA_ARGS__> +# else +# define _LIBCUDACXX_TRAIT(__TRAIT, ...) __TRAIT<__VA_ARGS__>::value +# endif // Older nvcc do not handle the constraint of `construct_at` in earlier std modes // So to preserve our performance optimization we default to the unconstrained // `__construct_at` and only in C++20 use `construct_at` -#if _CCCL_STD_VER > 2017 -# define _LIBCUDACXX_CONSTRUCT_AT(_LOCATION, ...) \ - _CUDA_VSTD::construct_at(_CUDA_VSTD::addressof(_LOCATION), __VA_ARGS__) -#else -# define _LIBCUDACXX_CONSTRUCT_AT(_LOCATION, ...) 
\ - _CUDA_VSTD::__construct_at(_CUDA_VSTD::addressof(_LOCATION), __VA_ARGS__) -#endif +# if _CCCL_STD_VER > 2017 +# define _LIBCUDACXX_CONSTRUCT_AT(_LOCATION, ...) \ + _CUDA_VSTD::construct_at(_CUDA_VSTD::addressof(_LOCATION), __VA_ARGS__) +# else +# define _LIBCUDACXX_CONSTRUCT_AT(_LOCATION, ...) \ + _CUDA_VSTD::__construct_at(_CUDA_VSTD::addressof(_LOCATION), __VA_ARGS__) +# endif // We can only expose constexpr allocations if the compiler supports it # if defined(__cpp_constexpr_dynamic_alloc) && defined(__cpp_lib_constexpr_dynamic_alloc) && _CCCL_STD_VER >= 2020 \ @@ -2061,7 +2051,7 @@ extern "C" _LIBCUDACXX_FUNC_VIS void __sanitizer_annotate_contiguous_container( constexpr __class() noexcept = default; # endif // !_CCCL_COMPILER_NVRTC || nvcc >= 11.3 -#define _LIBCUDACXX_HAS_NO_INCOMPLETE_RANGES +# define _LIBCUDACXX_HAS_NO_INCOMPLETE_RANGES #endif // __cplusplus diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/__pragma_pop b/libcudacxx/include/cuda/std/detail/libcxx/include/__pragma_pop index 27a9a68b4e6..5bd85a09940 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/__pragma_pop +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/__pragma_pop @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// #if defined(_LIBCUDACXX_USE_PRAGMA_MSVC_WARNING) - #pragma warning(pop) +# pragma warning(pop) #endif #if defined(_LIBCUDACXX_POP_MACROS) - _LIBCUDACXX_POP_MACROS +_LIBCUDACXX_POP_MACROS #endif diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/atomic b/libcudacxx/include/cuda/std/detail/libcxx/include/atomic index 298b69726f9..2d0a2e56af6 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/atomic +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/atomic @@ -556,9 +556,6 @@ void atomic_signal_fence(memory_order m) noexcept; # pragma system_header #endif // no system header -#include // all public C++ headers provide the assertion handler -#include -#include #include #include #include @@ -568,42 +565,42 @@ void atomic_signal_fence(memory_order m) noexcept; #include #include #include -#include #include #include +#include // all public C++ headers provide the assertion handler +#include +#include +#include #include #include #include #ifdef _LIBCUDACXX_HAS_NO_THREADS -# error is not supported on this single threaded system +# error is not supported on this single threaded system #endif #ifdef _LIBCUDACXX_HAS_NO_ATOMIC_HEADER -# error is not implemented +# error is not implemented #endif #ifdef _LIBCUDACXX_UNSUPPORTED_THREAD_API -# error " is not supported on this system" +# error " is not supported on this system" #endif #ifdef kill_dependency -# error C++ standard library is incompatible with +# error C++ standard library is incompatible with #endif -#define _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) \ - _LIBCUDACXX_DIAGNOSE_WARNING(__m == memory_order_consume || \ - __m == memory_order_acquire || \ - __m == memory_order_acq_rel, \ - "memory order argument to atomic operation is invalid") +#define _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) \ + _LIBCUDACXX_DIAGNOSE_WARNING( \ + __m == memory_order_consume || __m == memory_order_acquire || __m == memory_order_acq_rel, \ + "memory order argument to atomic operation is invalid") -#define _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) \ - _LIBCUDACXX_DIAGNOSE_WARNING(__m == memory_order_release || \ - __m == memory_order_acq_rel, \ - "memory order argument to atomic operation is invalid") +#define _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) \ + 
_LIBCUDACXX_DIAGNOSE_WARNING(__m == memory_order_release || __m == memory_order_acq_rel, \ + "memory order argument to atomic operation is invalid") -#define _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__m, __f) \ - _LIBCUDACXX_DIAGNOSE_WARNING(__f == memory_order_release || \ - __f == memory_order_acq_rel, \ - "memory order argument to atomic operation is invalid") +#define _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__m, __f) \ + _LIBCUDACXX_DIAGNOSE_WARNING(__f == memory_order_release || __f == memory_order_acq_rel, \ + "memory order argument to atomic operation is invalid") #if defined(_LIBCUDACXX_HAS_MSVC_ATOMIC_IMPL) # include @@ -614,25 +611,25 @@ void atomic_signal_fence(memory_order m) noexcept; #endif #if !defined(__CLANG_ATOMIC_BOOL_LOCK_FREE) && !defined(__GCC_ATOMIC_BOOL_LOCK_FREE) -#define ATOMIC_BOOL_LOCK_FREE 2 -#define ATOMIC_CHAR_LOCK_FREE 2 -#define ATOMIC_CHAR16_T_LOCK_FREE 2 -#define ATOMIC_CHAR32_T_LOCK_FREE 2 -#define ATOMIC_WCHAR_T_LOCK_FREE 2 -#define ATOMIC_SHORT_LOCK_FREE 2 -#define ATOMIC_INT_LOCK_FREE 2 -#define ATOMIC_LONG_LOCK_FREE 2 -#define ATOMIC_LLONG_LOCK_FREE 2 -#define ATOMIC_POINTER_LOCK_FREE 2 -#endif //!defined(__CLANG_ATOMIC_BOOL_LOCK_FREE) && !defined(__GCC_ATOMIC_BOOL_LOCK_FREE) +# define ATOMIC_BOOL_LOCK_FREE 2 +# define ATOMIC_CHAR_LOCK_FREE 2 +# define ATOMIC_CHAR16_T_LOCK_FREE 2 +# define ATOMIC_CHAR32_T_LOCK_FREE 2 +# define ATOMIC_WCHAR_T_LOCK_FREE 2 +# define ATOMIC_SHORT_LOCK_FREE 2 +# define ATOMIC_INT_LOCK_FREE 2 +# define ATOMIC_LONG_LOCK_FREE 2 +# define ATOMIC_LLONG_LOCK_FREE 2 +# define ATOMIC_POINTER_LOCK_FREE 2 +#endif //! defined(__CLANG_ATOMIC_BOOL_LOCK_FREE) && !defined(__GCC_ATOMIC_BOOL_LOCK_FREE) #ifndef __ATOMIC_RELAXED -#define __ATOMIC_RELAXED 0 -#define __ATOMIC_CONSUME 1 -#define __ATOMIC_ACQUIRE 2 -#define __ATOMIC_RELEASE 3 -#define __ATOMIC_ACQ_REL 4 -#define __ATOMIC_SEQ_CST 5 +# define __ATOMIC_RELAXED 0 +# define __ATOMIC_CONSUME 1 +# define __ATOMIC_ACQUIRE 2 +# define __ATOMIC_RELEASE 3 +# define __ATOMIC_ACQ_REL 4 +# define __ATOMIC_SEQ_CST 5 #endif //__ATOMIC_RELAXED _LIBCUDACXX_BEGIN_NAMESPACE_STD @@ -640,20 +637,22 @@ _LIBCUDACXX_BEGIN_NAMESPACE_STD // Figure out what the underlying type for `memory_order` would be if it were // declared as an unscoped enum (accounting for -fshort-enums). Use this result // to pin the underlying type in C++20. 
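A standalone illustration of the pinning described in the comment above, with names chosen purely for the example (the real enums follow in the hunk below): the unscoped helper enum lets the compiler pick whatever underlying type it historically used, possibly shrunk by -fshort-enums, and the C++20 scoped enum is then declared with exactly that type so the switch is ABI-neutral.

#include <type_traits>

enum __example_legacy_order { __eo_relaxed, __eo_seq_cst }; // compiler chooses the underlying type here
using __example_underlying_t = std::underlying_type<__example_legacy_order>::type;
enum class __example_order : __example_underlying_t // scoped enum pinned to that same type
{
  relaxed = __eo_relaxed,
  seq_cst = __eo_seq_cst
};
static_assert(sizeof(__example_order) == sizeof(__example_legacy_order),
              "switching to the scoped enum does not change the representation");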
-enum __legacy_memory_order { - __mo_relaxed, - __mo_consume, - __mo_acquire, - __mo_release, - __mo_acq_rel, - __mo_seq_cst +enum __legacy_memory_order +{ + __mo_relaxed, + __mo_consume, + __mo_acquire, + __mo_release, + __mo_acq_rel, + __mo_seq_cst }; typedef underlying_type<__legacy_memory_order>::type __memory_order_underlying_t; #if _CCCL_STD_VER > 2017 -enum class memory_order : __memory_order_underlying_t { +enum class memory_order : __memory_order_underlying_t +{ relaxed = __mo_relaxed, consume = __mo_consume, acquire = __mo_acquire, @@ -671,7 +670,8 @@ inline constexpr auto memory_order_seq_cst = memory_order::seq_cst; #else -typedef enum memory_order { +typedef enum memory_order +{ memory_order_relaxed = __mo_relaxed, memory_order_consume = __mo_consume, memory_order_acquire = __mo_acquire, @@ -682,43 +682,48 @@ typedef enum memory_order { #endif // _CCCL_STD_VER > 2017 -template _LIBCUDACXX_INLINE_VISIBILITY -bool __cxx_nonatomic_compare_equal(_Tp const& __lhs, _Tp const& __rhs) { +template +_LIBCUDACXX_INLINE_VISIBILITY bool __cxx_nonatomic_compare_equal(_Tp const& __lhs, _Tp const& __rhs) +{ #if defined(_CCCL_CUDA_COMPILER) - return __lhs == __rhs; + return __lhs == __rhs; #else - return memcmp(&__lhs, &__rhs, sizeof(_Tp)) == 0; + return memcmp(&__lhs, &__rhs, sizeof(_Tp)) == 0; #endif } static_assert((is_same::type, __memory_order_underlying_t>::value), - "unexpected underlying type for std::memory_order"); + "unexpected underlying type for std::memory_order"); -#if defined(_LIBCUDACXX_HAS_GCC_ATOMIC_IMP) || \ - defined(_LIBCUDACXX_ATOMIC_ONLY_USE_BUILTINS) +#if defined(_LIBCUDACXX_HAS_GCC_ATOMIC_IMP) || defined(_LIBCUDACXX_ATOMIC_ONLY_USE_BUILTINS) // [atomics.types.generic]p1 guarantees _Tp is trivially copyable. Because // the default operator= in an object is not volatile, a byte-by-byte copy // is required. -template _LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t::value> -__cxx_atomic_assign_volatile(_Tp& __a_value, _Tv const& __val) { +template +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value> +__cxx_atomic_assign_volatile(_Tp& __a_value, _Tv const& __val) +{ __a_value = __val; } -template _LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t::value> -__cxx_atomic_assign_volatile(_Tp volatile& __a_value, _Tv volatile const& __val) { - volatile char* __to = reinterpret_cast(&__a_value); - volatile char* __end = __to + sizeof(_Tp); +template +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value> +__cxx_atomic_assign_volatile(_Tp volatile& __a_value, _Tv volatile const& __val) +{ + volatile char* __to = reinterpret_cast(&__a_value); + volatile char* __end = __to + sizeof(_Tp); volatile const char* __from = reinterpret_cast(&__val); while (__to != __end) + { *__to++ = *__from++; + } } #endif // Headers are wrapped like so: (cuda::std::|std::)detail -namespace __detail { +namespace __detail +{ #if defined(_LIBCUDACXX_HAS_CUDA_ATOMIC_EXT) # include #endif @@ -733,91 +738,98 @@ namespace __detail { // TODO: Maybe support C11 atomics? 
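The byte-wise volatile assignment introduced above follows from [atomics.types.generic]p1: _Tp is trivially copyable, but the implicitly defined operator= is not volatile-qualified, so it cannot be applied to a volatile object. A self-contained sketch of the same idea, with illustrative names rather than the library's:

struct pair_t { int a, b; };   // trivially copyable, no volatile-qualified operator=

// Copy a trivially copyable value into volatile storage one byte at a time.
template <class T>
void assign_volatile(T volatile& dst, T const volatile& src)
{
  volatile char*       to   = reinterpret_cast<volatile char*>(&dst);
  volatile char* const end  = to + sizeof(T);
  volatile const char* from = reinterpret_cast<volatile const char*>(&src);
  while (to != end)
  {
    *to++ = *from++;
  }
}

void demo()
{
  volatile pair_t dst{};
  pair_t src{1, 2};
  // dst = src;              // ill-formed: implicit operator= is not volatile-qualified
  assign_volatile(dst, src); // byte-wise copy is valid for trivially copyable types
}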
// #include #endif // _LIBCUDACXX_HAS_GCC_ATOMIC_IMP, _LIBCUDACXX_HAS_C_ATOMIC_IMP -} +} // namespace __detail using __detail::__cxx_atomic_base_impl; -using __detail::__cxx_atomic_ref_base_impl; -using __detail::__cxx_atomic_thread_fence; -using __detail::__cxx_atomic_signal_fence; -using __detail::__cxx_atomic_load; -using __detail::__cxx_atomic_store; -using __detail::__cxx_atomic_exchange; -using __detail::__cxx_atomic_compare_exchange_weak; using __detail::__cxx_atomic_compare_exchange_strong; +using __detail::__cxx_atomic_compare_exchange_weak; +using __detail::__cxx_atomic_exchange; using __detail::__cxx_atomic_fetch_add; -using __detail::__cxx_atomic_fetch_sub; -using __detail::__cxx_atomic_fetch_or; using __detail::__cxx_atomic_fetch_and; +using __detail::__cxx_atomic_fetch_or; +using __detail::__cxx_atomic_fetch_sub; using __detail::__cxx_atomic_fetch_xor; +using __detail::__cxx_atomic_load; +using __detail::__cxx_atomic_ref_base_impl; +using __detail::__cxx_atomic_signal_fence; +using __detail::__cxx_atomic_store; +using __detail::__cxx_atomic_thread_fence; template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp kill_dependency(_Tp __y) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp kill_dependency(_Tp __y) noexcept { - return __y; + return __y; } #if defined(__CLANG_ATOMIC_BOOL_LOCK_FREE) -# define ATOMIC_BOOL_LOCK_FREE __CLANG_ATOMIC_BOOL_LOCK_FREE -# define ATOMIC_CHAR_LOCK_FREE __CLANG_ATOMIC_CHAR_LOCK_FREE -# define ATOMIC_CHAR16_T_LOCK_FREE __CLANG_ATOMIC_CHAR16_T_LOCK_FREE -# define ATOMIC_CHAR32_T_LOCK_FREE __CLANG_ATOMIC_CHAR32_T_LOCK_FREE -# define ATOMIC_WCHAR_T_LOCK_FREE __CLANG_ATOMIC_WCHAR_T_LOCK_FREE -# define ATOMIC_SHORT_LOCK_FREE __CLANG_ATOMIC_SHORT_LOCK_FREE -# define ATOMIC_INT_LOCK_FREE __CLANG_ATOMIC_INT_LOCK_FREE -# define ATOMIC_LONG_LOCK_FREE __CLANG_ATOMIC_LONG_LOCK_FREE -# define ATOMIC_LLONG_LOCK_FREE __CLANG_ATOMIC_LLONG_LOCK_FREE -# define ATOMIC_POINTER_LOCK_FREE __CLANG_ATOMIC_POINTER_LOCK_FREE +# define ATOMIC_BOOL_LOCK_FREE __CLANG_ATOMIC_BOOL_LOCK_FREE +# define ATOMIC_CHAR_LOCK_FREE __CLANG_ATOMIC_CHAR_LOCK_FREE +# define ATOMIC_CHAR16_T_LOCK_FREE __CLANG_ATOMIC_CHAR16_T_LOCK_FREE +# define ATOMIC_CHAR32_T_LOCK_FREE __CLANG_ATOMIC_CHAR32_T_LOCK_FREE +# define ATOMIC_WCHAR_T_LOCK_FREE __CLANG_ATOMIC_WCHAR_T_LOCK_FREE +# define ATOMIC_SHORT_LOCK_FREE __CLANG_ATOMIC_SHORT_LOCK_FREE +# define ATOMIC_INT_LOCK_FREE __CLANG_ATOMIC_INT_LOCK_FREE +# define ATOMIC_LONG_LOCK_FREE __CLANG_ATOMIC_LONG_LOCK_FREE +# define ATOMIC_LLONG_LOCK_FREE __CLANG_ATOMIC_LLONG_LOCK_FREE +# define ATOMIC_POINTER_LOCK_FREE __CLANG_ATOMIC_POINTER_LOCK_FREE #elif defined(__GCC_ATOMIC_BOOL_LOCK_FREE) -# define ATOMIC_BOOL_LOCK_FREE __GCC_ATOMIC_BOOL_LOCK_FREE -# define ATOMIC_CHAR_LOCK_FREE __GCC_ATOMIC_CHAR_LOCK_FREE -# define ATOMIC_CHAR16_T_LOCK_FREE __GCC_ATOMIC_CHAR16_T_LOCK_FREE -# define ATOMIC_CHAR32_T_LOCK_FREE __GCC_ATOMIC_CHAR32_T_LOCK_FREE -# define ATOMIC_WCHAR_T_LOCK_FREE __GCC_ATOMIC_WCHAR_T_LOCK_FREE -# define ATOMIC_SHORT_LOCK_FREE __GCC_ATOMIC_SHORT_LOCK_FREE -# define ATOMIC_INT_LOCK_FREE __GCC_ATOMIC_INT_LOCK_FREE -# define ATOMIC_LONG_LOCK_FREE __GCC_ATOMIC_LONG_LOCK_FREE -# define ATOMIC_LLONG_LOCK_FREE __GCC_ATOMIC_LLONG_LOCK_FREE -# define ATOMIC_POINTER_LOCK_FREE __GCC_ATOMIC_POINTER_LOCK_FREE +# define ATOMIC_BOOL_LOCK_FREE __GCC_ATOMIC_BOOL_LOCK_FREE +# define ATOMIC_CHAR_LOCK_FREE __GCC_ATOMIC_CHAR_LOCK_FREE +# define ATOMIC_CHAR16_T_LOCK_FREE __GCC_ATOMIC_CHAR16_T_LOCK_FREE +# define ATOMIC_CHAR32_T_LOCK_FREE __GCC_ATOMIC_CHAR32_T_LOCK_FREE +# define 
ATOMIC_WCHAR_T_LOCK_FREE __GCC_ATOMIC_WCHAR_T_LOCK_FREE +# define ATOMIC_SHORT_LOCK_FREE __GCC_ATOMIC_SHORT_LOCK_FREE +# define ATOMIC_INT_LOCK_FREE __GCC_ATOMIC_INT_LOCK_FREE +# define ATOMIC_LONG_LOCK_FREE __GCC_ATOMIC_LONG_LOCK_FREE +# define ATOMIC_LLONG_LOCK_FREE __GCC_ATOMIC_LLONG_LOCK_FREE +# define ATOMIC_POINTER_LOCK_FREE __GCC_ATOMIC_POINTER_LOCK_FREE #endif #ifdef _LIBCUDACXX_ATOMIC_ONLY_USE_BUILTINS -template -struct __cxx_atomic_lock_impl { - - _LIBCUDACXX_INLINE_VISIBILITY - __cxx_atomic_lock_impl() noexcept - : __a_value(), __a_lock(0) {} - _LIBCUDACXX_INLINE_VISIBILITY constexpr explicit - __cxx_atomic_lock_impl(_Tp value) noexcept - : __a_value(value), __a_lock(0) {} +template +struct __cxx_atomic_lock_impl +{ + _LIBCUDACXX_INLINE_VISIBILITY __cxx_atomic_lock_impl() noexcept + : __a_value() + , __a_lock(0) + {} + _LIBCUDACXX_INLINE_VISIBILITY constexpr explicit __cxx_atomic_lock_impl(_Tp value) noexcept + : __a_value(value) + , __a_lock(0) + {} _Tp __a_value; mutable __cxx_atomic_base_impl<_LIBCUDACXX_ATOMIC_FLAG_TYPE, _Sco> __a_lock; - _LIBCUDACXX_INLINE_VISIBILITY void __lock() const volatile { - while(1 == __cxx_atomic_exchange(&__a_lock, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), memory_order_acquire)) - /*spin*/; + _LIBCUDACXX_INLINE_VISIBILITY void __lock() const volatile + { + while (1 == __cxx_atomic_exchange(&__a_lock, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), memory_order_acquire)) + /*spin*/; } - _LIBCUDACXX_INLINE_VISIBILITY void __lock() const { - while(1 == __cxx_atomic_exchange(&__a_lock, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), memory_order_acquire)) - /*spin*/; + _LIBCUDACXX_INLINE_VISIBILITY void __lock() const + { + while (1 == __cxx_atomic_exchange(&__a_lock, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), memory_order_acquire)) + /*spin*/; } - _LIBCUDACXX_INLINE_VISIBILITY void __unlock() const volatile { + _LIBCUDACXX_INLINE_VISIBILITY void __unlock() const volatile + { __cxx_atomic_store(&__a_lock, _LIBCUDACXX_ATOMIC_FLAG_TYPE(false), memory_order_release); } - _LIBCUDACXX_INLINE_VISIBILITY void __unlock() const { + _LIBCUDACXX_INLINE_VISIBILITY void __unlock() const + { __cxx_atomic_store(&__a_lock, _LIBCUDACXX_ATOMIC_FLAG_TYPE(false), memory_order_release); } - _LIBCUDACXX_INLINE_VISIBILITY _Tp __read() const volatile { + _LIBCUDACXX_INLINE_VISIBILITY _Tp __read() const volatile + { __lock(); _Tp __old; __cxx_atomic_assign_volatile(__old, __a_value); __unlock(); return __old; } - _LIBCUDACXX_INLINE_VISIBILITY _Tp __read() const { + _LIBCUDACXX_INLINE_VISIBILITY _Tp __read() const + { __lock(); _Tp __old = __a_value; __unlock(); @@ -826,45 +838,47 @@ struct __cxx_atomic_lock_impl { }; template -_LIBCUDACXX_INLINE_VISIBILITY -void __cxx_atomic_init(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __val) { +_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_init(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __val) +{ __cxx_atomic_assign_volatile(__a->__a_value, __val); } template -_LIBCUDACXX_INLINE_VISIBILITY -void __cxx_atomic_init(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __val) { +_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_init(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __val) +{ __a->__a_value = __val; } template -_LIBCUDACXX_INLINE_VISIBILITY -void __cxx_atomic_store(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __val, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY void +__cxx_atomic_store(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __val, memory_order) +{ __a->__lock(); __cxx_atomic_assign_volatile(__a->__a_value, __val); __a->__unlock(); } 
template -_LIBCUDACXX_INLINE_VISIBILITY -void __cxx_atomic_store(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __val, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_store(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __val, memory_order) +{ __a->__lock(); __a->__a_value = __val; __a->__unlock(); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_load(const volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp __cxx_atomic_load(const volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, memory_order) +{ return __a->__read(); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_load(const __cxx_atomic_lock_impl<_Tp, _Sco>* __a, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp __cxx_atomic_load(const __cxx_atomic_lock_impl<_Tp, _Sco>* __a, memory_order) +{ return __a->__read(); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_exchange(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __value, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp +__cxx_atomic_exchange(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __value, memory_order) +{ __a->__lock(); _Tp __old; __cxx_atomic_assign_volatile(__old, __a->__a_value); @@ -873,77 +887,94 @@ _Tp __cxx_atomic_exchange(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp _ return __old; } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_exchange(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __value, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp +__cxx_atomic_exchange(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __value, memory_order) +{ __a->__lock(); - _Tp __old = __a->__a_value; + _Tp __old = __a->__a_value; __a->__a_value = __value; __a->__unlock(); return __old; } template -_LIBCUDACXX_INLINE_VISIBILITY -bool __cxx_atomic_compare_exchange_strong(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Tp* __expected, _Tp __value, memory_order, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY bool __cxx_atomic_compare_exchange_strong( + volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp* __expected, _Tp __value, memory_order, memory_order) +{ __a->__lock(); _Tp __temp; __cxx_atomic_assign_volatile(__temp, __a->__a_value); bool __ret = __temp == *__expected; - if(__ret) + if (__ret) + { __cxx_atomic_assign_volatile(__a->__a_value, __value); + } else + { __cxx_atomic_assign_volatile(*__expected, __a->__a_value); + } __a->__unlock(); return __ret; } template -_LIBCUDACXX_INLINE_VISIBILITY -bool __cxx_atomic_compare_exchange_strong(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Tp* __expected, _Tp __value, memory_order, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY bool __cxx_atomic_compare_exchange_strong( + __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp* __expected, _Tp __value, memory_order, memory_order) +{ __a->__lock(); bool __ret = __a->__a_value == *__expected; - if(__ret) + if (__ret) + { __a->__a_value = __value; + } else + { *__expected = __a->__a_value; + } __a->__unlock(); return __ret; } template -_LIBCUDACXX_INLINE_VISIBILITY -bool __cxx_atomic_compare_exchange_weak(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Tp* __expected, _Tp __value, memory_order, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY bool __cxx_atomic_compare_exchange_weak( + volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp* __expected, _Tp __value, memory_order, memory_order) +{ __a->__lock(); _Tp __temp; __cxx_atomic_assign_volatile(__temp, __a->__a_value); bool __ret = __temp == *__expected; - if(__ret) + if (__ret) + { __cxx_atomic_assign_volatile(__a->__a_value, 
__value); + } else + { __cxx_atomic_assign_volatile(*__expected, __a->__a_value); + } __a->__unlock(); return __ret; } template -_LIBCUDACXX_INLINE_VISIBILITY -bool __cxx_atomic_compare_exchange_weak(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Tp* __expected, _Tp __value, memory_order, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY bool __cxx_atomic_compare_exchange_weak( + __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp* __expected, _Tp __value, memory_order, memory_order) +{ __a->__lock(); bool __ret = __a->__a_value == *__expected; - if(__ret) + if (__ret) + { __a->__a_value = __value; + } else + { *__expected = __a->__a_value; + } __a->__unlock(); return __ret; } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_fetch_add(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Td __delta, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp +__cxx_atomic_fetch_add(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Td __delta, memory_order) +{ __a->__lock(); _Tp __old; __cxx_atomic_assign_volatile(__old, __a->__a_value); @@ -952,9 +983,9 @@ _Tp __cxx_atomic_fetch_add(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, return __old; } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_fetch_add(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Td __delta, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp +__cxx_atomic_fetch_add(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Td __delta, memory_order) +{ __a->__lock(); _Tp __old = __a->__a_value; __a->__a_value += __delta; @@ -963,9 +994,9 @@ _Tp __cxx_atomic_fetch_add(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp* __cxx_atomic_fetch_add(volatile __cxx_atomic_lock_impl<_Tp*, _Sco>* __a, - ptrdiff_t __delta, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp* +__cxx_atomic_fetch_add(volatile __cxx_atomic_lock_impl<_Tp*, _Sco>* __a, ptrdiff_t __delta, memory_order) +{ __a->__lock(); _Tp* __old; __cxx_atomic_assign_volatile(__old, __a->__a_value); @@ -974,9 +1005,9 @@ _Tp* __cxx_atomic_fetch_add(volatile __cxx_atomic_lock_impl<_Tp*, _Sco>* __a, return __old; } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp* __cxx_atomic_fetch_add(__cxx_atomic_lock_impl<_Tp*, _Sco>* __a, - ptrdiff_t __delta, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp* +__cxx_atomic_fetch_add(__cxx_atomic_lock_impl<_Tp*, _Sco>* __a, ptrdiff_t __delta, memory_order) +{ __a->__lock(); _Tp* __old = __a->__a_value; __a->__a_value += __delta; @@ -985,9 +1016,9 @@ _Tp* __cxx_atomic_fetch_add(__cxx_atomic_lock_impl<_Tp*, _Sco>* __a, } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_fetch_sub(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Td __delta, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp +__cxx_atomic_fetch_sub(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Td __delta, memory_order) +{ __a->__lock(); _Tp __old; __cxx_atomic_assign_volatile(__old, __a->__a_value); @@ -996,9 +1027,9 @@ _Tp __cxx_atomic_fetch_sub(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, return __old; } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_fetch_sub(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Td __delta, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp +__cxx_atomic_fetch_sub(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Td __delta, memory_order) +{ __a->__lock(); _Tp __old = __a->__a_value; __a->__a_value -= __delta; @@ -1007,9 +1038,9 @@ _Tp __cxx_atomic_fetch_sub(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_fetch_and(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Tp 
__pattern, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp +__cxx_atomic_fetch_and(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __pattern, memory_order) +{ __a->__lock(); _Tp __old; __cxx_atomic_assign_volatile(__old, __a->__a_value); @@ -1018,9 +1049,9 @@ _Tp __cxx_atomic_fetch_and(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, return __old; } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_fetch_and(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Tp __pattern, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp +__cxx_atomic_fetch_and(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __pattern, memory_order) +{ __a->__lock(); _Tp __old = __a->__a_value; __a->__a_value &= __pattern; @@ -1029,9 +1060,9 @@ _Tp __cxx_atomic_fetch_and(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_fetch_or(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Tp __pattern, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp +__cxx_atomic_fetch_or(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __pattern, memory_order) +{ __a->__lock(); _Tp __old; __cxx_atomic_assign_volatile(__old, __a->__a_value); @@ -1040,9 +1071,9 @@ _Tp __cxx_atomic_fetch_or(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, return __old; } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_fetch_or(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Tp __pattern, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp +__cxx_atomic_fetch_or(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __pattern, memory_order) +{ __a->__lock(); _Tp __old = __a->__a_value; __a->__a_value |= __pattern; @@ -1051,9 +1082,9 @@ _Tp __cxx_atomic_fetch_or(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_fetch_xor(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Tp __pattern, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp +__cxx_atomic_fetch_xor(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __pattern, memory_order) +{ __a->__lock(); _Tp __old; __cxx_atomic_assign_volatile(__old, __a->__a_value); @@ -1062,9 +1093,9 @@ _Tp __cxx_atomic_fetch_xor(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, return __old; } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_fetch_xor(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Tp __pattern, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp +__cxx_atomic_fetch_xor(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __pattern, memory_order) +{ __a->__lock(); _Tp __old = __a->__a_value; __a->__a_value ^= __pattern; @@ -1072,44 +1103,56 @@ _Tp __cxx_atomic_fetch_xor(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, return __old; } -#if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) +# if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) -template struct __cxx_is_always_lock_free { - enum { __value = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(_Tp), 0) }; }; +template +struct __cxx_is_always_lock_free +{ + enum + { + __value = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(_Tp), 0) + }; +}; -#else +# else -template struct __cxx_is_always_lock_free { - enum { __value = sizeof(_Tp) <= 8 }; }; +template +struct __cxx_is_always_lock_free +{ + enum + { + __value = sizeof(_Tp) <= 8 + }; +}; -#endif // defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) +# endif // defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) template -struct __cxx_atomic_impl_conditional { - using type = __conditional_t<__cxx_is_always_lock_free<_Tp>::__value, - __cxx_atomic_base_impl<_Tp, _Sco>, - __cxx_atomic_lock_impl<_Tp, _Sco> >; +struct __cxx_atomic_impl_conditional +{ + using 
type = __conditional_t<__cxx_is_always_lock_free<_Tp>::__value, + __cxx_atomic_base_impl<_Tp, _Sco>, + __cxx_atomic_lock_impl<_Tp, _Sco>>; }; -template ::type > +template ::type> #else -template > +template > #endif //_LIBCUDACXX_ATOMIC_ONLY_USE_BUILTINS -struct __cxx_atomic_impl : public _Base { +struct __cxx_atomic_impl : public _Base +{ __cxx_atomic_impl() noexcept = default; _LIBCUDACXX_INLINE_VISIBILITY constexpr explicit __cxx_atomic_impl(_Tp value) noexcept - : _Base(value) {} + : _Base(value) + {} }; - -template -_LIBCUDACXX_INLINE_VISIBILITY -__cxx_atomic_impl<_Tp, _Sco>* __cxx_atomic_rebind(_Tp* __inst) { - static_assert(sizeof(__cxx_atomic_impl<_Tp, _Sco>) == sizeof(_Tp),""); - static_assert(alignof(__cxx_atomic_impl<_Tp, _Sco>) == alignof(_Tp),""); - return (__cxx_atomic_impl<_Tp, _Sco>*)__inst; +template +_LIBCUDACXX_INLINE_VISIBILITY __cxx_atomic_impl<_Tp, _Sco>* __cxx_atomic_rebind(_Tp* __inst) +{ + static_assert(sizeof(__cxx_atomic_impl<_Tp, _Sco>) == sizeof(_Tp), ""); + static_assert(alignof(__cxx_atomic_impl<_Tp, _Sco>) == alignof(_Tp), ""); + return (__cxx_atomic_impl<_Tp, _Sco>*) __inst; } template @@ -1118,25 +1161,29 @@ using __cxx_atomic_ref_impl = __cxx_atomic_ref_base_impl<_Tp, _Sco>; #ifdef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE template , int _Sco = _Ty::__sco> -struct __cxx_atomic_poll_tester { - _Ty const volatile* __a; - _Tp __val; - memory_order __order; +struct __cxx_atomic_poll_tester +{ + _Ty const volatile* __a; + _Tp __val; + memory_order __order; - _LIBCUDACXX_INLINE_VISIBILITY __cxx_atomic_poll_tester(_Ty const volatile* __a_, _Tp __val_, memory_order __order_) + _LIBCUDACXX_INLINE_VISIBILITY __cxx_atomic_poll_tester(_Ty const volatile* __a_, _Tp __val_, memory_order __order_) : __a(__a_) , __val(__val_) , __order(__order_) - {} + {} - _LIBCUDACXX_INLINE_VISIBILITY bool operator()() const { - return !(__cxx_atomic_load(__a, __order) == __val); - } + _LIBCUDACXX_INLINE_VISIBILITY bool operator()() const + { + return !(__cxx_atomic_load(__a, __order) == __val); + } }; template , int _Sco = _Ty::__sco> -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_try_wait_slow_fallback(_Ty const volatile* __a, _Tp __val, memory_order __order) { - __libcpp_thread_poll_with_backoff(__cxx_atomic_poll_tester<_Ty>(__a, __val, __order)); +_LIBCUDACXX_INLINE_VISIBILITY void +__cxx_atomic_try_wait_slow_fallback(_Ty const volatile* __a, _Tp __val, memory_order __order) +{ + __libcpp_thread_poll_with_backoff(__cxx_atomic_poll_tester<_Ty>(__a, __val, __order)); } #endif @@ -1144,632 +1191,888 @@ _LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_try_wait_slow_fallback(_Ty const #ifdef _LIBCUDACXX_HAS_PLATFORM_WAIT template ::__value, int> = 1> -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_all(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) { -#ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE - auto * const __c = __libcpp_contention_state(__a); - __cxx_atomic_fetch_add(__cxx_atomic_rebind<_Sco>(&__c->__version), (__libcpp_platform_wait_t)1, memory_order_relaxed); - __cxx_atomic_thread_fence(memory_order_seq_cst); - if (0 != __cxx_atomic_exchange(__cxx_atomic_rebind<_Sco>(&__c->__waiters), (ptrdiff_t)0, memory_order_relaxed)) - __libcpp_platform_wake(&__c->__version, true); -#endif +_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_all(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) +{ +# ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE + auto* const __c = __libcpp_contention_state(__a); + __cxx_atomic_fetch_add(__cxx_atomic_rebind<_Sco>(&__c->__version), 
(__libcpp_platform_wait_t) 1, memory_order_relaxed); + __cxx_atomic_thread_fence(memory_order_seq_cst); + if (0 != __cxx_atomic_exchange(__cxx_atomic_rebind<_Sco>(&__c->__waiters), (ptrdiff_t) 0, memory_order_relaxed)) + { + __libcpp_platform_wake(&__c->__version, true); + } +# endif } template ::__value, int> = 1> -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_one(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) { - __cxx_atomic_notify_all(__a); -} -template , int _Sco = _Ty::__sco, __enable_if_t::__value, int> = 1> -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_try_wait_slow(_Ty const volatile* __a, _Tp const __val, memory_order __order) { -#ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE - auto * const __c = __libcpp_contention_state(__a); - __cxx_atomic_store(__cxx_atomic_rebind<_Sco>(&__c->__waiters), (ptrdiff_t)1, memory_order_relaxed); - __cxx_atomic_thread_fence(memory_order_seq_cst); - auto const __version = __cxx_atomic_load(__cxx_atomic_rebind<_Sco>(&__c->__version), memory_order_relaxed); - if (!__cxx_nonatomic_compare_equal(__cxx_atomic_load(__a, __order), __val)) - return; - if(sizeof(__libcpp_platform_wait_t) < 8) { - constexpr timespec __timeout = { 2, 0 }; // Hedge on rare 'int version' aliasing. - __libcpp_platform_wait(&__c->__version, __version, &__timeout); - } - else - __libcpp_platform_wait(&__c->__version, __version, nullptr); -#else - __cxx_atomic_try_wait_slow_fallback(__a, __val, __order); -#endif // _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE +_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_one(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) +{ + __cxx_atomic_notify_all(__a); +} +template , + int _Sco = _Ty::__sco, + __enable_if_t::__value, int> = 1> +_LIBCUDACXX_INLINE_VISIBILITY void +__cxx_atomic_try_wait_slow(_Ty const volatile* __a, _Tp const __val, memory_order __order) +{ +# ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE + auto* const __c = __libcpp_contention_state(__a); + __cxx_atomic_store(__cxx_atomic_rebind<_Sco>(&__c->__waiters), (ptrdiff_t) 1, memory_order_relaxed); + __cxx_atomic_thread_fence(memory_order_seq_cst); + auto const __version = __cxx_atomic_load(__cxx_atomic_rebind<_Sco>(&__c->__version), memory_order_relaxed); + if (!__cxx_nonatomic_compare_equal(__cxx_atomic_load(__a, __order), __val)) + { + return; + } + if (sizeof(__libcpp_platform_wait_t) < 8) + { + constexpr timespec __timeout = {2, 0}; // Hedge on rare 'int version' aliasing. 
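The store/fence/load pattern in this wait path, and the matching fetch_add/fence/exchange in the notify path above, form a Dekker-style handshake: each side publishes its flag with a relaxed operation, issues a seq_cst fence, then inspects the other side's flag, so at least one side observes the other and a wakeup cannot be lost. A hedged, self-contained sketch with plain std::atomic (illustrative names, not the library's internals):

#include <atomic>

std::atomic<int> waiters{0};   // set by a thread that is about to sleep
std::atomic<int> version{0};   // bumped by a thread that changed the value

void before_sleeping()
{
  waiters.store(1, std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_seq_cst);
  // re-check the watched value here; only then call the platform wait
}

void after_changing_value()
{
  version.fetch_add(1, std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_seq_cst);
  if (waiters.exchange(0, std::memory_order_relaxed) != 0)
  {
    // call the platform wake here
  }
}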
+ __libcpp_platform_wait(&__c->__version, __version, &__timeout); + } + else + { + __libcpp_platform_wait(&__c->__version, __version, nullptr); + } +# else + __cxx_atomic_try_wait_slow_fallback(__a, __val, __order); +# endif // _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE } template ::__value, int> = 1> -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_try_wait_slow(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a, _Tp __val, memory_order) { -#ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE - auto * const __c = __libcpp_contention_state(__a); - __cxx_atomic_fetch_add(__cxx_atomic_rebind<_Sco>(&__c->__waiters), (ptrdiff_t)1, memory_order_relaxed); - __cxx_atomic_thread_fence(memory_order_seq_cst); -#endif - __libcpp_platform_wait((_Tp*)__a, __val, nullptr); -#ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE - __cxx_atomic_fetch_sub(__cxx_atomic_rebind<_Sco>(&__c->__waiters), (ptrdiff_t)1, memory_order_relaxed); -#endif +_LIBCUDACXX_INLINE_VISIBILITY void +__cxx_atomic_try_wait_slow(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a, _Tp __val, memory_order) +{ +# ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE + auto* const __c = __libcpp_contention_state(__a); + __cxx_atomic_fetch_add(__cxx_atomic_rebind<_Sco>(&__c->__waiters), (ptrdiff_t) 1, memory_order_relaxed); + __cxx_atomic_thread_fence(memory_order_seq_cst); +# endif + __libcpp_platform_wait((_Tp*) __a, __val, nullptr); +# ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE + __cxx_atomic_fetch_sub(__cxx_atomic_rebind<_Sco>(&__c->__waiters), (ptrdiff_t) 1, memory_order_relaxed); +# endif } template ::__value, int> = 1> -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_all(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) { -#ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE - auto * const __c = __libcpp_contention_state(__a); - __cxx_atomic_thread_fence(memory_order_seq_cst); - if (0 != __cxx_atomic_load(__cxx_atomic_rebind<_Sco>(&__c->__waiters), memory_order_relaxed)) -#endif - __libcpp_platform_wake((_Tp*)__a, true); +_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_all(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) +{ +# ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE + auto* const __c = __libcpp_contention_state(__a); + __cxx_atomic_thread_fence(memory_order_seq_cst); + if (0 != __cxx_atomic_load(__cxx_atomic_rebind<_Sco>(&__c->__waiters), memory_order_relaxed)) +# endif + __libcpp_platform_wake((_Tp*) __a, true); } template ::__value, int> = 1> -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_one(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) { -#ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE - auto * const __c = __libcpp_contention_state(__a); - __cxx_atomic_thread_fence(memory_order_seq_cst); - if (0 != __cxx_atomic_load(__cxx_atomic_rebind<_Sco>(&__c->__waiters), memory_order_relaxed)) -#endif - __libcpp_platform_wake((_Tp*)__a, false); +_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_one(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) +{ +# ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE + auto* const __c = __libcpp_contention_state(__a); + __cxx_atomic_thread_fence(memory_order_seq_cst); + if (0 != __cxx_atomic_load(__cxx_atomic_rebind<_Sco>(&__c->__waiters), memory_order_relaxed)) +# endif + __libcpp_platform_wake((_Tp*) __a, false); } #elif !defined(_LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE) template -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_all(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) { - auto * const __c = __libcpp_contention_state(__a); - 
__cxx_atomic_thread_fence(memory_order_seq_cst); - if(0 == __cxx_atomic_load(__cxx_atomic_rebind<_Sco>(&__c->__credit), memory_order_relaxed)) - return; - if(0 != __cxx_atomic_exchange(__cxx_atomic_rebind<_Sco>(&__c->__credit), (ptrdiff_t)0, memory_order_relaxed)) { - __libcpp_mutex_lock(&__c->__mutex); - __libcpp_mutex_unlock(&__c->__mutex); - __libcpp_condvar_broadcast(&__c->__condvar); - } +_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_all(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) +{ + auto* const __c = __libcpp_contention_state(__a); + __cxx_atomic_thread_fence(memory_order_seq_cst); + if (0 == __cxx_atomic_load(__cxx_atomic_rebind<_Sco>(&__c->__credit), memory_order_relaxed)) + { + return; + } + if (0 != __cxx_atomic_exchange(__cxx_atomic_rebind<_Sco>(&__c->__credit), (ptrdiff_t) 0, memory_order_relaxed)) + { + __libcpp_mutex_lock(&__c->__mutex); + __libcpp_mutex_unlock(&__c->__mutex); + __libcpp_condvar_broadcast(&__c->__condvar); + } } template -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_one(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) { - __cxx_atomic_notify_all(__a); +_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_one(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) +{ + __cxx_atomic_notify_all(__a); } template -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_try_wait_slow(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a, _Tp const __val, memory_order __order) { - auto * const __c = __libcpp_contention_state(__a); - __libcpp_mutex_lock(&__c->__mutex); - __cxx_atomic_store(__cxx_atomic_rebind<_Sco>(&__c->__credit), (ptrdiff_t)1, memory_order_relaxed); - __cxx_atomic_thread_fence(memory_order_seq_cst); - if (__cxx_nonatomic_compare_equal(__cxx_atomic_load(__a, __order), __val)) - __libcpp_condvar_wait(&__c->__condvar, &__c->__mutex); - __libcpp_mutex_unlock(&__c->__mutex); +_LIBCUDACXX_INLINE_VISIBILITY void +__cxx_atomic_try_wait_slow(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a, _Tp const __val, memory_order __order) +{ + auto* const __c = __libcpp_contention_state(__a); + __libcpp_mutex_lock(&__c->__mutex); + __cxx_atomic_store(__cxx_atomic_rebind<_Sco>(&__c->__credit), (ptrdiff_t) 1, memory_order_relaxed); + __cxx_atomic_thread_fence(memory_order_seq_cst); + if (__cxx_nonatomic_compare_equal(__cxx_atomic_load(__a, __order), __val)) + { + __libcpp_condvar_wait(&__c->__condvar, &__c->__mutex); + } + __libcpp_mutex_unlock(&__c->__mutex); } #else -template +template struct __atomic_wait_and_notify_supported -#if defined(__CUDA_MINIMUM_ARCH__) && __CUDA_MINIMUM_ARCH__ < 700 +# if defined(__CUDA_MINIMUM_ARCH__) && __CUDA_MINIMUM_ARCH__ < 700 : false_type -#else +# else : true_type -#endif +# endif {}; template > -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_try_wait_slow(_Ty const volatile* __a, _Tp __val, memory_order __order) { - static_assert(__atomic_wait_and_notify_supported<_Tp>::value, "atomic wait operations are unsupported on Pascal"); - __cxx_atomic_try_wait_slow_fallback(__a, __val, __order); +_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_try_wait_slow(_Ty const volatile* __a, _Tp __val, memory_order __order) +{ + static_assert(__atomic_wait_and_notify_supported<_Tp>::value, "atomic wait operations are unsupported on Pascal"); + __cxx_atomic_try_wait_slow_fallback(__a, __val, __order); } template > -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_one(_Ty const volatile*) { - static_assert(__atomic_wait_and_notify_supported<_Tp>::value, "atomic notify-one operations are unsupported on Pascal"); 
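The mutex/condvar fallback above avoids lost wakeups by having the waiter re-check the value while holding the mutex and the notifier touch the same mutex before broadcasting. A simplified standalone sketch of that discipline with std::mutex and std::condition_variable (illustrative names, not the library's internal API):

#include <atomic>
#include <condition_variable>
#include <mutex>

std::mutex              mtx;
std::condition_variable cv;
std::atomic<int>        value{0};

// Waiter: blocks until the value is observed to differ from `old`.
void wait_until_changed(int old)
{
  std::unique_lock<std::mutex> lock(mtx);
  cv.wait(lock, [&] { return value.load(std::memory_order_relaxed) != old; });
}

// Notifier: changes the value under the mutex so in-flight waiters are ordered,
// then broadcasts.
void change_and_notify(int next)
{
  {
    std::lock_guard<std::mutex> lock(mtx);
    value.store(next, std::memory_order_relaxed);
  }
  cv.notify_all();
}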
+_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_one(_Ty const volatile*) +{ + static_assert(__atomic_wait_and_notify_supported<_Tp>::value, + "atomic notify-one operations are unsupported on Pascal"); } template > -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_all(_Ty const volatile*) { - static_assert(__atomic_wait_and_notify_supported<_Tp>::value, "atomic notify-all operations are unsupported on Pascal"); +_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_all(_Ty const volatile*) +{ + static_assert(__atomic_wait_and_notify_supported<_Tp>::value, + "atomic notify-all operations are unsupported on Pascal"); } #endif // _LIBCUDACXX_HAS_PLATFORM_WAIT || !defined(_LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE) template > -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_wait(_Ty const volatile* __a, _Tp const __val, memory_order __order) { - for(int __i = 0; __i < _LIBCUDACXX_POLLING_COUNT; ++__i) { - if(!__cxx_nonatomic_compare_equal(__cxx_atomic_load(__a, __order), __val)) - return; - if(__i < 12) - __libcpp_thread_yield_processor(); - else - __libcpp_thread_yield(); +_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_wait(_Ty const volatile* __a, _Tp const __val, memory_order __order) +{ + for (int __i = 0; __i < _LIBCUDACXX_POLLING_COUNT; ++__i) + { + if (!__cxx_nonatomic_compare_equal(__cxx_atomic_load(__a, __order), __val)) + { + return; + } + if (__i < 12) + { + __libcpp_thread_yield_processor(); } - while(__cxx_nonatomic_compare_equal(__cxx_atomic_load(__a, __order), __val)) - __cxx_atomic_try_wait_slow(__a, __val, __order); + else + { + __libcpp_thread_yield(); + } + } + while (__cxx_nonatomic_compare_equal(__cxx_atomic_load(__a, __order), __val)) + { + __cxx_atomic_try_wait_slow(__a, __val, __order); + } } template -struct __atomic_base_storage { - mutable _Storage __a_; +struct __atomic_base_storage +{ + mutable _Storage __a_; - __atomic_base_storage() = default; - __atomic_base_storage(const __atomic_base_storage&) = default; - __atomic_base_storage(__atomic_base_storage&&) = default; + __atomic_base_storage() = default; + __atomic_base_storage(const __atomic_base_storage&) = default; + __atomic_base_storage(__atomic_base_storage&&) = default; - __atomic_base_storage& operator=(const __atomic_base_storage&) = default; - __atomic_base_storage& operator=(__atomic_base_storage&&) = default; + __atomic_base_storage& operator=(const __atomic_base_storage&) = default; + __atomic_base_storage& operator=(__atomic_base_storage&&) = default; - _LIBCUDACXX_INLINE_VISIBILITY constexpr - __atomic_base_storage(_Storage&& __a) noexcept : __a_(_CUDA_VSTD::forward<_Storage>(__a)) {} + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base_storage(_Storage&& __a) noexcept + : __a_(_CUDA_VSTD::forward<_Storage>(__a)) + {} }; template -struct __atomic_base_core : public __atomic_base_storage<_Tp, _Storage>{ - __atomic_base_core() = default; - __atomic_base_core(const __atomic_base_core&) = delete; - __atomic_base_core(__atomic_base_core&&) = delete; +struct __atomic_base_core : public __atomic_base_storage<_Tp, _Storage> +{ + __atomic_base_core() = default; + __atomic_base_core(const __atomic_base_core&) = delete; + __atomic_base_core(__atomic_base_core&&) = delete; - __atomic_base_core& operator=(const __atomic_base_core&) = delete; - __atomic_base_core& operator=(__atomic_base_core&&) = delete; + __atomic_base_core& operator=(const __atomic_base_core&) = delete; + __atomic_base_core& operator=(__atomic_base_core&&) = delete; - _LIBCUDACXX_INLINE_VISIBILITY constexpr - 
__atomic_base_core(_Storage&& __a) noexcept : __atomic_base_storage<_Tp, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) {} + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base_core(_Storage&& __a) noexcept + : __atomic_base_storage<_Tp, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) + {} #if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) - static constexpr bool is_always_lock_free = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(_Tp), 0); + static constexpr bool is_always_lock_free = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(_Tp), 0); #endif // defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) - _LIBCUDACXX_INLINE_VISIBILITY - bool is_lock_free() const volatile noexcept - {return _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(sizeof(_Tp));} - _LIBCUDACXX_INLINE_VISIBILITY - bool is_lock_free() const noexcept - {return static_cast<__atomic_base_core const volatile*>(this)->is_lock_free();} - _LIBCUDACXX_INLINE_VISIBILITY - - void store(_Tp __d, memory_order __m = memory_order_seq_cst) volatile noexcept - _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) - {__cxx_atomic_store(&this->__a_, __d, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - void store(_Tp __d, memory_order __m = memory_order_seq_cst) noexcept - _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) - {__cxx_atomic_store(&this->__a_, __d, __m);} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp load(memory_order __m = memory_order_seq_cst) const volatile noexcept - _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) - {return __cxx_atomic_load(&this->__a_, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp load(memory_order __m = memory_order_seq_cst) const noexcept - _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) - {return __cxx_atomic_load(&this->__a_, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - operator _Tp() const volatile noexcept {return load();} - _LIBCUDACXX_INLINE_VISIBILITY - operator _Tp() const noexcept {return load();} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) volatile noexcept - {return __cxx_atomic_exchange(&this->__a_, __d, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) noexcept - {return __cxx_atomic_exchange(&this->__a_, __d, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_weak(_Tp& __e, _Tp __d, - memory_order __s, memory_order __f) volatile noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) - {return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __s, __f);} - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_weak(_Tp& __e, _Tp __d, - memory_order __s, memory_order __f) noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) - {return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __s, __f);} - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_strong(_Tp& __e, _Tp __d, - memory_order __s, memory_order __f) volatile noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) - {return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __s, __f);} - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_strong(_Tp& __e, _Tp __d, - memory_order __s, memory_order __f) noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) - {return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __s, __f);} - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_weak(_Tp& __e, _Tp __d, - memory_order __m = memory_order_seq_cst) volatile noexcept { - if (memory_order_acq_rel == __m) - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_acquire); - else if (memory_order_release == 
__m) - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_relaxed); - else - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, __m); + _LIBCUDACXX_INLINE_VISIBILITY bool is_lock_free() const volatile noexcept + { + return _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(sizeof(_Tp)); + } + _LIBCUDACXX_INLINE_VISIBILITY bool is_lock_free() const noexcept + { + return static_cast<__atomic_base_core const volatile*>(this)->is_lock_free(); + } + _LIBCUDACXX_INLINE_VISIBILITY + + void + store(_Tp __d, memory_order __m = memory_order_seq_cst) volatile noexcept _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) + { + __cxx_atomic_store(&this->__a_, __d, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY void store(_Tp __d, memory_order __m = memory_order_seq_cst) noexcept + _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) + { + __cxx_atomic_store(&this->__a_, __d, __m); + } + + _LIBCUDACXX_INLINE_VISIBILITY _Tp load(memory_order __m = memory_order_seq_cst) const volatile noexcept + _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) + { + return __cxx_atomic_load(&this->__a_, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp load(memory_order __m = memory_order_seq_cst) const noexcept + _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) + { + return __cxx_atomic_load(&this->__a_, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY operator _Tp() const volatile noexcept + { + return load(); + } + _LIBCUDACXX_INLINE_VISIBILITY operator _Tp() const noexcept + { + return load(); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) volatile noexcept + { + return __cxx_atomic_exchange(&this->__a_, __d, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) noexcept + { + return __cxx_atomic_exchange(&this->__a_, __d, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) volatile noexcept + _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __s, __f); + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) noexcept + _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __s, __f); + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) volatile noexcept + _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __s, __f); + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) noexcept + _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __s, __f); + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) volatile noexcept + { + if (memory_order_acq_rel == __m) + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_acquire); + } + else if (memory_order_release == __m) + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_relaxed); + } + else + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, __m); + } + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) noexcept + 
{ + if (memory_order_acq_rel == __m) + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_acquire); } - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_weak(_Tp& __e, _Tp __d, - memory_order __m = memory_order_seq_cst) noexcept { - if(memory_order_acq_rel == __m) - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_acquire); - else if(memory_order_release == __m) - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_relaxed); - else - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, __m); + else if (memory_order_release == __m) + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_relaxed); } - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_strong(_Tp& __e, _Tp __d, - memory_order __m = memory_order_seq_cst) volatile noexcept { - if (memory_order_acq_rel == __m) - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_acquire); - else if (memory_order_release == __m) - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_relaxed); - else - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, __m); + else + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, __m); + } + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) volatile noexcept + { + if (memory_order_acq_rel == __m) + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_acquire); + } + else if (memory_order_release == __m) + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_relaxed); + } + else + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, __m); + } + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) noexcept + { + if (memory_order_acq_rel == __m) + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_acquire); } - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_strong(_Tp& __e, _Tp __d, - memory_order __m = memory_order_seq_cst) noexcept { - if (memory_order_acq_rel == __m) - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_acquire); - else if (memory_order_release == __m) - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_relaxed); - else - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, __m); + else if (memory_order_release == __m) + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_relaxed); } + else + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, __m); + } + } - _LIBCUDACXX_INLINE_VISIBILITY void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const volatile noexcept - {__cxx_atomic_wait(&this->__a_, __v, __m);} - _LIBCUDACXX_INLINE_VISIBILITY void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const noexcept - {__cxx_atomic_wait(&this->__a_, __v, __m);} - _LIBCUDACXX_INLINE_VISIBILITY void notify_one() volatile noexcept - {__cxx_atomic_notify_one(&this->__a_);} - _LIBCUDACXX_INLINE_VISIBILITY void notify_one() noexcept - {__cxx_atomic_notify_one(&this->__a_);} - _LIBCUDACXX_INLINE_VISIBILITY void notify_all() volatile noexcept - {__cxx_atomic_notify_all(&this->__a_);} - 
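The single-order compare_exchange overloads above derive the failure order from the success order so that it is always a valid failure ordering: acq_rel falls back to acquire, release falls back to relaxed, and any other order is reused unchanged. The same mapping as a small standalone helper (illustrative only, not part of the library's interface):

#include <atomic>

std::memory_order failure_order_for(std::memory_order success)
{
  if (success == std::memory_order_acq_rel)
  {
    return std::memory_order_acquire;
  }
  if (success == std::memory_order_release)
  {
    return std::memory_order_relaxed;
  }
  return success;   // relaxed, consume, acquire and seq_cst are valid failure orders as-is
}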
_LIBCUDACXX_INLINE_VISIBILITY void notify_all() noexcept - {__cxx_atomic_notify_all(&this->__a_);} + _LIBCUDACXX_INLINE_VISIBILITY void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + __cxx_atomic_wait(&this->__a_, __v, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const noexcept + { + __cxx_atomic_wait(&this->__a_, __v, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_one() volatile noexcept + { + __cxx_atomic_notify_one(&this->__a_); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_one() noexcept + { + __cxx_atomic_notify_one(&this->__a_); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_all() volatile noexcept + { + __cxx_atomic_notify_all(&this->__a_); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_all() noexcept + { + __cxx_atomic_notify_all(&this->__a_); + } }; template -struct __atomic_base_core<_Tp, true, _Storage> : public __atomic_base_storage<_Tp, _Storage>{ - __atomic_base_core() = default; - __atomic_base_core(const __atomic_base_core&) = default; - __atomic_base_core(__atomic_base_core&&) = default; +struct __atomic_base_core<_Tp, true, _Storage> : public __atomic_base_storage<_Tp, _Storage> +{ + __atomic_base_core() = default; + __atomic_base_core(const __atomic_base_core&) = default; + __atomic_base_core(__atomic_base_core&&) = default; - __atomic_base_core& operator=(const __atomic_base_core&) = default; - __atomic_base_core& operator=(__atomic_base_core&&) = default; + __atomic_base_core& operator=(const __atomic_base_core&) = default; + __atomic_base_core& operator=(__atomic_base_core&&) = default; - _LIBCUDACXX_INLINE_VISIBILITY constexpr - __atomic_base_core(_Storage&& __a) noexcept : __atomic_base_storage<_Tp, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) {} + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base_core(_Storage&& __a) noexcept + : __atomic_base_storage<_Tp, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) + {} #if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) - static constexpr bool is_always_lock_free = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(_Tp), 0); + static constexpr bool is_always_lock_free = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(_Tp), 0); #endif // defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) - _LIBCUDACXX_INLINE_VISIBILITY - bool is_lock_free() const volatile noexcept - {return _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(sizeof(_Tp));} - _LIBCUDACXX_INLINE_VISIBILITY - bool is_lock_free() const noexcept - {return static_cast<__atomic_base_core const volatile*>(this)->is_lock_free();} - _LIBCUDACXX_INLINE_VISIBILITY - - void store(_Tp __d, memory_order __m = memory_order_seq_cst) const volatile noexcept - _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) - {__cxx_atomic_store(&this->__a_, __d, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - void store(_Tp __d, memory_order __m = memory_order_seq_cst) const noexcept - _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) - {__cxx_atomic_store(&this->__a_, __d, __m);} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp load(memory_order __m = memory_order_seq_cst) const volatile noexcept - _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) - {return __cxx_atomic_load(&this->__a_, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp load(memory_order __m = memory_order_seq_cst) const noexcept - _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) - {return __cxx_atomic_load(&this->__a_, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - operator _Tp() const volatile noexcept {return load();} - _LIBCUDACXX_INLINE_VISIBILITY - operator _Tp() const noexcept {return load();} - 
_LIBCUDACXX_INLINE_VISIBILITY - _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) const volatile noexcept - {return __cxx_atomic_exchange(&this->__a_, __d, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) const noexcept - {return __cxx_atomic_exchange(&this->__a_, __d, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_weak(_Tp& __e, _Tp __d, - memory_order __s, memory_order __f) const volatile noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) - {return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __s, __f);} - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_weak(_Tp& __e, _Tp __d, - memory_order __s, memory_order __f) const noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) - {return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __s, __f);} - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_strong(_Tp& __e, _Tp __d, - memory_order __s, memory_order __f) const volatile noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) - {return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __s, __f);} - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_strong(_Tp& __e, _Tp __d, - memory_order __s, memory_order __f) const noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) - {return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __s, __f);} - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_weak(_Tp& __e, _Tp __d, - memory_order __m = memory_order_seq_cst) const volatile noexcept { - if (memory_order_acq_rel == __m) - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_acquire); - else if (memory_order_release == __m) - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_relaxed); - else - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, __m); + _LIBCUDACXX_INLINE_VISIBILITY bool is_lock_free() const volatile noexcept + { + return _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(sizeof(_Tp)); + } + _LIBCUDACXX_INLINE_VISIBILITY bool is_lock_free() const noexcept + { + return static_cast<__atomic_base_core const volatile*>(this)->is_lock_free(); + } + _LIBCUDACXX_INLINE_VISIBILITY + + void + store(_Tp __d, memory_order __m = memory_order_seq_cst) const volatile noexcept + _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) + { + __cxx_atomic_store(&this->__a_, __d, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY void store(_Tp __d, memory_order __m = memory_order_seq_cst) const noexcept + _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) + { + __cxx_atomic_store(&this->__a_, __d, __m); + } + + _LIBCUDACXX_INLINE_VISIBILITY _Tp load(memory_order __m = memory_order_seq_cst) const volatile noexcept + _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) + { + return __cxx_atomic_load(&this->__a_, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp load(memory_order __m = memory_order_seq_cst) const noexcept + _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) + { + return __cxx_atomic_load(&this->__a_, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY operator _Tp() const volatile noexcept + { + return load(); + } + _LIBCUDACXX_INLINE_VISIBILITY operator _Tp() const noexcept + { + return load(); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + return __cxx_atomic_exchange(&this->__a_, __d, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) const noexcept + { + return 
__cxx_atomic_exchange(&this->__a_, __d, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY bool compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) const + volatile noexcept _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __s, __f); + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) const noexcept + _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __s, __f); + } + _LIBCUDACXX_INLINE_VISIBILITY bool compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) const + volatile noexcept _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __s, __f); + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) const noexcept + _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __s, __f); + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + if (memory_order_acq_rel == __m) + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_acquire); + } + else if (memory_order_release == __m) + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_relaxed); + } + else + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, __m); + } + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) const noexcept + { + if (memory_order_acq_rel == __m) + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_acquire); + } + else if (memory_order_release == __m) + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_relaxed); } - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_weak(_Tp& __e, _Tp __d, - memory_order __m = memory_order_seq_cst) const noexcept { - if(memory_order_acq_rel == __m) - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_acquire); - else if(memory_order_release == __m) - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_relaxed); - else - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, __m); + else + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, __m); + } + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + if (memory_order_acq_rel == __m) + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_acquire); } - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_strong(_Tp& __e, _Tp __d, - memory_order __m = memory_order_seq_cst) const volatile noexcept { - if (memory_order_acq_rel == __m) - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_acquire); - else if (memory_order_release == __m) - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_relaxed); - else - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, __m); + else if (memory_order_release == __m) + { + return 
__cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_relaxed); } - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_strong(_Tp& __e, _Tp __d, - memory_order __m = memory_order_seq_cst) const noexcept { - if (memory_order_acq_rel == __m) - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_acquire); - else if (memory_order_release == __m) - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_relaxed); - else - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, __m); + else + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, __m); } + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) const noexcept + { + if (memory_order_acq_rel == __m) + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_acquire); + } + else if (memory_order_release == __m) + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_relaxed); + } + else + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, __m); + } + } - _LIBCUDACXX_INLINE_VISIBILITY void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const volatile noexcept - {__cxx_atomic_wait(&this->__a_, __v, __m);} - _LIBCUDACXX_INLINE_VISIBILITY void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const noexcept - {__cxx_atomic_wait(&this->__a_, __v, __m);} - _LIBCUDACXX_INLINE_VISIBILITY void notify_one() const volatile noexcept - {__cxx_atomic_notify_one(&this->__a_);} - _LIBCUDACXX_INLINE_VISIBILITY void notify_one() const noexcept - {__cxx_atomic_notify_one(&this->__a_);} - _LIBCUDACXX_INLINE_VISIBILITY void notify_all() const volatile noexcept - {__cxx_atomic_notify_all(&this->__a_);} - _LIBCUDACXX_INLINE_VISIBILITY void notify_all() const noexcept - {__cxx_atomic_notify_all(&this->__a_);} + _LIBCUDACXX_INLINE_VISIBILITY void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + __cxx_atomic_wait(&this->__a_, __v, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const noexcept + { + __cxx_atomic_wait(&this->__a_, __v, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_one() const volatile noexcept + { + __cxx_atomic_notify_one(&this->__a_); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_one() const noexcept + { + __cxx_atomic_notify_one(&this->__a_); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_all() const volatile noexcept + { + __cxx_atomic_notify_all(&this->__a_); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_all() const noexcept + { + __cxx_atomic_notify_all(&this->__a_); + } }; template -struct __atomic_base_arithmetic : public __atomic_base_core<_Tp, _Cq, _Storage> { - __atomic_base_arithmetic() = default; - __atomic_base_arithmetic(const __atomic_base_arithmetic&) = delete; - __atomic_base_arithmetic(__atomic_base_arithmetic&&) = delete; - - __atomic_base_arithmetic& operator=(const __atomic_base_arithmetic&) = delete; - __atomic_base_arithmetic& operator=(__atomic_base_arithmetic&&) = delete; - - _LIBCUDACXX_INLINE_VISIBILITY constexpr - __atomic_base_arithmetic(_Storage&& __a) noexcept : __atomic_base_core<_Tp, _Cq, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) {} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) volatile noexcept - {return __cxx_atomic_fetch_add(&this->__a_, __op, 
__m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept - {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) volatile noexcept - {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept - {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator++(int) volatile noexcept {return fetch_add(_Tp(1));} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator++(int) noexcept {return fetch_add(_Tp(1));} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator--(int) volatile noexcept {return fetch_sub(_Tp(1));} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator--(int) noexcept {return fetch_sub(_Tp(1));} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator++() volatile noexcept {return fetch_add(_Tp(1)) + _Tp(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator++() noexcept {return fetch_add(_Tp(1)) + _Tp(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator--() volatile noexcept {return fetch_sub(_Tp(1)) - _Tp(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator--() noexcept {return fetch_sub(_Tp(1)) - _Tp(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator+=(_Tp __op) volatile noexcept {return fetch_add(__op) + __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator+=(_Tp __op) noexcept {return fetch_add(__op) + __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator-=(_Tp __op) volatile noexcept {return fetch_sub(__op) - __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator-=(_Tp __op) noexcept {return fetch_sub(__op) - __op;} +struct __atomic_base_arithmetic : public __atomic_base_core<_Tp, _Cq, _Storage> +{ + __atomic_base_arithmetic() = default; + __atomic_base_arithmetic(const __atomic_base_arithmetic&) = delete; + __atomic_base_arithmetic(__atomic_base_arithmetic&&) = delete; + + __atomic_base_arithmetic& operator=(const __atomic_base_arithmetic&) = delete; + __atomic_base_arithmetic& operator=(__atomic_base_arithmetic&&) = delete; + + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base_arithmetic(_Storage&& __a) noexcept + : __atomic_base_core<_Tp, _Cq, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) + {} + + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) volatile noexcept + { + return __cxx_atomic_fetch_add(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept + { + return __cxx_atomic_fetch_add(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) volatile noexcept + { + return __cxx_atomic_fetch_sub(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept + { + return __cxx_atomic_fetch_sub(&this->__a_, __op, __m); + } + + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator++(int) volatile noexcept + { + return fetch_add(_Tp(1)); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator++(int) noexcept + { + return fetch_add(_Tp(1)); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator--(int) volatile noexcept + { + return fetch_sub(_Tp(1)); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator--(int) noexcept + { + return fetch_sub(_Tp(1)); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator++() volatile noexcept + { + return fetch_add(_Tp(1)) + _Tp(1); + } + 
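// Editor's note (not part of this patch): the single-memory_order compare_exchange_{weak,strong}
// overloads above derive the failure ordering from the success ordering (acq_rel -> acquire,
// release -> relaxed), and the increment/decrement operators forward to fetch_add/fetch_sub.
// A minimal host-side usage sketch under those assumptions; try_claim and bump are illustrative
// names, not part of this header.
//
//   #include <cuda/std/atomic>
//
//   bool try_claim(cuda::std::atomic<int>& counter, int expected)
//   {
//     // Failure ordering is derived internally: acq_rel on success implies acquire on failure.
//     return counter.compare_exchange_weak(expected, expected + 1, cuda::std::memory_order_acq_rel);
//   }
//
//   int bump(cuda::std::atomic<int>& counter)
//   {
//     return ++counter; // forwards to fetch_add(_Tp(1)) and returns the incremented value
//   }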
_LIBCUDACXX_INLINE_VISIBILITY _Tp operator++() noexcept + { + return fetch_add(_Tp(1)) + _Tp(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator--() volatile noexcept + { + return fetch_sub(_Tp(1)) - _Tp(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator--() noexcept + { + return fetch_sub(_Tp(1)) - _Tp(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator+=(_Tp __op) volatile noexcept + { + return fetch_add(__op) + __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator+=(_Tp __op) noexcept + { + return fetch_add(__op) + __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator-=(_Tp __op) volatile noexcept + { + return fetch_sub(__op) - __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator-=(_Tp __op) noexcept + { + return fetch_sub(__op) - __op; + } }; template -struct __atomic_base_arithmetic<_Tp, true, _Storage> : public __atomic_base_core<_Tp, true, _Storage> { - __atomic_base_arithmetic() = default; - __atomic_base_arithmetic(const __atomic_base_arithmetic&) = default; - __atomic_base_arithmetic(__atomic_base_arithmetic&&) = default; - - __atomic_base_arithmetic& operator=(const __atomic_base_arithmetic&) = default; - __atomic_base_arithmetic& operator=(__atomic_base_arithmetic&&) = default; - - _LIBCUDACXX_INLINE_VISIBILITY constexpr - __atomic_base_arithmetic(_Storage&& __a) noexcept : __atomic_base_core<_Tp, true, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) {} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept - {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) const noexcept - {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept - {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) const noexcept - {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator++(int) const volatile noexcept {return fetch_add(_Tp(1));} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator++(int) const noexcept {return fetch_add(_Tp(1));} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator--(int) const volatile noexcept {return fetch_sub(_Tp(1));} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator--(int) const noexcept {return fetch_sub(_Tp(1));} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator++() const volatile noexcept {return fetch_add(_Tp(1)) + _Tp(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator++() const noexcept {return fetch_add(_Tp(1)) + _Tp(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator--() const volatile noexcept {return fetch_sub(_Tp(1)) - _Tp(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator--() const noexcept {return fetch_sub(_Tp(1)) - _Tp(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator+=(_Tp __op) const volatile noexcept {return fetch_add(__op) + __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator+=(_Tp __op) const noexcept {return fetch_add(__op) + __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator-=(_Tp __op) const volatile noexcept {return fetch_sub(__op) - __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator-=(_Tp __op) const noexcept {return fetch_sub(__op) - __op;} +struct __atomic_base_arithmetic<_Tp, true, _Storage> : public __atomic_base_core<_Tp, true, _Storage> +{ + __atomic_base_arithmetic() = default; + 
__atomic_base_arithmetic(const __atomic_base_arithmetic&) = default; + __atomic_base_arithmetic(__atomic_base_arithmetic&&) = default; + + __atomic_base_arithmetic& operator=(const __atomic_base_arithmetic&) = default; + __atomic_base_arithmetic& operator=(__atomic_base_arithmetic&&) = default; + + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base_arithmetic(_Storage&& __a) noexcept + : __atomic_base_core<_Tp, true, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) + {} + + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + return __cxx_atomic_fetch_add(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) const noexcept + { + return __cxx_atomic_fetch_add(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + return __cxx_atomic_fetch_sub(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) const noexcept + { + return __cxx_atomic_fetch_sub(&this->__a_, __op, __m); + } + + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator++(int) const volatile noexcept + { + return fetch_add(_Tp(1)); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator++(int) const noexcept + { + return fetch_add(_Tp(1)); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator--(int) const volatile noexcept + { + return fetch_sub(_Tp(1)); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator--(int) const noexcept + { + return fetch_sub(_Tp(1)); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator++() const volatile noexcept + { + return fetch_add(_Tp(1)) + _Tp(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator++() const noexcept + { + return fetch_add(_Tp(1)) + _Tp(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator--() const volatile noexcept + { + return fetch_sub(_Tp(1)) - _Tp(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator--() const noexcept + { + return fetch_sub(_Tp(1)) - _Tp(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator+=(_Tp __op) const volatile noexcept + { + return fetch_add(__op) + __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator+=(_Tp __op) const noexcept + { + return fetch_add(__op) + __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator-=(_Tp __op) const volatile noexcept + { + return fetch_sub(__op) - __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator-=(_Tp __op) const noexcept + { + return fetch_sub(__op) - __op; + } }; template -struct __atomic_base_bitwise : public __atomic_base_arithmetic<_Tp, _Cq, _Storage> { - __atomic_base_bitwise() = default; - __atomic_base_bitwise(const __atomic_base_bitwise&) = delete; - __atomic_base_bitwise(__atomic_base_bitwise&&) = delete; - - __atomic_base_bitwise& operator=(const __atomic_base_bitwise&) = delete; - __atomic_base_bitwise& operator=(__atomic_base_bitwise&&) = delete; - - _LIBCUDACXX_INLINE_VISIBILITY constexpr - __atomic_base_bitwise(_Storage&& __a) noexcept : __atomic_base_arithmetic<_Tp, _Cq, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) {} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) volatile noexcept - {return __cxx_atomic_fetch_and(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept - {return __cxx_atomic_fetch_and(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_or(_Tp __op, 
memory_order __m = memory_order_seq_cst) volatile noexcept - {return __cxx_atomic_fetch_or(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept - {return __cxx_atomic_fetch_or(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) volatile noexcept - {return __cxx_atomic_fetch_xor(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept - {return __cxx_atomic_fetch_xor(&this->__a_, __op, __m);} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator&=(_Tp __op) volatile noexcept {return fetch_and(__op) & __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator&=(_Tp __op) noexcept {return fetch_and(__op) & __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator|=(_Tp __op) volatile noexcept {return fetch_or(__op) | __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator|=(_Tp __op) noexcept {return fetch_or(__op) | __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator^=(_Tp __op) volatile noexcept {return fetch_xor(__op) ^ __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator^=(_Tp __op) noexcept {return fetch_xor(__op) ^ __op;} +struct __atomic_base_bitwise : public __atomic_base_arithmetic<_Tp, _Cq, _Storage> +{ + __atomic_base_bitwise() = default; + __atomic_base_bitwise(const __atomic_base_bitwise&) = delete; + __atomic_base_bitwise(__atomic_base_bitwise&&) = delete; + + __atomic_base_bitwise& operator=(const __atomic_base_bitwise&) = delete; + __atomic_base_bitwise& operator=(__atomic_base_bitwise&&) = delete; + + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base_bitwise(_Storage&& __a) noexcept + : __atomic_base_arithmetic<_Tp, _Cq, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) + {} + + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) volatile noexcept + { + return __cxx_atomic_fetch_and(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept + { + return __cxx_atomic_fetch_and(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) volatile noexcept + { + return __cxx_atomic_fetch_or(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept + { + return __cxx_atomic_fetch_or(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) volatile noexcept + { + return __cxx_atomic_fetch_xor(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept + { + return __cxx_atomic_fetch_xor(&this->__a_, __op, __m); + } + + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator&=(_Tp __op) volatile noexcept + { + return fetch_and(__op) & __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator&=(_Tp __op) noexcept + { + return fetch_and(__op) & __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator|=(_Tp __op) volatile noexcept + { + return fetch_or(__op) | __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator|=(_Tp __op) noexcept + { + return fetch_or(__op) | __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator^=(_Tp __op) volatile noexcept + { + return fetch_xor(__op) ^ __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator^=(_Tp __op) noexcept + { + return fetch_xor(__op) ^ __op; + } 
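// Editor's note (not part of this patch): in this bitwise base, fetch_and/fetch_or/fetch_xor return
// the prior value, while operator&=, operator|= and operator^= return the updated value
// (e.g. fetch_or(__op) | __op). A small host-side sketch; set_bits and toggle_bits are illustrative
// names, not part of this header.
//
//   #include <cuda/std/atomic>
//
//   unsigned set_bits(cuda::std::atomic<unsigned>& flags, unsigned mask)
//   {
//     return flags |= mask; // returns old | mask, i.e. the updated value
//   }
//
//   unsigned toggle_bits(cuda::std::atomic<unsigned>& flags, unsigned mask)
//   {
//     return flags.fetch_xor(mask, cuda::std::memory_order_relaxed); // returns the prior value
//   }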
}; template -struct __atomic_base_bitwise<_Tp, true, _Storage> : public __atomic_base_arithmetic<_Tp, true, _Storage> { - __atomic_base_bitwise() = default; - __atomic_base_bitwise(const __atomic_base_bitwise&) = default; - __atomic_base_bitwise(__atomic_base_bitwise&&) = default; - - __atomic_base_bitwise& operator=(const __atomic_base_bitwise&) = default; - __atomic_base_bitwise& operator=(__atomic_base_bitwise&&) = default; - - _LIBCUDACXX_INLINE_VISIBILITY constexpr - __atomic_base_bitwise(_Storage&& __a) noexcept : __atomic_base_arithmetic<_Tp, true, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) {} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept - {return __cxx_atomic_fetch_and(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) const noexcept - {return __cxx_atomic_fetch_and(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept - {return __cxx_atomic_fetch_or(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) const noexcept - {return __cxx_atomic_fetch_or(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept - {return __cxx_atomic_fetch_xor(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) const noexcept - {return __cxx_atomic_fetch_xor(&this->__a_, __op, __m);} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator&=(_Tp __op) const volatile noexcept {return fetch_and(__op) & __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator&=(_Tp __op) const noexcept {return fetch_and(__op) & __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator|=(_Tp __op) const volatile noexcept {return fetch_or(__op) | __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator|=(_Tp __op) const noexcept {return fetch_or(__op) | __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator^=(_Tp __op) const volatile noexcept {return fetch_xor(__op) ^ __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator^=(_Tp __op) const noexcept {return fetch_xor(__op) ^ __op;} +struct __atomic_base_bitwise<_Tp, true, _Storage> : public __atomic_base_arithmetic<_Tp, true, _Storage> +{ + __atomic_base_bitwise() = default; + __atomic_base_bitwise(const __atomic_base_bitwise&) = default; + __atomic_base_bitwise(__atomic_base_bitwise&&) = default; + + __atomic_base_bitwise& operator=(const __atomic_base_bitwise&) = default; + __atomic_base_bitwise& operator=(__atomic_base_bitwise&&) = default; + + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base_bitwise(_Storage&& __a) noexcept + : __atomic_base_arithmetic<_Tp, true, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) + {} + + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + return __cxx_atomic_fetch_and(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) const noexcept + { + return __cxx_atomic_fetch_and(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + return __cxx_atomic_fetch_or(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_or(_Tp __op, 
memory_order __m = memory_order_seq_cst) const noexcept + { + return __cxx_atomic_fetch_or(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + return __cxx_atomic_fetch_xor(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) const noexcept + { + return __cxx_atomic_fetch_xor(&this->__a_, __op, __m); + } + + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator&=(_Tp __op) const volatile noexcept + { + return fetch_and(__op) & __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator&=(_Tp __op) const noexcept + { + return fetch_and(__op) & __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator|=(_Tp __op) const volatile noexcept + { + return fetch_or(__op) | __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator|=(_Tp __op) const noexcept + { + return fetch_or(__op) | __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator^=(_Tp __op) const volatile noexcept + { + return fetch_xor(__op) ^ __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator^=(_Tp __op) const noexcept + { + return fetch_xor(__op) ^ __op; + } }; template -using __atomic_select_base = __conditional_t::value, - __atomic_base_arithmetic<_Tp, _Cq, _Storage>, - __conditional_t::value, - __atomic_base_bitwise<_Tp, _Cq, _Storage>, - __atomic_base_core<_Tp, _Cq, _Storage> >>; +using __atomic_select_base = + __conditional_t::value, + __atomic_base_arithmetic<_Tp, _Cq, _Storage>, + __conditional_t::value, + __atomic_base_bitwise<_Tp, _Cq, _Storage>, + __atomic_base_core<_Tp, _Cq, _Storage>>>; template >> -struct __atomic_base : public _Base { - __atomic_base() = default; - __atomic_base(const __atomic_base&) = delete; - __atomic_base(__atomic_base&&) = delete; +struct __atomic_base : public _Base +{ + __atomic_base() = default; + __atomic_base(const __atomic_base&) = delete; + __atomic_base(__atomic_base&&) = delete; - __atomic_base& operator=(const __atomic_base&) = delete; - __atomic_base& operator=(__atomic_base&&) = delete; + __atomic_base& operator=(const __atomic_base&) = delete; + __atomic_base& operator=(__atomic_base&&) = delete; - _LIBCUDACXX_INLINE_VISIBILITY constexpr - __atomic_base(const _Tp& __a) noexcept : - _Base(__cxx_atomic_impl<_Tp, _Sco>(__a)) {} + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base(const _Tp& __a) noexcept + : _Base(__cxx_atomic_impl<_Tp, _Sco>(__a)) + {} }; template >> -struct __atomic_base_ref : public _Base { - __atomic_base_ref() = default; - __atomic_base_ref(const __atomic_base_ref&) = default; - __atomic_base_ref(__atomic_base_ref&&) = default; +struct __atomic_base_ref : public _Base +{ + __atomic_base_ref() = default; + __atomic_base_ref(const __atomic_base_ref&) = default; + __atomic_base_ref(__atomic_base_ref&&) = default; - __atomic_base_ref& operator=(const __atomic_base_ref&) = default; - __atomic_base_ref& operator=(__atomic_base_ref&&) = default; + __atomic_base_ref& operator=(const __atomic_base_ref&) = default; + __atomic_base_ref& operator=(__atomic_base_ref&&) = default; - _LIBCUDACXX_INLINE_VISIBILITY constexpr - __atomic_base_ref(_Tp& __a) noexcept : - _Base(__cxx_atomic_ref_impl<_Tp, _Sco>(__a)) {} + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base_ref(_Tp& __a) noexcept + : _Base(__cxx_atomic_ref_impl<_Tp, _Sco>(__a)) + {} }; #if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) @@ -1779,1059 +2082,918 @@ constexpr bool __atomic_base_core<_Tp, _Cq, _Storage>::is_always_lock_free; // atomic template -struct 
atomic - : public __atomic_base<_Tp> +struct atomic : public __atomic_base<_Tp> { - typedef __atomic_base<_Tp> __base; - using value_type = _Tp; + typedef __atomic_base<_Tp> __base; + using value_type = _Tp; - atomic() noexcept = default; - _LIBCUDACXX_INLINE_VISIBILITY - constexpr atomic(_Tp __d) noexcept : __base(__d) {} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator=(_Tp __d) volatile noexcept - {__base::store(__d); return __d;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator=(_Tp __d) noexcept - {__base::store(__d); return __d;} + atomic() noexcept = default; + _LIBCUDACXX_INLINE_VISIBILITY constexpr atomic(_Tp __d) noexcept + : __base(__d) + {} + + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator=(_Tp __d) volatile noexcept + { + __base::store(__d); + return __d; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator=(_Tp __d) noexcept + { + __base::store(__d); + return __d; + } }; // atomic template -struct atomic<_Tp*> - : public __atomic_base<_Tp*> +struct atomic<_Tp*> : public __atomic_base<_Tp*> { - typedef __atomic_base<_Tp*> __base; - using value_type = _Tp*; + typedef __atomic_base<_Tp*> __base; + using value_type = _Tp*; - atomic() noexcept = default; - _LIBCUDACXX_INLINE_VISIBILITY - constexpr atomic(_Tp* __d) noexcept : __base(__d) {} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator=(_Tp* __d) volatile noexcept - {__base::store(__d); return __d;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator=(_Tp* __d) noexcept - {__base::store(__d); return __d;} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) - volatile noexcept - {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) - noexcept - {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) - volatile noexcept - {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) - noexcept - {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator++(int) volatile noexcept {return fetch_add(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator++(int) noexcept {return fetch_add(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator--(int) volatile noexcept {return fetch_sub(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator--(int) noexcept {return fetch_sub(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator++() volatile noexcept {return fetch_add(1) + 1;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator++() noexcept {return fetch_add(1) + 1;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator--() volatile noexcept {return fetch_sub(1) - 1;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator--() noexcept {return fetch_sub(1) - 1;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator+=(ptrdiff_t __op) volatile noexcept {return fetch_add(__op) + __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator+=(ptrdiff_t __op) noexcept {return fetch_add(__op) + __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator-=(ptrdiff_t __op) volatile noexcept {return fetch_sub(__op) - __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator-=(ptrdiff_t __op) noexcept {return fetch_sub(__op) - __op;} + atomic() noexcept = default; + _LIBCUDACXX_INLINE_VISIBILITY constexpr atomic(_Tp* __d) noexcept + : __base(__d) + {} + + _LIBCUDACXX_INLINE_VISIBILITY _Tp* 
operator=(_Tp* __d) volatile noexcept + { + __base::store(__d); + return __d; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator=(_Tp* __d) noexcept + { + __base::store(__d); + return __d; + } + + _LIBCUDACXX_INLINE_VISIBILITY _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) volatile noexcept + { + return __cxx_atomic_fetch_add(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) noexcept + { + return __cxx_atomic_fetch_add(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) volatile noexcept + { + return __cxx_atomic_fetch_sub(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) noexcept + { + return __cxx_atomic_fetch_sub(&this->__a_, __op, __m); + } + + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator++(int) volatile noexcept + { + return fetch_add(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator++(int) noexcept + { + return fetch_add(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator--(int) volatile noexcept + { + return fetch_sub(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator--(int) noexcept + { + return fetch_sub(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator++() volatile noexcept + { + return fetch_add(1) + 1; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator++() noexcept + { + return fetch_add(1) + 1; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator--() volatile noexcept + { + return fetch_sub(1) - 1; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator--() noexcept + { + return fetch_sub(1) - 1; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator+=(ptrdiff_t __op) volatile noexcept + { + return fetch_add(__op) + __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator+=(ptrdiff_t __op) noexcept + { + return fetch_add(__op) + __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator-=(ptrdiff_t __op) volatile noexcept + { + return fetch_sub(__op) - __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator-=(ptrdiff_t __op) noexcept + { + return fetch_sub(__op) - __op; + } }; // atomic_ref template - struct atomic_ref - : public __atomic_base_ref<_Tp> +struct atomic_ref : public __atomic_base_ref<_Tp> { - typedef __atomic_base_ref<_Tp> __base; - using value_type = _Tp; + typedef __atomic_base_ref<_Tp> __base; + using value_type = _Tp; - static constexpr size_t required_alignment = sizeof(_Tp); + static constexpr size_t required_alignment = sizeof(_Tp); - static constexpr bool is_always_lock_free = sizeof(_Tp) <= 8; + static constexpr bool is_always_lock_free = sizeof(_Tp) <= 8; - _LIBCUDACXX_INLINE_VISIBILITY - explicit atomic_ref(_Tp& __ref) : __base(__ref) {} + _LIBCUDACXX_INLINE_VISIBILITY explicit atomic_ref(_Tp& __ref) + : __base(__ref) + {} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator=(_Tp __v) const volatile noexcept {__base::store(__v); return __v;} + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator=(_Tp __v) const volatile noexcept + { + __base::store(__v); + return __v; + } }; // atomic_ref template - struct atomic_ref<_Tp*> - : public __atomic_base_ref<_Tp*> +struct atomic_ref<_Tp*> : public __atomic_base_ref<_Tp*> { - typedef __atomic_base_ref<_Tp*> __base; - using value_type = _Tp*; + typedef __atomic_base_ref<_Tp*> __base; + using value_type = _Tp*; - static constexpr size_t required_alignment = sizeof(_Tp*); + static constexpr size_t required_alignment = sizeof(_Tp*); - static constexpr bool 
is_always_lock_free = sizeof(_Tp*) <= 8; + static constexpr bool is_always_lock_free = sizeof(_Tp*) <= 8; - _LIBCUDACXX_INLINE_VISIBILITY - explicit atomic_ref(_Tp*& __ref) : __base(__ref) {} + _LIBCUDACXX_INLINE_VISIBILITY explicit atomic_ref(_Tp*& __ref) + : __base(__ref) + {} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator=(_Tp* __v) const noexcept {__base::store(__v); return __v;} + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator=(_Tp* __v) const noexcept + { + __base::store(__v); + return __v; + } - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) - const noexcept - {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) - const noexcept - {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);} + _LIBCUDACXX_INLINE_VISIBILITY _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) const noexcept + { + return __cxx_atomic_fetch_add(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) const noexcept + { + return __cxx_atomic_fetch_sub(&this->__a_, __op, __m); + } - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator++(int) const noexcept {return fetch_add(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator--(int) const noexcept {return fetch_sub(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator++() const noexcept {return fetch_add(1) + 1;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator--() const noexcept {return fetch_sub(1) - 1;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator+=(ptrdiff_t __op) const noexcept {return fetch_add(__op) + __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator-=(ptrdiff_t __op) const noexcept {return fetch_sub(__op) - __op;} + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator++(int) const noexcept + { + return fetch_add(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator--(int) const noexcept + { + return fetch_sub(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator++() const noexcept + { + return fetch_add(1) + 1; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator--() const noexcept + { + return fetch_sub(1) - 1; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator+=(ptrdiff_t __op) const noexcept + { + return fetch_add(__op) + __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator-=(ptrdiff_t __op) const noexcept + { + return fetch_sub(__op) - __op; + } }; // atomic_is_lock_free template -_LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_is_lock_free(const volatile atomic<_Tp>* __o) noexcept +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_is_lock_free(const volatile atomic<_Tp>* __o) noexcept { - return __o->is_lock_free(); + return __o->is_lock_free(); } template -_LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_is_lock_free(const atomic<_Tp>* __o) noexcept +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_is_lock_free(const atomic<_Tp>* __o) noexcept { - return __o->is_lock_free(); + return __o->is_lock_free(); } // atomic_init template -_LIBCUDACXX_INLINE_VISIBILITY -void -atomic_init(volatile atomic<_Tp>* __o, _Tp __d) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void atomic_init(volatile atomic<_Tp>* __o, _Tp __d) noexcept { - __cxx_atomic_init(&__o->__a_, __d); + __cxx_atomic_init(&__o->__a_, __d); } template -_LIBCUDACXX_INLINE_VISIBILITY -void -atomic_init(atomic<_Tp>* __o, _Tp __d) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void atomic_init(atomic<_Tp>* __o, _Tp __d) noexcept { - __cxx_atomic_init(&__o->__a_, __d); + 
__cxx_atomic_init(&__o->__a_, __d); } // atomic_store template -_LIBCUDACXX_INLINE_VISIBILITY -void -atomic_store(volatile atomic<_Tp>* __o, _Tp __d) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void atomic_store(volatile atomic<_Tp>* __o, _Tp __d) noexcept { - __o->store(__d); + __o->store(__d); } template -_LIBCUDACXX_INLINE_VISIBILITY -void -atomic_store(atomic<_Tp>* __o, _Tp __d) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void atomic_store(atomic<_Tp>* __o, _Tp __d) noexcept { - __o->store(__d); + __o->store(__d); } // atomic_store_explicit template -_LIBCUDACXX_INLINE_VISIBILITY -void -atomic_store_explicit(volatile atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void atomic_store_explicit(volatile atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) { - __o->store(__d, __m); + __o->store(__d, __m); } template -_LIBCUDACXX_INLINE_VISIBILITY -void -atomic_store_explicit(atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void atomic_store_explicit(atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) { - __o->store(__d, __m); + __o->store(__d, __m); } // atomic_load template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp -atomic_load(const volatile atomic<_Tp>* __o) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_load(const volatile atomic<_Tp>* __o) noexcept { - return __o->load(); + return __o->load(); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp -atomic_load(const atomic<_Tp>* __o) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_load(const atomic<_Tp>* __o) noexcept { - return __o->load(); + return __o->load(); } // atomic_load_explicit template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp -atomic_load_explicit(const volatile atomic<_Tp>* __o, memory_order __m) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_load_explicit(const volatile atomic<_Tp>* __o, memory_order __m) noexcept _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) { - return __o->load(__m); + return __o->load(__m); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp -atomic_load_explicit(const atomic<_Tp>* __o, memory_order __m) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_load_explicit(const atomic<_Tp>* __o, memory_order __m) noexcept _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) { - return __o->load(__m); + return __o->load(__m); } // atomic_exchange template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp -atomic_exchange(volatile atomic<_Tp>* __o, _Tp __d) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_exchange(volatile atomic<_Tp>* __o, _Tp __d) noexcept { - return __o->exchange(__d); + return __o->exchange(__d); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp -atomic_exchange(atomic<_Tp>* __o, _Tp __d) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_exchange(atomic<_Tp>* __o, _Tp __d) noexcept { - return __o->exchange(__d); + return __o->exchange(__d); } // atomic_exchange_explicit template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp -atomic_exchange_explicit(volatile atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_exchange_explicit(volatile atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept { - return __o->exchange(__d, __m); + return __o->exchange(__d, __m); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp -atomic_exchange_explicit(atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_exchange_explicit(atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept { - return __o->exchange(__d, __m); + return 
__o->exchange(__d, __m); } // atomic_compare_exchange_weak template -_LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_compare_exchange_weak(volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_weak(volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept { - return __o->compare_exchange_weak(*__e, __d); + return __o->compare_exchange_weak(*__e, __d); } template -_LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_compare_exchange_weak(atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_weak(atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept { - return __o->compare_exchange_weak(*__e, __d); + return __o->compare_exchange_weak(*__e, __d); } // atomic_compare_exchange_strong template -_LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_compare_exchange_strong(volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_strong(volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept { - return __o->compare_exchange_strong(*__e, __d); + return __o->compare_exchange_strong(*__e, __d); } template -_LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_compare_exchange_strong(atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_strong(atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept { - return __o->compare_exchange_strong(*__e, __d); + return __o->compare_exchange_strong(*__e, __d); } // atomic_compare_exchange_weak_explicit template -_LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_compare_exchange_weak_explicit(volatile atomic<_Tp>* __o, _Tp* __e, - _Tp __d, - memory_order __s, memory_order __f) noexcept +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_weak_explicit( + volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d, memory_order __s, memory_order __f) noexcept _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) { - return __o->compare_exchange_weak(*__e, __d, __s, __f); + return __o->compare_exchange_weak(*__e, __d, __s, __f); } template -_LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_compare_exchange_weak_explicit(atomic<_Tp>* __o, _Tp* __e, _Tp __d, - memory_order __s, memory_order __f) noexcept +_LIBCUDACXX_INLINE_VISIBILITY bool +atomic_compare_exchange_weak_explicit(atomic<_Tp>* __o, _Tp* __e, _Tp __d, memory_order __s, memory_order __f) noexcept _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) { - return __o->compare_exchange_weak(*__e, __d, __s, __f); + return __o->compare_exchange_weak(*__e, __d, __s, __f); } // atomic_compare_exchange_strong_explicit template -_LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_compare_exchange_strong_explicit(volatile atomic<_Tp>* __o, - _Tp* __e, _Tp __d, - memory_order __s, memory_order __f) noexcept +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_strong_explicit( + volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d, memory_order __s, memory_order __f) noexcept _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) { - return __o->compare_exchange_strong(*__e, __d, __s, __f); + return __o->compare_exchange_strong(*__e, __d, __s, __f); } template -_LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_compare_exchange_strong_explicit(atomic<_Tp>* __o, _Tp* __e, - _Tp __d, - memory_order __s, memory_order __f) noexcept +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_strong_explicit( + atomic<_Tp>* __o, _Tp* __e, _Tp __d, memory_order __s, memory_order __f) noexcept _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) { - return __o->compare_exchange_strong(*__e, __d, __s, __f); + 
return __o->compare_exchange_strong(*__e, __d, __s, __f); } // atomic_wait template -_LIBCUDACXX_INLINE_VISIBILITY -void atomic_wait(const volatile atomic<_Tp>* __o, - typename atomic<_Tp>::value_type __v) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void +atomic_wait(const volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v) noexcept { - return __o->wait(__v); + return __o->wait(__v); } template -_LIBCUDACXX_INLINE_VISIBILITY -void atomic_wait(const atomic<_Tp>* __o, - typename atomic<_Tp>::value_type __v) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void atomic_wait(const atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v) noexcept { - return __o->wait(__v); + return __o->wait(__v); } // atomic_wait_explicit template -_LIBCUDACXX_INLINE_VISIBILITY -void atomic_wait_explicit(const volatile atomic<_Tp>* __o, - typename atomic<_Tp>::value_type __v, - memory_order __m) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void +atomic_wait_explicit(const volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v, memory_order __m) noexcept _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) { - return __o->wait(__v, __m); + return __o->wait(__v, __m); } template -_LIBCUDACXX_INLINE_VISIBILITY -void atomic_wait_explicit(const atomic<_Tp>* __o, - typename atomic<_Tp>::value_type __v, - memory_order __m) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void +atomic_wait_explicit(const atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v, memory_order __m) noexcept _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) { - return __o->wait(__v, __m); + return __o->wait(__v, __m); } // atomic_notify_one template -_LIBCUDACXX_INLINE_VISIBILITY -void atomic_notify_one(volatile atomic<_Tp>* __o) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void atomic_notify_one(volatile atomic<_Tp>* __o) noexcept { - __o->notify_one(); + __o->notify_one(); } template -_LIBCUDACXX_INLINE_VISIBILITY -void atomic_notify_one(atomic<_Tp>* __o) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void atomic_notify_one(atomic<_Tp>* __o) noexcept { - __o->notify_one(); + __o->notify_one(); } // atomic_notify_one template -_LIBCUDACXX_INLINE_VISIBILITY -void atomic_notify_all(volatile atomic<_Tp>* __o) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void atomic_notify_all(volatile atomic<_Tp>* __o) noexcept { - __o->notify_all(); + __o->notify_all(); } template -_LIBCUDACXX_INLINE_VISIBILITY -void atomic_notify_all(atomic<_Tp>* __o) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void atomic_notify_all(atomic<_Tp>* __o) noexcept { - __o->notify_all(); + __o->notify_all(); } // atomic_fetch_add template _LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - (is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, - _Tp -> -atomic_fetch_add(volatile atomic<_Tp>* __o, _Tp __op) noexcept + __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> + atomic_fetch_add(volatile atomic<_Tp>* __o, _Tp __op) noexcept { - return __o->fetch_add(__op); + return __o->fetch_add(__op); } template _LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - (is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, - _Tp -> -atomic_fetch_add(atomic<_Tp>* __o, _Tp __op) noexcept + __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> + atomic_fetch_add(atomic<_Tp>* __o, _Tp __op) noexcept { - return __o->fetch_add(__op); + return __o->fetch_add(__op); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp* -atomic_fetch_add(volatile atomic<_Tp*>* __o, ptrdiff_t 
__op) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp* atomic_fetch_add(volatile atomic<_Tp*>* __o, ptrdiff_t __op) noexcept { - return __o->fetch_add(__op); + return __o->fetch_add(__op); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp* -atomic_fetch_add(atomic<_Tp*>* __o, ptrdiff_t __op) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp* atomic_fetch_add(atomic<_Tp*>* __o, ptrdiff_t __op) noexcept { - return __o->fetch_add(__op); + return __o->fetch_add(__op); } // atomic_fetch_add_explicit template _LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - (is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, - _Tp -> -atomic_fetch_add_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept + __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> + atomic_fetch_add_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept { - return __o->fetch_add(__op, __m); + return __o->fetch_add(__op, __m); } template _LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - (is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, - _Tp -> -atomic_fetch_add_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept + __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> + atomic_fetch_add_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept { - return __o->fetch_add(__op, __m); + return __o->fetch_add(__op, __m); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp* -atomic_fetch_add_explicit(volatile atomic<_Tp*>* __o, ptrdiff_t __op, - memory_order __m) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp* +atomic_fetch_add_explicit(volatile atomic<_Tp*>* __o, ptrdiff_t __op, memory_order __m) noexcept { - return __o->fetch_add(__op, __m); + return __o->fetch_add(__op, __m); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp* +_LIBCUDACXX_INLINE_VISIBILITY _Tp* atomic_fetch_add_explicit(atomic<_Tp*>* __o, ptrdiff_t __op, memory_order __m) noexcept { - return __o->fetch_add(__op, __m); + return __o->fetch_add(__op, __m); } // atomic_fetch_sub template _LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - (is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, - _Tp -> -atomic_fetch_sub(volatile atomic<_Tp>* __o, _Tp __op) noexcept + __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> + atomic_fetch_sub(volatile atomic<_Tp>* __o, _Tp __op) noexcept { - return __o->fetch_sub(__op); + return __o->fetch_sub(__op); } template _LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - (is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, - _Tp -> -atomic_fetch_sub(atomic<_Tp>* __o, _Tp __op) noexcept + __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> + atomic_fetch_sub(atomic<_Tp>* __o, _Tp __op) noexcept { - return __o->fetch_sub(__op); + return __o->fetch_sub(__op); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp* -atomic_fetch_sub(volatile atomic<_Tp*>* __o, ptrdiff_t __op) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp* atomic_fetch_sub(volatile atomic<_Tp*>* __o, ptrdiff_t __op) noexcept { - return __o->fetch_sub(__op); + return __o->fetch_sub(__op); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp* -atomic_fetch_sub(atomic<_Tp*>* __o, ptrdiff_t __op) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp* atomic_fetch_sub(atomic<_Tp*>* __o, ptrdiff_t __op) noexcept { - 
return __o->fetch_sub(__op); + return __o->fetch_sub(__op); } // atomic_fetch_sub_explicit template _LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - (is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, - _Tp -> -atomic_fetch_sub_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept + __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> + atomic_fetch_sub_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept { - return __o->fetch_sub(__op, __m); + return __o->fetch_sub(__op, __m); } template _LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - (is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, - _Tp -> -atomic_fetch_sub_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept + __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> + atomic_fetch_sub_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept { - return __o->fetch_sub(__op, __m); + return __o->fetch_sub(__op, __m); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp* -atomic_fetch_sub_explicit(volatile atomic<_Tp*>* __o, ptrdiff_t __op, - memory_order __m) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp* +atomic_fetch_sub_explicit(volatile atomic<_Tp*>* __o, ptrdiff_t __op, memory_order __m) noexcept { - return __o->fetch_sub(__op, __m); + return __o->fetch_sub(__op, __m); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp* +_LIBCUDACXX_INLINE_VISIBILITY _Tp* atomic_fetch_sub_explicit(atomic<_Tp*>* __o, ptrdiff_t __op, memory_order __m) noexcept { - return __o->fetch_sub(__op, __m); + return __o->fetch_sub(__op, __m); } // atomic_fetch_and template -_LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, - _Tp -> +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> atomic_fetch_and(volatile atomic<_Tp>* __o, _Tp __op) noexcept { - return __o->fetch_and(__op); + return __o->fetch_and(__op); } template -_LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, - _Tp -> +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> atomic_fetch_and(atomic<_Tp>* __o, _Tp __op) noexcept { - return __o->fetch_and(__op); + return __o->fetch_and(__op); } // atomic_fetch_and_explicit template -_LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, - _Tp -> +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> atomic_fetch_and_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept { - return __o->fetch_and(__op, __m); + return __o->fetch_and(__op, __m); } template -_LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, - _Tp -> +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> atomic_fetch_and_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept { - return __o->fetch_and(__op, __m); + return __o->fetch_and(__op, __m); } // atomic_fetch_or template -_LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, - _Tp -> +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> atomic_fetch_or(volatile atomic<_Tp>* __o, _Tp __op) noexcept { - return __o->fetch_or(__op); + return __o->fetch_or(__op); 
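// Editor's note (not part of this patch): the atomic_fetch_* free functions above mirror the member
// API and are constrained via __enable_if_t to integral, non-bool element types (with separate
// pointer overloads taking ptrdiff_t). A minimal sketch of the C-compatible interface; or_in is an
// illustrative name, not part of this header.
//
//   #include <cuda/std/atomic>
//
//   unsigned or_in(cuda::std::atomic<unsigned>* flags, unsigned mask)
//   {
//     // Equivalent to flags->fetch_or(mask); ill-formed for atomic<bool> or atomic<float>.
//     return cuda::std::atomic_fetch_or(flags, mask);
//   }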
} template -_LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, - _Tp -> +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> atomic_fetch_or(atomic<_Tp>* __o, _Tp __op) noexcept { - return __o->fetch_or(__op); + return __o->fetch_or(__op); } // atomic_fetch_or_explicit template -_LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, - _Tp -> +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> atomic_fetch_or_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept { - return __o->fetch_or(__op, __m); + return __o->fetch_or(__op, __m); } template -_LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, - _Tp -> +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> atomic_fetch_or_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept { - return __o->fetch_or(__op, __m); + return __o->fetch_or(__op, __m); } // atomic_fetch_xor template -_LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, - _Tp -> +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> atomic_fetch_xor(volatile atomic<_Tp>* __o, _Tp __op) noexcept { - return __o->fetch_xor(__op); + return __o->fetch_xor(__op); } template -_LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, - _Tp -> +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> atomic_fetch_xor(atomic<_Tp>* __o, _Tp __op) noexcept { - return __o->fetch_xor(__op); + return __o->fetch_xor(__op); } // atomic_fetch_xor_explicit template -_LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, - _Tp -> +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> atomic_fetch_xor_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept { - return __o->fetch_xor(__op, __m); + return __o->fetch_xor(__op, __m); } template -_LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, - _Tp -> +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> atomic_fetch_xor_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept { - return __o->fetch_xor(__op, __m); + return __o->fetch_xor(__op, __m); } // flag type and operations typedef struct atomic_flag { - __cxx_atomic_impl<_LIBCUDACXX_ATOMIC_FLAG_TYPE, 0> __a_; - - _LIBCUDACXX_INLINE_VISIBILITY - bool test(memory_order __m = memory_order_seq_cst) const volatile noexcept - {return _LIBCUDACXX_ATOMIC_FLAG_TYPE(true)==__cxx_atomic_load(&__a_, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - bool test(memory_order __m = memory_order_seq_cst) const noexcept - {return _LIBCUDACXX_ATOMIC_FLAG_TYPE(true)==__cxx_atomic_load(&__a_, __m);} - - _LIBCUDACXX_INLINE_VISIBILITY - bool test_and_set(memory_order __m = memory_order_seq_cst) volatile noexcept - {return __cxx_atomic_exchange(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), __m);} - _LIBCUDACXX_INLINE_VISIBILITY - bool test_and_set(memory_order __m = memory_order_seq_cst) noexcept - {return __cxx_atomic_exchange(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), __m);} - _LIBCUDACXX_INLINE_VISIBILITY - void clear(memory_order __m = memory_order_seq_cst) volatile noexcept - 
{__cxx_atomic_store(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(false), __m);} - _LIBCUDACXX_INLINE_VISIBILITY - void clear(memory_order __m = memory_order_seq_cst) noexcept - {__cxx_atomic_store(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(false), __m);} + __cxx_atomic_impl<_LIBCUDACXX_ATOMIC_FLAG_TYPE, 0> __a_; + + _LIBCUDACXX_INLINE_VISIBILITY bool test(memory_order __m = memory_order_seq_cst) const volatile noexcept + { + return _LIBCUDACXX_ATOMIC_FLAG_TYPE(true) == __cxx_atomic_load(&__a_, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY bool test(memory_order __m = memory_order_seq_cst) const noexcept + { + return _LIBCUDACXX_ATOMIC_FLAG_TYPE(true) == __cxx_atomic_load(&__a_, __m); + } + + _LIBCUDACXX_INLINE_VISIBILITY bool test_and_set(memory_order __m = memory_order_seq_cst) volatile noexcept + { + return __cxx_atomic_exchange(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), __m); + } + _LIBCUDACXX_INLINE_VISIBILITY bool test_and_set(memory_order __m = memory_order_seq_cst) noexcept + { + return __cxx_atomic_exchange(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), __m); + } + _LIBCUDACXX_INLINE_VISIBILITY void clear(memory_order __m = memory_order_seq_cst) volatile noexcept + { + __cxx_atomic_store(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(false), __m); + } + _LIBCUDACXX_INLINE_VISIBILITY void clear(memory_order __m = memory_order_seq_cst) noexcept + { + __cxx_atomic_store(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(false), __m); + } #if !defined(__CUDA_MINIMUM_ARCH__) || __CUDA_MINIMUM_ARCH__ >= 700 - _LIBCUDACXX_INLINE_VISIBILITY - void wait(bool __v, memory_order __m = memory_order_seq_cst) const volatile noexcept - {__cxx_atomic_wait(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(__v), __m);} - _LIBCUDACXX_INLINE_VISIBILITY - void wait(bool __v, memory_order __m = memory_order_seq_cst) const noexcept - {__cxx_atomic_wait(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(__v), __m);} - _LIBCUDACXX_INLINE_VISIBILITY - void notify_one() volatile noexcept - {__cxx_atomic_notify_one(&__a_);} - _LIBCUDACXX_INLINE_VISIBILITY - void notify_one() noexcept - {__cxx_atomic_notify_one(&__a_);} - _LIBCUDACXX_INLINE_VISIBILITY - void notify_all() volatile noexcept - {__cxx_atomic_notify_all(&__a_);} - _LIBCUDACXX_INLINE_VISIBILITY - void notify_all() noexcept - {__cxx_atomic_notify_all(&__a_);} + _LIBCUDACXX_INLINE_VISIBILITY void wait(bool __v, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + __cxx_atomic_wait(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(__v), __m); + } + _LIBCUDACXX_INLINE_VISIBILITY void wait(bool __v, memory_order __m = memory_order_seq_cst) const noexcept + { + __cxx_atomic_wait(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(__v), __m); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_one() volatile noexcept + { + __cxx_atomic_notify_one(&__a_); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_one() noexcept + { + __cxx_atomic_notify_one(&__a_); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_all() volatile noexcept + { + __cxx_atomic_notify_all(&__a_); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_all() noexcept + { + __cxx_atomic_notify_all(&__a_); + } #endif - atomic_flag() noexcept = default; + atomic_flag() noexcept = default; - _LIBCUDACXX_INLINE_VISIBILITY constexpr - atomic_flag(bool __b) noexcept : __a_(__b) {} // EXTENSION + _LIBCUDACXX_INLINE_VISIBILITY constexpr atomic_flag(bool __b) noexcept + : __a_(__b) + {} // EXTENSION - atomic_flag(const atomic_flag&) = delete; - atomic_flag& operator=(const atomic_flag&) = delete; - atomic_flag& operator=(const atomic_flag&) volatile = delete; + atomic_flag(const atomic_flag&) = delete; + 
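// Editor's note (not part of this patch): atomic_flag exposes test/test_and_set/clear, plus
// wait/notify_one/notify_all when __CUDA_MINIMUM_ARCH__ >= 700, and an EXTENSION constructor from
// bool (marked above). A minimal spinlock sketch using that API; spinlock is an illustrative name,
// not part of this header.
//
//   #include <cuda/std/atomic>
//
//   struct spinlock
//   {
//     cuda::std::atomic_flag flag{false}; // uses the bool constructor marked EXTENSION above
//
//     void lock()
//     {
//       // Spin until the previously stored value was false, i.e. this thread set the flag.
//       while (flag.test_and_set(cuda::std::memory_order_acquire)) {}
//     }
//     void unlock()
//     {
//       flag.clear(cuda::std::memory_order_release);
//     }
//   };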
atomic_flag& operator=(const atomic_flag&) = delete; + atomic_flag& operator=(const atomic_flag&) volatile = delete; } atomic_flag; - -inline _LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_flag_test(const volatile atomic_flag* __o) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test(const volatile atomic_flag* __o) noexcept { - return __o->test(); + return __o->test(); } -inline _LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_flag_test(const atomic_flag* __o) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test(const atomic_flag* __o) noexcept { - return __o->test(); + return __o->test(); } -inline _LIBCUDACXX_INLINE_VISIBILITY -bool +inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test_explicit(const volatile atomic_flag* __o, memory_order __m) noexcept { - return __o->test(__m); + return __o->test(__m); } -inline _LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_flag_test_explicit(const atomic_flag* __o, memory_order __m) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test_explicit(const atomic_flag* __o, memory_order __m) noexcept { - return __o->test(__m); + return __o->test(__m); } -inline _LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_flag_test_and_set(volatile atomic_flag* __o) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test_and_set(volatile atomic_flag* __o) noexcept { - return __o->test_and_set(); + return __o->test_and_set(); } -inline _LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_flag_test_and_set(atomic_flag* __o) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test_and_set(atomic_flag* __o) noexcept { - return __o->test_and_set(); + return __o->test_and_set(); } -inline _LIBCUDACXX_INLINE_VISIBILITY -bool +inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test_and_set_explicit(volatile atomic_flag* __o, memory_order __m) noexcept { - return __o->test_and_set(__m); + return __o->test_and_set(__m); } -inline _LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_flag_test_and_set_explicit(atomic_flag* __o, memory_order __m) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test_and_set_explicit(atomic_flag* __o, memory_order __m) noexcept { - return __o->test_and_set(__m); + return __o->test_and_set(__m); } -inline _LIBCUDACXX_INLINE_VISIBILITY -void -atomic_flag_clear(volatile atomic_flag* __o) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_clear(volatile atomic_flag* __o) noexcept { - __o->clear(); + __o->clear(); } -inline _LIBCUDACXX_INLINE_VISIBILITY -void -atomic_flag_clear(atomic_flag* __o) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_clear(atomic_flag* __o) noexcept { - __o->clear(); + __o->clear(); } -inline _LIBCUDACXX_INLINE_VISIBILITY -void +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_clear_explicit(volatile atomic_flag* __o, memory_order __m) noexcept { - __o->clear(__m); + __o->clear(__m); } -inline _LIBCUDACXX_INLINE_VISIBILITY -void -atomic_flag_clear_explicit(atomic_flag* __o, memory_order __m) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_clear_explicit(atomic_flag* __o, memory_order __m) noexcept { - __o->clear(__m); + __o->clear(__m); } #if !defined(__CUDA_MINIMUM_ARCH__) || __CUDA_MINIMUM_ARCH__ >= 700 -inline _LIBCUDACXX_INLINE_VISIBILITY -void -atomic_flag_wait(const volatile atomic_flag* __o, bool __v) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_wait(const volatile atomic_flag* __o, bool __v) noexcept { - __o->wait(__v); + __o->wait(__v); } -inline _LIBCUDACXX_INLINE_VISIBILITY -void 
-atomic_flag_wait(const atomic_flag* __o, bool __v) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_wait(const atomic_flag* __o, bool __v) noexcept { - __o->wait(__v); + __o->wait(__v); } -inline _LIBCUDACXX_INLINE_VISIBILITY -void -atomic_flag_wait_explicit(const volatile atomic_flag* __o, - bool __v, memory_order __m) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void +atomic_flag_wait_explicit(const volatile atomic_flag* __o, bool __v, memory_order __m) noexcept { - __o->wait(__v, __m); + __o->wait(__v, __m); } -inline _LIBCUDACXX_INLINE_VISIBILITY -void -atomic_flag_wait_explicit(const atomic_flag* __o, - bool __v, memory_order __m) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void +atomic_flag_wait_explicit(const atomic_flag* __o, bool __v, memory_order __m) noexcept { - __o->wait(__v, __m); + __o->wait(__v, __m); } -inline _LIBCUDACXX_INLINE_VISIBILITY -void -atomic_flag_notify_one(volatile atomic_flag* __o) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_notify_one(volatile atomic_flag* __o) noexcept { - __o->notify_one(); + __o->notify_one(); } -inline _LIBCUDACXX_INLINE_VISIBILITY -void -atomic_flag_notify_one(atomic_flag* __o) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_notify_one(atomic_flag* __o) noexcept { - __o->notify_one(); + __o->notify_one(); } -inline _LIBCUDACXX_INLINE_VISIBILITY -void -atomic_flag_notify_all(volatile atomic_flag* __o) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_notify_all(volatile atomic_flag* __o) noexcept { - __o->notify_all(); + __o->notify_all(); } -inline _LIBCUDACXX_INLINE_VISIBILITY -void -atomic_flag_notify_all(atomic_flag* __o) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_notify_all(atomic_flag* __o) noexcept { - __o->notify_all(); + __o->notify_all(); } #endif // fences -inline _LIBCUDACXX_INLINE_VISIBILITY -void -atomic_thread_fence(memory_order __m) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_thread_fence(memory_order __m) noexcept { - __cxx_atomic_thread_fence(__m); + __cxx_atomic_thread_fence(__m); } -inline _LIBCUDACXX_INLINE_VISIBILITY -void -atomic_signal_fence(memory_order __m) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_signal_fence(memory_order __m) noexcept { - __cxx_atomic_signal_fence(__m); + __cxx_atomic_signal_fence(__m); } // Atomics for standard typedef types -typedef atomic atomic_bool; -typedef atomic atomic_char; -typedef atomic atomic_schar; -typedef atomic atomic_uchar; -typedef atomic atomic_short; -typedef atomic atomic_ushort; -typedef atomic atomic_int; -typedef atomic atomic_uint; -typedef atomic atomic_long; -typedef atomic atomic_ulong; -typedef atomic atomic_llong; +typedef atomic atomic_bool; +typedef atomic atomic_char; +typedef atomic atomic_schar; +typedef atomic atomic_uchar; +typedef atomic atomic_short; +typedef atomic atomic_ushort; +typedef atomic atomic_int; +typedef atomic atomic_uint; +typedef atomic atomic_long; +typedef atomic atomic_ulong; +typedef atomic atomic_llong; typedef atomic atomic_ullong; -typedef atomic atomic_char16_t; -typedef atomic atomic_char32_t; -typedef atomic atomic_wchar_t; +typedef atomic atomic_char16_t; +typedef atomic atomic_char32_t; +typedef atomic atomic_wchar_t; -typedef atomic atomic_int_least8_t; -typedef atomic atomic_uint_least8_t; -typedef atomic atomic_int_least16_t; +typedef atomic atomic_int_least8_t; +typedef atomic atomic_uint_least8_t; +typedef atomic atomic_int_least16_t; typedef atomic atomic_uint_least16_t; -typedef atomic 
atomic_int_least32_t; +typedef atomic atomic_int_least32_t; typedef atomic atomic_uint_least32_t; -typedef atomic atomic_int_least64_t; +typedef atomic atomic_int_least64_t; typedef atomic atomic_uint_least64_t; -typedef atomic atomic_int_fast8_t; -typedef atomic atomic_uint_fast8_t; -typedef atomic atomic_int_fast16_t; +typedef atomic atomic_int_fast8_t; +typedef atomic atomic_uint_fast8_t; +typedef atomic atomic_int_fast16_t; typedef atomic atomic_uint_fast16_t; -typedef atomic atomic_int_fast32_t; +typedef atomic atomic_int_fast32_t; typedef atomic atomic_uint_fast32_t; -typedef atomic atomic_int_fast64_t; +typedef atomic atomic_int_fast64_t; typedef atomic atomic_uint_fast64_t; -typedef atomic< int8_t> atomic_int8_t; -typedef atomic atomic_uint8_t; -typedef atomic< int16_t> atomic_int16_t; +typedef atomic atomic_int8_t; +typedef atomic atomic_uint8_t; +typedef atomic atomic_int16_t; typedef atomic atomic_uint16_t; -typedef atomic< int32_t> atomic_int32_t; +typedef atomic atomic_int32_t; typedef atomic atomic_uint32_t; -typedef atomic< int64_t> atomic_int64_t; +typedef atomic atomic_int64_t; typedef atomic atomic_uint64_t; -typedef atomic atomic_intptr_t; +typedef atomic atomic_intptr_t; typedef atomic atomic_uintptr_t; -typedef atomic atomic_size_t; +typedef atomic atomic_size_t; typedef atomic atomic_ptrdiff_t; -typedef atomic atomic_intmax_t; +typedef atomic atomic_intmax_t; typedef atomic atomic_uintmax_t; static_assert(ATOMIC_INT_LOCK_FREE, "This library assumes atomic is lock-free."); -typedef atomic atomic_signed_lock_free; -typedef atomic atomic_unsigned_lock_free; +typedef atomic atomic_signed_lock_free; +typedef atomic atomic_unsigned_lock_free; -#define ATOMIC_FLAG_INIT {false} -#define ATOMIC_VAR_INIT(__v) {__v} +#define ATOMIC_FLAG_INIT \ + { \ + false \ + } +#define ATOMIC_VAR_INIT(__v) \ + { \ + __v \ + } _LIBCUDACXX_END_NAMESPACE_STD #include #include -#endif // _LIBCUDACXX_ATOMIC +#endif // _LIBCUDACXX_ATOMIC diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/bitset b/libcudacxx/include/cuda/std/detail/libcxx/include/bitset index c475bfb7d9f..ebf17ae02a2 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/bitset +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/bitset @@ -74,14 +74,10 @@ public: template basic_string > to_string(charT zero = charT('0'), charT one = charT('1')) const; template - basic_string, allocator > to_string(charT zero = charT('0'), charT one = charT('1')) const; - basic_string, allocator > to_string(char zero = '0', char one = '1') const; - size_t count() const noexcept; - constexpr size_t size() const noexcept; - bool operator==(const bitset& rhs) const noexcept; - bool operator!=(const bitset& rhs) const noexcept; - bool test(size_t pos) const; - bool all() const noexcept; + basic_string, allocator > to_string(charT zero = charT('0'), charT one = +charT('1')) const; basic_string, allocator > to_string(char zero = '0', char one = '1') +const; size_t count() const noexcept; constexpr size_t size() const noexcept; bool operator==(const bitset& rhs) const +noexcept; bool operator!=(const bitset& rhs) const noexcept; bool test(size_t pos) const; bool all() const noexcept; bool any() const noexcept; bool none() const noexcept; bitset operator<<(size_t pos) const noexcept; @@ -112,14 +108,14 @@ template struct hash>; */ -#include <__config> #include <__bit_reference> -#include +#include <__config> +#include <__functional_base> #include -#include -#include +#include #include -#include <__functional_base> +#include +#include 
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) # pragma GCC system_header @@ -132,929 +128,901 @@ template struct hash>; _LIBCUDACXX_PUSH_MACROS #include <__undef_macros> - _LIBCUDACXX_BEGIN_NAMESPACE_STD template class __bitset; template -struct __has_storage_type<__bitset<_N_words, _Size> > +struct __has_storage_type<__bitset<_N_words, _Size>> { - static const bool value = true; + static const bool value = true; }; template class __bitset { public: - typedef ptrdiff_t difference_type; - typedef size_t size_type; - typedef size_type __storage_type; + typedef ptrdiff_t difference_type; + typedef size_t size_type; + typedef size_type __storage_type; + protected: - typedef __bitset __self; - typedef __storage_type* __storage_pointer; - typedef const __storage_type* __const_storage_pointer; - static const unsigned __bits_per_word = static_cast(sizeof(__storage_type) * CHAR_BIT); - - friend class __bit_reference<__bitset>; - friend class __bit_const_reference<__bitset>; - friend class __bit_iterator<__bitset, false>; - friend class __bit_iterator<__bitset, true>; - friend struct __bit_array<__bitset>; - - __storage_type __first_[_N_words]; - - typedef __bit_reference<__bitset> reference; - typedef __bit_const_reference<__bitset> const_reference; - typedef __bit_iterator<__bitset, false> iterator; - typedef __bit_iterator<__bitset, true> const_iterator; - - _LIBCUDACXX_INLINE_VISIBILITY - constexpr __bitset() noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - explicit constexpr __bitset(unsigned long long __v) noexcept; - - _LIBCUDACXX_INLINE_VISIBILITY reference __make_ref(size_t __pos) noexcept - {return reference(__first_ + __pos / __bits_per_word, __storage_type(1) << __pos % __bits_per_word);} - _LIBCUDACXX_INLINE_VISIBILITY constexpr const_reference __make_ref(size_t __pos) const noexcept - {return const_reference(__first_ + __pos / __bits_per_word, __storage_type(1) << __pos % __bits_per_word);} - _LIBCUDACXX_INLINE_VISIBILITY iterator __make_iter(size_t __pos) noexcept - {return iterator(__first_ + __pos / __bits_per_word, __pos % __bits_per_word);} - _LIBCUDACXX_INLINE_VISIBILITY const_iterator __make_iter(size_t __pos) const noexcept - {return const_iterator(__first_ + __pos / __bits_per_word, __pos % __bits_per_word);} - - _LIBCUDACXX_INLINE_VISIBILITY - void operator&=(const __bitset& __v) noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - void operator|=(const __bitset& __v) noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - void operator^=(const __bitset& __v) noexcept; - - void flip() noexcept; - _LIBCUDACXX_INLINE_VISIBILITY unsigned long to_ulong() const - {return to_ulong(integral_constant());} - _LIBCUDACXX_INLINE_VISIBILITY unsigned long long to_ullong() const - {return to_ullong(integral_constant());} - - bool all() const noexcept; - bool any() const noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - size_t __hash_code() const noexcept; + typedef __bitset __self; + typedef __storage_type* __storage_pointer; + typedef const __storage_type* __const_storage_pointer; + static const unsigned __bits_per_word = static_cast(sizeof(__storage_type) * CHAR_BIT); + + friend class __bit_reference<__bitset>; + friend class __bit_const_reference<__bitset>; + friend class __bit_iterator<__bitset, false>; + friend class __bit_iterator<__bitset, true>; + friend struct __bit_array<__bitset>; + + __storage_type __first_[_N_words]; + + typedef __bit_reference<__bitset> reference; + typedef __bit_const_reference<__bitset> const_reference; + typedef __bit_iterator<__bitset, false> iterator; + typedef __bit_iterator<__bitset, 
true> const_iterator; + + _LIBCUDACXX_INLINE_VISIBILITY constexpr __bitset() noexcept; + _LIBCUDACXX_INLINE_VISIBILITY explicit constexpr __bitset(unsigned long long __v) noexcept; + + _LIBCUDACXX_INLINE_VISIBILITY reference __make_ref(size_t __pos) noexcept + { + return reference(__first_ + __pos / __bits_per_word, __storage_type(1) << __pos % __bits_per_word); + } + _LIBCUDACXX_INLINE_VISIBILITY constexpr const_reference __make_ref(size_t __pos) const noexcept + { + return const_reference(__first_ + __pos / __bits_per_word, __storage_type(1) << __pos % __bits_per_word); + } + _LIBCUDACXX_INLINE_VISIBILITY iterator __make_iter(size_t __pos) noexcept + { + return iterator(__first_ + __pos / __bits_per_word, __pos % __bits_per_word); + } + _LIBCUDACXX_INLINE_VISIBILITY const_iterator __make_iter(size_t __pos) const noexcept + { + return const_iterator(__first_ + __pos / __bits_per_word, __pos % __bits_per_word); + } + + _LIBCUDACXX_INLINE_VISIBILITY void operator&=(const __bitset& __v) noexcept; + _LIBCUDACXX_INLINE_VISIBILITY void operator|=(const __bitset& __v) noexcept; + _LIBCUDACXX_INLINE_VISIBILITY void operator^=(const __bitset& __v) noexcept; + + void flip() noexcept; + _LIBCUDACXX_INLINE_VISIBILITY unsigned long to_ulong() const + { + return to_ulong(integral_constant < bool, _Size()); + } + _LIBCUDACXX_INLINE_VISIBILITY unsigned long long to_ullong() const + { + return to_ullong(integral_constant < bool, _Size()); + } + + bool all() const noexcept; + bool any() const noexcept; + _LIBCUDACXX_INLINE_VISIBILITY size_t __hash_code() const noexcept; + private: - unsigned long to_ulong(false_type) const; - _LIBCUDACXX_INLINE_VISIBILITY - unsigned long to_ulong(true_type) const; - unsigned long long to_ullong(false_type) const; - _LIBCUDACXX_INLINE_VISIBILITY - unsigned long long to_ullong(true_type) const; - _LIBCUDACXX_INLINE_VISIBILITY - unsigned long long to_ullong(true_type, false_type) const; - unsigned long long to_ullong(true_type, true_type) const; + unsigned long to_ulong(false_type) const; + _LIBCUDACXX_INLINE_VISIBILITY unsigned long to_ulong(true_type) const; + unsigned long long to_ullong(false_type) const; + _LIBCUDACXX_INLINE_VISIBILITY unsigned long long to_ullong(true_type) const; + _LIBCUDACXX_INLINE_VISIBILITY unsigned long long to_ullong(true_type, false_type) const; + unsigned long long to_ullong(true_type, true_type) const; }; template -inline constexpr -__bitset<_N_words, _Size>::__bitset() noexcept +inline constexpr __bitset<_N_words, _Size>::__bitset() noexcept : __first_{0} {} template -inline -constexpr -__bitset<_N_words, _Size>::__bitset(unsigned long long __v) noexcept +inline constexpr __bitset<_N_words, _Size>::__bitset(unsigned long long __v) noexcept #if __SIZEOF_SIZE_T__ == 8 : __first_{__v} #elif __SIZEOF_SIZE_T__ == 4 - : __first_{static_cast<__storage_type>(__v), - _Size >= 2 * __bits_per_word ? static_cast<__storage_type>(__v >> __bits_per_word) - : static_cast<__storage_type>((__v >> __bits_per_word) & (__storage_type(1) << (_Size - __bits_per_word)) - 1)} + : __first_{ + static_cast<__storage_type>(__v), + _Size >= 2 * __bits_per_word + ? 
static_cast<__storage_type>(__v >> __bits_per_word) + : static_cast<__storage_type>((__v >> __bits_per_word) & (__storage_type(1) << (_Size - __bits_per_word)) - 1)} #else -#error This constructor has not been ported to this platform +# error This constructor has not been ported to this platform #endif {} template -inline -void -__bitset<_N_words, _Size>::operator&=(const __bitset& __v) noexcept +inline void __bitset<_N_words, _Size>::operator&=(const __bitset& __v) noexcept { - for (size_type __i = 0; __i < _N_words; ++__i) - __first_[__i] &= __v.__first_[__i]; + for (size_type __i = 0; __i < _N_words; ++__i) + { + __first_[__i] &= __v.__first_[__i]; + } } template -inline -void -__bitset<_N_words, _Size>::operator|=(const __bitset& __v) noexcept +inline void __bitset<_N_words, _Size>::operator|=(const __bitset& __v) noexcept { - for (size_type __i = 0; __i < _N_words; ++__i) - __first_[__i] |= __v.__first_[__i]; + for (size_type __i = 0; __i < _N_words; ++__i) + { + __first_[__i] |= __v.__first_[__i]; + } } template -inline -void -__bitset<_N_words, _Size>::operator^=(const __bitset& __v) noexcept +inline void __bitset<_N_words, _Size>::operator^=(const __bitset& __v) noexcept { - for (size_type __i = 0; __i < _N_words; ++__i) - __first_[__i] ^= __v.__first_[__i]; + for (size_type __i = 0; __i < _N_words; ++__i) + { + __first_[__i] ^= __v.__first_[__i]; + } } template -void -__bitset<_N_words, _Size>::flip() noexcept -{ - // do middle whole words - size_type __n = _Size; - __storage_pointer __p = __first_; - for (; __n >= __bits_per_word; ++__p, __n -= __bits_per_word) - *__p = ~*__p; - // do last partial word - if (__n > 0) - { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - __storage_type __b = *__p & __m; - *__p &= ~__m; - *__p |= ~__b & __m; - } +void __bitset<_N_words, _Size>::flip() noexcept +{ + // do middle whole words + size_type __n = _Size; + __storage_pointer __p = __first_; + for (; __n >= __bits_per_word; ++__p, __n -= __bits_per_word) + { + *__p = ~*__p; + } + // do last partial word + if (__n > 0) + { + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + __storage_type __b = *__p & __m; + *__p &= ~__m; + *__p |= ~__b & __m; + } } template -unsigned long -__bitset<_N_words, _Size>::to_ulong(false_type) const +unsigned long __bitset<_N_words, _Size>::to_ulong(false_type) const { - const_iterator __e = __make_iter(_Size); - const_iterator __i = _CUDA_VSTD::find(__make_iter(sizeof(unsigned long) * CHAR_BIT), __e, true); - if (__i != __e) - __throw_overflow_error("bitset to_ulong overflow error"); + const_iterator __e = __make_iter(_Size); + const_iterator __i = _CUDA_VSTD::find(__make_iter(sizeof(unsigned long) * CHAR_BIT), __e, true); + if (__i != __e) + { + __throw_overflow_error("bitset to_ulong overflow error"); + } - return __first_[0]; + return __first_[0]; } template -inline -unsigned long -__bitset<_N_words, _Size>::to_ulong(true_type) const +inline unsigned long __bitset<_N_words, _Size>::to_ulong(true_type) const { - return __first_[0]; + return __first_[0]; } template -unsigned long long -__bitset<_N_words, _Size>::to_ullong(false_type) const +unsigned long long __bitset<_N_words, _Size>::to_ullong(false_type) const { - const_iterator __e = __make_iter(_Size); - const_iterator __i = _CUDA_VSTD::find(__make_iter(sizeof(unsigned long long) * CHAR_BIT), __e, true); - if (__i != __e) - __throw_overflow_error("bitset to_ullong overflow error"); + const_iterator __e = __make_iter(_Size); + const_iterator __i = 
_CUDA_VSTD::find(__make_iter(sizeof(unsigned long long) * CHAR_BIT), __e, true); + if (__i != __e) + { + __throw_overflow_error("bitset to_ullong overflow error"); + } - return to_ullong(true_type()); + return to_ullong(true_type()); } template -inline -unsigned long long -__bitset<_N_words, _Size>::to_ullong(true_type) const +inline unsigned long long __bitset<_N_words, _Size>::to_ullong(true_type) const { - return to_ullong(true_type(), integral_constant()); + return to_ullong(true_type(), integral_constant()); } template -inline -unsigned long long -__bitset<_N_words, _Size>::to_ullong(true_type, false_type) const +inline unsigned long long __bitset<_N_words, _Size>::to_ullong(true_type, false_type) const { - return __first_[0]; + return __first_[0]; } template -unsigned long long -__bitset<_N_words, _Size>::to_ullong(true_type, true_type) const -{ - unsigned long long __r = __first_[0]; - for (std::size_t __i = 1; __i < sizeof(unsigned long long) / sizeof(__storage_type); ++__i) - __r |= static_cast(__first_[__i]) << (sizeof(__storage_type) * CHAR_BIT); - return __r; +unsigned long long __bitset<_N_words, _Size>::to_ullong(true_type, true_type) const +{ + unsigned long long __r = __first_[0]; + for (std::size_t __i = 1; __i < sizeof(unsigned long long) / sizeof(__storage_type); ++__i) + { + __r |= static_cast(__first_[__i]) << (sizeof(__storage_type) * CHAR_BIT); + } + return __r; } template -bool -__bitset<_N_words, _Size>::all() const noexcept -{ - // do middle whole words - size_type __n = _Size; - __const_storage_pointer __p = __first_; - for (; __n >= __bits_per_word; ++__p, __n -= __bits_per_word) - if (~*__p) - return false; - // do last partial word - if (__n > 0) +bool __bitset<_N_words, _Size>::all() const noexcept +{ + // do middle whole words + size_type __n = _Size; + __const_storage_pointer __p = __first_; + for (; __n >= __bits_per_word; ++__p, __n -= __bits_per_word) + { + if (~*__p) { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - if (~*__p & __m) - return false; + return false; } - return true; + } + // do last partial word + if (__n > 0) + { + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + if (~*__p & __m) + { + return false; + } + } + return true; } template -bool -__bitset<_N_words, _Size>::any() const noexcept -{ - // do middle whole words - size_type __n = _Size; - __const_storage_pointer __p = __first_; - for (; __n >= __bits_per_word; ++__p, __n -= __bits_per_word) - if (*__p) - return true; - // do last partial word - if (__n > 0) +bool __bitset<_N_words, _Size>::any() const noexcept +{ + // do middle whole words + size_type __n = _Size; + __const_storage_pointer __p = __first_; + for (; __n >= __bits_per_word; ++__p, __n -= __bits_per_word) + { + if (*__p) { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - if (*__p & __m) - return true; + return true; } - return false; + } + // do last partial word + if (__n > 0) + { + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + if (*__p & __m) + { + return true; + } + } + return false; } template -inline -size_t -__bitset<_N_words, _Size>::__hash_code() const noexcept -{ - size_t __h = 0; - for (size_type __i = 0; __i < _N_words; ++__i) - __h ^= __first_[__i]; - return __h; +inline size_t __bitset<_N_words, _Size>::__hash_code() const noexcept +{ + size_t __h = 0; + for (size_type __i = 0; __i < _N_words; ++__i) + { + __h ^= __first_[__i]; + } + return __h; } template class __bitset<1, _Size> { public: - typedef ptrdiff_t 
difference_type; - typedef size_t size_type; - typedef size_type __storage_type; -protected: - typedef __bitset __self; - typedef __storage_type* __storage_pointer; - typedef const __storage_type* __const_storage_pointer; - static const unsigned __bits_per_word = static_cast(sizeof(__storage_type) * CHAR_BIT); - - friend class __bit_reference<__bitset>; - friend class __bit_const_reference<__bitset>; - friend class __bit_iterator<__bitset, false>; - friend class __bit_iterator<__bitset, true>; - friend struct __bit_array<__bitset>; - - __storage_type __first_; - - typedef __bit_reference<__bitset> reference; - typedef __bit_const_reference<__bitset> const_reference; - typedef __bit_iterator<__bitset, false> iterator; - typedef __bit_iterator<__bitset, true> const_iterator; - - _LIBCUDACXX_INLINE_VISIBILITY - constexpr __bitset() noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - explicit constexpr __bitset(unsigned long long __v) noexcept; - - _LIBCUDACXX_INLINE_VISIBILITY reference __make_ref(size_t __pos) noexcept - {return reference(&__first_, __storage_type(1) << __pos);} - _LIBCUDACXX_INLINE_VISIBILITY constexpr const_reference __make_ref(size_t __pos) const noexcept - {return const_reference(&__first_, __storage_type(1) << __pos);} - _LIBCUDACXX_INLINE_VISIBILITY iterator __make_iter(size_t __pos) noexcept - {return iterator(&__first_ + __pos / __bits_per_word, __pos % __bits_per_word);} - _LIBCUDACXX_INLINE_VISIBILITY const_iterator __make_iter(size_t __pos) const noexcept - {return const_iterator(&__first_ + __pos / __bits_per_word, __pos % __bits_per_word);} - - _LIBCUDACXX_INLINE_VISIBILITY - void operator&=(const __bitset& __v) noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - void operator|=(const __bitset& __v) noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - void operator^=(const __bitset& __v) noexcept; - - _LIBCUDACXX_INLINE_VISIBILITY - void flip() noexcept; - - _LIBCUDACXX_INLINE_VISIBILITY - unsigned long to_ulong() const; - _LIBCUDACXX_INLINE_VISIBILITY - unsigned long long to_ullong() const; - - _LIBCUDACXX_INLINE_VISIBILITY - bool all() const noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - bool any() const noexcept; + typedef ptrdiff_t difference_type; + typedef size_t size_type; + typedef size_type __storage_type; - _LIBCUDACXX_INLINE_VISIBILITY - size_t __hash_code() const noexcept; +protected: + typedef __bitset __self; + typedef __storage_type* __storage_pointer; + typedef const __storage_type* __const_storage_pointer; + static const unsigned __bits_per_word = static_cast(sizeof(__storage_type) * CHAR_BIT); + + friend class __bit_reference<__bitset>; + friend class __bit_const_reference<__bitset>; + friend class __bit_iterator<__bitset, false>; + friend class __bit_iterator<__bitset, true>; + friend struct __bit_array<__bitset>; + + __storage_type __first_; + + typedef __bit_reference<__bitset> reference; + typedef __bit_const_reference<__bitset> const_reference; + typedef __bit_iterator<__bitset, false> iterator; + typedef __bit_iterator<__bitset, true> const_iterator; + + _LIBCUDACXX_INLINE_VISIBILITY constexpr __bitset() noexcept; + _LIBCUDACXX_INLINE_VISIBILITY explicit constexpr __bitset(unsigned long long __v) noexcept; + + _LIBCUDACXX_INLINE_VISIBILITY reference __make_ref(size_t __pos) noexcept + { + return reference(&__first_, __storage_type(1) << __pos); + } + _LIBCUDACXX_INLINE_VISIBILITY constexpr const_reference __make_ref(size_t __pos) const noexcept + { + return const_reference(&__first_, __storage_type(1) << __pos); + } + _LIBCUDACXX_INLINE_VISIBILITY iterator 
__make_iter(size_t __pos) noexcept + { + return iterator(&__first_ + __pos / __bits_per_word, __pos % __bits_per_word); + } + _LIBCUDACXX_INLINE_VISIBILITY const_iterator __make_iter(size_t __pos) const noexcept + { + return const_iterator(&__first_ + __pos / __bits_per_word, __pos % __bits_per_word); + } + + _LIBCUDACXX_INLINE_VISIBILITY void operator&=(const __bitset& __v) noexcept; + _LIBCUDACXX_INLINE_VISIBILITY void operator|=(const __bitset& __v) noexcept; + _LIBCUDACXX_INLINE_VISIBILITY void operator^=(const __bitset& __v) noexcept; + + _LIBCUDACXX_INLINE_VISIBILITY void flip() noexcept; + + _LIBCUDACXX_INLINE_VISIBILITY unsigned long to_ulong() const; + _LIBCUDACXX_INLINE_VISIBILITY unsigned long long to_ullong() const; + + _LIBCUDACXX_INLINE_VISIBILITY bool all() const noexcept; + _LIBCUDACXX_INLINE_VISIBILITY bool any() const noexcept; + + _LIBCUDACXX_INLINE_VISIBILITY size_t __hash_code() const noexcept; }; template -inline constexpr -__bitset<1, _Size>::__bitset() noexcept +inline constexpr __bitset<1, _Size>::__bitset() noexcept : __first_(0) -{ -} +{} template -inline constexpr -__bitset<1, _Size>::__bitset(unsigned long long __v) noexcept - : __first_( - _Size == __bits_per_word ? static_cast<__storage_type>(__v) - : static_cast<__storage_type>(__v) & ((__storage_type(1) << _Size) - 1) - ) -{ -} +inline constexpr __bitset<1, _Size>::__bitset(unsigned long long __v) noexcept + : __first_(_Size == __bits_per_word ? static_cast<__storage_type>(__v) + : static_cast<__storage_type>(__v) & ((__storage_type(1) << _Size) - 1)) +{} template -inline -void -__bitset<1, _Size>::operator&=(const __bitset& __v) noexcept +inline void __bitset<1, _Size>::operator&=(const __bitset& __v) noexcept { - __first_ &= __v.__first_; + __first_ &= __v.__first_; } template -inline -void -__bitset<1, _Size>::operator|=(const __bitset& __v) noexcept +inline void __bitset<1, _Size>::operator|=(const __bitset& __v) noexcept { - __first_ |= __v.__first_; + __first_ |= __v.__first_; } template -inline -void -__bitset<1, _Size>::operator^=(const __bitset& __v) noexcept +inline void __bitset<1, _Size>::operator^=(const __bitset& __v) noexcept { - __first_ ^= __v.__first_; + __first_ ^= __v.__first_; } template -inline -void -__bitset<1, _Size>::flip() noexcept +inline void __bitset<1, _Size>::flip() noexcept { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - _Size); - __first_ = ~__first_; - __first_ &= __m; + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - _Size); + __first_ = ~__first_; + __first_ &= __m; } template -inline -unsigned long -__bitset<1, _Size>::to_ulong() const +inline unsigned long __bitset<1, _Size>::to_ulong() const { - return __first_; + return __first_; } template -inline -unsigned long long -__bitset<1, _Size>::to_ullong() const +inline unsigned long long __bitset<1, _Size>::to_ullong() const { - return __first_; + return __first_; } template -inline -bool -__bitset<1, _Size>::all() const noexcept +inline bool __bitset<1, _Size>::all() const noexcept { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - _Size); - return !(~__first_ & __m); + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - _Size); + return !(~__first_ & __m); } template -inline -bool -__bitset<1, _Size>::any() const noexcept +inline bool __bitset<1, _Size>::any() const noexcept { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - _Size); - return __first_ & __m; + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - _Size); + return __first_ & 
__m; } template -inline -size_t -__bitset<1, _Size>::__hash_code() const noexcept +inline size_t __bitset<1, _Size>::__hash_code() const noexcept { - return __first_; + return __first_; } template <> class __bitset<0, 0> { public: - typedef ptrdiff_t difference_type; - typedef size_t size_type; - typedef size_type __storage_type; + typedef ptrdiff_t difference_type; + typedef size_t size_type; + typedef size_type __storage_type; + protected: - typedef __bitset __self; - typedef __storage_type* __storage_pointer; - typedef const __storage_type* __const_storage_pointer; - static const unsigned __bits_per_word = static_cast(sizeof(__storage_type) * CHAR_BIT); - - friend class __bit_reference<__bitset>; - friend class __bit_const_reference<__bitset>; - friend class __bit_iterator<__bitset, false>; - friend class __bit_iterator<__bitset, true>; - friend struct __bit_array<__bitset>; - - typedef __bit_reference<__bitset> reference; - typedef __bit_const_reference<__bitset> const_reference; - typedef __bit_iterator<__bitset, false> iterator; - typedef __bit_iterator<__bitset, true> const_iterator; - - _LIBCUDACXX_INLINE_VISIBILITY - constexpr __bitset() noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - explicit constexpr __bitset(unsigned long long) noexcept; - - _LIBCUDACXX_INLINE_VISIBILITY reference __make_ref(size_t) noexcept - {return reference(0, 1);} - _LIBCUDACXX_INLINE_VISIBILITY constexpr const_reference __make_ref(size_t) const noexcept - {return const_reference(0, 1);} - _LIBCUDACXX_INLINE_VISIBILITY iterator __make_iter(size_t) noexcept - {return iterator(0, 0);} - _LIBCUDACXX_INLINE_VISIBILITY const_iterator __make_iter(size_t) const noexcept - {return const_iterator(0, 0);} - - _LIBCUDACXX_INLINE_VISIBILITY void operator&=(const __bitset&) noexcept {} - _LIBCUDACXX_INLINE_VISIBILITY void operator|=(const __bitset&) noexcept {} - _LIBCUDACXX_INLINE_VISIBILITY void operator^=(const __bitset&) noexcept {} - - _LIBCUDACXX_INLINE_VISIBILITY void flip() noexcept {} - - _LIBCUDACXX_INLINE_VISIBILITY unsigned long to_ulong() const {return 0;} - _LIBCUDACXX_INLINE_VISIBILITY unsigned long long to_ullong() const {return 0;} - - _LIBCUDACXX_INLINE_VISIBILITY bool all() const noexcept {return true;} - _LIBCUDACXX_INLINE_VISIBILITY bool any() const noexcept {return false;} - - _LIBCUDACXX_INLINE_VISIBILITY size_t __hash_code() const noexcept {return 0;} + typedef __bitset __self; + typedef __storage_type* __storage_pointer; + typedef const __storage_type* __const_storage_pointer; + static const unsigned __bits_per_word = static_cast(sizeof(__storage_type) * CHAR_BIT); + + friend class __bit_reference<__bitset>; + friend class __bit_const_reference<__bitset>; + friend class __bit_iterator<__bitset, false>; + friend class __bit_iterator<__bitset, true>; + friend struct __bit_array<__bitset>; + + typedef __bit_reference<__bitset> reference; + typedef __bit_const_reference<__bitset> const_reference; + typedef __bit_iterator<__bitset, false> iterator; + typedef __bit_iterator<__bitset, true> const_iterator; + + _LIBCUDACXX_INLINE_VISIBILITY constexpr __bitset() noexcept; + _LIBCUDACXX_INLINE_VISIBILITY explicit constexpr __bitset(unsigned long long) noexcept; + + _LIBCUDACXX_INLINE_VISIBILITY reference __make_ref(size_t) noexcept + { + return reference(0, 1); + } + _LIBCUDACXX_INLINE_VISIBILITY constexpr const_reference __make_ref(size_t) const noexcept + { + return const_reference(0, 1); + } + _LIBCUDACXX_INLINE_VISIBILITY iterator __make_iter(size_t) noexcept + { + return iterator(0, 0); + } + 
_LIBCUDACXX_INLINE_VISIBILITY const_iterator __make_iter(size_t) const noexcept + { + return const_iterator(0, 0); + } + + _LIBCUDACXX_INLINE_VISIBILITY void operator&=(const __bitset&) noexcept {} + _LIBCUDACXX_INLINE_VISIBILITY void operator|=(const __bitset&) noexcept {} + _LIBCUDACXX_INLINE_VISIBILITY void operator^=(const __bitset&) noexcept {} + + _LIBCUDACXX_INLINE_VISIBILITY void flip() noexcept {} + + _LIBCUDACXX_INLINE_VISIBILITY unsigned long to_ulong() const + { + return 0; + } + _LIBCUDACXX_INLINE_VISIBILITY unsigned long long to_ullong() const + { + return 0; + } + + _LIBCUDACXX_INLINE_VISIBILITY bool all() const noexcept + { + return true; + } + _LIBCUDACXX_INLINE_VISIBILITY bool any() const noexcept + { + return false; + } + + _LIBCUDACXX_INLINE_VISIBILITY size_t __hash_code() const noexcept + { + return 0; + } }; -inline -constexpr -__bitset<0, 0>::__bitset() noexcept -{ -} +inline constexpr __bitset<0, 0>::__bitset() noexcept {} -inline -constexpr -__bitset<0, 0>::__bitset(unsigned long long) noexcept -{ -} +inline constexpr __bitset<0, 0>::__bitset(unsigned long long) noexcept {} -template class _LIBCUDACXX_TEMPLATE_VIS bitset; -template struct hash >; +template +class _LIBCUDACXX_TEMPLATE_VIS bitset; +template +struct hash>; template class _LIBCUDACXX_TEMPLATE_VIS bitset : private __bitset<_Size == 0 ? 0 : (_Size - 1) / (sizeof(size_t) * CHAR_BIT) + 1, _Size> { public: - static const unsigned __n_words = _Size == 0 ? 0 : (_Size - 1) / (sizeof(size_t) * CHAR_BIT) + 1; - typedef __bitset<__n_words, _Size> base; + static const unsigned __n_words = _Size == 0 ? 0 : (_Size - 1) / (sizeof(size_t) * CHAR_BIT) + 1; + typedef __bitset<__n_words, _Size> base; public: - typedef typename base::reference reference; - typedef typename base::const_reference const_reference; - - // 23.3.5.1 constructors: - _LIBCUDACXX_INLINE_VISIBILITY constexpr bitset() noexcept {} - _LIBCUDACXX_INLINE_VISIBILITY constexpr - bitset(unsigned long long __v) noexcept : base(__v) {} - template::value> > - explicit bitset(const _CharT* __str, - typename basic_string<_CharT>::size_type __n = basic_string<_CharT>::npos, - _CharT __zero = _CharT('0'), _CharT __one = _CharT('1')); - template - explicit bitset(const basic_string<_CharT,_Traits,_Allocator>& __str, - typename basic_string<_CharT,_Traits,_Allocator>::size_type __pos = 0, - typename basic_string<_CharT,_Traits,_Allocator>::size_type __n = - (basic_string<_CharT,_Traits,_Allocator>::npos), - _CharT __zero = _CharT('0'), _CharT __one = _CharT('1')); - - // 23.3.5.2 bitset operations: - _LIBCUDACXX_INLINE_VISIBILITY - bitset& operator&=(const bitset& __rhs) noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - bitset& operator|=(const bitset& __rhs) noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - bitset& operator^=(const bitset& __rhs) noexcept; - bitset& operator<<=(size_t __pos) noexcept; - bitset& operator>>=(size_t __pos) noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - bitset& set() noexcept; - bitset& set(size_t __pos, bool __val = true); - _LIBCUDACXX_INLINE_VISIBILITY - bitset& reset() noexcept; - bitset& reset(size_t __pos); - _LIBCUDACXX_INLINE_VISIBILITY - bitset operator~() const noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - bitset& flip() noexcept; - bitset& flip(size_t __pos); - - // element access: - _LIBCUDACXX_INLINE_VISIBILITY constexpr - const_reference operator[](size_t __p) const {return base::__make_ref(__p);} - _LIBCUDACXX_INLINE_VISIBILITY reference operator[](size_t __p) {return base::__make_ref(__p);} - _LIBCUDACXX_INLINE_VISIBILITY - unsigned 
long to_ulong() const; - _LIBCUDACXX_INLINE_VISIBILITY - unsigned long long to_ullong() const; - template - basic_string<_CharT, _Traits, _Allocator> to_string(_CharT __zero = _CharT('0'), - _CharT __one = _CharT('1')) const; - template - _LIBCUDACXX_INLINE_VISIBILITY - basic_string<_CharT, _Traits, allocator<_CharT> > to_string(_CharT __zero = _CharT('0'), - _CharT __one = _CharT('1')) const; - template - _LIBCUDACXX_INLINE_VISIBILITY - basic_string<_CharT, char_traits<_CharT>, allocator<_CharT> > to_string(_CharT __zero = _CharT('0'), - _CharT __one = _CharT('1')) const; - _LIBCUDACXX_INLINE_VISIBILITY - basic_string, allocator > to_string(char __zero = '0', - char __one = '1') const; - _LIBCUDACXX_INLINE_VISIBILITY - size_t count() const noexcept; - _LIBCUDACXX_INLINE_VISIBILITY constexpr size_t size() const noexcept {return _Size;} - _LIBCUDACXX_INLINE_VISIBILITY - bool operator==(const bitset& __rhs) const noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - bool operator!=(const bitset& __rhs) const noexcept; - bool test(size_t __pos) const; - _LIBCUDACXX_INLINE_VISIBILITY - bool all() const noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - bool any() const noexcept; - _LIBCUDACXX_INLINE_VISIBILITY bool none() const noexcept {return !any();} - _LIBCUDACXX_INLINE_VISIBILITY - bitset operator<<(size_t __pos) const noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - bitset operator>>(size_t __pos) const noexcept; + typedef typename base::reference reference; + typedef typename base::const_reference const_reference; + + // 23.3.5.1 constructors: + _LIBCUDACXX_INLINE_VISIBILITY constexpr bitset() noexcept {} + _LIBCUDACXX_INLINE_VISIBILITY constexpr bitset(unsigned long long __v) noexcept + : base(__v) + {} + template ::value>> + explicit bitset(const _CharT* __str, + typename basic_string<_CharT>::size_type __n = basic_string<_CharT>::npos, + _CharT __zero = _CharT('0'), + _CharT __one = _CharT('1')); + template + explicit bitset(const basic_string<_CharT, _Traits, _Allocator>& __str, + typename basic_string<_CharT, _Traits, _Allocator>::size_type __pos = 0, + typename basic_string<_CharT, _Traits, _Allocator>::size_type __n = + (basic_string<_CharT, _Traits, _Allocator>::npos), + _CharT __zero = _CharT('0'), + _CharT __one = _CharT('1')); + + // 23.3.5.2 bitset operations: + _LIBCUDACXX_INLINE_VISIBILITY bitset& operator&=(const bitset& __rhs) noexcept; + _LIBCUDACXX_INLINE_VISIBILITY bitset& operator|=(const bitset& __rhs) noexcept; + _LIBCUDACXX_INLINE_VISIBILITY bitset& operator^=(const bitset& __rhs) noexcept; + bitset& operator<<=(size_t __pos) noexcept; + bitset& operator>>=(size_t __pos) noexcept; + _LIBCUDACXX_INLINE_VISIBILITY bitset& set() noexcept; + bitset& set(size_t __pos, bool __val = true); + _LIBCUDACXX_INLINE_VISIBILITY bitset& reset() noexcept; + bitset& reset(size_t __pos); + _LIBCUDACXX_INLINE_VISIBILITY bitset operator~() const noexcept; + _LIBCUDACXX_INLINE_VISIBILITY bitset& flip() noexcept; + bitset& flip(size_t __pos); + + // element access: + _LIBCUDACXX_INLINE_VISIBILITY constexpr const_reference operator[](size_t __p) const + { + return base::__make_ref(__p); + } + _LIBCUDACXX_INLINE_VISIBILITY reference operator[](size_t __p) + { + return base::__make_ref(__p); + } + _LIBCUDACXX_INLINE_VISIBILITY unsigned long to_ulong() const; + _LIBCUDACXX_INLINE_VISIBILITY unsigned long long to_ullong() const; + template + basic_string<_CharT, _Traits, _Allocator> to_string(_CharT __zero = _CharT('0'), _CharT __one = _CharT('1')) const; + template + _LIBCUDACXX_INLINE_VISIBILITY 
basic_string<_CharT, _Traits, allocator<_CharT>> + to_string(_CharT __zero = _CharT('0'), _CharT __one = _CharT('1')) const; + template + _LIBCUDACXX_INLINE_VISIBILITY basic_string<_CharT, char_traits<_CharT>, allocator<_CharT>> + to_string(_CharT __zero = _CharT('0'), _CharT __one = _CharT('1')) const; + _LIBCUDACXX_INLINE_VISIBILITY basic_string, allocator> + to_string(char __zero = '0', char __one = '1') const; + _LIBCUDACXX_INLINE_VISIBILITY size_t count() const noexcept; + _LIBCUDACXX_INLINE_VISIBILITY constexpr size_t size() const noexcept + { + return _Size; + } + _LIBCUDACXX_INLINE_VISIBILITY bool operator==(const bitset& __rhs) const noexcept; + _LIBCUDACXX_INLINE_VISIBILITY bool operator!=(const bitset& __rhs) const noexcept; + bool test(size_t __pos) const; + _LIBCUDACXX_INLINE_VISIBILITY bool all() const noexcept; + _LIBCUDACXX_INLINE_VISIBILITY bool any() const noexcept; + _LIBCUDACXX_INLINE_VISIBILITY bool none() const noexcept + { + return !any(); + } + _LIBCUDACXX_INLINE_VISIBILITY bitset operator<<(size_t __pos) const noexcept; + _LIBCUDACXX_INLINE_VISIBILITY bitset operator>>(size_t __pos) const noexcept; private: + _LIBCUDACXX_INLINE_VISIBILITY size_t __hash_code() const noexcept + { + return base::__hash_code(); + } - _LIBCUDACXX_INLINE_VISIBILITY - size_t __hash_code() const noexcept {return base::__hash_code();} - - friend struct hash; + friend struct hash; }; template -template -bitset<_Size>::bitset(const _CharT* __str, - typename basic_string<_CharT>::size_type __n, - _CharT __zero, _CharT __one) +template +bitset<_Size>::bitset(const _CharT* __str, typename basic_string<_CharT>::size_type __n, _CharT __zero, _CharT __one) { - size_t __rlen = _CUDA_VSTD::min(__n, char_traits<_CharT>::length(__str)); - for (size_t __i = 0; __i < __rlen; ++__i) - if (__str[__i] != __zero && __str[__i] != __one) - __throw_invalid_argument("bitset string ctor has invalid argument"); - - size_t _Mp = _CUDA_VSTD::min(__rlen, _Size); - size_t __i = 0; - for (; __i < _Mp; ++__i) + size_t __rlen = _CUDA_VSTD::min(__n, char_traits<_CharT>::length(__str)); + for (size_t __i = 0; __i < __rlen; ++__i) + { + if (__str[__i] != __zero && __str[__i] != __one) { - _CharT __c = __str[_Mp - 1 - __i]; - if (__c == __zero) - (*this)[__i] = false; - else - (*this)[__i] = true; + __throw_invalid_argument("bitset string ctor has invalid argument"); } - _CUDA_VSTD::fill(base::__make_iter(__i), base::__make_iter(_Size), false); + } + + size_t _Mp = _CUDA_VSTD::min(__rlen, _Size); + size_t __i = 0; + for (; __i < _Mp; ++__i) + { + _CharT __c = __str[_Mp - 1 - __i]; + if (__c == __zero) + { + (*this)[__i] = false; + } + else + { + (*this)[__i] = true; + } + } + _CUDA_VSTD::fill(base::__make_iter(__i), base::__make_iter(_Size), false); } template -template -bitset<_Size>::bitset(const basic_string<_CharT,_Traits,_Allocator>& __str, - typename basic_string<_CharT,_Traits,_Allocator>::size_type __pos, - typename basic_string<_CharT,_Traits,_Allocator>::size_type __n, - _CharT __zero, _CharT __one) -{ - if (__pos > __str.size()) - __throw_out_of_range("bitset string pos out of range"); - - size_t __rlen = _CUDA_VSTD::min(__n, __str.size() - __pos); - for (size_t __i = __pos; __i < __pos + __rlen; ++__i) - if (!_Traits::eq(__str[__i], __zero) && !_Traits::eq(__str[__i], __one)) - __throw_invalid_argument("bitset string ctor has invalid argument"); - - size_t _Mp = _CUDA_VSTD::min(__rlen, _Size); - size_t __i = 0; - for (; __i < _Mp; ++__i) +template +bitset<_Size>::bitset( + const basic_string<_CharT, _Traits, 
_Allocator>& __str, + typename basic_string<_CharT, _Traits, _Allocator>::size_type __pos, + typename basic_string<_CharT, _Traits, _Allocator>::size_type __n, + _CharT __zero, + _CharT __one) +{ + if (__pos > __str.size()) + { + __throw_out_of_range("bitset string pos out of range"); + } + + size_t __rlen = _CUDA_VSTD::min(__n, __str.size() - __pos); + for (size_t __i = __pos; __i < __pos + __rlen; ++__i) + { + if (!_Traits::eq(__str[__i], __zero) && !_Traits::eq(__str[__i], __one)) + { + __throw_invalid_argument("bitset string ctor has invalid argument"); + } + } + + size_t _Mp = _CUDA_VSTD::min(__rlen, _Size); + size_t __i = 0; + for (; __i < _Mp; ++__i) + { + _CharT __c = __str[__pos + _Mp - 1 - __i]; + if (_Traits::eq(__c, __zero)) { - _CharT __c = __str[__pos + _Mp - 1 - __i]; - if (_Traits::eq(__c, __zero)) - (*this)[__i] = false; - else - (*this)[__i] = true; + (*this)[__i] = false; } - _CUDA_VSTD::fill(base::__make_iter(__i), base::__make_iter(_Size), false); + else + { + (*this)[__i] = true; + } + } + _CUDA_VSTD::fill(base::__make_iter(__i), base::__make_iter(_Size), false); } template -inline -bitset<_Size>& -bitset<_Size>::operator&=(const bitset& __rhs) noexcept +inline bitset<_Size>& bitset<_Size>::operator&=(const bitset& __rhs) noexcept { - base::operator&=(__rhs); - return *this; + base::operator&=(__rhs); + return *this; } template -inline -bitset<_Size>& -bitset<_Size>::operator|=(const bitset& __rhs) noexcept +inline bitset<_Size>& bitset<_Size>::operator|=(const bitset& __rhs) noexcept { - base::operator|=(__rhs); - return *this; + base::operator|=(__rhs); + return *this; } template -inline -bitset<_Size>& -bitset<_Size>::operator^=(const bitset& __rhs) noexcept +inline bitset<_Size>& bitset<_Size>::operator^=(const bitset& __rhs) noexcept { - base::operator^=(__rhs); - return *this; + base::operator^=(__rhs); + return *this; } template -bitset<_Size>& -bitset<_Size>::operator<<=(size_t __pos) noexcept +bitset<_Size>& bitset<_Size>::operator<<=(size_t __pos) noexcept { - __pos = _CUDA_VSTD::min(__pos, _Size); - _CUDA_VSTD::copy_backward(base::__make_iter(0), base::__make_iter(_Size - __pos), base::__make_iter(_Size)); - _CUDA_VSTD::fill_n(base::__make_iter(0), __pos, false); - return *this; + __pos = _CUDA_VSTD::min(__pos, _Size); + _CUDA_VSTD::copy_backward(base::__make_iter(0), base::__make_iter(_Size - __pos), base::__make_iter(_Size)); + _CUDA_VSTD::fill_n(base::__make_iter(0), __pos, false); + return *this; } template -bitset<_Size>& -bitset<_Size>::operator>>=(size_t __pos) noexcept +bitset<_Size>& bitset<_Size>::operator>>=(size_t __pos) noexcept { - __pos = _CUDA_VSTD::min(__pos, _Size); - _CUDA_VSTD::copy(base::__make_iter(__pos), base::__make_iter(_Size), base::__make_iter(0)); - _CUDA_VSTD::fill_n(base::__make_iter(_Size - __pos), __pos, false); - return *this; + __pos = _CUDA_VSTD::min(__pos, _Size); + _CUDA_VSTD::copy(base::__make_iter(__pos), base::__make_iter(_Size), base::__make_iter(0)); + _CUDA_VSTD::fill_n(base::__make_iter(_Size - __pos), __pos, false); + return *this; } template -inline -bitset<_Size>& -bitset<_Size>::set() noexcept +inline bitset<_Size>& bitset<_Size>::set() noexcept { - _CUDA_VSTD::fill_n(base::__make_iter(0), _Size, true); - return *this; + _CUDA_VSTD::fill_n(base::__make_iter(0), _Size, true); + return *this; } template -bitset<_Size>& -bitset<_Size>::set(size_t __pos, bool __val) +bitset<_Size>& bitset<_Size>::set(size_t __pos, bool __val) { - if (__pos >= _Size) - __throw_out_of_range("bitset set argument out of range"); + if 
(__pos >= _Size) + { + __throw_out_of_range("bitset set argument out of range"); + } - (*this)[__pos] = __val; - return *this; + (*this)[__pos] = __val; + return *this; } template -inline -bitset<_Size>& -bitset<_Size>::reset() noexcept +inline bitset<_Size>& bitset<_Size>::reset() noexcept { - _CUDA_VSTD::fill_n(base::__make_iter(0), _Size, false); - return *this; + _CUDA_VSTD::fill_n(base::__make_iter(0), _Size, false); + return *this; } template -bitset<_Size>& -bitset<_Size>::reset(size_t __pos) +bitset<_Size>& bitset<_Size>::reset(size_t __pos) { - if (__pos >= _Size) - __throw_out_of_range("bitset reset argument out of range"); + if (__pos >= _Size) + { + __throw_out_of_range("bitset reset argument out of range"); + } - (*this)[__pos] = false; - return *this; + (*this)[__pos] = false; + return *this; } template -inline -bitset<_Size> -bitset<_Size>::operator~() const noexcept +inline bitset<_Size> bitset<_Size>::operator~() const noexcept { - bitset __x(*this); - __x.flip(); - return __x; + bitset __x(*this); + __x.flip(); + return __x; } template -inline -bitset<_Size>& -bitset<_Size>::flip() noexcept +inline bitset<_Size>& bitset<_Size>::flip() noexcept { - base::flip(); - return *this; + base::flip(); + return *this; } template -bitset<_Size>& -bitset<_Size>::flip(size_t __pos) +bitset<_Size>& bitset<_Size>::flip(size_t __pos) { - if (__pos >= _Size) - __throw_out_of_range("bitset flip argument out of range"); + if (__pos >= _Size) + { + __throw_out_of_range("bitset flip argument out of range"); + } - reference r = base::__make_ref(__pos); - r = ~r; - return *this; + reference r = base::__make_ref(__pos); + r = ~r; + return *this; } template -inline -unsigned long -bitset<_Size>::to_ulong() const +inline unsigned long bitset<_Size>::to_ulong() const { - return base::to_ulong(); + return base::to_ulong(); } template -inline -unsigned long long -bitset<_Size>::to_ullong() const +inline unsigned long long bitset<_Size>::to_ullong() const { - return base::to_ullong(); + return base::to_ullong(); } template template -basic_string<_CharT, _Traits, _Allocator> -bitset<_Size>::to_string(_CharT __zero, _CharT __one) const +basic_string<_CharT, _Traits, _Allocator> bitset<_Size>::to_string(_CharT __zero, _CharT __one) const { - basic_string<_CharT, _Traits, _Allocator> __r(_Size, __zero); - for (size_t __i = 0; __i < _Size; ++__i) + basic_string<_CharT, _Traits, _Allocator> __r(_Size, __zero); + for (size_t __i = 0; __i < _Size; ++__i) + { + if ((*this)[__i]) { - if ((*this)[__i]) - __r[_Size - 1 - __i] = __one; + __r[_Size - 1 - __i] = __one; } - return __r; + } + return __r; } template template -inline -basic_string<_CharT, _Traits, allocator<_CharT> > -bitset<_Size>::to_string(_CharT __zero, _CharT __one) const +inline basic_string<_CharT, _Traits, allocator<_CharT>> bitset<_Size>::to_string(_CharT __zero, _CharT __one) const { - return to_string<_CharT, _Traits, allocator<_CharT> >(__zero, __one); + return to_string<_CharT, _Traits, allocator<_CharT>>(__zero, __one); } template template -inline -basic_string<_CharT, char_traits<_CharT>, allocator<_CharT> > +inline basic_string<_CharT, char_traits<_CharT>, allocator<_CharT>> bitset<_Size>::to_string(_CharT __zero, _CharT __one) const { - return to_string<_CharT, char_traits<_CharT>, allocator<_CharT> >(__zero, __one); + return to_string<_CharT, char_traits<_CharT>, allocator<_CharT>>(__zero, __one); } template -inline -basic_string, allocator > -bitset<_Size>::to_string(char __zero, char __one) const +inline basic_string, allocator> 
bitset<_Size>::to_string(char __zero, char __one) const { - return to_string, allocator >(__zero, __one); + return to_string, allocator>(__zero, __one); } template -inline -size_t -bitset<_Size>::count() const noexcept +inline size_t bitset<_Size>::count() const noexcept { - return static_cast(__count_bool_true(base::__make_iter(0), _Size)); + return static_cast(__count_bool_true(base::__make_iter(0), _Size)); } template -inline -bool -bitset<_Size>::operator==(const bitset& __rhs) const noexcept +inline bool bitset<_Size>::operator==(const bitset& __rhs) const noexcept { - return _CUDA_VSTD::equal(base::__make_iter(0), base::__make_iter(_Size), __rhs.__make_iter(0)); + return _CUDA_VSTD::equal(base::__make_iter(0), base::__make_iter(_Size), __rhs.__make_iter(0)); } template -inline -bool -bitset<_Size>::operator!=(const bitset& __rhs) const noexcept +inline bool bitset<_Size>::operator!=(const bitset& __rhs) const noexcept { - return !(*this == __rhs); + return !(*this == __rhs); } template -bool -bitset<_Size>::test(size_t __pos) const +bool bitset<_Size>::test(size_t __pos) const { - if (__pos >= _Size) - __throw_out_of_range("bitset test argument out of range"); + if (__pos >= _Size) + { + __throw_out_of_range("bitset test argument out of range"); + } - return (*this)[__pos]; + return (*this)[__pos]; } template -inline -bool -bitset<_Size>::all() const noexcept +inline bool bitset<_Size>::all() const noexcept { - return base::all(); + return base::all(); } template -inline -bool -bitset<_Size>::any() const noexcept +inline bool bitset<_Size>::any() const noexcept { - return base::any(); + return base::any(); } template -inline -bitset<_Size> -bitset<_Size>::operator<<(size_t __pos) const noexcept +inline bitset<_Size> bitset<_Size>::operator<<(size_t __pos) const noexcept { - bitset __r = *this; - __r <<= __pos; - return __r; + bitset __r = *this; + __r <<= __pos; + return __r; } template -inline -bitset<_Size> -bitset<_Size>::operator>>(size_t __pos) const noexcept +inline bitset<_Size> bitset<_Size>::operator>>(size_t __pos) const noexcept { - bitset __r = *this; - __r >>= __pos; - return __r; + bitset __r = *this; + __r >>= __pos; + return __r; } template -inline _LIBCUDACXX_INLINE_VISIBILITY -bitset<_Size> -operator&(const bitset<_Size>& __x, const bitset<_Size>& __y) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY bitset<_Size> operator&(const bitset<_Size>& __x, const bitset<_Size>& __y) noexcept { - bitset<_Size> __r = __x; - __r &= __y; - return __r; + bitset<_Size> __r = __x; + __r &= __y; + return __r; } template -inline _LIBCUDACXX_INLINE_VISIBILITY -bitset<_Size> -operator|(const bitset<_Size>& __x, const bitset<_Size>& __y) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY bitset<_Size> operator|(const bitset<_Size>& __x, const bitset<_Size>& __y) noexcept { - bitset<_Size> __r = __x; - __r |= __y; - return __r; + bitset<_Size> __r = __x; + __r |= __y; + return __r; } template -inline _LIBCUDACXX_INLINE_VISIBILITY -bitset<_Size> -operator^(const bitset<_Size>& __x, const bitset<_Size>& __y) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY bitset<_Size> operator^(const bitset<_Size>& __x, const bitset<_Size>& __y) noexcept { - bitset<_Size> __r = __x; - __r ^= __y; - return __r; + bitset<_Size> __r = __x; + __r ^= __y; + return __r; } template -struct _LIBCUDACXX_TEMPLATE_VIS hash > - : public __unary_function, size_t> +struct _LIBCUDACXX_TEMPLATE_VIS hash> : public __unary_function, size_t> { - _LIBCUDACXX_INLINE_VISIBILITY - size_t operator()(const bitset<_Size>& __bs) const 
noexcept - {return __bs.__hash_code();} + _LIBCUDACXX_INLINE_VISIBILITY size_t operator()(const bitset<_Size>& __bs) const noexcept + { + return __bs.__hash_code(); + } }; template -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, bitset<_Size>& __x); +basic_istream<_CharT, _Traits>& operator>>(basic_istream<_CharT, _Traits>& __is, bitset<_Size>& __x); template -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, const bitset<_Size>& __x); +basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const bitset<_Size>& __x); _LIBCUDACXX_END_NAMESPACE_STD _LIBCUDACXX_POP_MACROS -#endif // _LIBCUDACXX_BITSET +#endif // _LIBCUDACXX_BITSET diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/concepts b/libcudacxx/include/cuda/std/detail/libcxx/include/concepts index 15f041190c0..24995197262 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/concepts +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/concepts @@ -140,7 +140,6 @@ namespace std { # pragma system_header #endif // no system header -#include // all public C++ headers provide the assertion handler #include #include #include @@ -166,7 +165,7 @@ namespace std { #include #include #include - +#include // all public C++ headers provide the assertion handler #include #endif // _LIBCUDACXX_CONCEPTS diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/version b/libcudacxx/include/cuda/std/detail/libcxx/include/version index 9c81e18dcb2..08f33681920 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/version +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/version @@ -10,7 +10,6 @@ #ifndef _LIBCUDACXX_VERSIONH #define _LIBCUDACXX_VERSIONH - /* version synopsis @@ -212,131 +211,131 @@ __cpp_lib_void_t 201411L // We need to define our own macros to not conflict with the host stl. 
// At the same time we want bring in all feature test macros from host #if __has_include() // should be the smallest include possible -#include +# include #elif !defined(_CCCL_COMPILER_NVRTC) -#include // otherwise go for the smallest possible header +# include // otherwise go for the smallest possible header #endif #if _CCCL_STD_VER > 2011 -# define __cccl_lib_chrono_udls 201304L -# define __cccl_lib_complex_udls 201309L -#ifdef _LIBCUDACXX_IS_CONSTANT_EVALUATED -# define __cccl_lib_constexpr_complex 201711L -#endif -# define __cccl_lib_concepts 202002L -# define __cccl_lib_exchange_function 201304L -# define __cccl_lib_expected 202211L +# define __cccl_lib_chrono_udls 201304L +# define __cccl_lib_complex_udls 201309L +# ifdef _LIBCUDACXX_IS_CONSTANT_EVALUATED +# define __cccl_lib_constexpr_complex 201711L +# endif +# define __cccl_lib_concepts 202002L +# define __cccl_lib_exchange_function 201304L +# define __cccl_lib_expected 202211L // # define __cccl_lib_generic_associative_lookup 201304L -# define __cccl_lib_integer_sequence 201304L -# define __cccl_lib_integral_constant_callable 201304L -# define __cccl_lib_is_final 201402L -# define __cccl_lib_is_null_pointer 201309L -# define __cccl_lib_make_reverse_iterator 201402L +# define __cccl_lib_integer_sequence 201304L +# define __cccl_lib_integral_constant_callable 201304L +# define __cccl_lib_is_final 201402L +# define __cccl_lib_is_null_pointer 201309L +# define __cccl_lib_make_reverse_iterator 201402L // # define __cccl_lib_make_unique 201304L -# define __cccl_lib_null_iterators 201304L -# define __cccl_lib_optional 202110L +# define __cccl_lib_null_iterators 201304L +# define __cccl_lib_optional 202110L // # define __cccl_lib_quoted_string_io 201304L -# define __cccl_lib_result_of_sfinae 201210L -# define __cccl_lib_robust_nonmodifying_seq_ops 201304L -# if !defined(_LIBCUDACXX_HAS_NO_THREADS) +# define __cccl_lib_result_of_sfinae 201210L +# define __cccl_lib_robust_nonmodifying_seq_ops 201304L +# if !defined(_LIBCUDACXX_HAS_NO_THREADS) // # define __cccl_lib_shared_timed_mutex 201402L -# endif -# define __cccl_lib_span 202002L +# endif +# define __cccl_lib_span 202002L // # define __cccl_lib_string_udls 201304L -# define __cccl_lib_transformation_trait_aliases 201304L -# define __cccl_lib_transparent_operators 201210L -# define __cccl_lib_tuple_element_t 201402L -# define __cccl_lib_tuples_by_type 201304L +# define __cccl_lib_transformation_trait_aliases 201304L +# define __cccl_lib_transparent_operators 201210L +# define __cccl_lib_tuple_element_t 201402L +# define __cccl_lib_tuples_by_type 201304L #endif // _CCCL_STD_VER > 2011 #if _CCCL_STD_VER > 2014 -# if defined(_LIBCUDACXX_ADDRESSOF) -# define __cccl_lib_addressof_constexpr 201603L -# endif +# if defined(_LIBCUDACXX_ADDRESSOF) +# define __cccl_lib_addressof_constexpr 201603L +# endif // # define __cccl_lib_allocator_traits_is_always_equal 201411L // # define __cccl_lib_any 201606L -# define __cccl_lib_apply 201603L -# define __cccl_lib_array_constexpr 201603L -# define __cccl_lib_as_const 201510L -# if !defined(_LIBCUDACXX_HAS_NO_THREADS) -# define __cccl_lib_atomic_is_always_lock_free 201603L -# endif -# define __cccl_lib_bind_front 201907L -# define __cccl_lib_bool_constant 201505L +# define __cccl_lib_apply 201603L +# define __cccl_lib_array_constexpr 201603L +# define __cccl_lib_as_const 201510L +# if !defined(_LIBCUDACXX_HAS_NO_THREADS) +# define __cccl_lib_atomic_is_always_lock_free 201603L +# endif +# define __cccl_lib_bind_front 201907L +# define 
__cccl_lib_bool_constant 201505L // # define __cccl_lib_boyer_moore_searcher 201603L -# define __cccl_lib_byte 201603L -# define __cccl_lib_chrono 201611L +# define __cccl_lib_byte 201603L +# define __cccl_lib_chrono 201611L // # define __cccl_lib_clamp 201603L // # define __cccl_lib_enable_shared_from_this 201603L // # define __cccl_lib_execution 201603L // # define __cccl_lib_filesystem 201703L -# define __cccl_lib_gcd_lcm 201606L -# define __cccl_lib_hardware_interference_size 201703L -# if defined(_LIBCUDACXX_HAS_UNIQUE_OBJECT_REPRESENTATIONS) -# define __cccl_lib_has_unique_object_representations 201606L -# endif -# define __cccl_lib_hypot 201603L +# define __cccl_lib_gcd_lcm 201606L +# define __cccl_lib_hardware_interference_size 201703L +# if defined(_LIBCUDACXX_HAS_UNIQUE_OBJECT_REPRESENTATIONS) +# define __cccl_lib_has_unique_object_representations 201606L +# endif +# define __cccl_lib_hypot 201603L // # define __cccl_lib_incomplete_container_elements 201505L -# define __cccl_lib_invoke 201411L -# if !defined(_LIBCUDACXX_HAS_NO_IS_AGGREGATE) -# define __cccl_lib_is_aggregate 201703L -# endif -# define __cccl_lib_is_invocable 201703L -# define __cccl_lib_is_swappable 201603L -# define __cccl_lib_launder 201606L -# define __cccl_lib_logical_traits 201510L -# define __cccl_lib_make_from_tuple 201606L +# define __cccl_lib_invoke 201411L +# if !defined(_LIBCUDACXX_HAS_NO_IS_AGGREGATE) +# define __cccl_lib_is_aggregate 201703L +# endif +# define __cccl_lib_is_invocable 201703L +# define __cccl_lib_is_swappable 201603L +# define __cccl_lib_launder 201606L +# define __cccl_lib_logical_traits 201510L +# define __cccl_lib_make_from_tuple 201606L // # define __cccl_lib_map_try_emplace 201411L // # define __cccl_lib_math_special_functions 201603L // # define __cccl_lib_memory_resource 201603L // # define __cccl_lib_node_extract 201606L // # define __cccl_lib_nonmember_container_access 201411L -# define __cccl_lib_not_fn 201603L +# define __cccl_lib_not_fn 201603L // # define __cccl_lib_parallel_algorithm 201603L // # define __cccl_lib_raw_memory_algorithms 201606L // # define __cccl_lib_sample 201603L // # define __cccl_lib_scoped_lock 201703L -# if !defined(_LIBCUDACXX_HAS_NO_THREADS) +# if !defined(_LIBCUDACXX_HAS_NO_THREADS) // # define __cccl_lib_shared_mutex 201505L -# endif +# endif // # define __cccl_lib_shared_ptr_arrays 201611L // # define __cccl_lib_shared_ptr_weak_type 201606L // # define __cccl_lib_string_view 201606L // # define __cccl_lib_to_chars 201611L -# define __cccl_lib_type_trait_variable_templates 201510L -# define __cccl_lib_uncaught_exceptions 201411L -# define __cccl_lib_unordered_map_try_emplace 201411L -# define __cccl_lib_variant 201606L -# define __cccl_lib_void_t 201411L +# define __cccl_lib_type_trait_variable_templates 201510L +# define __cccl_lib_uncaught_exceptions 201411L +# define __cccl_lib_unordered_map_try_emplace 201411L +# define __cccl_lib_variant 201606L +# define __cccl_lib_void_t 201411L #endif // _CCCL_STD_VER > 2014 #if _CCCL_STD_VER > 2017 -# undef __cccl_lib_array_constexpr -# define __cccl_lib_array_constexpr 201811L +# undef __cccl_lib_array_constexpr +# define __cccl_lib_array_constexpr 201811L // # define __cccl_lib_assume_aligned 201811L -# define __cccl_lib_atomic_flag_test 201907L -# define __cccl_lib_atomic_float 201711L -# define __cccl_lib_atomic_lock_free_type_aliases 201907L -# if !defined(_LIBCUDACXX_HAS_NO_THREADS) -# define __cccl_lib_atomic_ref 201806L -#endif +# define __cccl_lib_atomic_flag_test 201907L +# define 
__cccl_lib_atomic_float 201711L +# define __cccl_lib_atomic_lock_free_type_aliases 201907L +# if !defined(_LIBCUDACXX_HAS_NO_THREADS) +# define __cccl_lib_atomic_ref 201806L +# endif // # define __cccl_lib_atomic_shared_ptr 201711L -# define __cccl_lib_atomic_value_initialization 201911L -# if !defined(_LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait) -# define __cccl_lib_atomic_wait 201907L -# endif -# if !defined(_LIBCUDACXX_HAS_NO_THREADS) && !defined(_LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier) -# define __cccl_lib_barrier 201907L -# endif -# define __cccl_lib_bit_cast 201806L -# define __cccl_lib_bitops 201907L -# define __cccl_lib_bounded_array_traits 201902L -# if !defined(_LIBCUDACXX_NO_HAS_CHAR8_T) -# define __cccl_lib_char8_t 201811L -# endif +# define __cccl_lib_atomic_value_initialization 201911L +# if !defined(_LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait) +# define __cccl_lib_atomic_wait 201907L +# endif +# if !defined(_LIBCUDACXX_HAS_NO_THREADS) && !defined(_LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier) +# define __cccl_lib_barrier 201907L +# endif +# define __cccl_lib_bit_cast 201806L +# define __cccl_lib_bitops 201907L +# define __cccl_lib_bounded_array_traits 201902L +# if !defined(_LIBCUDACXX_NO_HAS_CHAR8_T) +# define __cccl_lib_char8_t 201811L +# endif // # define __cccl_lib_constexpr_algorithms 201806L // # define __cccl_lib_constexpr_dynamic_alloc 201907L -# define __cccl_lib_constexpr_functional 201907L +# define __cccl_lib_constexpr_functional 201907L // # define __cccl_lib_constexpr_iterator 201811L // # define __cccl_lib_constexpr_memory 201811L // # define __cccl_lib_constexpr_misc 201811L @@ -348,40 +347,41 @@ __cpp_lib_void_t 201411L // # define __cccl_lib_constexpr_utility 201811L // # define __cccl_lib_constexpr_vector 201907L // # define __cccl_lib_coroutine 201902L -# if defined(__cpp_impl_destroying_delete) && __cpp_impl_destroying_delete >= 201806L && defined(__cpp_lib_destroying_delete) -# define __cccl_lib_destroying_delete 201806L -# endif +# if defined(__cpp_impl_destroying_delete) && __cpp_impl_destroying_delete >= 201806L \ + && defined(__cpp_lib_destroying_delete) +# define __cccl_lib_destroying_delete 201806L +# endif // # define __cccl_lib_endian 201907L // # define __cccl_lib_erase_if 201811L // # undef __cccl_lib_execution // # define __cccl_lib_execution 201902L -# if !defined(_LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_format) && !defined(_LIBCUDACXX_HAS_NO_INCOMPLETE_FORMAT) +# if !defined(_LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_format) && !defined(_LIBCUDACXX_HAS_NO_INCOMPLETE_FORMAT) // # define __cccl_lib_format 202106L -# endif +# endif // # define __cccl_lib_generic_unordered_lookup 201811L // # define __cccl_lib_int_pow2 202002L // # define __cccl_lib_integer_comparison_functions 202002L // # define __cccl_lib_interpolate 201902L -# if defined(_LIBCUDACXX_IS_CONSTANT_EVALUATED) -# define __cccl_lib_is_constant_evaluated 201811L -# endif +# if defined(_LIBCUDACXX_IS_CONSTANT_EVALUATED) +# define __cccl_lib_is_constant_evaluated 201811L +# endif // # define __cccl_lib_is_layout_compatible 201907L -# define __cccl_lib_is_nothrow_convertible 201806L +# define __cccl_lib_is_nothrow_convertible 201806L // # define __cccl_lib_is_pointer_interconvertible 201907L -# if !defined(_LIBCUDACXX_HAS_NO_THREADS) +# if !defined(_LIBCUDACXX_HAS_NO_THREADS) // # define __cccl_lib_jthread 201911L -# endif -# if !defined(_LIBCUDACXX_HAS_NO_THREADS) && 
!defined(_LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_latch) +# endif +# if !defined(_LIBCUDACXX_HAS_NO_THREADS) && !defined(_LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_latch) // # define __cccl_lib_latch 201907L -# endif +# endif // # define __cccl_lib_list_remove_return_type 201806L // # define __cccl_lib_math_constants 201907L // # define __cccl_lib_polymorphic_allocator 201902L // # define __cccl_lib_ranges 201811L // # define __cccl_lib_remove_cvref 201711L -# if !defined(_LIBCUDACXX_HAS_NO_THREADS) && !defined(_LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore) +# if !defined(_LIBCUDACXX_HAS_NO_THREADS) && !defined(_LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore) // # define __cccl_lib_semaphore 201907L -# endif +# endif // # undef __cccl_lib_shared_ptr_arrays // # define __cccl_lib_shared_ptr_arrays 201707L // # define __cccl_lib_shift 201806L @@ -396,7 +396,7 @@ __cpp_lib_void_t 201411L // # define __cccl_lib_to_address 201711L // # define __cccl_lib_to_array 201907L // # define __cccl_lib_type_identity 201806L -# define __cccl_lib_unwrap_ref 201811L +# define __cccl_lib_unwrap_ref 201811L #endif // _CCCL_STD_VER > 2017 #if _CCCL_STD_VER > 2020 @@ -411,9 +411,9 @@ __cpp_lib_void_t 201411L // # undef __cccl_lib_constexpr_memory // # define __cccl_lib_constexpr_memory 202202L // # define __cccl_lib_constexpr_typeinfo 202106L -# define __cccl_lib_forward_like 202207L +# define __cccl_lib_forward_like 202207L // # define __cccl_lib_invoke_r 202106L -# define __cccl_lib_is_scoped_enum 202011L +# define __cccl_lib_is_scoped_enum 202011L // # define __cccl_lib_move_only_function 202110L // # define __cccl_lib_out_ptr 202106L // # define __cccl_lib_ranges_chunk 202202L @@ -430,8 +430,8 @@ __cpp_lib_void_t 201411L // # define __cccl_lib_stdatomic_h 202011L // # define __cccl_lib_string_contains 202011L // # define __cccl_lib_string_resize_and_overwrite 202110L -# define __cccl_lib_to_underlying 202102L -# define __cccl_lib_unreachable 202202L +# define __cccl_lib_to_underlying 202102L +# define __cccl_lib_unreachable 202202L #endif // _CCCL_STD_VER > 2020 diff --git a/libcudacxx/include/cuda/std/expected b/libcudacxx/include/cuda/std/expected index 9469e699f34..81879381ad4 100644 --- a/libcudacxx/include/cuda/std/expected +++ b/libcudacxx/include/cuda/std/expected @@ -12,10 +12,8 @@ #include +#include #include - #include -#include - #endif //_CUDA_STD_EXPECTED diff --git a/libcudacxx/include/cuda/std/functional b/libcudacxx/include/cuda/std/functional index 042e4b4b072..d9b2ccd2ff4 100644 --- a/libcudacxx/include/cuda/std/functional +++ b/libcudacxx/include/cuda/std/functional @@ -13,10 +13,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_FUNCTIONAL diff --git a/libcudacxx/include/cuda/std/initializer_list b/libcudacxx/include/cuda/std/initializer_list index 24296620e85..4224f9a7d5a 100644 --- a/libcudacxx/include/cuda/std/initializer_list +++ b/libcudacxx/include/cuda/std/initializer_list @@ -12,10 +12,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_INITIALIZER_LIST diff --git a/libcudacxx/include/cuda/std/iterator b/libcudacxx/include/cuda/std/iterator index 08568f18628..01ee8962240 100644 --- a/libcudacxx/include/cuda/std/iterator +++ b/libcudacxx/include/cuda/std/iterator @@ -12,10 +12,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_ITERATOR diff --git a/libcudacxx/include/cuda/std/latch b/libcudacxx/include/cuda/std/latch index fde9078ab4f..d3dfaf35f46 100644 --- 
a/libcudacxx/include/cuda/std/latch +++ b/libcudacxx/include/cuda/std/latch @@ -13,14 +13,12 @@ #endif #ifndef _CUDA_STD_LATCH -#define _CUDA_STD_LATCH +# define _CUDA_STD_LATCH -#include +# include -#include - -#include - -#include +# include +# include +# include #endif // _CUDA_STD_LATCH diff --git a/libcudacxx/include/cuda/std/limits b/libcudacxx/include/cuda/std/limits index 16531da3da0..c48b86a5e7d 100644 --- a/libcudacxx/include/cuda/std/limits +++ b/libcudacxx/include/cuda/std/limits @@ -13,10 +13,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_LIMITS diff --git a/libcudacxx/include/cuda/std/mdspan b/libcudacxx/include/cuda/std/mdspan index e9522897ca0..b5fa0ec9506 100644 --- a/libcudacxx/include/cuda/std/mdspan +++ b/libcudacxx/include/cuda/std/mdspan @@ -13,10 +13,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_MDSPAN diff --git a/libcudacxx/include/cuda/std/optional b/libcudacxx/include/cuda/std/optional index 5ecee7594fb..e89476c2737 100644 --- a/libcudacxx/include/cuda/std/optional +++ b/libcudacxx/include/cuda/std/optional @@ -12,10 +12,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_OPTIONAL diff --git a/libcudacxx/include/cuda/std/ranges b/libcudacxx/include/cuda/std/ranges index 56a06f65071..54672905285 100644 --- a/libcudacxx/include/cuda/std/ranges +++ b/libcudacxx/include/cuda/std/ranges @@ -12,10 +12,8 @@ #include +#include #include - #include -#include - #endif //_CUDA_RANGES diff --git a/libcudacxx/include/cuda/std/ratio b/libcudacxx/include/cuda/std/ratio index 97425f38d1e..8ebde7c6f51 100644 --- a/libcudacxx/include/cuda/std/ratio +++ b/libcudacxx/include/cuda/std/ratio @@ -13,10 +13,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_RATIO diff --git a/libcudacxx/include/cuda/std/semaphore b/libcudacxx/include/cuda/std/semaphore index 5b7efef48a3..645fce15fa0 100644 --- a/libcudacxx/include/cuda/std/semaphore +++ b/libcudacxx/include/cuda/std/semaphore @@ -13,14 +13,12 @@ #endif #ifndef _CUDA_STD_SEMAPHORE -#define _CUDA_STD_SEMAPHORE +# define _CUDA_STD_SEMAPHORE -#include +# include -#include - -#include - -#include +# include +# include +# include #endif // _CUDA_STD_SEMAPHORE diff --git a/libcudacxx/include/cuda/std/span b/libcudacxx/include/cuda/std/span index 0388da66871..e3592da7e16 100644 --- a/libcudacxx/include/cuda/std/span +++ b/libcudacxx/include/cuda/std/span @@ -13,10 +13,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_SPAN diff --git a/libcudacxx/include/cuda/std/tuple b/libcudacxx/include/cuda/std/tuple index ee870be346c..5954f9f1878 100644 --- a/libcudacxx/include/cuda/std/tuple +++ b/libcudacxx/include/cuda/std/tuple @@ -13,10 +13,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_TUPLE diff --git a/libcudacxx/include/cuda/std/type_traits b/libcudacxx/include/cuda/std/type_traits index 9eee9b7830a..32f2aa0037c 100644 --- a/libcudacxx/include/cuda/std/type_traits +++ b/libcudacxx/include/cuda/std/type_traits @@ -13,10 +13,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_TYPE_TRAITS diff --git a/libcudacxx/include/cuda/std/utility b/libcudacxx/include/cuda/std/utility index de2b78ca814..09291daf0a1 100644 --- a/libcudacxx/include/cuda/std/utility +++ b/libcudacxx/include/cuda/std/utility @@ -13,10 +13,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_UTILITY diff --git a/libcudacxx/include/cuda/std/variant 
b/libcudacxx/include/cuda/std/variant index 28d59fc012b..d1e4ca8e83d 100644 --- a/libcudacxx/include/cuda/std/variant +++ b/libcudacxx/include/cuda/std/variant @@ -12,10 +12,8 @@ #include +#include #include - #include -#include - #endif //_CUDA_STD_VARIANT diff --git a/libcudacxx/include/cuda/std/version b/libcudacxx/include/cuda/std/version index 2d0cbbe9aab..fddca30c2ce 100644 --- a/libcudacxx/include/cuda/std/version +++ b/libcudacxx/include/cuda/std/version @@ -13,10 +13,8 @@ #include +#include #include - #include -#include - #endif // _CUDA_STD_VERSION diff --git a/libcudacxx/include/cuda/stream_ref b/libcudacxx/include/cuda/stream_ref index 5c2ef3c3d8b..d36c2246550 100644 --- a/libcudacxx/include/cuda/stream_ref +++ b/libcudacxx/include/cuda/stream_ref @@ -38,9 +38,8 @@ private: } // cuda */ -#include // cuda_runtime_api needs to come first - #include +#include // cuda_runtime_api needs to come first #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) # pragma GCC system_header @@ -50,9 +49,9 @@ private: # pragma system_header #endif // no system header -#include #include #include +#include _LIBCUDACXX_BEGIN_NAMESPACE_CUDA @@ -127,7 +126,10 @@ public: } /// Returns the wrapped `cudaStream_t` handle. - _CCCL_NODISCARD constexpr value_type get() const noexcept { return __stream; } + _CCCL_NODISCARD constexpr value_type get() const noexcept + { + return __stream; + } /** * \brief Synchronizes the wrapped stream. @@ -150,7 +152,8 @@ public: _CCCL_NODISCARD bool ready() const { const auto __result = ::cudaStreamQuery(get()); - if (__result == ::cudaErrorNotReady) { + if (__result == ::cudaErrorNotReady) + { return false; } switch (__result) diff --git a/libcudacxx/include/nv/detail/__preprocessor b/libcudacxx/include/nv/detail/__preprocessor index af9382bd13a..b73579246c3 100644 --- a/libcudacxx/include/nv/detail/__preprocessor +++ b/libcudacxx/include/nv/detail/__preprocessor @@ -9,7 +9,7 @@ //===----------------------------------------------------------------------===// #if defined(__GNUC__) -#pragma GCC system_header +# pragma GCC system_header #endif // For all compilers and dialects this header defines: @@ -24,95 +24,153 @@ #if defined(_NV_TARGET_CPP11) # define _NV_EVAL1(...) __VA_ARGS__ -# define _NV_EVAL(...) _NV_EVAL1(__VA_ARGS__) +# define _NV_EVAL(...) _NV_EVAL1(__VA_ARGS__) #else # define _NV_EVAL1(x) x -# define _NV_EVAL(x) _NV_EVAL1(x) +# define _NV_EVAL(x) _NV_EVAL1(x) #endif // C++11 -#define _NV_CONCAT_EVAL1(l, r) _NV_EVAL(l ## r) -#define _NV_CONCAT_EVAL(l, r) _NV_CONCAT_EVAL1(l, r) +#define _NV_CONCAT_EVAL1(l, r) _NV_EVAL(l##r) +#define _NV_CONCAT_EVAL(l, r) _NV_CONCAT_EVAL1(l, r) #define _NV_IF_0(t, f) f #define _NV_IF_1(t, f) t -#define _NV_IF_BIT(b) _NV_EVAL(_NV_IF_##b) -#define _NV_IF__EVAL(fn, t, f) _NV_EVAL(fn(t, f)) +#define _NV_IF_BIT(b) _NV_EVAL(_NV_IF_##b) +#define _NV_IF__EVAL(fn, t, f) _NV_EVAL(fn(t, f)) #define _NV_IF_EVAL(cond, t, f) _NV_IF__EVAL(_NV_IF_BIT(cond), t, f) #define _NV_IF1(cond, t, f) _NV_IF_EVAL(cond, t, f) -#define _NV_IF(cond, t, f) _NV_IF1(_NV_EVAL(cond), _NV_EVAL(t), _NV_EVAL(f)) +#define _NV_IF(cond, t, f) _NV_IF1(_NV_EVAL(cond), _NV_EVAL(t), _NV_EVAL(f)) #if defined(_NV_TARGET_CPP11) // The below mechanisms were derived from: https://gustedt.wordpress.com/2010/06/08/detect-empty-macro-arguments/ -#define _NV_ARG32(...) _NV_EVAL(_NV_ARG32_0(__VA_ARGS__)) -#define _NV_ARG32_0( \ - _0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, \ - _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, ...) 
_31 - -#define _NV_HAS_COMMA(...) _NV_ARG32(__VA_ARGS__, \ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) - -#define _NV_TRIGGER_PARENTHESIS_(...) , - -#define _NV_ISEMPTY(...) \ - _NV_ISEMPTY0( \ - /* test if there is just one argument, eventually an empty \ - one */ \ - _NV_EVAL(_NV_HAS_COMMA(__VA_ARGS__)), \ - /* test if _TRIGGER_PARENTHESIS_ together with the argument \ - adds a comma */ \ - _NV_EVAL(_NV_HAS_COMMA(_NV_TRIGGER_PARENTHESIS_ __VA_ARGS__)), \ - /* test if the argument together with a parenthesis \ - adds a comma */ \ - _NV_EVAL(_NV_HAS_COMMA(__VA_ARGS__ (/*empty*/))), \ - /* test if placing it between _TRIGGER_PARENTHESIS_ and the \ - parenthesis adds a comma */ \ - _NV_EVAL(_NV_HAS_COMMA(_NV_TRIGGER_PARENTHESIS_ __VA_ARGS__ (/*empty*/))) \ - ) - -#define _NV_PASTE5(_0, _1, _2, _3, _4) _0 ## _1 ## _2 ## _3 ## _4 -#define _NV_ISEMPTY0(_0, _1, _2, _3) _NV_HAS_COMMA(_NV_PASTE5(_NV_IS_EMPTY_CASE_, _0, _1, _2, _3)) -#define _NV_IS_EMPTY_CASE_0001 , - - -#define _NV_REMOVE_PAREN(...) _NV_REMOVE_PAREN1(__VA_ARGS__) -#define _NV_REMOVE_PAREN1(...) _NV_STRIP_PAREN(_NV_IF(_NV_TEST_PAREN(__VA_ARGS__), (_NV_STRIP_PAREN(__VA_ARGS__)), (__VA_ARGS__))) - -#define _NV_STRIP_PAREN2(...) __VA_ARGS__ -#define _NV_STRIP_PAREN1(...) _NV_STRIP_PAREN2 __VA_ARGS__ -#define _NV_STRIP_PAREN(...) _NV_STRIP_PAREN1(__VA_ARGS__) - -#define _NV_TEST_PAREN(...) _NV_TEST_PAREN1(__VA_ARGS__) -#define _NV_TEST_PAREN1(...) _NV_TEST_PAREN2(_NV_TEST_PAREN_DUMMY __VA_ARGS__) -#define _NV_TEST_PAREN2(...) _NV_TEST_PAREN3(_NV_CONCAT_EVAL(_, __VA_ARGS__)) -#define _NV_TEST_PAREN3(...) _NV_EVAL(_NV_FIRST_ARG(__VA_ARGS__)) - -#define __NV_PAREN_YES 1 -#define __NV_PAREN_NO 0 - -#define _NV_TEST_PAREN_DUMMY(...) _NV_PAREN_YES -#define __NV_TEST_PAREN_DUMMY __NV_PAREN_NO, - -#define _NV_FIRST_ARG1(x, ...) x -#define _NV_FIRST_ARG(x, ...) _NV_FIRST_ARG1(x) - -#define _NV_REMOVE_FIRST_ARGS1(...) __VA_ARGS__ -#define _NV_REMOVE_FIRST_ARGS(x, ...) _NV_REMOVE_FIRST_ARGS1(__VA_ARGS__) - -#define _NV_NUM_ARGS(...) _NV_NUM_ARGS0(__VA_ARGS__) -#define _NV_NUM_ARGS0(...) _NV_EVAL(_NV_NUM_ARGS1(__VA_ARGS__)) -#define _NV_NUM_ARGS1(...) _NV_IF(_NV_ISEMPTY(__VA_ARGS__), 0, _NV_NUM_ARGS2(__VA_ARGS__)) -#define _NV_NUM_ARGS2(...) _NV_ARG32(__VA_ARGS__, \ - 31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16, \ - 15,14,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) - -#define _NV_DISPATCH_N_IMPL1(name, ...) _NV_EVAL(name(__VA_ARGS__)) -#define _NV_DISPATCH_N_IMPL0(depth, name, ...) _NV_DISPATCH_N_IMPL1(_NV_CONCAT_EVAL(name, depth), __VA_ARGS__) -#define _NV_DISPATCH_N_IMPL(name, ...) _NV_DISPATCH_N_IMPL0(_NV_NUM_ARGS(__VA_ARGS__), name, __VA_ARGS__) -#define _NV_DISPATCH_N_ARY(name, ...) _NV_DISPATCH_N_IMPL(name, __VA_ARGS__) +# define _NV_ARG32(...) _NV_EVAL(_NV_ARG32_0(__VA_ARGS__)) +# define _NV_ARG32_0( \ + _0, \ + _1, \ + _2, \ + _3, \ + _4, \ + _5, \ + _6, \ + _7, \ + _8, \ + _9, \ + _10, \ + _11, \ + _12, \ + _13, \ + _14, \ + _15, \ + _16, \ + _17, \ + _18, \ + _19, \ + _20, \ + _21, \ + _22, \ + _23, \ + _24, \ + _25, \ + _26, \ + _27, \ + _28, \ + _29, \ + _30, \ + _31, \ + ...) \ + _31 + +# define _NV_HAS_COMMA(...) \ + _NV_ARG32(__VA_ARGS__, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) + +# define _NV_TRIGGER_PARENTHESIS_(...) , + +# define _NV_ISEMPTY(...) 
\ + _NV_ISEMPTY0(/* test if there is just one argument, eventually an empty \ + one */ \ + _NV_EVAL(_NV_HAS_COMMA(__VA_ARGS__)), /* test if _TRIGGER_PARENTHESIS_ together with the argument \ + adds a comma */ \ + _NV_EVAL(_NV_HAS_COMMA(_NV_TRIGGER_PARENTHESIS_ __VA_ARGS__)), /* test if the argument together with \ + a parenthesis adds a comma */ \ + _NV_EVAL(_NV_HAS_COMMA(__VA_ARGS__(/*empty*/))), /* test if placing it between _TRIGGER_PARENTHESIS_ \ + and the parenthesis adds a comma */ \ + _NV_EVAL(_NV_HAS_COMMA(_NV_TRIGGER_PARENTHESIS_ __VA_ARGS__(/*empty*/)))) + +# define _NV_PASTE5(_0, _1, _2, _3, _4) _0##_1##_2##_3##_4 +# define _NV_ISEMPTY0(_0, _1, _2, _3) _NV_HAS_COMMA(_NV_PASTE5(_NV_IS_EMPTY_CASE_, _0, _1, _2, _3)) +# define _NV_IS_EMPTY_CASE_0001 , + +# define _NV_REMOVE_PAREN(...) _NV_REMOVE_PAREN1(__VA_ARGS__) +# define _NV_REMOVE_PAREN1(...) \ + _NV_STRIP_PAREN(_NV_IF(_NV_TEST_PAREN(__VA_ARGS__), (_NV_STRIP_PAREN(__VA_ARGS__)), (__VA_ARGS__))) + +# define _NV_STRIP_PAREN2(...) __VA_ARGS__ +# define _NV_STRIP_PAREN1(...) _NV_STRIP_PAREN2 __VA_ARGS__ +# define _NV_STRIP_PAREN(...) _NV_STRIP_PAREN1(__VA_ARGS__) + +# define _NV_TEST_PAREN(...) _NV_TEST_PAREN1(__VA_ARGS__) +# define _NV_TEST_PAREN1(...) _NV_TEST_PAREN2(_NV_TEST_PAREN_DUMMY __VA_ARGS__) +# define _NV_TEST_PAREN2(...) _NV_TEST_PAREN3(_NV_CONCAT_EVAL(_, __VA_ARGS__)) +# define _NV_TEST_PAREN3(...) _NV_EVAL(_NV_FIRST_ARG(__VA_ARGS__)) + +# define __NV_PAREN_YES 1 +# define __NV_PAREN_NO 0 + +# define _NV_TEST_PAREN_DUMMY(...) _NV_PAREN_YES +# define __NV_TEST_PAREN_DUMMY __NV_PAREN_NO, + +# define _NV_FIRST_ARG1(x, ...) x +# define _NV_FIRST_ARG(x, ...) _NV_FIRST_ARG1(x) + +# define _NV_REMOVE_FIRST_ARGS1(...) __VA_ARGS__ +# define _NV_REMOVE_FIRST_ARGS(x, ...) _NV_REMOVE_FIRST_ARGS1(__VA_ARGS__) + +# define _NV_NUM_ARGS(...) _NV_NUM_ARGS0(__VA_ARGS__) +# define _NV_NUM_ARGS0(...) _NV_EVAL(_NV_NUM_ARGS1(__VA_ARGS__)) +# define _NV_NUM_ARGS1(...) _NV_IF(_NV_ISEMPTY(__VA_ARGS__), 0, _NV_NUM_ARGS2(__VA_ARGS__)) +# define _NV_NUM_ARGS2(...) \ + _NV_ARG32( \ + __VA_ARGS__, \ + 31, \ + 30, \ + 29, \ + 28, \ + 27, \ + 26, \ + 25, \ + 24, \ + 23, \ + 22, \ + 21, \ + 20, \ + 19, \ + 18, \ + 17, \ + 16, \ + 15, \ + 14, \ + 13, \ + 12, \ + 11, \ + 10, \ + 9, \ + 8, \ + 7, \ + 6, \ + 5, \ + 4, \ + 3, \ + 2, \ + 1, \ + 0) + +# define _NV_DISPATCH_N_IMPL1(name, ...) _NV_EVAL(name(__VA_ARGS__)) +# define _NV_DISPATCH_N_IMPL0(depth, name, ...) _NV_DISPATCH_N_IMPL1(_NV_CONCAT_EVAL(name, depth), __VA_ARGS__) +# define _NV_DISPATCH_N_IMPL(name, ...) _NV_DISPATCH_N_IMPL0(_NV_NUM_ARGS(__VA_ARGS__), name, __VA_ARGS__) +# define _NV_DISPATCH_N_ARY(name, ...) 
_NV_DISPATCH_N_IMPL(name, __VA_ARGS__) #endif // C++11 diff --git a/libcudacxx/include/nv/detail/__target_macros b/libcudacxx/include/nv/detail/__target_macros index 6d108021b41..59df8dfd188 100644 --- a/libcudacxx/include/nv/detail/__target_macros +++ b/libcudacxx/include/nv/detail/__target_macros @@ -14,42 +14,42 @@ #include #if defined(__GNUC__) -#pragma GCC system_header +# pragma GCC system_header #endif -# define _NV_TARGET_ARCH_TO_SELECTOR_350 nv::target::sm_35 -# define _NV_TARGET_ARCH_TO_SELECTOR_370 nv::target::sm_37 -# define _NV_TARGET_ARCH_TO_SELECTOR_500 nv::target::sm_50 -# define _NV_TARGET_ARCH_TO_SELECTOR_520 nv::target::sm_52 -# define _NV_TARGET_ARCH_TO_SELECTOR_530 nv::target::sm_53 -# define _NV_TARGET_ARCH_TO_SELECTOR_600 nv::target::sm_60 -# define _NV_TARGET_ARCH_TO_SELECTOR_610 nv::target::sm_61 -# define _NV_TARGET_ARCH_TO_SELECTOR_620 nv::target::sm_62 -# define _NV_TARGET_ARCH_TO_SELECTOR_700 nv::target::sm_70 -# define _NV_TARGET_ARCH_TO_SELECTOR_720 nv::target::sm_72 -# define _NV_TARGET_ARCH_TO_SELECTOR_750 nv::target::sm_75 -# define _NV_TARGET_ARCH_TO_SELECTOR_800 nv::target::sm_80 -# define _NV_TARGET_ARCH_TO_SELECTOR_860 nv::target::sm_86 -# define _NV_TARGET_ARCH_TO_SELECTOR_870 nv::target::sm_87 -# define _NV_TARGET_ARCH_TO_SELECTOR_890 nv::target::sm_89 -# define _NV_TARGET_ARCH_TO_SELECTOR_900 nv::target::sm_90 - -# define _NV_TARGET_ARCH_TO_SM_350 35 -# define _NV_TARGET_ARCH_TO_SM_370 37 -# define _NV_TARGET_ARCH_TO_SM_500 50 -# define _NV_TARGET_ARCH_TO_SM_520 52 -# define _NV_TARGET_ARCH_TO_SM_530 53 -# define _NV_TARGET_ARCH_TO_SM_600 60 -# define _NV_TARGET_ARCH_TO_SM_610 61 -# define _NV_TARGET_ARCH_TO_SM_620 62 -# define _NV_TARGET_ARCH_TO_SM_700 70 -# define _NV_TARGET_ARCH_TO_SM_720 72 -# define _NV_TARGET_ARCH_TO_SM_750 75 -# define _NV_TARGET_ARCH_TO_SM_800 80 -# define _NV_TARGET_ARCH_TO_SM_860 86 -# define _NV_TARGET_ARCH_TO_SM_870 87 -# define _NV_TARGET_ARCH_TO_SM_890 89 -# define _NV_TARGET_ARCH_TO_SM_900 90 +#define _NV_TARGET_ARCH_TO_SELECTOR_350 nv::target::sm_35 +#define _NV_TARGET_ARCH_TO_SELECTOR_370 nv::target::sm_37 +#define _NV_TARGET_ARCH_TO_SELECTOR_500 nv::target::sm_50 +#define _NV_TARGET_ARCH_TO_SELECTOR_520 nv::target::sm_52 +#define _NV_TARGET_ARCH_TO_SELECTOR_530 nv::target::sm_53 +#define _NV_TARGET_ARCH_TO_SELECTOR_600 nv::target::sm_60 +#define _NV_TARGET_ARCH_TO_SELECTOR_610 nv::target::sm_61 +#define _NV_TARGET_ARCH_TO_SELECTOR_620 nv::target::sm_62 +#define _NV_TARGET_ARCH_TO_SELECTOR_700 nv::target::sm_70 +#define _NV_TARGET_ARCH_TO_SELECTOR_720 nv::target::sm_72 +#define _NV_TARGET_ARCH_TO_SELECTOR_750 nv::target::sm_75 +#define _NV_TARGET_ARCH_TO_SELECTOR_800 nv::target::sm_80 +#define _NV_TARGET_ARCH_TO_SELECTOR_860 nv::target::sm_86 +#define _NV_TARGET_ARCH_TO_SELECTOR_870 nv::target::sm_87 +#define _NV_TARGET_ARCH_TO_SELECTOR_890 nv::target::sm_89 +#define _NV_TARGET_ARCH_TO_SELECTOR_900 nv::target::sm_90 + +#define _NV_TARGET_ARCH_TO_SM_350 35 +#define _NV_TARGET_ARCH_TO_SM_370 37 +#define _NV_TARGET_ARCH_TO_SM_500 50 +#define _NV_TARGET_ARCH_TO_SM_520 52 +#define _NV_TARGET_ARCH_TO_SM_530 53 +#define _NV_TARGET_ARCH_TO_SM_600 60 +#define _NV_TARGET_ARCH_TO_SM_610 61 +#define _NV_TARGET_ARCH_TO_SM_620 62 +#define _NV_TARGET_ARCH_TO_SM_700 70 +#define _NV_TARGET_ARCH_TO_SM_720 72 +#define _NV_TARGET_ARCH_TO_SM_750 75 +#define _NV_TARGET_ARCH_TO_SM_800 80 +#define _NV_TARGET_ARCH_TO_SM_860 86 +#define _NV_TARGET_ARCH_TO_SM_870 87 +#define _NV_TARGET_ARCH_TO_SM_890 89 +#define _NV_TARGET_ARCH_TO_SM_900 90 
// Only enable when compiling for CUDA/stdpar #if defined(_NV_COMPILER_NVCXX) && defined(_NVHPC_CUDA) @@ -71,22 +71,22 @@ # define _NV_TARGET_VAL_SM_89 nv::target::sm_89 # define _NV_TARGET_VAL_SM_90 nv::target::sm_90 -# define _NV_TARGET___NV_IS_HOST nv::target::is_host +# define _NV_TARGET___NV_IS_HOST nv::target::is_host # define _NV_TARGET___NV_IS_DEVICE nv::target::is_device # define _NV_TARGET___NV_ANY_TARGET (nv::target::any_target) -# define _NV_TARGET___NV_NO_TARGET (nv::target::no_target) +# define _NV_TARGET___NV_NO_TARGET (nv::target::no_target) # if defined(NV_TARGET_SM_INTEGER_LIST) # define NV_TARGET_MINIMUM_SM_SELECTOR _NV_FIRST_ARG(NV_TARGET_SM_SELECTOR_LIST) -# define NV_TARGET_MINIMUM_SM_INTEGER _NV_FIRST_ARG(NV_TARGET_SM_INTEGER_LIST) -# define __CUDA_MINIMUM_ARCH__ _NV_CONCAT_EVAL(_NV_FIRST_ARG(NV_TARGET_SM_INTEGER_LIST), 0) +# define NV_TARGET_MINIMUM_SM_INTEGER _NV_FIRST_ARG(NV_TARGET_SM_INTEGER_LIST) +# define __CUDA_MINIMUM_ARCH__ _NV_CONCAT_EVAL(_NV_FIRST_ARG(NV_TARGET_SM_INTEGER_LIST), 0) # endif # define _NV_TARGET_PROVIDES(q) nv::target::provides(q) # define _NV_TARGET_IS_EXACTLY(q) nv::target::is_exactly(q) -#elif defined(_NV_COMPILER_NVCC) || defined (_NV_COMPILER_CLANG_CUDA) +#elif defined(_NV_COMPILER_NVCC) || defined(_NV_COMPILER_CLANG_CUDA) # define _NV_TARGET_VAL_SM_35 350 # define _NV_TARGET_VAL_SM_37 370 @@ -106,10 +106,10 @@ # define _NV_TARGET_VAL_SM_90 900 # if defined(__CUDA_ARCH__) -# define _NV_TARGET_VAL __CUDA_ARCH__ +# define _NV_TARGET_VAL __CUDA_ARCH__ # define NV_TARGET_MINIMUM_SM_SELECTOR _NV_CONCAT_EVAL(_NV_TARGET_ARCH_TO_SELECTOR_, __CUDA_ARCH__) -# define NV_TARGET_MINIMUM_SM_INTEGER _NV_CONCAT_EVAL(_NV_TARGET_ARCH_TO_SM_, __CUDA_ARCH__) -# define __CUDA_MINIMUM_ARCH__ __CUDA_ARCH__ +# define NV_TARGET_MINIMUM_SM_INTEGER _NV_CONCAT_EVAL(_NV_TARGET_ARCH_TO_SM_, __CUDA_ARCH__) +# define __CUDA_MINIMUM_ARCH__ __CUDA_ARCH__ # endif # if defined(__CUDA_ARCH__) @@ -197,22 +197,22 @@ #define _NV_TARGET___NV_IS_EXACTLY_SM_89 (_NV_TARGET_IS_EXACTLY(_NV_TARGET_VAL_SM_89)) #define _NV_TARGET___NV_IS_EXACTLY_SM_90 (_NV_TARGET_IS_EXACTLY(_NV_TARGET_VAL_SM_90)) -#define NV_PROVIDES_SM_35 __NV_PROVIDES_SM_35 -#define NV_PROVIDES_SM_37 __NV_PROVIDES_SM_37 -#define NV_PROVIDES_SM_50 __NV_PROVIDES_SM_50 -#define NV_PROVIDES_SM_52 __NV_PROVIDES_SM_52 -#define NV_PROVIDES_SM_53 __NV_PROVIDES_SM_53 -#define NV_PROVIDES_SM_60 __NV_PROVIDES_SM_60 -#define NV_PROVIDES_SM_61 __NV_PROVIDES_SM_61 -#define NV_PROVIDES_SM_62 __NV_PROVIDES_SM_62 -#define NV_PROVIDES_SM_70 __NV_PROVIDES_SM_70 -#define NV_PROVIDES_SM_72 __NV_PROVIDES_SM_72 -#define NV_PROVIDES_SM_75 __NV_PROVIDES_SM_75 -#define NV_PROVIDES_SM_80 __NV_PROVIDES_SM_80 -#define NV_PROVIDES_SM_86 __NV_PROVIDES_SM_86 -#define NV_PROVIDES_SM_87 __NV_PROVIDES_SM_87 -#define NV_PROVIDES_SM_89 __NV_PROVIDES_SM_89 -#define NV_PROVIDES_SM_90 __NV_PROVIDES_SM_90 +#define NV_PROVIDES_SM_35 __NV_PROVIDES_SM_35 +#define NV_PROVIDES_SM_37 __NV_PROVIDES_SM_37 +#define NV_PROVIDES_SM_50 __NV_PROVIDES_SM_50 +#define NV_PROVIDES_SM_52 __NV_PROVIDES_SM_52 +#define NV_PROVIDES_SM_53 __NV_PROVIDES_SM_53 +#define NV_PROVIDES_SM_60 __NV_PROVIDES_SM_60 +#define NV_PROVIDES_SM_61 __NV_PROVIDES_SM_61 +#define NV_PROVIDES_SM_62 __NV_PROVIDES_SM_62 +#define NV_PROVIDES_SM_70 __NV_PROVIDES_SM_70 +#define NV_PROVIDES_SM_72 __NV_PROVIDES_SM_72 +#define NV_PROVIDES_SM_75 __NV_PROVIDES_SM_75 +#define NV_PROVIDES_SM_80 __NV_PROVIDES_SM_80 +#define NV_PROVIDES_SM_86 __NV_PROVIDES_SM_86 +#define NV_PROVIDES_SM_87 __NV_PROVIDES_SM_87 
+#define NV_PROVIDES_SM_89 __NV_PROVIDES_SM_89 +#define NV_PROVIDES_SM_90 __NV_PROVIDES_SM_90 #define NV_IS_EXACTLY_SM_35 __NV_IS_EXACTLY_SM_35 #define NV_IS_EXACTLY_SM_37 __NV_IS_EXACTLY_SM_37 @@ -235,11 +235,11 @@ // Will re-enable for nvcc below. #define NV_HAS_FEATURE_SM_90a NV_NO_TARGET -#define NV_IS_HOST __NV_IS_HOST -#define NV_IS_DEVICE __NV_IS_DEVICE +#define NV_IS_HOST __NV_IS_HOST +#define NV_IS_DEVICE __NV_IS_DEVICE -#define NV_ANY_TARGET __NV_ANY_TARGET -#define NV_NO_TARGET __NV_NO_TARGET +#define NV_ANY_TARGET __NV_ANY_TARGET +#define NV_NO_TARGET __NV_NO_TARGET // Platform invoke mechanisms #if defined(_NV_COMPILER_NVCXX) && defined(_NVHPC_CUDA) @@ -249,11 +249,9 @@ # define _NV_BLOCK_EXPAND(...) _NV_REMOVE_PAREN(__VA_ARGS__) # define _NV_TARGET_IF(cond, t, ...) \ - (if target _NV_ARCH_COND(cond) { \ - _NV_BLOCK_EXPAND(t) \ - } else { _NV_BLOCK_EXPAND(__VA_ARGS__) }) + (if target _NV_ARCH_COND(cond) { _NV_BLOCK_EXPAND(t) } else {_NV_BLOCK_EXPAND(__VA_ARGS__)}) -#elif defined(_NV_COMPILER_NVCC) || defined (_NV_COMPILER_CLANG_CUDA) +#elif defined(_NV_COMPILER_NVCC) || defined(_NV_COMPILER_CLANG_CUDA) # if (_NV_TARGET___NV_IS_EXACTLY_SM_35) # define _NV_TARGET_BOOL___NV_IS_EXACTLY_SM_35 1 @@ -353,7 +351,7 @@ // Re-enable sm_90a support in nvcc. # undef NV_HAS_FEATURE_SM_90a -# define NV_HAS_FEATURE_SM_90a __NV_HAS_FEATURE_SM_90a +# define NV_HAS_FEATURE_SM_90a __NV_HAS_FEATURE_SM_90a # if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900) && defined(__CUDA_ARCH_FEAT_SM90_ALL)) # define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_90a 1 # else @@ -369,7 +367,7 @@ # endif # define _NV_TARGET_BOOL___NV_ANY_TARGET 1 -# define _NV_TARGET_BOOL___NV_NO_TARGET 0 +# define _NV_TARGET_BOOL___NV_NO_TARGET 0 // NVCC Greater than stuff @@ -470,18 +468,24 @@ # endif # define _NV_ARCH_COND_CAT1(cond) _NV_TARGET_BOOL_##cond -# define _NV_ARCH_COND_CAT(cond) _NV_EVAL(_NV_ARCH_COND_CAT1(cond)) +# define _NV_ARCH_COND_CAT(cond) _NV_EVAL(_NV_ARCH_COND_CAT1(cond)) -# define _NV_TARGET_EMPTY_PARAM ; +# define _NV_TARGET_EMPTY_PARAM ; # if defined(_NV_TARGET_CPP11) -# define _NV_BLOCK_EXPAND(...) { _NV_REMOVE_PAREN(__VA_ARGS__) } -# define _NV_TARGET_IF(cond, t, ...) _NV_IF( _NV_ARCH_COND_CAT(cond), t, __VA_ARGS__) +# define _NV_BLOCK_EXPAND(...) \ + { \ + _NV_REMOVE_PAREN(__VA_ARGS__) \ + } +# define _NV_TARGET_IF(cond, t, ...) 
_NV_IF(_NV_ARCH_COND_CAT(cond), t, __VA_ARGS__) # else // = 201103L) || \ - (defined(_MSC_VER) && _MSVC_LANG >= 201103L)) +#if (!defined(__ibmxl__)) \ + && ((defined(__cplusplus) && __cplusplus >= 201103L) || (defined(_MSC_VER) && _MSVC_LANG >= 201103L)) # define _NV_TARGET_CPP11 #endif - // Hide `if target` support from NVRTC #if defined(_NV_TARGET_CPP11) && !defined(__CUDACC_RTC__) -#if defined(_NV_COMPILER_NVCXX) -# define _NV_BITSET_ATTRIBUTE [[nv::__target_bitset]] -#else -# define _NV_BITSET_ATTRIBUTE -#endif +# if defined(_NV_COMPILER_NVCXX) +# define _NV_BITSET_ATTRIBUTE [[nv::__target_bitset]] +# else +# define _NV_BITSET_ATTRIBUTE +# endif + +namespace nv +{ +namespace target +{ +namespace detail +{ + +typedef unsigned long long base_int_t; + +// No host specialization +constexpr base_int_t all_hosts = 1; + +// NVIDIA GPUs +constexpr base_int_t sm_35_bit = 1 << 1; +constexpr base_int_t sm_37_bit = 1 << 2; +constexpr base_int_t sm_50_bit = 1 << 3; +constexpr base_int_t sm_52_bit = 1 << 4; +constexpr base_int_t sm_53_bit = 1 << 5; +constexpr base_int_t sm_60_bit = 1 << 6; +constexpr base_int_t sm_61_bit = 1 << 7; +constexpr base_int_t sm_62_bit = 1 << 8; +constexpr base_int_t sm_70_bit = 1 << 9; +constexpr base_int_t sm_72_bit = 1 << 10; +constexpr base_int_t sm_75_bit = 1 << 11; +constexpr base_int_t sm_80_bit = 1 << 12; +constexpr base_int_t sm_86_bit = 1 << 13; +constexpr base_int_t sm_87_bit = 1 << 14; +constexpr base_int_t sm_89_bit = 1 << 15; +constexpr base_int_t sm_90_bit = 1 << 16; +constexpr base_int_t all_devices = + sm_35_bit | sm_37_bit | sm_50_bit | sm_52_bit | sm_53_bit | sm_60_bit | sm_61_bit | sm_62_bit | sm_70_bit | sm_72_bit + | sm_75_bit | sm_80_bit | sm_86_bit | sm_87_bit | sm_89_bit | sm_90_bit; + +// Store a set of targets as a set of bits +struct _NV_BITSET_ATTRIBUTE target_description +{ + base_int_t targets; + + constexpr target_description(base_int_t a) + : targets(a) + {} +}; + +// The type of the user-visible names of the NVIDIA GPU targets +enum class sm_selector : base_int_t +{ + sm_35 = 35, + sm_37 = 37, + sm_50 = 50, + sm_52 = 52, + sm_53 = 53, + sm_60 = 60, + sm_61 = 61, + sm_62 = 62, + sm_70 = 70, + sm_72 = 72, + sm_75 = 75, + sm_80 = 80, + sm_86 = 86, + sm_87 = 87, + sm_89 = 89, + sm_90 = 90, +}; + +constexpr base_int_t toint(sm_selector a) +{ + return static_cast(a); +} + +constexpr base_int_t bitexact(sm_selector a) +{ + return toint(a) == 35 ? sm_35_bit + : toint(a) == 37 ? sm_37_bit + : toint(a) == 50 ? sm_50_bit + : toint(a) == 52 ? sm_52_bit + : toint(a) == 53 ? sm_53_bit + : toint(a) == 60 ? sm_60_bit + : toint(a) == 61 ? sm_61_bit + : toint(a) == 62 ? sm_62_bit + : toint(a) == 70 ? sm_70_bit + : toint(a) == 72 ? sm_72_bit + : toint(a) == 75 ? sm_75_bit + : toint(a) == 80 ? sm_80_bit + : toint(a) == 86 ? sm_86_bit + : toint(a) == 87 ? sm_87_bit + : toint(a) == 89 ? sm_89_bit + : toint(a) == 90 ? sm_90_bit + : 0; +} + +constexpr base_int_t bitrounddown(sm_selector a) +{ + return toint(a) >= 90 ? sm_90_bit + : toint(a) >= 89 ? sm_89_bit + : toint(a) >= 87 ? sm_87_bit + : toint(a) >= 86 ? sm_86_bit + : toint(a) >= 80 ? sm_80_bit + : toint(a) >= 75 ? sm_75_bit + : toint(a) >= 72 ? sm_72_bit + : toint(a) >= 70 ? sm_70_bit + : toint(a) >= 62 ? sm_62_bit + : toint(a) >= 61 ? sm_61_bit + : toint(a) >= 60 ? sm_60_bit + : toint(a) >= 53 ? sm_53_bit + : toint(a) >= 52 ? sm_52_bit + : toint(a) >= 50 ? sm_50_bit + : toint(a) >= 37 ? sm_37_bit + : toint(a) >= 35 ? 
sm_35_bit + : 0; +} + +// Public API for NVIDIA GPUs + +constexpr target_description is_exactly(sm_selector a) +{ + return target_description(bitexact(a)); +} + +constexpr target_description provides(sm_selector a) +{ + return target_description(~(bitrounddown(a) - 1) & all_devices); +} + +// Boolean operations on target sets + +constexpr target_description operator&&(target_description a, target_description b) +{ + return target_description(a.targets & b.targets); +} + +constexpr target_description operator||(target_description a, target_description b) +{ + return target_description(a.targets | b.targets); +} -namespace nv { - namespace target { - namespace detail { - - typedef unsigned long long base_int_t; - - // No host specialization - constexpr base_int_t all_hosts = 1; - - // NVIDIA GPUs - constexpr base_int_t sm_35_bit = 1 << 1; - constexpr base_int_t sm_37_bit = 1 << 2; - constexpr base_int_t sm_50_bit = 1 << 3; - constexpr base_int_t sm_52_bit = 1 << 4; - constexpr base_int_t sm_53_bit = 1 << 5; - constexpr base_int_t sm_60_bit = 1 << 6; - constexpr base_int_t sm_61_bit = 1 << 7; - constexpr base_int_t sm_62_bit = 1 << 8; - constexpr base_int_t sm_70_bit = 1 << 9; - constexpr base_int_t sm_72_bit = 1 << 10; - constexpr base_int_t sm_75_bit = 1 << 11; - constexpr base_int_t sm_80_bit = 1 << 12; - constexpr base_int_t sm_86_bit = 1 << 13; - constexpr base_int_t sm_87_bit = 1 << 14; - constexpr base_int_t sm_89_bit = 1 << 15; - constexpr base_int_t sm_90_bit = 1 << 16; - constexpr base_int_t all_devices = - sm_35_bit | sm_37_bit | - sm_50_bit | sm_52_bit | sm_53_bit | - sm_60_bit | sm_61_bit | sm_62_bit | - sm_70_bit | sm_72_bit | sm_75_bit | - sm_80_bit | sm_86_bit | sm_87_bit | - sm_89_bit | sm_90_bit; - - // Store a set of targets as a set of bits - struct _NV_BITSET_ATTRIBUTE target_description { - base_int_t targets; - - constexpr target_description(base_int_t a) : targets(a) { } - }; - - // The type of the user-visible names of the NVIDIA GPU targets - enum class sm_selector : base_int_t { - sm_35 = 35, sm_37 = 37, - sm_50 = 50, sm_52 = 52, sm_53 = 53, - sm_60 = 60, sm_61 = 61, sm_62 = 62, - sm_70 = 70, sm_72 = 72, sm_75 = 75, - sm_80 = 80, sm_86 = 86, sm_87 = 87, - sm_89 = 89, sm_90 = 90, - }; - - constexpr base_int_t toint(sm_selector a) { - return static_cast(a); - } - - constexpr base_int_t bitexact(sm_selector a) { - return toint(a) == 35 ? sm_35_bit : - toint(a) == 37 ? sm_37_bit : - toint(a) == 50 ? sm_50_bit : - toint(a) == 52 ? sm_52_bit : - toint(a) == 53 ? sm_53_bit : - toint(a) == 60 ? sm_60_bit : - toint(a) == 61 ? sm_61_bit : - toint(a) == 62 ? sm_62_bit : - toint(a) == 70 ? sm_70_bit : - toint(a) == 72 ? sm_72_bit : - toint(a) == 75 ? sm_75_bit : - toint(a) == 80 ? sm_80_bit : - toint(a) == 86 ? sm_86_bit : - toint(a) == 87 ? sm_87_bit : - toint(a) == 89 ? sm_89_bit : - toint(a) == 90 ? sm_90_bit : 0; - } - - constexpr base_int_t bitrounddown(sm_selector a) { - return toint(a) >= 90 ? sm_90_bit : - toint(a) >= 89 ? sm_89_bit : - toint(a) >= 87 ? sm_87_bit : - toint(a) >= 86 ? sm_86_bit : - toint(a) >= 80 ? sm_80_bit : - toint(a) >= 75 ? sm_75_bit : - toint(a) >= 72 ? sm_72_bit : - toint(a) >= 70 ? sm_70_bit : - toint(a) >= 62 ? sm_62_bit : - toint(a) >= 61 ? sm_61_bit : - toint(a) >= 60 ? sm_60_bit : - toint(a) >= 53 ? sm_53_bit : - toint(a) >= 52 ? sm_52_bit : - toint(a) >= 50 ? sm_50_bit : - toint(a) >= 37 ? sm_37_bit : - toint(a) >= 35 ? 
sm_35_bit : 0; - } - - // Public API for NVIDIA GPUs - - constexpr target_description is_exactly(sm_selector a) { - return target_description(bitexact(a)); - } - - constexpr target_description provides(sm_selector a) { - return target_description(~(bitrounddown(a) - 1) & all_devices); - } - - // Boolean operations on target sets - - constexpr target_description operator&&(target_description a, - target_description b) { - return target_description(a.targets & b.targets); - } - - constexpr target_description operator||(target_description a, - target_description b) { - return target_description(a.targets | b.targets); - } - - constexpr target_description operator!(target_description a) { - return target_description(~a.targets & (all_devices | all_hosts)); - } - } - - using detail::target_description; - using detail::sm_selector; - - // The predicates for basic host/device selection - constexpr target_description is_host = - target_description(detail::all_hosts); - constexpr target_description is_device = - target_description(detail::all_devices); - constexpr target_description any_target = - target_description(detail::all_hosts | detail::all_devices); - constexpr target_description no_target = - target_description(0); - - // The public names for NVIDIA GPU architectures - constexpr sm_selector sm_35 = sm_selector::sm_35; - constexpr sm_selector sm_37 = sm_selector::sm_37; - constexpr sm_selector sm_50 = sm_selector::sm_50; - constexpr sm_selector sm_52 = sm_selector::sm_52; - constexpr sm_selector sm_53 = sm_selector::sm_53; - constexpr sm_selector sm_60 = sm_selector::sm_60; - constexpr sm_selector sm_61 = sm_selector::sm_61; - constexpr sm_selector sm_62 = sm_selector::sm_62; - constexpr sm_selector sm_70 = sm_selector::sm_70; - constexpr sm_selector sm_72 = sm_selector::sm_72; - constexpr sm_selector sm_75 = sm_selector::sm_75; - constexpr sm_selector sm_80 = sm_selector::sm_80; - constexpr sm_selector sm_86 = sm_selector::sm_86; - constexpr sm_selector sm_87 = sm_selector::sm_87; - constexpr sm_selector sm_89 = sm_selector::sm_89; - constexpr sm_selector sm_90 = sm_selector::sm_90; - - using detail::is_exactly; - using detail::provides; - } +constexpr target_description operator!(target_description a) +{ + return target_description(~a.targets & (all_devices | all_hosts)); } +} // namespace detail + +using detail::sm_selector; +using detail::target_description; + +// The predicates for basic host/device selection +constexpr target_description is_host = target_description(detail::all_hosts); +constexpr target_description is_device = target_description(detail::all_devices); +constexpr target_description any_target = target_description(detail::all_hosts | detail::all_devices); +constexpr target_description no_target = target_description(0); + +// The public names for NVIDIA GPU architectures +constexpr sm_selector sm_35 = sm_selector::sm_35; +constexpr sm_selector sm_37 = sm_selector::sm_37; +constexpr sm_selector sm_50 = sm_selector::sm_50; +constexpr sm_selector sm_52 = sm_selector::sm_52; +constexpr sm_selector sm_53 = sm_selector::sm_53; +constexpr sm_selector sm_60 = sm_selector::sm_60; +constexpr sm_selector sm_61 = sm_selector::sm_61; +constexpr sm_selector sm_62 = sm_selector::sm_62; +constexpr sm_selector sm_70 = sm_selector::sm_70; +constexpr sm_selector sm_72 = sm_selector::sm_72; +constexpr sm_selector sm_75 = sm_selector::sm_75; +constexpr sm_selector sm_80 = sm_selector::sm_80; +constexpr sm_selector sm_86 = sm_selector::sm_86; +constexpr sm_selector sm_87 = sm_selector::sm_87; 
+constexpr sm_selector sm_89 = sm_selector::sm_89;
+constexpr sm_selector sm_90 = sm_selector::sm_90;
+
+using detail::is_exactly;
+using detail::provides;
+} // namespace target
+} // namespace nv

 #endif // C++11 && !defined(__CUDACC_RTC__)
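
Editor's note (illustrative only, not part of the patch): the __preprocessor and __target_macros hunks above are formatting-only, but the dispatch machinery they touch is easiest to understand from the consumer side. Below is a minimal sketch, assuming the usual public <nv/target> entry points NV_IF_TARGET / NV_IF_ELSE_TARGET together with the NV_IS_DEVICE and NV_PROVIDES_SM_80 conditions defined in this header; the function name say_where is made up for the example.

// Illustrative sketch -- not part of the patch above.
#include <nv/target>
#include <cstdio>

__host__ __device__ void say_where()
{
  // One branch is selected per compilation pass (host, or each __CUDA_ARCH__),
  // replacing the classic "#ifdef __CUDA_ARCH__" pattern.
  NV_IF_ELSE_TARGET(NV_IS_DEVICE,
                    (printf("compiled for device\n");),
                    (printf("compiled for host\n");))

  // Architecture-gated path: only emitted when the target provides sm_80 or newer.
  NV_IF_TARGET(NV_PROVIDES_SM_80,
               (printf("sm_80+ features may be used here\n");))
}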
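
Similarly illustrative: the <cuda/std/detail/libcxx/include/version> hunk above only re-indents the __cccl_lib_* feature-test macros, which are CCCL's internal stand-ins for the standard __cpp_lib_* names. The following hedged sketch shows the guard pattern they enable; it assumes cuda::std::bit_cast is usable from <cuda/std/bit> whenever __cccl_lib_bit_cast is defined, and float_bits is a hypothetical helper.

// Illustrative sketch -- not part of the patch above.
#include <cuda/std/version>
#include <cuda/std/bit>
#include <cuda/std/cstdint>
#include <cstring>

__host__ __device__ cuda::std::uint32_t float_bits(float f)
{
#if defined(__cccl_lib_bit_cast)
  // Preferred path: well-defined, constexpr-friendly bit reinterpretation.
  return cuda::std::bit_cast<cuda::std::uint32_t>(f);
#else
  // Fallback for older dialects: memcpy-based type punning.
  cuda::std::uint32_t bits;
  memcpy(&bits, &f, sizeof(bits));
  return bits;
#endif
}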