From 722c312d17cc4a684c29977bcd02dd3140cb77db Mon Sep 17 00:00:00 2001 From: ksenia zaitseva Date: Tue, 28 Dec 2021 20:19:36 +0300 Subject: [PATCH 1/7] [RNG] Add mcg59 --- .../rng/detail/curand/onemkl_rng_curand.hpp | 6 + .../rng/detail/mklcpu/onemkl_rng_mklcpu.hpp | 6 + .../rng/detail/mklgpu/onemkl_rng_mklgpu.hpp | 6 + include/oneapi/mkl/rng/detail/rng_loader.hpp | 6 + include/oneapi/mkl/rng/distributions.hpp | 3 +- include/oneapi/mkl/rng/engines.hpp | 71 +++ src/rng/backends/curand/CMakeLists.txt | 1 + src/rng/backends/curand/mcg59.cpp | 110 ++++ .../curand/mkl_rng_curand_wrappers.cpp | 3 +- src/rng/backends/mklcpu/CMakeLists.txt | 1 + src/rng/backends/mklcpu/mcg59.cpp | 578 +++++++++++++++++ .../backends/mklcpu/mkl_rng_cpu_wrappers.cpp | 3 +- src/rng/backends/mklgpu/CMakeLists.txt | 1 + src/rng/backends/mklgpu/mcg59.cpp | 586 ++++++++++++++++++ .../backends/mklgpu/mkl_rng_gpu_wrappers.cpp | 3 +- src/rng/function_table.hpp | 6 + src/rng/rng_loader.cpp | 10 + .../rng/include/engines_api_tests.hpp | 44 +- .../rng/include/rng_test_common.hpp | 21 +- .../rng/include/skip_ahead_test.hpp | 87 ++- .../rng/service/engines_api_test.cpp | 21 + tests/unit_tests/rng/service/skip_ahead.cpp | 20 + .../rng/statistics_check/bernoulli.cpp | 10 + .../rng/statistics_check/bernoulli_usm.cpp | 12 +- .../rng/statistics_check/gaussian.cpp | 20 + .../rng/statistics_check/gaussian_usm.cpp | 20 + .../rng/statistics_check/lognormal.cpp | 20 + .../rng/statistics_check/lognormal_usm.cpp | 20 + .../rng/statistics_check/poisson.cpp | 12 + .../rng/statistics_check/poisson_usm.cpp | 12 + .../rng/statistics_check/uniform.cpp | 25 + .../rng/statistics_check/uniform_usm.cpp | 25 + 32 files changed, 1751 insertions(+), 18 deletions(-) create mode 100755 src/rng/backends/curand/mcg59.cpp create mode 100755 src/rng/backends/mklcpu/mcg59.cpp create mode 100755 src/rng/backends/mklgpu/mcg59.cpp diff --git a/include/oneapi/mkl/rng/detail/curand/onemkl_rng_curand.hpp 
b/include/oneapi/mkl/rng/detail/curand/onemkl_rng_curand.hpp index e63c6ab56..fb6d76a3f 100644 --- a/include/oneapi/mkl/rng/detail/curand/onemkl_rng_curand.hpp +++ b/include/oneapi/mkl/rng/detail/curand/onemkl_rng_curand.hpp @@ -82,6 +82,12 @@ ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(cl::sycl::q ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a( cl::sycl::queue queue, std::initializer_list seed); +ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mcg59(cl::sycl::queue queue, + std::uint64_t seed); + +ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mcg59( + cl::sycl::queue queue, std::initializer_list seed); + } // namespace curand } // namespace rng } // namespace mkl diff --git a/include/oneapi/mkl/rng/detail/mklcpu/onemkl_rng_mklcpu.hpp b/include/oneapi/mkl/rng/detail/mklcpu/onemkl_rng_mklcpu.hpp index 991e8af04..92a37e056 100644 --- a/include/oneapi/mkl/rng/detail/mklcpu/onemkl_rng_mklcpu.hpp +++ b/include/oneapi/mkl/rng/detail/mklcpu/onemkl_rng_mklcpu.hpp @@ -43,6 +43,12 @@ ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(cl::sycl::q ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a( cl::sycl::queue queue, std::initializer_list seed); +ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mcg59(cl::sycl::queue queue, + std::uint64_t seed); + +ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mcg59( + cl::sycl::queue queue, std::initializer_list seed); + } // namespace mklcpu } // namespace rng } // namespace mkl diff --git a/include/oneapi/mkl/rng/detail/mklgpu/onemkl_rng_mklgpu.hpp b/include/oneapi/mkl/rng/detail/mklgpu/onemkl_rng_mklgpu.hpp index 64f49a706..ee3756c29 100644 --- a/include/oneapi/mkl/rng/detail/mklgpu/onemkl_rng_mklgpu.hpp +++ b/include/oneapi/mkl/rng/detail/mklgpu/onemkl_rng_mklgpu.hpp @@ -43,6 +43,12 @@ ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(cl::sycl::q ONEMKL_EXPORT 
oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a( cl::sycl::queue queue, std::initializer_list seed); +ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mcg59(cl::sycl::queue queue, + std::uint64_t seed); + +ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mcg59( + cl::sycl::queue queue, std::initializer_list seed); + } // namespace mklgpu } // namespace rng } // namespace mkl diff --git a/include/oneapi/mkl/rng/detail/rng_loader.hpp b/include/oneapi/mkl/rng/detail/rng_loader.hpp index 2acfac5b5..2df2a12b0 100644 --- a/include/oneapi/mkl/rng/detail/rng_loader.hpp +++ b/include/oneapi/mkl/rng/detail/rng_loader.hpp @@ -45,6 +45,12 @@ ONEMKL_EXPORT engine_impl* create_mrg32k3a(oneapi::mkl::device libkey, cl::sycl: ONEMKL_EXPORT engine_impl* create_mrg32k3a(oneapi::mkl::device libkey, cl::sycl::queue queue, std::initializer_list seed); +ONEMKL_EXPORT engine_impl* create_mcg59(oneapi::mkl::device libkey, cl::sycl::queue queue, + std::uint64_t seed); + +ONEMKL_EXPORT engine_impl* create_mcg59(oneapi::mkl::device libkey, cl::sycl::queue queue, + std::initializer_list seed); + } // namespace detail } // namespace rng } // namespace mkl diff --git a/include/oneapi/mkl/rng/distributions.hpp b/include/oneapi/mkl/rng/distributions.hpp index 3dd4a6330..e8ec98485 100644 --- a/include/oneapi/mkl/rng/distributions.hpp +++ b/include/oneapi/mkl/rng/distributions.hpp @@ -358,7 +358,8 @@ class poisson { template class bits { public: - static_assert(std::is_same::value, "rng bits type is not supported"); + static_assert(std::is_same::value || std::is_same::value, + "rng bits type is not supported"); using result_type = UIntType; }; diff --git a/include/oneapi/mkl/rng/engines.hpp b/include/oneapi/mkl/rng/engines.hpp index 3deb45631..8f2d601df 100644 --- a/include/oneapi/mkl/rng/engines.hpp +++ b/include/oneapi/mkl/rng/engines.hpp @@ -210,6 +210,77 @@ class mrg32k3a { const std::vector& dependencies); }; +// Class oneapi::mkl::rng::mcg59 +// +// Represents 
Mcg59 multiplicative congruential pseudorandom number generator +// +// Supported parallelization methods: +// skip_ahead, leapfrog +class mcg59 { +public: + static constexpr std::uint64_t default_seed = 0; + + mcg59(sycl::queue queue, std::uint64_t seed = default_seed) + : pimpl_(detail::create_mcg59(get_device_id(queue), queue, seed)) {} + +#ifdef ENABLE_MKLCPU_BACKEND + mcg59(backend_selector selector, std::uint64_t seed = default_seed) + : pimpl_(mklcpu::create_mcg59(selector.get_queue(), seed)) {} + +#endif + +#ifdef ENABLE_MKLGPU_BACKEND + mcg59(backend_selector selector, std::uint64_t seed = default_seed) + : pimpl_(mklgpu::create_mcg59(selector.get_queue(), seed)) {} + +#endif + +#ifdef ENABLE_CURAND_BACKEND + mcg59(backend_selector selector, std::uint64_t seed = default_seed) + : pimpl_(curand::create_mcg59(selector.get_queue(), seed)) {} +#endif + + mcg59(const mcg59& other) { + pimpl_.reset(other.pimpl_.get()->copy_state()); + } + + mcg59(mcg59&& other) { + pimpl_ = std::move(other.pimpl_); + } + + mcg59& operator=(const mcg59& other) { + if (this == &other) + return *this; + pimpl_.reset(other.pimpl_.get()->copy_state()); + return *this; + } + + mcg59& operator=(mcg59&& other) { + if (this == &other) + return *this; + pimpl_ = std::move(other.pimpl_); + return *this; + } + +private: + std::unique_ptr pimpl_; + + template + friend void skip_ahead(Engine& engine, std::uint64_t num_to_skip); + + template + friend void leapfrog(Engine& engine, std::uint64_t idx, std::uint64_t stride); + + template + friend void generate(const Distr& distr, Engine& engine, std::int64_t n, + sycl::buffer& r); + + template + friend sycl::event generate(const Distr& distr, Engine& engine, std::int64_t n, + typename Distr::result_type* r, + const std::vector& dependencies); +}; + // Default engine to be used for common cases using default_engine = philox4x32x10; diff --git a/src/rng/backends/curand/CMakeLists.txt b/src/rng/backends/curand/CMakeLists.txt index 41c2da185..866567fd2 100644 ---
a/src/rng/backends/curand/CMakeLists.txt +++ b/src/rng/backends/curand/CMakeLists.txt @@ -62,6 +62,7 @@ find_package(cuRAND REQUIRED) set(SOURCES philox4x32x10.cpp mrg32k3a.cpp + mcg59.cpp $<$: mkl_rng_curand_wrappers.cpp>) add_library(${LIB_NAME}) diff --git a/src/rng/backends/curand/mcg59.cpp b/src/rng/backends/curand/mcg59.cpp new file mode 100755 index 000000000..f473d1744 --- /dev/null +++ b/src/rng/backends/curand/mcg59.cpp @@ -0,0 +1,110 @@ +/******************************************************************************* + * cuRAND back-end Copyright (c) 2021, The Regents of the University of + * California, through Lawrence Berkeley National Laboratory (subject to receipt + * of any required approvals from the U.S. Dept. of Energy). All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * (1) Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * (2) Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * (3) Neither the name of the University of California, Lawrence Berkeley + * National Laboratory, U.S. Dept. of Energy nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * You are under no obligation whatsoever to provide any bug fixes, patches, + * or upgrades to the features, functionality or performance of the source + * code ("Enhancements") to anyone; however, if you choose to make your + * Enhancements available either publicly, or directly to Lawrence Berkeley + * National Laboratory, without imposing a separate written license agreement + * for such Enhancements, then you hereby grant the following license: a + * non-exclusive, royalty-free perpetual license to install, use, modify, + * prepare derivative works, incorporate into other computer software, + * distribute, and sublicense such enhancements or derivative works thereof, + * in binary and source code form. + * + * If you have questions about your rights to use or distribute this software, + * please contact Berkeley Lab's Intellectual Property Office at + * IPO@lbl.gov. + * + * NOTICE. This Software was developed under funding from the U.S. Department + * of Energy and the U.S. Government consequently retains certain rights. As + * such, the U.S. Government has been granted for itself and others acting on + * its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the + * Software to reproduce, distribute copies to the public, prepare derivative + * works, and perform publicly and display publicly, and to permit others to do + * so. 
+ ******************************************************************************/ + +#include +#include +#include + +#include "oneapi/mkl/rng/detail/engine_impl.hpp" +// #include "oneapi/mkl/rng/engines.hpp" +#include "curand_helper.hpp" +#include "oneapi/mkl/exceptions.hpp" +#include "oneapi/mkl/rng/detail/curand/onemkl_rng_curand.hpp" + +namespace oneapi { +namespace mkl { +namespace rng { +namespace curand { +/* + * Note that cuRAND consists of two pieces: a host (CPU) API and a device (GPU) + * API. The host API acts like any standard library; the `curand.h' header is + * included and the functions can be called as usual. The generator is + * instantiated on the host and random numbers can be generated on either the + * host CPU or device. For device-side generation, calls to the library happen + * on the host, but the actual work of RNG is done on the device. In this case, + * the resulting random numbers are stored in global memory on the device. These + * random numbers can then be used in other kernels or be copied back to the + * host for further processing. For host-side generation, everything is done on + * the host, and the random numbers are stored in host memory. + * + * The second piece is the device header, `curand_kernel.h'. Using this file + * permits setting up random number generator states and generating sequences of + * random numbers. This allows random numbers to be generated and immediately + * consumed in other kernels without requiring the random numbers to be written + * to, and read from, global memory. + * + * Here we utilize the host API since this is most aligned with how oneMKL + * generates random numbers. 
+ * + */ + +oneapi::mkl::rng::detail::engine_impl* create_mcg59(sycl::queue queue, std::uint64_t seed) { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + return nullptr; +} + +oneapi::mkl::rng::detail::engine_impl* create_mcg59( + cl::sycl::queue queue, std::initializer_list seed) { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + return nullptr; +} + +} // namespace curand +} // namespace rng +} // namespace mkl +} // namespace oneapi diff --git a/src/rng/backends/curand/mkl_rng_curand_wrappers.cpp b/src/rng/backends/curand/mkl_rng_curand_wrappers.cpp index 393433c81..2c9671e37 100644 --- a/src/rng/backends/curand/mkl_rng_curand_wrappers.cpp +++ b/src/rng/backends/curand/mkl_rng_curand_wrappers.cpp @@ -64,5 +64,6 @@ extern "C" ONEMKL_EXPORT rng_function_table_t mkl_rng_table = { WRAPPER_VERSION, oneapi::mkl::rng::curand::create_philox4x32x10, oneapi::mkl::rng::curand::create_philox4x32x10, oneapi::mkl::rng::curand::create_mrg32k3a, - oneapi::mkl::rng::curand::create_mrg32k3a + oneapi::mkl::rng::curand::create_mrg32k3a, oneapi::mkl::rng::curand::create_mcg59, + oneapi::mkl::rng::curand::create_mcg59 }; diff --git a/src/rng/backends/mklcpu/CMakeLists.txt b/src/rng/backends/mklcpu/CMakeLists.txt index 984eeabe3..6a54ad930 100644 --- a/src/rng/backends/mklcpu/CMakeLists.txt +++ b/src/rng/backends/mklcpu/CMakeLists.txt @@ -25,6 +25,7 @@ find_package(MKL REQUIRED) set(SOURCES cpu_common.hpp philox4x32x10.cpp mrg32k3a.cpp + mcg59.cpp $<$: mkl_rng_cpu_wrappers.cpp> ) diff --git a/src/rng/backends/mklcpu/mcg59.cpp b/src/rng/backends/mklcpu/mcg59.cpp new file mode 100755 index 000000000..2e1351adf --- /dev/null +++ b/src/rng/backends/mklcpu/mcg59.cpp @@ -0,0 +1,578 @@ +/******************************************************************************* +* Copyright 2021 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#include +#include + +#include "mkl_vsl.h" + +#include "oneapi/mkl/exceptions.hpp" +#include "oneapi/mkl/rng/detail/engine_impl.hpp" +#include "oneapi/mkl/rng/detail/mklcpu/onemkl_rng_mklcpu.hpp" + +#include "cpu_common.hpp" + +namespace oneapi { +namespace mkl { +namespace rng { +namespace mklcpu { + +using namespace cl; + +class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { +public: + mcg59_impl(cl::sycl::queue queue, std::uint64_t seed) + : oneapi::mkl::rng::detail::engine_impl(queue) { + vslNewStream(&stream_, VSL_BRNG_MCG59, seed); + state_size_ = vslGetStreamSize(stream_); + } + + mcg59_impl(const mcg59_impl* other) : oneapi::mkl::rng::detail::engine_impl(*other) { + vslCopyStream(&stream_, other->stream_); + state_size_ = vslGetStreamSize(stream_); + } + + // Buffers APIs + + virtual void generate(const uniform& distr, std::int64_t n, + cl::sycl::buffer& r) override { + sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](sycl::handler& cgh) { + auto acc_stream = stream_buf.get_access(cgh); + auto acc_r = r.get_access(cgh); + host_task>(cgh, [=]() { + vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, + static_cast(acc_stream.get_pointer()), n, + acc_r.get_pointer(), distr.a(), distr.b()); + }); + }); + } + + virtual void generate(const uniform& distr, std::int64_t n, + cl::sycl::buffer& r) override { + sycl::buffer stream_buf(static_cast(stream_), state_size_); + 
queue_.submit([&](sycl::handler& cgh) { + auto acc_stream = stream_buf.get_access(cgh); + auto acc_r = r.get_access(cgh); + host_task>(cgh, [=]() { + vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, + static_cast(acc_stream.get_pointer()), n, + acc_r.get_pointer(), distr.a(), distr.b()); + }); + }); + } + + virtual void generate(const uniform& distr, + std::int64_t n, cl::sycl::buffer& r) override { + sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](sycl::handler& cgh) { + auto acc_stream = stream_buf.get_access(cgh); + auto acc_r = r.get_access(cgh); + host_task>(cgh, [=]() { + viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, + static_cast(acc_stream.get_pointer()), n, + acc_r.get_pointer(), distr.a(), distr.b()); + }); + }); + } + + virtual void generate(const uniform& distr, std::int64_t n, + cl::sycl::buffer& r) override { + sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](sycl::handler& cgh) { + auto acc_stream = stream_buf.get_access(cgh); + auto acc_r = r.get_access(cgh); + host_task>(cgh, [=]() { + vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, + static_cast(acc_stream.get_pointer()), n, + acc_r.get_pointer(), distr.a(), distr.b()); + }); + }); + } + + virtual void generate(const uniform& distr, std::int64_t n, + cl::sycl::buffer& r) override { + sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](sycl::handler& cgh) { + auto acc_stream = stream_buf.get_access(cgh); + auto acc_r = r.get_access(cgh); + host_task>(cgh, [=]() { + vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, + static_cast(acc_stream.get_pointer()), n, + acc_r.get_pointer(), distr.a(), distr.b()); + }); + }); + } + + virtual void generate(const gaussian& distr, + std::int64_t n, cl::sycl::buffer& r) override { + sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](sycl::handler& cgh) { + auto acc_stream = stream_buf.get_access(cgh); + auto acc_r = r.get_access(cgh); + host_task>(cgh, 
[=]() { + vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, + static_cast(acc_stream.get_pointer()), n, + acc_r.get_pointer(), distr.mean(), distr.stddev()); + }); + }); + } + + virtual void generate(const gaussian& distr, + std::int64_t n, cl::sycl::buffer& r) override { + sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](sycl::handler& cgh) { + auto acc_stream = stream_buf.get_access(cgh); + auto acc_r = r.get_access(cgh); + host_task>(cgh, [=]() { + vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, + static_cast(acc_stream.get_pointer()), n, + acc_r.get_pointer(), distr.mean(), distr.stddev()); + }); + }); + } + + virtual void generate(const gaussian& distr, std::int64_t n, + cl::sycl::buffer& r) override { + sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](sycl::handler& cgh) { + auto acc_stream = stream_buf.get_access(cgh); + auto acc_r = r.get_access(cgh); + host_task>(cgh, [=]() { + vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, + static_cast(acc_stream.get_pointer()), n, + acc_r.get_pointer(), distr.mean(), distr.stddev()); + }); + }); + } + + virtual void generate(const gaussian& distr, std::int64_t n, + cl::sycl::buffer& r) override { + sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](sycl::handler& cgh) { + auto acc_stream = stream_buf.get_access(cgh); + auto acc_r = r.get_access(cgh); + host_task>(cgh, [=]() { + vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, + static_cast(acc_stream.get_pointer()), n, + acc_r.get_pointer(), distr.mean(), distr.stddev()); + }); + }); + } + + virtual void generate(const lognormal& distr, + std::int64_t n, cl::sycl::buffer& r) override { + sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](sycl::handler& cgh) { + auto acc_stream = stream_buf.get_access(cgh); + auto acc_r = r.get_access(cgh); + host_task>(cgh, [=]() { + vsRngLognormal(VSL_RNG_METHOD_LOGNORMAL_BOXMULLER2, + static_cast(acc_stream.get_pointer()), 
n, + acc_r.get_pointer(), distr.m(), distr.s(), distr.displ(), + distr.scale()); + }); + }); + } + + virtual void generate(const lognormal& distr, + std::int64_t n, cl::sycl::buffer& r) override { + sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](sycl::handler& cgh) { + auto acc_stream = stream_buf.get_access(cgh); + auto acc_r = r.get_access(cgh); + host_task>(cgh, [=]() { + vdRngLognormal(VSL_RNG_METHOD_LOGNORMAL_BOXMULLER2, + static_cast(acc_stream.get_pointer()), n, + acc_r.get_pointer(), distr.m(), distr.s(), distr.displ(), + distr.scale()); + }); + }); + } + + virtual void generate(const lognormal& distr, std::int64_t n, + cl::sycl::buffer& r) override { + sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](sycl::handler& cgh) { + auto acc_stream = stream_buf.get_access(cgh); + auto acc_r = r.get_access(cgh); + host_task>(cgh, [=]() { + vsRngLognormal(VSL_RNG_METHOD_LOGNORMAL_ICDF, + static_cast(acc_stream.get_pointer()), n, + acc_r.get_pointer(), distr.m(), distr.s(), distr.displ(), + distr.scale()); + }); + }); + } + + virtual void generate(const lognormal& distr, std::int64_t n, + cl::sycl::buffer& r) override { + sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](sycl::handler& cgh) { + auto acc_stream = stream_buf.get_access(cgh); + auto acc_r = r.get_access(cgh); + host_task>(cgh, [=]() { + vdRngLognormal(VSL_RNG_METHOD_LOGNORMAL_ICDF, + static_cast(acc_stream.get_pointer()), n, + acc_r.get_pointer(), distr.m(), distr.s(), distr.displ(), + distr.scale()); + }); + }); + } + + virtual void generate(const bernoulli& distr, + std::int64_t n, cl::sycl::buffer& r) override { + sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](sycl::handler& cgh) { + auto acc_stream = stream_buf.get_access(cgh); + auto acc_r = r.get_access(cgh); + host_task>(cgh, [=]() { + viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, + static_cast(acc_stream.get_pointer()), 
n, + acc_r.get_pointer(), distr.p()); + }); + }); + } + + virtual void generate(const bernoulli& distr, + std::int64_t n, cl::sycl::buffer& r) override { + sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](sycl::handler& cgh) { + auto acc_stream = stream_buf.get_access(cgh); + auto acc_r = r.get_access(cgh); + host_task>(cgh, [=]() { + std::uint32_t* r_ptr = acc_r.get_pointer(); + viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, + static_cast(acc_stream.get_pointer()), n, + reinterpret_cast(r_ptr), distr.p()); + }); + }); + } + + virtual void generate(const poisson& distr, + std::int64_t n, cl::sycl::buffer& r) override { + sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](sycl::handler& cgh) { + auto acc_stream = stream_buf.get_access(cgh); + auto acc_r = r.get_access(cgh); + host_task>(cgh, [=]() { + viRngPoisson(VSL_RNG_METHOD_POISSON_POISNORM, + static_cast(acc_stream.get_pointer()), n, + acc_r.get_pointer(), distr.lambda()); + }); + }); + } + + virtual void generate(const poisson& distr, + std::int64_t n, cl::sycl::buffer& r) override { + sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](sycl::handler& cgh) { + auto acc_stream = stream_buf.get_access(cgh); + auto acc_r = r.get_access(cgh); + host_task>(cgh, [=]() { + std::uint32_t* r_ptr = acc_r.get_pointer(); + viRngPoisson(VSL_RNG_METHOD_POISSON_POISNORM, + static_cast(acc_stream.get_pointer()), n, + reinterpret_cast(r_ptr), distr.lambda()); + }); + }); + } + + virtual void generate(const bits& distr, std::int64_t n, + cl::sycl::buffer& r) override { + sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](sycl::handler& cgh) { + auto acc_stream = stream_buf.get_access(cgh); + auto acc_r = r.get_access(cgh); + host_task>(cgh, [=]() { + viRngUniformBits(VSL_RNG_METHOD_UNIFORMBITS_STD, + static_cast(acc_stream.get_pointer()), n, + acc_r.get_pointer()); + }); + }); + } + + // USM APIs + + virtual 
cl::sycl::event generate(const uniform& distr, + std::int64_t n, float* r, + const std::vector& dependencies) override { + cl::sycl::event::wait_and_throw(dependencies); + return queue_.submit([&](sycl::handler& cgh) { + VSLStreamStatePtr stream = stream_; + host_task>(cgh, [=]() { + vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, n, r, distr.a(), distr.b()); + }); + }); + } + + virtual cl::sycl::event generate(const uniform& distr, + std::int64_t n, double* r, + const std::vector& dependencies) override { + cl::sycl::event::wait_and_throw(dependencies); + return queue_.submit([&](sycl::handler& cgh) { + VSLStreamStatePtr stream = stream_; + host_task>(cgh, [=]() { + vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, n, r, distr.a(), distr.b()); + }); + }); + } + + virtual cl::sycl::event generate(const uniform& distr, + std::int64_t n, std::int32_t* r, + const std::vector& dependencies) override { + cl::sycl::event::wait_and_throw(dependencies); + return queue_.submit([&](sycl::handler& cgh) { + VSLStreamStatePtr stream = stream_; + host_task>(cgh, [=]() { + viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, n, r, distr.a(), distr.b()); + }); + }); + } + + virtual cl::sycl::event generate(const uniform& distr, + std::int64_t n, float* r, + const std::vector& dependencies) override { + cl::sycl::event::wait_and_throw(dependencies); + return queue_.submit([&](sycl::handler& cgh) { + VSLStreamStatePtr stream = stream_; + host_task>(cgh, [=]() { + vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, stream, n, r, distr.a(), + distr.b()); + }); + }); + } + + virtual cl::sycl::event generate(const uniform& distr, + std::int64_t n, double* r, + const std::vector& dependencies) override { + cl::sycl::event::wait_and_throw(dependencies); + return queue_.submit([&](sycl::handler& cgh) { + VSLStreamStatePtr stream = stream_; + host_task>(cgh, [=]() { + vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, stream, n, r, distr.a(), + distr.b()); + }); + }); + } + + virtual 
cl::sycl::event generate(const gaussian& distr, + std::int64_t n, float* r, + const std::vector& dependencies) override { + cl::sycl::event::wait_and_throw(dependencies); + return queue_.submit([&](sycl::handler& cgh) { + VSLStreamStatePtr stream = stream_; + host_task>(cgh, [=]() { + vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, stream, n, r, distr.mean(), + distr.stddev()); + }); + }); + } + + virtual cl::sycl::event generate(const gaussian& distr, + std::int64_t n, double* r, + const std::vector& dependencies) override { + cl::sycl::event::wait_and_throw(dependencies); + return queue_.submit([&](sycl::handler& cgh) { + VSLStreamStatePtr stream = stream_; + host_task>(cgh, [=]() { + vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, stream, n, r, distr.mean(), + distr.stddev()); + }); + }); + } + + virtual cl::sycl::event generate(const gaussian& distr, + std::int64_t n, float* r, + const std::vector& dependencies) override { + cl::sycl::event::wait_and_throw(dependencies); + return queue_.submit([&](sycl::handler& cgh) { + VSLStreamStatePtr stream = stream_; + host_task>(cgh, [=]() { + vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, n, r, distr.mean(), + distr.stddev()); + }); + }); + } + + virtual cl::sycl::event generate(const gaussian& distr, + std::int64_t n, double* r, + const std::vector& dependencies) override { + cl::sycl::event::wait_and_throw(dependencies); + return queue_.submit([&](sycl::handler& cgh) { + VSLStreamStatePtr stream = stream_; + host_task>(cgh, [=]() { + vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, n, r, distr.mean(), + distr.stddev()); + }); + }); + } + + virtual cl::sycl::event generate(const lognormal& distr, + std::int64_t n, float* r, + const std::vector& dependencies) override { + cl::sycl::event::wait_and_throw(dependencies); + return queue_.submit([&](sycl::handler& cgh) { + VSLStreamStatePtr stream = stream_; + host_task>(cgh, [=]() { + vsRngLognormal(VSL_RNG_METHOD_LOGNORMAL_BOXMULLER2, stream, n, r, distr.m(), + 
distr.s(), distr.displ(), distr.scale()); + }); + }); + } + + virtual cl::sycl::event generate(const lognormal& distr, + std::int64_t n, double* r, + const std::vector& dependencies) override { + cl::sycl::event::wait_and_throw(dependencies); + return queue_.submit([&](sycl::handler& cgh) { + VSLStreamStatePtr stream = stream_; + host_task>(cgh, [=]() { + vdRngLognormal(VSL_RNG_METHOD_LOGNORMAL_BOXMULLER2, stream, n, r, distr.m(), + distr.s(), distr.displ(), distr.scale()); + }); + }); + } + + virtual cl::sycl::event generate(const lognormal& distr, + std::int64_t n, float* r, + const std::vector& dependencies) override { + cl::sycl::event::wait_and_throw(dependencies); + return queue_.submit([&](sycl::handler& cgh) { + VSLStreamStatePtr stream = stream_; + host_task>(cgh, [=]() { + vsRngLognormal(VSL_RNG_METHOD_LOGNORMAL_ICDF, stream, n, r, distr.m(), distr.s(), + distr.displ(), distr.scale()); + }); + }); + } + + virtual cl::sycl::event generate(const lognormal& distr, + std::int64_t n, double* r, + const std::vector& dependencies) override { + cl::sycl::event::wait_and_throw(dependencies); + return queue_.submit([&](sycl::handler& cgh) { + VSLStreamStatePtr stream = stream_; + host_task>(cgh, [=]() { + vdRngLognormal(VSL_RNG_METHOD_LOGNORMAL_ICDF, stream, n, r, distr.m(), distr.s(), + distr.displ(), distr.scale()); + }); + }); + } + + virtual cl::sycl::event generate(const bernoulli& distr, + std::int64_t n, std::int32_t* r, + const std::vector& dependencies) override { + cl::sycl::event::wait_and_throw(dependencies); + return queue_.submit([&](sycl::handler& cgh) { + VSLStreamStatePtr stream = stream_; + host_task>(cgh, [=]() { + viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, stream, n, r, distr.p()); + }); + }); + } + + virtual cl::sycl::event generate(const bernoulli& distr, + std::int64_t n, std::uint32_t* r, + const std::vector& dependencies) override { + cl::sycl::event::wait_and_throw(dependencies); + return queue_.submit([&](sycl::handler& cgh) { + 
VSLStreamStatePtr stream = stream_; + host_task>(cgh, [=]() { + viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, stream, n, + reinterpret_cast(r), distr.p()); + }); + }); + } + + virtual cl::sycl::event generate( + const poisson& distr, std::int64_t n, + std::int32_t* r, const std::vector& dependencies) override { + cl::sycl::event::wait_and_throw(dependencies); + return queue_.submit([&](sycl::handler& cgh) { + VSLStreamStatePtr stream = stream_; + host_task>(cgh, [=]() { + viRngPoisson(VSL_RNG_METHOD_POISSON_POISNORM, stream, n, r, distr.lambda()); + }); + }); + } + + virtual cl::sycl::event generate( + const poisson& distr, std::int64_t n, + std::uint32_t* r, const std::vector& dependencies) override { + cl::sycl::event::wait_and_throw(dependencies); + return queue_.submit([&](sycl::handler& cgh) { + VSLStreamStatePtr stream = stream_; + host_task>(cgh, [=]() { + viRngPoisson(VSL_RNG_METHOD_POISSON_POISNORM, stream, n, + reinterpret_cast(r), distr.lambda()); + }); + }); + } + + virtual cl::sycl::event generate(const bits& distr, std::int64_t n, + std::uint32_t* r, + const std::vector& dependencies) override { + cl::sycl::event::wait_and_throw(dependencies); + return queue_.submit([&](sycl::handler& cgh) { + VSLStreamStatePtr stream = stream_; + host_task>( + cgh, [=]() { viRngUniformBits(VSL_RNG_METHOD_UNIFORMBITS_STD, stream, n, reinterpret_cast(r)); + }); + }); + } + + virtual oneapi::mkl::rng::detail::engine_impl* copy_state() override { + return new mcg59_impl(this); + } + + virtual void skip_ahead(std::uint64_t num_to_skip) override { + vslSkipAheadStream(stream_, num_to_skip); + } + + virtual void skip_ahead(std::initializer_list num_to_skip) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 skipAheadEx"); + } + + virtual void leapfrog(std::uint64_t idx, std::uint64_t stride) override { + vslLeapfrogStream(stream_, idx, stride); + } + + virtual ~mcg59_impl() override { + vslDeleteStream(&stream_); + } + +private: + VSLStreamStatePtr stream_; + 
std::int32_t state_size_; +}; + +oneapi::mkl::rng::detail::engine_impl* create_mcg59(cl::sycl::queue queue, std::uint64_t seed) { + return new mcg59_impl(queue, seed); +} + +oneapi::mkl::rng::detail::engine_impl* create_mcg59(cl::sycl::queue queue, + std::initializer_list seed) { + throw oneapi::mkl::unimplemented("rng", "mcg59 skipAheadEx"); +} + +} // namespace mklcpu +} // namespace rng +} // namespace mkl +} // namespace oneapi diff --git a/src/rng/backends/mklcpu/mkl_rng_cpu_wrappers.cpp b/src/rng/backends/mklcpu/mkl_rng_cpu_wrappers.cpp index 840205db7..a9304a93d 100644 --- a/src/rng/backends/mklcpu/mkl_rng_cpu_wrappers.cpp +++ b/src/rng/backends/mklcpu/mkl_rng_cpu_wrappers.cpp @@ -25,5 +25,6 @@ extern "C" ONEMKL_EXPORT rng_function_table_t mkl_rng_table = { WRAPPER_VERSION, oneapi::mkl::rng::mklcpu::create_philox4x32x10, oneapi::mkl::rng::mklcpu::create_philox4x32x10, oneapi::mkl::rng::mklcpu::create_mrg32k3a, - oneapi::mkl::rng::mklcpu::create_mrg32k3a + oneapi::mkl::rng::mklcpu::create_mrg32k3a, oneapi::mkl::rng::mklcpu::create_mcg59, + oneapi::mkl::rng::mklcpu::create_mcg59 }; diff --git a/src/rng/backends/mklgpu/CMakeLists.txt b/src/rng/backends/mklgpu/CMakeLists.txt index 99a08302e..a3283cae2 100644 --- a/src/rng/backends/mklgpu/CMakeLists.txt +++ b/src/rng/backends/mklgpu/CMakeLists.txt @@ -26,6 +26,7 @@ add_library(${LIB_OBJ} OBJECT mkl_internal_rng_gpu.hpp philox4x32x10.cpp mrg32k3a.cpp + mcg59.cpp $<$: mkl_rng_gpu_wrappers.cpp> ) diff --git a/src/rng/backends/mklgpu/mcg59.cpp b/src/rng/backends/mklgpu/mcg59.cpp new file mode 100755 index 000000000..1f7c85b34 --- /dev/null +++ b/src/rng/backends/mklgpu/mcg59.cpp @@ -0,0 +1,586 @@ +/******************************************************************************* +* Copyright 2020-2021 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#include +#include + +#include "mkl_version.h" + +#include "oneapi/mkl/rng/detail/engine_impl.hpp" +#include "oneapi/mkl/rng/engines.hpp" +#include "oneapi/mkl/exceptions.hpp" +#include "oneapi/mkl/rng/detail/mklgpu/onemkl_rng_mklgpu.hpp" + +#include "mkl_internal_rng_gpu.hpp" + +namespace oneapi { +namespace mkl { +namespace rng { +namespace mklgpu { + +#if !defined(_WIN64) +class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { +public: + mcg59_impl(cl::sycl::queue queue, std::uint64_t seed) + : oneapi::mkl::rng::detail::engine_impl(queue) { + engine_ = + oneapi::mkl::rng::detail::gpu::create_engine(queue, seed); + } + + mcg59_impl(const mcg59_impl* other) : oneapi::mkl::rng::detail::engine_impl(*other) { + sycl::queue queue(other->queue_); + engine_ = oneapi::mkl::rng::detail::gpu::create_engine( + queue, other->engine_); + } + + // Buffers API + + virtual void generate( + const oneapi::mkl::rng::uniform& distr, + std::int64_t n, cl::sycl::buffer& r) override { + oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); + } + + virtual void generate( + const oneapi::mkl::rng::uniform& distr, + std::int64_t n, cl::sycl::buffer& r) override { + oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); + } + + virtual void generate(const oneapi::mkl::rng::uniform< + std::int32_t, oneapi::mkl::rng::uniform_method::standard>& distr, + std::int64_t n, cl::sycl::buffer& r) override { + 
oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); + } + + virtual void generate( + const oneapi::mkl::rng::uniform& distr, + std::int64_t n, cl::sycl::buffer& r) override { + oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); + } + + virtual void generate( + const oneapi::mkl::rng::uniform& distr, + std::int64_t n, cl::sycl::buffer& r) override { + oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); + } + + virtual void generate(const oneapi::mkl::rng::gaussian< + float, oneapi::mkl::rng::gaussian_method::box_muller2>& distr, + std::int64_t n, cl::sycl::buffer& r) override { + oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); + } + + virtual void generate(const oneapi::mkl::rng::gaussian< + double, oneapi::mkl::rng::gaussian_method::box_muller2>& distr, + std::int64_t n, cl::sycl::buffer& r) override { + oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); + } + + virtual void generate( + const oneapi::mkl::rng::gaussian& distr, + std::int64_t n, cl::sycl::buffer& r) override { + oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); + } + + virtual void generate( + const oneapi::mkl::rng::gaussian& distr, + std::int64_t n, cl::sycl::buffer& r) override { + oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); + } + + virtual void generate(const oneapi::mkl::rng::lognormal< + float, oneapi::mkl::rng::lognormal_method::box_muller2>& distr, + std::int64_t n, cl::sycl::buffer& r) override { + oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); + } + + virtual void generate(const oneapi::mkl::rng::lognormal< + double, oneapi::mkl::rng::lognormal_method::box_muller2>& distr, + std::int64_t n, cl::sycl::buffer& r) override { + oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); + } + + virtual void generate( + const oneapi::mkl::rng::lognormal& distr, + std::int64_t n, cl::sycl::buffer& r) override { + 
oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); + } + + virtual void generate( + const oneapi::mkl::rng::lognormal& distr, + std::int64_t n, cl::sycl::buffer& r) override { + oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); + } + + virtual void generate(const bernoulli& distr, + std::int64_t n, cl::sycl::buffer& r) override { + oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); + } + + virtual void generate(const bernoulli& distr, + std::int64_t n, cl::sycl::buffer& r) override { + oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); + } + + virtual void generate(const poisson& distr, + std::int64_t n, cl::sycl::buffer& r) override { + oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); + } + + virtual void generate(const poisson& distr, + std::int64_t n, cl::sycl::buffer& r) override { + oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); + } + + virtual void generate(const bits& distr, std::int64_t n, + cl::sycl::buffer& r) override { + oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); + } + + // USM APIs + + virtual cl::sycl::event generate( + const oneapi::mkl::rng::uniform& distr, + std::int64_t n, float* r, const std::vector& dependencies) override { + return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies); + } + + virtual cl::sycl::event generate( + const oneapi::mkl::rng::uniform& distr, + std::int64_t n, double* r, const std::vector& dependencies) override { + return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies); + } + + virtual cl::sycl::event generate( + const oneapi::mkl::rng::uniform& + distr, + std::int64_t n, std::int32_t* r, + const std::vector& dependencies) override { + ; + return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies); + } + + virtual cl::sycl::event generate( + const 
oneapi::mkl::rng::uniform& distr, + std::int64_t n, float* r, const std::vector& dependencies) override { + return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies); + } + + virtual cl::sycl::event generate( + const oneapi::mkl::rng::uniform& distr, + std::int64_t n, double* r, const std::vector& dependencies) override { + return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies); + } + + virtual cl::sycl::event generate( + const oneapi::mkl::rng::gaussian& + distr, + std::int64_t n, float* r, const std::vector& dependencies) override { + return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies); + } + + virtual cl::sycl::event generate( + const oneapi::mkl::rng::gaussian& + distr, + std::int64_t n, double* r, const std::vector& dependencies) override { + return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies); + } + + virtual cl::sycl::event generate( + const oneapi::mkl::rng::gaussian& distr, + std::int64_t n, float* r, const std::vector& dependencies) override { + return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies); + } + + virtual cl::sycl::event generate( + const oneapi::mkl::rng::gaussian& distr, + std::int64_t n, double* r, const std::vector& dependencies) override { + return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies); + } + + virtual cl::sycl::event generate( + const oneapi::mkl::rng::lognormal& + distr, + std::int64_t n, float* r, const std::vector& dependencies) override { + return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies); + } + + virtual cl::sycl::event generate( + const oneapi::mkl::rng::lognormal& + distr, + std::int64_t n, double* r, const std::vector& dependencies) override { + return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies); + } + + virtual 
cl::sycl::event generate( + const oneapi::mkl::rng::lognormal& distr, + std::int64_t n, float* r, const std::vector& dependencies) override { + return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies); + } + + virtual cl::sycl::event generate( + const oneapi::mkl::rng::lognormal& distr, + std::int64_t n, double* r, const std::vector& dependencies) override { + return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies); + } + + virtual cl::sycl::event generate(const bernoulli& distr, + std::int64_t n, std::int32_t* r, + const std::vector& dependencies) override { + return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies); + } + + virtual cl::sycl::event generate(const bernoulli& distr, + std::int64_t n, std::uint32_t* r, + const std::vector& dependencies) override { + return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies); + } + + virtual cl::sycl::event generate( + const poisson& distr, std::int64_t n, + std::int32_t* r, const std::vector& dependencies) override { + return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies); + } + + virtual cl::sycl::event generate( + const poisson& distr, std::int64_t n, + std::uint32_t* r, const std::vector& dependencies) override { + return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies); + } + + virtual cl::sycl::event generate(const bits& distr, std::int64_t n, + std::uint32_t* r, + const std::vector& dependencies) override { + return oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r, dependencies); + } + + virtual oneapi::mkl::rng::detail::engine_impl* copy_state() override { + return new mcg59_impl(this); + } + + virtual void skip_ahead(std::uint64_t num_to_skip) override { + oneapi::mkl::rng::detail::gpu::skip_ahead(queue_, engine_, num_to_skip); + } + + virtual void skip_ahead(std::initializer_list 
num_to_skip) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 skipAheadEx"); + } + + virtual void leapfrog(std::uint64_t idx, std::uint64_t stride) override { + oneapi::mkl::rng::detail::gpu::leapfrog(queue_, engine_, idx, stride); + } + + virtual ~mcg59_impl() override { + oneapi::mkl::rng::detail::gpu::delete_engine(queue_, engine_); + } + +private: + oneapi::mkl::rng::detail::engine_base_impl* engine_; +}; +#else // GPU backend is not supported for Windows OS currently +class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { +public: + mcg59_impl(cl::sycl::queue queue, std::uint64_t seed) + : oneapi::mkl::rng::detail::engine_impl(queue) { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + } + + mcg59_impl(cl::sycl::queue queue, std::initializer_list seed) + : oneapi::mkl::rng::detail::engine_impl(queue) { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + } + + mcg59_impl(const mcg59_impl* other) : oneapi::mkl::rng::detail::engine_impl(*other) { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + } + + // Buffers API + + virtual void generate( + const oneapi::mkl::rng::uniform& distr, + std::int64_t n, cl::sycl::buffer& r) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + } + + virtual void generate( + const oneapi::mkl::rng::uniform& distr, + std::int64_t n, cl::sycl::buffer& r) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + } + + virtual void generate(const oneapi::mkl::rng::uniform< + std::int32_t, oneapi::mkl::rng::uniform_method::standard>& distr, + std::int64_t n, cl::sycl::buffer& r) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + } + + virtual void generate( + const oneapi::mkl::rng::uniform& distr, + std::int64_t n, cl::sycl::buffer& r) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + } + + virtual void generate( + const oneapi::mkl::rng::uniform& distr, + std::int64_t n, cl::sycl::buffer& r) override { + throw 
oneapi::mkl::unimplemented("rng", "mcg59 engine"); + } + + virtual void generate(const oneapi::mkl::rng::gaussian< + float, oneapi::mkl::rng::gaussian_method::box_muller2>& distr, + std::int64_t n, cl::sycl::buffer& r) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + } + + virtual void generate(const oneapi::mkl::rng::gaussian< + double, oneapi::mkl::rng::gaussian_method::box_muller2>& distr, + std::int64_t n, cl::sycl::buffer& r) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + } + + virtual void generate( + const oneapi::mkl::rng::gaussian& distr, + std::int64_t n, cl::sycl::buffer& r) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + } + + virtual void generate( + const oneapi::mkl::rng::gaussian& distr, + std::int64_t n, cl::sycl::buffer& r) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + } + + virtual void generate(const oneapi::mkl::rng::lognormal< + float, oneapi::mkl::rng::lognormal_method::box_muller2>& distr, + std::int64_t n, cl::sycl::buffer& r) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + } + + virtual void generate(const oneapi::mkl::rng::lognormal< + double, oneapi::mkl::rng::lognormal_method::box_muller2>& distr, + std::int64_t n, cl::sycl::buffer& r) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + } + + virtual void generate( + const oneapi::mkl::rng::lognormal& distr, + std::int64_t n, cl::sycl::buffer& r) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + } + + virtual void generate( + const oneapi::mkl::rng::lognormal& distr, + std::int64_t n, cl::sycl::buffer& r) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + } + + virtual void generate(const bernoulli& distr, + std::int64_t n, cl::sycl::buffer& r) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + } + + virtual void generate(const bernoulli& distr, + std::int64_t n, cl::sycl::buffer& r) 
override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + } + + virtual void generate(const poisson& distr, + std::int64_t n, cl::sycl::buffer& r) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + } + + virtual void generate(const poisson& distr, + std::int64_t n, cl::sycl::buffer& r) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + } + + virtual void generate(const bits& distr, std::int64_t n, + cl::sycl::buffer& r) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + } + + // USM APIs + + virtual cl::sycl::event generate( + const oneapi::mkl::rng::uniform& distr, + std::int64_t n, float* r, const std::vector& dependencies) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + return cl::sycl::event{}; + } + + virtual cl::sycl::event generate( + const oneapi::mkl::rng::uniform& distr, + std::int64_t n, double* r, const std::vector& dependencies) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + return cl::sycl::event{}; + } + + virtual cl::sycl::event generate( + const oneapi::mkl::rng::uniform& + distr, + std::int64_t n, std::int32_t* r, + const std::vector& dependencies) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + return cl::sycl::event{}; + } + + virtual cl::sycl::event generate( + const oneapi::mkl::rng::uniform& distr, + std::int64_t n, float* r, const std::vector& dependencies) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + return cl::sycl::event{}; + } + + virtual cl::sycl::event generate( + const oneapi::mkl::rng::uniform& distr, + std::int64_t n, double* r, const std::vector& dependencies) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + return cl::sycl::event{}; + } + + virtual cl::sycl::event generate( + const oneapi::mkl::rng::gaussian& + distr, + std::int64_t n, float* r, const std::vector& dependencies) override { + throw oneapi::mkl::unimplemented("rng", 
"mcg59 engine"); + return cl::sycl::event{}; + } + + virtual cl::sycl::event generate( + const oneapi::mkl::rng::gaussian& + distr, + std::int64_t n, double* r, const std::vector& dependencies) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + return cl::sycl::event{}; + } + + virtual cl::sycl::event generate( + const oneapi::mkl::rng::gaussian& distr, + std::int64_t n, float* r, const std::vector& dependencies) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + return cl::sycl::event{}; + } + + virtual cl::sycl::event generate( + const oneapi::mkl::rng::gaussian& distr, + std::int64_t n, double* r, const std::vector& dependencies) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + return cl::sycl::event{}; + } + + virtual cl::sycl::event generate( + const oneapi::mkl::rng::lognormal& + distr, + std::int64_t n, float* r, const std::vector& dependencies) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + return cl::sycl::event{}; + } + + virtual cl::sycl::event generate( + const oneapi::mkl::rng::lognormal& + distr, + std::int64_t n, double* r, const std::vector& dependencies) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + return cl::sycl::event{}; + } + + virtual cl::sycl::event generate( + const oneapi::mkl::rng::lognormal& distr, + std::int64_t n, float* r, const std::vector& dependencies) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + return cl::sycl::event{}; + } + + virtual cl::sycl::event generate( + const oneapi::mkl::rng::lognormal& distr, + std::int64_t n, double* r, const std::vector& dependencies) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + return cl::sycl::event{}; + } + + virtual cl::sycl::event generate(const bernoulli& distr, + std::int64_t n, std::int32_t* r, + const std::vector& dependencies) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + return cl::sycl::event{}; 
+ } + + virtual cl::sycl::event generate(const bernoulli& distr, + std::int64_t n, std::uint32_t* r, + const std::vector& dependencies) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + return cl::sycl::event{}; + } + + virtual cl::sycl::event generate( + const poisson& distr, std::int64_t n, + std::int32_t* r, const std::vector& dependencies) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + return cl::sycl::event{}; + } + + virtual cl::sycl::event generate( + const poisson& distr, std::int64_t n, + std::uint32_t* r, const std::vector& dependencies) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + return cl::sycl::event{}; + } + + virtual cl::sycl::event generate(const bits& distr, std::int64_t n, + std::uint32_t* r, + const std::vector& dependencies) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + return cl::sycl::event{}; + } + + virtual oneapi::mkl::rng::detail::engine_impl* copy_state() override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + return nullptr; + } + + virtual void skip_ahead(std::uint64_t num_to_skip) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + } + + virtual void skip_ahead(std::initializer_list num_to_skip) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + } + + virtual void leapfrog(std::uint64_t idx, std::uint64_t stride) override { + throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); + } + + virtual ~mcg59_impl() override {} +}; +#endif + +oneapi::mkl::rng::detail::engine_impl* create_mcg59(sycl::queue queue, std::uint64_t seed) { + return new mcg59_impl(queue, seed); +} + +oneapi::mkl::rng::detail::engine_impl* create_mcg59(cl::sycl::queue queue, + std::initializer_list seed) { + throw oneapi::mkl::unimplemented("rng", "mcg59 skipAheadEx"); +} + +} // namespace mklgpu +} // namespace rng +} // namespace mkl +} // namespace oneapi diff --git 
a/src/rng/backends/mklgpu/mkl_rng_gpu_wrappers.cpp b/src/rng/backends/mklgpu/mkl_rng_gpu_wrappers.cpp index 6754b2749..f6c37a43f 100644 --- a/src/rng/backends/mklgpu/mkl_rng_gpu_wrappers.cpp +++ b/src/rng/backends/mklgpu/mkl_rng_gpu_wrappers.cpp @@ -25,5 +25,6 @@ extern "C" ONEMKL_EXPORT rng_function_table_t mkl_rng_table = { WRAPPER_VERSION, oneapi::mkl::rng::mklgpu::create_philox4x32x10, oneapi::mkl::rng::mklgpu::create_philox4x32x10, oneapi::mkl::rng::mklgpu::create_mrg32k3a, - oneapi::mkl::rng::mklgpu::create_mrg32k3a + oneapi::mkl::rng::mklgpu::create_mrg32k3a, oneapi::mkl::rng::mklgpu::create_mcg59, + oneapi::mkl::rng::mklgpu::create_mcg59 }; diff --git a/src/rng/function_table.hpp b/src/rng/function_table.hpp index 408523e98..8cfedece0 100644 --- a/src/rng/function_table.hpp +++ b/src/rng/function_table.hpp @@ -37,6 +37,12 @@ typedef struct { std::uint32_t seed); oneapi::mkl::rng::detail::engine_impl* (*create_mrg32k3a_ex_sycl)( cl::sycl::queue queue, std::initializer_list seed); + + oneapi::mkl::rng::detail::engine_impl* (*create_mcg59_sycl)(cl::sycl::queue queue, + std::uint64_t seed); + oneapi::mkl::rng::detail::engine_impl* (*create_mcg59_ex_sycl)( + cl::sycl::queue queue, std::initializer_list seed); + } rng_function_table_t; #endif //_RNG_FUNCTION_TABLE_HPP_ diff --git a/src/rng/rng_loader.cpp b/src/rng/rng_loader.cpp index 130f9785e..6f5047661 100644 --- a/src/rng/rng_loader.cpp +++ b/src/rng/rng_loader.cpp @@ -49,6 +49,16 @@ engine_impl* create_mrg32k3a(oneapi::mkl::device libkey, cl::sycl::queue queue, return function_tables[libkey].create_mrg32k3a_ex_sycl(queue, seed); } +engine_impl* create_mcg59(oneapi::mkl::device libkey, cl::sycl::queue queue, + std::uint64_t seed) { + return function_tables[libkey].create_mcg59_sycl(queue, seed); +} + +engine_impl* create_mcg59(oneapi::mkl::device libkey, cl::sycl::queue queue, + std::initializer_list seed) { + return function_tables[libkey].create_mcg59_ex_sycl(queue, seed); +} + } // namespace detail } // 
namespace rng } // namespace mkl diff --git a/tests/unit_tests/rng/include/engines_api_tests.hpp b/tests/unit_tests/rng/include/engines_api_tests.hpp index f36b3e266..7058dd800 100644 --- a/tests/unit_tests/rng/include/engines_api_tests.hpp +++ b/tests/unit_tests/rng/include/engines_api_tests.hpp @@ -32,6 +32,19 @@ template class engines_constructors_test { + + template + void generate(oneapi::mkl::rng::bits distr, T& engine, std::uint64_t n, cl::sycl::buffer& buf) + { + oneapi::mkl::rng::generate(distr, engine, n, buf); + } + + template <> + void generate(oneapi::mkl::rng::bits distr, oneapi::mkl::rng::mcg59 &engine, std::uint64_t n, cl::sycl::buffer& buf) + { + oneapi::mkl::rng::generate(distr, engine, n/2, buf); + } + public: template void operator()(Queue queue, Args... args) { @@ -55,10 +68,10 @@ class engines_constructors_test { cl::sycl::buffer r3_buffer(r3.data(), r3.size()); cl::sycl::buffer r4_buffer(r4.data(), r4.size()); - oneapi::mkl::rng::generate(distr, engine1, N_GEN, r1_buffer); - oneapi::mkl::rng::generate(distr, engine2, N_GEN, r2_buffer); - oneapi::mkl::rng::generate(distr, engine3, N_GEN, r3_buffer); - oneapi::mkl::rng::generate(distr, engine4, N_GEN, r4_buffer); + generate(distr, engine1, N_GEN, r1_buffer); + generate(distr, engine2, N_GEN, r2_buffer); + generate(distr, engine3, N_GEN, r3_buffer); + generate(distr, engine4, N_GEN, r4_buffer); } catch (const oneapi::mkl::unimplemented& e) { status = test_skipped; @@ -81,6 +94,19 @@ class engines_constructors_test { template class engines_copy_test { + + template + void generate(oneapi::mkl::rng::bits distr, T& engine, std::uint64_t n, cl::sycl::buffer& buf) + { + oneapi::mkl::rng::generate(distr, engine, n, buf); + } + + template <> + void generate(oneapi::mkl::rng::bits distr, oneapi::mkl::rng::mcg59 &engine, std::uint64_t n, cl::sycl::buffer& buf) + { + oneapi::mkl::rng::generate(distr, engine, n/2, buf); + } + public: template void operator()(Queue queue) { @@ -99,8 +125,8 @@ class 
engines_copy_test { cl::sycl::buffer r1_buffer(r1.data(), r1.size()); cl::sycl::buffer r2_buffer(r2.data(), r2.size()); - oneapi::mkl::rng::generate(distr, engine1, N_GEN, r1_buffer); - oneapi::mkl::rng::generate(distr, engine2, N_GEN, r2_buffer); + generate(distr, engine1, N_GEN, r1_buffer); + generate(distr, engine2, N_GEN, r2_buffer); } Engine engine3 = engine1; @@ -110,9 +136,9 @@ class engines_copy_test { cl::sycl::buffer r2_buffer(r2.data(), r2.size()); cl::sycl::buffer r3_buffer(r3.data(), r3.size()); - oneapi::mkl::rng::generate(distr, engine1, N_GEN, r1_buffer); - oneapi::mkl::rng::generate(distr, engine3, N_GEN, r2_buffer); - oneapi::mkl::rng::generate(distr, engine4, N_GEN, r3_buffer); + generate(distr, engine1, N_GEN, r1_buffer); + generate(distr, engine3, N_GEN, r2_buffer); + generate(distr, engine4, N_GEN, r3_buffer); } } catch (const oneapi::mkl::unimplemented& e) { diff --git a/tests/unit_tests/rng/include/rng_test_common.hpp b/tests/unit_tests/rng/include/rng_test_common.hpp index 54d3635fa..76443fdc3 100644 --- a/tests/unit_tests/rng/include/rng_test_common.hpp +++ b/tests/unit_tests/rng/include/rng_test_common.hpp @@ -29,7 +29,7 @@ #define N_GEN 1000 // Defines for skip_ahead and leapfrog tests -#define N_ENGINES 5 +#define N_ENGINES 6 #define N_PORTION 100 #define N_GEN_SERVICE (N_ENGINES * N_PORTION) @@ -73,6 +73,25 @@ static inline bool check_equal_vector(std::vector& r1, return good; } +template +static inline bool leapfrog_check(std::vector& r1, + std::vector& r2, int n_portion, int n_engines) { + bool good = true; + int j = 0; + for(int i = 0; i < n_engines; i++) { + for(int k = 0; k < n_portion/2; k++) { + for ( int p = 0; p < 2; p++) + { + if(!check_equal(r2[j++], r1[k * n_engines + i*2 + p])) { + good = false; + break; + } + } + } + } + return good; +} + template class rng_test { public: diff --git a/tests/unit_tests/rng/include/skip_ahead_test.hpp b/tests/unit_tests/rng/include/skip_ahead_test.hpp index 7efcf2408..dce4dd333 100644 --- 
a/tests/unit_tests/rng/include/skip_ahead_test.hpp +++ b/tests/unit_tests/rng/include/skip_ahead_test.hpp @@ -32,6 +32,30 @@ template class skip_ahead_test { + template + void generate(oneapi::mkl::rng::bits distr, T& engine, std::uint64_t n, cl::sycl::buffer& buf) + { + oneapi::mkl::rng::generate(distr, engine, n, buf); + } + + template <> + void generate(oneapi::mkl::rng::bits distr, oneapi::mkl::rng::mcg59 &engine, std::uint64_t n, cl::sycl::buffer& buf) + { + oneapi::mkl::rng::generate(distr, engine, n/2, buf); + } + + template + void skip_ahead(T& engine, std::uint64_t n) + { + oneapi::mkl::rng::skip_ahead(engine, n); + } + + template <> + void skip_ahead(oneapi::mkl::rng::mcg59 &engine, std::uint64_t n) + { + oneapi::mkl::rng::skip_ahead(engine, n/2); + } + public: template void operator()(Queue queue) { @@ -49,7 +73,7 @@ class skip_ahead_test { // Perform skip for (int i = 0; i < N_ENGINES; i++) { engines.push_back(new Engine(queue)); - oneapi::mkl::rng::skip_ahead(*(engines[i]), i * N_PORTION); + skip_ahead(*(engines[i]), i * N_PORTION); } cl::sycl::buffer r_buffer(r1.data(), r1.size()); @@ -59,9 +83,9 @@ class skip_ahead_test { cl::sycl::buffer(r2.data() + i * N_PORTION, N_PORTION)); } - oneapi::mkl::rng::generate(distr, engine, N_GEN_SERVICE, r_buffer); + generate(distr, engine, N_GEN_SERVICE, r_buffer); for (int i = 0; i < N_ENGINES; i++) { - oneapi::mkl::rng::generate(distr, *(engines[i]), N_PORTION, r_buffers[i]); + generate(distr, *(engines[i]), N_PORTION, r_buffers[i]); } // Clear memory @@ -133,4 +157,61 @@ class skip_ahead_ex_test { int status = test_passed; }; +template +class leapfrog_test { +public: + template + void operator()(Queue queue) { + // Prepare arrays for random numbers + std::vector r1(N_GEN); + std::vector r2(N_GEN); + + try { + // Initialize rng objects + Engine engine(queue); + std::vector engines; + + oneapi::mkl::rng::bits distr; + + // Perform skip + for (int i = 0; i < N_ENGINES; i++) { + engines.push_back(new Engine(queue)); 
+ oneapi::mkl::rng::leapfrog(*(engines[i]), i, N_ENGINES / 2); + } + + cl::sycl::buffer r_buffer(r1.data(), r1.size()); + std::vector> r_buffers; + for (int i = 0; i < N_ENGINES; i++) { + r_buffers.push_back( + cl::sycl::buffer(r2.data() + i * N_PORTION, N_PORTION)); + } + + oneapi::mkl::rng::generate(distr, engine, N_GEN_SERVICE / 2, r_buffer); + for (int i = 0; i < N_ENGINES; i++) { + oneapi::mkl::rng::generate(distr, *(engines[i]), N_PORTION / 2, r_buffers[i]); + } + + // Clear memory + for (int i = 0; i < N_ENGINES; i++) { + delete engines[i]; + } + } + catch (const oneapi::mkl::unimplemented& e) { + status = test_skipped; + return; + } + catch (cl::sycl::exception const& e) { + std::cout << "SYCL exception during generation" << std::endl << e.what() << std::endl; + print_error_code(e); + status = test_failed; + return; + } + + // Validation + status = leapfrog_check(r1, r2, N_PORTION, N_ENGINES); + } + + int status = test_passed; +}; + #endif // _RNG_TEST_SKIP_AHEAD_TEST_HPP__ diff --git a/tests/unit_tests/rng/service/engines_api_test.cpp b/tests/unit_tests/rng/service/engines_api_test.cpp index e5b063ba3..20fa5ad7b 100644 --- a/tests/unit_tests/rng/service/engines_api_test.cpp +++ b/tests/unit_tests/rng/service/engines_api_test.cpp @@ -67,4 +67,25 @@ INSTANTIATE_TEST_SUITE_P(Mrg32k3aConstructorsTestsuite, Mrg32k3aConstructorsTest INSTANTIATE_TEST_SUITE_P(Mrg32k3aCopyTestsuite, Mrg32k3aCopyTests, ::testing::ValuesIn(devices), ::DeviceNamePrint()); +class Mcg59ConstructorsTests : public ::testing::TestWithParam {}; + +class Mcg59CopyTests : public ::testing::TestWithParam {}; + +TEST_P(Mcg59ConstructorsTests, BinaryPrecision) { + rng_test> test; + std::uint64_t seed = SEED; + EXPECT_TRUEORSKIP((test(GetParam(), seed))); +} + +TEST_P(Mcg59CopyTests, BinaryPrecision) { + rng_test> test; + EXPECT_TRUEORSKIP((test(GetParam()))); +} + +INSTANTIATE_TEST_SUITE_P(Mcg59ConstructorsTestsuite, Mcg59ConstructorsTests, + ::testing::ValuesIn(devices), ::DeviceNamePrint()); + 
+INSTANTIATE_TEST_SUITE_P(Mcg59CopyTestsuite, Mcg59CopyTests, + ::testing::ValuesIn(devices), ::DeviceNamePrint()); + } // anonymous namespace diff --git a/tests/unit_tests/rng/service/skip_ahead.cpp b/tests/unit_tests/rng/service/skip_ahead.cpp index 434b7f0c5..da8694e14 100644 --- a/tests/unit_tests/rng/service/skip_ahead.cpp +++ b/tests/unit_tests/rng/service/skip_ahead.cpp @@ -65,4 +65,24 @@ INSTANTIATE_TEST_SUITE_P(Mrg32k3aSkipAheadTestSuite, Mrg32k3aSkipAheadTests, INSTANTIATE_TEST_SUITE_P(Mrg32k3aSkipAheadExTestSuite, Mrg32k3aSkipAheadExTests, ::testing::ValuesIn(devices), ::DeviceNamePrint()); +class Mcg59SkipAheadTests : public ::testing::TestWithParam {}; + +class Mcg59LeapfrogTests : public ::testing::TestWithParam {}; + +TEST_P(Mcg59SkipAheadTests, BinaryPrecision) { + rng_test> test; + EXPECT_TRUEORSKIP((test(GetParam()))); +} + +TEST_P(Mcg59LeapfrogTests, BinaryPrecision) { + rng_test> test; + EXPECT_TRUEORSKIP((test(GetParam()))); +} + +INSTANTIATE_TEST_SUITE_P(Mcg59SkipAheadTestSuite, Mcg59SkipAheadTests, + ::testing::ValuesIn(devices), ::DeviceNamePrint()); + +INSTANTIATE_TEST_SUITE_P(Mcg59LeapfrogTestSuite, Mcg59LeapfrogTests, + ::testing::ValuesIn(devices), ::DeviceNamePrint()); + } // anonymous namespace diff --git a/tests/unit_tests/rng/statistics_check/bernoulli.cpp b/tests/unit_tests/rng/statistics_check/bernoulli.cpp index a9ec7a30b..5f09e01de 100755 --- a/tests/unit_tests/rng/statistics_check/bernoulli.cpp +++ b/tests/unit_tests/rng/statistics_check/bernoulli.cpp @@ -38,6 +38,11 @@ TEST_P(BernoulliIcdfTests, IntegerPrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, BERNOULLI_ARGS))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, BERNOULLI_ARGS))); } TEST_P(BernoulliIcdfTests, UnsignedIntegerPrecision) { @@ -51,6 +56,11 @@ TEST_P(BernoulliIcdfTests, UnsignedIntegerPrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), 
N_GEN, BERNOULLI_ARGS))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, BERNOULLI_ARGS))); } INSTANTIATE_TEST_SUITE_P(BernoulliIcdfTestSuite, BernoulliIcdfTests, ::testing::ValuesIn(devices), diff --git a/tests/unit_tests/rng/statistics_check/bernoulli_usm.cpp b/tests/unit_tests/rng/statistics_check/bernoulli_usm.cpp index 6fd9d3d3d..11928f718 100755 --- a/tests/unit_tests/rng/statistics_check/bernoulli_usm.cpp +++ b/tests/unit_tests/rng/statistics_check/bernoulli_usm.cpp @@ -38,6 +38,11 @@ TEST_P(BernoulliIcdfUsmTests, IntegerPrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, BERNOULLI_ARGS))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, BERNOULLI_ARGS))); } TEST_P(BernoulliIcdfUsmTests, UnsignedIntegerPrecision) { @@ -47,10 +52,15 @@ TEST_P(BernoulliIcdfUsmTests, UnsignedIntegerPrecision) { test1; EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, BERNOULLI_ARGS))); rng_test, + oneapi::mkl::rng::bernoulli, oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, BERNOULLI_ARGS))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, BERNOULLI_ARGS))); } INSTANTIATE_TEST_SUITE_P(BernoulliIcdfUsmTestSuite, BernoulliIcdfUsmTests, diff --git a/tests/unit_tests/rng/statistics_check/gaussian.cpp b/tests/unit_tests/rng/statistics_check/gaussian.cpp index d0ee3d2ef..afdbb3f4c 100644 --- a/tests/unit_tests/rng/statistics_check/gaussian.cpp +++ b/tests/unit_tests/rng/statistics_check/gaussian.cpp @@ -40,6 +40,11 @@ TEST_P(GaussianIcdfTest, RealSinglePrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, GAUSSIAN_ARGS_FLOAT))); + rng_test< + statistics_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, GAUSSIAN_ARGS_FLOAT))); } TEST_P(GaussianIcdfTest, RealDoublePrecision) { @@ -53,6 +58,11 @@ 
TEST_P(GaussianIcdfTest, RealDoublePrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, GAUSSIAN_ARGS_DOUBLE))); + rng_test< + statistics_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, GAUSSIAN_ARGS_DOUBLE))); } TEST_P(GaussianBoxmullerTest, RealSinglePrecision) { @@ -66,6 +76,11 @@ TEST_P(GaussianBoxmullerTest, RealSinglePrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, GAUSSIAN_ARGS_FLOAT))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, GAUSSIAN_ARGS_FLOAT))); } TEST_P(GaussianBoxmullerTest, RealDoublePrecision) { @@ -79,6 +94,11 @@ TEST_P(GaussianBoxmullerTest, RealDoublePrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, GAUSSIAN_ARGS_DOUBLE))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, GAUSSIAN_ARGS_DOUBLE))); } INSTANTIATE_TEST_SUITE_P(GaussianIcdfTestSuite, GaussianIcdfTest, ::testing::ValuesIn(devices), diff --git a/tests/unit_tests/rng/statistics_check/gaussian_usm.cpp b/tests/unit_tests/rng/statistics_check/gaussian_usm.cpp index e49b1e523..d5bcbc4e2 100644 --- a/tests/unit_tests/rng/statistics_check/gaussian_usm.cpp +++ b/tests/unit_tests/rng/statistics_check/gaussian_usm.cpp @@ -40,6 +40,11 @@ TEST_P(GaussianIcdfUsmTest, RealSinglePrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, GAUSSIAN_ARGS_FLOAT))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, GAUSSIAN_ARGS_FLOAT))); } TEST_P(GaussianIcdfUsmTest, RealDoublePrecision) { @@ -53,6 +58,11 @@ TEST_P(GaussianIcdfUsmTest, RealDoublePrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, GAUSSIAN_ARGS_DOUBLE))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, 
GAUSSIAN_ARGS_DOUBLE))); } TEST_P(GaussianBoxmullerUsmTest, RealSinglePrecision) { @@ -66,6 +76,11 @@ TEST_P(GaussianBoxmullerUsmTest, RealSinglePrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, GAUSSIAN_ARGS_FLOAT))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, GAUSSIAN_ARGS_FLOAT))); } TEST_P(GaussianBoxmullerUsmTest, RealDoublePrecision) { @@ -79,6 +94,11 @@ TEST_P(GaussianBoxmullerUsmTest, RealDoublePrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, GAUSSIAN_ARGS_DOUBLE))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, GAUSSIAN_ARGS_DOUBLE))); } INSTANTIATE_TEST_SUITE_P(GaussianIcdfUsmTestSuite, GaussianIcdfUsmTest, diff --git a/tests/unit_tests/rng/statistics_check/lognormal.cpp b/tests/unit_tests/rng/statistics_check/lognormal.cpp index b307431b4..c41ca52bb 100755 --- a/tests/unit_tests/rng/statistics_check/lognormal.cpp +++ b/tests/unit_tests/rng/statistics_check/lognormal.cpp @@ -40,6 +40,11 @@ TEST_P(LognormalIcdfTest, RealSinglePrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, LOGNORMAL_ARGS_FLOAT))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, LOGNORMAL_ARGS_FLOAT))); } TEST_P(LognormalIcdfTest, RealDoublePrecision) { @@ -53,6 +58,11 @@ TEST_P(LognormalIcdfTest, RealDoublePrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, LOGNORMAL_ARGS_DOUBLE))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, LOGNORMAL_ARGS_DOUBLE))); } TEST_P(LognormalBoxmullerTest, RealSinglePrecision) { @@ -66,6 +76,11 @@ TEST_P(LognormalBoxmullerTest, RealSinglePrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, LOGNORMAL_ARGS_FLOAT))); + rng_test, + oneapi::mkl::rng::mcg59>> + 
test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, LOGNORMAL_ARGS_FLOAT))); } TEST_P(LognormalBoxmullerTest, RealDoublePrecision) { @@ -79,6 +94,11 @@ TEST_P(LognormalBoxmullerTest, RealDoublePrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, LOGNORMAL_ARGS_DOUBLE))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, LOGNORMAL_ARGS_DOUBLE))); } INSTANTIATE_TEST_SUITE_P(LognormalIcdfTestSuite, LognormalIcdfTest, ::testing::ValuesIn(devices), diff --git a/tests/unit_tests/rng/statistics_check/lognormal_usm.cpp b/tests/unit_tests/rng/statistics_check/lognormal_usm.cpp index c1240c7a1..2b0242368 100755 --- a/tests/unit_tests/rng/statistics_check/lognormal_usm.cpp +++ b/tests/unit_tests/rng/statistics_check/lognormal_usm.cpp @@ -40,6 +40,11 @@ TEST_P(LognormalIcdfUsmTest, RealSinglePrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, LOGNORMAL_ARGS_FLOAT))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, LOGNORMAL_ARGS_FLOAT))); } TEST_P(LognormalIcdfUsmTest, RealDoublePrecision) { @@ -53,6 +58,11 @@ TEST_P(LognormalIcdfUsmTest, RealDoublePrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, LOGNORMAL_ARGS_DOUBLE))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, LOGNORMAL_ARGS_DOUBLE))); } TEST_P(LognormalBoxmullerUsmTest, RealSinglePrecision) { @@ -66,6 +76,11 @@ TEST_P(LognormalBoxmullerUsmTest, RealSinglePrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, LOGNORMAL_ARGS_FLOAT))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, LOGNORMAL_ARGS_FLOAT))); } TEST_P(LognormalBoxmullerUsmTest, RealDoublePrecision) { @@ -79,6 +94,11 @@ TEST_P(LognormalBoxmullerUsmTest, RealDoublePrecision) { oneapi::mkl::rng::mrg32k3a>> test2; 
EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, LOGNORMAL_ARGS_DOUBLE))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, LOGNORMAL_ARGS_DOUBLE))); } INSTANTIATE_TEST_SUITE_P(LognormalIcdfUsmTestSuite, LognormalIcdfUsmTest, diff --git a/tests/unit_tests/rng/statistics_check/poisson.cpp b/tests/unit_tests/rng/statistics_check/poisson.cpp index 02e7c1994..ec035db0a 100755 --- a/tests/unit_tests/rng/statistics_check/poisson.cpp +++ b/tests/unit_tests/rng/statistics_check/poisson.cpp @@ -40,6 +40,12 @@ TEST_P(PoissonIcdfTests, IntegerPrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, POISSON_ARGS))); + rng_test< + statistics_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, POISSON_ARGS))); } TEST_P(PoissonIcdfTests, UnsignedIntegerPrecision) { @@ -55,6 +61,12 @@ TEST_P(PoissonIcdfTests, UnsignedIntegerPrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, POISSON_ARGS))); + rng_test< + statistics_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, POISSON_ARGS))); } INSTANTIATE_TEST_SUITE_P(PoissonIcdfTestSuite, PoissonIcdfTests, ::testing::ValuesIn(devices), diff --git a/tests/unit_tests/rng/statistics_check/poisson_usm.cpp b/tests/unit_tests/rng/statistics_check/poisson_usm.cpp index 8ab720dfc..cf307a5aa 100755 --- a/tests/unit_tests/rng/statistics_check/poisson_usm.cpp +++ b/tests/unit_tests/rng/statistics_check/poisson_usm.cpp @@ -40,6 +40,12 @@ TEST_P(PoissonIcdfUsmTests, IntegerPrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, POISSON_ARGS))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, POISSON_ARGS))); } TEST_P(PoissonIcdfUsmTests, UnsignedIntegerPrecision) { @@ -55,6 +61,12 @@ TEST_P(PoissonIcdfUsmTests, UnsignedIntegerPrecision) { oneapi::mkl::rng::mrg32k3a>> test2; 
EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, POISSON_ARGS))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, POISSON_ARGS))); } INSTANTIATE_TEST_SUITE_P(PoissonIcdfUsmTestSuite, PoissonIcdfUsmTests, ::testing::ValuesIn(devices), diff --git a/tests/unit_tests/rng/statistics_check/uniform.cpp b/tests/unit_tests/rng/statistics_check/uniform.cpp index 9ccbd7583..bd6c3ce6d 100644 --- a/tests/unit_tests/rng/statistics_check/uniform.cpp +++ b/tests/unit_tests/rng/statistics_check/uniform.cpp @@ -40,6 +40,11 @@ TEST_P(UniformStdTests, RealSinglePrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, UNIFORM_ARGS_FLOAT))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, UNIFORM_ARGS_FLOAT))); } TEST_P(UniformStdTests, RealDoublePrecision) { @@ -53,6 +58,11 @@ TEST_P(UniformStdTests, RealDoublePrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, UNIFORM_ARGS_DOUBLE))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, UNIFORM_ARGS_DOUBLE))); } TEST_P(UniformStdTests, IntegerPrecision) { @@ -66,6 +76,11 @@ TEST_P(UniformStdTests, IntegerPrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, UNIFORM_ARGS_INT))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, UNIFORM_ARGS_INT))); } TEST_P(UniformAccurateTests, RealSinglePrecision) { @@ -79,6 +94,11 @@ TEST_P(UniformAccurateTests, RealSinglePrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, UNIFORM_ARGS_FLOAT))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, UNIFORM_ARGS_FLOAT))); } TEST_P(UniformAccurateTests, RealDoublePrecision) { @@ -92,6 +112,11 @@ TEST_P(UniformAccurateTests, RealDoublePrecision) { oneapi::mkl::rng::mrg32k3a>> test2; 
EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, UNIFORM_ARGS_DOUBLE))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, UNIFORM_ARGS_DOUBLE))); } INSTANTIATE_TEST_SUITE_P(UniformStdTestSuite, UniformStdTests, ::testing::ValuesIn(devices), diff --git a/tests/unit_tests/rng/statistics_check/uniform_usm.cpp b/tests/unit_tests/rng/statistics_check/uniform_usm.cpp index f86890631..6ae4328a1 100644 --- a/tests/unit_tests/rng/statistics_check/uniform_usm.cpp +++ b/tests/unit_tests/rng/statistics_check/uniform_usm.cpp @@ -40,6 +40,11 @@ TEST_P(UniformStdUsmTests, RealSinglePrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, UNIFORM_ARGS_FLOAT))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, UNIFORM_ARGS_FLOAT))); } TEST_P(UniformStdUsmTests, RealDoublePrecision) { @@ -53,6 +58,11 @@ TEST_P(UniformStdUsmTests, RealDoublePrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, UNIFORM_ARGS_DOUBLE))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, UNIFORM_ARGS_DOUBLE))); } TEST_P(UniformStdUsmTests, IntegerPrecision) { @@ -66,6 +76,11 @@ TEST_P(UniformStdUsmTests, IntegerPrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, UNIFORM_ARGS_INT))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, UNIFORM_ARGS_INT))); } TEST_P(UniformAccurateUsmTests, RealSinglePrecision) { @@ -79,6 +94,11 @@ TEST_P(UniformAccurateUsmTests, RealSinglePrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, UNIFORM_ARGS_FLOAT))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, UNIFORM_ARGS_FLOAT))); } TEST_P(UniformAccurateUsmTests, RealDoublePrecision) { @@ -92,6 +112,11 @@ TEST_P(UniformAccurateUsmTests, 
RealDoublePrecision) { oneapi::mkl::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, UNIFORM_ARGS_DOUBLE))); + rng_test, + oneapi::mkl::rng::mcg59>> + test3; + EXPECT_TRUEORSKIP((test3(GetParam(), N_GEN, UNIFORM_ARGS_DOUBLE))); } INSTANTIATE_TEST_SUITE_P(UniformStdUsmTestSuite, UniformStdUsmTests, ::testing::ValuesIn(devices), From 7382d38de36722ab4bc2287e3c06baaf1c653ef5 Mon Sep 17 00:00:00 2001 From: ksenia zaitseva Date: Wed, 12 Jan 2022 15:05:34 +0300 Subject: [PATCH 2/7] [RNG] comment fix --- src/rng/backends/mklgpu/mcg59.cpp | 2 +- tests/unit_tests/rng/include/engines_api_tests.hpp | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/rng/backends/mklgpu/mcg59.cpp b/src/rng/backends/mklgpu/mcg59.cpp index 1f7c85b34..69c0cf0a6 100755 --- a/src/rng/backends/mklgpu/mcg59.cpp +++ b/src/rng/backends/mklgpu/mcg59.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2020-2021 Intel Corporation +* Copyright 2021 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/tests/unit_tests/rng/include/engines_api_tests.hpp b/tests/unit_tests/rng/include/engines_api_tests.hpp index 7058dd800..58e81365e 100644 --- a/tests/unit_tests/rng/include/engines_api_tests.hpp +++ b/tests/unit_tests/rng/include/engines_api_tests.hpp @@ -104,6 +104,8 @@ class engines_copy_test { template <> void generate(oneapi::mkl::rng::bits distr, oneapi::mkl::rng::mcg59 &engine, std::uint64_t n, cl::sycl::buffer& buf) { + // mcg59 generates numbers with type uint64_t, but retruns uint32_t + // so it necessary to use n/2 here to generate n/2 numbers with type uint64_t and retrun n numbers with type uint32_t oneapi::mkl::rng::generate(distr, engine, n/2, buf); } From 28f8bdeef47fd05e54cb96ed5a2a9a22a126ff1e Mon Sep 17 00:00:00 2001 From: ksenia zaitseva Date: Wed, 12 Jan 2022 20:09:17 +0300 Subject: [PATCH 3/7] [RNG] Clang-format --- .../rng/detail/curand/onemkl_rng_curand.hpp | 2 +- .../rng/detail/mklcpu/onemkl_rng_mklcpu.hpp | 2 +- .../rng/detail/mklgpu/onemkl_rng_mklgpu.hpp | 4 +-- include/oneapi/mkl/rng/detail/rng_loader.hpp | 4 +-- include/oneapi/mkl/rng/distributions.hpp | 5 ++-- src/rng/backends/curand/mcg59.cpp | 4 +-- .../curand/mkl_rng_curand_wrappers.cpp | 9 ++++-- src/rng/backends/mklcpu/mcg59.cpp | 9 +++--- .../backends/mklcpu/mkl_rng_cpu_wrappers.cpp | 9 ++++-- src/rng/backends/mklgpu/mcg59.cpp | 6 ++-- .../backends/mklgpu/mkl_rng_gpu_wrappers.cpp | 9 ++++-- src/rng/function_table.hpp | 4 +-- src/rng/rng_loader.cpp | 5 ++-- .../rng/include/engines_api_tests.hpp | 28 +++++++++---------- .../rng/include/rng_test_common.hpp | 13 ++++----- .../rng/include/skip_ahead_test.hpp | 21 +++++++------- .../rng/service/engines_api_test.cpp | 4 +-- tests/unit_tests/rng/service/skip_ahead.cpp | 8 +++--- 18 files changed, 77 insertions(+), 69 deletions(-) mode change 100755 => 100644 src/rng/backends/curand/mcg59.cpp mode change 100755 => 100644 src/rng/backends/mklcpu/mcg59.cpp mode change 100755 => 100644 src/rng/backends/mklgpu/mcg59.cpp diff --git 
a/include/oneapi/mkl/rng/detail/curand/onemkl_rng_curand.hpp b/include/oneapi/mkl/rng/detail/curand/onemkl_rng_curand.hpp index fb6d76a3f..e55ac506a 100644 --- a/include/oneapi/mkl/rng/detail/curand/onemkl_rng_curand.hpp +++ b/include/oneapi/mkl/rng/detail/curand/onemkl_rng_curand.hpp @@ -83,7 +83,7 @@ ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a( cl::sycl::queue queue, std::initializer_list seed); ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mcg59(cl::sycl::queue queue, - std::uint64_t seed); + std::uint64_t seed); ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mcg59( cl::sycl::queue queue, std::initializer_list seed); diff --git a/include/oneapi/mkl/rng/detail/mklcpu/onemkl_rng_mklcpu.hpp b/include/oneapi/mkl/rng/detail/mklcpu/onemkl_rng_mklcpu.hpp index 92a37e056..4a06ac6d4 100644 --- a/include/oneapi/mkl/rng/detail/mklcpu/onemkl_rng_mklcpu.hpp +++ b/include/oneapi/mkl/rng/detail/mklcpu/onemkl_rng_mklcpu.hpp @@ -44,7 +44,7 @@ ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a( cl::sycl::queue queue, std::initializer_list seed); ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mcg59(cl::sycl::queue queue, - std::uint64_t seed); + std::uint64_t seed); ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mcg59( cl::sycl::queue queue, std::initializer_list seed); diff --git a/include/oneapi/mkl/rng/detail/mklgpu/onemkl_rng_mklgpu.hpp b/include/oneapi/mkl/rng/detail/mklgpu/onemkl_rng_mklgpu.hpp index ee3756c29..c7213a046 100644 --- a/include/oneapi/mkl/rng/detail/mklgpu/onemkl_rng_mklgpu.hpp +++ b/include/oneapi/mkl/rng/detail/mklgpu/onemkl_rng_mklgpu.hpp @@ -44,11 +44,11 @@ ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a( cl::sycl::queue queue, std::initializer_list seed); ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mcg59(cl::sycl::queue queue, - std::uint64_t seed); + std::uint64_t seed); ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* 
create_mcg59( cl::sycl::queue queue, std::initializer_list seed); - + } // namespace mklgpu } // namespace rng } // namespace mkl diff --git a/include/oneapi/mkl/rng/detail/rng_loader.hpp b/include/oneapi/mkl/rng/detail/rng_loader.hpp index 2df2a12b0..a37897bad 100644 --- a/include/oneapi/mkl/rng/detail/rng_loader.hpp +++ b/include/oneapi/mkl/rng/detail/rng_loader.hpp @@ -46,10 +46,10 @@ ONEMKL_EXPORT engine_impl* create_mrg32k3a(oneapi::mkl::device libkey, cl::sycl: std::initializer_list seed); ONEMKL_EXPORT engine_impl* create_mcg59(oneapi::mkl::device libkey, cl::sycl::queue queue, - std::uint64_t seed); + std::uint64_t seed); ONEMKL_EXPORT engine_impl* create_mcg59(oneapi::mkl::device libkey, cl::sycl::queue queue, - std::initializer_list seed); + std::initializer_list seed); } // namespace detail } // namespace rng diff --git a/include/oneapi/mkl/rng/distributions.hpp b/include/oneapi/mkl/rng/distributions.hpp index e8ec98485..bed5af418 100644 --- a/include/oneapi/mkl/rng/distributions.hpp +++ b/include/oneapi/mkl/rng/distributions.hpp @@ -358,8 +358,9 @@ class poisson { template class bits { public: - static_assert(std::is_same::value || std::is_same::value, - "rng bits type is not supported"); + static_assert(std::is_same::value || + std::is_same::value, + "rng bits type is not supported"); using result_type = UIntType; }; diff --git a/src/rng/backends/curand/mcg59.cpp b/src/rng/backends/curand/mcg59.cpp old mode 100755 new mode 100644 index f473d1744..84cd7af42 --- a/src/rng/backends/curand/mcg59.cpp +++ b/src/rng/backends/curand/mcg59.cpp @@ -98,8 +98,8 @@ oneapi::mkl::rng::detail::engine_impl* create_mcg59(sycl::queue queue, std::uint return nullptr; } -oneapi::mkl::rng::detail::engine_impl* create_mcg59( - cl::sycl::queue queue, std::initializer_list seed) { +oneapi::mkl::rng::detail::engine_impl* create_mcg59(cl::sycl::queue queue, + std::initializer_list seed) { throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); return nullptr; } diff --git 
a/src/rng/backends/curand/mkl_rng_curand_wrappers.cpp b/src/rng/backends/curand/mkl_rng_curand_wrappers.cpp index 2c9671e37..fedd42839 100644 --- a/src/rng/backends/curand/mkl_rng_curand_wrappers.cpp +++ b/src/rng/backends/curand/mkl_rng_curand_wrappers.cpp @@ -62,8 +62,11 @@ #define WRAPPER_VERSION 1 extern "C" ONEMKL_EXPORT rng_function_table_t mkl_rng_table = { - WRAPPER_VERSION, oneapi::mkl::rng::curand::create_philox4x32x10, - oneapi::mkl::rng::curand::create_philox4x32x10, oneapi::mkl::rng::curand::create_mrg32k3a, - oneapi::mkl::rng::curand::create_mrg32k3a, oneapi::mkl::rng::curand::create_mcg59, + WRAPPER_VERSION, + oneapi::mkl::rng::curand::create_philox4x32x10, + oneapi::mkl::rng::curand::create_philox4x32x10, + oneapi::mkl::rng::curand::create_mrg32k3a, + oneapi::mkl::rng::curand::create_mrg32k3a, + oneapi::mkl::rng::curand::create_mcg59, oneapi::mkl::rng::curand::create_mcg59 }; diff --git a/src/rng/backends/mklcpu/mcg59.cpp b/src/rng/backends/mklcpu/mcg59.cpp old mode 100755 new mode 100644 index 2e1351adf..f6e0147ee --- a/src/rng/backends/mklcpu/mcg59.cpp +++ b/src/rng/backends/mklcpu/mcg59.cpp @@ -532,9 +532,10 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { cl::sycl::event::wait_and_throw(dependencies); return queue_.submit([&](sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; - host_task>( - cgh, [=]() { viRngUniformBits(VSL_RNG_METHOD_UNIFORMBITS_STD, stream, n, reinterpret_cast(r)); - }); + host_task>(cgh, [=]() { + viRngUniformBits(VSL_RNG_METHOD_UNIFORMBITS_STD, stream, n, + reinterpret_cast(r)); + }); }); } @@ -568,7 +569,7 @@ oneapi::mkl::rng::detail::engine_impl* create_mcg59(cl::sycl::queue queue, std:: } oneapi::mkl::rng::detail::engine_impl* create_mcg59(cl::sycl::queue queue, - std::initializer_list seed) { + std::initializer_list seed) { throw oneapi::mkl::unimplemented("rng", "mcg59 skipAheadEx"); } diff --git a/src/rng/backends/mklcpu/mkl_rng_cpu_wrappers.cpp 
b/src/rng/backends/mklcpu/mkl_rng_cpu_wrappers.cpp index a9304a93d..2df6274c4 100644 --- a/src/rng/backends/mklcpu/mkl_rng_cpu_wrappers.cpp +++ b/src/rng/backends/mklcpu/mkl_rng_cpu_wrappers.cpp @@ -23,8 +23,11 @@ #define WRAPPER_VERSION 1 extern "C" ONEMKL_EXPORT rng_function_table_t mkl_rng_table = { - WRAPPER_VERSION, oneapi::mkl::rng::mklcpu::create_philox4x32x10, - oneapi::mkl::rng::mklcpu::create_philox4x32x10, oneapi::mkl::rng::mklcpu::create_mrg32k3a, - oneapi::mkl::rng::mklcpu::create_mrg32k3a, oneapi::mkl::rng::mklcpu::create_mcg59, + WRAPPER_VERSION, + oneapi::mkl::rng::mklcpu::create_philox4x32x10, + oneapi::mkl::rng::mklcpu::create_philox4x32x10, + oneapi::mkl::rng::mklcpu::create_mrg32k3a, + oneapi::mkl::rng::mklcpu::create_mrg32k3a, + oneapi::mkl::rng::mklcpu::create_mcg59, oneapi::mkl::rng::mklcpu::create_mcg59 }; diff --git a/src/rng/backends/mklgpu/mcg59.cpp b/src/rng/backends/mklgpu/mcg59.cpp old mode 100755 new mode 100644 index 69c0cf0a6..5721e2218 --- a/src/rng/backends/mklgpu/mcg59.cpp +++ b/src/rng/backends/mklgpu/mcg59.cpp @@ -39,7 +39,7 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { public: mcg59_impl(cl::sycl::queue queue, std::uint64_t seed) : oneapi::mkl::rng::detail::engine_impl(queue) { - engine_ = + engine_ = oneapi::mkl::rng::detail::gpu::create_engine(queue, seed); } @@ -576,8 +576,8 @@ oneapi::mkl::rng::detail::engine_impl* create_mcg59(sycl::queue queue, std::uint } oneapi::mkl::rng::detail::engine_impl* create_mcg59(cl::sycl::queue queue, - std::initializer_list seed) { - throw oneapi::mkl::unimplemented("rng", "mcg59 skipAheadEx"); + std::initializer_list seed) { + throw oneapi::mkl::unimplemented("rng", "mcg59 skipAheadEx"); } } // namespace mklgpu diff --git a/src/rng/backends/mklgpu/mkl_rng_gpu_wrappers.cpp b/src/rng/backends/mklgpu/mkl_rng_gpu_wrappers.cpp index f6c37a43f..a90b29737 100644 --- a/src/rng/backends/mklgpu/mkl_rng_gpu_wrappers.cpp +++ b/src/rng/backends/mklgpu/mkl_rng_gpu_wrappers.cpp @@ 
-23,8 +23,11 @@ #define WRAPPER_VERSION 1 extern "C" ONEMKL_EXPORT rng_function_table_t mkl_rng_table = { - WRAPPER_VERSION, oneapi::mkl::rng::mklgpu::create_philox4x32x10, - oneapi::mkl::rng::mklgpu::create_philox4x32x10, oneapi::mkl::rng::mklgpu::create_mrg32k3a, - oneapi::mkl::rng::mklgpu::create_mrg32k3a, oneapi::mkl::rng::mklgpu::create_mcg59, + WRAPPER_VERSION, + oneapi::mkl::rng::mklgpu::create_philox4x32x10, + oneapi::mkl::rng::mklgpu::create_philox4x32x10, + oneapi::mkl::rng::mklgpu::create_mrg32k3a, + oneapi::mkl::rng::mklgpu::create_mrg32k3a, + oneapi::mkl::rng::mklgpu::create_mcg59, oneapi::mkl::rng::mklgpu::create_mcg59 }; diff --git a/src/rng/function_table.hpp b/src/rng/function_table.hpp index 8cfedece0..d2cc52a34 100644 --- a/src/rng/function_table.hpp +++ b/src/rng/function_table.hpp @@ -39,10 +39,10 @@ typedef struct { cl::sycl::queue queue, std::initializer_list seed); oneapi::mkl::rng::detail::engine_impl* (*create_mcg59_sycl)(cl::sycl::queue queue, - std::uint64_t seed); + std::uint64_t seed); oneapi::mkl::rng::detail::engine_impl* (*create_mcg59_ex_sycl)( cl::sycl::queue queue, std::initializer_list seed); - + } rng_function_table_t; #endif //_RNG_FUNCTION_TABLE_HPP_ diff --git a/src/rng/rng_loader.cpp b/src/rng/rng_loader.cpp index 6f5047661..675fc5e42 100644 --- a/src/rng/rng_loader.cpp +++ b/src/rng/rng_loader.cpp @@ -49,13 +49,12 @@ engine_impl* create_mrg32k3a(oneapi::mkl::device libkey, cl::sycl::queue queue, return function_tables[libkey].create_mrg32k3a_ex_sycl(queue, seed); } -engine_impl* create_mcg59(oneapi::mkl::device libkey, cl::sycl::queue queue, - std::uint64_t seed) { +engine_impl* create_mcg59(oneapi::mkl::device libkey, cl::sycl::queue queue, std::uint64_t seed) { return function_tables[libkey].create_mcg59_sycl(queue, seed); } engine_impl* create_mcg59(oneapi::mkl::device libkey, cl::sycl::queue queue, - std::initializer_list seed) { + std::initializer_list seed) { return function_tables[libkey].create_mcg59_ex_sycl(queue, 
seed); } diff --git a/tests/unit_tests/rng/include/engines_api_tests.hpp b/tests/unit_tests/rng/include/engines_api_tests.hpp index 58e81365e..68b92fa8a 100644 --- a/tests/unit_tests/rng/include/engines_api_tests.hpp +++ b/tests/unit_tests/rng/include/engines_api_tests.hpp @@ -32,17 +32,17 @@ template class engines_constructors_test { - template - void generate(oneapi::mkl::rng::bits distr, T& engine, std::uint64_t n, cl::sycl::buffer& buf) - { + void generate(oneapi::mkl::rng::bits distr, T& engine, std::uint64_t n, + cl::sycl::buffer& buf) { oneapi::mkl::rng::generate(distr, engine, n, buf); } template <> - void generate(oneapi::mkl::rng::bits distr, oneapi::mkl::rng::mcg59 &engine, std::uint64_t n, cl::sycl::buffer& buf) - { - oneapi::mkl::rng::generate(distr, engine, n/2, buf); + void generate(oneapi::mkl::rng::bits distr, + oneapi::mkl::rng::mcg59& engine, std::uint64_t n, + cl::sycl::buffer& buf) { + oneapi::mkl::rng::generate(distr, engine, n / 2, buf); } public: @@ -94,19 +94,19 @@ class engines_constructors_test { template class engines_copy_test { - template - void generate(oneapi::mkl::rng::bits distr, T& engine, std::uint64_t n, cl::sycl::buffer& buf) - { + void generate(oneapi::mkl::rng::bits distr, T& engine, std::uint64_t n, + cl::sycl::buffer& buf) { oneapi::mkl::rng::generate(distr, engine, n, buf); } template <> - void generate(oneapi::mkl::rng::bits distr, oneapi::mkl::rng::mcg59 &engine, std::uint64_t n, cl::sycl::buffer& buf) - { - // mcg59 generates numbers with type uint64_t, but retruns uint32_t - // so it necessary to use n/2 here to generate n/2 numbers with type uint64_t and retrun n numbers with type uint32_t - oneapi::mkl::rng::generate(distr, engine, n/2, buf); + void generate(oneapi::mkl::rng::bits distr, + oneapi::mkl::rng::mcg59& engine, std::uint64_t n, + cl::sycl::buffer& buf) { + // mcg59 generates numbers with type uint64_t, but retruns uint32_t + // so it necessary to use n/2 here to generate n/2 numbers with type uint64_t and 
retrun n numbers with type uint32_t + oneapi::mkl::rng::generate(distr, engine, n / 2, buf); } public: diff --git a/tests/unit_tests/rng/include/rng_test_common.hpp b/tests/unit_tests/rng/include/rng_test_common.hpp index 76443fdc3..bbeaa1517 100644 --- a/tests/unit_tests/rng/include/rng_test_common.hpp +++ b/tests/unit_tests/rng/include/rng_test_common.hpp @@ -74,15 +74,14 @@ static inline bool check_equal_vector(std::vector& r1, } template -static inline bool leapfrog_check(std::vector& r1, - std::vector& r2, int n_portion, int n_engines) { +static inline bool leapfrog_check(std::vector& r1, std::vector& r2, + int n_portion, int n_engines) { bool good = true; int j = 0; - for(int i = 0; i < n_engines; i++) { - for(int k = 0; k < n_portion/2; k++) { - for ( int p = 0; p < 2; p++) - { - if(!check_equal(r2[j++], r1[k * n_engines + i*2 + p])) { + for (int i = 0; i < n_engines; i++) { + for (int k = 0; k < n_portion / 2; k++) { + for (int p = 0; p < 2; p++) { + if (!check_equal(r2[j++], r1[k * n_engines + i * 2 + p])) { good = false; break; } diff --git a/tests/unit_tests/rng/include/skip_ahead_test.hpp b/tests/unit_tests/rng/include/skip_ahead_test.hpp index dce4dd333..d449f381b 100644 --- a/tests/unit_tests/rng/include/skip_ahead_test.hpp +++ b/tests/unit_tests/rng/include/skip_ahead_test.hpp @@ -33,27 +33,26 @@ template class skip_ahead_test { template - void generate(oneapi::mkl::rng::bits distr, T& engine, std::uint64_t n, cl::sycl::buffer& buf) - { + void generate(oneapi::mkl::rng::bits distr, T& engine, std::uint64_t n, + cl::sycl::buffer& buf) { oneapi::mkl::rng::generate(distr, engine, n, buf); } template <> - void generate(oneapi::mkl::rng::bits distr, oneapi::mkl::rng::mcg59 &engine, std::uint64_t n, cl::sycl::buffer& buf) - { - oneapi::mkl::rng::generate(distr, engine, n/2, buf); + void generate(oneapi::mkl::rng::bits distr, + oneapi::mkl::rng::mcg59& engine, std::uint64_t n, + cl::sycl::buffer& buf) { + oneapi::mkl::rng::generate(distr, engine, n / 2, 
buf); } template - void skip_ahead(T& engine, std::uint64_t n) - { + void skip_ahead(T& engine, std::uint64_t n) { oneapi::mkl::rng::skip_ahead(engine, n); } - template <> - void skip_ahead(oneapi::mkl::rng::mcg59 &engine, std::uint64_t n) - { - oneapi::mkl::rng::skip_ahead(engine, n/2); + template <> + void skip_ahead(oneapi::mkl::rng::mcg59& engine, std::uint64_t n) { + oneapi::mkl::rng::skip_ahead(engine, n / 2); } public: diff --git a/tests/unit_tests/rng/service/engines_api_test.cpp b/tests/unit_tests/rng/service/engines_api_test.cpp index 20fa5ad7b..1e576b676 100644 --- a/tests/unit_tests/rng/service/engines_api_test.cpp +++ b/tests/unit_tests/rng/service/engines_api_test.cpp @@ -85,7 +85,7 @@ TEST_P(Mcg59CopyTests, BinaryPrecision) { INSTANTIATE_TEST_SUITE_P(Mcg59ConstructorsTestsuite, Mcg59ConstructorsTests, ::testing::ValuesIn(devices), ::DeviceNamePrint()); -INSTANTIATE_TEST_SUITE_P(Mcg59CopyTestsuite, Mcg59CopyTests, - ::testing::ValuesIn(devices), ::DeviceNamePrint()); +INSTANTIATE_TEST_SUITE_P(Mcg59CopyTestsuite, Mcg59CopyTests, ::testing::ValuesIn(devices), + ::DeviceNamePrint()); } // anonymous namespace diff --git a/tests/unit_tests/rng/service/skip_ahead.cpp b/tests/unit_tests/rng/service/skip_ahead.cpp index da8694e14..6fd170eab 100644 --- a/tests/unit_tests/rng/service/skip_ahead.cpp +++ b/tests/unit_tests/rng/service/skip_ahead.cpp @@ -79,10 +79,10 @@ TEST_P(Mcg59LeapfrogTests, BinaryPrecision) { EXPECT_TRUEORSKIP((test(GetParam()))); } -INSTANTIATE_TEST_SUITE_P(Mcg59SkipAheadTestSuite, Mcg59SkipAheadTests, - ::testing::ValuesIn(devices), ::DeviceNamePrint()); +INSTANTIATE_TEST_SUITE_P(Mcg59SkipAheadTestSuite, Mcg59SkipAheadTests, ::testing::ValuesIn(devices), + ::DeviceNamePrint()); -INSTANTIATE_TEST_SUITE_P(Mcg59LeapfrogTestSuite, Mcg59LeapfrogTests, - ::testing::ValuesIn(devices), ::DeviceNamePrint()); +INSTANTIATE_TEST_SUITE_P(Mcg59LeapfrogTestSuite, Mcg59LeapfrogTests, ::testing::ValuesIn(devices), + ::DeviceNamePrint()); } // anonymous 
namespace From a04b09dbcf79fc09155a2497d1338b20c70dcf87 Mon Sep 17 00:00:00 2001 From: ksenia zaitseva Date: Thu, 20 Jan 2022 22:38:28 +0300 Subject: [PATCH 4/7] [RNG] code style fix --- .../rng/detail/curand/onemkl_rng_curand.hpp | 10 +- include/oneapi/mkl/rng/detail/engine_impl.hpp | 46 ++-- .../rng/detail/mklcpu/onemkl_rng_mklcpu.hpp | 10 +- .../rng/detail/mklgpu/onemkl_rng_mklgpu.hpp | 10 +- include/oneapi/mkl/rng/detail/rng_loader.hpp | 10 +- include/oneapi/mkl/rng/distributions.hpp | 9 +- include/oneapi/mkl/rng/engines.hpp | 38 ++- include/oneapi/mkl/rng/functions.hpp | 13 +- include/oneapi/mkl/rng/predicates.hpp | 10 +- src/rng/backends/curand/curand_helper.hpp | 22 +- src/rng/backends/curand/mcg59.cpp | 14 +- src/rng/backends/curand/mrg32k3a.cpp | 106 ++++---- src/rng/backends/curand/philox4x32x10.cpp | 106 ++++---- src/rng/backends/mklcpu/cpu_common.hpp | 10 +- src/rng/backends/mklcpu/mcg59.cpp | 230 +++++++++--------- src/rng/backends/mklcpu/mrg32k3a.cpp | 228 +++++++++-------- src/rng/backends/mklcpu/philox4x32x10.cpp | 228 +++++++++-------- src/rng/backends/mklgpu/mcg59.cpp | 88 ++++--- .../backends/mklgpu/mkl_internal_rng_gpu.hpp | 32 +-- src/rng/backends/mklgpu/mrg32k3a.cpp | 86 +++---- src/rng/backends/mklgpu/philox4x32x10.cpp | 86 +++---- src/rng/rng_loader.cpp | 12 +- .../rng/include/engines_api_tests.hpp | 52 ++-- .../rng/include/rng_test_common.hpp | 16 +- .../rng/include/skip_ahead_test.hpp | 20 +- .../rng/include/statistics_check_test.hpp | 11 +- 26 files changed, 686 insertions(+), 817 deletions(-) diff --git a/include/oneapi/mkl/rng/detail/curand/onemkl_rng_curand.hpp b/include/oneapi/mkl/rng/detail/curand/onemkl_rng_curand.hpp index e55ac506a..32d4ccca7 100644 --- a/include/oneapi/mkl/rng/detail/curand/onemkl_rng_curand.hpp +++ b/include/oneapi/mkl/rng/detail/curand/onemkl_rng_curand.hpp @@ -65,10 +65,7 @@ #include "oneapi/mkl/detail/export.hpp" #include "oneapi/mkl/rng/detail/engine_impl.hpp" -namespace oneapi { -namespace mkl { -namespace 
rng { -namespace curand { +namespace oneapi::mkl::rng::curand { ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(cl::sycl::queue queue, std::uint64_t seed); @@ -88,9 +85,6 @@ ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mcg59(cl::sycl::queu ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mcg59( cl::sycl::queue queue, std::initializer_list seed); -} // namespace curand -} // namespace rng -} // namespace mkl -} // namespace oneapi +} // namespace oneapi::mkl::rng::curand #endif //_ONEMKL_RNG_CURAND_HPP_ diff --git a/include/oneapi/mkl/rng/detail/engine_impl.hpp b/include/oneapi/mkl/rng/detail/engine_impl.hpp index 976dc5167..7c1b4105f 100644 --- a/include/oneapi/mkl/rng/detail/engine_impl.hpp +++ b/include/oneapi/mkl/rng/detail/engine_impl.hpp @@ -28,10 +28,7 @@ #include "oneapi/mkl/rng/distributions.hpp" -namespace oneapi { -namespace mkl { -namespace rng { -namespace detail { +namespace oneapi::mkl::rng::detail { class engine_impl { public: @@ -41,58 +38,58 @@ class engine_impl { // Buffers API virtual void generate(const uniform& distr, std::int64_t n, - cl::sycl::buffer& r) = 0; + cl::sycl::buffer& r) = 0; virtual void generate(const uniform& distr, std::int64_t n, - cl::sycl::buffer& r) = 0; + cl::sycl::buffer& r) = 0; virtual void generate(const uniform& distr, - std::int64_t n, cl::sycl::buffer& r) = 0; + std::int64_t n, cl::sycl::buffer& r) = 0; virtual void generate(const uniform& distr, std::int64_t n, - cl::sycl::buffer& r) = 0; + cl::sycl::buffer& r) = 0; virtual void generate(const uniform& distr, std::int64_t n, - cl::sycl::buffer& r) = 0; + cl::sycl::buffer& r) = 0; virtual void generate(const gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) = 0; + std::int64_t n, cl::sycl::buffer& r) = 0; virtual void generate(const gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) = 0; + std::int64_t n, cl::sycl::buffer& r) = 0; virtual void generate(const gaussian& distr, std::int64_t n, - 
cl::sycl::buffer& r) = 0; + cl::sycl::buffer& r) = 0; virtual void generate(const gaussian& distr, std::int64_t n, - cl::sycl::buffer& r) = 0; + cl::sycl::buffer& r) = 0; virtual void generate(const lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) = 0; + std::int64_t n, cl::sycl::buffer& r) = 0; virtual void generate(const lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) = 0; + std::int64_t n, cl::sycl::buffer& r) = 0; virtual void generate(const lognormal& distr, std::int64_t n, - cl::sycl::buffer& r) = 0; + cl::sycl::buffer& r) = 0; virtual void generate(const lognormal& distr, std::int64_t n, - cl::sycl::buffer& r) = 0; + cl::sycl::buffer& r) = 0; virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) = 0; + std::int64_t n, cl::sycl::buffer& r) = 0; virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) = 0; + std::int64_t n, cl::sycl::buffer& r) = 0; virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) = 0; + std::int64_t n, cl::sycl::buffer& r) = 0; virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) = 0; + std::int64_t n, cl::sycl::buffer& r) = 0; virtual void generate(const bits& distr, std::int64_t n, - cl::sycl::buffer& r) = 0; + cl::sycl::buffer& r) = 0; // USM APIs virtual cl::sycl::event generate(const uniform& distr, @@ -185,9 +182,6 @@ class engine_impl { cl::sycl::queue queue_; }; -} // namespace detail -} // namespace rng -} // namespace mkl -} // namespace oneapi +} // namespace oneapi::mkl::rng::detail #endif //_ONEMKL_RNG_ENGINE_IMPL_HPP_ diff --git a/include/oneapi/mkl/rng/detail/mklcpu/onemkl_rng_mklcpu.hpp b/include/oneapi/mkl/rng/detail/mklcpu/onemkl_rng_mklcpu.hpp index 4a06ac6d4..d1dd6d9b7 100644 --- a/include/oneapi/mkl/rng/detail/mklcpu/onemkl_rng_mklcpu.hpp +++ b/include/oneapi/mkl/rng/detail/mklcpu/onemkl_rng_mklcpu.hpp @@ -26,10 +26,7 @@ #include "oneapi/mkl/detail/export.hpp" #include 
"oneapi/mkl/rng/detail/engine_impl.hpp" -namespace oneapi { -namespace mkl { -namespace rng { -namespace mklcpu { +namespace oneapi::mkl::rng::mklcpu { ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(cl::sycl::queue queue, std::uint64_t seed); @@ -49,9 +46,6 @@ ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mcg59(cl::sycl::queu ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mcg59( cl::sycl::queue queue, std::initializer_list seed); -} // namespace mklcpu -} // namespace rng -} // namespace mkl -} // namespace oneapi +} // namespace oneapi::mkl::rng::mklcpu #endif //_ONEMKL_RNG_MKLCPU_HPP_ diff --git a/include/oneapi/mkl/rng/detail/mklgpu/onemkl_rng_mklgpu.hpp b/include/oneapi/mkl/rng/detail/mklgpu/onemkl_rng_mklgpu.hpp index c7213a046..34a2f21e2 100644 --- a/include/oneapi/mkl/rng/detail/mklgpu/onemkl_rng_mklgpu.hpp +++ b/include/oneapi/mkl/rng/detail/mklgpu/onemkl_rng_mklgpu.hpp @@ -26,10 +26,7 @@ #include "oneapi/mkl/detail/export.hpp" #include "oneapi/mkl/rng/detail/engine_impl.hpp" -namespace oneapi { -namespace mkl { -namespace rng { -namespace mklgpu { +namespace oneapi::mkl::rng::mklgpu { ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(cl::sycl::queue queue, std::uint64_t seed); @@ -49,9 +46,6 @@ ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mcg59(cl::sycl::queu ONEMKL_EXPORT oneapi::mkl::rng::detail::engine_impl* create_mcg59( cl::sycl::queue queue, std::initializer_list seed); -} // namespace mklgpu -} // namespace rng -} // namespace mkl -} // namespace oneapi +} // namespace oneapi::mkl::rng::mklgpu #endif //_ONEMKL_RNG_MKLGPU_HPP_ diff --git a/include/oneapi/mkl/rng/detail/rng_loader.hpp b/include/oneapi/mkl/rng/detail/rng_loader.hpp index a37897bad..73e2167fb 100644 --- a/include/oneapi/mkl/rng/detail/rng_loader.hpp +++ b/include/oneapi/mkl/rng/detail/rng_loader.hpp @@ -28,10 +28,7 @@ #include "oneapi/mkl/rng/detail/engine_impl.hpp" -namespace oneapi { 
-namespace mkl { -namespace rng { -namespace detail { +namespace oneapi::mkl::rng::detail { ONEMKL_EXPORT engine_impl* create_philox4x32x10(oneapi::mkl::device libkey, cl::sycl::queue queue, std::uint64_t seed); @@ -51,9 +48,6 @@ ONEMKL_EXPORT engine_impl* create_mcg59(oneapi::mkl::device libkey, cl::sycl::qu ONEMKL_EXPORT engine_impl* create_mcg59(oneapi::mkl::device libkey, cl::sycl::queue queue, std::initializer_list seed); -} // namespace detail -} // namespace rng -} // namespace mkl -} // namespace oneapi +} // namespace oneapi::mkl::rng::detail #endif //_ONEMKL_RNG_LOADER_HPP_ diff --git a/include/oneapi/mkl/rng/distributions.hpp b/include/oneapi/mkl/rng/distributions.hpp index bed5af418..9f1788150 100644 --- a/include/oneapi/mkl/rng/distributions.hpp +++ b/include/oneapi/mkl/rng/distributions.hpp @@ -26,9 +26,8 @@ #include "oneapi/mkl/exceptions.hpp" -namespace oneapi { -namespace mkl { -namespace rng { +namespace oneapi::mkl::rng { + // Class template oneapi::mkl::rng::uniform // @@ -364,8 +363,6 @@ class bits { using result_type = UIntType; }; -} // namespace rng -} // namespace mkl -} // namespace oneapi +} // namespace oneapi::mkl::rng #endif //_ONEMKL_RNG_DISTRIBUTIONS_HPP_ diff --git a/include/oneapi/mkl/rng/engines.hpp b/include/oneapi/mkl/rng/engines.hpp index 8f2d601df..e9d795c6e 100644 --- a/include/oneapi/mkl/rng/engines.hpp +++ b/include/oneapi/mkl/rng/engines.hpp @@ -41,9 +41,7 @@ #include "oneapi/mkl/rng/detail/curand/onemkl_rng_curand.hpp" #endif -namespace oneapi { -namespace mkl { -namespace rng { +namespace oneapi::mkl::rng { // Class oneapi::mkl::rng::philox4x32x10 // @@ -55,10 +53,10 @@ class philox4x32x10 { public: static constexpr std::uint64_t default_seed = 0; - philox4x32x10(sycl::queue queue, std::uint64_t seed = default_seed) + philox4x32x10(cl::sycl::queue queue, std::uint64_t seed = default_seed) : pimpl_(detail::create_philox4x32x10(get_device_id(queue), queue, seed)) {} - philox4x32x10(sycl::queue queue, std::initializer_list 
seed) + philox4x32x10(cl::sycl::queue queue, std::initializer_list seed) : pimpl_(detail::create_philox4x32x10(get_device_id(queue), queue, seed)) {} #ifdef ENABLE_MKLCPU_BACKEND @@ -121,12 +119,12 @@ class philox4x32x10 { template friend void generate(const Distr& distr, Engine& engine, std::int64_t n, - sycl::buffer& r); + cl::sycl::buffer& r); template - friend sycl::event generate(const Distr& distr, Engine& engine, std::int64_t n, + friend cl::sycl::event generate(const Distr& distr, Engine& engine, std::int64_t n, typename Distr::result_type* r, - const std::vector& dependencies); + const std::vector& dependencies); }; // Class oneapi::mkl::rng::mrg32k3a @@ -139,10 +137,10 @@ class mrg32k3a { public: static constexpr std::uint32_t default_seed = 1; - mrg32k3a(sycl::queue queue, std::uint32_t seed = default_seed) + mrg32k3a(cl::sycl::queue queue, std::uint32_t seed = default_seed) : pimpl_(detail::create_mrg32k3a(get_device_id(queue), queue, seed)) {} - mrg32k3a(sycl::queue queue, std::initializer_list seed) + mrg32k3a(cl::sycl::queue queue, std::initializer_list seed) : pimpl_(detail::create_mrg32k3a(get_device_id(queue), queue, seed)) {} #ifdef ENABLE_MKLCPU_BACKEND @@ -202,17 +200,17 @@ class mrg32k3a { template friend void generate(const Distr& distr, Engine& engine, std::int64_t n, - sycl::buffer& r); + cl::sycl::buffer& r); template - friend sycl::event generate(const Distr& distr, Engine& engine, std::int64_t n, + friend cl::sycl::event generate(const Distr& distr, Engine& engine, std::int64_t n, typename Distr::result_type* r, - const std::vector& dependencies); + const std::vector& dependencies); }; // Class oneapi::mkl::rng::mcg59 // -// Represents Mcg59 counter-based pseudorandom number generator +// Represents MCG59 counter-based pseudorandom number generator // // Supported parallelization methods: // leapfrog @@ -220,7 +218,7 @@ class mcg59 { public: static constexpr std::uint64_t default_seed = 0; - mcg59(sycl::queue queue, std::uint64_t seed = 
default_seed) + mcg59(cl::sycl::queue queue, std::uint64_t seed = default_seed) : pimpl_(detail::create_mcg59(get_device_id(queue), queue, seed)) {} #ifdef ENABLE_MKLCPU_BACKEND @@ -273,19 +271,17 @@ class mcg59 { template friend void generate(const Distr& distr, Engine& engine, std::int64_t n, - sycl::buffer& r); + cl::sycl::buffer& r); template - friend sycl::event generate(const Distr& distr, Engine& engine, std::int64_t n, + friend cl::sycl::event generate(const Distr& distr, Engine& engine, std::int64_t n, typename Distr::result_type* r, - const std::vector& dependencies); + const std::vector& dependencies); }; // Default engine to be used for common cases using default_engine = philox4x32x10; -} // namespace rng -} // namespace mkl -} // namespace oneapi +} // namespace oneapi::mkl::rng #endif //_ONEMKL_RNG_ENGINES_HPP_ diff --git a/include/oneapi/mkl/rng/functions.hpp b/include/oneapi/mkl/rng/functions.hpp index b35be83e4..42c49b4e8 100644 --- a/include/oneapi/mkl/rng/functions.hpp +++ b/include/oneapi/mkl/rng/functions.hpp @@ -26,10 +26,7 @@ #include "oneapi/mkl/exceptions.hpp" #include "oneapi/mkl/rng/predicates.hpp" -namespace oneapi { -namespace mkl { -namespace rng { - +namespace oneapi::mkl::rng { // Function oneapi::mkl::rng::generate().Buffer API // Provides random numbers from a given engine with a given statistics // @@ -39,10 +36,10 @@ namespace rng { // std::int64_t n - number of random values to be generated // // Output parameters: -// cl::sycl::buffer& r - cl::sycl::buffer to the output vector +// cl::sycl::buffer& r - cl::sycl::buffer to the output vector template static inline void generate(const Distr& distr, Engine& engine, std::int64_t n, - sycl::buffer& r) { + cl::sycl::buffer& r) { generate_precondition(distr, engine, n, r); engine.pimpl_->generate(distr, n, r); } @@ -109,8 +106,6 @@ static inline void leapfrog(Engine& engine, std::uint64_t idx, std::uint64_t str engine.pimpl_->leapfrog(idx, stride); } -} // namespace rng -} // 
namespace mkl -} // namespace oneapi +} // namespace oneapi::mkl::rng #endif //_ONEMKL_RNG_FUNCTIONS_HPP_ diff --git a/include/oneapi/mkl/rng/predicates.hpp b/include/oneapi/mkl/rng/predicates.hpp index 620569198..ec42e69a6 100644 --- a/include/oneapi/mkl/rng/predicates.hpp +++ b/include/oneapi/mkl/rng/predicates.hpp @@ -26,15 +26,13 @@ #include "oneapi/mkl/exceptions.hpp" #include "oneapi/mkl/types.hpp" -namespace oneapi { -namespace mkl { -namespace rng { +namespace oneapi::mkl::rng { // Buffer APIs template inline void generate_precondition(const Distr& distr, Engine& engine, std::int64_t n, - sycl::buffer& r) { + cl::sycl::buffer& r) { #ifndef ONEMKL_DISABLE_PREDICATES if (n < 0 || n > r.get_count()) { throw oneapi::mkl::invalid_argument("rng", "generate", "n"); @@ -58,8 +56,6 @@ inline void generate_precondition(const Distr& distr, Engine& engine, std::int64 #endif } -} // namespace rng -} // namespace mkl -} // namespace oneapi +} // namespace oneapi::mkl::rng #endif //_ONEMKL_RNG_PREDICATES_HPP_ diff --git a/src/rng/backends/curand/curand_helper.hpp b/src/rng/backends/curand/curand_helper.hpp index 3884e05a9..972689822 100644 --- a/src/rng/backends/curand/curand_helper.hpp +++ b/src/rng/backends/curand/curand_helper.hpp @@ -69,10 +69,7 @@ #include "oneapi/mkl/types.hpp" -namespace oneapi { -namespace mkl { -namespace rng { -namespace curand { +namespace oneapi::mkl::rng::curand { class curand_error : virtual public std::runtime_error { protected: @@ -210,7 +207,7 @@ template static inline void range_transform_fp(cl::sycl::queue& queue, T a, T b, std::int64_t n, cl::sycl::buffer& r) { queue.submit([&](cl::sycl::handler& cgh) { - auto acc = r.template get_access(cgh); + auto acc = r.template get_access(cgh); cgh.parallel_for(cl::sycl::range<1>(n), [=](cl::sycl::id<1> id) { acc[id] = acc[id] * (b - a) + a; }); }); @@ -227,7 +224,7 @@ template static inline void range_transform_fp_accurate(cl::sycl::queue& queue, T a, T b, std::int64_t n, cl::sycl::buffer& r) { 
queue.submit([&](cl::sycl::handler& cgh) { - auto acc = r.template get_access(cgh); + auto acc = r.template get_access(cgh); cgh.parallel_for(cl::sycl::range<1>(n), [=](cl::sycl::id<1> id) { acc[id] = acc[id] * (b - a) + a; if (acc[id] < a) { @@ -276,8 +273,8 @@ inline void range_transform_int(cl::sycl::queue& queue, T a, T b, std::int64_t n cl::sycl::buffer& in, cl::sycl::buffer& out) { queue.submit([&](cl::sycl::handler& cgh) { - auto acc_in = in.template get_access(cgh); - auto acc_out = out.template get_access(cgh); + auto acc_in = in.template get_access(cgh); + auto acc_out = out.template get_access(cgh); cgh.parallel_for(cl::sycl::range<1>(n), [=](cl::sycl::id<1> id) { acc_out[id] = a + acc_in[id] % (b - a); }); }); @@ -312,8 +309,8 @@ static inline void sample_bernoulli_from_uniform(cl::sycl::queue& queue, float p cl::sycl::buffer in, cl::sycl::buffer& out) { queue.submit([&](cl::sycl::handler& cgh) { - auto acc_in = in.template get_access(cgh); - auto acc_out = out.template get_access(cgh); + auto acc_in = in.template get_access(cgh); + auto acc_out = out.template get_access(cgh); cgh.parallel_for(cl::sycl::range<1>(n), [=](cl::sycl::id<1> id) { acc_out[id] = acc_in[id] < p; }); }); @@ -326,9 +323,6 @@ static inline cl::sycl::event sample_bernoulli_from_uniform(cl::sycl::queue& que }); } -} // namespace curand -} // namespace rng -} // namespace mkl -} // namespace oneapi +} // namespace oneapi::mkl::rng::curand #endif // _MKL_RNG_CURAND_HELPER_HPP_ diff --git a/src/rng/backends/curand/mcg59.cpp b/src/rng/backends/curand/mcg59.cpp index 84cd7af42..83e39faa8 100644 --- a/src/rng/backends/curand/mcg59.cpp +++ b/src/rng/backends/curand/mcg59.cpp @@ -1,5 +1,5 @@ /******************************************************************************* - * cuRAND back-end Copyright (c) 2021, The Regents of the University of + * cuRAND back-end Copyright (c) 2022, The Regents of the University of * California, through Lawrence Berkeley National Laboratory (subject to 
receipt * of any required approvals from the U.S. Dept. of Energy). All rights * reserved. @@ -66,10 +66,7 @@ #include "oneapi/mkl/exceptions.hpp" #include "oneapi/mkl/rng/detail/curand/onemkl_rng_curand.hpp" -namespace oneapi { -namespace mkl { -namespace rng { -namespace curand { +namespace oneapi:: mkl::rng::curand { /* * Note that cuRAND consists of two pieces: a host (CPU) API and a device (GPU) * API. The host API acts like any standard library; the `curand.h' header is @@ -93,7 +90,7 @@ namespace curand { * */ -oneapi::mkl::rng::detail::engine_impl* create_mcg59(sycl::queue queue, std::uint64_t seed) { +oneapi::mkl::rng::detail::engine_impl* create_mcg59(cl::sycl::queue queue, std::uint64_t seed) { throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); return nullptr; } @@ -104,7 +101,4 @@ oneapi::mkl::rng::detail::engine_impl* create_mcg59(cl::sycl::queue queue, return nullptr; } -} // namespace curand -} // namespace rng -} // namespace mkl -} // namespace oneapi +} // namespace oneapi:: mkl::rng::curand diff --git a/src/rng/backends/curand/mrg32k3a.cpp b/src/rng/backends/curand/mrg32k3a.cpp index 69c50bbf5..86fbe756b 100644 --- a/src/rng/backends/curand/mrg32k3a.cpp +++ b/src/rng/backends/curand/mrg32k3a.cpp @@ -66,10 +66,7 @@ #include "oneapi/mkl/rng/detail/engine_impl.hpp" #include "oneapi/mkl/rng/engines.hpp" -namespace oneapi { -namespace mkl { -namespace rng { -namespace curand { +namespace oneapi::mkl::rng::curand { #if !defined(_WIN64) class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { @@ -96,10 +93,10 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { queue_ .submit([&](cl::sycl::handler& cgh) { - auto acc = r.get_access(cgh); + cl::sycl::accessor acc{ r, cgh, cl::sycl::read_write }; cgh.host_task([=](cl::sycl::interop_handle ih) { auto r_ptr = 
reinterpret_cast(ih.get_native_mem(acc)); @@ -113,10 +110,10 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { queue_ .submit([&](cl::sycl::handler& cgh) { - auto acc = r.get_access(cgh); + cl::sycl::accessor acc{ r, cgh, cl::sycl::read_write }; cgh.host_task([=](cl::sycl::interop_handle ih) { auto r_ptr = reinterpret_cast(ih.get_native_mem(acc)); @@ -130,11 +127,11 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate(const oneapi::mkl::rng::uniform< std::int32_t, oneapi::mkl::rng::uniform_method::standard>& distr, - std::int64_t n, cl::sycl::buffer& r) override { - cl::sycl::buffer ib(n); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer ib(n); queue_ .submit([&](cl::sycl::handler& cgh) { - auto acc = ib.get_access(cgh); + cl::sycl::accessor acc{ ib, cgh, cl::sycl::read_write }; cgh.host_task([=](cl::sycl::interop_handle ih) { auto ib_ptr = reinterpret_cast( ih.get_native_mem(acc)); @@ -148,10 +145,10 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { queue_ .submit([&](cl::sycl::handler& cgh) { - auto acc = r.get_access(cgh); + cl::sycl::accessor acc{ r, cgh, cl::sycl::read_write }; cgh.host_task([=](cl::sycl::interop_handle ih) { auto r_ptr = reinterpret_cast(ih.get_native_mem(acc)); @@ -165,10 +162,10 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { queue_ .submit([&](cl::sycl::handler& cgh) { - auto acc = r.get_access(cgh); + cl::sycl::accessor acc{ r, cgh, 
cl::sycl::read_write }; cgh.host_task([=](cl::sycl::interop_handle ih) { auto r_ptr = reinterpret_cast(ih.get_native_mem(acc)); @@ -182,10 +179,10 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate(const oneapi::mkl::rng::gaussian< float, oneapi::mkl::rng::gaussian_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { queue_ .submit([&](cl::sycl::handler& cgh) { - auto acc = r.get_access(cgh); + cl::sycl::accessor acc{ r, cgh, cl::sycl::read_write }; cgh.host_task([=](cl::sycl::interop_handle ih) { auto r_ptr = reinterpret_cast(ih.get_native_mem(acc)); @@ -199,10 +196,10 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate(const oneapi::mkl::rng::gaussian< double, oneapi::mkl::rng::gaussian_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { queue_ .submit([&](cl::sycl::handler& cgh) { - auto acc = r.get_access(cgh); + cl::sycl::accessor acc{ r, cgh, cl::sycl::read_write }; cgh.host_task([=](cl::sycl::interop_handle ih) { auto r_ptr = reinterpret_cast(ih.get_native_mem(acc)); @@ -216,7 +213,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate( const oneapi::mkl::rng::gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented( "rng", "mrg32ka engine", "ICDF method not used for pseudorandom generators in cuRAND backend"); @@ -224,7 +221,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate( const oneapi::mkl::rng::gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented( "rng", "mrg32ka engine", "ICDF method not used for pseudorandom generators in cuRAND backend"); @@ 
-232,10 +229,10 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate(const oneapi::mkl::rng::lognormal< float, oneapi::mkl::rng::lognormal_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { queue_ .submit([&](cl::sycl::handler& cgh) { - auto acc = r.get_access(cgh); + cl::sycl::accessor acc{ r, cgh, cl::sycl::read_write }; cgh.host_task([=](cl::sycl::interop_handle ih) { auto r_ptr = reinterpret_cast(ih.get_native_mem(acc)); @@ -249,10 +246,10 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate(const oneapi::mkl::rng::lognormal< double, oneapi::mkl::rng::lognormal_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { queue_ .submit([&](cl::sycl::handler& cgh) { - auto acc = r.get_access(cgh); + cl::sycl::accessor acc{ r, cgh, cl::sycl::read_write }; cgh.host_task([=](cl::sycl::interop_handle ih) { auto r_ptr = reinterpret_cast(ih.get_native_mem(acc)); @@ -266,7 +263,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate( const oneapi::mkl::rng::lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented( "rng", "mrg32ka engine", "ICDF method not used for pseudorandom generators in cuRAND backend"); @@ -274,45 +271,45 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate( const oneapi::mkl::rng::lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented( "rng", "mrg32ka engine", "ICDF method not used for pseudorandom generators in cuRAND backend"); } virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, 
cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented( "rng", "mrg32ka engine", "ICDF method not used for pseudorandom generators in cuRAND backend"); } virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented( "rng", "mrg32ka engine", "ICDF method not used for pseudorandom generators in cuRAND backend"); } virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented( "rng", "mrg32ka engine", "ICDF method not used for pseudorandom generators in cuRAND backend"); } virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented( "rng", "mrg32ka engine", "ICDF method not used for pseudorandom generators in cuRAND backend"); } virtual void generate(const bits& distr, std::int64_t n, - cl::sycl::buffer& r) override { + cl::sycl::buffer& r) override { queue_ .submit([&](cl::sycl::handler& cgh) { - auto acc = r.template get_access(cgh); + cl::sycl::accessor acc{ r, cgh, cl::sycl::read_write }; cgh.host_task([=](cl::sycl::interop_handle ih) { auto r_ptr = reinterpret_cast( ih.get_native_mem(acc)); @@ -589,104 +586,104 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32ka engine"); } virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32ka engine"); } virtual void generate(const oneapi::mkl::rng::uniform< std::int32_t, 
oneapi::mkl::rng::uniform_method::standard>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32ka engine"); } virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32ka engine"); } virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32ka engine"); } virtual void generate(const oneapi::mkl::rng::gaussian< float, oneapi::mkl::rng::gaussian_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32ka engine"); } virtual void generate(const oneapi::mkl::rng::gaussian< double, oneapi::mkl::rng::gaussian_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32ka engine"); } virtual void generate( const oneapi::mkl::rng::gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32ka engine"); } virtual void generate( const oneapi::mkl::rng::gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32ka engine"); } virtual void generate(const oneapi::mkl::rng::lognormal< float, oneapi::mkl::rng::lognormal_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32ka engine"); } virtual void generate(const 
oneapi::mkl::rng::lognormal< double, oneapi::mkl::rng::lognormal_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32ka engine"); } virtual void generate( const oneapi::mkl::rng::lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32ka engine"); } virtual void generate( const oneapi::mkl::rng::lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32ka engine"); } virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32ka engine"); } virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32ka engine"); } virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32ka engine"); } virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32ka engine"); } virtual void generate(const bits& distr, std::int64_t n, - cl::sycl::buffer& r) override { + cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32ka engine"); } @@ -845,7 +842,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { }; #endif -oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue, std::uint32_t seed) { +oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(cl::sycl::queue queue, 
std::uint32_t seed) { return new mrg32k3a_impl(queue, seed); } @@ -854,7 +851,4 @@ oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(cl::sycl::queue queue, return new mrg32k3a_impl(queue, seed); } -} // namespace curand -} // namespace rng -} // namespace mkl -} // namespace oneapi +} // namespace oneapi::mkl::rng::curand diff --git a/src/rng/backends/curand/philox4x32x10.cpp b/src/rng/backends/curand/philox4x32x10.cpp index 52cba0c36..9686369b8 100644 --- a/src/rng/backends/curand/philox4x32x10.cpp +++ b/src/rng/backends/curand/philox4x32x10.cpp @@ -66,10 +66,7 @@ #include "oneapi/mkl/exceptions.hpp" #include "oneapi/mkl/rng/detail/curand/onemkl_rng_curand.hpp" -namespace oneapi { -namespace mkl { -namespace rng { -namespace curand { +namespace oneapi::mkl::rng::curand { #if !defined(_WIN64) /* @@ -119,10 +116,10 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { virtual inline void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { queue_ .submit([&](cl::sycl::handler& cgh) { - auto acc = r.get_access(cgh); + cl::sycl::accessor acc{ r, cgh, cl::sycl::read_write }; cgh.host_task([=](cl::sycl::interop_handle ih) { auto r_ptr = reinterpret_cast(ih.get_native_mem(acc)); @@ -136,10 +133,10 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { queue_ .submit([&](cl::sycl::handler& cgh) { - auto acc = r.get_access(cgh); + cl::sycl::accessor acc{ r, cgh, cl::sycl::read_write }; cgh.host_task([=](cl::sycl::interop_handle ih) { auto r_ptr = reinterpret_cast(ih.get_native_mem(acc)); @@ -153,11 +150,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate(const oneapi::mkl::rng::uniform< std::int32_t, 
oneapi::mkl::rng::uniform_method::standard>& distr, - std::int64_t n, cl::sycl::buffer& r) override { - cl::sycl::buffer ib(n); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer ib(n); queue_ .submit([&](cl::sycl::handler& cgh) { - auto acc = ib.get_access(cgh); + cl::sycl::accessor acc{ ib, cgh, cl::sycl::read_write }; cgh.host_task([=](cl::sycl::interop_handle ih) { auto r_ptr = reinterpret_cast( ih.get_native_mem(acc)); @@ -171,10 +168,10 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { queue_ .submit([&](cl::sycl::handler& cgh) { - auto acc = r.get_access(cgh); + cl::sycl::accessor acc{ r, cgh, cl::sycl::read_write }; cgh.host_task([=](cl::sycl::interop_handle ih) { auto r_ptr = reinterpret_cast(ih.get_native_mem(acc)); @@ -188,10 +185,10 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { queue_ .submit([&](cl::sycl::handler& cgh) { - auto acc = r.get_access(cgh); + cl::sycl::accessor acc{ r, cgh, cl::sycl::read_write }; cgh.host_task([=](cl::sycl::interop_handle ih) { auto r_ptr = reinterpret_cast(ih.get_native_mem(acc)); @@ -205,10 +202,10 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate(const oneapi::mkl::rng::gaussian< float, oneapi::mkl::rng::gaussian_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { queue_ .submit([&](cl::sycl::handler& cgh) { - auto acc = r.get_access(cgh); + cl::sycl::accessor acc{ r, cgh, cl::sycl::read_write }; cgh.host_task([=](cl::sycl::interop_handle ih) { auto r_ptr = reinterpret_cast(ih.get_native_mem(acc)); @@ 
-222,10 +219,10 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate(const oneapi::mkl::rng::gaussian< double, oneapi::mkl::rng::gaussian_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { queue_ .submit([&](cl::sycl::handler& cgh) { - auto acc = r.get_access(cgh); + cl::sycl::accessor acc{ r, cgh, cl::sycl::read_write }; cgh.host_task([=](cl::sycl::interop_handle ih) { auto r_ptr = reinterpret_cast(ih.get_native_mem(acc)); @@ -239,7 +236,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate( const oneapi::mkl::rng::gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented( "rng", "philox4x32x10 engine", "ICDF method not used for pseudorandom generators in cuRAND backend"); @@ -247,7 +244,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate( const oneapi::mkl::rng::gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented( "rng", "philox4x32x10 engine", "ICDF method not used for pseudorandom generators in cuRAND backend"); @@ -255,10 +252,10 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate(const oneapi::mkl::rng::lognormal< float, oneapi::mkl::rng::lognormal_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { queue_ .submit([&](cl::sycl::handler& cgh) { - auto acc = r.get_access(cgh); + cl::sycl::accessor acc{ r, cgh, cl::sycl::read_write }; cgh.host_task([=](cl::sycl::interop_handle ih) { auto r_ptr = reinterpret_cast(ih.get_native_mem(acc)); @@ -272,10 +269,10 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { virtual 
void generate(const oneapi::mkl::rng::lognormal< double, oneapi::mkl::rng::lognormal_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { queue_ .submit([&](cl::sycl::handler& cgh) { - auto acc = r.get_access(cgh); + cl::sycl::accessor acc{ r, cgh, cl::sycl::read_write }; cgh.host_task([=](cl::sycl::interop_handle ih) { auto r_ptr = reinterpret_cast(ih.get_native_mem(acc)); @@ -289,7 +286,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate( const oneapi::mkl::rng::lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented( "rng", "philox4x32x10 engine", "ICDF method not used for pseudorandom generators in cuRAND backend"); @@ -297,45 +294,45 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate( const oneapi::mkl::rng::lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented( "rng", "philox4x32x10 engine", "ICDF method not used for pseudorandom generators in cuRAND backend"); } virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented( "rng", "philox4x32x10 engine", "ICDF method not used for pseudorandom generators in cuRAND backend"); } virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented( "rng", "philox4x32x10 engine", "ICDF method not used for pseudorandom generators in cuRAND backend"); } virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented( "rng", 
"philox4x32x10 engine", "ICDF method not used for pseudorandom generators in cuRAND backend"); } virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented( "rng", "philox4x32x10 engine", "ICDF method not used for pseudorandom generators in cuRAND backend"); } virtual void generate(const bits& distr, std::int64_t n, - cl::sycl::buffer& r) override { + cl::sycl::buffer& r) override { queue_ .submit([&](cl::sycl::handler& cgh) { - auto acc = r.template get_access(cgh); + cl::sycl::accessor acc{ r, cgh, cl::sycl::read_write }; cgh.host_task([=](cl::sycl::interop_handle ih) { auto r_ptr = reinterpret_cast( ih.get_native_mem(acc)); @@ -612,104 +609,104 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate(const oneapi::mkl::rng::uniform< std::int32_t, oneapi::mkl::rng::uniform_method::standard>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw 
oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate(const oneapi::mkl::rng::gaussian< float, oneapi::mkl::rng::gaussian_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate(const oneapi::mkl::rng::gaussian< double, oneapi::mkl::rng::gaussian_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate( const oneapi::mkl::rng::gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate( const oneapi::mkl::rng::gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate(const oneapi::mkl::rng::lognormal< float, oneapi::mkl::rng::lognormal_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate(const oneapi::mkl::rng::lognormal< double, oneapi::mkl::rng::lognormal_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate( const oneapi::mkl::rng::lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate( const oneapi::mkl::rng::lognormal& distr, - std::int64_t n, 
cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate(const bits& distr, std::int64_t n, - cl::sycl::buffer& r) override { + cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } @@ -868,7 +865,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { }; #endif -oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue, std::uint64_t seed) { +oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(cl::sycl::queue queue, std::uint64_t seed) { return new philox4x32x10_impl(queue, seed); } @@ -877,7 +874,4 @@ oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10( return new philox4x32x10_impl(queue, seed); } -} // namespace curand -} // namespace rng -} // namespace mkl -} // namespace oneapi +} // namespace oneapi::mkl::rng::curand \ No newline at end of file diff --git a/src/rng/backends/mklcpu/cpu_common.hpp b/src/rng/backends/mklcpu/cpu_common.hpp index 21bd10031..8c0579115 100644 --- 
a/src/rng/backends/mklcpu/cpu_common.hpp +++ b/src/rng/backends/mklcpu/cpu_common.hpp @@ -22,10 +22,7 @@ #include -namespace oneapi { -namespace mkl { -namespace rng { -namespace mklcpu { +namespace oneapi::mkl::rng::mklcpu { // host_task automatically uses run_on_host_intel if it is supported by the // compiler. Otherwise, it falls back to single_task. @@ -50,9 +47,6 @@ class kernel_name {}; template class kernel_name_usm {}; -} // namespace mklcpu -} // namespace rng -} // namespace mkl -} // namespace oneapi +} // namespace oneapi::mkl::rng::mklcpu #endif //_RNG_CPU_COMMON_HPP_ diff --git a/src/rng/backends/mklcpu/mcg59.cpp b/src/rng/backends/mklcpu/mcg59.cpp index f6e0147ee..a5c0d0ea0 100644 --- a/src/rng/backends/mklcpu/mcg59.cpp +++ b/src/rng/backends/mklcpu/mcg59.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2021 Intel Corporation +* Copyright 2022 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -28,12 +28,7 @@ #include "cpu_common.hpp" -namespace oneapi { -namespace mkl { -namespace rng { -namespace mklcpu { - -using namespace cl; +namespace oneapi::mkl::rng::mklcpu { class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { public: @@ -51,11 +46,11 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { // Buffers APIs virtual void generate(const uniform& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, static_cast(acc_stream.get_pointer()), n, @@ -65,11 +60,11 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const uniform& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, static_cast(acc_stream.get_pointer()), n, @@ -79,11 +74,11 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer 
stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, static_cast(acc_stream.get_pointer()), n, @@ -93,11 +88,11 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const uniform& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, static_cast(acc_stream.get_pointer()), n, @@ -107,11 +102,11 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const uniform& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; 
host_task>(cgh, [=]() { vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, static_cast(acc_stream.get_pointer()), n, @@ -121,11 +116,11 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, static_cast(acc_stream.get_pointer()), n, @@ -135,11 +130,11 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, static_cast(acc_stream.get_pointer()), n, @@ -149,11 +144,11 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const gaussian& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = 
stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, static_cast(acc_stream.get_pointer()), n, @@ -163,11 +158,11 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const gaussian& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, static_cast(acc_stream.get_pointer()), n, @@ -177,11 +172,11 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vsRngLognormal(VSL_RNG_METHOD_LOGNORMAL_BOXMULLER2, static_cast(acc_stream.get_pointer()), n, 
@@ -192,11 +187,11 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vdRngLognormal(VSL_RNG_METHOD_LOGNORMAL_BOXMULLER2, static_cast(acc_stream.get_pointer()), n, @@ -207,11 +202,11 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const lognormal& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vsRngLognormal(VSL_RNG_METHOD_LOGNORMAL_ICDF, static_cast(acc_stream.get_pointer()), n, @@ -222,11 +217,11 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const lognormal& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer 
stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vdRngLognormal(VSL_RNG_METHOD_LOGNORMAL_ICDF, static_cast(acc_stream.get_pointer()), n, @@ -237,11 +232,11 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, static_cast(acc_stream.get_pointer()), n, @@ -251,11 +246,11 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { std::uint32_t* r_ptr = acc_r.get_pointer(); viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, @@ -266,11 +261,11 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual 
void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { viRngPoisson(VSL_RNG_METHOD_POISSON_POISNORM, static_cast(acc_stream.get_pointer()), n, @@ -280,11 +275,11 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { std::uint32_t* r_ptr = acc_r.get_pointer(); viRngPoisson(VSL_RNG_METHOD_POISSON_POISNORM, @@ -295,11 +290,11 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const bits& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor 
acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { viRngUniformBits(VSL_RNG_METHOD_UNIFORMBITS_STD, static_cast(acc_stream.get_pointer()), n, @@ -314,7 +309,7 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, float* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, n, r, distr.a(), distr.b()); @@ -326,7 +321,7 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, double* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, n, r, distr.a(), distr.b()); @@ -338,7 +333,7 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, std::int32_t* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, n, r, distr.a(), distr.b()); @@ -350,7 +345,7 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, float* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, stream, n, r, 
distr.a(), @@ -363,7 +358,7 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, double* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, stream, n, r, distr.a(), @@ -376,7 +371,7 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, float* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, stream, n, r, distr.mean(), @@ -389,7 +384,7 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, double* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, stream, n, r, distr.mean(), @@ -402,7 +397,7 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, float* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, n, r, distr.mean(), @@ -415,7 +410,7 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, double* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return 
queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, n, r, distr.mean(), @@ -428,7 +423,7 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, float* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vsRngLognormal(VSL_RNG_METHOD_LOGNORMAL_BOXMULLER2, stream, n, r, distr.m(), @@ -441,7 +436,7 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, double* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vdRngLognormal(VSL_RNG_METHOD_LOGNORMAL_BOXMULLER2, stream, n, r, distr.m(), @@ -454,7 +449,7 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, float* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vsRngLognormal(VSL_RNG_METHOD_LOGNORMAL_ICDF, stream, n, r, distr.m(), distr.s(), @@ -467,7 +462,7 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, double* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vdRngLognormal(VSL_RNG_METHOD_LOGNORMAL_ICDF, stream, n, r, distr.m(), 
distr.s(), @@ -480,7 +475,7 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, std::int32_t* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, stream, n, r, distr.p()); @@ -492,7 +487,7 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, std::uint32_t* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, stream, n, @@ -505,7 +500,7 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { const poisson& distr, std::int64_t n, std::int32_t* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { viRngPoisson(VSL_RNG_METHOD_POISSON_POISNORM, stream, n, r, distr.lambda()); @@ -517,7 +512,7 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { const poisson& distr, std::int64_t n, std::uint32_t* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { viRngPoisson(VSL_RNG_METHOD_POISSON_POISNORM, stream, n, @@ -530,7 +525,7 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { std::uint32_t* r, const std::vector& dependencies) override { 
cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { viRngUniformBits(VSL_RNG_METHOD_UNIFORMBITS_STD, stream, n, @@ -573,7 +568,4 @@ oneapi::mkl::rng::detail::engine_impl* create_mcg59(cl::sycl::queue queue, throw oneapi::mkl::unimplemented("rng", "mcg59 skipAheadEx"); } -} // namespace mklcpu -} // namespace rng -} // namespace mkl -} // namespace oneapi +} // namespace oneapi::mkl::rng::mklcpu diff --git a/src/rng/backends/mklcpu/mrg32k3a.cpp b/src/rng/backends/mklcpu/mrg32k3a.cpp index e51b59737..3c14ad62f 100644 --- a/src/rng/backends/mklcpu/mrg32k3a.cpp +++ b/src/rng/backends/mklcpu/mrg32k3a.cpp @@ -28,12 +28,7 @@ #include "cpu_common.hpp" -namespace oneapi { -namespace mkl { -namespace rng { -namespace mklcpu { - -using namespace cl; +namespace oneapi::mkl::rng::mklcpu { class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { public: @@ -58,11 +53,11 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { // Buffers APIs virtual void generate(const uniform& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, static_cast(acc_stream.get_pointer()), n, @@ -72,11 +67,11 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const uniform& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer 
stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, static_cast(acc_stream.get_pointer()), n, @@ -86,11 +81,11 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, static_cast(acc_stream.get_pointer()), n, @@ -100,11 +95,11 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const uniform& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, 
[=]() { vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, static_cast(acc_stream.get_pointer()), n, @@ -114,11 +109,11 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const uniform& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, static_cast(acc_stream.get_pointer()), n, @@ -128,11 +123,11 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, static_cast(acc_stream.get_pointer()), n, @@ -142,11 +137,11 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - 
auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, static_cast(acc_stream.get_pointer()), n, @@ -156,11 +151,11 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const gaussian& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, static_cast(acc_stream.get_pointer()), n, @@ -170,11 +165,11 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const gaussian& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, static_cast(acc_stream.get_pointer()), n, @@ -184,11 +179,11 @@ class 
mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vsRngLognormal(VSL_RNG_METHOD_LOGNORMAL_BOXMULLER2, static_cast(acc_stream.get_pointer()), n, @@ -199,11 +194,11 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vdRngLognormal(VSL_RNG_METHOD_LOGNORMAL_BOXMULLER2, static_cast(acc_stream.get_pointer()), n, @@ -214,11 +209,11 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const lognormal& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer 
stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vsRngLognormal(VSL_RNG_METHOD_LOGNORMAL_ICDF, static_cast(acc_stream.get_pointer()), n, @@ -229,11 +224,11 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const lognormal& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vdRngLognormal(VSL_RNG_METHOD_LOGNORMAL_ICDF, static_cast(acc_stream.get_pointer()), n, @@ -244,11 +239,11 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, static_cast(acc_stream.get_pointer()), n, @@ -258,11 +253,11 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void 
generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { std::uint32_t* r_ptr = acc_r.get_pointer(); viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, @@ -273,11 +268,11 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { viRngPoisson(VSL_RNG_METHOD_POISSON_POISNORM, static_cast(acc_stream.get_pointer()), n, @@ -287,11 +282,11 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + 
cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { std::uint32_t* r_ptr = acc_r.get_pointer(); viRngPoisson(VSL_RNG_METHOD_POISSON_POISNORM, @@ -302,11 +297,11 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const bits& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { viRngUniformBits(VSL_RNG_METHOD_UNIFORMBITS_STD, static_cast(acc_stream.get_pointer()), n, @@ -321,7 +316,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, float* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, n, r, distr.a(), distr.b()); @@ -333,7 +328,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, double* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, n, r, distr.a(), distr.b()); @@ -345,7 +340,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { 
std::int64_t n, std::int32_t* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, n, r, distr.a(), distr.b()); @@ -357,7 +352,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, float* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, stream, n, r, distr.a(), @@ -370,7 +365,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, double* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, stream, n, r, distr.a(), @@ -383,7 +378,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, float* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, stream, n, r, distr.mean(), @@ -396,7 +391,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, double* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& 
cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, stream, n, r, distr.mean(), @@ -409,7 +404,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, float* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, n, r, distr.mean(), @@ -422,7 +417,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, double* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, n, r, distr.mean(), @@ -435,7 +430,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, float* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vsRngLognormal(VSL_RNG_METHOD_LOGNORMAL_BOXMULLER2, stream, n, r, distr.m(), @@ -448,7 +443,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, double* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vdRngLognormal(VSL_RNG_METHOD_LOGNORMAL_BOXMULLER2, stream, n, r, distr.m(), @@ -461,7 +456,7 @@ class mrg32k3a_impl : public 
oneapi::mkl::rng::detail::engine_impl { std::int64_t n, float* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vsRngLognormal(VSL_RNG_METHOD_LOGNORMAL_ICDF, stream, n, r, distr.m(), distr.s(), @@ -474,7 +469,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, double* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vdRngLognormal(VSL_RNG_METHOD_LOGNORMAL_ICDF, stream, n, r, distr.m(), distr.s(), @@ -487,7 +482,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, std::int32_t* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, stream, n, r, distr.p()); @@ -499,7 +494,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, std::uint32_t* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, stream, n, @@ -512,7 +507,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { const poisson& distr, std::int64_t n, std::int32_t* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return 
queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { viRngPoisson(VSL_RNG_METHOD_POISSON_POISNORM, stream, n, r, distr.lambda()); @@ -524,7 +519,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { const poisson& distr, std::int64_t n, std::uint32_t* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { viRngPoisson(VSL_RNG_METHOD_POISSON_POISNORM, stream, n, @@ -537,7 +532,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { std::uint32_t* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>( cgh, [=]() { viRngUniformBits(VSL_RNG_METHOD_UNIFORMBITS_STD, stream, n, r); }); @@ -578,7 +573,4 @@ oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(cl::sycl::queue queue, return new mrg32k3a_impl(queue, seed); } -} // namespace mklcpu -} // namespace rng -} // namespace mkl -} // namespace oneapi +} // namespace oneapi::mkl::rng::mklcpu diff --git a/src/rng/backends/mklcpu/philox4x32x10.cpp b/src/rng/backends/mklcpu/philox4x32x10.cpp index 253d927ef..fe81cfd47 100644 --- a/src/rng/backends/mklcpu/philox4x32x10.cpp +++ b/src/rng/backends/mklcpu/philox4x32x10.cpp @@ -28,12 +28,7 @@ #include "cpu_common.hpp" -namespace oneapi { -namespace mkl { -namespace rng { -namespace mklcpu { - -using namespace cl; +namespace oneapi::mkl::rng::mklcpu { class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { public: @@ -60,11 +55,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { // Buffers APIs virtual void 
generate(const uniform& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, static_cast(acc_stream.get_pointer()), n, @@ -74,11 +69,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const uniform& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, static_cast(acc_stream.get_pointer()), n, @@ -88,11 +83,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, 
cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, static_cast(acc_stream.get_pointer()), n, @@ -102,11 +97,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const uniform& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, static_cast(acc_stream.get_pointer()), n, @@ -116,11 +111,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const uniform& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, static_cast(acc_stream.get_pointer()), n, @@ -130,11 +125,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - 
queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, static_cast(acc_stream.get_pointer()), n, @@ -144,11 +139,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, static_cast(acc_stream.get_pointer()), n, @@ -158,11 +153,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const gaussian& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, 
[=]() { vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, static_cast(acc_stream.get_pointer()), n, @@ -172,11 +167,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const gaussian& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, static_cast(acc_stream.get_pointer()), n, @@ -186,11 +181,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vsRngLognormal(VSL_RNG_METHOD_LOGNORMAL_BOXMULLER2, static_cast(acc_stream.get_pointer()), n, @@ -201,11 +196,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = 
stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vdRngLognormal(VSL_RNG_METHOD_LOGNORMAL_BOXMULLER2, static_cast(acc_stream.get_pointer()), n, @@ -216,11 +211,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const lognormal& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vsRngLognormal(VSL_RNG_METHOD_LOGNORMAL_ICDF, static_cast(acc_stream.get_pointer()), n, @@ -231,11 +226,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const lognormal& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { vdRngLognormal(VSL_RNG_METHOD_LOGNORMAL_ICDF, 
static_cast(acc_stream.get_pointer()), n, @@ -246,11 +241,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, static_cast(acc_stream.get_pointer()), n, @@ -260,11 +255,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { std::uint32_t* r_ptr = acc_r.get_pointer(); viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, @@ -275,11 +270,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + 
std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { viRngPoisson(VSL_RNG_METHOD_POISSON_POISNORM, static_cast(acc_stream.get_pointer()), n, @@ -289,11 +284,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + std::int64_t n, cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { std::uint32_t* r_ptr = acc_r.get_pointer(); viRngPoisson(VSL_RNG_METHOD_POISSON_POISNORM, @@ -304,11 +299,11 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { } virtual void generate(const bits& distr, std::int64_t n, - cl::sycl::buffer& r) override { - sycl::buffer stream_buf(static_cast(stream_), state_size_); - queue_.submit([&](sycl::handler& cgh) { - auto acc_stream = stream_buf.get_access(cgh); - auto acc_r = r.get_access(cgh); + cl::sycl::buffer& r) override { + cl::sycl::buffer stream_buf(static_cast(stream_), state_size_); + queue_.submit([&](cl::sycl::handler& cgh) { + cl::sycl::accessor acc_stream{ stream_buf, cgh, cl::sycl::read_write }; + cl::sycl::accessor acc_r{ r, cgh, cl::sycl::read_write }; host_task>(cgh, [=]() { viRngUniformBits(VSL_RNG_METHOD_UNIFORMBITS_STD, static_cast(acc_stream.get_pointer()), n, @@ -323,7 +318,7 @@ class 
philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, float* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, n, r, distr.a(), distr.b()); @@ -335,7 +330,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, double* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, n, r, distr.a(), distr.b()); @@ -347,7 +342,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, std::int32_t* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { viRngUniform(VSL_RNG_METHOD_UNIFORM_STD, stream, n, r, distr.a(), distr.b()); @@ -359,7 +354,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, float* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, stream, n, r, distr.a(), @@ -372,7 +367,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, double* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - 
return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD_ACCURATE, stream, n, r, distr.a(), @@ -385,7 +380,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, float* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, stream, n, r, distr.mean(), @@ -398,7 +393,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, double* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2, stream, n, r, distr.mean(), @@ -411,7 +406,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, float* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, n, r, distr.mean(), @@ -424,7 +419,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, double* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { 
vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_ICDF, stream, n, r, distr.mean(), @@ -437,7 +432,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, float* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vsRngLognormal(VSL_RNG_METHOD_LOGNORMAL_BOXMULLER2, stream, n, r, distr.m(), @@ -450,7 +445,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, double* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vdRngLognormal(VSL_RNG_METHOD_LOGNORMAL_BOXMULLER2, stream, n, r, distr.m(), @@ -463,7 +458,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, float* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vsRngLognormal(VSL_RNG_METHOD_LOGNORMAL_ICDF, stream, n, r, distr.m(), distr.s(), @@ -476,7 +471,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, double* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { vdRngLognormal(VSL_RNG_METHOD_LOGNORMAL_ICDF, stream, n, r, distr.m(), distr.s(), @@ -489,7 +484,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, 
std::int32_t* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, stream, n, r, distr.p()); @@ -501,7 +496,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { std::int64_t n, std::uint32_t* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, stream, n, @@ -514,7 +509,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { const poisson& distr, std::int64_t n, std::int32_t* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { viRngPoisson(VSL_RNG_METHOD_POISSON_POISNORM, stream, n, r, distr.lambda()); @@ -526,7 +521,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { const poisson& distr, std::int64_t n, std::uint32_t* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>(cgh, [=]() { viRngPoisson(VSL_RNG_METHOD_POISSON_POISNORM, stream, n, @@ -539,7 +534,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { std::uint32_t* r, const std::vector& dependencies) override { cl::sycl::event::wait_and_throw(dependencies); - return queue_.submit([&](sycl::handler& cgh) { + return 
queue_.submit([&](cl::sycl::handler& cgh) { VSLStreamStatePtr stream = stream_; host_task>( cgh, [=]() { viRngUniformBits(VSL_RNG_METHOD_UNIFORMBITS_STD, stream, n, r); }); @@ -581,7 +576,4 @@ oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10( return new philox4x32x10_impl(queue, seed); } -} // namespace mklcpu -} // namespace rng -} // namespace mkl -} // namespace oneapi +} // namespace oneapi::mkl::rng::mklcpu diff --git a/src/rng/backends/mklgpu/mcg59.cpp b/src/rng/backends/mklgpu/mcg59.cpp index 5721e2218..d19d965c9 100644 --- a/src/rng/backends/mklgpu/mcg59.cpp +++ b/src/rng/backends/mklgpu/mcg59.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2021 Intel Corporation +* Copyright 2022 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,10 +29,7 @@ #include "mkl_internal_rng_gpu.hpp" -namespace oneapi { -namespace mkl { -namespace rng { -namespace mklgpu { +namespace oneapi::mkl::rng::mklgpu { #if !defined(_WIN64) class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { @@ -44,7 +41,7 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { } mcg59_impl(const mcg59_impl* other) : oneapi::mkl::rng::detail::engine_impl(*other) { - sycl::queue queue(other->queue_); + cl::sycl::queue queue(other->queue_); engine_ = oneapi::mkl::rng::detail::gpu::create_engine( queue, other->engine_); } @@ -53,104 +50,104 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { 
oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const oneapi::mkl::rng::uniform< std::int32_t, oneapi::mkl::rng::uniform_method::standard>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const oneapi::mkl::rng::gaussian< float, oneapi::mkl::rng::gaussian_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const oneapi::mkl::rng::gaussian< double, oneapi::mkl::rng::gaussian_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate( const oneapi::mkl::rng::gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate( const oneapi::mkl::rng::gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const oneapi::mkl::rng::lognormal< float, 
oneapi::mkl::rng::lognormal_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const oneapi::mkl::rng::lognormal< double, oneapi::mkl::rng::lognormal_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate( const oneapi::mkl::rng::lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate( const oneapi::mkl::rng::lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const bits& distr, std::int64_t n, - cl::sycl::buffer& r) override { + cl::sycl::buffer& r) override { 
oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } @@ -315,104 +312,104 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); } virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); } virtual void generate(const oneapi::mkl::rng::uniform< std::int32_t, oneapi::mkl::rng::uniform_method::standard>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); } virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); } virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); } virtual void generate(const oneapi::mkl::rng::gaussian< float, oneapi::mkl::rng::gaussian_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); } virtual void generate(const oneapi::mkl::rng::gaussian< double, oneapi::mkl::rng::gaussian_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); } virtual void generate( const oneapi::mkl::rng::gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override 
{ + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); } virtual void generate( const oneapi::mkl::rng::gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); } virtual void generate(const oneapi::mkl::rng::lognormal< float, oneapi::mkl::rng::lognormal_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); } virtual void generate(const oneapi::mkl::rng::lognormal< double, oneapi::mkl::rng::lognormal_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); } virtual void generate( const oneapi::mkl::rng::lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); } virtual void generate( const oneapi::mkl::rng::lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); } virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); } virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); } virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); } virtual void generate(const poisson& distr, 
- std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); } virtual void generate(const bits& distr, std::int64_t n, - cl::sycl::buffer& r) override { + cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mcg59 engine"); } @@ -571,7 +568,7 @@ class mcg59_impl : public oneapi::mkl::rng::detail::engine_impl { }; #endif -oneapi::mkl::rng::detail::engine_impl* create_mcg59(sycl::queue queue, std::uint64_t seed) { +oneapi::mkl::rng::detail::engine_impl* create_mcg59(cl::sycl::queue queue, std::uint64_t seed) { return new mcg59_impl(queue, seed); } @@ -580,7 +577,4 @@ oneapi::mkl::rng::detail::engine_impl* create_mcg59(cl::sycl::queue queue, throw oneapi::mkl::unimplemented("rng", "mcg59 skipAheadEx"); } -} // namespace mklgpu -} // namespace rng -} // namespace mkl -} // namespace oneapi +} // namespace oneapi::mkl::rng::mklgpu diff --git a/src/rng/backends/mklgpu/mkl_internal_rng_gpu.hpp b/src/rng/backends/mklgpu/mkl_internal_rng_gpu.hpp index 644688074..25f11812d 100755 --- a/src/rng/backends/mklgpu/mkl_internal_rng_gpu.hpp +++ b/src/rng/backends/mklgpu/mkl_internal_rng_gpu.hpp @@ -22,10 +22,7 @@ #include -namespace oneapi { -namespace mkl { -namespace rng { -namespace detail { +namespace oneapi::mkl::rng::detail { template class engine_base_impl; @@ -33,45 +30,42 @@ class engine_base_impl; namespace gpu { template -engine_base_impl* create_engine(sycl::queue& queue, std::uint64_t seed); +engine_base_impl* create_engine(cl::sycl::queue& queue, std::uint64_t seed); template -engine_base_impl* create_engine(sycl::queue& queue, std::int64_t n, +engine_base_impl* create_engine(cl::sycl::queue& queue, std::int64_t n, const unsigned int* seed_ptr); template -engine_base_impl* create_engine(sycl::queue& queue, +engine_base_impl* create_engine(cl::sycl::queue& queue, engine_base_impl* other_impl); template -void skip_ahead(sycl::queue& queue, 
engine_base_impl* impl, std::uint64_t num_to_skip); +void skip_ahead(cl::sycl::queue& queue, engine_base_impl* impl, std::uint64_t num_to_skip); template -void skip_ahead(sycl::queue& queue, engine_base_impl* impl, +void skip_ahead(cl::sycl::queue& queue, engine_base_impl* impl, std::initializer_list num_to_skip); template -void leapfrog(sycl::queue& queue, engine_base_impl* impl, std::uint64_t idx, +void leapfrog(cl::sycl::queue& queue, engine_base_impl* impl, std::uint64_t idx, std::uint64_t stride); template -void delete_engine(sycl::queue& queue, engine_base_impl* impl); +void delete_engine(cl::sycl::queue& queue, engine_base_impl* impl); template -sycl::event generate(sycl::queue& queue, const DistrType& distr, +cl::sycl::event generate(cl::sycl::queue& queue, const DistrType& distr, engine_base_impl* engine, std::int64_t n, - sycl::buffer& r); + cl::sycl::buffer& r); template -sycl::event generate(sycl::queue& queue, const DistrType& distr, +cl::sycl::event generate(cl::sycl::queue& queue, const DistrType& distr, engine_base_impl* engine, std::int64_t n, typename DistrType::result_type* r, - const std::vector& dependencies = {}); + const std::vector& dependencies = {}); } // namespace gpu -} // namespace detail -} // namespace rng -} // namespace mkl -} // namespace oneapi +} // namespace oneapi::mkl::rng::detail #endif //_MKL_INTERNAL_RNG_GPU_HPP_ diff --git a/src/rng/backends/mklgpu/mrg32k3a.cpp b/src/rng/backends/mklgpu/mrg32k3a.cpp index 3a5f017b9..5e6a0019f 100644 --- a/src/rng/backends/mklgpu/mrg32k3a.cpp +++ b/src/rng/backends/mklgpu/mrg32k3a.cpp @@ -29,10 +29,7 @@ #include "mkl_internal_rng_gpu.hpp" -namespace oneapi { -namespace mkl { -namespace rng { -namespace mklgpu { +namespace oneapi::mkl::rng::mklgpu { #if !defined(_WIN64) class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { @@ -50,7 +47,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { } mrg32k3a_impl(const mrg32k3a_impl* other) : 
oneapi::mkl::rng::detail::engine_impl(*other) { - sycl::queue queue(other->queue_); + cl::sycl::queue queue(other->queue_); engine_ = oneapi::mkl::rng::detail::gpu::create_engine( queue, other->engine_); } @@ -59,104 +56,104 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const oneapi::mkl::rng::uniform< std::int32_t, oneapi::mkl::rng::uniform_method::standard>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const oneapi::mkl::rng::gaussian< float, oneapi::mkl::rng::gaussian_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const oneapi::mkl::rng::gaussian< double, oneapi::mkl::rng::gaussian_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + 
std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate( const oneapi::mkl::rng::gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate( const oneapi::mkl::rng::gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const oneapi::mkl::rng::lognormal< float, oneapi::mkl::rng::lognormal_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const oneapi::mkl::rng::lognormal< double, oneapi::mkl::rng::lognormal_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate( const oneapi::mkl::rng::lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate( const oneapi::mkl::rng::lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, 
cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const bits& distr, std::int64_t n, - cl::sycl::buffer& r) override { + cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } @@ -321,104 +318,104 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32k3a engine"); } virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32k3a engine"); } virtual void generate(const oneapi::mkl::rng::uniform< std::int32_t, oneapi::mkl::rng::uniform_method::standard>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32k3a engine"); } virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32k3a engine"); } virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", 
"mrg32k3a engine"); } virtual void generate(const oneapi::mkl::rng::gaussian< float, oneapi::mkl::rng::gaussian_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32k3a engine"); } virtual void generate(const oneapi::mkl::rng::gaussian< double, oneapi::mkl::rng::gaussian_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32k3a engine"); } virtual void generate( const oneapi::mkl::rng::gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32k3a engine"); } virtual void generate( const oneapi::mkl::rng::gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32k3a engine"); } virtual void generate(const oneapi::mkl::rng::lognormal< float, oneapi::mkl::rng::lognormal_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32k3a engine"); } virtual void generate(const oneapi::mkl::rng::lognormal< double, oneapi::mkl::rng::lognormal_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32k3a engine"); } virtual void generate( const oneapi::mkl::rng::lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32k3a engine"); } virtual void generate( const oneapi::mkl::rng::lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { 
throw oneapi::mkl::unimplemented("rng", "mrg32k3a engine"); } virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32k3a engine"); } virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32k3a engine"); } virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32k3a engine"); } virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32k3a engine"); } virtual void generate(const bits& distr, std::int64_t n, - cl::sycl::buffer& r) override { + cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "mrg32k3a engine"); } @@ -577,7 +574,7 @@ class mrg32k3a_impl : public oneapi::mkl::rng::detail::engine_impl { }; #endif -oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue, std::uint32_t seed) { +oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(cl::sycl::queue queue, std::uint32_t seed) { return new mrg32k3a_impl(queue, seed); } @@ -586,7 +583,4 @@ oneapi::mkl::rng::detail::engine_impl* create_mrg32k3a(cl::sycl::queue queue, return new mrg32k3a_impl(queue, seed); } -} // namespace mklgpu -} // namespace rng -} // namespace mkl -} // namespace oneapi +} // namespace oneapi::mkl::rng::mklgpu diff --git a/src/rng/backends/mklgpu/philox4x32x10.cpp b/src/rng/backends/mklgpu/philox4x32x10.cpp index 514d1a199..9961ac75e 100644 --- a/src/rng/backends/mklgpu/philox4x32x10.cpp +++ b/src/rng/backends/mklgpu/philox4x32x10.cpp @@ -29,10 +29,7 @@ #include "mkl_internal_rng_gpu.hpp" -namespace oneapi { -namespace mkl 
{ -namespace rng { -namespace mklgpu { +namespace oneapi::mkl::rng::mklgpu { #if !defined(_WIN64) class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { @@ -51,7 +48,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { philox4x32x10_impl(const philox4x32x10_impl* other) : oneapi::mkl::rng::detail::engine_impl(*other) { - sycl::queue queue(other->queue_); + cl::sycl::queue queue(other->queue_); engine_ = oneapi::mkl::rng::detail::gpu::create_engine( queue, other->engine_); } @@ -60,104 +57,104 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const oneapi::mkl::rng::uniform< std::int32_t, oneapi::mkl::rng::uniform_method::standard>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const oneapi::mkl::rng::gaussian< float, oneapi::mkl::rng::gaussian_method::box_muller2>& distr, - std::int64_t n, 
cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const oneapi::mkl::rng::gaussian< double, oneapi::mkl::rng::gaussian_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate( const oneapi::mkl::rng::gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate( const oneapi::mkl::rng::gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const oneapi::mkl::rng::lognormal< float, oneapi::mkl::rng::lognormal_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const oneapi::mkl::rng::lognormal< double, oneapi::mkl::rng::lognormal_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate( const oneapi::mkl::rng::lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate( const oneapi::mkl::rng::lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void 
generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } virtual void generate(const bits& distr, std::int64_t n, - cl::sycl::buffer& r) override { + cl::sycl::buffer& r) override { oneapi::mkl::rng::detail::gpu::generate(queue_, distr, engine_, n, r); } @@ -323,104 +320,104 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate(const oneapi::mkl::rng::uniform< std::int32_t, oneapi::mkl::rng::uniform_method::standard>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, 
cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate( const oneapi::mkl::rng::uniform& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate(const oneapi::mkl::rng::gaussian< float, oneapi::mkl::rng::gaussian_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate(const oneapi::mkl::rng::gaussian< double, oneapi::mkl::rng::gaussian_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate( const oneapi::mkl::rng::gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate( const oneapi::mkl::rng::gaussian& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate(const oneapi::mkl::rng::lognormal< float, oneapi::mkl::rng::lognormal_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate(const oneapi::mkl::rng::lognormal< double, oneapi::mkl::rng::lognormal_method::box_muller2>& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual 
void generate( const oneapi::mkl::rng::lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate( const oneapi::mkl::rng::lognormal& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate(const bernoulli& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate(const poisson& distr, - std::int64_t n, cl::sycl::buffer& r) override { + std::int64_t n, cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } virtual void generate(const bits& distr, std::int64_t n, - cl::sycl::buffer& r) override { + cl::sycl::buffer& r) override { throw oneapi::mkl::unimplemented("rng", "philox4x32x10 engine"); } @@ -579,7 +576,7 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { }; #endif -oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue, std::uint64_t seed) { +oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(cl::sycl::queue queue, std::uint64_t seed) { return new philox4x32x10_impl(queue, seed); } @@ -588,7 +585,4 @@ oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10( return new philox4x32x10_impl(queue, seed); } -} // namespace 
mklgpu -} // namespace rng -} // namespace mkl -} // namespace oneapi +} // namespace oneapi::mkl::rng::mklgpu diff --git a/src/rng/rng_loader.cpp b/src/rng/rng_loader.cpp index 675fc5e42..d0bc85bf2 100644 --- a/src/rng/rng_loader.cpp +++ b/src/rng/rng_loader.cpp @@ -22,10 +22,9 @@ #include "function_table_initializer.hpp" #include "rng/function_table.hpp" -namespace oneapi { -namespace mkl { -namespace rng { -namespace detail { +using namespace cl; + +namespace oneapi::mkl::rng::detail { static oneapi::mkl::detail::table_initializer function_tables; @@ -58,7 +57,4 @@ engine_impl* create_mcg59(oneapi::mkl::device libkey, cl::sycl::queue queue, return function_tables[libkey].create_mcg59_ex_sycl(queue, seed); } -} // namespace detail -} // namespace rng -} // namespace mkl -} // namespace oneapi +} // namespace oneapi::mkl::rng::detail \ No newline at end of file diff --git a/tests/unit_tests/rng/include/engines_api_tests.hpp b/tests/unit_tests/rng/include/engines_api_tests.hpp index 68b92fa8a..21a47e8b5 100644 --- a/tests/unit_tests/rng/include/engines_api_tests.hpp +++ b/tests/unit_tests/rng/include/engines_api_tests.hpp @@ -33,15 +33,15 @@ template class engines_constructors_test { template - void generate(oneapi::mkl::rng::bits distr, T& engine, std::uint64_t n, - cl::sycl::buffer& buf) { + void test_generate(oneapi::mkl::rng::bits distr, T& engine, std::uint64_t n, + cl::sycl::buffer& buf) { oneapi::mkl::rng::generate(distr, engine, n, buf); } template <> - void generate(oneapi::mkl::rng::bits distr, + void test_generate(oneapi::mkl::rng::bits distr, oneapi::mkl::rng::mcg59& engine, std::uint64_t n, - cl::sycl::buffer& buf) { + cl::sycl::buffer& buf) { oneapi::mkl::rng::generate(distr, engine, n / 2, buf); } @@ -63,15 +63,15 @@ class engines_constructors_test { oneapi::mkl::rng::bits distr; - cl::sycl::buffer r1_buffer(r1.data(), r1.size()); - cl::sycl::buffer r2_buffer(r2.data(), r2.size()); - cl::sycl::buffer r3_buffer(r3.data(), r3.size()); - 
cl::sycl::buffer r4_buffer(r4.data(), r4.size()); + cl::sycl::buffer r1_buffer(r1.data(), r1.size()); + cl::sycl::buffer r2_buffer(r2.data(), r2.size()); + cl::sycl::buffer r3_buffer(r3.data(), r3.size()); + cl::sycl::buffer r4_buffer(r4.data(), r4.size()); - generate(distr, engine1, N_GEN, r1_buffer); - generate(distr, engine2, N_GEN, r2_buffer); - generate(distr, engine3, N_GEN, r3_buffer); - generate(distr, engine4, N_GEN, r4_buffer); + test_generate(distr, engine1, N_GEN, r1_buffer); + test_generate(distr, engine2, N_GEN, r2_buffer); + test_generate(distr, engine3, N_GEN, r3_buffer); + test_generate(distr, engine4, N_GEN, r4_buffer); } catch (const oneapi::mkl::unimplemented& e) { status = test_skipped; @@ -95,15 +95,15 @@ class engines_constructors_test { template class engines_copy_test { template - void generate(oneapi::mkl::rng::bits distr, T& engine, std::uint64_t n, - cl::sycl::buffer& buf) { + void test_generate(oneapi::mkl::rng::bits distr, T& engine, std::uint64_t n, + cl::sycl::buffer& buf) { oneapi::mkl::rng::generate(distr, engine, n, buf); } template <> - void generate(oneapi::mkl::rng::bits distr, + void test_generate(oneapi::mkl::rng::bits distr, oneapi::mkl::rng::mcg59& engine, std::uint64_t n, - cl::sycl::buffer& buf) { + cl::sycl::buffer& buf) { // mcg59 generates numbers with type uint64_t, but retruns uint32_t // so it necessary to use n/2 here to generate n/2 numbers with type uint64_t and retrun n numbers with type uint32_t oneapi::mkl::rng::generate(distr, engine, n / 2, buf); @@ -124,23 +124,23 @@ class engines_copy_test { oneapi::mkl::rng::bits distr; { - cl::sycl::buffer r1_buffer(r1.data(), r1.size()); - cl::sycl::buffer r2_buffer(r2.data(), r2.size()); + cl::sycl::buffer r1_buffer(r1.data(), r1.size()); + cl::sycl::buffer r2_buffer(r2.data(), r2.size()); - generate(distr, engine1, N_GEN, r1_buffer); - generate(distr, engine2, N_GEN, r2_buffer); + test_generate(distr, engine1, N_GEN, r1_buffer); + test_generate(distr, engine2, N_GEN, 
r2_buffer); } Engine engine3 = engine1; Engine engine4 = std::move(engine2); { - cl::sycl::buffer r1_buffer(r1.data(), r1.size()); - cl::sycl::buffer r2_buffer(r2.data(), r2.size()); - cl::sycl::buffer r3_buffer(r3.data(), r3.size()); + cl::sycl::buffer r1_buffer(r1.data(), r1.size()); + cl::sycl::buffer r2_buffer(r2.data(), r2.size()); + cl::sycl::buffer r3_buffer(r3.data(), r3.size()); - generate(distr, engine1, N_GEN, r1_buffer); - generate(distr, engine3, N_GEN, r2_buffer); - generate(distr, engine4, N_GEN, r3_buffer); + test_generate(distr, engine1, N_GEN, r1_buffer); + test_generate(distr, engine3, N_GEN, r2_buffer); + test_generate(distr, engine4, N_GEN, r3_buffer); } } catch (const oneapi::mkl::unimplemented& e) { diff --git a/tests/unit_tests/rng/include/rng_test_common.hpp b/tests/unit_tests/rng/include/rng_test_common.hpp index bbeaa1517..0b2453f62 100644 --- a/tests/unit_tests/rng/include/rng_test_common.hpp +++ b/tests/unit_tests/rng/include/rng_test_common.hpp @@ -63,32 +63,30 @@ static inline bool check_equal(std::uint64_t x, std::uint64_t x_ref) { template static inline bool check_equal_vector(std::vector& r1, std::vector& r2) { - bool good = true; for (int i = 0; i < r1.size(); i++) { if (!check_equal(r1[i], r2[i])) { - good = false; + return false; break; } } - return good; + return true; } template static inline bool leapfrog_check(std::vector& r1, std::vector& r2, int n_portion, int n_engines) { - bool good = true; int j = 0; for (int i = 0; i < n_engines; i++) { for (int k = 0; k < n_portion / 2; k++) { for (int p = 0; p < 2; p++) { if (!check_equal(r2[j++], r1[k * n_engines + i * 2 + p])) { - good = false; + return false; break; } } } } - return good; + return true; } template @@ -111,15 +109,15 @@ class rng_test { }; #ifdef ENABLE_CURAND_BACKEND // w/a for cuda backend hangs when there are several queues with different contexts - static sycl::device* previous_device = nullptr; - static sycl::context* context = nullptr; + static 
cl::sycl::device* previous_device = nullptr; + static cl::sycl::context* context = nullptr; if ((previous_device != dev)) { previous_device = dev; if (context != nullptr) { delete context; } - context = new sycl::context(*dev); + context = new cl::sycl::context(*dev); } cl::sycl::queue queue(*context, *dev, exception_handler); diff --git a/tests/unit_tests/rng/include/skip_ahead_test.hpp b/tests/unit_tests/rng/include/skip_ahead_test.hpp index d449f381b..cbed8bf8e 100644 --- a/tests/unit_tests/rng/include/skip_ahead_test.hpp +++ b/tests/unit_tests/rng/include/skip_ahead_test.hpp @@ -34,14 +34,14 @@ template class skip_ahead_test { template void generate(oneapi::mkl::rng::bits distr, T& engine, std::uint64_t n, - cl::sycl::buffer& buf) { + cl::sycl::buffer& buf) { oneapi::mkl::rng::generate(distr, engine, n, buf); } template <> void generate(oneapi::mkl::rng::bits distr, oneapi::mkl::rng::mcg59& engine, std::uint64_t n, - cl::sycl::buffer& buf) { + cl::sycl::buffer& buf) { oneapi::mkl::rng::generate(distr, engine, n / 2, buf); } @@ -75,11 +75,11 @@ class skip_ahead_test { skip_ahead(*(engines[i]), i * N_PORTION); } - cl::sycl::buffer r_buffer(r1.data(), r1.size()); - std::vector> r_buffers; + cl::sycl::buffer r_buffer(r1.data(), r1.size()); + std::vector> r_buffers; for (int i = 0; i < N_ENGINES; i++) { r_buffers.push_back( - cl::sycl::buffer(r2.data() + i * N_PORTION, N_PORTION)); + cl::sycl::buffer(r2.data() + i * N_PORTION, N_PORTION)); } generate(distr, engine, N_GEN_SERVICE, r_buffer); @@ -132,8 +132,8 @@ class skip_ahead_ex_test { } oneapi::mkl::rng::skip_ahead(engine2, NUM_TO_SKIP); - cl::sycl::buffer r1_buffer(r1.data(), r1.size()); - cl::sycl::buffer r2_buffer(r2.data(), r2.size()); + cl::sycl::buffer r1_buffer(r1.data(), r1.size()); + cl::sycl::buffer r2_buffer(r2.data(), r2.size()); oneapi::mkl::rng::generate(distr, engine1, N_GEN, r1_buffer); oneapi::mkl::rng::generate(distr, engine2, N_GEN, r2_buffer); @@ -178,11 +178,11 @@ class leapfrog_test { 
oneapi::mkl::rng::leapfrog(*(engines[i]), i, N_ENGINES / 2); } - cl::sycl::buffer r_buffer(r1.data(), r1.size()); - std::vector> r_buffers; + cl::sycl::buffer r_buffer(r1.data(), r1.size()); + std::vector> r_buffers; for (int i = 0; i < N_ENGINES; i++) { r_buffers.push_back( - cl::sycl::buffer(r2.data() + i * N_PORTION, N_PORTION)); + cl::sycl::buffer(r2.data() + i * N_PORTION, N_PORTION)); } oneapi::mkl::rng::generate(distr, engine, N_GEN_SERVICE / 2, r_buffer); diff --git a/tests/unit_tests/rng/include/statistics_check_test.hpp b/tests/unit_tests/rng/include/statistics_check_test.hpp index dbf84d6c9..ef28f9b1d 100644 --- a/tests/unit_tests/rng/include/statistics_check_test.hpp +++ b/tests/unit_tests/rng/include/statistics_check_test.hpp @@ -44,7 +44,6 @@ #define POISSON_ARGS 0.5 -using namespace cl; template class statistics_test { public: @@ -55,13 +54,13 @@ class statistics_test { std::vector r(n_gen); try { - sycl::buffer r_buffer(r.data(), r.size()); + cl::sycl::buffer r_buffer(r.data(), r.size()); Engine engine(queue, SEED); Distr distr(args...); oneapi::mkl::rng::generate(distr, engine, n_gen, r_buffer); } - catch (sycl::exception const& e) { + catch (cl::sycl::exception const& e) { std::cout << "Caught synchronous SYCL exception during generation:\n" << e.what() << std::endl; print_error_code(e); @@ -88,9 +87,9 @@ class statistics_usm_test { using Type = typename Distr::result_type; #ifdef CALL_RT_API - auto ua = sycl::usm_allocator(queue); + auto ua = cl::sycl::usm_allocator(queue); #else - auto ua = sycl::usm_allocator(queue.get_queue()); + auto ua = cl::sycl::usm_allocator(queue.get_queue()); #endif std::vector r(n_gen, ua); @@ -100,7 +99,7 @@ class statistics_usm_test { auto event = oneapi::mkl::rng::generate(distr, engine, n_gen, r.data()); event.wait_and_throw(); } - catch (sycl::exception const& e) { + catch (cl::sycl::exception const& e) { std::cout << "Caught synchronous SYCL exception during generation:\n" << e.what() << std::endl; 
print_error_code(e); From f50b2c24593e6fdb7eac96aa392c3460c9b4a5d6 Mon Sep 17 00:00:00 2001 From: ksenia zaitseva Date: Thu, 20 Jan 2022 22:39:53 +0300 Subject: [PATCH 5/7] [RNG] clang format --- include/oneapi/mkl/rng/distributions.hpp | 1 - include/oneapi/mkl/rng/engines.hpp | 12 ++++++------ src/rng/backends/curand/mcg59.cpp | 4 ++-- src/rng/backends/curand/philox4x32x10.cpp | 3 ++- src/rng/backends/mklgpu/mkl_internal_rng_gpu.hpp | 13 +++++++------ src/rng/backends/mklgpu/philox4x32x10.cpp | 3 ++- tests/unit_tests/rng/include/engines_api_tests.hpp | 12 ++++++------ .../rng/include/statistics_check_test.hpp | 3 ++- 8 files changed, 27 insertions(+), 24 deletions(-) mode change 100755 => 100644 src/rng/backends/mklgpu/mkl_internal_rng_gpu.hpp diff --git a/include/oneapi/mkl/rng/distributions.hpp b/include/oneapi/mkl/rng/distributions.hpp index 9f1788150..cc7b90eab 100644 --- a/include/oneapi/mkl/rng/distributions.hpp +++ b/include/oneapi/mkl/rng/distributions.hpp @@ -28,7 +28,6 @@ namespace oneapi::mkl::rng { - // Class template oneapi::mkl::rng::uniform // // Represents continuous and discrete uniform random number distribution diff --git a/include/oneapi/mkl/rng/engines.hpp b/include/oneapi/mkl/rng/engines.hpp index e9d795c6e..59957704e 100644 --- a/include/oneapi/mkl/rng/engines.hpp +++ b/include/oneapi/mkl/rng/engines.hpp @@ -123,8 +123,8 @@ class philox4x32x10 { template friend cl::sycl::event generate(const Distr& distr, Engine& engine, std::int64_t n, - typename Distr::result_type* r, - const std::vector& dependencies); + typename Distr::result_type* r, + const std::vector& dependencies); }; // Class oneapi::mkl::rng::mrg32k3a @@ -204,8 +204,8 @@ class mrg32k3a { template friend cl::sycl::event generate(const Distr& distr, Engine& engine, std::int64_t n, - typename Distr::result_type* r, - const std::vector& dependencies); + typename Distr::result_type* r, + const std::vector& dependencies); }; // Class oneapi::mkl::rng::mcg59 @@ -275,8 +275,8 @@ class 
mcg59 { template friend cl::sycl::event generate(const Distr& distr, Engine& engine, std::int64_t n, - typename Distr::result_type* r, - const std::vector& dependencies); + typename Distr::result_type* r, + const std::vector& dependencies); }; // Default engine to be used for common cases diff --git a/src/rng/backends/curand/mcg59.cpp b/src/rng/backends/curand/mcg59.cpp index 83e39faa8..25556258d 100644 --- a/src/rng/backends/curand/mcg59.cpp +++ b/src/rng/backends/curand/mcg59.cpp @@ -66,7 +66,7 @@ #include "oneapi/mkl/exceptions.hpp" #include "oneapi/mkl/rng/detail/curand/onemkl_rng_curand.hpp" -namespace oneapi:: mkl::rng::curand { +namespace oneapi::mkl::rng::curand { /* * Note that cuRAND consists of two pieces: a host (CPU) API and a device (GPU) * API. The host API acts like any standard library; the `curand.h' header is @@ -101,4 +101,4 @@ oneapi::mkl::rng::detail::engine_impl* create_mcg59(cl::sycl::queue queue, return nullptr; } -} // namespace oneapi:: mkl::rng::curand +} // namespace oneapi::mkl::rng::curand diff --git a/src/rng/backends/curand/philox4x32x10.cpp b/src/rng/backends/curand/philox4x32x10.cpp index 9686369b8..1adeacd6e 100644 --- a/src/rng/backends/curand/philox4x32x10.cpp +++ b/src/rng/backends/curand/philox4x32x10.cpp @@ -865,7 +865,8 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { }; #endif -oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(cl::sycl::queue queue, std::uint64_t seed) { +oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(cl::sycl::queue queue, + std::uint64_t seed) { return new philox4x32x10_impl(queue, seed); } diff --git a/src/rng/backends/mklgpu/mkl_internal_rng_gpu.hpp b/src/rng/backends/mklgpu/mkl_internal_rng_gpu.hpp old mode 100755 new mode 100644 index 25f11812d..45da17065 --- a/src/rng/backends/mklgpu/mkl_internal_rng_gpu.hpp +++ b/src/rng/backends/mklgpu/mkl_internal_rng_gpu.hpp @@ -41,7 +41,8 @@ engine_base_impl* create_engine(cl::sycl::queue& queue, 
engine_base_impl* other_impl); template -void skip_ahead(cl::sycl::queue& queue, engine_base_impl* impl, std::uint64_t num_to_skip); +void skip_ahead(cl::sycl::queue& queue, engine_base_impl* impl, + std::uint64_t num_to_skip); template void skip_ahead(cl::sycl::queue& queue, engine_base_impl* impl, @@ -56,14 +57,14 @@ void delete_engine(cl::sycl::queue& queue, engine_base_impl* impl); template cl::sycl::event generate(cl::sycl::queue& queue, const DistrType& distr, - engine_base_impl* engine, std::int64_t n, - cl::sycl::buffer& r); + engine_base_impl* engine, std::int64_t n, + cl::sycl::buffer& r); template cl::sycl::event generate(cl::sycl::queue& queue, const DistrType& distr, - engine_base_impl* engine, std::int64_t n, - typename DistrType::result_type* r, - const std::vector& dependencies = {}); + engine_base_impl* engine, std::int64_t n, + typename DistrType::result_type* r, + const std::vector& dependencies = {}); } // namespace gpu } // namespace oneapi::mkl::rng::detail diff --git a/src/rng/backends/mklgpu/philox4x32x10.cpp b/src/rng/backends/mklgpu/philox4x32x10.cpp index 9961ac75e..da080779d 100644 --- a/src/rng/backends/mklgpu/philox4x32x10.cpp +++ b/src/rng/backends/mklgpu/philox4x32x10.cpp @@ -576,7 +576,8 @@ class philox4x32x10_impl : public oneapi::mkl::rng::detail::engine_impl { }; #endif -oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(cl::sycl::queue queue, std::uint64_t seed) { +oneapi::mkl::rng::detail::engine_impl* create_philox4x32x10(cl::sycl::queue queue, + std::uint64_t seed) { return new philox4x32x10_impl(queue, seed); } diff --git a/tests/unit_tests/rng/include/engines_api_tests.hpp b/tests/unit_tests/rng/include/engines_api_tests.hpp index 21a47e8b5..04374bca6 100644 --- a/tests/unit_tests/rng/include/engines_api_tests.hpp +++ b/tests/unit_tests/rng/include/engines_api_tests.hpp @@ -34,14 +34,14 @@ template class engines_constructors_test { template void test_generate(oneapi::mkl::rng::bits distr, T& engine, std::uint64_t 
n, - cl::sycl::buffer& buf) { + cl::sycl::buffer& buf) { oneapi::mkl::rng::generate(distr, engine, n, buf); } template <> void test_generate(oneapi::mkl::rng::bits distr, - oneapi::mkl::rng::mcg59& engine, std::uint64_t n, - cl::sycl::buffer& buf) { + oneapi::mkl::rng::mcg59& engine, std::uint64_t n, + cl::sycl::buffer& buf) { oneapi::mkl::rng::generate(distr, engine, n / 2, buf); } @@ -96,14 +96,14 @@ template class engines_copy_test { template void test_generate(oneapi::mkl::rng::bits distr, T& engine, std::uint64_t n, - cl::sycl::buffer& buf) { + cl::sycl::buffer& buf) { oneapi::mkl::rng::generate(distr, engine, n, buf); } template <> void test_generate(oneapi::mkl::rng::bits distr, - oneapi::mkl::rng::mcg59& engine, std::uint64_t n, - cl::sycl::buffer& buf) { + oneapi::mkl::rng::mcg59& engine, std::uint64_t n, + cl::sycl::buffer& buf) { // mcg59 generates numbers with type uint64_t, but retruns uint32_t // so it necessary to use n/2 here to generate n/2 numbers with type uint64_t and retrun n numbers with type uint32_t oneapi::mkl::rng::generate(distr, engine, n / 2, buf); diff --git a/tests/unit_tests/rng/include/statistics_check_test.hpp b/tests/unit_tests/rng/include/statistics_check_test.hpp index ef28f9b1d..0be8cce9e 100644 --- a/tests/unit_tests/rng/include/statistics_check_test.hpp +++ b/tests/unit_tests/rng/include/statistics_check_test.hpp @@ -89,7 +89,8 @@ class statistics_usm_test { #ifdef CALL_RT_API auto ua = cl::sycl::usm_allocator(queue); #else - auto ua = cl::sycl::usm_allocator(queue.get_queue()); + auto ua = + cl::sycl::usm_allocator(queue.get_queue()); #endif std::vector r(n_gen, ua); From cd6a555464b97ff41fcd973f7513df5153aeec79 Mon Sep 17 00:00:00 2001 From: ksenia zaitseva Date: Thu, 20 Jan 2022 22:57:32 +0300 Subject: [PATCH 6/7] [RNG] change accessor --- src/rng/backends/curand/curand_helper.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rng/backends/curand/curand_helper.hpp 
b/src/rng/backends/curand/curand_helper.hpp index 972689822..b041fb1f3 100644 --- a/src/rng/backends/curand/curand_helper.hpp +++ b/src/rng/backends/curand/curand_helper.hpp @@ -207,7 +207,7 @@ template static inline void range_transform_fp(cl::sycl::queue& queue, T a, T b, std::int64_t n, cl::sycl::buffer& r) { queue.submit([&](cl::sycl::handler& cgh) { - auto acc = r.template get_access(cgh); + cl::sycl::accessor acc{ r, cgh, cl::sycl::read_write }; cgh.parallel_for(cl::sycl::range<1>(n), [=](cl::sycl::id<1> id) { acc[id] = acc[id] * (b - a) + a; }); }); @@ -224,7 +224,7 @@ template static inline void range_transform_fp_accurate(cl::sycl::queue& queue, T a, T b, std::int64_t n, cl::sycl::buffer& r) { queue.submit([&](cl::sycl::handler& cgh) { - auto acc = r.template get_access(cgh); + cl::sycl::accessor acc{ r, cgh, cl::sycl::read_write }; cgh.parallel_for(cl::sycl::range<1>(n), [=](cl::sycl::id<1> id) { acc[id] = acc[id] * (b - a) + a; if (acc[id] < a) { From a8861e7c4e87b7ba148a92bd76450c86dbe06b80 Mon Sep 17 00:00:00 2001 From: ksenia zaitseva Date: Thu, 20 Jan 2022 23:12:00 +0300 Subject: [PATCH 7/7] [RNG] delete namespace --- src/rng/rng_loader.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/rng/rng_loader.cpp b/src/rng/rng_loader.cpp index d0bc85bf2..fddc6d473 100644 --- a/src/rng/rng_loader.cpp +++ b/src/rng/rng_loader.cpp @@ -22,8 +22,6 @@ #include "function_table_initializer.hpp" #include "rng/function_table.hpp" -using namespace cl; - namespace oneapi::mkl::rng::detail { static oneapi::mkl::detail::table_initializer function_tables;