kfr

Fast, modern C++ DSP framework: FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)

commit b063114b7bb195a28a3d6e40a9ba203594891523
parent b849acc08979dc137dd03dddc285fc51da65decd
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date:   Tue,  9 Aug 2016 08:58:46 +0300

Merge remote-tracking branch 'origin/dev'

Diffstat:
M.travis.yml | 13+++----------
MCMakeLists.txt | 20++++++++++++++++----
Mexamples/CMakeLists.txt | 6+++---
Mexamples/biquads.cpp | 34+++++++++++++++++-----------------
Mexamples/dft.cpp | 9++++-----
Mexamples/fir.cpp | 12++++++------
Dexamples/resampling.cpp | 105-------------------------------------------------------------------------------
Aexamples/sample_rate_conversion.cpp | 105+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mexamples/window.cpp | 2+-
Minclude/kfr/all.hpp | 74++++----------------------------------------------------------------------
Ainclude/kfr/base.hpp | 63+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Minclude/kfr/base/abs.hpp | 39++++++++++++++++++++++++++++++++++++---
Minclude/kfr/base/asin_acos.hpp | 4++--
Minclude/kfr/base/atan.hpp | 12++++++------
Minclude/kfr/base/basic_expressions.hpp | 53+++++++++++++++++++++++++++--------------------------
Minclude/kfr/base/complex.hpp | 98+++++++++++++++++++++++++++++++++++++++++--------------------------------------
Minclude/kfr/base/conversion.hpp | 6+++---
Minclude/kfr/base/cpuid.hpp | 30++++++++++++++----------------
Minclude/kfr/base/cpuid_auto.hpp | 8++++----
Minclude/kfr/base/digitreverse.hpp | 6+++---
Minclude/kfr/base/expression.hpp | 63+++++++++++++++++++++++++++++++--------------------------------
Minclude/kfr/base/function.hpp | 6+++---
Minclude/kfr/base/gamma.hpp | 4++--
Minclude/kfr/base/generators.hpp | 40++++++++++++++++++++--------------------
Minclude/kfr/base/intrinsics.h | 6+++---
Minclude/kfr/base/kfr.h | 103++-----------------------------------------------------------------------------
Minclude/kfr/base/log_exp.hpp | 2+-
Minclude/kfr/base/logical.hpp | 63++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
Minclude/kfr/base/memory.hpp | 22+++++++++++-----------
Minclude/kfr/base/min_max.hpp | 39+++++++++++++++++++++++++++++++++++----
Minclude/kfr/base/modzerobessel.hpp | 6+++---
Minclude/kfr/base/operators.hpp | 206+++++++++++++++++++++++++++++++++++++++++++------------------------------------
Minclude/kfr/base/pointer.hpp | 42++++++++++++++++++++++--------------------
Minclude/kfr/base/random.hpp | 8++++----
Minclude/kfr/base/read_write.hpp | 34+++++++++++++++++-----------------
Minclude/kfr/base/reduce.hpp | 18+++++++++---------
Minclude/kfr/base/round.hpp | 24++++++++++++++++++++++--
Minclude/kfr/base/saturation.hpp | 31+++++++++++++++++++++++++++++--
Minclude/kfr/base/select.hpp | 70+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
Minclude/kfr/base/shuffle.hpp | 127++++++++++++++++++++++++++++++++++++++++++-------------------------------------
Minclude/kfr/base/sin_cos.hpp | 2+-
Minclude/kfr/base/sort.hpp | 4++--
Minclude/kfr/base/sqrt.hpp | 4++--
Minclude/kfr/base/types.hpp | 94++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Minclude/kfr/base/univector.hpp | 34+++++++++++++++++-----------------
Minclude/kfr/base/vec.hpp | 563+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
Minclude/kfr/cident.h | 532+++++++++++++++++++++++++++++++++++++++++++------------------------------------
Minclude/kfr/cometa.hpp | 164+++++++++++++++++++++++++++++++++++++++++++------------------------------------
Minclude/kfr/cometa/string.hpp | 120++++++++++++++++++++++++++++++++++++++++----------------------------------------
Ainclude/kfr/dft.hpp | 31+++++++++++++++++++++++++++++++
Minclude/kfr/dft/bitrev.hpp | 6+++---
Minclude/kfr/dft/conv.hpp | 2+-
Minclude/kfr/dft/fft.hpp | 54+++++++++++++++++++++++++++---------------------------
Minclude/kfr/dft/ft.hpp | 124+++++++++++++++++++++++++++++++++++++++----------------------------------------
Ainclude/kfr/dsp.hpp | 43+++++++++++++++++++++++++++++++++++++++++++
Minclude/kfr/dsp/biquad.hpp | 144++++++++++++++++++++++++++++++++++++++++---------------------------------------
Minclude/kfr/dsp/biquad_design.hpp | 32++++++++++++++++----------------
Ainclude/kfr/dsp/dcremove.hpp | 37+++++++++++++++++++++++++++++++++++++
Minclude/kfr/dsp/fir.hpp | 18+++++++++++-------
Minclude/kfr/dsp/fir_design.hpp | 8++++----
Minclude/kfr/dsp/fracdelay.hpp | 4+---
Minclude/kfr/dsp/goertzel.hpp | 147+++++++++++++++++++++++++++++++++++++------------------------------------------
Minclude/kfr/dsp/interpolation.hpp | 4++--
Ainclude/kfr/dsp/mixdown.hpp | 62++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Minclude/kfr/dsp/resample.hpp | 188+------------------------------------------------------------------------------
Ainclude/kfr/dsp/sample_rate_conversion.hpp | 227+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Minclude/kfr/dsp/units.hpp | 2+-
Minclude/kfr/dsp/waveshaper.hpp | 7+++----
Minclude/kfr/dsp/window.hpp | 91++++++++++++++++++++++++++++++++++++++++---------------------------------------
Ainclude/kfr/io.hpp | 30++++++++++++++++++++++++++++++
Minclude/kfr/io/file.hpp | 4++--
Minclude/kfr/io/python_plot.hpp | 9+++++++--
Minclude/kfr/io/tostring.hpp | 6+++++-
Minclude/kfr/math.hpp | 23+----------------------
Minclude/kfr/version.hpp | 2+-
Msources.cmake | 75+++++++++++++++++++++++++++++++++------------------------------------------
Mtests/CMakeLists.txt | 71++++++++++++++++++++++++++++++++++++++++++++---------------------------
Mtests/complex_test.cpp | 17++++++++++++++---
Mtests/conv_test.cpp | 8++++----
Mtests/dft_test.cpp | 8+++++++-
Mtests/empty_test.cpp | 2+-
Mtests/intrinsic_test.cpp | 82+++++++++++++++++++++++++++++++++++++++++++++----------------------------------
Atests/multiarch.cpp | 56++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atests/multiarch_fir_avx.cpp | 18++++++++++++++++++
Atests/multiarch_fir_sse2.cpp | 18++++++++++++++++++
Mtests/testo/testo.hpp | 22+++++++++++-----------
Mtests/vec_test.cpp | 79++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Aupdate-sources.py | 31+++++++++++++++++++++++++++++++
88 files changed, 2861 insertions(+), 2061 deletions(-)
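
The main user-facing changes in this merge, as reflected in the diffstat: the monolithic include/kfr/all.hpp is reduced to a thin wrapper over four new umbrella headers (base.hpp, dft.hpp, dsp.hpp, io.hpp), the examples switch from hard-coded double to the library's fbase type, the resampling example is renamed to sample_rate_conversion (with resampler and swept now explicitly instantiated as resampler<fbase> and swept<fbase>), and the KFR_INLINE/CID_* macro spellings are replaced by their CMT_* counterparts. Below is a minimal sketch of what example code looks like after this commit, condensed from the examples/dft.cpp hunks in the diff that follows; it is not a verbatim file from the repository, the all-in-one header is used for brevity, and the dft.execute call is assumed unchanged from the pre-merge example.

// Sketch of the post-merge example style (condensed from examples/dft.cpp below).
// kfr/all.hpp now simply forwards to the new umbrella headers
// base.hpp, dft.hpp, dsp.hpp and io.hpp added in this commit.
#include <kfr/all.hpp>

using namespace kfr;

int main()
{
    const size_t size = 128;

    // buffers now use the fbase typedef instead of an explicit double
    univector<complex<fbase>, size> in  = sin(linspace(0.0, c_pi<fbase, 2> * 4.0, size));
    univector<complex<fbase>, size> out = scalar(qnan);

    // the DFT plan is templated on fbase as well
    const dft_plan<fbase> dft(size);
    univector<u8> temp(dft.temp_size); // work buffer for the plan (if needed)
    dft.execute(out, in, temp);        // assumed unchanged from the pre-merge example

    // normalize, then get magnitude in decibels
    out = out / size;
    univector<fbase, size> dB = amp_to_dB(cabs(out));
    println("max = ", maxof(dB));
    println("min = ", minof(dB));
    return 0;
}
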

diff --git a/.travis.yml b/.travis.yml @@ -4,17 +4,13 @@ matrix: include: - os: linux compiler: clang - sudo: required - dist: precise + sudo: false addons: apt: sources: - ubuntu-toolchain-r-test - llvm-toolchain-precise-3.8 - - george-edison55-precise-backports packages: - - cmake - - cmake-data - g++-5 - clang-3.8 - libmpfr-dev @@ -25,14 +21,11 @@ matrix: - os: osx osx_image: xcode7.3 - os: osx - osx_image: xcode7.2 - - os: osx - osx_image: xcode7.1 - - os: osx osx_image: xcode7 + - os: osx + osx_image: beta-xcode6.3 before_install: - - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo apt-get update -qq ; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update ; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew reinstall cmake ; fi - cmake --version diff --git a/CMakeLists.txt b/CMakeLists.txt @@ -1,21 +1,27 @@ # Copyright (C) 2016 D Levin (http://www.kfrlib.com) # This file is part of KFR -# +# # KFR is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. -# +# # KFR is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with KFR. -cmake_minimum_required(VERSION 3.0) +cmake_minimum_required(VERSION 2.8) + +if (CMAKE_VERSION VERSION_LESS "2.8.12") + function(add_compile_options) + add_definitions(${ARGN}) + endfunction(add_compile_options) +endif () set(OPT_BITNESS "") # cmake -DOPT_BITNESS="-m32" or -m64 set(OPT_STATIC "") # cmake -DOPT_STATIC="-static" @@ -46,6 +52,12 @@ include(sources.cmake) set(ALL_WARNINGS -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-c99-extensions -Wno-padded) +if (IOS) + set(STD_LIB) +else () + set(STD_LIB stdc++) +endif () + if (NOT MSVC) add_compile_options(-std=c++1y) else () diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt @@ -15,7 +15,7 @@ # along with KFR. 
-cmake_minimum_required(VERSION 3.0) +cmake_minimum_required(VERSION 2.8) if (NOT MSVC) add_compile_options(-fno-exceptions -fno-rtti) @@ -24,7 +24,7 @@ if (NOT MSVC) else () set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_FLAGS}") endif () - link_libraries(stdc++ pthread) + link_libraries(${STD_LIB} pthread) else () add_compile_options(/arch:AVX) endif () @@ -34,5 +34,5 @@ include_directories(../include) add_executable(biquads biquads.cpp ${KFR_SRC}) add_executable(window window.cpp ${KFR_SRC}) add_executable(fir fir.cpp ${KFR_SRC}) -add_executable(resampling resampling.cpp ${KFR_SRC}) +add_executable(sample_rate_conversion sample_rate_conversion.cpp ${KFR_SRC}) add_executable(dft dft.cpp ${KFR_SRC} ${DFT_SRC}) diff --git a/examples/biquads.cpp b/examples/biquads.cpp @@ -32,53 +32,53 @@ int main(int argc, char** argv) const std::string options = "phaseresp=True"; - univector<double, 128> output; + univector<fbase, 128> output; { - biquad_params<double> bq[] = { biquad_notch(0.1, 0.5), biquad_notch(0.2, 0.5), biquad_notch(0.3, 0.5), - biquad_notch(0.4, 0.5) }; + biquad_params<fbase> bq[] = { biquad_notch(0.1, 0.5), biquad_notch(0.2, 0.5), biquad_notch(0.3, 0.5), + biquad_notch(0.4, 0.5) }; output = biquad(bq, simpleimpulse()); } plot_save("biquad_notch", output, options + ", title='Four Biquad Notch filters'"); { - biquad_params<double> bq[] = { biquad_lowpass(0.2, 0.9) }; - output = biquad(bq, simpleimpulse()); + biquad_params<fbase> bq[] = { biquad_lowpass(0.2, 0.9) }; + output = biquad(bq, simpleimpulse()); } plot_save("biquad_lowpass", output, options + ", title='Biquad Low pass filter (0.2, 0.9)'"); { - biquad_params<double> bq[] = { biquad_highpass(0.3, 0.1) }; - output = biquad(bq, simpleimpulse()); + biquad_params<fbase> bq[] = { biquad_highpass(0.3, 0.1) }; + output = biquad(bq, simpleimpulse()); } plot_save("biquad_highpass", output, options + ", title='Biquad High pass filter (0.3, 0.1)'"); { - biquad_params<double> bq[] = { biquad_peak(0.3, 0.5, +9.0) }; - output = biquad(bq, simpleimpulse()); + biquad_params<fbase> bq[] = { biquad_peak(0.3, 0.5, +9.0) }; + output = biquad(bq, simpleimpulse()); } plot_save("biquad_peak", output, options + ", title='Biquad Peak filter (0.2, 0.5, +9)'"); { - biquad_params<double> bq[] = { biquad_peak(0.3, 3.0, -2.0) }; - output = biquad(bq, simpleimpulse()); + biquad_params<fbase> bq[] = { biquad_peak(0.3, 3.0, -2.0) }; + output = biquad(bq, simpleimpulse()); } plot_save("biquad_peak2", output, options + ", title='Biquad Peak filter (0.3, 3, -2)'"); { - biquad_params<double> bq[] = { biquad_lowshelf(0.3, -1.0) }; - output = biquad(bq, simpleimpulse()); + biquad_params<fbase> bq[] = { biquad_lowshelf(0.3, -1.0) }; + output = biquad(bq, simpleimpulse()); } plot_save("biquad_lowshelf", output, options + ", title='Biquad low shelf filter (0.3, -1)'"); { - biquad_params<double> bq[] = { biquad_highshelf(0.3, +9.0) }; - output = biquad(bq, simpleimpulse()); + biquad_params<fbase> bq[] = { biquad_highshelf(0.3, +9.0) }; + output = biquad(bq, simpleimpulse()); } plot_save("biquad_highshelf", output, options + ", title='Biquad high shelf filter (0.3, +9)'"); { - biquad_params<double> bq[] = { biquad_bandpass(0.25, 0.2) }; - output = biquad(bq, simpleimpulse()); + biquad_params<fbase> bq[] = { biquad_bandpass(0.25, 0.2) }; + output = biquad(bq, simpleimpulse()); } plot_save("biquad_bandpass", output, options + ", title='Biquad band pass (0.25, 0.2)'"); diff --git a/examples/dft.cpp b/examples/dft.cpp @@ -29,14 +29,13 @@ int main(int argc, char** argv) // 
fft size const size_t size = 128; - using float_type = double; // initialize input & output buffers - univector<complex<float_type>, size> in = sin(linspace(0.0, c_pi<float_type, 2> * 4.0, size)); - univector<complex<float_type>, size> out = scalar(qnan); + univector<complex<fbase>, size> in = sin(linspace(0.0, c_pi<fbase, 2> * 4.0, size)); + univector<complex<fbase>, size> out = scalar(qnan); // initialize fft - const dft_plan<float_type> dft(size); + const dft_plan<fbase> dft(size); // allocate work buffer for fft (if needed) univector<u8> temp(dft.temp_size); @@ -48,7 +47,7 @@ int main(int argc, char** argv) out = out / size; // get magnitude and convert to decibels - univector<float_type, size> dB = amp_to_dB(cabs(out)); + univector<fbase, size> dB = amp_to_dB(cabs(out)); println("max = ", maxof(dB)); println("min = ", minof(dB)); diff --git a/examples/fir.cpp b/examples/fir.cpp @@ -37,15 +37,15 @@ int main(int argc, char** argv) const std::string options = "phaseresp=False"; - univector<double, 15> taps15; - univector<double, 127> taps127; - univector<double, 8191> taps8191; + univector<fbase, 15> taps15; + univector<fbase, 127> taps127; + univector<fbase, 8191> taps8191; - expression_pointer<double> hann = to_pointer(window_hann(taps15.size())); + expression_pointer<fbase> hann = to_pointer(window_hann(taps15.size())); - expression_pointer<double> kaiser = to_pointer(window_kaiser(taps127.size(), 3.0)); + expression_pointer<fbase> kaiser = to_pointer(window_kaiser(taps127.size(), 3.0)); - expression_pointer<double> blackman_harris = to_pointer(window_blackman_harris(taps8191.size())); + expression_pointer<fbase> blackman_harris = to_pointer(window_blackman_harris(taps8191.size())); fir_lowpass(taps15, 0.15, hann, true); plot_save("fir_lowpass_hann", taps15, options + ", title='15-point lowpass FIR, Hann window'"); diff --git a/examples/resampling.cpp b/examples/resampling.cpp @@ -1,105 +0,0 @@ -/** - * KFR (http://kfrlib.com) - * Copyright (C) 2016 D Levin - * See LICENSE.txt for details - */ - -// library_version() -#include <kfr/version.hpp> - -// print(), format() -#include <kfr/cometa/string.hpp> - -#include <kfr/math.hpp> - -// resample* -#include <kfr/dsp/resample.hpp> - -// file* -#include <kfr/io/audiofile.hpp> - -// swept -#include <kfr/dsp/oscillators.hpp> - -// plot_save() -#include <kfr/io/python_plot.hpp> - -#include <iostream> - -using namespace kfr; - -constexpr size_t input_sr = 96000; -constexpr size_t output_sr = 44100; -constexpr size_t len = 96000 * 6; -constexpr f64 i32max = 2147483647.0; - -int main(int argc, char** argv) -{ - println(library_version()); - - const std::string options = "phaseresp=False"; - - univector<f64> swept_sine = swept(0.5, len); - - { - auto r = resampler(resample_quality::high, output_sr, input_sr, 1.0, 0.496); - univector<f64> resampled(len * output_sr / input_sr); - - const size_t destsize = r(resampled.data(), swept_sine); - - univector<i32> i32data = clamp(resampled.slice(0, destsize) * i32max, -i32max, +i32max); - univector2d<i32> data = { i32data }; - - auto wr = sequential_file_writer("audio_high_quality.wav"); - audio_encode(wr, data, audioformat(data, output_sr)); - - plot_save("audio_high_quality", "audio_high_quality.wav", ""); - } - - { - auto r = resampler(resample_quality::normal, output_sr, input_sr, 1.0, 0.496); - univector<f64> resampled(len * output_sr / input_sr); - - const size_t destsize = r(resampled.data(), swept_sine); - - univector<i32> i32data = clamp(resampled.slice(0, destsize) * i32max, -i32max, +i32max); - 
univector2d<i32> data = { i32data }; - - auto wr = sequential_file_writer("audio_normal_quality.wav"); - audio_encode(wr, data, audioformat(data, output_sr)); - - plot_save("audio_normal_quality", "audio_normal_quality.wav", ""); - } - - { - auto r = resampler(resample_quality::low, output_sr, input_sr, 1.0, 0.496); - univector<f64> resampled(len * output_sr / input_sr); - - const size_t destsize = r(resampled.data(), swept_sine); - - univector<i32> i32data = clamp(resampled.slice(0, destsize) * i32max, -i32max, +i32max); - univector2d<i32> data = { i32data }; - - auto wr = sequential_file_writer("audio_low_quality.wav"); - audio_encode(wr, data, audioformat(data, output_sr)); - - plot_save("audio_low_quality", "audio_low_quality.wav", ""); - } - - { - auto r = resampler(resample_quality::draft, output_sr, input_sr, 1.0, 0.496); - univector<f64> resampled(len * output_sr / input_sr); - - const size_t destsize = r(resampled.data(), swept_sine); - - univector<i32> i32data = clamp(resampled.slice(0, destsize) * i32max, -i32max, +i32max); - univector2d<i32> data = { i32data }; - - auto wr = sequential_file_writer("audio_draft_quality.wav"); - audio_encode(wr, data, audioformat(data, output_sr)); - - plot_save("audio_draft_quality", "audio_draft_quality.wav", ""); - } - - return 0; -} diff --git a/examples/sample_rate_conversion.cpp b/examples/sample_rate_conversion.cpp @@ -0,0 +1,105 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016 D Levin + * See LICENSE.txt for details + */ + +// library_version() +#include <kfr/version.hpp> + +// print(), format() +#include <kfr/cometa/string.hpp> + +#include <kfr/math.hpp> + +// resample* +#include <kfr/dsp/sample_rate_conversion.hpp> + +// file* +#include <kfr/io/audiofile.hpp> + +// swept +#include <kfr/dsp/oscillators.hpp> + +// plot_save() +#include <kfr/io/python_plot.hpp> + +#include <iostream> + +using namespace kfr; + +constexpr size_t input_sr = 96000; +constexpr size_t output_sr = 44100; +constexpr size_t len = 96000 * 6; +constexpr fbase i32max = 2147483647.0; + +int main(int argc, char** argv) +{ + println(library_version()); + + const std::string options = "phaseresp=False"; + + univector<fbase> swept_sine = swept<fbase>(0.5, len); + + { + auto r = resampler<fbase>(resample_quality::high, output_sr, input_sr, 1.0, 0.496); + univector<fbase> resampled(len * output_sr / input_sr); + + const size_t destsize = r(resampled.data(), swept_sine); + + univector<i32> i32data = clamp(resampled.slice(0, destsize) * i32max, -i32max, +i32max); + univector2d<i32> data = { i32data }; + + auto wr = sequential_file_writer("audio_high_quality.wav"); + audio_encode(wr, data, audioformat(data, output_sr)); + + plot_save("audio_high_quality", "audio_high_quality.wav", ""); + } + + { + auto r = resampler<fbase>(resample_quality::normal, output_sr, input_sr, 1.0, 0.496); + univector<fbase> resampled(len * output_sr / input_sr); + + const size_t destsize = r(resampled.data(), swept_sine); + + univector<i32> i32data = clamp(resampled.slice(0, destsize) * i32max, -i32max, +i32max); + univector2d<i32> data = { i32data }; + + auto wr = sequential_file_writer("audio_normal_quality.wav"); + audio_encode(wr, data, audioformat(data, output_sr)); + + plot_save("audio_normal_quality", "audio_normal_quality.wav", ""); + } + + { + auto r = resampler<fbase>(resample_quality::low, output_sr, input_sr, 1.0, 0.496); + univector<fbase> resampled(len * output_sr / input_sr); + + const size_t destsize = r(resampled.data(), swept_sine); + + univector<i32> i32data = 
clamp(resampled.slice(0, destsize) * i32max, -i32max, +i32max); + univector2d<i32> data = { i32data }; + + auto wr = sequential_file_writer("audio_low_quality.wav"); + audio_encode(wr, data, audioformat(data, output_sr)); + + plot_save("audio_low_quality", "audio_low_quality.wav", ""); + } + + { + auto r = resampler<fbase>(resample_quality::draft, output_sr, input_sr, 1.0, 0.496); + univector<fbase> resampled(len * output_sr / input_sr); + + const size_t destsize = r(resampled.data(), swept_sine); + + univector<i32> i32data = clamp(resampled.slice(0, destsize) * i32max, -i32max, +i32max); + univector2d<i32> data = { i32data }; + + auto wr = sequential_file_writer("audio_draft_quality.wav"); + audio_encode(wr, data, audioformat(data, output_sr)); + + plot_save("audio_draft_quality", "audio_draft_quality.wav", ""); + } + + return 0; +} diff --git a/examples/window.cpp b/examples/window.cpp @@ -30,7 +30,7 @@ int main(int argc, char** argv) const std::string options = "freqresp=True, dots=True, padwidth=1024, " "log_freq=False, horizontal=False, normalized_freq=True"; - univector<double, 64> output; + univector<fbase, 64> output; output = window_hann(output.size()); plot_save("window_hann", output, options + ", title='Hann window'"); diff --git a/include/kfr/all.hpp b/include/kfr/all.hpp @@ -21,73 +21,7 @@ * See http://www.kfrlib.com for details. */ -#include "cometa/string.hpp" - -#include "base/abs.hpp" -#include "base/asin_acos.hpp" -#include "base/atan.hpp" -#include "base/basic_expressions.hpp" -#include "base/clamp.hpp" -#include "base/compiletime.hpp" -#include "base/complex.hpp" -#include "base/constants.hpp" -#include "base/conversion.hpp" -#include "base/cpuid.hpp" -#include "base/cpuid_auto.hpp" -#include "base/digitreverse.hpp" -#include "base/function.hpp" -#include "base/gamma.hpp" -#include "base/generators.hpp" -#include "base/hyperbolic.hpp" -#include "base/log_exp.hpp" -#include "base/logical.hpp" -#include "base/memory.hpp" -#include "base/min_max.hpp" -#include "base/modzerobessel.hpp" -#include "base/operators.hpp" -#include "base/pointer.hpp" -#include "base/random.hpp" -#include "base/read_write.hpp" -#include "base/reduce.hpp" -#include "base/round.hpp" -#include "base/saturation.hpp" -#include "base/select.hpp" -#include "base/shuffle.hpp" -#include "base/sin_cos.hpp" -#include "base/small_buffer.hpp" -#include "base/sort.hpp" -#include "base/sqrt.hpp" -#include "base/tan.hpp" -#include "base/types.hpp" -#include "base/univector.hpp" -#include "base/vec.hpp" -#include "version.hpp" - -#include "data/bitrev.hpp" -#include "data/sincos.hpp" -#include "dsp/biquad.hpp" -#include "dsp/biquad_design.hpp" -#include "dsp/fir.hpp" -#include "dsp/fir_design.hpp" -#include "dsp/fracdelay.hpp" -#include "dsp/goertzel.hpp" -#include "dsp/impulse.hpp" -#include "dsp/interpolation.hpp" -#include "dsp/oscillators.hpp" -#include "dsp/resample.hpp" -#include "dsp/speaker.hpp" -#include "dsp/units.hpp" -#include "dsp/waveshaper.hpp" -#include "dsp/weighting.hpp" -#include "dsp/window.hpp" -#include "io/audiofile.hpp" -#include "io/file.hpp" -#include "io/python_plot.hpp" -#include "io/tostring.hpp" -#include "math.hpp" - -#include "dft/bitrev.hpp" -#include "dft/conv.hpp" -#include "dft/fft.hpp" -#include "dft/ft.hpp" -#include "dft/reference_dft.hpp" +#include "base.hpp" +#include "dft.hpp" +#include "dsp.hpp" +#include "io.hpp" diff --git a/include/kfr/base.hpp b/include/kfr/base.hpp @@ -0,0 +1,63 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of 
KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "base/abs.hpp" +#include "base/asin_acos.hpp" +#include "base/atan.hpp" +#include "base/basic_expressions.hpp" +#include "base/clamp.hpp" +#include "base/compiletime.hpp" +#include "base/complex.hpp" +#include "base/constants.hpp" +#include "base/conversion.hpp" +#include "base/cpuid.hpp" +#include "base/cpuid_auto.hpp" +#include "base/digitreverse.hpp" +#include "base/expression.hpp" +#include "base/function.hpp" +#include "base/gamma.hpp" +#include "base/generators.hpp" +#include "base/hyperbolic.hpp" +#include "base/log_exp.hpp" +#include "base/logical.hpp" +#include "base/memory.hpp" +#include "base/min_max.hpp" +#include "base/modzerobessel.hpp" +#include "base/operators.hpp" +#include "base/pointer.hpp" +#include "base/random.hpp" +#include "base/read_write.hpp" +#include "base/reduce.hpp" +#include "base/round.hpp" +#include "base/saturation.hpp" +#include "base/select.hpp" +#include "base/shuffle.hpp" +#include "base/sin_cos.hpp" +#include "base/small_buffer.hpp" +#include "base/sort.hpp" +#include "base/sqrt.hpp" +#include "base/tan.hpp" +#include "base/types.hpp" +#include "base/univector.hpp" +#include "base/vec.hpp" diff --git a/include/kfr/base/abs.hpp b/include/kfr/base/abs.hpp @@ -31,6 +31,9 @@ namespace kfr namespace intrinsics { + +#if defined CMT_ARCH_SSSE3 + // floating point template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> KFR_SINTRIN vec<T, N> abs(const vec<T, N>& x) @@ -38,8 +41,6 @@ KFR_SINTRIN vec<T, N> abs(const vec<T, N>& x) return x & internal::invhighbitmask<T>; } -#if defined CID_ARCH_SSSE3 - KFR_SINTRIN i64sse abs(const i64sse& x) { return select(x >= 0, x, -x); } KFR_SINTRIN i32sse abs(const i32sse& x) { return _mm_abs_epi32(*x); } KFR_SINTRIN i16sse abs(const i16sse& x) { return _mm_abs_epi16(*x); } @@ -49,7 +50,7 @@ KFR_SINTRIN u32sse abs(const u32sse& x) { return x; } KFR_SINTRIN u16sse abs(const u16sse& x) { return x; } KFR_SINTRIN u8sse abs(const u8sse& x) { return x; } -#if defined CID_ARCH_AVX2 +#if defined CMT_ARCH_AVX2 KFR_SINTRIN i64avx abs(const i64avx& x) { return select(x >= 0, x, -x); } KFR_SINTRIN i32avx abs(const i32avx& x) { return _mm256_abs_epi32(*x); } KFR_SINTRIN i16avx abs(const i16avx& x) { return _mm256_abs_epi16(*x); } @@ -62,7 +63,39 @@ KFR_SINTRIN u8avx abs(const u8avx& x) { return x; } KFR_HANDLE_ALL_SIZES_NOT_F_1(abs) +#elif defined CMT_ARCH_NEON + +KFR_SINTRIN i8neon abs(const i8neon& x) { return vabsq_s8(*x); } +KFR_SINTRIN i16neon abs(const i16neon& x) { return vabsq_s16(*x); } +KFR_SINTRIN i32neon abs(const i32neon& x) { return vabsq_s32(*x); } +#if defined CMT_ARCH_NEON64 +KFR_SINTRIN i64neon abs(const 
i64neon& x) { return vabsq_s64(*x); } +#else +KFR_SINTRIN i64neon abs(const i64neon& x) { return select(x >= 0, x, -x); } +#endif + +KFR_SINTRIN u8neon abs(const u8neon& x) { return x; } +KFR_SINTRIN u16neon abs(const u16neon& x) { return x; } +KFR_SINTRIN u32neon abs(const u32neon& x) { return x; } +KFR_SINTRIN u64neon abs(const u64neon& x) { return x; } + +KFR_SINTRIN f32neon abs(const f32neon& x) { return vabsq_f32(*x); } +#if defined CMT_ARCH_NEON64 +KFR_SINTRIN f64neon abs(const f64neon& x) { return vabsq_f64(*x); } #else +KFR_SINTRIN f64neon abs(const f64neon& x) { return x & internal::invhighbitmask<f64>; } +#endif + +KFR_HANDLE_ALL_SIZES_1(abs) + +#else + +// floating point +template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> +KFR_SINTRIN vec<T, N> abs(const vec<T, N>& x) +{ + return x & internal::invhighbitmask<T>; +} // fallback template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> diff --git a/include/kfr/base/asin_acos.hpp b/include/kfr/base/asin_acos.hpp @@ -36,14 +36,14 @@ namespace intrinsics template <typename T, size_t N, typename Tout = flt_type<T>> KFR_SINTRIN vec<Tout, N> asin(const vec<T, N>& x) { - const vec<Tout, N> xx = cast<Tout>(x); + const vec<Tout, N> xx = x; return atan2(xx, sqrt(Tout(1) - xx * xx)); } template <typename T, size_t N, typename Tout = flt_type<T>> KFR_SINTRIN vec<Tout, N> acos(const vec<T, N>& x) { - const vec<Tout, N> xx = cast<Tout>(x); + const vec<Tout, N> xx = x; return atan2(sqrt(Tout(1) - xx * xx), xx); } KFR_I_CONVERTER(asin) diff --git a/include/kfr/base/atan.hpp b/include/kfr/base/atan.hpp @@ -65,14 +65,14 @@ KFR_SINTRIN vec<f64, N> atan2k(vec<f64, N> y, vec<f64, N> x) { vec<f64, N> s, t, u; vec<i64, N> q; - q = select(x < 0, -2ll, 0ll); + q = select(x < 0, i64(-2), i64(0)); x = select(x < 0, -x, x); - vec<i64, N> m; + mask<i64, N> m; m = y > x; t = x; x = select(m, y, x); y = select(m, -t, y); - q = select(m, q + 1ll, q); + q = select(m, q + i64(1), q); s = y / x; t = s * s; u = -1.88796008463073496563746e-05; @@ -122,8 +122,8 @@ KFR_SINTRIN vec<f64, N> atan2(const vec<f64, N>& y, const vec<f64, N>& x) constexpr f64 pi_over_2 = 1.5707963267948966192313216916398; constexpr f64 pi_over_4 = 0.78539816339744830961566084581988; r = mulsign(r, x); - r = select(isinf(x) || x == 0.0, pi_over_2 - select(x, mulsign(pi_over_2, x), 0.0), r); - r = select(isinf(y), pi_over_2 - select(x, mulsign(pi_over_4, x), 0.0), r); + r = select(isinf(x) || x == 0.0, pi_over_2 - select(x.asmask(), mulsign(pi_over_2, x), 0.0), r); + r = select(isinf(y), pi_over_2 - select(x.asmask(), mulsign(pi_over_4, x), 0.0), r); r = select(y == 0.0, fbitcast(ibitcast(x < 0) & ibitcast(pi)), r); r = fbitcast(ibitcast(isnan(x) || isnan(y)) | ibitcast(mulsign(r, y))); return r; @@ -158,7 +158,7 @@ KFR_SINTRIN vec<f64, N> atan(const vec<f64, N>& s) { vec<f64, N> t, u; vec<i64, N> q; - q = select(s < 0.0, 2ll, 0ll); + q = select(s < 0.0, i64(2), i64(0)); s = select(s < 0.0, -s, s); q = select(s > 1.0, q | 1, q); s = select(s > 1.0, 1.0 / s, s); diff --git a/include/kfr/base/basic_expressions.hpp b/include/kfr/base/basic_expressions.hpp @@ -36,8 +36,8 @@ struct expression_iterator constexpr expression_iterator(E1&& e1) : e1(std::forward<E1>(e1)) {} struct iterator { - T operator*() { return get(); } - T get() { return expr.e1(cinput, position, vec_t<T, 1>())[0]; } + T operator*() const { return get(); } + T get() const { return expr.e1(cinput, position, vec_t<T, 1>())[0]; } iterator& operator++() { ++position; @@ -50,40 +50,40 @@ struct 
expression_iterator return copy; } bool operator!=(const iterator& other) const { return position != other.position; } - expression_iterator& expr; + const expression_iterator& expr; size_t position; }; - iterator begin() { return { *this, 0 }; } - iterator end() { return { *this, e1.size() }; } + iterator begin() const { return { *this, 0 }; } + iterator end() const { return { *this, e1.size() }; } E1 e1; }; } template <typename E1, typename T = value_type_of<E1>> -KFR_INLINE internal::expression_iterator<T, E1> to_iterator(E1&& e1) +CMT_INLINE internal::expression_iterator<T, E1> to_iterator(E1&& e1) { return internal::expression_iterator<T, E1>(std::forward<E1>(e1)); } template <typename T, typename... Ts> -KFR_INLINE auto sequence(T x, Ts... rest) +CMT_INLINE auto sequence(T x, Ts... rest) { const T seq[] = { x, static_cast<T>(rest)... }; constexpr size_t N = arraysize(seq); return lambda([=](size_t index) { return seq[index % N]; }); } -KFR_INLINE auto zeros() +CMT_INLINE auto zeros() { return lambda([](cinput_t, size_t, auto x) { return zerovector(x); }); } -KFR_INLINE auto ones() +CMT_INLINE auto ones() { return lambda([](cinput_t, size_t, auto x) { using U = subtype<decltype(x)>; return U(1); }); } -KFR_INLINE auto counter() +CMT_INLINE auto counter() { return lambda([](cinput_t, size_t index, auto x) { using T = subtype<decltype(x)>; @@ -93,7 +93,7 @@ KFR_INLINE auto counter() }); } template <typename T1> -KFR_INLINE auto counter(T1 start) +CMT_INLINE auto counter(T1 start) { return lambda([start](cinput_t, size_t index, auto x) { using T = subtype<decltype(x)>; @@ -103,7 +103,7 @@ KFR_INLINE auto counter(T1 start) }); } template <typename T1, typename T2> -KFR_INLINE auto counter(T1 start, T2 step) +CMT_INLINE auto counter(T1 start, T2 step) { return lambda([start, step](cinput_t, size_t index, auto x) { using T = subtype<decltype(x)>; @@ -135,13 +135,13 @@ template <typename T, typename E1> struct expression_reader { constexpr expression_reader(E1&& e1) noexcept : e1(std::forward<E1>(e1)) {} - T read() + T read() const { const T result = e1(cinput, m_position, vec_t<T, 1>()); m_position++; return result; } - size_t m_position = 0; + mutable size_t m_position = 0; E1 e1; }; template <typename T, typename E1> @@ -192,7 +192,7 @@ struct expression_skip : expression<E1>, inherit_value_type<E1> { expression_skip(E1&& e1, size_t count) : expression<E1>(std::forward<E1>(e1)), count(count) {} template <typename T, size_t N> - KFR_INLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N> y) const + CMT_INLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N> y) const { return this->argument_first(index + count, y); } @@ -218,7 +218,7 @@ struct expression_linspace<T, false> : input_expression } template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const + CMT_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const { using UI = itype<U>; return U(start) + (enumerate(x) + cast<U>(cast<UI>(index))) * U(offset); @@ -242,13 +242,13 @@ struct expression_linspace<T, true> : input_expression } template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const + CMT_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const { using UI = itype<U>; return mix((enumerate(x) + cast<U>(cast<UI>(index))) * invsize, cast<U>(start), cast<U>(stop)); } template <typename U, size_t N> - KFR_INLINE static vec<U, N> mix(vec<U, N> t, U x, U y) + CMT_INLINE static vec<U, N> 
mix(vec<U, N> t, U x, U y) { return (U(1.0) - t) * x + t * y; } @@ -265,7 +265,7 @@ public: using base = expression<E...>; template <typename... Expr_> - KFR_INLINE expression_sequence(const size_t (&segments)[base::size], Expr_&&... expr) noexcept + CMT_INLINE expression_sequence(const size_t (&segments)[base::size], Expr_&&... expr) noexcept : base(std::forward<Expr_>(expr)...) { std::copy(std::begin(segments), std::end(segments), this->segments.begin() + 1); @@ -274,7 +274,7 @@ public: } template <typename T, size_t N> - KFR_NOINLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N> y) const + CMT_NOINLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N> y) const { std::size_t sindex = size_t(std::upper_bound(std::begin(segments), std::end(segments), index) - 1 - std::begin(segments)); @@ -296,7 +296,7 @@ public: protected: template <typename T, size_t N> - KFR_NOINLINE vec<T, N> get(size_t index, size_t expr_index, vec_t<T, N> y) + CMT_NOINLINE vec<T, N> get(size_t index, size_t expr_index, vec_t<T, N> y) { return cswitch(indicesfor<E...>, expr_index, [&](auto val) { return this->argument(val, index, y); }, [&]() { return zerovector(y); }); @@ -307,13 +307,13 @@ protected: } template <typename E1> -KFR_INLINE internal::expression_skip<E1> skip(E1&& e1, size_t count = 1) +CMT_INLINE internal::expression_skip<E1> skip(E1&& e1, size_t count = 1) { return internal::expression_skip<E1>(std::forward<E1>(e1), count); } template <typename T1, typename T2, bool precise = false, typename TF = ftype<common_type<T1, T2>>> -KFR_INLINE internal::expression_linspace<TF, precise> linspace(T1 start, T2 stop, size_t size, +CMT_INLINE internal::expression_linspace<TF, precise> linspace(T1 start, T2 stop, size_t size, bool endpoint = false) { return internal::expression_linspace<TF, precise>(start, stop, size, endpoint); @@ -321,7 +321,7 @@ KFR_INLINE internal::expression_linspace<TF, precise> linspace(T1 start, T2 stop KFR_FN(linspace) template <typename T, bool precise = false, typename TF = ftype<T>> -KFR_INLINE internal::expression_linspace<TF, precise> symmlinspace(T symsize, size_t size, +CMT_INLINE internal::expression_linspace<TF, precise> symmlinspace(T symsize, size_t size, bool endpoint = false) { return internal::expression_linspace<TF, precise>(symmetric_linspace, symsize, size, endpoint); @@ -329,7 +329,7 @@ KFR_INLINE internal::expression_linspace<TF, precise> symmlinspace(T symsize, si KFR_FN(symmlinspace) template <size_t size, typename... E> -KFR_INLINE internal::expression_sequence<decay<E>...> gen_sequence(const size_t (&list)[size], E&&... gens) +CMT_INLINE internal::expression_sequence<decay<E>...> gen_sequence(const size_t (&list)[size], E&&... 
gens) { static_assert(size == sizeof...(E), "Lists must be of equal length"); return internal::expression_sequence<decay<E>...>(list, std::forward<E>(gens)...); @@ -348,7 +348,8 @@ struct multioutput : output_expression template <typename T, size_t N> void operator()(coutput_t, size_t index, const vec<T, N>& x) { - cfor(csize<0>, csize<sizeof...(E)>, [&](auto n) { std::get<val_of(n)>(outputs)(coutput, index, x); }); + cfor(csize<0>, csize<sizeof...(E)>, + [&](auto n) { std::get<val_of(decltype(n)())>(outputs)(coutput, index, x); }); } std::tuple<E...> outputs; diff --git a/include/kfr/base/complex.hpp b/include/kfr/base/complex.hpp @@ -100,10 +100,12 @@ namespace cometa template <typename T> struct compound_type_traits<kfr::complex<T>> { - constexpr static size_t width = 2; - using subtype = T; - using deep_subtype = cometa::deep_subtype<T>; - constexpr static bool is_scalar = false; + constexpr static size_t width = 2; + constexpr static size_t deep_width = width * compound_type_traits<T>::width; + using subtype = T; + using deep_subtype = cometa::deep_subtype<T>; + constexpr static bool is_scalar = false; + constexpr static size_t depth = cometa::compound_type_traits<T>::depth + 1; template <typename U> using rebind = kfr::complex<U>; template <typename U> @@ -155,41 +157,41 @@ struct vec_op<complex<T>> : private vec_op<T> }; template <typename T, size_t N> -KFR_INLINE vec<complex<T>, N> cdupreal(const vec<complex<T>, N>& x) +CMT_INLINE vec<complex<T>, N> cdupreal(const vec<complex<T>, N>& x) { - return subcast<complex<T>>(dupeven(subcast<T>(x))); + return compcast<complex<T>>(dupeven(compcast<T>(x))); } KFR_FN(cdupreal) template <typename T, size_t N> -KFR_INLINE vec<complex<T>, N> cdupimag(const vec<complex<T>, N>& x) +CMT_INLINE vec<complex<T>, N> cdupimag(const vec<complex<T>, N>& x) { - return subcast<complex<T>>(dupodd(subcast<T>(x))); + return compcast<complex<T>>(dupodd(compcast<T>(x))); } KFR_FN(cdupimag) template <typename T, size_t N> -KFR_INLINE vec<complex<T>, N> cswapreim(const vec<complex<T>, N>& x) +CMT_INLINE vec<complex<T>, N> cswapreim(const vec<complex<T>, N>& x) { - return subcast<complex<T>>(swap<2>(subcast<T>(x))); + return compcast<complex<T>>(swap<2>(compcast<T>(x))); } KFR_FN(cswapreim) template <typename T, size_t N> -KFR_INLINE vec<complex<T>, N> cnegreal(const vec<complex<T>, N>& x) +CMT_INLINE vec<complex<T>, N> cnegreal(const vec<complex<T>, N>& x) { return x ^ complex<T>(-T(), T()); } KFR_FN(cnegreal) template <typename T, size_t N> -KFR_INLINE vec<complex<T>, N> cnegimag(const vec<complex<T>, N>& x) +CMT_INLINE vec<complex<T>, N> cnegimag(const vec<complex<T>, N>& x) { return x ^ complex<T>(T(), -T()); } KFR_FN(cnegimag) template <typename T, size_t N> -KFR_INLINE vec<complex<T>, N> cconj(const vec<complex<T>, N>& x) +CMT_INLINE vec<complex<T>, N> cconj(const vec<complex<T>, N>& x) { return cnegimag(x); } @@ -205,52 +207,54 @@ template <typename T> struct is_complex_impl<complex<T>> : std::true_type { }; -} - -// real to complex -template <typename To, typename From, size_t N, KFR_ENABLE_IF(internal::is_complex_impl<To>::value)> -constexpr KFR_INLINE vec<To, N> cast(const vec<From, N>& value) noexcept -{ - const vec<subtype<To>, N> casted = cast<subtype<To>>(value); - return subcast<To>(interleave(casted, zerovector(casted))); -} -// complex to complex -template <typename To, typename From, size_t N, KFR_ENABLE_IF(internal::is_complex_impl<To>::value)> -constexpr KFR_INLINE vec<To, N> cast(const vec<complex<From>, N>& value) noexcept +// vector<complex> 
to vector<complex> +template <typename To, typename From, size_t N> +struct conversion<vec<complex<To>, N>, vec<complex<From>, N>> { - return subcast<To>(cast<subtype<To>>(subcast<From>(value))); -} + static_assert(!is_compound<To>::value, ""); + static_assert(!is_compound<From>::value, ""); + static vec<complex<To>, N> cast(const vec<complex<From>, N>& value) + { + return builtin_convertvector<complex<To>>(value); + } +}; -// complex to real -template <typename To, typename From, size_t N, KFR_ENABLE_IF(!internal::is_complex_impl<To>::value)> -constexpr KFR_INLINE vec<To, N> cast(const vec<complex<From>, N>& value) noexcept +// vector to vector<complex> +template <typename To, typename From, size_t N> +struct conversion<vec<complex<To>, N>, vec<From, N>> { - static_assert(sizeof(To) == 0, "Can't cast complex to real"); - return {}; + static_assert(!is_compound<To>::value, ""); + static_assert(!is_compound<From>::value, ""); + static vec<complex<To>, N> cast(const vec<From, N>& value) + { + const vec<To, N> casted = static_cast<vec<To, N>>(value); + return *interleave(casted, zerovector(casted)); + } +}; } template <typename T, size_t N> -constexpr KFR_INLINE vec<complex<T>, N / 2> ccomp(const vec<T, N>& x) +constexpr CMT_INLINE vec<complex<T>, N / 2> ccomp(const vec<T, N>& x) { - return subcast<complex<T>>(x); + return compcast<complex<T>>(x); } template <typename T, size_t N> -constexpr KFR_INLINE vec<T, N * 2> cdecom(const vec<complex<T>, N>& x) +constexpr CMT_INLINE vec<T, N * 2> cdecom(const vec<complex<T>, N>& x) { - return subcast<T>(x); + return compcast<T>(x); } template <typename T> -constexpr KFR_INLINE T real(const complex<T>& value) +constexpr CMT_INLINE T real(const complex<T>& value) { return value.real(); } template <typename T, size_t N> -constexpr KFR_INLINE vec<T, N> real(const vec<complex<T>, N>& value) +constexpr CMT_INLINE vec<T, N> real(const vec<complex<T>, N>& value) { - return even(subcast<T>(value)); + return even(compcast<T>(value)); } template <typename T> @@ -260,36 +264,36 @@ using realftype = ftype<decltype(kfr::real(std::declval<T>()))>; KFR_FN(real) template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INLINE internal::expression_function<fn_real, E1> real(E1&& x) +CMT_INLINE internal::expression_function<fn_real, E1> real(E1&& x) { return { {}, std::forward<E1>(x) }; } template <typename T> -constexpr KFR_INLINE T imag(const complex<T>& value) +constexpr CMT_INLINE T imag(const complex<T>& value) { return value.imag(); } template <typename T, size_t N> -constexpr KFR_INLINE vec<T, N> imag(const vec<complex<T>, N>& value) +constexpr CMT_INLINE vec<T, N> imag(const vec<complex<T>, N>& value) { - return odd(subcast<T>(value)); + return odd(compcast<T>(value)); } KFR_FN(imag) template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INLINE internal::expression_function<fn_imag, E1> imag(E1&& x) +CMT_INLINE internal::expression_function<fn_imag, E1> imag(E1&& x) { return { {}, std::forward<E1>(x) }; } template <typename T1, typename T2 = T1, size_t N, typename T = common_type<T1, T2>> -constexpr KFR_INLINE vec<complex<T>, N> make_complex(const vec<T1, N>& real, const vec<T2, N>& imag = T2(0)) +constexpr CMT_INLINE vec<complex<T>, N> make_complex(const vec<T1, N>& real, const vec<T2, N>& imag = T2(0)) { - return subcast<complex<T>>(interleave(cast<T>(real), cast<T>(imag))); + return compcast<complex<T>>(interleave(cast<T>(real), cast<T>(imag))); } template <typename T1, typename T2 = T1, typename T = common_type<T1, T2>> 
-constexpr KFR_INLINE complex<T> make_complex(T1 real, T2 imag = T2(0)) +constexpr CMT_INLINE complex<T> make_complex(T1 real, T2 imag = T2(0)) { return complex<T>(cast<T>(real), cast<T>(imag)); } diff --git a/include/kfr/base/conversion.hpp b/include/kfr/base/conversion.hpp @@ -35,10 +35,10 @@ namespace internal template <typename From, typename E> struct expression_convert : expression<E> { - KFR_INLINE expression_convert(E&& expr) noexcept : expression<E>(std::forward<E>(expr)) {} + CMT_INLINE expression_convert(E&& expr) noexcept : expression<E>(std::forward<E>(expr)) {} template <typename T, size_t N> - KFR_INLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N>) const + CMT_INLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N>) const { return this->argument_first(index, vec_t<From, N>()); } @@ -46,7 +46,7 @@ struct expression_convert : expression<E> } template <typename From, typename E> -KFR_INLINE internal::expression_convert<From, decay<E>> convert(E&& expr) +CMT_INLINE internal::expression_convert<From, decay<E>> convert(E&& expr) { return internal::expression_convert<From, decay<E>>(std::forward<E>(expr)); } diff --git a/include/kfr/base/cpuid.hpp b/include/kfr/base/cpuid.hpp @@ -27,6 +27,7 @@ namespace kfr { +#ifdef CMT_ARCH_X86 struct cpu_features { @@ -104,17 +105,17 @@ struct cpu_data u32 data[4]; }; -#if defined KFR_COMPILER_GNU || defined KFR_COMPILER_CLANG -KFR_INLINE u32 get_cpuid(u32 func, u32 subfunc, u32* eax, u32* ebx, u32* ecx, u32* edx) +#if defined CMT_COMPILER_GNU || defined CMT_COMPILER_CLANG +CMT_INLINE u32 get_cpuid(u32 func, u32 subfunc, u32* eax, u32* ebx, u32* ecx, u32* edx) { __asm__("cpuid" : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx) : "0"(func), "2"(subfunc)); return 1; } -KFR_INLINE void cpuid(u32* ptr, u32 func, u32 subfunc = 0) +CMT_INLINE void cpuid(u32* ptr, u32 func, u32 subfunc = 0) { get_cpuid(func, subfunc, &ptr[0], &ptr[1], &ptr[2], &ptr[3]); } -KFR_INLINE u32 get_xcr0() +CMT_INLINE u32 get_xcr0() { u32 xcr0; __asm__("xgetbv" : "=a"(xcr0) : "c"(0) : "%edx"); @@ -244,31 +245,28 @@ cpu_t detect_cpu() c.hasAVXOSSUPPORT = c.hasAVX && c.hasOSXSAVE && (get_xcr0() & 0x06) == 0x06; c.hasAVX512OSSUPPORT = c.hasAVXOSSUPPORT && c.hasAVX512F && c.hasOSXSAVE && (get_xcr0() & 0xE0) == 0xE0; -#ifdef KFR_AVAIL_AVX2 if (c.hasAVX2 && c.hasAVXOSSUPPORT) return cpu_t::avx2; -#endif -#ifdef KFR_AVAIL_AVX if (c.hasAVX && c.hasAVXOSSUPPORT) return cpu_t::avx1; -#endif -#ifdef KFR_AVAIL_SSE41 if (c.hasSSE41) return cpu_t::sse41; -#endif -#ifdef KFR_AVAIL_SSSE3 if (c.hasSSSE3) return cpu_t::ssse3; -#endif -#ifdef KFR_AVAIL_SSE3 if (c.hasSSE3) return cpu_t::sse3; -#endif -#ifdef KFR_AVAIL_SSE2 if (c.hasSSE2) return cpu_t::sse2; -#endif return cpu_t::lowest; } } +#else + +template <size_t = 0> +cpu_t detect_cpu() +{ + return cpu_t::native; +} + +#endif } diff --git a/include/kfr/base/cpuid_auto.hpp b/include/kfr/base/cpuid_auto.hpp @@ -29,19 +29,19 @@ namespace kfr namespace internal { -KFR_INLINE cpu_t& cpu_v() +CMT_INLINE cpu_t& cpu_v() { static cpu_t v1 = cpu_t::native; return v1; } -KFR_INLINE char init_cpu_v() +CMT_INLINE char init_cpu_v() { cpu_v() = detect_cpu<0>(); return 0; } -KFR_INLINE char init_dummyvar() +CMT_INLINE char init_dummyvar() { static char dummy = init_cpu_v(); return dummy; @@ -49,5 +49,5 @@ KFR_INLINE char init_dummyvar() static char dummyvar = init_dummyvar(); } -KFR_INLINE cpu_t get_cpu() { return internal::cpu_v(); } +CMT_INLINE cpu_t get_cpu() { return internal::cpu_v(); } } diff --git 
a/include/kfr/base/digitreverse.hpp b/include/kfr/base/digitreverse.hpp @@ -90,19 +90,19 @@ struct shuffle_index_digitreverse } template <size_t radix, size_t groupsize = 1, typename T, size_t N> -KFR_INLINE vec<T, N> digitreverse(const vec<T, N>& x) +CMT_INLINE vec<T, N> digitreverse(const vec<T, N>& x) { return shufflevector<N, internal::shuffle_index_digitreverse<radix, ilog2(N / groupsize)>, groupsize>(x); } template <size_t groupsize = 1, typename T, size_t N> -KFR_INLINE vec<T, N> bitreverse(const vec<T, N>& x) +CMT_INLINE vec<T, N> bitreverse(const vec<T, N>& x) { return digitreverse<2, groupsize>(x); } template <size_t groupsize = 1, typename T, size_t N> -KFR_INLINE vec<T, N> digitreverse4(const vec<T, N>& x) +CMT_INLINE vec<T, N> digitreverse4(const vec<T, N>& x) { return digitreverse<4, groupsize>(x); } diff --git a/include/kfr/base/expression.hpp b/include/kfr/base/expression.hpp @@ -65,11 +65,11 @@ struct expression : input_expression expression() = delete; constexpr expression(Args&&... args) noexcept : args(std::forward<Args>(args)...) {} - KFR_INLINE void begin_block(size_t size) { begin_block_impl(size, indicesfor_t<Args...>()); } - KFR_INLINE void end_block(size_t size) { end_block_impl(size, indicesfor_t<Args...>()); } + CMT_INLINE void begin_block(size_t size) { begin_block_impl(size, indicesfor_t<Args...>()); } + CMT_INLINE void end_block(size_t size) { end_block_impl(size, indicesfor_t<Args...>()); } - KFR_INLINE void begin_block(size_t size) const { begin_block_impl(size, indicesfor_t<Args...>()); } - KFR_INLINE void end_block(size_t size) const { end_block_impl(size, indicesfor_t<Args...>()); } + CMT_INLINE void begin_block(size_t size) const { begin_block_impl(size, indicesfor_t<Args...>()); } + CMT_INLINE void end_block(size_t size) const { end_block_impl(size, indicesfor_t<Args...>()); } protected: std::tuple<Args...> args; @@ -81,57 +81,56 @@ protected: } template <typename Fn, typename T, size_t N> - KFR_INLINE vec<T, N> call(Fn&& fn, size_t index, vec_t<T, N> x) const + CMT_INLINE vec<T, N> call(Fn&& fn, size_t index, vec_t<T, N> x) const { return call_impl(std::forward<Fn>(fn), indicesfor_t<Args...>(), index, x); } template <size_t ArgIndex, typename T, size_t N> - KFR_INLINE vec<T, N> argument(csize_t<ArgIndex>, size_t index, vec_t<T, N> x) const + CMT_INLINE vec<T, N> argument(csize_t<ArgIndex>, size_t index, vec_t<T, N> x) const { static_assert(ArgIndex < count, "Incorrect ArgIndex"); return std::get<ArgIndex>(this->args)(cinput, index, x); } template <typename T, size_t N> - KFR_INLINE vec<T, N> argument_first(size_t index, vec_t<T, N> x) const + CMT_INLINE vec<T, N> argument_first(size_t index, vec_t<T, N> x) const { return std::get<0>(this->args)(cinput, index, x); } private: template <typename Arg, size_t N, typename Tin, - typename Tout1 = conditional<is_generic<Arg>::value, Tin, typename decay<Arg>::value_type>, - typename Tout = Tout1> - KFR_INLINE vec_t<Tout, N> vec_t_for() const + typename Tout = conditional<is_generic<Arg>::value, Tin, value_type_of<Arg>>> + CMT_INLINE vec_t<Tout, N> vec_t_for() const { return {}; } template <typename Fn, typename T, size_t N, size_t... 
indices> - KFR_INLINE vec<T, N> call_impl(Fn&& fn, csizes_t<indices...>, size_t index, vec_t<T, N>) const + CMT_INLINE vec<T, N> call_impl(Fn&& fn, csizes_t<indices...>, size_t index, vec_t<T, N>) const { using ratio = func_ratio<Fn>; constexpr size_t Nin = N * ratio::input / ratio::output; using Tout = conditional<is_same<generic, value_type>::value, T, common_type<T, value_type>>; - return cast<T>(fn(cast<Tout>(std::get<indices>(this->args)( - cinput, index * ratio::input / ratio::output, vec_t_for<Args, Nin, Tout>()))...)); + return fn(std::get<indices>(this->args)(cinput, index * ratio::input / ratio::output, + vec_t_for<Args, Nin, Tout>())...); } template <size_t... indices> - KFR_INLINE void begin_block_impl(size_t size, csizes_t<indices...>) + CMT_INLINE void begin_block_impl(size_t size, csizes_t<indices...>) { swallow{ (std::get<indices>(args).begin_block(size), 0)... }; } template <size_t... indices> - KFR_INLINE void end_block_impl(size_t size, csizes_t<indices...>) + CMT_INLINE void end_block_impl(size_t size, csizes_t<indices...>) { swallow{ (std::get<indices>(args).end_block(size), 0)... }; } template <size_t... indices> - KFR_INLINE void begin_block_impl(size_t size, csizes_t<indices...>) const + CMT_INLINE void begin_block_impl(size_t size, csizes_t<indices...>) const { swallow{ (std::get<indices>(args).begin_block(size), 0)... }; } template <size_t... indices> - KFR_INLINE void end_block_impl(size_t size, csizes_t<indices...>) const + CMT_INLINE void end_block_impl(size_t size, csizes_t<indices...>) const { swallow{ (std::get<indices>(args).end_block(size), 0)... }; } @@ -147,9 +146,9 @@ struct expression_scalar : input_expression const vec<T, width> val; template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t, vec_t<U, N>) const + CMT_INLINE vec<U, N> operator()(cinput_t, size_t, vec_t<U, N>) const { - return resize<N>(cast<U>(val)); + return resize<N>(static_cast<vec<U, width>>(val)); } }; @@ -185,7 +184,7 @@ struct expression_function : expression<arg<Args>...> { } template <typename T, size_t N> - KFR_INLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N> x) const + CMT_INLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N> x) const { static_assert(is_same<T, value_type_of<expression_function>>::value || is_generic<expression_function>::value, @@ -198,37 +197,37 @@ protected: }; template <typename Tout, typename Tin, size_t width, typename OutFn, typename Fn> -KFR_INLINE void process_cycle(OutFn&& outfn, const Fn& fn, size_t& i, size_t size) +CMT_INLINE void process_cycle(OutFn&& outfn, const Fn& fn, size_t& i, size_t size) { const size_t count = size / width * width; - KFR_LOOP_NOUNROLL + CMT_LOOP_NOUNROLL for (; i < count; i += width) { - outfn(coutput, i, cast<Tout>(fn(cinput, i, vec_t<Tin, width>()))); + outfn(coutput, i, fn(cinput, i, vec_t<Tin, width>())); } } } template <typename A> -KFR_INLINE internal::arg<A> e(A&& a) +CMT_INLINE internal::arg<A> e(A&& a) { return internal::arg<A>(std::forward<A>(a)); } template <typename T> -KFR_INLINE internal::expression_scalar<T> scalar(const T& val) +CMT_INLINE internal::expression_scalar<T> scalar(const T& val) { return internal::expression_scalar<T>(val); } template <typename T, size_t N> -KFR_INLINE internal::expression_scalar<T, N> scalar(const vec<T, N>& val) +CMT_INLINE internal::expression_scalar<T, N> scalar(const vec<T, N>& val) { return internal::expression_scalar<T, N>(val); } template <typename Fn, typename... 
Args> -KFR_INLINE internal::expression_function<decay<Fn>, internal::arg<Args>...> bind_expression(Fn&& fn, +CMT_INLINE internal::expression_function<decay<Fn>, internal::arg<Args>...> bind_expression(Fn&& fn, Args&&... args) { return internal::expression_function<decay<Fn>, internal::arg<Args>...>(std::forward<Fn>(fn), @@ -236,7 +235,7 @@ KFR_INLINE internal::expression_function<decay<Fn>, internal::arg<Args>...> bind } template <typename Tout, cpu_t c = cpu_t::native, size_t width = 0, typename OutFn, typename Fn> -KFR_INLINE void process(OutFn&& outfn, const Fn& fn, size_t size) +CMT_INLINE void process(OutFn&& outfn, const Fn& fn, size_t size) { static_assert(is_output_expression<OutFn>::value, "OutFn must be an expression"); static_assert(is_input_expression<Fn>::value, "Fn must be an expression"); @@ -267,9 +266,9 @@ struct expressoin_typed : input_expression expressoin_typed(E1&& e1) : e1(std::forward<E1>(e1)) {} template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + CMT_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { - return cast<U>(e1(cinput, index, vec_t<T, N>())); + return e1(cinput, index, vec_t<T, N>()); } E1 e1; }; @@ -283,10 +282,10 @@ struct expressoin_sized : input_expression expressoin_sized(E1&& e1, size_t size) : e1(std::forward<E1>(e1)), m_size(size) {} template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + CMT_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { auto val = e1(cinput, index, vec_t<T, N>()); - return cast<U>(val); + return val; } constexpr size_t size() const noexcept { return m_size; } diff --git a/include/kfr/base/function.hpp b/include/kfr/base/function.hpp @@ -46,7 +46,7 @@ using flt_type = conditional<std::is_floating_point<deep_subtype<T>>::value, T, namespace intrinsics { -#ifdef CID_ARCH_X86 +#ifdef CMT_ARCH_X86 using f32sse = vec<f32, 4>; using f64sse = vec<f64, 2>; using i8sse = vec<i8, 16>; @@ -117,10 +117,10 @@ using mu64neon = mask<u64, 2>; template <cpu_t c, typename T> constexpr inline size_t next_simd_width(size_t n) { -#ifdef CID_ARCH_X86 +#ifdef CMT_ARCH_X86 return n > vector_width<T, cpu_t::sse2> ? 
vector_width<T, c> : vector_width<T, cpu_t::sse2>; #endif -#ifdef CID_ARCH_ARM +#ifdef CMT_ARCH_ARM return vector_width<T, cpu_t::neon>; #endif } diff --git a/include/kfr/base/gamma.hpp b/include/kfr/base/gamma.hpp @@ -25,7 +25,7 @@ #include "log_exp.hpp" #pragma clang diagnostic push -#if CID_HAS_WARNING("-Wc99-extensions") +#if CMT_HAS_WARNING("-Wc99-extensions") #pragma clang diagnostic ignored "-Wc99-extensions" #endif @@ -46,7 +46,7 @@ KFR_SINTRIN vec<T, N> gamma(const vec<T, N>& z) { constexpr size_t Count = arraysize(gamma_precalc<T>); vec<T, N> accm = gamma_precalc<T>[0]; - KFR_LOOP_UNROLL + CMT_LOOP_UNROLL for (size_t k = 1; k < Count; k++) accm += gamma_precalc<T>[k] / (z + cast<utype<T>>(k)); accm *= exp(-(z + Count)) * pow(z + Count, z + 0.5); diff --git a/include/kfr/base/generators.hpp b/include/kfr/base/generators.hpp @@ -41,9 +41,9 @@ struct generator : input_expression using type = T; template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t, vec_t<U, N> t) const + CMT_INLINE vec<U, N> operator()(cinput_t, size_t, vec_t<U, N> t) const { - return cast<U>(generate(t)); + return generate(t); } void resync(T start) const { ptr_cast<Class>(this)->sync(start); } @@ -65,7 +65,7 @@ protected: } template <size_t N, KFR_ENABLE_IF(N == width)> - KFR_INLINE vec<T, N> generate(vec_t<T, N>) const + CMT_INLINE vec<T, N> generate(vec_t<T, N>) const { const vec<T, N> result = value; call_next(); @@ -73,7 +73,7 @@ protected: } template <size_t N, KFR_ENABLE_IF(N < width)> - KFR_INLINE vec<T, N> generate(vec_t<T, N>) const + CMT_INLINE vec<T, N> generate(vec_t<T, N>) const { const vec<T, N> result = narrow<N>(value); shift(csize<N>); @@ -81,7 +81,7 @@ protected: } template <size_t N, KFR_ENABLE_IF(N > width)> - KFR_INLINE vec<T, N> generate(vec_t<T, N> x) const + CMT_INLINE vec<T, N> generate(vec_t<T, N> x) const { const auto lo = generate(low(x)); const auto hi = generate(high(x)); @@ -99,16 +99,16 @@ struct generator_linear : generator<T, width, generator_linear<T, width>> this->resync(start); } - KFR_INLINE void sync(T start) const noexcept { this->value = start + enumerate<T, width>() * step; } + CMT_INLINE void sync(T start) const noexcept { this->value = start + enumerate<T, width>() * step; } - KFR_INLINE void next() const noexcept { this->value += vstep; } + CMT_INLINE void next() const noexcept { this->value += vstep; } protected: T step; T vstep; }; -template <typename T, size_t width = get_vector_width<T, cpu_t::native>(1, 2)> +template <typename T, size_t width = get_vector_width<T, cpu_t::native>(1, 2), KFR_ARCH_DEP> struct generator_exp : generator<T, width, generator_exp<T, width>> { generator_exp(T start, T step) noexcept : step(step), vstep(exp(make_vector(step* width))[0] - 1) @@ -116,16 +116,16 @@ struct generator_exp : generator<T, width, generator_exp<T, width>> this->resync(start); } - KFR_INLINE void sync(T start) const noexcept { this->value = exp(start + enumerate<T, width>() * step); } + CMT_INLINE void sync(T start) const noexcept { this->value = exp(start + enumerate<T, width>() * step); } - KFR_INLINE void next() const noexcept { this->value += this->value * vstep; } + CMT_INLINE void next() const noexcept { this->value += this->value * vstep; } protected: T step; T vstep; }; -template <typename T, size_t width = get_vector_width<T, cpu_t::native>(1, 2)> +template <typename T, size_t width = get_vector_width<T, cpu_t::native>(1, 2), KFR_ARCH_DEP> struct generator_exp2 : generator<T, width, generator_exp2<T, width>> { generator_exp2(T 
start, T step) noexcept : step(step), vstep(exp2(make_vector(step* width))[0] - 1) @@ -133,16 +133,16 @@ struct generator_exp2 : generator<T, width, generator_exp2<T, width>> this->resync(start); } - KFR_INLINE void sync(T start) const noexcept { this->value = exp2(start + enumerate<T, width>() * step); } + CMT_INLINE void sync(T start) const noexcept { this->value = exp2(start + enumerate<T, width>() * step); } - KFR_INLINE void next() const noexcept { this->value += this->value * vstep; } + CMT_INLINE void next() const noexcept { this->value += this->value * vstep; } protected: T step; T vstep; }; -template <typename T, size_t width = get_vector_width<T, cpu_t::native>(1, 2)> +template <typename T, size_t width = get_vector_width<T, cpu_t::native>(1, 2), KFR_ARCH_DEP> struct generator_cossin : generator<T, width, generator_cossin<T, width>> { generator_cossin(T start, T step) @@ -150,9 +150,9 @@ struct generator_cossin : generator<T, width, generator_cossin<T, width>> { this->resync(start); } - KFR_INLINE void sync(T start) const noexcept { this->value = init_cossin(step, start); } + CMT_INLINE void sync(T start) const noexcept { this->value = init_cossin(step, start); } - KFR_INLINE void next() const noexcept + CMT_INLINE void next() const noexcept { this->value = this->value - subadd(alpha * this->value, beta * swap<2>(this->value)); } @@ -161,13 +161,13 @@ protected: T step; T alpha; T beta; - KFR_NOINLINE static vec<T, width> init_cossin(T w, T phase) + CMT_NOINLINE static vec<T, width> init_cossin(T w, T phase) { return cossin(dup(phase + enumerate<T, width / 2>() * w)); } }; -template <typename T, size_t width = get_vector_width<T, cpu_t::native>(2, 4)> +template <typename T, size_t width = get_vector_width<T, cpu_t::native>(2, 4), KFR_ARCH_DEP> struct generator_sin : generator<T, width, generator_sin<T, width>> { generator_sin(T start, T step) @@ -175,14 +175,14 @@ struct generator_sin : generator<T, width, generator_sin<T, width>> { this->resync(start); } - KFR_INLINE void sync(T start) const noexcept + CMT_INLINE void sync(T start) const noexcept { const vec<T, width* 2> cs = splitpairs(cossin(dup(start + enumerate<T, width>() * step))); this->cos_value = low(cs); this->value = high(cs); } - KFR_INLINE void next() const noexcept + CMT_INLINE void next() const noexcept { const vec<T, width> c = this->cos_value; const vec<T, width> s = this->value; diff --git a/include/kfr/base/intrinsics.h b/include/kfr/base/intrinsics.h @@ -2,13 +2,13 @@ #include "kfr.h" -#ifdef CID_ARCH_SSE2 +#ifdef CMT_ARCH_SSE2 #include <immintrin.h> -#ifdef KFR_OS_WIN +#ifdef CMT_OS_WIN #include <intrin.h> #endif #endif -#ifdef CID_ARCH_NEON +#ifdef CMT_ARCH_NEON #include <arm_neon.h> #endif diff --git a/include/kfr/base/kfr.h b/include/kfr/base/kfr.h @@ -5,51 +5,6 @@ #include "../cident.h" -#define KFR_INLINE CID_INLINE -#define KFR_INLINE_MEMBER CID_INLINE_MEMBER -#define KFR_INLINE_LAMBDA CID_INLINE_LAMBDA -#define KFR_NOINLINE CID_NOINLINE -#define KFR_FLATTEN CID_FLATTEN -#define KFR_RESTRICT CID_RESTRICT - -#ifdef CID_COMPILER_CLANG -#define KFR_COMPILER_CLANG CID_COMPILER_CLANG -#endif - -#ifdef CID_OS_WIN -#define KFR_OS_WIN CID_OS_WIN -#endif - -#ifdef CID_OS_OSX -#define KFR_OS_OSX CID_OS_OSX -#endif - -#ifdef CID_OS_LINUX -#define KFR_OS_LINUX CID_OS_LINUX -#endif - -#ifdef CID_GNU_ATTRIBUTES -#define KFR_GNU_ATTRIBUTES CID_GNU_ATTRIBUTES -#endif - -#ifdef CID_MSVC_ATTRIBUTES -#define KFR_MSVC_ATTRIBUTES CID_MSVC_ATTRIBUTES -#endif - -#ifdef CID_ARCH_X64 -#define KFR_ARCH_X64 CID_ARCH_X64 -#endif 
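// A standalone sketch (plain C++, not KFR code) of the incremental scheme the
// generators above use: keep a short vector of current output values and
// advance the whole block with one cheap update instead of re-evaluating the
// function per sample. generator_linear adds width*step per block;
// generator_exp/exp2 multiply by a constant per-block ratio, as below.
#include <array>
#include <cmath>
#include <cstdio>

int main()
{
    constexpr int width = 4;          // stands in for vec<T, width>
    const double start = 0.0, step = 0.1;

    std::array<double, width> value;  // sync(start): value = exp(start + {0..width-1}*step)
    for (int i = 0; i < width; ++i)
        value[i] = std::exp(start + i * step);

    const double ratio = std::exp(step * width); // constant per-block ratio

    for (int block = 0; block < 3; ++block)
    {
        for (double v : value)
            std::printf("%.4f ", v);  // consume the current block
        for (double& v : value)
            v *= ratio;               // next(): one multiply per lane, no exp() calls
    }
    std::printf("\n");
}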
- -#ifdef CID_ARCH_X32 -#define KFR_ARCH_X32 CID_ARCH_X32 -#endif - -#define KFR_ARCH_NAME CID_ARCH_NAME - -#define KFR_CDECL CID_CDECL - -#define KFR_PUBLIC_C CID_PUBLIC_C - #ifdef __cplusplus namespace kfr { @@ -74,59 +29,5 @@ constexpr int version = KFR_VERSION; } #endif -//#define KFR_MEMORY_ALIGNMENT 64 - -#if KFR_COMPILER_CLANG -#define KFR_LOOP_NOUNROLL \ - _Pragma("clang loop vectorize( disable )") _Pragma("clang loop interleave( disable )") \ - _Pragma("clang loop unroll( disable )") - -#define KFR_LOOP_UNROLL _Pragma("clang loop unroll( full )") - -#define KFR_VEC_CC __attribute__((vectorcall)) -#else -#define KFR_LOOP_NOUNROLL -#define KFR_LOOP_UNROLL -#ifdef KFR_COMPILER_MSVC -#define KFR_VEC_CC __vectorcall -#endif - -#endif - -#define KFR_AVAIL_AVX2 1 -#define KFR_AVAIL_AVX 1 -#define KFR_AVAIL_SSE42 1 -#define KFR_AVAIL_SSE41 1 -#define KFR_AVAIL_SSSE3 1 -#define KFR_AVAIL_SSE3 1 -#define KFR_AVAIL_SSE2 1 -#define KFR_AVAIL_SSE 1 - -#if defined(KFR_GNU_ATTRIBUTES) - -#define KFR_CPU_NAME_avx2 "avx2" -#define KFR_CPU_NAME_avx "avx" -#define KFR_CPU_NAME_sse42 "sse4.2" -#define KFR_CPU_NAME_sse41 "sse4.1" -#define KFR_CPU_NAME_ssse3 "ssse3" -#define KFR_CPU_NAME_sse3 "sse3" -#define KFR_CPU_NAME_sse2 "sse2" - -#define KFR_USE_CPU(arch) __attribute__((target(KFR_CPU_NAME_##arch))) - -#else -#define KFR_USE_CPU(arch) -#endif - -#if defined(KFR_GNU_ATTRIBUTES) -#define KFR_FAST_CC __attribute__((fastcall)) -#else -#define KFR_FAST_CC __fastcall -#endif - -#define KFR_INTRIN CID_INTRIN -#define KFR_SINTRIN CID_INTRIN CID_NODEBUG static -#define KFR_AINTRIN inline CID_NODEBUG static -#define KFR_FAST_NOINLINE CID_NOINLINE - -#define KFR_CPU_INTRIN(c) KFR_AINTRIN KFR_USE_CPU(c) +#define KFR_INTRIN CMT_INTRIN +#define KFR_SINTRIN CMT_INTRIN static diff --git a/include/kfr/base/log_exp.hpp b/include/kfr/base/log_exp.hpp @@ -81,7 +81,7 @@ KFR_SINTRIN vec<f64, N> vldexpk(const vec<f64, N>& x, const vec<i64, N>& q) template <typename T, size_t N> KFR_SINTRIN vec<T, N> logb(const vec<T, N>& x) { - return select(x == T(), -c_infinity<T>, cast<T>(vilogbp1(x) - 1)); + return select(x == T(), -c_infinity<T>, static_cast<vec<T, N>>(vilogbp1(x) - 1)); } template <size_t N> diff --git a/include/kfr/base/logical.hpp b/include/kfr/base/logical.hpp @@ -46,9 +46,9 @@ struct bitmask type value; }; -#if defined CID_ARCH_SSE2 +#if defined CMT_ARCH_SSE2 -#if defined CID_ARCH_SSE41 +#if defined CMT_ARCH_SSE41 KFR_SINTRIN bool bittestany(const u8sse& x) { return !_mm_testz_si128(*x, *x); } KFR_SINTRIN bool bittestany(const u16sse& x) { return !_mm_testz_si128(*x, *x); } @@ -69,7 +69,7 @@ KFR_SINTRIN bool bittestall(const i32sse& x) { return _mm_testc_si128(*x, *allon KFR_SINTRIN bool bittestall(const i64sse& x) { return _mm_testc_si128(*x, *allonesvector(x)); } #endif -#if defined CID_ARCH_AVX +#if defined CMT_ARCH_AVX KFR_SINTRIN bool bittestany(const f32sse& x) { return !_mm_testz_ps(*x, *x); } KFR_SINTRIN bool bittestany(const f64sse& x) { return !_mm_testz_pd(*x, *x); } KFR_SINTRIN bool bittestall(const f32sse& x) { return _mm_testc_ps(*x, *allonesvector(x)); } @@ -98,7 +98,7 @@ KFR_SINTRIN bool bittestall(const i8avx& x) { return _mm256_testc_si256(*x, *all KFR_SINTRIN bool bittestall(const i16avx& x) { return _mm256_testc_si256(*x, *allonesvector(x)); } KFR_SINTRIN bool bittestall(const i32avx& x) { return _mm256_testc_si256(*x, *allonesvector(x)); } KFR_SINTRIN bool bittestall(const i64avx& x) { return _mm256_testc_si256(*x, *allonesvector(x)); } -#elif defined CID_ARCH_SSE41 +#elif defined 
CMT_ARCH_SSE41 KFR_SINTRIN bool bittestany(const f32sse& x) { return !_mm_testz_si128(*bitcast<u8>(x), *bitcast<u8>(x)); } KFR_SINTRIN bool bittestany(const f64sse& x) { return !_mm_testz_si128(*bitcast<u8>(x), *bitcast<u8>(x)); } KFR_SINTRIN bool bittestall(const f32sse& x) @@ -111,7 +111,7 @@ KFR_SINTRIN bool bittestall(const f64sse& x) } #endif -#if !defined CID_ARCH_SSE41 +#if !defined CMT_ARCH_SSE41 KFR_SINTRIN bool bittestany(const f32sse& x) { return _mm_movemask_ps(*x); } KFR_SINTRIN bool bittestany(const f64sse& x) { return _mm_movemask_pd(*x); } @@ -158,6 +158,59 @@ KFR_SINTRIN bool bittestany(const vec<T, N>& a) return bittestany(low(a)) || bittestany(high(a)); } +#elif CMT_ARCH_NEON + +KFR_SINTRIN bool bittestall(const u32neon& a) +{ + const uint32x2_t tmp = vand_u32(vget_low_u32(*a), vget_high_u32(*a)); + return vget_lane_u32(vpmin_u32(tmp, tmp), 0) == 0xFFFFFFFFu; +} + +KFR_SINTRIN bool bittestany(const u32neon& a) +{ + const uint32x2_t tmp = vorr_u32(vget_low_u32(*a), vget_high_u32(*a)); + return vget_lane_u32(vpmax_u32(tmp, tmp), 0) != 0; +} +KFR_SINTRIN bool bittestany(const u8neon& a) { return bittestany(bitcast<u32>(a)); } +KFR_SINTRIN bool bittestany(const u16neon& a) { return bittestany(bitcast<u32>(a)); } +KFR_SINTRIN bool bittestany(const u64neon& a) { return bittestany(bitcast<u32>(a)); } +KFR_SINTRIN bool bittestany(const i8neon& a) { return bittestany(bitcast<u32>(a)); } +KFR_SINTRIN bool bittestany(const i16neon& a) { return bittestany(bitcast<u32>(a)); } +KFR_SINTRIN bool bittestany(const i64neon& a) { return bittestany(bitcast<u32>(a)); } +KFR_SINTRIN bool bittestany(const f32neon& a) { return bittestany(bitcast<u32>(a)); } +KFR_SINTRIN bool bittestany(const f64neon& a) { return bittestany(bitcast<u32>(a)); } + +KFR_SINTRIN bool bittestall(const u8neon& a) { return bittestall(bitcast<u32>(a)); } +KFR_SINTRIN bool bittestall(const u16neon& a) { return bittestall(bitcast<u32>(a)); } +KFR_SINTRIN bool bittestall(const u64neon& a) { return bittestall(bitcast<u32>(a)); } +KFR_SINTRIN bool bittestall(const i8neon& a) { return bittestall(bitcast<u32>(a)); } +KFR_SINTRIN bool bittestall(const i16neon& a) { return bittestall(bitcast<u32>(a)); } +KFR_SINTRIN bool bittestall(const i64neon& a) { return bittestall(bitcast<u32>(a)); } +KFR_SINTRIN bool bittestall(const f32neon& a) { return bittestall(bitcast<u32>(a)); } +KFR_SINTRIN bool bittestall(const f64neon& a) { return bittestall(bitcast<u32>(a)); } + +template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native>)> +KFR_SINTRIN bool bittestall(const vec<T, N>& a) +{ + return bittestall(expand_simd(a, internal::maskbits<T>(true))); +} +template <typename T, size_t N, KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native>), typename = void> +KFR_SINTRIN bool bittestall(const vec<T, N>& a) +{ + return bittestall(low(a)) && bittestall(high(a)); +} + +template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native>)> +KFR_SINTRIN bool bittestany(const vec<T, N>& a) +{ + return bittestany(expand_simd(a, internal::maskbits<T>(false))); +} +template <typename T, size_t N, KFR_ENABLE_IF(N >= vector_width<T, cpu_t::native>), typename = void> +KFR_SINTRIN bool bittestany(const vec<T, N>& a) +{ + return bittestany(low(a)) || bittestany(high(a)); +} + #else template <typename T, size_t N> diff --git a/include/kfr/base/memory.hpp b/include/kfr/base/memory.hpp @@ -83,15 +83,15 @@ inline void aligned_free(void* ptr) } template <typename T = void, size_t alignment = native_cache_alignment> -KFR_INLINE T* 
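// A minimal sketch of the reduction used by the NEON bittestall/bittestany
// overloads above (illustration only; requires an ARM NEON target): fold the
// two 64-bit halves of the mask together, then use a pairwise min/max so the
// "all lanes" / "any lane" answer lands in a single 32-bit lane.
#include <arm_neon.h>

static bool all_bits_set(uint32x4_t m)
{
    const uint32x2_t folded = vand_u32(vget_low_u32(m), vget_high_u32(m));
    return vget_lane_u32(vpmin_u32(folded, folded), 0) == 0xFFFFFFFFu;
}

static bool any_bit_set(uint32x4_t m)
{
    const uint32x2_t folded = vorr_u32(vget_low_u32(m), vget_high_u32(m));
    return vget_lane_u32(vpmax_u32(folded, folded), 0) != 0;
}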
aligned_allocate(size_t size = 1) +CMT_INLINE T* aligned_allocate(size_t size = 1) { - T* ptr = static_cast<T*>(__builtin_assume_aligned( + T* ptr = static_cast<T*>(CMT_ASSUME_ALIGNED( internal::aligned_malloc(std::max(alignment, size * details::elementsize<T>), alignment), alignment)); return ptr; } template <typename T = void> -KFR_INLINE void aligned_deallocate(T* ptr) +CMT_INLINE void aligned_deallocate(T* ptr) { return internal::aligned_free(ptr); } @@ -101,29 +101,29 @@ namespace internal template <typename T> struct aligned_deleter { - KFR_INLINE void operator()(T* ptr) const { aligned_deallocate(ptr); } + CMT_INLINE void operator()(T* ptr) const { aligned_deallocate(ptr); } }; } template <typename T> struct autofree { - KFR_INLINE autofree() {} - explicit KFR_INLINE autofree(size_t size) : ptr(aligned_allocate<T>(size)) {} + CMT_INLINE autofree() {} + explicit CMT_INLINE autofree(size_t size) : ptr(aligned_allocate<T>(size)) {} autofree(const autofree&) = delete; autofree& operator=(const autofree&) = delete; autofree(autofree&&) noexcept = default; autofree& operator=(autofree&&) noexcept = default; - KFR_INLINE T& operator[](size_t index) noexcept { return ptr[index]; } - KFR_INLINE const T& operator[](size_t index) const noexcept { return ptr[index]; } + CMT_INLINE T& operator[](size_t index) noexcept { return ptr[index]; } + CMT_INLINE const T& operator[](size_t index) const noexcept { return ptr[index]; } template <typename U = T> - KFR_INLINE U* data() noexcept + CMT_INLINE U* data() noexcept { return ptr_cast<U>(ptr.get()); } template <typename U = T> - KFR_INLINE const U* data() const noexcept + CMT_INLINE const U* data() const noexcept { return ptr_cast<U>(ptr.get()); } @@ -159,7 +159,7 @@ struct allocator { pointer result = aligned_allocate<value_type>(n); if (!result) - CID_THROW(std::bad_alloc()); + CMT_THROW(std::bad_alloc()); return result; } void deallocate(pointer p, size_type) { aligned_deallocate(p); } diff --git a/include/kfr/base/min_max.hpp b/include/kfr/base/min_max.hpp @@ -33,7 +33,7 @@ namespace kfr namespace intrinsics { -#if defined CID_ARCH_SSE2 +#if defined CMT_ARCH_SSE2 KFR_SINTRIN f32sse min(const f32sse& x, const f32sse& y) { return _mm_min_ps(*x, *y); } KFR_SINTRIN f64sse min(const f64sse& x, const f64sse& y) { return _mm_min_pd(*x, *y); } @@ -49,7 +49,7 @@ KFR_SINTRIN i16sse max(const i16sse& x, const i16sse& y) { return _mm_max_epi16( KFR_SINTRIN i64sse max(const i64sse& x, const i64sse& y) { return select(x > y, x, y); } KFR_SINTRIN u64sse max(const u64sse& x, const u64sse& y) { return select(x > y, x, y); } -#if defined CID_ARCH_AVX2 +#if defined CMT_ARCH_AVX2 KFR_SINTRIN u8avx min(const u8avx& x, const u8avx& y) { return _mm256_min_epu8(*x, *y); } KFR_SINTRIN i16avx min(const i16avx& x, const i16avx& y) { return _mm256_min_epi16(*x, *y); } KFR_SINTRIN i8avx min(const i8avx& x, const i8avx& y) { return _mm256_min_epi8(*x, *y); } @@ -70,14 +70,14 @@ KFR_SINTRIN i64avx max(const i64avx& x, const i64avx& y) { return select(x > y, KFR_SINTRIN u64avx max(const u64avx& x, const u64avx& y) { return select(x > y, x, y); } #endif -#if defined CID_ARCH_AVX +#if defined CMT_ARCH_AVX KFR_SINTRIN f32avx min(const f32avx& x, const f32avx& y) { return _mm256_min_ps(*x, *y); } KFR_SINTRIN f64avx min(const f64avx& x, const f64avx& y) { return _mm256_min_pd(*x, *y); } KFR_SINTRIN f32avx max(const f32avx& x, const f32avx& y) { return _mm256_max_ps(*x, *y); } KFR_SINTRIN f64avx max(const f64avx& x, const f64avx& y) { return _mm256_max_pd(*x, *y); } #endif -#if 
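// A sketch of what aligned_allocate/aligned_deallocate above provide, written
// against standard C++17 only (not the KFR allocator itself): an allocation
// whose address is cache-line aligned so SIMD loads/stores can assume
// alignment. std::aligned_alloc requires the size to be a multiple of the
// alignment; availability on MSVC differs.
#include <cassert>
#include <cstdint>
#include <cstdlib>

int main()
{
    constexpr std::size_t alignment = 64; // typical cache line
    float* p = static_cast<float*>(std::aligned_alloc(alignment, 1024 * sizeof(float)));
    assert(reinterpret_cast<std::uintptr_t>(p) % alignment == 0);
    std::free(p);
}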
defined CID_ARCH_SSE41 +#if defined CMT_ARCH_SSE41 KFR_SINTRIN i8sse min(const i8sse& x, const i8sse& y) { return _mm_min_epi8(*x, *y); } KFR_SINTRIN u16sse min(const u16sse& x, const u16sse& y) { return _mm_min_epu16(*x, *y); } KFR_SINTRIN i32sse min(const i32sse& x, const i32sse& y) { return _mm_min_epi32(*x, *y); } @@ -103,6 +103,37 @@ KFR_SINTRIN u32sse max(const u32sse& x, const u32sse& y) { return select(x > y, KFR_HANDLE_ALL_SIZES_2(min) KFR_HANDLE_ALL_SIZES_2(max) +#elif defined CMT_ARCH_NEON + +KFR_SINTRIN i8neon min(const i8neon& x, const i8neon& y) { return vminq_s8(*x, *y); } +KFR_SINTRIN u8neon min(const u8neon& x, const u8neon& y) { return vminq_u8(*x, *y); } +KFR_SINTRIN i16neon min(const i16neon& x, const i16neon& y) { return vminq_s16(*x, *y); } +KFR_SINTRIN u16neon min(const u16neon& x, const u16neon& y) { return vminq_u16(*x, *y); } +KFR_SINTRIN i32neon min(const i32neon& x, const i32neon& y) { return vminq_s32(*x, *y); } +KFR_SINTRIN u32neon min(const u32neon& x, const u32neon& y) { return vminq_u32(*x, *y); } + +KFR_SINTRIN i8neon max(const i8neon& x, const i8neon& y) { return vmaxq_s8(*x, *y); } +KFR_SINTRIN u8neon max(const u8neon& x, const u8neon& y) { return vmaxq_u8(*x, *y); } +KFR_SINTRIN i16neon max(const i16neon& x, const i16neon& y) { return vmaxq_s16(*x, *y); } +KFR_SINTRIN u16neon max(const u16neon& x, const u16neon& y) { return vmaxq_u16(*x, *y); } +KFR_SINTRIN i32neon max(const i32neon& x, const i32neon& y) { return vmaxq_s32(*x, *y); } +KFR_SINTRIN u32neon max(const u32neon& x, const u32neon& y) { return vmaxq_u32(*x, *y); } +KFR_SINTRIN i64neon min(const i64neon& x, const i64neon& y) { return select(x < y, x, y); } +KFR_SINTRIN u64neon min(const u64neon& x, const u64neon& y) { return select(x < y, x, y); } + +KFR_SINTRIN f32neon min(const f32neon& x, const f32neon& y) { return vminq_f32(*x, *y); } +KFR_SINTRIN f32neon max(const f32neon& x, const f32neon& y) { return vmaxq_f32(*x, *y); } +#if defined CMT_ARCH_NEON64 +KFR_SINTRIN f64neon min(const f64neon& x, const f64neon& y) { return vminq_f64(*x, *y); } +KFR_SINTRIN f64neon max(const f64neon& x, const f64neon& y) { return vmaxq_f64(*x, *y); } +#else +KFR_SINTRIN f64neon min(const f64neon& x, const f64neon& y) { return select(x < y, x, y); } +KFR_SINTRIN f64neon max(const f64neon& x, const f64neon& y) { return select(x > y, x, y); } +#endif + +KFR_HANDLE_ALL_SIZES_2(min) +KFR_HANDLE_ALL_SIZES_2(max) + #else // fallback diff --git a/include/kfr/base/modzerobessel.hpp b/include/kfr/base/modzerobessel.hpp @@ -25,7 +25,7 @@ #include "log_exp.hpp" #pragma clang diagnostic push -#if CID_HAS_WARNING("-Wc99-extensions") +#if CMT_HAS_WARNING("-Wc99-extensions") #pragma clang diagnostic ignored "-Wc99-extensions" #endif @@ -77,7 +77,7 @@ constexpr T bessel_coef[] = { T(0.25), T(1.5021381070956226783e-096) }; template <typename T, size_t N> -KFR_INLINE vec<T, N> modzerobessel(const vec<T, N>& x) +CMT_INLINE vec<T, N> modzerobessel(const vec<T, N>& x) { const vec<T, N> x_2 = x * 0.5; const vec<T, N> x_2_sqr = x_2 * x_2; @@ -85,7 +85,7 @@ KFR_INLINE vec<T, N> modzerobessel(const vec<T, N>& x) vec<T, N> result; result = 1 + x_2_sqr; - KFR_LOOP_UNROLL + CMT_LOOP_UNROLL for (size_t i = 0; i < (sizeof(T) == 4 ? 
20 : 39); i++) { result = fmadd((num *= x_2_sqr), bessel_coef<T>[i], result); diff --git a/include/kfr/base/operators.hpp b/include/kfr/base/operators.hpp @@ -32,18 +32,18 @@ namespace internal { template <typename T, typename ReduceFn> -KFR_INLINE T horizontal_impl(const vec<T, 1>& value, ReduceFn&&) +CMT_INLINE T horizontal_impl(const vec<T, 1>& value, ReduceFn&&) { return T(value[0]); } template <typename T, size_t N, typename ReduceFn, KFR_ENABLE_IF(N > 1 && is_poweroftwo(N))> -KFR_INLINE T horizontal_impl(const vec<T, N>& value, ReduceFn&& reduce) +CMT_INLINE T horizontal_impl(const vec<T, N>& value, ReduceFn&& reduce) { return horizontal_impl(reduce(low(value), high(value)), std::forward<ReduceFn>(reduce)); } template <typename T, size_t N, typename ReduceFn, KFR_ENABLE_IF(N > 1 && !is_poweroftwo(N))> -KFR_INLINE T horizontal_impl(const vec<T, N>& value, ReduceFn&& reduce) +CMT_INLINE T horizontal_impl(const vec<T, N>& value, ReduceFn&& reduce) { const T initial = reduce(initialvalue<T>()); return horizontal_impl(widen<next_poweroftwo(N)>(value, initial), std::forward<ReduceFn>(reduce)); @@ -51,7 +51,7 @@ KFR_INLINE T horizontal_impl(const vec<T, N>& value, ReduceFn&& reduce) } template <typename T, size_t N, typename ReduceFn> -KFR_INLINE T horizontal(const vec<T, N>& value, ReduceFn&& reduce) +CMT_INLINE T horizontal(const vec<T, N>& value, ReduceFn&& reduce) { return internal::horizontal_impl(value, std::forward<ReduceFn>(reduce)); } @@ -74,16 +74,14 @@ constexpr inline T add(initialvalue<T>) KFR_FN(add) template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> -KFR_INLINE internal::expression_function<fn_add, E1, E2> add(E1&& x, E2&& y) +CMT_INLINE internal::expression_function<fn_add, E1, E2> add(E1&& x, E2&& y) { return { fn_add(), std::forward<E1>(x), std::forward<E2>(y) }; } template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> -KFR_INLINE internal::expression_function<fn_add, E1> add(E1&& x, E2&& y, E3&& z) +CMT_INLINE internal::expression_function<fn_add, E1> add(E1&& x, E2&& y, E3&& z) { - return { fn_add(), std::forward<E1>(x), std::forward<E2>(y), std::forward<E3>(z) - - }; + return { fn_add(), std::forward<E1>(x), std::forward<E2>(y), std::forward<E3>(z) }; } template <typename T1, typename T2> @@ -99,11 +97,9 @@ constexpr inline T sub(initialvalue<T>) KFR_FN(sub) template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> -KFR_INLINE internal::expression_function<fn_sub, E1, E2> sub(E1&& x, E2&& y) +CMT_INLINE internal::expression_function<fn_sub, E1, E2> sub(E1&& x, E2&& y) { - return { fn_sub(), std::forward<E1>(x), std::forward<E2>(y) - - }; + return { fn_sub(), std::forward<E1>(x), std::forward<E2>(y) }; } template <typename T1> @@ -124,12 +120,12 @@ constexpr inline T mul(initialvalue<T>) } KFR_FN(mul) template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> -KFR_INLINE internal::expression_function<fn_mul, E1, E2> mul(E1&& x, E2&& y) +CMT_INLINE internal::expression_function<fn_mul, E1, E2> mul(E1&& x, E2&& y) { return { fn_mul(), std::forward<E1>(x), std::forward<E2>(y) }; } template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> -KFR_INLINE internal::expression_function<fn_mul, E1> mul(E1&& x, E2&& y, E3&& z) +CMT_INLINE internal::expression_function<fn_mul, E1> mul(E1&& x, E2&& y, E3&& z) { return { fn_mul(), std::forward<E1>(x), std::forward<E2>(y), std::forward<E3>(z) }; } @@ 
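// A plain-C++ sketch (not KFR code) of the horizontal() reduction above:
// the vector is reduced by repeatedly combining its low and high halves;
// non-power-of-two widths are first widened with the operation's identity.
// The sketch below handles the power-of-two case only.
#include <array>
#include <cstdio>

template <typename T, std::size_t N, typename Fn>
T horizontal_sketch(std::array<T, N> v, Fn reduce)
{
    static_assert((N & (N - 1)) == 0, "sketch assumes a power-of-two width");
    for (std::size_t half = N / 2; half >= 1; half /= 2)
        for (std::size_t i = 0; i < half; ++i)
            v[i] = reduce(v[i], v[i + half]); // combine low and high halves
    return v[0];
}

int main()
{
    std::array<float, 8> x{ 1, 2, 3, 4, 5, 6, 7, 8 };
    std::printf("%f\n", horizontal_sketch(x, [](float a, float b) { return a + b; })); // 36
}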
-141,7 +137,7 @@ constexpr inline T1 sqr(T1 x) } KFR_FN(sqr) template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INLINE internal::expression_function<fn_sqr, E1> sqr(E1&& x) +CMT_INLINE internal::expression_function<fn_sqr, E1> sqr(E1&& x) { return { fn_sqr(), std::forward<E1>(x) }; } @@ -154,11 +150,9 @@ constexpr inline T1 cub(T1 x) KFR_FN(cub) template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INLINE internal::expression_function<fn_cub, E1> cub(E1&& x) +CMT_INLINE internal::expression_function<fn_cub, E1> cub(E1&& x) { - return { fn_cub(), std::forward<E1>(x) - - }; + return { fn_cub(), std::forward<E1>(x) }; } template <typename T> @@ -190,32 +184,24 @@ KFR_FN(pow4) KFR_FN(pow5) template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INLINE internal::expression_function<fn_pow2, E1> pow2(E1&& x) +CMT_INLINE internal::expression_function<fn_pow2, E1> pow2(E1&& x) { - return { fn_pow2(), std::forward<E1>(x) - - }; + return { fn_pow2(), std::forward<E1>(x) }; } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INLINE internal::expression_function<fn_pow3, E1> pow3(E1&& x) +CMT_INLINE internal::expression_function<fn_pow3, E1> pow3(E1&& x) { - return { fn_pow3(), std::forward<E1>(x) - - }; + return { fn_pow3(), std::forward<E1>(x) }; } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INLINE internal::expression_function<fn_pow4, E1> pow4(E1&& x) +CMT_INLINE internal::expression_function<fn_pow4, E1> pow4(E1&& x) { - return { fn_pow4(), std::forward<E1>(x) - - }; + return { fn_pow4(), std::forward<E1>(x) }; } template <typename E1, KFR_ENABLE_IF(is_input_expression<E1>::value)> -KFR_INLINE internal::expression_function<fn_pow5, E1> pow5(E1&& x) +CMT_INLINE internal::expression_function<fn_pow5, E1> pow5(E1&& x) { - return { fn_pow5(), std::forward<E1>(x) - - }; + return { fn_pow5(), std::forward<E1>(x) }; } /// Raise x to the power base $x^{base}$ @@ -239,7 +225,7 @@ constexpr inline T ipow(T x, int base) KFR_FN(ipow) template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> -KFR_INLINE internal::expression_function<fn_ipow, E1, E2> ipow(E1&& x, E2&& b) +CMT_INLINE internal::expression_function<fn_ipow, E1, E2> ipow(E1&& x, E2&& b) { return { fn_ipow(), std::forward<E1>(x), std::forward<E2>(b) @@ -265,24 +251,24 @@ KFR_FN(sqrsum) KFR_FN(sqrdiff) /// Division -template <typename T1, typename T2> -inline common_type<T1, T2> div(T1 x, T2 y) +template <typename T1, typename T2, typename Tout = common_type<T1, T2>> +inline Tout div(const T1& x, const T2& y) { - return x / y; + return static_cast<Tout>(x) / static_cast<Tout>(y); } KFR_FN(div) /// Remainder -template <typename T1, typename T2> -inline common_type<T1, T2> rem(T1 x, T2 y) +template <typename T1, typename T2, typename Tout = common_type<T1, T2>> +inline Tout rem(const T1& x, const T2& y) { - return x % y; + return static_cast<Tout>(x) % static_cast<Tout>(y); } KFR_FN(rem) /// Negation template <typename T1> -inline T1 neg(T1 x) +inline T1 neg(const T1& x) { return -x; } @@ -290,7 +276,7 @@ KFR_FN(neg) /// Bitwise Not template <typename T1> -inline T1 bitwisenot(T1 x) +inline T1 bitwisenot(const T1& x) { return ~x; } @@ -453,13 +439,13 @@ namespace internal { template <typename T1, typename T2> -constexpr KFR_INLINE T1 horner(T1, T2 c0) +constexpr CMT_INLINE T1 horner(T1, T2 c0) { return c0; } template <typename T1, typename T2, typename T3, typename... 
Ts> -constexpr KFR_INLINE T1 horner(T1 x, T2 c0, T3 c1, Ts... values) +constexpr CMT_INLINE T1 horner(T1 x, T2 c0, T3 c1, Ts... values) { return fmadd(horner(x, c1, values...), x, c0); } @@ -469,7 +455,7 @@ constexpr KFR_INLINE T1 horner(T1 x, T2 c0, T3 c1, Ts... values) /// /// ``horner(x, 1, 2, 3)`` is equivalent to \(3x^2 + 2x + 1\) template <typename T1, typename... Ts> -constexpr KFR_INLINE T1 horner(T1 x, Ts... c) +constexpr CMT_INLINE T1 horner(T1 x, Ts... c) { return internal::horner(x, c...); } @@ -478,7 +464,7 @@ KFR_FN(horner) /// Calculate Multiplicative Inverse of `x` /// Returns `1/x` template <typename T> -constexpr KFR_INLINE T reciprocal(T x) +constexpr CMT_INLINE T reciprocal(T x) { static_assert(std::is_floating_point<subtype<T>>::value, "T must be floating point type"); return subtype<T>(1) / x; @@ -486,7 +472,7 @@ constexpr KFR_INLINE T reciprocal(T x) KFR_FN(reciprocal) template <typename T, size_t N> -KFR_INLINE vec<T, N> mulsign(const vec<T, N>& x, const vec<T, N>& y) +CMT_INLINE vec<T, N> mulsign(const vec<T, N>& x, const vec<T, N>& y) { return x ^ (y & internal::highbitmask<T>); } @@ -494,85 +480,65 @@ KFR_FN_S(mulsign) KFR_FN(mulsign) template <typename T, size_t N> -constexpr KFR_INLINE vec<T, N> copysign(const vec<T, N>& x, const vec<T, N>& y) +constexpr CMT_INLINE vec<T, N> copysign(const vec<T, N>& x, const vec<T, N>& y) { return (x & internal::highbitmask<T>) | (y & internal::highbitmask<T>); } -template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> -KFR_INLINE vec<T, N> fmod(const vec<T, N>& x, const vec<T, N>& y) -{ - return x - cast<itype<T>>(x / y) * y; -} - -KFR_FN_S(fmod) -KFR_FN(fmod) - -template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> -constexpr KFR_INLINE vec<T, N> rem(const vec<T, N>& x, const vec<T, N>& y) -{ - return x % y; -} -template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> -KFR_INLINE vec<T, N> rem(const vec<T, N>& x, const vec<T, N>& y) -{ - return fmod(x, y); -} - template <typename T, size_t N> -KFR_INLINE mask<T, N> isnan(const vec<T, N>& x) +CMT_INLINE mask<T, N> isnan(const vec<T, N>& x) { return x != x; } template <typename T, size_t N> -KFR_INLINE mask<T, N> isinf(const vec<T, N>& x) +CMT_INLINE mask<T, N> isinf(const vec<T, N>& x) { return x == c_infinity<T> || x == -c_infinity<T>; } template <typename T, size_t N> -KFR_INLINE mask<T, N> isfinite(const vec<T, N>& x) +CMT_INLINE mask<T, N> isfinite(const vec<T, N>& x) { return !isnan(x) && !isinf(x); } template <typename T, size_t N> -KFR_INLINE mask<T, N> isnegative(const vec<T, N>& x) +CMT_INLINE mask<T, N> isnegative(const vec<T, N>& x) { return (x & internal::highbitmask<T>) != 0; } template <typename T, size_t N> -KFR_INLINE mask<T, N> ispositive(const vec<T, N>& x) +CMT_INLINE mask<T, N> ispositive(const vec<T, N>& x) { return !isnegative(x); } template <typename T, size_t N> -KFR_INLINE mask<T, N> iszero(const vec<T, N>& x) +CMT_INLINE mask<T, N> iszero(const vec<T, N>& x) { return x == T(); } /// Swap byte order template <typename T, size_t N, KFR_ENABLE_IF(sizeof(vec<T, N>) > 8)> -KFR_INLINE vec<T, N> swapbyteorder(const vec<T, N>& x) +CMT_INLINE vec<T, N> swapbyteorder(const vec<T, N>& x) { return bitcast<T>(swap<sizeof(T)>(bitcast<u8>(x))); } template <typename T, KFR_ENABLE_IF(sizeof(T) == 8)> -KFR_INLINE T swapbyteorder(T x) +CMT_INLINE T swapbyteorder(T x) { return reinterpret_cast<const T&>(__builtin_bswap64(reinterpret_cast<const u64&>(x))); } template <typename T, KFR_ENABLE_IF(sizeof(T) == 4)> -KFR_INLINE T 
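// A plain-C++ sketch of the Horner evaluation documented above: coefficients
// are listed from the constant term upward and folded with fused multiply-add,
// so horner(x, 1, 2, 3) evaluates 3*x^2 + 2*x + 1.
#include <cmath>
#include <cstdio>

template <typename T>
T horner_sketch(T, T c0) { return c0; }

template <typename T, typename... Ts>
T horner_sketch(T x, T c0, T c1, Ts... rest)
{
    return std::fma(horner_sketch(x, c1, rest...), x, c0); // fmadd in the KFR version
}

int main()
{
    std::printf("%f\n", horner_sketch(2.0, 1.0, 2.0, 3.0)); // 3*4 + 2*2 + 1 = 17
}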
swapbyteorder(T x) +CMT_INLINE T swapbyteorder(T x) { return reinterpret_cast<const T&>(__builtin_bswap32(reinterpret_cast<const u32&>(x))); } template <typename T, KFR_ENABLE_IF(sizeof(T) == 2)> -KFR_INLINE T swapbyteorder(T x) +CMT_INLINE T swapbyteorder(T x) { return reinterpret_cast<const T&>(__builtin_bswap16(reinterpret_cast<const u16&>(x))); } @@ -580,7 +546,7 @@ KFR_FN(swapbyteorder) /// Sum all elements of the vector template <typename T, size_t N> -KFR_INLINE T hadd(const vec<T, N>& value) +CMT_INLINE T hadd(const vec<T, N>& value) { return horizontal(value, fn_add()); } @@ -588,26 +554,26 @@ KFR_FN(hadd) /// Multiply all elements of the vector template <typename T, size_t N> -KFR_INLINE T hmul(const vec<T, N>& value) +CMT_INLINE T hmul(const vec<T, N>& value) { return horizontal(value, fn_mul()); } KFR_FN(hmul) template <typename T, size_t N> -KFR_INLINE T hbitwiseand(const vec<T, N>& value) +CMT_INLINE T hbitwiseand(const vec<T, N>& value) { return horizontal(value, fn_bitwiseand()); } KFR_FN(hbitwiseand) template <typename T, size_t N> -KFR_INLINE T hbitwiseor(const vec<T, N>& value) +CMT_INLINE T hbitwiseor(const vec<T, N>& value) { return horizontal(value, fn_bitwiseor()); } KFR_FN(hbitwiseor) template <typename T, size_t N> -KFR_INLINE T hbitwisexor(const vec<T, N>& value) +CMT_INLINE T hbitwisexor(const vec<T, N>& value) { return horizontal(value, fn_bitwisexor()); } @@ -615,7 +581,7 @@ KFR_FN(hbitwisexor) /// Calculate the Dot-Product of two vectors template <typename T, size_t N> -KFR_INLINE T dot(const vec<T, N>& x, const vec<T, N>& y) +CMT_INLINE T dot(const vec<T, N>& x, const vec<T, N>& y) { return hadd(x * y); } @@ -623,7 +589,7 @@ KFR_FN(dot) /// Calculate the Arithmetic mean of all elements in the vector template <typename T, size_t N> -KFR_INLINE T avg(const vec<T, N>& value) +CMT_INLINE T avg(const vec<T, N>& value) { return hadd(value) / N; } @@ -631,19 +597,19 @@ KFR_FN(avg) /// Calculate the RMS of all elements in the vector template <typename T, size_t N> -KFR_INLINE T rms(const vec<T, N>& value) +CMT_INLINE T rms(const vec<T, N>& value) { return internal::builtin_sqrt(hadd(value * value) / N); } KFR_FN(rms) template <typename T, size_t N, KFR_ENABLE_IF(N >= 2)> -KFR_INLINE vec<T, N> subadd(const vec<T, N>& a, const vec<T, N>& b) +CMT_INLINE vec<T, N> subadd(const vec<T, N>& a, const vec<T, N>& b) { return blend<1, 0>(a + b, a - b); } template <typename T, size_t N, KFR_ENABLE_IF(N >= 2)> -KFR_INLINE vec<T, N> addsub(const vec<T, N>& a, const vec<T, N>& b) +CMT_INLINE vec<T, N> addsub(const vec<T, N>& a, const vec<T, N>& b) { return blend<0, 1>(a + b, a - b); } @@ -651,26 +617,26 @@ KFR_FN(subadd) KFR_FN(addsub) template <typename T, size_t N> -KFR_INLINE vec<T, N> negeven(const vec<T, N>& x) +CMT_INLINE vec<T, N> negeven(const vec<T, N>& x) { return x ^ broadcast<N>(-T(), T()); } template <typename T, size_t N> -KFR_INLINE vec<T, N> negodd(const vec<T, N>& x) +CMT_INLINE vec<T, N> negodd(const vec<T, N>& x) { return x ^ broadcast<N>(T(), -T()); } #define KFR_EXPR_UNARY(fn, op) \ template <typename A1, KFR_ENABLE_IF(is_input_expression<A1>::value)> \ - KFR_INLINE auto operator op(A1&& a1)->decltype(bind_expression(fn(), std::forward<A1>(a1))) \ + CMT_INLINE auto operator op(A1&& a1)->decltype(bind_expression(fn(), std::forward<A1>(a1))) \ { \ return bind_expression(fn(), std::forward<A1>(a1)); \ } #define KFR_EXPR_BINARY(fn, op) \ template <typename A1, typename A2, KFR_ENABLE_IF(is_input_expressions<A1, A2>::value)> \ - KFR_INLINE auto operator op(A1&& a1, 
A2&& a2) \ + CMT_INLINE auto operator op(A1&& a1, A2&& a2) \ ->decltype(bind_expression(fn(), std::forward<A1>(a1), std::forward<A2>(a2))) \ { \ return bind_expression(fn(), std::forward<A1>(a1), std::forward<A2>(a2)); \ @@ -695,4 +661,54 @@ KFR_EXPR_BINARY(fn_less, <) KFR_EXPR_BINARY(fn_greater, >) KFR_EXPR_BINARY(fn_lessorequal, <=) KFR_EXPR_BINARY(fn_greaterorequal, >=) +#undef KFR_EXPR_UNARY +#undef KFR_EXPR_BINARY + +template <typename T, size_t N1, size_t... Ns> +vec<vec<T, sizeof...(Ns) + 1>, N1> packtranspose(const vec<T, N1>& x, const vec<T, Ns>&... rest) +{ + const vec<T, N1*(sizeof...(Ns) + 1)> t = transpose<N1>(concat(x, rest...)); + return compcast<vec<T, sizeof...(Ns) + 1>>(t); +} + +KFR_FN(packtranspose) + +namespace internal +{ +template <typename... E> +struct expression_pack : expression<E...>, output_expression +{ + constexpr static size_t count = sizeof...(E); + + expression_pack(E&&... e) : expression<E...>(std::forward<E>(e)...) {} + using value_type = vec<common_type<value_type_of<E>...>, count>; + using size_type = typename expression<E...>::size_type; + constexpr size_type size() const noexcept { return expression<E...>::size(); } + + template <typename U, size_t N> + CMT_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const + { + return this->call(fn_packtranspose(), index, x); + } + template <typename U, size_t N> + CMT_INLINE void operator()(coutput_t, size_t index, const vec<vec<U, count>, N>& x) + { + output(index, x, csizeseq<count>); + } + +private: + template <typename U, size_t N, size_t... indices> + void output(size_t index, const vec<vec<U, count>, N>& x, csizes_t<indices...>) + { + const vec<vec<U, N>, count> xx = compcast<vec<U, N>>(transpose<count>(flatten(x))); + swallow{ (std::get<indices>(this->args)(coutput, index, xx[indices]), void(), 0)... }; + } +}; +} + +template <typename... E, KFR_ENABLE_IF(is_input_expressions<E...>::value)> +internal::expression_pack<internal::arg<E>...> pack(E&&... 
e) +{ + return internal::expression_pack<internal::arg<E>...>(std::forward<E>(e)...); +} } diff --git a/include/kfr/base/pointer.hpp b/include/kfr/base/pointer.hpp @@ -32,7 +32,7 @@ namespace kfr constexpr size_t maximum_expression_width() { return bitness_const(16, 32); } template <typename T, size_t maxwidth = maximum_expression_width()> -using expression_vtable = carray<void*, 2 + ilog2(maxwidth) + 1>; +using expression_vtable = std::array<void*, 2 + ilog2(maxwidth) + 1>; struct dummy_content { @@ -74,27 +74,27 @@ struct expression_pointer : input_expression { } template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + CMT_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { using func_t = simd<T, N> (*)(void*, size_t); static_assert(is_poweroftwo(N), "N must be a power of two"); constexpr size_t findex = ilog2(N); static_assert(N <= maxwidth, "N is greater than maxwidth"); - func_t func = reinterpret_cast<func_t>(vtable->get(csize<2 + findex>)); - vec<U, N> result = cast<U>(func(instance, index)); + func_t func = reinterpret_cast<func_t>((*vtable)[2 + findex]); + vec<U, N> result = vec<T, N>(func(instance, index)); return result; } - KFR_INLINE void begin_block(size_t size) const + CMT_INLINE void begin_block(size_t size) const { using func_t = void (*)(void*, size_t); - func_t func = reinterpret_cast<func_t>(vtable->get(csize<0>)); + func_t func = reinterpret_cast<func_t>((*vtable)[0]); func(instance, size); } - KFR_INLINE void end_block(size_t size) const + CMT_INLINE void end_block(size_t size) const { using func_t = void (*)(void*, size_t); - func_t func = reinterpret_cast<func_t>(vtable->get(csize<1>)); + func_t func = reinterpret_cast<func_t>((*vtable)[1]); func(instance, size); } @@ -107,19 +107,21 @@ private: namespace internal { template <typename T, size_t N, typename Fn, typename Ret = simd<T, N>, - typename NonMemFn = Ret (*)(Fn*, size_t, vec_t<T, N>)> -KFR_INLINE NonMemFn make_expression_func() + typename NonMemFn = Ret (*)(void*, size_t)> +CMT_INLINE NonMemFn make_expression_func() { - return [](Fn* fn, size_t index, vec_t<T, N> x) { return *(fn->operator()(cinput, index, x)); }; + return [](void* fn, size_t index) -> Ret { + return *(reinterpret_cast<Fn*>(fn)->operator()(cinput, index, vec_t<T, N>())); + }; } template <typename Fn, typename NonMemFn = void (*)(Fn*, size_t)> -KFR_INLINE NonMemFn make_expression_begin_block() +CMT_INLINE NonMemFn make_expression_begin_block() { return [](Fn* fn, size_t size) { return fn->begin_block(size); }; } template <typename Fn, typename NonMemFn = void (*)(Fn*, size_t)> -KFR_INLINE NonMemFn make_expression_end_block() +CMT_INLINE NonMemFn make_expression_end_block() { return [](Fn* fn, size_t size) { return fn->end_block(size); }; } @@ -130,19 +132,19 @@ expression_vtable<T, maxwidth> make_expression_vtable_impl() expression_vtable<T, maxwidth> result; constexpr size_t size = result.size() - 2; - result.get(csize<0>) = reinterpret_cast<void*>(&internal::make_expression_begin_block<decay<E>>); - result.get(csize<1>) = reinterpret_cast<void*>(&internal::make_expression_end_block<decay<E>>); + result[0] = reinterpret_cast<void*>(&internal::make_expression_begin_block<decay<E>>); + result[1] = reinterpret_cast<void*>(&internal::make_expression_end_block<decay<E>>); cforeach(csizeseq<size>, [&](auto u) { - constexpr size_t N = 1 << val_of(u); - result.get(csize<2 + val_of(u)>) = + constexpr size_t N = 1 << val_of(decltype(u)()); + result[2 + val_of(decltype(u)())] = 
reinterpret_cast<void*>(internal::make_expression_func<T, N, decay<E>>()); }); return result; } template <typename T, size_t maxwidth, typename E> -KFR_INLINE expression_vtable<T, maxwidth>* make_expression_vtable() +CMT_INLINE expression_vtable<T, maxwidth>* make_expression_vtable() { static_assert(is_input_expression<E>::value, "E must be an expression"); static expression_vtable<T, maxwidth> vtable = internal::make_expression_vtable_impl<T, maxwidth, E>(); @@ -151,7 +153,7 @@ KFR_INLINE expression_vtable<T, maxwidth>* make_expression_vtable() } template <typename E, typename T = value_type_of<E>, size_t maxwidth = maximum_expression_width()> -KFR_INLINE expression_pointer<T, maxwidth> to_pointer(E& expr) +CMT_INLINE expression_pointer<T, maxwidth> to_pointer(E& expr) { static_assert(is_input_expression<E>::value, "E must be an expression"); return expression_pointer<T, maxwidth>(std::addressof(expr), @@ -159,7 +161,7 @@ KFR_INLINE expression_pointer<T, maxwidth> to_pointer(E& expr) } template <typename E, typename T = value_type_of<E>, size_t maxwidth = maximum_expression_width()> -KFR_INLINE expression_pointer<T, maxwidth> to_pointer(E&& expr) +CMT_INLINE expression_pointer<T, maxwidth> to_pointer(E&& expr) { static_assert(is_input_expression<E>::value, "E must be an expression"); std::shared_ptr<expression_resource> ptr = make_resource(std::move(expr)); diff --git a/include/kfr/base/random.hpp b/include/kfr/base/random.hpp @@ -114,8 +114,8 @@ inline enable_if_not_f<vec<T, N>> random_range(random_bit_generator& gen, T min, using big_type = findinttype<sqr(std::numeric_limits<T>::min()), sqr(std::numeric_limits<T>::max())>; vec<T, N> u = random_uniform<T, N>(gen); - const vec<big_type, N> tmp = cast<big_type>(u); - return cast<T>((tmp * (max - min) + min) >> typebits<T>::bits); + const vec<big_type, N> tmp = u; + return (tmp * (max - min) + min) >> typebits<T>::bits; } namespace internal @@ -128,7 +128,7 @@ struct expression_random_uniform : input_expression template <typename U, size_t N> vec<U, N> operator()(cinput_t, size_t, vec_t<U, N>) const { - return cast<U>(random_uniform<T, N>(gen)); + return random_uniform<T, N>(gen); } mutable random_bit_generator gen; }; @@ -146,7 +146,7 @@ struct expression_random_range : input_expression template <typename U, size_t N> vec<U, N> operator()(cinput_t, size_t, vec_t<U, N>) const { - return cast<U>(random_range<N, T>(gen, min, max)); + return random_range<N, T>(gen, min, max); } mutable random_bit_generator gen; const T min; diff --git a/include/kfr/base/read_write.hpp b/include/kfr/base/read_write.hpp @@ -30,31 +30,31 @@ namespace kfr { template <size_t N, bool A = false, typename T> -KFR_INLINE vec<T, N> read(const T* src) +CMT_INLINE vec<T, N> read(const T* src) { return internal_read_write::read<N, A, T>(src); } template <bool A = false, size_t N, typename T> -KFR_INLINE void write(T* dest, const vec<T, N>& value) +CMT_INLINE void write(T* dest, const vec<T, N>& value) { internal_read_write::write<A, N, T>(dest, value); } template <typename... Indices, typename T, size_t Nout = 1 + sizeof...(Indices)> -KFR_INLINE vec<T, Nout> gather(const T* base, size_t index, Indices... indices) +CMT_INLINE vec<T, Nout> gather(const T* base, size_t index, Indices... indices) { return make_vector(base[index], base[indices]...); } template <size_t Index, size_t... 
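// A miniature sketch (plain C++, not KFR code) of the type erasure behind
// expression_pointer/to_pointer above: the expression is reduced to an opaque
// instance pointer plus a flat table of function pointers (KFR keeps one entry
// per power-of-two block width plus begin_block/end_block slots, stored as
// void* and cast back on call). The names below are illustrative only.
#include <array>
#include <cstdio>

struct erased_expression
{
    using eval_fn = float (*)(void*, std::size_t);
    void* instance;
    std::array<eval_fn, 1> table; // single width here; KFR indexes by log2(width)

    float operator()(std::size_t index) const { return table[0](instance, index); }
};

template <typename Expr>
erased_expression erase(Expr& e)
{
    return { &e, { +[](void* p, std::size_t i) { return (*static_cast<Expr*>(p))(i); } } };
}

int main()
{
    auto ramp = [](std::size_t i) { return 0.5f * i; };
    const erased_expression p = erase(ramp);
    std::printf("%f\n", p(4)); // 2.000000
}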
Indices, typename T, size_t Nout = 1 + sizeof...(Indices)> -KFR_INLINE vec<T, Nout> gather(const T* base) +CMT_INLINE vec<T, Nout> gather(const T* base) { return make_vector(base[Index], base[Indices]...); } template <size_t Index, size_t... Indices, typename T, size_t N, size_t InIndex = 0> -KFR_INLINE void scatter(const T* base, const vec<T, N>& value) +CMT_INLINE void scatter(const T* base, const vec<T, N>& value) { base[Index] = value[InIndex]; scatter<Indices..., T, N, InIndex + 1>(base, value); @@ -63,60 +63,60 @@ KFR_INLINE void scatter(const T* base, const vec<T, N>& value) namespace internal { template <typename T, size_t N, size_t... Indices> -KFR_INLINE vec<T, N> gather(const T* base, const vec<u32, N>& indices, csizes_t<Indices...>) +CMT_INLINE vec<T, N> gather(const T* base, const vec<u32, N>& indices, csizes_t<Indices...>) { return make_vector(base[indices[Indices]]...); } template <size_t Nout, size_t Stride, typename T, size_t... Indices> -KFR_INLINE vec<T, Nout> gather_stride(const T* base, csizes_t<Indices...>) +CMT_INLINE vec<T, Nout> gather_stride(const T* base, csizes_t<Indices...>) { return make_vector(base[Indices * Stride]...); } template <size_t Nout, typename T, size_t... Indices> -KFR_INLINE vec<T, Nout> gather_stride_s(const T* base, size_t stride, csizes_t<Indices...>) +CMT_INLINE vec<T, Nout> gather_stride_s(const T* base, size_t stride, csizes_t<Indices...>) { return make_vector(base[Indices * stride]...); } } template <typename T, size_t N> -KFR_INLINE vec<T, N> gather(const T* base, const vec<u32, N>& indices) +CMT_INLINE vec<T, N> gather(const T* base, const vec<u32, N>& indices) { return internal::gather(base, indices, csizeseq<N>); } template <size_t Nout, typename T> -KFR_INLINE vec<T, Nout> gather_stride(const T* base, size_t stride) +CMT_INLINE vec<T, Nout> gather_stride(const T* base, size_t stride) { return internal::gather_stride_s<Nout>(base, stride, csizeseq<Nout>); } template <size_t Nout, size_t Stride, typename T> -KFR_INLINE vec<T, Nout> gather_stride(const T* base) +CMT_INLINE vec<T, Nout> gather_stride(const T* base) { return internal::gather_stride<Nout, Stride>(base, csizeseq<Nout>); } template <size_t groupsize, typename T, size_t N, typename IT, size_t... Indices> -KFR_INLINE vec<T, N * groupsize> gather_helper(const T* base, const vec<IT, N>& offset, csizes_t<Indices...>) +CMT_INLINE vec<T, N * groupsize> gather_helper(const T* base, const vec<IT, N>& offset, csizes_t<Indices...>) { return concat(read<groupsize>(base + groupsize * (*offset)[Indices])...); } template <size_t groupsize = 1, typename T, size_t N, typename IT> -KFR_INLINE vec<T, N * groupsize> gather(const T* base, const vec<IT, N>& offset) +CMT_INLINE vec<T, N * groupsize> gather(const T* base, const vec<IT, N>& offset) { return gather_helper<groupsize>(base, offset, csizeseq<N>); } template <size_t groupsize, typename T, size_t N, size_t Nout = N* groupsize, typename IT, size_t... Indices> -KFR_INLINE void scatter_helper(T* base, const vec<IT, N>& offset, const vec<T, Nout>& value, +CMT_INLINE void scatter_helper(T* base, const vec<IT, N>& offset, const vec<T, Nout>& value, csizes_t<Indices...>) { swallow{ (write(base + groupsize * (*offset)[Indices], slice<Indices * groupsize, groupsize>(value)), 0)... 
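// A plain-C++ sketch (not KFR code) of gather_stride<Nout, Stride>(base)
// above: read Nout elements spaced Stride apart, e.g. one channel of
// interleaved audio.
#include <array>
#include <cstdio>

template <std::size_t Nout, std::size_t Stride, typename T>
std::array<T, Nout> gather_stride_sketch(const T* base)
{
    std::array<T, Nout> r{};
    for (std::size_t i = 0; i < Nout; ++i)
        r[i] = base[i * Stride];
    return r;
}

int main()
{
    const float interleaved[8] = { 0, 10, 1, 11, 2, 12, 3, 13 }; // L R L R ...
    for (float v : gather_stride_sketch<4, 2>(interleaved))
        std::printf("%g ", v); // 0 1 2 3  (left channel)
    std::printf("\n");
}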
}; } template <size_t groupsize = 1, typename T, size_t N, size_t Nout = N* groupsize, typename IT> -KFR_INLINE void scatter(T* base, const vec<IT, N>& offset, const vec<T, Nout>& value) +CMT_INLINE void scatter(T* base, const vec<IT, N>& offset, const vec<T, Nout>& value) { return scatter_helper<groupsize>(base, offset, value, csizeseq<N>); } @@ -188,14 +188,14 @@ constexpr T partial_masks[] = { internal::allones<T>, T() }; template <typename T, size_t N> -KFR_INLINE vec<T, N> partial_mask(size_t index) +CMT_INLINE vec<T, N> partial_mask(size_t index) { static_assert(N <= arraysize(partial_masks<T>) / 2, "N must not be greater than half of partial_masks expression_array"); return read<N>(&partial_masks<T>[0] + arraysize(partial_masks<T>) / 2 - index); } template <typename T, size_t N> -KFR_INLINE vec<T, N> partial_mask(size_t index, vec_t<T, N>) +CMT_INLINE vec<T, N> partial_mask(size_t index, vec_t<T, N>) { return partial_mask<T, N>(index); } diff --git a/include/kfr/base/reduce.hpp b/include/kfr/base/reduce.hpp @@ -32,14 +32,14 @@ namespace kfr { template <typename T> -KFR_INLINE T final_mean(T value, size_t size) +CMT_INLINE T final_mean(T value, size_t size) { return value / T(size); } KFR_FN(final_mean) template <typename T> -KFR_INLINE T final_rootmean(T value, size_t size) +CMT_INLINE T final_rootmean(T value, size_t size) { return internal::builtin_sqrt(value / T(size)); } @@ -48,12 +48,12 @@ KFR_FN(final_rootmean) namespace internal { template <typename FinalFn, typename T, KFR_ENABLE_IF(is_callable<FinalFn, T, size_t>::value)> -KFR_INLINE auto reduce_call_final(FinalFn&& finalfn, size_t size, T value) +CMT_INLINE auto reduce_call_final(FinalFn&& finalfn, size_t size, T value) { return finalfn(value, size); } template <typename FinalFn, typename T, KFR_ENABLE_IF(!is_callable<FinalFn, T, size_t>::value)> -KFR_INLINE auto reduce_call_final(FinalFn&& finalfn, size_t, T value) +CMT_INLINE auto reduce_call_final(FinalFn&& finalfn, size_t, T value) { return finalfn(value); } @@ -70,26 +70,26 @@ struct expression_reduce : output_expression } template <typename U, size_t N> - KFR_INLINE void operator()(coutput_t, size_t, const vec<U, N>& x) const + CMT_INLINE void operator()(coutput_t, size_t, const vec<U, N>& x) const { counter += N; process(x); } - KFR_INLINE T get() { return internal::reduce_call_final(finalfn, counter, horizontal(value, reducefn)); } + CMT_INLINE T get() { return internal::reduce_call_final(finalfn, counter, horizontal(value, reducefn)); } protected: void reset() { counter = 0; } - KFR_INLINE void process(vec<T, width> x) const { value = reducefn(transformfn(x), value); } + CMT_INLINE void process(vec<T, width> x) const { value = reducefn(transformfn(x), value); } template <size_t N, KFR_ENABLE_IF(N < width)> - KFR_INLINE void process(vec<T, N> x) const + CMT_INLINE void process(vec<T, N> x) const { value = combine(value, reducefn(transformfn(x), narrow<N>(value))); } template <size_t N, KFR_ENABLE_IF(N > width)> - KFR_INLINE void process(vec<T, N> x) const + CMT_INLINE void process(vec<T, N> x) const { process(low(x)); process(high(x)); diff --git a/include/kfr/base/round.hpp b/include/kfr/base/round.hpp @@ -51,7 +51,7 @@ namespace intrinsics #define KFR_mm256_trunc_pd(V) _mm256_round_pd((V), _MM_FROUND_TRUNC) #define KFR_mm256_roundnearest_pd(V) _mm256_round_pd((V), _MM_FROUND_NINT) -#if defined CID_ARCH_SSE41 +#if defined CMT_ARCH_SSE41 KFR_SINTRIN f32sse floor(const f32sse& value) { return _mm_floor_ps(*value); } KFR_SINTRIN f32sse ceil(const f32sse& value) { 
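// A scalar sketch (not KFR code) of what expression_reduce with
// final_mean/final_rootmean above computes: blocks are transformed, reduced
// into an accumulator, and a final step divides by the element count (mean)
// or additionally takes the square root (RMS).
#include <cmath>
#include <cstdio>
#include <vector>

int main()
{
    const std::vector<double> x{ 1, 2, 3, 4 };
    double sum = 0, sumsqr = 0;
    for (double v : x)
    {
        sum += v;        // reducefn = add, transformfn = identity
        sumsqr += v * v; // reducefn = add, transformfn = sqr
    }
    std::printf("mean = %f\n", sum / x.size());                // 2.5
    std::printf("rms  = %f\n", std::sqrt(sumsqr / x.size()));  // ~2.7386
}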
return _mm_ceil_ps(*value); } @@ -64,7 +64,7 @@ KFR_SINTRIN f64sse round(const f64sse& value) { return KFR_mm_roundnearest_pd(*v KFR_SINTRIN f32sse fract(const f32sse& x) { return x - floor(x); } KFR_SINTRIN f64sse fract(const f64sse& x) { return x - floor(x); } -#if defined CID_ARCH_AVX +#if defined CMT_ARCH_AVX KFR_SINTRIN f32avx floor(const f32avx& value) { return _mm256_floor_ps(*value); } KFR_SINTRIN f32avx ceil(const f32avx& value) { return _mm256_ceil_ps(*value); } @@ -318,6 +318,26 @@ KFR_INTRIN internal::expression_function<fn::itrunc, E1> itrunc(E1&& x) { return { fn::itrunc(), std::forward<E1>(x) }; } + +template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> +CMT_INLINE vec<T, N> fmod(const vec<T, N>& x, const vec<T, N>& y) +{ + return x - trunc(x / y) * y; +} + +KFR_FN_S(fmod) +KFR_FN(fmod) + +template <typename T, size_t N, KFR_ENABLE_IF(!is_f_class<T>::value)> +constexpr CMT_INLINE vec<T, N> rem(const vec<T, N>& x, const vec<T, N>& y) +{ + return x % y; +} +template <typename T, size_t N, KFR_ENABLE_IF(is_f_class<T>::value)> +CMT_INLINE vec<T, N> rem(const vec<T, N>& x, const vec<T, N>& y) +{ + return fmod(x, y); +} } #undef KFR_mm_trunc_ps diff --git a/include/kfr/base/saturation.hpp b/include/kfr/base/saturation.hpp @@ -30,6 +30,8 @@ namespace kfr namespace intrinsics { + +// Generic functions template <typename T, size_t N> KFR_SINTRIN vec<T, N> saturated_signed_add(const vec<T, N>& a, const vec<T, N>& b) { @@ -66,7 +68,7 @@ KFR_SINTRIN vec<T, N> saturated_unsigned_sub(const vec<T, N>& a, const vec<T, N> return select(a < b, zerovector(a), a - b); } -#if defined CID_ARCH_SSE2 +#if defined CMT_ARCH_SSE2 KFR_SINTRIN u8sse satadd(const u8sse& x, const u8sse& y) { return _mm_adds_epu8(*x, *y); } KFR_SINTRIN i8sse satadd(const i8sse& x, const i8sse& y) { return _mm_adds_epi8(*x, *y); } @@ -88,7 +90,7 @@ KFR_SINTRIN i64sse satsub(const i64sse& a, const i64sse& b) { return saturated_s KFR_SINTRIN u32sse satsub(const u32sse& a, const u32sse& b) { return saturated_unsigned_sub(a, b); } KFR_SINTRIN u64sse satsub(const u64sse& a, const u64sse& b) { return saturated_unsigned_sub(a, b); } -#if defined CID_ARCH_AVX2 +#if defined CMT_ARCH_AVX2 KFR_SINTRIN u8avx satadd(const u8avx& x, const u8avx& y) { return _mm256_adds_epu8(*x, *y); } KFR_SINTRIN i8avx satadd(const i8avx& x, const i8avx& y) { return _mm256_adds_epi8(*x, *y); } KFR_SINTRIN u16avx satadd(const u16avx& x, const u16avx& y) { return _mm256_adds_epu16(*x, *y); } @@ -103,6 +105,31 @@ KFR_SINTRIN i16avx satsub(const i16avx& x, const i16avx& y) { return _mm256_subs KFR_HANDLE_ALL_SIZES_2(satadd) KFR_HANDLE_ALL_SIZES_2(satsub) +#elif defined CMT_ARCH_NEON + +KFR_SINTRIN u8neon satadd(const u8neon& x, const u8neon& y) { return vqaddq_u8(*x, *y); } +KFR_SINTRIN i8neon satadd(const i8neon& x, const i8neon& y) { return vqaddq_s8(*x, *y); } +KFR_SINTRIN u16neon satadd(const u16neon& x, const u16neon& y) { return vqaddq_u16(*x, *y); } +KFR_SINTRIN i16neon satadd(const i16neon& x, const i16neon& y) { return vqaddq_s16(*x, *y); } + +KFR_SINTRIN u8neon satsub(const u8neon& x, const u8neon& y) { return vqsubq_u8(*x, *y); } +KFR_SINTRIN i8neon satsub(const i8neon& x, const i8neon& y) { return vqsubq_s8(*x, *y); } +KFR_SINTRIN u16neon satsub(const u16neon& x, const u16neon& y) { return vqsubq_u16(*x, *y); } +KFR_SINTRIN i16neon satsub(const i16neon& x, const i16neon& y) { return vqsubq_s16(*x, *y); } + +KFR_SINTRIN u32neon satadd(const u32neon& a, const u32neon& b) { return vqaddq_u32(*a, *b); } +KFR_SINTRIN i32neon 
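// A scalar sketch of the fmod() added to round.hpp above: the classic
// "x - trunc(x / y) * y" identity, which keeps the sign of x.
#include <cmath>
#include <cstdio>

static double fmod_sketch(double x, double y) { return x - std::trunc(x / y) * y; }

int main()
{
    std::printf("%f %f\n", fmod_sketch(7.5, 2.0), std::fmod(7.5, 2.0));   // 1.5 1.5
    std::printf("%f %f\n", fmod_sketch(-7.5, 2.0), std::fmod(-7.5, 2.0)); // -1.5 -1.5
}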
satadd(const i32neon& a, const i32neon& b) { return vqaddq_s32(*a, *b); } +KFR_SINTRIN u64neon satadd(const u64neon& a, const u64neon& b) { return vqaddq_u64(*a, *b); } +KFR_SINTRIN i64neon satadd(const i64neon& a, const i64neon& b) { return vqaddq_s64(*a, *b); } + +KFR_SINTRIN i32neon satsub(const i32neon& a, const i32neon& b) { return vqsubq_u32(*a, *b); } +KFR_SINTRIN i64neon satsub(const i64neon& a, const i64neon& b) { return vqsubq_s32(*a, *b); } +KFR_SINTRIN u32neon satsub(const u32neon& a, const u32neon& b) { return vqsubq_u64(*a, *b); } +KFR_SINTRIN u64neon satsub(const u64neon& a, const u64neon& b) { return vqsubq_s64(*a, *b); } + +KFR_HANDLE_ALL_SIZES_2(satadd) +KFR_HANDLE_ALL_SIZES_2(satsub) + #else // fallback template <typename T, size_t N, KFR_ENABLE_IF(std::is_signed<T>::value)> diff --git a/include/kfr/base/select.hpp b/include/kfr/base/select.hpp @@ -29,7 +29,7 @@ namespace kfr namespace intrinsics { -#if defined CID_ARCH_SSE41 +#if defined CMT_ARCH_SSE41 KFR_SINTRIN u8sse select(const mu8sse& m, const u8sse& x, const u8sse& y) { @@ -72,7 +72,7 @@ KFR_SINTRIN f64sse select(const mf64sse& m, const f64sse& x, const f64sse& y) return _mm_blendv_pd(*y, *x, *m); } -#if defined CID_ARCH_AVX +#if defined CMT_ARCH_AVX KFR_SINTRIN f64avx select(const mf64avx& m, const f64avx& x, const f64avx& y) { return _mm256_blendv_pd(*y, *x, *m); @@ -83,7 +83,7 @@ KFR_SINTRIN f32avx select(const mf32avx& m, const f32avx& x, const f32avx& y) } #endif -#if defined CID_ARCH_AVX2 +#if defined CMT_ARCH_AVX2 KFR_SINTRIN u8avx select(const mu8avx& m, const u8avx& x, const u8avx& y) { return _mm256_blendv_epi8(*y, *x, *m); @@ -129,13 +129,70 @@ KFR_SINTRIN vec<T, N> select(const mask<T, N>& a, const vec<T, N>& b, const vec< return concat(select(low(a).asmask(), low(b), low(c)), select(high(a).asmask(), high(b), high(c))); } +#elif defined CMT_ARCH_NEON + +KFR_SINTRIN f32neon select(const mf32neon& m, const f32neon& x, const f32neon& y) +{ + return vbslq_f32(*m, *x, *y); +} + +KFR_SINTRIN i8neon select(const mi8neon& m, const i8neon& x, const i8neon& y) { return vbslq_s8(*m, *x, *y); } +KFR_SINTRIN u8neon select(const mu8neon& m, const u8neon& x, const u8neon& y) { return vbslq_u8(*m, *x, *y); } +KFR_SINTRIN i16neon select(const mi16neon& m, const i16neon& x, const i16neon& y) +{ + return vbslq_s16(*m, *x, *y); +} +KFR_SINTRIN u16neon select(const mu16neon& m, const u16neon& x, const u16neon& y) +{ + return vbslq_u16(*m, *x, *y); +} +KFR_SINTRIN i32neon select(const mi32neon& m, const i32neon& x, const i32neon& y) +{ + return vbslq_s32(*m, *x, *y); +} +KFR_SINTRIN u32neon select(const mu32neon& m, const u32neon& x, const u32neon& y) +{ + return vbslq_u32(*m, *x, *y); +} +KFR_SINTRIN i64neon select(const mi64neon& m, const i64neon& x, const i64neon& y) +{ + return vbslq_s64(*m, *x, *y); +} +KFR_SINTRIN u64neon select(const mu64neon& m, const u64neon& x, const u64neon& y) +{ + return vbslq_u64(*m, *x, *y); +} + +#ifdef CMT_ARCH_NEON64 +KFR_SINTRIN f64neon select(const mf64neon& m, const f64neon& x, const f64neon& y) +{ + return vbslq_f64(*m, *x, *y); +} +#else +KFR_SINTRIN f64neon select(const mf64neon& m, const f64neon& x, const f64neon& y) +{ + return y ^ ((x ^ y) & f64neon(*m)); +} +#endif + +template <typename T, size_t N, KFR_ENABLE_IF(N < vector_width<T, cpu_t::native>)> +KFR_SINTRIN vec<T, N> select(const mask<T, N>& a, const vec<T, N>& b, const vec<T, N>& c) +{ + return slice<0, N>(select(expand_simd(a).asmask(), expand_simd(b), expand_simd(c))); +} +template <typename T, size_t N, KFR_ENABLE_IF(N 
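// In the NEON satsub overloads of the saturation.hpp hunk above, the
// signed/unsigned and 32/64-bit intrinsics appear to be paired inconsistently
// (i32 calls vqsubq_u32, u32 calls vqsubq_u64, and so on). A consistent
// pairing, assuming only the standard <arm_neon.h> intrinsics, would be:
#include <arm_neon.h>

static int32x4_t  satsub_i32(int32x4_t a, int32x4_t b)    { return vqsubq_s32(a, b); }
static uint32x4_t satsub_u32(uint32x4_t a, uint32x4_t b)  { return vqsubq_u32(a, b); }
static int64x2_t  satsub_i64(int64x2_t a, int64x2_t b)    { return vqsubq_s64(a, b); }
static uint64x2_t satsub_u64(uint64x2_t a, uint64x2_t b)  { return vqsubq_u64(a, b); }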
>= vector_width<T, cpu_t::native>), typename = void> +KFR_SINTRIN vec<T, N> select(const mask<T, N>& a, const vec<T, N>& b, const vec<T, N>& c) +{ + return concat(select(low(a).asmask(), low(b), low(c)), select(high(a).asmask(), high(b), high(c))); +} + #else // fallback template <typename T, size_t N> -KFR_SINTRIN vec<T, N> select(mask<T, N> m, const vec<T, N>& x, const vec<T, N>& y) +KFR_SINTRIN vec<T, N> select(const mask<T, N>& m, const vec<T, N>& x, const vec<T, N>& y) { - return y ^ ((x ^ y) & m); + return y ^ ((x ^ y) & vec<T, N>(*m)); } #endif } @@ -146,8 +203,7 @@ template <typename T1, size_t N, typename T2, typename T3, KFR_ENABLE_IF(is_nume KFR_INTRIN vec<Tout, N> select(const mask<T1, N>& m, const T2& x, const T3& y) { static_assert(sizeof(T1) == sizeof(Tout), "select: incompatible types"); - return intrinsics::select(bitcast<Tout>(m).asmask(), static_cast<vec<Tout, N>>(x), - static_cast<vec<Tout, N>>(y)); + return intrinsics::select(bitcast<Tout>(m), static_cast<vec<Tout, N>>(x), static_cast<vec<Tout, N>>(y)); } template <typename E1, typename E2, typename E3, KFR_ENABLE_IF(is_input_expressions<E1, E2, E3>::value)> diff --git a/include/kfr/base/shuffle.hpp b/include/kfr/base/shuffle.hpp @@ -35,20 +35,20 @@ namespace internal { template <size_t index, typename T> -constexpr KFR_INLINE T broadcast_get_nth() +constexpr CMT_INLINE T broadcast_get_nth() { return c_qnan<T>; } template <size_t index, typename T, typename... Ts> -constexpr KFR_INLINE T broadcast_get_nth(T x, Ts... rest) +constexpr CMT_INLINE T broadcast_get_nth(T x, Ts... rest) { return index == 0 ? x : broadcast_get_nth<index - 1, T>(rest...); } template <typename T, typename... Ts, size_t... indices, size_t Nin = 1 + sizeof...(Ts), size_t Nout = sizeof...(indices)> -KFR_INLINE constexpr vec<T, Nout> broadcast_helper(csizes_t<indices...>, T x, Ts... rest) +CMT_INLINE constexpr vec<T, Nout> broadcast_helper(csizes_t<indices...>, T x, Ts... rest) { simd<T, Nout> result{ broadcast_get_nth<indices % Nin>(x, rest...)... }; return result; @@ -56,46 +56,46 @@ KFR_INLINE constexpr vec<T, Nout> broadcast_helper(csizes_t<indices...>, T x, Ts } template <size_t Nout, typename T, typename... Ts> -constexpr KFR_INLINE vec<T, Nout> broadcast(T x, T y, Ts... rest) +constexpr CMT_INLINE vec<T, Nout> broadcast(T x, T y, Ts... 
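// A scalar sketch (not KFR code) of the bitwise fallback select() above:
// where the mask bits are 1 the result takes bits from x, elsewhere from y,
// i.e. y ^ ((x ^ y) & m).
#include <cstdint>
#include <cstdio>

static std::uint32_t select_bits(std::uint32_t m, std::uint32_t x, std::uint32_t y)
{
    return y ^ ((x ^ y) & m);
}

int main()
{
    std::printf("%08x\n", (unsigned)select_bits(0xFFFFFFFFu, 0xAAAAAAAAu, 0x55555555u)); // aaaaaaaa
    std::printf("%08x\n", (unsigned)select_bits(0x00000000u, 0xAAAAAAAAu, 0x55555555u)); // 55555555
}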
rest) { return internal::broadcast_helper(csizeseq<Nout>, x, y, rest...); } KFR_FN(broadcast) template <size_t Ncount, typename T, size_t N> -KFR_INLINE vec<T, N + Ncount> padhigh(const vec<T, N>& x) +CMT_INLINE vec<T, N + Ncount> padhigh(const vec<T, N>& x) { return shufflevector<N + Ncount, internal::shuffle_index_extend<0, N>>(x); } KFR_FN(padhigh) template <size_t Ncount, typename T, size_t N> -KFR_INLINE vec<T, N + Ncount> padlow(const vec<T, N>& x) +CMT_INLINE vec<T, N + Ncount> padlow(const vec<T, N>& x) { return shufflevector<N + Ncount, internal::shuffle_index_extend<Ncount, N>>(x); } KFR_FN(padlow) template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(N != Nout)> -KFR_INLINE vec<T, Nout> extend(const vec<T, N>& x) +CMT_INLINE vec<T, Nout> extend(const vec<T, N>& x) { return shufflevector<Nout, internal::shuffle_index_extend<0, N>>(x); } template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(N == Nout)> -constexpr KFR_INLINE vec<T, Nout> extend(const vec<T, N>& x) +constexpr CMT_INLINE vec<T, Nout> extend(const vec<T, N>& x) { return x; } KFR_FN(extend) template <size_t start, size_t count, typename T, size_t N> -KFR_INLINE vec<T, count> slice(const vec<T, N>& x) +CMT_INLINE vec<T, count> slice(const vec<T, N>& x) { static_assert(start + count <= N, "start + count <= N"); return shufflevector<count, internal::shuffle_index<start>>(x); } template <size_t start, size_t count, typename T, size_t N> -KFR_INLINE vec<T, count> slice(const vec<T, N>& x, const vec<T, N>& y) +CMT_INLINE vec<T, count> slice(const vec<T, N>& x, const vec<T, N>& y) { static_assert(start + count <= N * 2, "start + count <= N * 2"); return shufflevector<count, internal::shuffle_index<start>>(x, y); @@ -103,11 +103,11 @@ KFR_INLINE vec<T, count> slice(const vec<T, N>& x, const vec<T, N>& y) KFR_FN(slice) template <size_t, typename T, size_t N> -KFR_INLINE void split(const vec<T, N>&) +CMT_INLINE void split(const vec<T, N>&) { } template <size_t start = 0, typename T, size_t N, size_t Nout, typename... Args> -KFR_INLINE void split(const vec<T, N>& x, vec<T, Nout>& out, Args&&... args) +CMT_INLINE void split(const vec<T, N>& x, vec<T, Nout>& out, Args&&... args) { out = slice<start, Nout>(x); split<start + Nout>(x, std::forward<Args>(args)...); @@ -115,7 +115,7 @@ KFR_INLINE void split(const vec<T, N>& x, vec<T, Nout>& out, Args&&... 
args) KFR_FN(split) template <size_t total, size_t number, typename T, size_t N, size_t Nout = N / total> -KFR_INLINE vec<T, Nout> part(const vec<T, N>& x) +CMT_INLINE vec<T, Nout> part(const vec<T, N>& x) { static_assert(N % total == 0, "N % total == 0"); return shufflevector<Nout, internal::shuffle_index<number * Nout>>(x); @@ -123,27 +123,27 @@ KFR_INLINE vec<T, Nout> part(const vec<T, N>& x) KFR_FN(part) template <size_t start, size_t count, typename T, size_t N1, size_t N2> -KFR_INLINE vec<T, count> concat_and_slice(const vec<T, N1>& x, const vec<T, N2>& y) +CMT_INLINE vec<T, count> concat_and_slice(const vec<T, N1>& x, const vec<T, N2>& y) { return internal::concattwo<start, count>(x, y); } KFR_FN(concat_and_slice) template <size_t Nout, typename T, size_t N> -KFR_INLINE vec<T, Nout> widen(const vec<T, N>& x, identity<T> newvalue = T()) +CMT_INLINE vec<T, Nout> widen(const vec<T, N>& x, identity<T> newvalue = T()) { static_assert(Nout > N, "Nout > N"); return concat(x, broadcast<Nout - N>(newvalue)); } template <size_t Nout, typename T, typename TS> -constexpr KFR_INLINE vec<T, Nout> widen(const vec<T, Nout>& x, TS) +constexpr CMT_INLINE vec<T, Nout> widen(const vec<T, Nout>& x, TS) { return x; } KFR_FN(widen) template <size_t Nout, typename T, size_t N> -KFR_INLINE vec<T, Nout> narrow(const vec<T, N>& x) +CMT_INLINE vec<T, Nout> narrow(const vec<T, N>& x) { static_assert(Nout <= N, "Nout <= N"); return slice<0, Nout>(x); @@ -152,7 +152,7 @@ KFR_FN(narrow) template <size_t groupsize = 1, typename T, size_t N, size_t Nout = N / 2, KFR_ENABLE_IF(N >= 2 && (N & 1) == 0)> -KFR_INLINE vec<T, Nout> even(const vec<T, N>& x) +CMT_INLINE vec<T, Nout> even(const vec<T, N>& x) { return shufflevector<Nout, internal::shuffle_index<0, 2>, groupsize>(x); } @@ -160,7 +160,7 @@ KFR_FNR(even, 2, 1) template <size_t groupsize = 1, typename T, size_t N, size_t Nout = N / 2, KFR_ENABLE_IF(N >= 2 && (N & 1) == 0)> -KFR_INLINE vec<T, Nout> odd(const vec<T, N>& x) +CMT_INLINE vec<T, Nout> odd(const vec<T, N>& x) { return shufflevector<Nout, internal::shuffle_index<1, 2>, groupsize>(x); } @@ -182,7 +182,7 @@ struct shuffle_index_dup } template <typename T, size_t N> -KFR_INLINE vec<T, N> dupeven(const vec<T, N>& x) +CMT_INLINE vec<T, N> dupeven(const vec<T, N>& x) { static_assert(N % 2 == 0, "N must be even"); return shufflevector<N, internal::shuffle_index_dup<2, 0>>(x); @@ -190,7 +190,7 @@ KFR_INLINE vec<T, N> dupeven(const vec<T, N>& x) KFR_FN(dupeven) template <typename T, size_t N> -KFR_INLINE vec<T, N> dupodd(const vec<T, N>& x) +CMT_INLINE vec<T, N> dupodd(const vec<T, N>& x) { static_assert(N % 2 == 0, "N must be even"); return shufflevector<N, internal::shuffle_index_dup<2, 1>>(x); @@ -198,7 +198,7 @@ KFR_INLINE vec<T, N> dupodd(const vec<T, N>& x) KFR_FN(dupodd) template <typename T, size_t N> -KFR_INLINE vec<T, N * 2> duphalfs(const vec<T, N>& x) +CMT_INLINE vec<T, N * 2> duphalfs(const vec<T, N>& x) { return concat(x, x); } @@ -221,7 +221,7 @@ struct shuffle_index_shuffle } template <size_t... Indices, typename T, size_t N> -KFR_INLINE vec<T, N> shuffle(const vec<T, N>& x, const vec<T, N>& y, +CMT_INLINE vec<T, N> shuffle(const vec<T, N>& x, const vec<T, N>& y, elements_t<Indices...> = elements_t<Indices...>()) { return shufflevector<N, internal::shuffle_index_shuffle<N, Indices...>>(x, y); @@ -229,7 +229,7 @@ KFR_INLINE vec<T, N> shuffle(const vec<T, N>& x, const vec<T, N>& y, KFR_FN(shuffle) template <size_t groupsize, size_t... 
Indices, typename T, size_t N> -KFR_INLINE vec<T, N> shufflegroups(const vec<T, N>& x, const vec<T, N>& y, +CMT_INLINE vec<T, N> shufflegroups(const vec<T, N>& x, const vec<T, N>& y, elements_t<Indices...> = elements_t<Indices...>()) { return shufflevector<N, internal::shuffle_index_shuffle<N, Indices...>, groupsize>(x, y); @@ -254,14 +254,14 @@ struct shuffle_index_permute } template <size_t... Indices, typename T, size_t N> -KFR_INLINE vec<T, N> permute(const vec<T, N>& x, elements_t<Indices...> = elements_t<Indices...>()) +CMT_INLINE vec<T, N> permute(const vec<T, N>& x, elements_t<Indices...> = elements_t<Indices...>()) { return shufflevector<N, internal::shuffle_index_permute<N, Indices...>>(x); } KFR_FN(permute) template <size_t groupsize, size_t... Indices, typename T, size_t N> -KFR_INLINE vec<T, N> permutegroups(const vec<T, N>& x, elements_t<Indices...> = elements_t<Indices...>()) +CMT_INLINE vec<T, N> permutegroups(const vec<T, N>& x, elements_t<Indices...> = elements_t<Indices...>()) { return shufflevector<N, internal::shuffle_index_permute<N, Indices...>, groupsize>(x); } @@ -271,7 +271,7 @@ namespace internal { template <typename T, size_t Nout, typename Fn, size_t... Indices> -constexpr KFR_INLINE vec<T, Nout> generate_vector(csizes_t<Indices...>) +constexpr CMT_INLINE vec<T, Nout> generate_vector(csizes_t<Indices...>) { constexpr Fn fn{}; return make_vector(static_cast<T>(fn(Indices))...); @@ -279,7 +279,7 @@ constexpr KFR_INLINE vec<T, Nout> generate_vector(csizes_t<Indices...>) } template <typename T, size_t Nout, typename Fn> -constexpr KFR_INLINE vec<T, Nout> generate_vector() +constexpr CMT_INLINE vec<T, Nout> generate_vector() { return internal::generate_vector<T, Nout, Fn>(csizeseq<Nout>); } @@ -288,19 +288,19 @@ KFR_FN(generate_vector) namespace internal { template <typename T, size_t N> -constexpr KFR_INLINE mask<T, N> evenmask() +constexpr CMT_INLINE mask<T, N> evenmask() { return broadcast<N, T>(maskbits<T>(true), maskbits<T>(false)); } template <typename T, size_t N> -constexpr KFR_INLINE mask<T, N> oddmask() +constexpr CMT_INLINE mask<T, N> oddmask() { return broadcast<N, T>(maskbits<T>(false), maskbits<T>(true)); } } template <typename T, size_t N, size_t Nout = N * 2> -KFR_INLINE vec<T, Nout> dup(const vec<T, N>& x) +CMT_INLINE vec<T, Nout> dup(const vec<T, N>& x) { return shufflevector<Nout, internal::shuffle_index_dup1<2>>(x, x); } @@ -316,7 +316,7 @@ struct shuffle_index_duphalf } template <typename T, size_t N> -KFR_INLINE vec<T, N> duplow(const vec<T, N>& x) +CMT_INLINE vec<T, N> duplow(const vec<T, N>& x) { static_assert(N % 2 == 0, "N must be even"); return shufflevector<N, internal::shuffle_index_duphalf<N / 2, 0>>(x); @@ -324,7 +324,7 @@ KFR_INLINE vec<T, N> duplow(const vec<T, N>& x) KFR_FN(duplow) template <typename T, size_t N> -KFR_INLINE vec<T, N> duphigh(vec<T, N> x) +CMT_INLINE vec<T, N> duphigh(vec<T, N> x) { static_assert(N % 2 == 0, "N must be even"); return shufflevector<N, internal::shuffle_index_duphalf<N / 2, N / 2>>(x); @@ -347,7 +347,7 @@ struct shuffle_index_blend } template <size_t... 
Indices, typename T, size_t N> -KFR_INLINE vec<T, N> blend(const vec<T, N>& x, const vec<T, N>& y, +CMT_INLINE vec<T, N> blend(const vec<T, N>& x, const vec<T, N>& y, elements_t<Indices...> = elements_t<Indices...>()) { return shufflevector<N, internal::shuffle_index_blend<N, Indices...>, 1>(x, y); @@ -376,20 +376,20 @@ struct shuffle_index_outputright } template <size_t elements, typename T, size_t N> -KFR_INLINE vec<T, N> swap(vec<T, N> x) +CMT_INLINE vec<T, N> swap(const vec<T, N>& x) { return shufflevector<N, internal::shuffle_index_swap<elements>>(x); } -KFR_FN(swap) +CMT_FN_TPL((size_t elements), (elements), swap) template <size_t shift, typename T, size_t N> -KFR_INLINE vec<T, N> rotatetwo(const vec<T, N>& lo, const vec<T, N>& hi) +CMT_INLINE vec<T, N> rotatetwo(const vec<T, N>& lo, const vec<T, N>& hi) { return shift == 0 ? lo : (shift == N ? hi : shufflevector<N, internal::shuffle_index<N - shift>>(hi, lo)); } template <size_t amount, typename T, size_t N> -KFR_INLINE vec<T, N> rotateright(const vec<T, N>& x, csize_t<amount> = csize_t<amount>()) +CMT_INLINE vec<T, N> rotateright(const vec<T, N>& x, csize_t<amount> = csize_t<amount>()) { static_assert(amount >= 0 && amount < N, "amount >= 0 && amount < N"); return shufflevector<N, internal::shuffle_index_wrap<N, N - amount>>(x); @@ -397,7 +397,7 @@ KFR_INLINE vec<T, N> rotateright(const vec<T, N>& x, csize_t<amount> = csize_t<a KFR_FN(rotateright) template <size_t amount, typename T, size_t N> -KFR_INLINE vec<T, N> rotateleft(const vec<T, N>& x, csize_t<amount> = csize_t<amount>()) +CMT_INLINE vec<T, N> rotateleft(const vec<T, N>& x, csize_t<amount> = csize_t<amount>()) { static_assert(amount >= 0 && amount < N, "amount >= 0 && amount < N"); return shufflevector<N, internal::shuffle_index_wrap<N, amount>>(x); @@ -405,21 +405,21 @@ KFR_INLINE vec<T, N> rotateleft(const vec<T, N>& x, csize_t<amount> = csize_t<am KFR_FN(rotateleft) template <typename T, size_t N> -KFR_INLINE vec<T, N> insertright(T x, const vec<T, N>& y) +CMT_INLINE vec<T, N> insertright(T x, const vec<T, N>& y) { return concat_and_slice<1, N>(y, vec<T, 1>(x)); } KFR_FN(insertright) template <typename T, size_t N> -KFR_INLINE vec<T, N> insertleft(T x, const vec<T, N>& y) +CMT_INLINE vec<T, N> insertleft(T x, const vec<T, N>& y) { return concat_and_slice<0, N>(vec<T, 1>(x), y); } KFR_FN(insertleft) template <typename T, size_t N, size_t N2> -KFR_INLINE vec<T, N> outputright(const vec<T, N>& x, const vec<T, N2>& y) +CMT_INLINE vec<T, N> outputright(const vec<T, N>& x, const vec<T, N2>& y) { return shufflevector<N, internal::shuffle_index_outputright<N2, N>>(x, extend<N>(y)); } @@ -439,46 +439,51 @@ struct shuffle_index_transpose } template <size_t side, size_t groupsize = 1, typename T, size_t N, KFR_ENABLE_IF(N / groupsize > 3)> -KFR_INLINE vec<T, N> transpose(const vec<T, N>& x) +CMT_INLINE vec<T, N> transpose(const vec<T, N>& x) { return shufflevector<N, internal::shuffle_index_transpose<N / groupsize, side>, groupsize>(x); } template <size_t side, size_t groupsize = 1, typename T, size_t N, KFR_ENABLE_IF(N / groupsize <= 3)> -KFR_INLINE vec<T, N> transpose(const vec<T, N>& x) +CMT_INLINE vec<T, N> transpose(const vec<T, N>& x) { return x; } +template <typename T, size_t N> +CMT_INLINE vec<vec<T, N>, N> transpose(const vec<vec<T, N>, N>& x) +{ + return *transpose<2>(flatten(x)); +} KFR_FN(transpose) template <size_t side, size_t groupsize = 1, typename T, size_t N, KFR_ENABLE_IF(N / groupsize > 3)> -KFR_INLINE vec<T, N> transposeinverse(const vec<T, N>& x) 
+CMT_INLINE vec<T, N> transposeinverse(const vec<T, N>& x) { return shufflevector<N, internal::shuffle_index_transpose<N / groupsize, N / groupsize / side>, groupsize>(x); } template <size_t side, size_t groupsize = 1, typename T, size_t N, KFR_ENABLE_IF(N / groupsize <= 3)> -KFR_INLINE vec<T, N> transposeinverse(const vec<T, N>& x) +CMT_INLINE vec<T, N> transposeinverse(const vec<T, N>& x) { return x; } KFR_FN(transposeinverse) template <size_t side, typename T, size_t N> -KFR_INLINE vec<T, N> ctranspose(const vec<T, N>& x) +CMT_INLINE vec<T, N> ctranspose(const vec<T, N>& x) { return transpose<side, 2>(x); } KFR_FN(ctranspose) template <size_t side, typename T, size_t N> -KFR_INLINE vec<T, N> ctransposeinverse(const vec<T, N>& x) +CMT_INLINE vec<T, N> ctransposeinverse(const vec<T, N>& x) { return transposeinverse<side, 2>(x); } KFR_FN(ctransposeinverse) template <size_t groupsize = 1, typename T, size_t N, size_t Nout = N * 2> -KFR_INLINE vec<T, Nout> interleave(const vec<T, N>& x, const vec<T, N>& y) +CMT_INLINE vec<T, Nout> interleave(const vec<T, N>& x, const vec<T, N>& y) { return shufflevector<Nout, internal::shuffle_index_transpose<Nout / groupsize, Nout / groupsize / 2>, groupsize>(x, y); @@ -486,13 +491,13 @@ KFR_INLINE vec<T, Nout> interleave(const vec<T, N>& x, const vec<T, N>& y) KFR_FNR(interleave, 1, 2) template <typename E1, typename E2, KFR_ENABLE_IF(is_input_expressions<E1, E2>::value)> -KFR_INLINE internal::expression_function<fn_interleave, E1, E2> interleave(E1&& x, E2&& y) +CMT_INLINE internal::expression_function<fn_interleave, E1, E2> interleave(E1&& x, E2&& y) { return { fn_interleave(), std::forward<E1>(x), std::forward<E2>(y) }; } template <size_t groupsize = 1, typename T, size_t N> -KFR_INLINE vec<T, N> interleavehalfs(const vec<T, N>& x) +CMT_INLINE vec<T, N> interleavehalfs(const vec<T, N>& x) { return shufflevector<N, internal::shuffle_index_transpose<N / groupsize, N / groupsize / 2>, groupsize>( x); @@ -500,7 +505,7 @@ KFR_INLINE vec<T, N> interleavehalfs(const vec<T, N>& x) KFR_FN(interleavehalfs) template <size_t groupsize = 1, typename T, size_t N> -KFR_INLINE vec<T, N> splitpairs(const vec<T, N>& x) +CMT_INLINE vec<T, N> splitpairs(const vec<T, N>& x) { return shufflevector<N, internal::shuffle_index_transpose<N / groupsize, 2>, groupsize>(x); } @@ -516,10 +521,15 @@ struct shuffle_index_reverse } template <size_t groupsize = 1, typename T, size_t N> -KFR_INLINE vec<T, N> reverse(const vec<T, N>& x) +CMT_INLINE vec<T, N> reverse(const vec<T, N>& x) { return shufflevector<N, internal::shuffle_index_reverse<N / groupsize>, groupsize>(x); } +template <typename T, size_t N1, size_t N2> +CMT_INLINE vec<vec<T, N1>, N2> reverse(const vec<vec<T, N1>, N2>& x) +{ + return *swap<N1>(flatten(x)); +} KFR_FN(reverse) namespace internal @@ -532,7 +542,7 @@ struct shuffle_index_combine } template <typename T, size_t N1, size_t N2> -KFR_INLINE vec<T, N1> combine(const vec<T, N1>& x, const vec<T, N2>& y) +CMT_INLINE vec<T, N1> combine(const vec<T, N1>& x, const vec<T, N2>& y) { static_assert(N2 <= N1, "N2 <= N1"); return shufflevector<N1, internal::shuffle_index_combine<N1, N2>>(x, extend<N1>(y)); @@ -557,28 +567,27 @@ struct generate_onoff } template <typename T, size_t N, size_t start = 0, size_t stride = 1> -constexpr KFR_INLINE vec<T, N> enumerate() +constexpr CMT_INLINE vec<T, N> enumerate() { return generate_vector<T, N, internal::generate_index<start, stride>>(); } template <size_t start = 0, size_t stride = 1, typename T, size_t N> -constexpr KFR_INLINE vec<T, 
N> enumerate(vec_t<T, N>) +constexpr CMT_INLINE vec<T, N> enumerate(vec_t<T, N>) { return generate_vector<T, N, internal::generate_index<start, stride>>(); } KFR_FN(enumerate) template <typename T, size_t N, size_t start = 0, size_t size = 1, int on = 1, int off = 0> -constexpr KFR_INLINE vec<T, N> onoff(cint_t<on> = cint_t<on>(), cint_t<off> = cint_t<off>()) +constexpr CMT_INLINE vec<T, N> onoff(cint_t<on> = cint_t<on>(), cint_t<off> = cint_t<off>()) { return generate_vector<T, N, internal::generate_onoff<start, size, on, off>>(); } template <size_t start = 0, size_t size = 1, int on = 1, int off = 0, typename T, size_t N> -constexpr KFR_INLINE vec<T, N> onoff(vec_t<T, N>, cint_t<on> = cint_t<on>(), cint_t<off> = cint_t<off>()) +constexpr CMT_INLINE vec<T, N> onoff(vec_t<T, N>, cint_t<on> = cint_t<on>(), cint_t<off> = cint_t<off>()) { return generate_vector<T, N, internal::generate_onoff<start, size, on, off>>(); } KFR_FN(onoff) } -#define KFR_SHUFFLE_SPECIALIZATIONS #include "specializations.i" diff --git a/include/kfr/base/sin_cos.hpp b/include/kfr/base/sin_cos.hpp @@ -31,7 +31,7 @@ #include "select.hpp" #include "shuffle.hpp" -#if CID_HAS_WARNING("-Wc99-extensions") +#if CMT_HAS_WARNING("-Wc99-extensions") #pragma clang diagnostic ignored "-Wc99-extensions" #endif diff --git a/include/kfr/base/sort.hpp b/include/kfr/base/sort.hpp @@ -37,7 +37,7 @@ namespace kfr * @endcode */ template <typename T, size_t N> -KFR_INLINE vec<T, N> sort(const vec<T, N>& x) +CMT_INLINE vec<T, N> sort(const vec<T, N>& x) { constexpr size_t Nhalf = N / 2; vec<T, Nhalf> e = low(x); @@ -70,7 +70,7 @@ KFR_INLINE vec<T, N> sort(const vec<T, N>& x) * @endcode */ template <typename T, size_t N> -KFR_INLINE vec<T, N> sortdesc(const vec<T, N>& x) +CMT_INLINE vec<T, N> sortdesc(const vec<T, N>& x) { constexpr size_t Nhalf = N / 2; vec<T, Nhalf> e = low(x); diff --git a/include/kfr/base/sqrt.hpp b/include/kfr/base/sqrt.hpp @@ -30,7 +30,7 @@ namespace kfr namespace intrinsics { -#if defined CID_ARCH_SSE2 +#if defined CMT_ARCH_SSE2 KFR_SINTRIN f32x1 sqrt(const f32x1& x) { return slice<0, 1>(tovec(_mm_sqrt_ss(*extend<4>(x)))); } KFR_SINTRIN f64x1 sqrt(const f64x1& x) @@ -40,7 +40,7 @@ KFR_SINTRIN f64x1 sqrt(const f64x1& x) KFR_SINTRIN f32sse sqrt(const f32sse& x) { return _mm_sqrt_ps(*x); } KFR_SINTRIN f64sse sqrt(const f64sse& x) { return _mm_sqrt_pd(*x); } -#if defined CID_ARCH_AVX +#if defined CMT_ARCH_AVX KFR_SINTRIN f32avx sqrt(const f32avx& x) { return _mm256_sqrt_ps(*x); } KFR_SINTRIN f64avx sqrt(const f64avx& x) { return _mm256_sqrt_pd(*x); } #endif diff --git a/include/kfr/base/types.hpp b/include/kfr/base/types.hpp @@ -41,7 +41,7 @@ struct fn_##fn \ { \ template <typename... Args> \ - CID_INLINE_MEMBER decltype(fn(std::declval<Args>()...)) operator()(Args&&... args) const \ + CMT_INLINE_MEMBER decltype(fn(std::declval<Args>()...)) operator()(Args&&... args) const \ { \ return fn(std::forward<Args>(args)...); \ } \ @@ -53,7 +53,7 @@ struct FN \ { \ template <typename... Args> \ - CID_INLINE_MEMBER decltype(::kfr::intrinsics::FN(std::declval<Args>()...)) operator()( \ + CMT_INLINE_MEMBER decltype(::kfr::intrinsics::FN(std::declval<Args>()...)) operator()( \ Args&&... args) const \ { \ return ::kfr::intrinsics::FN(std::forward<Args>(args)...); \ @@ -66,7 +66,7 @@ { \ using ratio = ioratio<in, out>; \ template <typename... Args> \ - CID_INLINE_MEMBER decltype(fn(std::declval<Args>()...)) operator()(Args&&... args) const \ + CMT_INLINE_MEMBER decltype(fn(std::declval<Args>()...)) operator()(Args&&... 
args) const \ { \ return fn(std::forward<Args>(args)...); \ } \ @@ -77,7 +77,7 @@ { \ constexpr fn_##fn() noexcept = default; \ template <typename... Args> \ - KFR_INLINE decltype(fn(std::declval<Args>()...)) operator()(Args&&... args) const \ + CMT_INLINE decltype(fn(std::declval<Args>()...)) operator()(Args&&... args) const \ { \ return fn(std::forward<Args>(args)...); \ } \ @@ -102,7 +102,7 @@ using imax = int64_t; using fmax = double; using f80 = long double; -#ifdef KFR_BASETYPE_F32 +#if defined(KFR_BASETYPE_F32) || defined(KFR_NO_NATIVE_F64) using fbase = f32; #else using fbase = f64; @@ -200,7 +200,7 @@ inline datatype operator&(datatype x, datatype y) struct generic { template <typename T> - KFR_INLINE constexpr operator T() const noexcept + CMT_INLINE constexpr operator T() const noexcept { return T(); } @@ -209,7 +209,7 @@ struct generic struct infinite { template <typename T> - KFR_INLINE constexpr operator T() const noexcept + CMT_INLINE constexpr operator T() const noexcept { return T(); } @@ -234,9 +234,9 @@ enum class archendianness : int _archendianness_max = static_cast<int>(bigendian) }; -typedef void*(KFR_CDECL* func_allocate)(size_t); +typedef void*(CMT_CDECL* func_allocate)(size_t); -typedef void(KFR_CDECL* func_deallocate)(void*); +typedef void(CMT_CDECL* func_deallocate)(void*); struct mem_allocator { @@ -328,7 +328,7 @@ constexpr inline ptrdiff_t distance(const void* x, const void* y) enum class cpu_t : int { common = 0, -#ifdef CID_ARCH_X86 +#ifdef CMT_ARCH_X86 sse2 = 1, sse3 = 2, ssse3 = 3, @@ -340,15 +340,18 @@ enum class cpu_t : int lowest = static_cast<int>(sse2), highest = static_cast<int>(avx2), #endif -#ifdef CID_ARCH_ARM +#ifdef CMT_ARCH_ARM neon = 1, + neon64 = 2, lowest = static_cast<int>(neon), - highest = static_cast<int>(neon), + highest = static_cast<int>(neon64), #endif - native = static_cast<int>(KFR_ARCH_NAME), + native = static_cast<int>(CMT_ARCH_NAME), runtime = -1, }; +#define KFR_ARCH_DEP cpu_t cpu = cpu_t::native + template <cpu_t cpu> using ccpu_t = cval_t<cpu_t, cpu>; @@ -360,7 +363,7 @@ namespace internal constexpr cpu_t older(cpu_t x) { return static_cast<cpu_t>(static_cast<int>(x) - 1); } constexpr cpu_t newer(cpu_t x) { return static_cast<cpu_t>(static_cast<int>(x) + 1); } -#ifdef CID_ARCH_X86 +#ifdef CMT_ARCH_X86 constexpr auto cpu_list = cvals<cpu_t, cpu_t::avx2, cpu_t::avx1, cpu_t::sse41, cpu_t::ssse3, cpu_t::sse3, cpu_t::sse2>; #else @@ -516,23 +519,23 @@ using enable_if_not_f = enable_if<typeclass<T> != datatype::f, R>; namespace internal { -KFR_INLINE f32 builtin_sqrt(f32 x) { return __builtin_sqrtf(x); } -KFR_INLINE f64 builtin_sqrt(f64 x) { return __builtin_sqrt(x); } -KFR_INLINE f80 builtin_sqrt(f80 x) { return __builtin_sqrtl(x); } -KFR_INLINE void builtin_memcpy(void* dest, const void* src, size_t size) +CMT_INLINE f32 builtin_sqrt(f32 x) { return __builtin_sqrtf(x); } +CMT_INLINE f64 builtin_sqrt(f64 x) { return __builtin_sqrt(x); } +CMT_INLINE f80 builtin_sqrt(f80 x) { return __builtin_sqrtl(x); } +CMT_INLINE void builtin_memcpy(void* dest, const void* src, size_t size) { __builtin_memcpy(dest, src, size); } -KFR_INLINE void builtin_memset(void* dest, int val, size_t size) { __builtin_memset(dest, val, size); } +CMT_INLINE void builtin_memset(void* dest, int val, size_t size) { __builtin_memset(dest, val, size); } template <typename T1> -KFR_INLINE void zeroize(T1& value) +CMT_INLINE void zeroize(T1& value) { builtin_memset(static_cast<void*>(std::addressof(value)), 0, sizeof(T1)); } } #pragma clang diagnostic push -#if 
CID_HAS_WARNING("-Wundefined-reinterpret-cast") +#if CMT_HAS_WARNING("-Wundefined-reinterpret-cast") #pragma clang diagnostic ignored "-Wundefined-reinterpret-cast" #endif @@ -578,6 +581,12 @@ constexpr inline static const T* derived_cast(const U* ptr) return static_cast<const T*>(ptr); } +template <typename T, typename U> +constexpr inline static T implicit_cast(U&& value) +{ + return std::forward<T>(value); +} + #pragma clang diagnostic pop __attribute__((unused)) static const char* cpu_name(cpu_t set) @@ -590,7 +599,7 @@ __attribute__((unused)) static const char* cpu_name(cpu_t set) #define KFR_FN_S(fn) \ template <typename Arg, typename... Args> \ - KFR_INLINE enable_if_not_vec<Arg> fn(Arg arg, Args... args) \ + CMT_INLINE enable_if_not_vec<Arg> fn(Arg arg, Args... args) \ { \ return fn(make_vector(arg), make_vector(args)...)[0]; \ } @@ -649,7 +658,7 @@ constexpr size_t widthof() template <typename T> constexpr inline const T& bitness_const(const T& x32, const T& x64) { -#ifdef KFR_ARCH_X64 +#ifdef CMT_ARCH_X64 (void)x32; return x64; #else @@ -660,7 +669,7 @@ constexpr inline const T& bitness_const(const T& x32, const T& x64) constexpr inline const char* bitness_const(const char* x32, const char* x64) { -#ifdef KFR_ARCH_X64 +#ifdef CMT_ARCH_X64 (void)x32; return x64; #else @@ -680,18 +689,18 @@ constexpr size_t common_int_vector_size = 16; template <cpu_t c> constexpr size_t native_float_vector_size = -#ifdef CID_ARCH_X86 +#ifdef CMT_ARCH_X86 c >= cpu_t::avx1 ? 32 : c >= cpu_t::sse2 ? 16 : common_float_vector_size; #endif -#ifdef CID_ARCH_ARM +#ifdef CMT_ARCH_ARM c == cpu_t::neon ? 16 : common_float_vector_size; #endif template <cpu_t c> constexpr size_t native_int_vector_size = -#ifdef CID_ARCH_X86 +#ifdef CMT_ARCH_X86 c >= cpu_t::avx2 ? 32 : c >= cpu_t::sse2 ? 16 : common_int_vector_size; #endif -#ifdef CID_ARCH_ARM +#ifdef CMT_ARCH_ARM c == cpu_t::neon ? 16 : common_int_vector_size; #endif @@ -701,8 +710,8 @@ struct input_expression using size_type = infinite; constexpr size_type size() const noexcept { return {}; } - KFR_INLINE void begin_block(size_t) const {} - KFR_INLINE void end_block(size_t) const {} + CMT_INLINE void begin_block(size_t) const {} + CMT_INLINE void end_block(size_t) const {} }; struct output_expression @@ -711,8 +720,8 @@ struct output_expression using size_type = infinite; constexpr size_type size() const noexcept { return {}; } - KFR_INLINE void output_begin_block(size_t) const {} - KFR_INLINE void output_end_block(size_t) const {} + CMT_INLINE void output_begin_block(size_t) const {} + CMT_INLINE void output_end_block(size_t) const {} }; template <typename E> @@ -731,8 +740,9 @@ template <typename... Ts> using is_numeric_args = and_t<is_numeric<Ts>...>; template <typename T, cpu_t c = cpu_t::native> -constexpr size_t vector_width = typeclass<T> == datatype::f ? native_float_vector_size<c> / sizeof(T) - : native_int_vector_size<c> / sizeof(T); +constexpr size_t vector_width = const_max(size_t(1), typeclass<T> == datatype::f + ? 
native_float_vector_size<c> / sizeof(T)
+                                           : native_int_vector_size<c> / sizeof(T));
 template <cpu_t c>
 constexpr size_t vector_width<void, c> = 0;
@@ -741,11 +751,11 @@ namespace internal
 {
 template <cpu_t c>
-constexpr size_t native_vector_alignment = std::max(native_float_vector_size<c>, native_int_vector_size<c>);
+constexpr size_t native_vector_alignment = const_max(native_float_vector_size<c>, native_int_vector_size<c>);
 template <cpu_t c>
 constexpr bool fast_unaligned =
-#ifdef CID_ARCH_X86
+#ifdef CMT_ARCH_X86
     c >= cpu_t::avx1;
 #else
     false;
@@ -772,7 +782,7 @@ template <typename T, cpu_t c>
 constexpr size_t vector_capacity = native_register_count* vector_width<T, c>;
 template <typename T, cpu_t c>
-constexpr size_t maximum_vector_size = std::min(static_cast<size_t>(32), vector_capacity<T, c> / 4);
+constexpr size_t maximum_vector_size = const_min(static_cast<size_t>(32), vector_capacity<T, c> / 4);
 }
 }
 namespace cometa
 {
 template <typename T, size_t N>
 struct compound_type_traits<kfr::vec_t<T, N>>
 {
-    constexpr static size_t width = N;
-    using subtype = T;
-    using deep_subtype = cometa::deep_subtype<T>;
-    constexpr static bool is_scalar = false;
+    constexpr static size_t width = N;
+    constexpr static size_t deep_width = width * compound_type_traits<T>::width;
+    using subtype = T;
+    using deep_subtype = cometa::deep_subtype<T>;
+    constexpr static bool is_scalar = false;
+    constexpr static size_t depth = cometa::compound_type_traits<T>::depth + 1;
     template <typename U>
     using rebind = kfr::vec_t<U, N>;
diff --git a/include/kfr/base/univector.hpp b/include/kfr/base/univector.hpp
@@ -40,20 +40,20 @@ template <typename T, typename Class>
 struct univector_base : input_expression, output_expression
 {
     template <typename U, size_t N>
-    KFR_INLINE void operator()(coutput_t, size_t index, const vec<U, N>& value)
+    CMT_INLINE void operator()(coutput_t, size_t index, const vec<U, N>& value)
     {
         T* data = derived_cast<Class>(this)->data();
-        write(ptr_cast<T>(data) + index, cast<T>(value));
+        write(ptr_cast<T>(data) + index, vec<T, N>(value));
     }
     template <typename U, size_t N>
-    KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
+    CMT_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const
     {
         const T* data = derived_cast<Class>(this)->data();
-        return cast<U>(read<N>(ptr_cast<T>(data) + index));
+        return static_cast<vec<U, N>>(read<N>(ptr_cast<T>(data) + index));
     }
     template <typename Input, KFR_ENABLE_IF(is_input_expression<Input>::value)>
-    KFR_INLINE Class& operator=(Input&& input)
+    CMT_INLINE Class& operator=(Input&& input)
     {
         assign_expr(std::forward<Input>(input));
         return *derived_cast<Class>(this);
@@ -126,16 +126,16 @@ struct univector_base : input_expression, output_expression
 protected:
     template <typename Input>
-    KFR_INLINE void assign_expr(Input&& input)
+    CMT_INLINE void assign_expr(Input&& input)
     {
         process<T>(*this, std::forward<Input>(input), get_size());
     }
 private:
     constexpr infinite size() const noexcept = delete;
-    KFR_INLINE size_t get_size() const { return derived_cast<Class>(this)->size(); }
-    KFR_INLINE const T* get_data() const { return derived_cast<Class>(this)->data(); }
-    KFR_INLINE T* get_data() { return derived_cast<Class>(this)->data(); }
+    CMT_INLINE size_t get_size() const { return derived_cast<Class>(this)->size(); }
+    CMT_INLINE const T* get_data() const { return derived_cast<Class>(this)->data(); }
+    CMT_INLINE T* get_data() { return derived_cast<Class>(this)->data(); }
 };
 template <typename T, size_t Size>
@@ -197,7 +197,7 @@ struct univector<T, tag_array_ref> : array_ref<T>, univector_base<T, univector<T
     constexpr static bool is_array_ref = true;
     constexpr static bool is_vector = false;
     constexpr static bool is_aligned = false;
-    using value_type = T;
+    using value_type = remove_const<T>;
     using univector_base<T, univector>::operator=;
 };
@@ -249,39 +249,39 @@ template <typename T, size_t Size1 = tag_dynamic_vector, size_t Size2 = tag_dyna
 using univector3d = univector<univector<univector<T, Size3>, Size2>, Size1>;
 template <cpu_t c = cpu_t::native, size_t Tag, typename T, typename Fn>
-KFR_INLINE void process(univector<T, Tag>& vector, Fn&& fn)
+CMT_INLINE void process(univector<T, Tag>& vector, Fn&& fn)
 {
     static_assert(is_input_expression<Fn>::value, "Fn must be an expression");
     return process<T, c>(vector, std::forward<Fn>(fn), vector.size());
 }
 template <cpu_t c = cpu_t::native, typename T, size_t Nsize, typename Fn>
-KFR_INLINE void process(T (&dest)[Nsize], Fn&& fn)
+CMT_INLINE void process(T (&dest)[Nsize], Fn&& fn)
 {
     static_assert(is_input_expression<Fn>::value, "Fn must be an expression");
     return process<T, c>(univector<T, tag_array_ref>(dest), std::forward<Fn>(fn), Nsize);
 }
 template <cpu_t c = cpu_t::native, typename T, typename Fn>
-KFR_INLINE void process(const array_ref<T>& vector, Fn&& fn)
+CMT_INLINE void process(const array_ref<T>& vector, Fn&& fn)
 {
     static_assert(is_input_expression<Fn>::value, "Fn must be an expression");
     return process<T, c>(univector<T, tag_array_ref>(vector), std::forward<Fn>(fn), vector.size());
 }
 template <typename T>
-KFR_INLINE univector_ref<T> make_univector(T* data, size_t size)
+CMT_INLINE univector_ref<T> make_univector(T* data, size_t size)
 {
     return univector_ref<T>(data, size);
 }
 template <typename T>
-KFR_INLINE univector_ref<const T> make_univector(const T* data, size_t size)
+CMT_INLINE univector_ref<const T> make_univector(const T* data, size_t size)
 {
     return univector_ref<const T>(data, size);
 }
 template <typename Expr, typename T = value_type_of<Expr>>
-KFR_INLINE univector<T> render(Expr&& expr)
+CMT_INLINE univector<T> render(Expr&& expr)
 {
     univector<T> result;
     result.resize(expr.size());
@@ -290,7 +290,7 @@ KFR_INLINE univector<T> render(Expr&& expr)
 }
 template <typename Expr, typename T = value_type_of<Expr>>
-KFR_INLINE univector<T> render(Expr&& expr, size_t size)
+CMT_INLINE univector<T> render(Expr&& expr, size_t size)
 {
     univector<T> result;
     result.resize(size);
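Assigning an input expression to a univector still routes through process(), and render() materializes an expression into a freshly allocated univector. A short usage sketch (illustrative only, not part of the change; it assumes kfr/all.hpp and the counter() expression helper provided by the library are available):

```cpp
#include <kfr/all.hpp>
using namespace kfr;

int main()
{
    // assigning an input expression evaluates it over the whole container
    univector<fbase, 8> ramp;
    ramp = counter(); // expected 0, 1, 2, ..., 7

    // render() evaluates an (otherwise unsized) expression into a new univector
    const auto scaled = render(counter() * 2, 8); // expected 0, 2, 4, ..., 14

    return (ramp[3] == 3 && scaled[3] == 6) ? 0 : 1;
}
```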
diff --git a/include/kfr/base/vec.hpp b/include/kfr/base/vec.hpp
@@ -86,20 +86,20 @@ using vec_algn = internal::struct_with_alignment<simd<T, N>, A>;
 template <typename T, size_t N, bool A>
 struct vec_ptr
 {
-    constexpr KFR_INLINE vec_ptr(T* data) noexcept : data(data) {}
-    constexpr KFR_INLINE vec_ptr(const T* data) noexcept : data(const_cast<T*>(data)) {}
-    KFR_INLINE const vec_algn<T, N, A>& operator[](size_t i) const
+    constexpr CMT_INLINE vec_ptr(T* data) noexcept : data(data) {}
+    constexpr CMT_INLINE vec_ptr(const T* data) noexcept : data(const_cast<T*>(data)) {}
+    CMT_INLINE const vec_algn<T, N, A>& operator[](size_t i) const
     {
         return *static_cast<vec_algn<T, N, A>*>(data + i);
     }
-    KFR_INLINE vec_algn<T, N, A>& operator[](size_t i) { return *static_cast<vec_algn<T, N, A>*>(data + i); }
+    CMT_INLINE vec_algn<T, N, A>& operator[](size_t i) { return *static_cast<vec_algn<T, N, A>*>(data + i); }
     T* data;
 };
 template <typename To, typename From, size_t N,
           KFR_ENABLE_IF(std::is_same<subtype<From>, subtype<To>>::value),
          size_t Nout = N*
compound_type_traits<From>::width / compound_type_traits<To>::width> -constexpr KFR_INLINE vec<To, Nout> subcast(const vec<From, N>& value) noexcept +constexpr CMT_INLINE vec<To, Nout> compcast(const vec<From, N>& value) noexcept { return *value; } @@ -127,7 +127,7 @@ get_vec_index(int = 0) constexpr size_t index_undefined = static_cast<size_t>(-1); template <typename T, size_t N, size_t... Indices, KFR_ENABLE_IF(!is_compound<T>::value)> -KFR_INLINE vec<T, sizeof...(Indices)> shufflevector(csizes_t<Indices...>, const vec<T, N>& x, +CMT_INLINE vec<T, sizeof...(Indices)> shufflevector(csizes_t<Indices...>, const vec<T, N>& x, const vec<T, N>& y) { vec<T, sizeof...(Indices)> result = __builtin_shufflevector( @@ -151,22 +151,22 @@ constexpr auto inflate(csize_t<groupsize>, csizes_t<indices...>) } template <typename T, size_t N, size_t... Indices, KFR_ENABLE_IF(is_compound<T>::value)> -KFR_INLINE vec<T, sizeof...(Indices)> shufflevector(csizes_t<Indices...> indices, const vec<T, N>& x, +CMT_INLINE vec<T, sizeof...(Indices)> shufflevector(csizes_t<Indices...> indices, const vec<T, N>& x, const vec<T, N>& y) { - return subcast<T>( - shufflevector(inflate(csize<widthof<T>()>, indices), subcast<subtype<T>>(x), subcast<subtype<T>>(y))); + return compcast<T>(shufflevector(inflate(csize<widthof<T>()>, indices), compcast<subtype<T>>(x), + compcast<subtype<T>>(y))); } template <size_t... Indices, size_t Nout = sizeof...(Indices), typename T, size_t N> -KFR_INLINE vec<T, Nout> shufflevector(csizes_t<Indices...>, const vec<T, N>& x) +CMT_INLINE vec<T, Nout> shufflevector(csizes_t<Indices...>, const vec<T, N>& x) { return internal::shufflevector<T, N>(csizes<Indices...>, x, x); } template <typename Fn, size_t groupsize, typename T, size_t N, size_t... Indices, size_t Nout = sizeof...(Indices)> -KFR_INLINE vec<T, Nout> shufflevector(const vec<T, N>& x, const vec<T, N>& y, cvals_t<size_t, Indices...>) +CMT_INLINE vec<T, Nout> shufflevector(const vec<T, N>& x, const vec<T, N>& y, cvals_t<size_t, Indices...>) { static_assert(N % groupsize == 0, "N % groupsize == 0"); return internal::shufflevector<T, N>( @@ -175,13 +175,13 @@ KFR_INLINE vec<T, Nout> shufflevector(const vec<T, N>& x, const vec<T, N>& y, cv } template <size_t Nout, typename Fn, size_t groupsize = 1, typename T, size_t N> -KFR_INLINE vec<T, Nout> shufflevector(const vec<T, N>& x, const vec<T, N>& y) +CMT_INLINE vec<T, Nout> shufflevector(const vec<T, N>& x, const vec<T, N>& y) { return internal::shufflevector<Fn, groupsize>(x, y, csizeseq<Nout>); } template <size_t Nout, typename Fn, size_t groupsize = 1, typename T, size_t N> -KFR_INLINE vec<T, Nout> shufflevector(const vec<T, N>& x) +CMT_INLINE vec<T, Nout> shufflevector(const vec<T, N>& x) { return internal::shufflevector<Fn, groupsize>(x, x, csizeseq<Nout>); } @@ -225,110 +225,165 @@ constexpr swiz<14> s14{}; constexpr swiz<15> s15{}; } -template <typename To, typename From, KFR_ENABLE_IF(!is_compound<From>::value)> -constexpr KFR_INLINE To cast(From value) noexcept +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wold-style-cast" + +template <size_t N, typename T> +constexpr CMT_INLINE vec<T, N> broadcast(T x) { - return static_cast<To>(value); + return (simd<T, N>)(x); } -template <typename To, typename From, KFR_ENABLE_IF(!is_compound<From>::value)> -constexpr KFR_INLINE To bitcast(From value) noexcept + +#pragma clang diagnostic pop + +namespace internal { - union { - From from; - To to; - } u{ value }; - return u.to; + +template <typename To, typename From, size_t N, 
typename Tsub = deep_subtype<To>, + size_t Nout = N* compound_type_traits<To>::deep_width> +constexpr CMT_INLINE vec<To, N> builtin_convertvector(const vec<From, N>& value) noexcept +{ + return __builtin_convertvector(*value, simd<Tsub, Nout>); } -template <typename From, typename To = utype<From>, KFR_ENABLE_IF(!is_compound<From>::value)> -constexpr KFR_INLINE To ubitcast(From value) noexcept +// scalar to scalar +template <typename To, typename From> +struct conversion { - return bitcast<To>(value); + static_assert(std::is_convertible<From, To>::value, ""); + static To cast(const From& value) { return value; } +}; + +// vector to vector +template <typename To, typename From, size_t N> +struct conversion<vec<To, N>, vec<From, N>> +{ + static_assert(!is_compound<To>::value, ""); + static_assert(!is_compound<From>::value, ""); + static vec<To, N> cast(const vec<From, N>& value) { return builtin_convertvector<To>(value); } +}; + +// vector<vector> to vector<vector> +template <typename To, typename From, size_t N1, size_t N2> +struct conversion<vec<vec<To, N1>, N2>, vec<vec<From, N1>, N2>> +{ + static_assert(!is_compound<To>::value, ""); + static_assert(!is_compound<From>::value, ""); + static vec<vec<To, N1>, N2> cast(const vec<vec<From, N1>, N2>& value) + { + return builtin_convertvector<vec<To, N1>>(value); + } +}; + +// scalar to vector +template <typename To, typename From, size_t N> +struct conversion<vec<To, N>, From> +{ + static_assert(std::is_convertible<From, To>::value, ""); + static vec<To, N> cast(const From& value) { return broadcast<N>(static_cast<To>(value)); } +}; + +// mask to mask +template <typename To, typename From, size_t N> +struct conversion<mask<To, N>, mask<From, N>> +{ + static_assert(sizeof(To) == sizeof(From), ""); + static mask<To, N> cast(const mask<From, N>& value) { return reinterpret_cast<simd<To, N>>(*value); } +}; } -template <typename From, typename To = itype<From>, KFR_ENABLE_IF(!is_compound<From>::value)> -constexpr KFR_INLINE To ibitcast(From value) noexcept +template <typename T> +constexpr size_t size_of() noexcept { - return bitcast<To>(value); + return sizeof(deep_subtype<T>) * compound_type_traits<T>::deep_width; } -template <typename From, typename To = ftype<From>, KFR_ENABLE_IF(!is_compound<From>::value)> -constexpr KFR_INLINE To fbitcast(From value) noexcept +template <typename From, size_t N, typename Tsub = deep_subtype<From>, + size_t Nout = N* size_of<From>() / size_of<Tsub>()> +constexpr CMT_INLINE vec<Tsub, Nout> flatten(const vec<From, N>& value) noexcept { - return bitcast<To>(value); + return *value; } -template <typename To, typename From, size_t N, KFR_ENABLE_IF(!is_compound<To>::value)> -constexpr KFR_INLINE vec<To, N> cast(const vec<From, N>& value) noexcept +template <typename To, typename From, typename Tout = deep_rebind<From, To>> +constexpr CMT_INLINE Tout cast(const From& value) noexcept { - return __builtin_convertvector(*value, simd<To, N>); + return static_cast<Tout>(value); } -template <typename To, typename From, simdindex N> -constexpr KFR_INLINE simd<To, N> cast(const simd<From, N>& value) noexcept + +template <typename To, typename From> +constexpr CMT_INLINE To bitcast(const From& value) noexcept { - return __builtin_convertvector(value, simd<To, N>); + static_assert(sizeof(From) == sizeof(To), "bitcast: Incompatible types"); + union { + From from; + To to; + } u{ value }; + return u.to; } -template <typename To, typename From, size_t N, size_t Nout = sizeof(From) * N / sizeof(To)> -constexpr KFR_INLINE vec<To, 
Nout> bitcast(const vec<From, N>& value) noexcept + +template <typename To, typename From, size_t N, size_t Nout = N* size_of<From>() / size_of<To>()> +constexpr CMT_INLINE vec<To, Nout> bitcast(const vec<From, N>& value) noexcept { - return reinterpret_cast<simd<To, Nout>>(*value); + return reinterpret_cast<typename vec<To, Nout>::simd_t>(*value); } -template <typename To, typename From, simdindex N, simdindex Nout = sizeof(From) * N / sizeof(To)> -constexpr KFR_INLINE simd<To, Nout> bitcast(const simd<From, N>& value) noexcept + +template <typename To, typename From, size_t N, size_t Nout = N* size_of<From>() / size_of<To>()> +constexpr CMT_INLINE mask<To, Nout> bitcast(const mask<From, N>& value) noexcept { - return reinterpret_cast<simd<To, Nout>>(value); + return reinterpret_cast<typename mask<To, Nout>::simd_t>(*value); } -template <typename From, size_t N, typename To = utype<From>, size_t Nout = sizeof(From) * N / sizeof(To)> -constexpr KFR_INLINE vec<To, Nout> ubitcast(const vec<From, N>& value) noexcept +template <typename From, typename To = utype<From>, KFR_ENABLE_IF(!is_compound<From>::value)> +constexpr CMT_INLINE To ubitcast(const From& value) noexcept { - return reinterpret_cast<simd<To, Nout>>(*value); + return bitcast<To>(value); } -template <typename From, size_t N, typename To = itype<From>, size_t Nout = sizeof(From) * N / sizeof(To)> -constexpr KFR_INLINE vec<To, Nout> ibitcast(const vec<From, N>& value) noexcept +template <typename From, typename To = itype<From>, KFR_ENABLE_IF(!is_compound<From>::value)> +constexpr CMT_INLINE To ibitcast(const From& value) noexcept { - return reinterpret_cast<simd<To, Nout>>(*value); + return bitcast<To>(value); } -template <typename From, size_t N, typename To = ftype<From>, size_t Nout = sizeof(From) * N / sizeof(To)> -constexpr KFR_INLINE vec<To, Nout> fbitcast(const vec<From, N>& value) noexcept +template <typename From, typename To = ftype<From>, KFR_ENABLE_IF(!is_compound<From>::value)> +constexpr CMT_INLINE To fbitcast(const From& value) noexcept { - return reinterpret_cast<simd<To, Nout>>(*value); + return bitcast<To>(value); } -template <typename From, simdindex N, typename To = utype<From>, - simdindex Nout = sizeof(From) * N / sizeof(To)> -constexpr KFR_INLINE simd<To, Nout> ubitcast(const simd<From, N>& value) noexcept +template <typename From, size_t N, typename To = utype<From>, + size_t Nout = size_of<From>() * N / size_of<To>()> +constexpr CMT_INLINE vec<To, Nout> ubitcast(const vec<From, N>& value) noexcept { - return reinterpret_cast<simd<To, Nout>>(value); + return reinterpret_cast<simd<To, Nout>>(*value); } -template <typename From, simdindex N, typename To = itype<From>, - simdindex Nout = sizeof(From) * N / sizeof(To)> -constexpr KFR_INLINE simd<To, Nout> ibitcast(const simd<From, N>& value) noexcept +template <typename From, size_t N, typename To = itype<From>, + size_t Nout = size_of<From>() * N / size_of<To>()> +constexpr CMT_INLINE vec<To, Nout> ibitcast(const vec<From, N>& value) noexcept { - return reinterpret_cast<simd<To, Nout>>(value); + return reinterpret_cast<simd<To, Nout>>(*value); } -template <typename From, simdindex N, typename To = ftype<From>, - simdindex Nout = sizeof(From) * N / sizeof(To)> -constexpr KFR_INLINE simd<To, Nout> fbitcast(const simd<From, N>& value) noexcept +template <typename From, size_t N, typename To = ftype<From>, + size_t Nout = size_of<From>() * N / size_of<To>()> +constexpr CMT_INLINE vec<To, Nout> fbitcast(const vec<From, N>& value) noexcept { - return 
reinterpret_cast<simd<To, Nout>>(value); + return reinterpret_cast<simd<To, Nout>>(*value); } -constexpr KFR_INLINE size_t vector_alignment(size_t size) { return next_poweroftwo(size); } +constexpr CMT_INLINE size_t vector_alignment(size_t size) { return next_poweroftwo(size); } -template <typename T, size_t N, size_t... Sizes, size_t Nout = N + csum(csizes<Sizes...>)> -KFR_INLINE vec<T, Nout> concat(const vec<T, N>& x, const vec<T, Sizes>&... rest); +template <typename T, size_t N, size_t... Sizes> +CMT_INLINE vec<T, N + csum(csizes<Sizes...>)> concat(const vec<T, N>& x, const vec<T, Sizes>&... rest); namespace internal { template <size_t start = 0, size_t stride = 1> struct shuffle_index { - constexpr KFR_INLINE size_t operator()(size_t index) const { return start + index * stride; } + constexpr CMT_INLINE size_t operator()(size_t index) const { return start + index * stride; } }; template <size_t count, size_t start = 0, size_t stride = 1> @@ -339,30 +394,19 @@ struct shuffle_index_wrap } template <size_t count, typename T, size_t N, size_t Nout = N* count> -KFR_INLINE vec<T, Nout> repeat(const vec<T, N>& x) +CMT_INLINE vec<T, Nout> repeat(const vec<T, N>& x) { return shufflevector<Nout, internal::shuffle_index_wrap<N, 0, 1>>(x); } KFR_FN(repeat) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wold-style-cast" - -template <size_t N, typename T> -constexpr KFR_INLINE vec<T, N> broadcast(T x) -{ - return (simd<T, N>)(x); -} - -#pragma clang diagnostic pop - template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(Nout != N)> -KFR_INLINE vec<T, Nout> resize(const vec<T, N>& x) +CMT_INLINE vec<T, Nout> resize(const vec<T, N>& x) { return shufflevector<Nout, internal::shuffle_index_wrap<N, 0, 1>>(x); } template <size_t Nout, typename T, size_t N, KFR_ENABLE_IF(Nout == N)> -constexpr KFR_INLINE vec<T, Nout> resize(const vec<T, N>& x) +constexpr CMT_INLINE vec<T, Nout> resize(const vec<T, N>& x) { return x; } @@ -372,13 +416,13 @@ namespace internal_read_write { template <size_t N, bool A = false, typename T, KFR_ENABLE_IF(is_poweroftwo(N))> -KFR_INLINE vec<T, N> read(const T* src) +CMT_INLINE vec<T, N> read(const T* src) { return ptr_cast<vec_algn<subtype<T>, vec<T, N>::scalar_size(), A>>(src)->value; } template <size_t N, bool A = false, typename T, KFR_ENABLE_IF(!is_poweroftwo(N))> -KFR_INLINE vec<T, N> read(const T* src) +CMT_INLINE vec<T, N> read(const T* src) { constexpr size_t first = prev_poweroftwo(N); constexpr size_t rest = N - first; @@ -387,13 +431,13 @@ KFR_INLINE vec<T, N> read(const T* src) } template <bool A = false, size_t N, typename T, KFR_ENABLE_IF(is_poweroftwo(N))> -KFR_INLINE void write(T* dest, const vec<T, N>& value) +CMT_INLINE void write(T* dest, const vec<T, N>& value) { ptr_cast<vec_algn<subtype<T>, vec<T, N>::scalar_size(), A>>(dest)->value = *value; } template <bool A = false, size_t N, typename T, KFR_ENABLE_IF(!is_poweroftwo(N))> -KFR_INLINE void write(T* dest, const vec<T, N>& value) +CMT_INLINE void write(T* dest, const vec<T, N>& value) { constexpr size_t first = prev_poweroftwo(N); constexpr size_t rest = N - first; @@ -422,7 +466,8 @@ private: template <typename T> struct vec_op { - using scalar_type = subtype<T>; + using scalar_type = subtype<T>; + using uscalar_type = utype<scalar_type>; template <simdindex N> constexpr static simd<scalar_type, N> add(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept @@ -467,64 +512,67 @@ struct vec_op template <simdindex N> constexpr static simd<scalar_type, N> band(simd<scalar_type, N> x, 
simd<scalar_type, N> y) noexcept { - return bitcast<scalar_type>(ubitcast(x) & ubitcast(y)); + return reinterpret_cast<simd<scalar_type, N>>(reinterpret_cast<simd<uscalar_type, N>>(x) & + reinterpret_cast<simd<uscalar_type, N>>(y)); } template <simdindex N> constexpr static simd<scalar_type, N> bor(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept { - return bitcast<scalar_type>(ubitcast(x) | ubitcast(y)); + return reinterpret_cast<simd<scalar_type, N>>(reinterpret_cast<simd<uscalar_type, N>>(x) | + reinterpret_cast<simd<uscalar_type, N>>(y)); } template <simdindex N> constexpr static simd<scalar_type, N> bxor(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept { - return bitcast<scalar_type>(ubitcast(x) ^ ubitcast(y)); + return reinterpret_cast<simd<scalar_type, N>>(reinterpret_cast<simd<uscalar_type, N>>(x) ^ + reinterpret_cast<simd<uscalar_type, N>>(y)); } template <simdindex N> constexpr static simd<scalar_type, N> bnot(simd<scalar_type, N> x) noexcept { - return bitcast<scalar_type>(~ubitcast(x)); + return reinterpret_cast<simd<scalar_type, N>>(~reinterpret_cast<simd<uscalar_type, N>>(x)); } template <simdindex N> constexpr static simd<scalar_type, N> eq(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept { - return bitcast<scalar_type>(x == y); + return reinterpret_cast<simd<scalar_type, N>>(x == y); } template <simdindex N> constexpr static simd<scalar_type, N> ne(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept { - return bitcast<scalar_type>(x != y); + return reinterpret_cast<simd<scalar_type, N>>(x != y); } template <simdindex N> constexpr static simd<scalar_type, N> lt(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept { - return bitcast<scalar_type>(x < y); + return reinterpret_cast<simd<scalar_type, N>>(x < y); } template <simdindex N> constexpr static simd<scalar_type, N> gt(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept { - return bitcast<scalar_type>(x > y); + return reinterpret_cast<simd<scalar_type, N>>(x > y); } template <simdindex N> constexpr static simd<scalar_type, N> le(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept { - return bitcast<scalar_type>(x <= y); + return reinterpret_cast<simd<scalar_type, N>>(x <= y); } template <simdindex N> constexpr static simd<scalar_type, N> ge(simd<scalar_type, N> x, simd<scalar_type, N> y) noexcept { - return bitcast<scalar_type>(x >= y); + return reinterpret_cast<simd<scalar_type, N>>(x >= y); } }; namespace internal { template <typename T, typename... Args, size_t... indices, size_t N = 1 + sizeof...(Args)> -constexpr KFR_INLINE vec<T, N> make_vector_impl(csizes_t<indices...>, const T& x, const Args&... rest) +constexpr CMT_INLINE vec<T, N> make_vector_impl(csizes_t<indices...>, const T& x, const Args&... rest) { constexpr size_t width = compound_type_traits<T>::width; - const std::tuple<const T&, const Args&...> list(x, rest...); - typename vec<T, N>::simd_t result{ compound_type_traits<T>::at(std::get<indices / width>(list), + const T list[] = { x, rest... }; + typename vec<T, N>::simd_t result{ compound_type_traits<T>::at(list[indices / width], indices % width)... }; return result; } @@ -536,26 +584,27 @@ constexpr KFR_INLINE vec<T, N> make_vector_impl(csizes_t<indices...>, const T& x /// @encode template <typename Type = void, typename Arg, typename... Args, size_t N = (sizeof...(Args) + 1), typename SubType = conditional<is_void<Type>::value, common_type<Arg, Args...>, Type>> -constexpr KFR_INLINE vec<SubType, N> make_vector(const Arg& x, const Args&... 
rest) +constexpr CMT_INLINE vec<SubType, N> make_vector(const Arg& x, const Args&... rest) { return internal::make_vector_impl<SubType>(csizeseq<N * widthof<SubType>()>, static_cast<SubType>(x), static_cast<SubType>(rest)...); } template <typename T, size_t N> -constexpr KFR_INLINE vec<T, N> make_vector(const vec<T, N>& x) +constexpr CMT_INLINE vec<T, N> make_vector(const vec<T, N>& x) { return x; } template <typename T, T... Values, size_t N = sizeof...(Values)> -constexpr KFR_INLINE vec<T, N> make_vector(cvals_t<T, Values...>) +constexpr CMT_INLINE vec<T, N> make_vector(cvals_t<T, Values...>) { return make_vector<T>(Values...); } KFR_FN(make_vector) template <typename Type = void, typename Arg, typename... Args, size_t N = (sizeof...(Args) + 1), - typename SubType = conditional<is_void<Type>::value, common_type<Arg, Args...>, Type>> -constexpr KFR_INLINE vec<SubType, N> pack(const Arg& x, const Args&... rest) + typename SubType = conditional<is_void<Type>::value, common_type<Arg, Args...>, Type>, + KFR_ENABLE_IF(is_numeric<SubType>::value)> +constexpr CMT_INLINE vec<SubType, N> pack(const Arg& x, const Args&... rest) { return internal::make_vector_impl<SubType>(csizeseq<N * widthof<SubType>()>, static_cast<SubType>(x), static_cast<SubType>(rest)...); @@ -567,6 +616,10 @@ struct vec : vec_t<T, N> { static_assert(N > 0 && N <= 256, "Invalid vector size"); + static_assert(!is_vec<T>::value || is_poweroftwo(size_of<T>()), + "Inner vector size must be a power of two"); + + using UT = utype<T>; using value_type = T; using scalar_type = subtype<T>; constexpr static size_t scalar_size() noexcept { return N * compound_type_traits<T>::width; } @@ -576,94 +629,98 @@ struct vec : vec_t<T, N> constexpr static bool is_pod = true; - constexpr KFR_INLINE vec() noexcept {} - constexpr KFR_INLINE vec(simd_t value) noexcept : v(value) {} - constexpr KFR_INLINE vec(const array_ref<T>& value) noexcept + constexpr CMT_INLINE vec() noexcept {} + constexpr CMT_INLINE vec(simd_t value) noexcept : v(value) {} + constexpr CMT_INLINE vec(const array_ref<T>& value) noexcept + : v(*internal_read_write::read<N, false>(value.data())) + { + } + constexpr CMT_INLINE vec(const array_ref<const T>& value) noexcept : v(*internal_read_write::read<N, false>(value.data())) { } template <typename U, KFR_ENABLE_IF(std::is_convertible<U, T>::value&& compound_type_traits<T>::width > 1)> - constexpr KFR_INLINE vec(const U& value) noexcept + constexpr CMT_INLINE vec(const U& value) noexcept : v(*resize<scalar_size()>(bitcast<scalar_type>(make_vector(static_cast<T>(value))))) { } template <typename U, KFR_ENABLE_IF(std::is_convertible<U, T>::value&& compound_type_traits<T>::width == 1)> - constexpr KFR_INLINE vec(const U& value) noexcept : v(static_cast<T>(value)) + constexpr CMT_INLINE vec(const U& value) noexcept : v(static_cast<T>(value)) { } template <typename... Ts> - constexpr KFR_INLINE vec(const T& x, const T& y, const Ts&... rest) noexcept + constexpr CMT_INLINE vec(const T& x, const T& y, const Ts&... rest) noexcept : v(*make_vector<T>(x, y, rest...)) { static_assert(N <= 2 + sizeof...(Ts), "Too few initializers for vec"); } template <size_t N1, size_t N2, size_t... Ns> - constexpr KFR_INLINE vec(const vec<T, N1>& v1, const vec<T, N2>& v2, + constexpr CMT_INLINE vec(const vec<T, N1>& v1, const vec<T, N2>& v2, const vec<T, Ns>&... 
vectors) noexcept : v(*concat(v1, v2, vectors...)) { static_assert(csum(csizes<N1, N2, Ns...>) == N, "Can't concat vectors: invalid csizes"); } - constexpr KFR_INLINE vec(const vec&) noexcept = default; - constexpr KFR_INLINE vec(vec&&) noexcept = default; - constexpr KFR_INLINE vec& operator=(const vec&) noexcept = default; - constexpr KFR_INLINE vec& operator=(vec&&) noexcept = default; - - friend constexpr KFR_INLINE vec operator+(const vec& x, const vec& y) { return vec_op<T>::add(x.v, y.v); } - friend constexpr KFR_INLINE vec operator-(const vec& x, const vec& y) { return vec_op<T>::sub(x.v, y.v); } - friend constexpr KFR_INLINE vec operator*(const vec& x, const vec& y) { return vec_op<T>::mul(x.v, y.v); } - friend constexpr KFR_INLINE vec operator/(const vec& x, const vec& y) { return vec_op<T>::div(x.v, y.v); } - friend constexpr KFR_INLINE vec operator%(const vec& x, const vec& y) { return vec_op<T>::rem(x.v, y.v); } - friend constexpr KFR_INLINE vec operator-(const vec& x) { return vec_op<T>::neg(x.v); } - - friend constexpr KFR_INLINE vec operator&(const vec& x, const vec& y) + constexpr CMT_INLINE vec(const vec&) noexcept = default; + constexpr CMT_INLINE vec(vec&&) noexcept = default; + constexpr CMT_INLINE vec& operator=(const vec&) noexcept = default; + constexpr CMT_INLINE vec& operator=(vec&&) noexcept = default; + + friend constexpr CMT_INLINE vec operator+(const vec& x, const vec& y) { return vec_op<T>::add(x.v, y.v); } + friend constexpr CMT_INLINE vec operator-(const vec& x, const vec& y) { return vec_op<T>::sub(x.v, y.v); } + friend constexpr CMT_INLINE vec operator*(const vec& x, const vec& y) { return vec_op<T>::mul(x.v, y.v); } + friend constexpr CMT_INLINE vec operator/(const vec& x, const vec& y) { return vec_op<T>::div(x.v, y.v); } + friend constexpr CMT_INLINE vec operator%(const vec& x, const vec& y) { return vec_op<T>::rem(x.v, y.v); } + friend constexpr CMT_INLINE vec operator-(const vec& x) { return vec_op<T>::neg(x.v); } + + friend constexpr CMT_INLINE vec operator&(const vec& x, const vec& y) { return vec_op<T>::band(x.v, y.v); } - friend constexpr KFR_INLINE vec operator|(const vec& x, const vec& y) { return vec_op<T>::bor(x.v, y.v); } - friend constexpr KFR_INLINE vec operator^(const vec& x, const vec& y) + friend constexpr CMT_INLINE vec operator|(const vec& x, const vec& y) { return vec_op<T>::bor(x.v, y.v); } + friend constexpr CMT_INLINE vec operator^(const vec& x, const vec& y) { return vec_op<T>::bxor(x.v, y.v); } - friend constexpr KFR_INLINE vec operator~(const vec& x) { return vec_op<T>::bnot(x.v); } + friend constexpr CMT_INLINE vec operator~(const vec& x) { return vec_op<T>::bnot(x.v); } - friend constexpr KFR_INLINE vec operator<<(const vec& x, const vec& y) + friend constexpr CMT_INLINE vec operator<<(const vec& x, const vec& y) { return vec_op<T>::shl(x.v, y.v); } - friend constexpr KFR_INLINE vec operator>>(const vec& x, const vec& y) + friend constexpr CMT_INLINE vec operator>>(const vec& x, const vec& y) { return vec_op<T>::shr(x.v, y.v); } - friend constexpr KFR_INLINE mask<T, N> operator==(const vec& x, const vec& y) + friend constexpr CMT_INLINE mask<T, N> operator==(const vec& x, const vec& y) { return vec_op<T>::eq(x.v, y.v); } - friend constexpr KFR_INLINE mask<T, N> operator!=(const vec& x, const vec& y) + friend constexpr CMT_INLINE mask<T, N> operator!=(const vec& x, const vec& y) { return vec_op<T>::ne(x.v, y.v); } - friend constexpr KFR_INLINE mask<T, N> operator<(const vec& x, const vec& y) + friend constexpr CMT_INLINE 
mask<T, N> operator<(const vec& x, const vec& y) { return vec_op<T>::lt(x.v, y.v); } - friend constexpr KFR_INLINE mask<T, N> operator>(const vec& x, const vec& y) + friend constexpr CMT_INLINE mask<T, N> operator>(const vec& x, const vec& y) { return vec_op<T>::gt(x.v, y.v); } - friend constexpr KFR_INLINE mask<T, N> operator<=(const vec& x, const vec& y) + friend constexpr CMT_INLINE mask<T, N> operator<=(const vec& x, const vec& y) { return vec_op<T>::le(x.v, y.v); } - friend constexpr KFR_INLINE mask<T, N> operator>=(const vec& x, const vec& y) + friend constexpr CMT_INLINE mask<T, N> operator>=(const vec& x, const vec& y) { return vec_op<T>::ge(x.v, y.v); } #define KFR_ASGN_OP(aop, op) \ - friend KFR_INLINE vec& operator aop(vec& x, const vec& y) \ + friend CMT_INLINE vec& operator aop(vec& x, const vec& y) \ { \ x = x op y; \ return x; \ @@ -678,22 +735,49 @@ struct vec : vec_t<T, N> KFR_ASGN_OP(^=, ^) KFR_ASGN_OP(<<=, <<) KFR_ASGN_OP(>>=, >>) +#undef KFR_ASGN_OP + + template <typename U, typename C = common_type<U, T>> + friend constexpr CMT_INLINE vec<C, N> operator+(const vec& x, const vec<U, N>& y) + { + return vec_op<C>::add(static_cast<vec<C, N>>(x).v, static_cast<vec<C, N>>(y).v); + } + template <typename U, typename C = common_type<U, T>> + friend constexpr CMT_INLINE vec<C, N> operator-(const vec& x, const vec<U, N>& y) + { + return vec_op<C>::sub(static_cast<vec<C, N>>(x).v, static_cast<vec<C, N>>(y).v); + } + template <typename U, typename C = common_type<U, T>> + friend constexpr CMT_INLINE vec<C, N> operator*(const vec& x, const vec<U, N>& y) + { + return vec_op<C>::mul(static_cast<vec<C, N>>(x).v, static_cast<vec<C, N>>(y).v); + } + template <typename U, typename C = common_type<U, T>> + friend constexpr CMT_INLINE vec<C, N> operator/(const vec& x, const vec<U, N>& y) + { + return vec_op<C>::div(static_cast<vec<C, N>>(x).v, static_cast<vec<C, N>>(y).v); + } + template <typename U, typename C = common_type<U, T>> + friend constexpr CMT_INLINE vec<C, N> operator%(const vec& x, const vec<U, N>& y) + { + return vec_op<C>::rem(static_cast<vec<C, N>>(x).v, static_cast<vec<C, N>>(y).v); + } - constexpr KFR_INLINE simd_t operator*() const { return v; } - constexpr KFR_INLINE simd_t& operator*() { return v; } - KFR_INLINE mask<T, N>& asmask() { return ref_cast<mask<T, N>>(*this); } - KFR_INLINE const mask<T, N>& asmask() const { return ref_cast<mask<T, N>>(*this); } - KFR_INLINE value_type operator[](size_t index) const { return data()[index]; } + constexpr CMT_INLINE simd_t operator*() const { return v; } + constexpr CMT_INLINE simd_t& operator*() { return v; } + CMT_INLINE mask<T, N>& asmask() { return ref_cast<mask<T, N>>(*this); } + CMT_INLINE const mask<T, N>& asmask() const { return ref_cast<mask<T, N>>(*this); } + CMT_INLINE value_type operator[](size_t index) const { return data()[index]; } - KFR_INLINE value_type* data() { return ptr_cast<T>(&v); } - KFR_INLINE const T* data() const { return ptr_cast<T>(&v); } + CMT_INLINE value_type* data() { return ptr_cast<T>(&v); } + CMT_INLINE const T* data() const { return ptr_cast<T>(&v); } using array_t = T (&)[N]; - KFR_INLINE array_t arr() { return ref_cast<array_t>(v); } + CMT_INLINE array_t arr() { return ref_cast<array_t>(v); } - template <typename U, KFR_ENABLE_IF(std::is_convertible<T, U>::value)> + template <typename U, KFR_ENABLE_IF(std::is_convertible<T, U>::value && !std::is_same<U, vec>::value)> constexpr operator vec<U, N>() const noexcept { - return cast<U>(*this); + return internal::conversion<vec<U, N>, vec<T, 
N>>::cast(*this); } private: @@ -714,12 +798,12 @@ private: struct getter_setter { constexpr getter_setter(simd_t& v, size_t index) noexcept : v(v), index(index) {} - KFR_INLINE getter_setter& operator=(scalar_type value) noexcept + CMT_INLINE getter_setter& operator=(scalar_type value) noexcept { v[index] = value; return *this; } - KFR_INLINE operator scalar_type() const { return v[index]; } + CMT_INLINE operator scalar_type() const { return v[index]; } private: friend struct vec; simd_t& v; @@ -730,72 +814,69 @@ private: template <typename T, size_t N> struct mask : public vec<T, N> { + using UT = utype<T>; using type = T; constexpr static size_t width = N; using base = vec<T, N>; - constexpr KFR_INLINE mask() noexcept : base() {} + constexpr CMT_INLINE mask() noexcept : base() {} - constexpr KFR_INLINE mask(simd<T, N> value) noexcept : base(value) {} + constexpr CMT_INLINE mask(simd<T, N> value) noexcept : base(value) {} template <size_t N1, size_t... Ns> - constexpr KFR_INLINE mask(const mask<T, N1>& mask1, const mask<T, Ns>&... masks) noexcept + constexpr CMT_INLINE mask(const mask<T, N1>& mask1, const mask<T, Ns>&... masks) noexcept : base(*concat(mask1, masks...)) { } template <typename... Ts, typename = enable_if<sizeof...(Ts) + 2 == N>> - constexpr KFR_INLINE mask(bool x, bool y, Ts... rest) noexcept + constexpr CMT_INLINE mask(bool x, bool y, Ts... rest) noexcept : base{ internal::maskbits<T>(x), internal::maskbits<T>(y), internal::maskbits<T>(rest)... } { } - constexpr KFR_INLINE mask(const mask&) noexcept = default; - constexpr KFR_INLINE mask(mask&&) noexcept = default; - KFR_INLINE mask& operator=(const mask&) noexcept = default; - KFR_INLINE mask& operator=(mask&&) noexcept = default; + constexpr CMT_INLINE mask(const mask&) noexcept = default; + constexpr CMT_INLINE mask(mask&&) noexcept = default; + CMT_INLINE mask& operator=(const mask&) noexcept = default; + CMT_INLINE mask& operator=(mask&&) noexcept = default; template <typename M, KFR_ENABLE_IF(sizeof(T) == sizeof(M))> - constexpr KFR_INLINE mask(const vec<M, N>& value) : base(bitcast<T>(value)) + constexpr CMT_INLINE mask(const vec<M, N>& value) : base(bitcast<T>(value)) { } - // template <typename M, typename = u8[sizeof(T) == sizeof(M)]> - // constexpr KFR_INLINE mask(mask<M, N> value) : base(reinterpret_cast<const vec<T, N>&>(value)) - // { - // } - constexpr KFR_INLINE mask operator~() const { return bitcast<T>(~ubitcast(this->v)); } - constexpr KFR_INLINE mask operator&(const vec<T, N>& x) const + friend constexpr CMT_INLINE mask operator&(const mask& x, const mask& y) { - return bitcast<T>(ubitcast(this->v) & ubitcast(x.v)); + return vec_op<T>::band(x.v, y.v); } - constexpr KFR_INLINE mask operator|(const vec<T, N>& x) const + friend constexpr CMT_INLINE mask operator|(const mask& x, const mask& y) { - return bitcast<T>(ubitcast(this->v) | ubitcast(x.v)); + return vec_op<T>::bor(x.v, y.v); } - constexpr KFR_INLINE mask operator^(const vec<T, N>& x) const + friend constexpr CMT_INLINE mask operator^(const mask& x, const mask& y) { - return bitcast<T>(ubitcast(this->v) ^ ubitcast(x.v)); + return vec_op<T>::bxor(x.v, y.v); } + friend constexpr CMT_INLINE mask operator~(const mask& x) { return vec_op<T>::bnot(x.v); } - constexpr KFR_INLINE mask operator&&(const mask& x) const { return *this & x; } - constexpr KFR_INLINE mask operator||(const mask& x) const { return *this | x; } - constexpr KFR_INLINE mask operator!() const { return ~*this; } + constexpr CMT_INLINE mask operator&&(const mask& x) const { return *this 
& x; } + constexpr CMT_INLINE mask operator||(const mask& x) const { return *this | x; } + constexpr CMT_INLINE mask operator!() const { return ~*this; } - constexpr KFR_INLINE simd<T, N> operator*() const { return this->v; } + constexpr CMT_INLINE simd<T, N> operator*() const { return this->v; } - KFR_INLINE vec<T, N>& asvec() { return ref_cast<mask>(*this); } - KFR_INLINE const vec<T, N>& asvec() const { return ref_cast<mask>(*this); } + CMT_INLINE vec<T, N>& asvec() { return ref_cast<mask>(*this); } + CMT_INLINE const vec<T, N>& asvec() const { return ref_cast<mask>(*this); } template <typename U, KFR_ENABLE_IF(sizeof(T) == sizeof(U))> - KFR_INLINE operator mask<U, N>() const + CMT_INLINE operator mask<U, N>() const { return bitcast<U>(*this); } - KFR_INLINE bool operator[](size_t index) const { return ibitcast(this->v[index]) < 0; } + CMT_INLINE bool operator[](size_t index) const { return ibitcast(this->v[index]) < 0; } }; -template <typename T, size_t N> -using cvec = vec<T, N * 2>; +template <typename T, size_t N1, size_t N2 = N1> +using mat = vec<vec<T, N1>, N2>; namespace internal { @@ -803,31 +884,31 @@ namespace internal template <size_t start, size_t count> struct shuffle_index_extend { - constexpr KFR_INLINE size_t operator()(size_t index) const + constexpr CMT_INLINE size_t operator()(size_t index) const { return index >= start && index < start + count ? index - start : index_undefined; } }; template <size_t start, size_t count, typename T, size_t N> -KFR_INLINE vec<T, count> concatexact(const vec<T, N>& x, const vec<T, N>& y) +CMT_INLINE vec<T, count> concatexact(const vec<T, N>& x, const vec<T, N>& y) { return kfr::shufflevector<count, internal::shuffle_index<start>>(x, y); } template <size_t start, size_t count, typename T, size_t N1, size_t N2> -KFR_INLINE enable_if<(N1 == N2), vec<T, count>> concattwo(const vec<T, N1>& x, const vec<T, N2>& y) +CMT_INLINE enable_if<(N1 == N2), vec<T, count>> concattwo(const vec<T, N1>& x, const vec<T, N2>& y) { return concatexact<start, count>(x, y); } template <size_t start, size_t count, typename T, size_t N1, size_t N2> -KFR_INLINE enable_if<(N1 > N2), vec<T, count>> concattwo(const vec<T, N1>& x, const vec<T, N2>& y) +CMT_INLINE enable_if<(N1 > N2), vec<T, count>> concattwo(const vec<T, N1>& x, const vec<T, N2>& y) { return concatexact<start, count>(x, shufflevector<N1, internal::shuffle_index_extend<0, N2>>(y)); } template <size_t start, size_t count, typename T, size_t N1, size_t N2> -KFR_INLINE enable_if<(N1 < N2), vec<T, count>> concattwo(const vec<T, N1>& x, const vec<T, N2>& y) +CMT_INLINE enable_if<(N1 < N2), vec<T, count>> concattwo(const vec<T, N1>& x, const vec<T, N2>& y) { return concatexact<N2 - N1 + start, count>( shufflevector<N2, internal::shuffle_index_extend<N2 - N1, N1>>(x), y); @@ -845,26 +926,26 @@ constexpr mask<T, Nout> partial_mask() } template <typename T, size_t N> -KFR_INLINE vec<T, N> concat(const vec<T, N>& x) +CMT_INLINE vec<T, N> concat(const vec<T, N>& x) { return x; } template <typename T, size_t N1, size_t N2> -KFR_INLINE vec<T, N1 + N2> concat(const vec<T, N1>& x, const vec<T, N2>& y) +CMT_INLINE vec<T, N1 + N2> concat(const vec<T, N1>& x, const vec<T, N2>& y) { return concattwo<0, N1 + N2>(x, y); } template <typename T, size_t N1, size_t N2, size_t... Sizes> -KFR_INLINE auto concat(const vec<T, N1>& x, const vec<T, N2>& y, const vec<T, Sizes>&... args) +CMT_INLINE auto concat(const vec<T, N1>& x, const vec<T, N2>& y, const vec<T, Sizes>&... 
args) { return concat(x, concat(y, args...)); } } -template <typename T, size_t N, size_t... Sizes, size_t Nout> -KFR_INLINE vec<T, Nout> concat(const vec<T, N>& x, const vec<T, Sizes>&... rest) +template <typename T, size_t N, size_t... Sizes> +CMT_INLINE vec<T, N + csum(csizes<Sizes...>)> concat(const vec<T, N>& x, const vec<T, Sizes>&... rest) { return internal::concat(x, rest...); } @@ -1012,6 +1093,28 @@ using mu64x8 = mask<u64, 8>; using mu64x16 = mask<u64, 16>; using mu64x32 = mask<u64, 32>; +using u8x2x2 = vec<vec<u8, 2>, 2>; +using i8x2x2 = vec<vec<i8, 2>, 2>; +using u16x2x2 = vec<vec<u16, 2>, 2>; +using i16x2x2 = vec<vec<i16, 2>, 2>; +using u32x2x2 = vec<vec<u32, 2>, 2>; +using i32x2x2 = vec<vec<i32, 2>, 2>; +using u64x2x2 = vec<vec<u64, 2>, 2>; +using i64x2x2 = vec<vec<i64, 2>, 2>; +using f32x2x2 = vec<vec<f32, 2>, 2>; +using f64x2x2 = vec<vec<f64, 2>, 2>; + +using u8x4x4 = vec<vec<u8, 4>, 4>; +using i8x4x4 = vec<vec<i8, 4>, 4>; +using u16x4x4 = vec<vec<u16, 4>, 4>; +using i16x4x4 = vec<vec<i16, 4>, 4>; +using u32x4x4 = vec<vec<u32, 4>, 4>; +using i32x4x4 = vec<vec<i32, 4>, 4>; +using u64x4x4 = vec<vec<u64, 4>, 4>; +using i64x4x4 = vec<vec<i64, 4>, 4>; +using f32x4x4 = vec<vec<f32, 4>, 4>; +using f64x4x4 = vec<vec<f64, 4>, 4>; + namespace glsl_names { using vec2 = f32x2; @@ -1117,19 +1220,19 @@ struct maxvec template <size_t Index, typename T, size_t N, typename Fn, typename... Args, typename Tout = result_of<Fn(subtype<decay<Args>>...)>> -constexpr KFR_INLINE Tout applyfn_helper(Fn&& fn, Args&&... args) +constexpr CMT_INLINE Tout applyfn_helper(Fn&& fn, Args&&... args) { return fn(args[Index]...); } template <typename T, size_t N, typename Fn, typename... Args, typename Tout = result_of<Fn(subtype<decay<Args>>...)>, size_t... Indices> -constexpr KFR_INLINE vec<Tout, N> apply_helper(Fn&& fn, csizes_t<Indices...>, Args&&... args) +constexpr CMT_INLINE vec<Tout, N> apply_helper(Fn&& fn, csizes_t<Indices...>, Args&&... args) { return make_vector(applyfn_helper<Indices, T, N>(std::forward<Fn>(fn), std::forward<Args>(args)...)...); } template <typename T, size_t N, typename Fn, size_t... Indices> -constexpr KFR_INLINE vec<T, N> apply0_helper(Fn&& fn, csizes_t<Indices...>) +constexpr CMT_INLINE vec<T, N> apply0_helper(Fn&& fn, csizes_t<Indices...>) { return make_vector(((void)Indices, void(), fn())...); } @@ -1137,30 +1240,30 @@ constexpr KFR_INLINE vec<T, N> apply0_helper(Fn&& fn, csizes_t<Indices...>) template <typename T, size_t N, typename Fn, typename... Args, typename Tout = result_of<Fn(T, subtype<decay<Args>>...)>> -constexpr KFR_INLINE vec<Tout, N> apply(Fn&& fn, const vec<T, N>& arg, Args&&... args) +constexpr CMT_INLINE vec<Tout, N> apply(Fn&& fn, const vec<T, N>& arg, Args&&... args) { return internal::apply_helper<T, N>(std::forward<Fn>(fn), csizeseq<N>, arg, std::forward<Args>(args)...); } template <size_t N, typename Fn, typename T = result_of<Fn()>> -constexpr KFR_INLINE vec<T, N> apply(Fn&& fn) +constexpr CMT_INLINE vec<T, N> apply(Fn&& fn) { return internal::apply0_helper<T, N>(std::forward<Fn>(fn), csizeseq<N>); } template <typename T, int N> -KFR_INLINE vec<T, N> tovec(simd<T, N> x) +CMT_INLINE vec<T, N> tovec(simd<T, N> x) { return x; } -#ifdef CID_ARCH_SSE2 -KFR_INLINE f32x4 tovec(__m128 x) { return f32x4(x); } -KFR_INLINE f64x2 tovec(__m128d x) { return f64x2(x); } +#ifdef CMT_ARCH_SSE2 +CMT_INLINE f32x4 tovec(__m128 x) { return f32x4(x); } +CMT_INLINE f64x2 tovec(__m128d x) { return f64x2(x); } #endif template <typename T, typename... 
Args, size_t Nout = (sizeof...(Args) + 1)> -constexpr KFR_INLINE mask<T, Nout> make_mask(bool arg, Args... args) +constexpr CMT_INLINE mask<T, Nout> make_mask(bool arg, Args... args) { simd<T, Nout> temp{ internal::maskbits<T>(arg), internal::maskbits<T>(static_cast<bool>(args))... }; return temp; @@ -1168,63 +1271,63 @@ constexpr KFR_INLINE mask<T, Nout> make_mask(bool arg, Args... args) KFR_FN(make_mask) template <typename T, size_t N> -constexpr KFR_INLINE vec<T, N> zerovector() +constexpr CMT_INLINE vec<T, N> zerovector() { constexpr size_t width = N * compound_type_traits<T>::width; - return subcast<T>(vec<subtype<T>, width>(simd<subtype<T>, width>())); + return compcast<T>(vec<subtype<T>, width>(simd<subtype<T>, width>())); } template <typename T, size_t N> -constexpr KFR_INLINE vec<T, N> zerovector(vec_t<T, N>) +constexpr CMT_INLINE vec<T, N> zerovector(vec_t<T, N>) { return zerovector<T, N>(); } KFR_FN(zerovector) template <typename T, size_t N> -constexpr KFR_INLINE vec<T, N> allonesvector() +constexpr CMT_INLINE vec<T, N> allonesvector() { return zerovector<T, N>() == zerovector<T, N>(); } template <typename T, size_t N> -constexpr KFR_INLINE vec<T, N> allonesvector(vec_t<T, N>) +constexpr CMT_INLINE vec<T, N> allonesvector(vec_t<T, N>) { return allonesvector<T, N>(); } KFR_FN(allonesvector) template <typename T, size_t N> -constexpr KFR_INLINE vec<T, N> undefinedvector() +constexpr CMT_INLINE vec<T, N> undefinedvector() { return vec<T, N>{}; } template <typename T, size_t N> -constexpr KFR_INLINE vec<T, N> undefinedvector(vec_t<T, N>) +constexpr CMT_INLINE vec<T, N> undefinedvector(vec_t<T, N>) { return undefinedvector<T, N>(); } KFR_FN(undefinedvector) template <typename T, size_t N, size_t Nout = prev_poweroftwo(N - 1)> -KFR_INLINE vec<T, Nout> low(const vec<T, N>& x) +CMT_INLINE vec<T, Nout> low(const vec<T, N>& x) { return shufflevector<Nout, internal::shuffle_index<>>(x); } template <typename T, size_t N, size_t Nout = prev_poweroftwo(N - 1)> -KFR_INLINE vec_t<T, Nout> low(vec_t<T, N>) +CMT_INLINE vec_t<T, Nout> low(vec_t<T, N>) { return {}; } template <typename T, size_t N, size_t Nout = N - prev_poweroftwo(N - 1)> -KFR_INLINE vec<T, Nout> high(const vec<T, N>& x) +CMT_INLINE vec<T, Nout> high(const vec<T, N>& x) { return shufflevector<Nout, internal::shuffle_index<prev_poweroftwo(N - 1)>>(x); } template <typename T, size_t N, size_t Nout = N - prev_poweroftwo(N - 1)> -KFR_INLINE vec_t<T, Nout> high(vec_t<T, N>) +CMT_INLINE vec_t<T, Nout> high(vec_t<T, N>) { return {}; } @@ -1237,16 +1340,16 @@ namespace internal template <typename Fn> struct expression_lambda : input_expression { - KFR_INLINE expression_lambda(Fn&& fn) : fn(std::move(fn)) {} + CMT_INLINE expression_lambda(Fn&& fn) : fn(std::move(fn)) {} template <typename T, size_t N, KFR_ENABLE_IF(N&& is_callable<Fn, cinput_t, size_t, vec_t<T, N>>::value)> - KFR_INLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N> y) const + CMT_INLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N> y) const { return fn(cinput, index, y); } template <typename T, size_t N, KFR_ENABLE_IF(N&& is_callable<Fn, size_t>::value)> - KFR_INLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N>) const + CMT_INLINE vec<T, N> operator()(cinput_t, size_t index, vec_t<T, N>) const { vec<T, N> result; for (size_t i = 0; i < N; i++) @@ -1256,7 +1359,7 @@ struct expression_lambda : input_expression return result; } template <typename T, size_t N, KFR_ENABLE_IF(N&& is_callable<Fn>::value)> - KFR_INLINE vec<T, N> 
operator()(cinput_t, size_t, vec_t<T, N>) const + CMT_INLINE vec<T, N> operator()(cinput_t, size_t, vec_t<T, N>) const { vec<T, N> result; for (size_t i = 0; i < N; i++) @@ -1285,10 +1388,12 @@ namespace cometa template <typename T, size_t N> struct compound_type_traits<kfr::simd<T, N>> { - using subtype = T; - using deep_subtype = cometa::deep_subtype<T>; - constexpr static size_t width = N; - constexpr static bool is_scalar = false; + using subtype = T; + using deep_subtype = cometa::deep_subtype<T>; + constexpr static size_t width = N; + constexpr static size_t deep_width = width * compound_type_traits<T>::width; + constexpr static bool is_scalar = false; + constexpr static size_t depth = cometa::compound_type_traits<T>::depth + 1; template <typename U> using rebind = kfr::simd<U, N>; template <typename U> @@ -1300,10 +1405,12 @@ struct compound_type_traits<kfr::simd<T, N>> template <typename T, size_t N> struct compound_type_traits<kfr::vec<T, N>> { - using subtype = T; - using deep_subtype = cometa::deep_subtype<T>; - constexpr static size_t width = N; - constexpr static bool is_scalar = false; + using subtype = T; + using deep_subtype = cometa::deep_subtype<T>; + constexpr static size_t width = N; + constexpr static size_t deep_width = width * compound_type_traits<T>::width; + constexpr static bool is_scalar = false; + constexpr static size_t depth = cometa::compound_type_traits<T>::depth + 1; template <typename U> using rebind = kfr::vec<U, N>; template <typename U> @@ -1315,10 +1422,12 @@ struct compound_type_traits<kfr::vec<T, N>> template <typename T, size_t N> struct compound_type_traits<kfr::mask<T, N>> { - using subtype = T; - using deep_subtype = cometa::deep_subtype<T>; - constexpr static size_t width = N; - constexpr static bool is_scalar = false; + using subtype = T; + using deep_subtype = cometa::deep_subtype<T>; + constexpr static size_t width = N; + constexpr static size_t deep_width = width * compound_type_traits<T>::width; + constexpr static bool is_scalar = false; + constexpr static size_t depth = cometa::compound_type_traits<T>::depth + 1; template <typename U> using rebind = kfr::mask<U, N>; template <typename U> diff --git a/include/kfr/cident.h b/include/kfr/cident.h @@ -1,366 +1,395 @@ #pragma once +#ifdef LIBC_WORKAROUND_GETS +extern char* gets(char* __s); +#endif + #if defined(_M_IX86) || defined(__i386__) || defined(_M_X64) || defined(__x86_64__) -#define CID_ARCH_X86 1 -#elif defined(__arm__) || defined(__arm64__) || defined(_M_ARM) -#define CID_ARCH_ARM 1 +#define CMT_ARCH_X86 1 +#elif defined(__arm__) || defined(__arm64__) || defined(_M_ARM) || defined(__aarch64__) +#define CMT_ARCH_ARM 1 #endif -#ifdef CID_ARCH_X86 +#ifdef CMT_ARCH_X86 #if defined(_M_X64) || defined(__x86_64__) -#define CID_ARCH_X64 1 +#define CMT_ARCH_X64 1 #else -#define CID_ARCH_X32 1 -#endif - -#if defined __AVX512F__ && !defined CID_ARCH_AVX512 -#define CID_ARCH_AVX512 1 -#define CID_ARCH_AVX2 1 -#define CID_ARCH_AVX 1 -#define CID_ARCH_SSE42 1 -#define CID_ARCH_SSE41 1 -#define CID_ARCH_SSSE3 1 -#define CID_ARCH_SSE3 1 -#define CID_ARCH_SSE2 1 -#define CID_ARCH_SSE 1 -#endif -#if defined __AVX2__ && !defined CID_ARCH_AVX2 -#define CID_ARCH_AVX2 1 -#define CID_ARCH_AVX 1 -#define CID_ARCH_SSE42 1 -#define CID_ARCH_SSE41 1 -#define CID_ARCH_SSSE3 1 -#define CID_ARCH_SSE3 1 -#define CID_ARCH_SSE2 1 -#define CID_ARCH_SSE 1 -#endif -#if defined __AVX__ && !defined CID_ARCH_AVX -#define CID_ARCH_AVX 1 -#define CID_ARCH_SSE42 1 -#define CID_ARCH_SSE41 1 -#define CID_ARCH_SSSE3 1 -#define 
CID_ARCH_SSE3 1 -#define CID_ARCH_SSE2 1 -#define CID_ARCH_SSE 1 -#endif -#if defined __SSE4_2__ && !defined CID_ARCH_SSE4_2 -#define CID_ARCH_SSE4_2 1 -#define CID_ARCH_SSE41 1 -#define CID_ARCH_SSSE3 1 -#define CID_ARCH_SSE3 1 -#define CID_ARCH_SSE2 1 -#define CID_ARCH_SSE 1 -#endif -#if defined __SSE4_1__ && !defined CID_ARCH_SSE4_1 -#define CID_ARCH_SSE4_1 1 -#define CID_ARCH_SSSE3 1 -#define CID_ARCH_SSE3 1 -#define CID_ARCH_SSE2 1 -#define CID_ARCH_SSE 1 -#endif -#if defined __SSSE3__ && !defined CID_ARCH_SSSE3 -#define CID_ARCH_SSSE3 1 -#define CID_ARCH_SSE3 1 -#define CID_ARCH_SSE2 1 -#define CID_ARCH_SSE 1 -#endif -#if defined __SSE3__ && !defined CID_ARCH_SSE3 -#define CID_ARCH_SSE3 1 -#define CID_ARCH_SSE2 1 -#define CID_ARCH_SSE 1 -#endif -#if (defined CID_ARCH_X64 || defined __SSE2__) && !defined CID_ARCH_SSE2 -#define CID_ARCH_SSE2 1 -#define CID_ARCH_SSE 1 -#endif - -#if (defined CID_ARCH_X64 || defined __SSE__) && !defined CID_ARCH_SSE1 -#define CID_ARCH_SSE 1 -#endif - -#if defined __FMA__ && !defined CID_ARCH_FMA -#define CID_ARCH_FMA 1 -#endif - -#if defined __AES__ && !defined CID_ARCH_AES -#define CID_ARCH_AES 1 -#endif - -#if defined __BMI__ && !defined CID_ARCH_BMI -#define CID_ARCH_BMI 1 -#endif - -#if defined __BMI2__ && !defined CID_ARCH_BMI2 -#define CID_ARCH_BMI2 1 -#endif - -#if defined __LZCNT__ && !defined CID_ARCH_LZCNT -#define CID_ARCH_LZCNT 1 -#endif - -#if defined CID_ARCH_AVX512 -#define CID_ARCH_NAME avx512 -#elif defined CID_ARCH_AVX2 -#define CID_ARCH_NAME avx2 -#elif defined CID_ARCH_AVX -#define CID_ARCH_NAME avx -#elif defined CID_ARCH_SSE4_1 -#define CID_ARCH_NAME sse41 -#elif defined CID_ARCH_SSSE3 -#define CID_ARCH_NAME ssse3 -#elif defined CID_ARCH_SSE3 -#define CID_ARCH_NAME sse3 -#elif defined CID_ARCH_SSE2 -#define CID_ARCH_NAME sse2 -#elif defined CID_ARCH_SSE -#define CID_ARCH_NAME sse -#endif - -#elif defined(CID_ARCH_ARM) - -#if defined(__arm64__) -#define CID_ARCH_X64 1 +#define CMT_ARCH_X32 1 +#endif + +#if defined __AVX512F__ && !defined CMT_ARCH_AVX512 +#define CMT_ARCH_AVX512 1 +#define CMT_ARCH_AVX2 1 +#define CMT_ARCH_AVX 1 +#define CMT_ARCH_SSE42 1 +#define CMT_ARCH_SSE41 1 +#define CMT_ARCH_SSSE3 1 +#define CMT_ARCH_SSE3 1 +#define CMT_ARCH_SSE2 1 +#define CMT_ARCH_SSE 1 +#endif +#if defined __AVX2__ && !defined CMT_ARCH_AVX2 +#define CMT_ARCH_AVX2 1 +#define CMT_ARCH_AVX 1 +#define CMT_ARCH_SSE42 1 +#define CMT_ARCH_SSE41 1 +#define CMT_ARCH_SSSE3 1 +#define CMT_ARCH_SSE3 1 +#define CMT_ARCH_SSE2 1 +#define CMT_ARCH_SSE 1 +#endif +#if defined __AVX__ && !defined CMT_ARCH_AVX +#define CMT_ARCH_AVX 1 +#define CMT_ARCH_SSE42 1 +#define CMT_ARCH_SSE41 1 +#define CMT_ARCH_SSSE3 1 +#define CMT_ARCH_SSE3 1 +#define CMT_ARCH_SSE2 1 +#define CMT_ARCH_SSE 1 +#endif +#if defined __SSE4_2__ && !defined CMT_ARCH_SSE4_2 +#define CMT_ARCH_SSE4_2 1 +#define CMT_ARCH_SSE41 1 +#define CMT_ARCH_SSSE3 1 +#define CMT_ARCH_SSE3 1 +#define CMT_ARCH_SSE2 1 +#define CMT_ARCH_SSE 1 +#endif +#if defined __SSE4_1__ && !defined CMT_ARCH_SSE4_1 +#define CMT_ARCH_SSE4_1 1 +#define CMT_ARCH_SSSE3 1 +#define CMT_ARCH_SSE3 1 +#define CMT_ARCH_SSE2 1 +#define CMT_ARCH_SSE 1 +#endif +#if defined __SSSE3__ && !defined CMT_ARCH_SSSE3 +#define CMT_ARCH_SSSE3 1 +#define CMT_ARCH_SSE3 1 +#define CMT_ARCH_SSE2 1 +#define CMT_ARCH_SSE 1 +#endif +#if defined __SSE3__ && !defined CMT_ARCH_SSE3 +#define CMT_ARCH_SSE3 1 +#define CMT_ARCH_SSE2 1 +#define CMT_ARCH_SSE 1 +#endif +#if (defined CMT_ARCH_X64 || defined __SSE2__) && !defined CMT_ARCH_SSE2 +#define CMT_ARCH_SSE2 1 
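Editor's note (illustrative sketch, not part of this commit): the renamed CMT_ARCH_* macros above form a cascade in which every wider x86 instruction-set level also defines the narrower ones (AVX512 implies AVX2, AVX, SSE4.2, ... SSE2), so client code only needs to test the minimum level it requires. A minimal usage sketch; the function name is hypothetical:

    #include <kfr/cident.h>

    // Built with -mavx2 or wider, CMT_ARCH_AVX2 is defined and CMT_ARCH_SSE2
    // comes along with it; built with plain -msse2, only the SSE2 branch remains.
    inline const char* detected_simd_level()
    {
    #if defined(CMT_ARCH_AVX2)
        return "avx2 or wider";
    #elif defined(CMT_ARCH_SSE2)
        return "sse2";
    #else
        return "generic";
    #endif
    }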
+#define CMT_ARCH_SSE 1 +#endif + +#if (defined CMT_ARCH_X64 || defined __SSE__) && !defined CMT_ARCH_SSE1 +#define CMT_ARCH_SSE 1 +#endif + +#if defined __FMA__ && !defined CMT_ARCH_FMA +#define CMT_ARCH_FMA 1 +#endif + +#if defined __AES__ && !defined CMT_ARCH_AES +#define CMT_ARCH_AES 1 +#endif + +#if defined __BMI__ && !defined CMT_ARCH_BMI +#define CMT_ARCH_BMI 1 +#endif + +#if defined __BMI2__ && !defined CMT_ARCH_BMI2 +#define CMT_ARCH_BMI2 1 +#endif + +#if defined __LZCNT__ && !defined CMT_ARCH_LZCNT +#define CMT_ARCH_LZCNT 1 +#endif + +#if defined CMT_ARCH_AVX512 +#define CMT_ARCH_NAME avx512 +#elif defined CMT_ARCH_AVX2 +#define CMT_ARCH_NAME avx2 +#elif defined CMT_ARCH_AVX +#define CMT_ARCH_NAME avx +#elif defined CMT_ARCH_SSE4_1 +#define CMT_ARCH_NAME sse41 +#elif defined CMT_ARCH_SSSE3 +#define CMT_ARCH_NAME ssse3 +#elif defined CMT_ARCH_SSE3 +#define CMT_ARCH_NAME sse3 +#elif defined CMT_ARCH_SSE2 +#define CMT_ARCH_NAME sse2 +#elif defined CMT_ARCH_SSE +#define CMT_ARCH_NAME sse +#endif + +#elif defined(CMT_ARCH_ARM) + +#if defined(__aarch64__) +#define CMT_ARCH_X64 1 #else -#define CID_ARCH_X32 1 +#define CMT_ARCH_X32 1 #endif #ifdef __ARM_NEON__ #if __ARM_ARCH >= 8 && defined(__aarch64__) -#define CID_ARCH_NEON64 1 -#define CID_ARCH_NAME neon64 +#define CMT_ARCH_NEON64 1 +#define CMT_ARCH_NEON 1 +#define CMT_ARCH_NAME neon64 #else -#define CID_ARCH_NEON 1 -#define CID_ARCH_NAME neon +#define CMT_ARCH_NEON 1 +#define CMT_ARCH_NAME neon +#define KFR_NO_NATIVE_F64 1 +#endif +#endif + #endif + +#ifndef CMT_ARCH_NAME +#define CMT_ARCH_NAME common #endif +#ifndef KFR_NO_NATIVE_F64 +#define KFR_NATIVE_F64 1 #endif -#ifndef CID_ARCH_NAME -#define CID_ARCH_NAME common +#ifndef KFR_NO_NATIVE_I64 +#define KFR_NATIVE_I64 1 #endif -#define CID_STRINGIFY2(x) #x -#define CID_STRINGIFY(x) CID_STRINGIFY2(x) +#define CMT_STRINGIFY2(x) #x +#define CMT_STRINGIFY(x) CMT_STRINGIFY2(x) #if defined(_WIN32) // Windows -#define CID_OS_WIN 1 +#define CMT_OS_WIN 1 #endif #if defined(__APPLE__) #include "TargetConditionals.h" #ifdef TARGET_OS_IPHONE -#define CID_OS_IOS 1 -#define CID_OS_MOBILE 1 +#define CMT_OS_IOS 1 +#define CMT_OS_MOBILE 1 #elif TARGET_IPHONE_SIMULATOR -#define CID_OS_IOS 1 -#define CID_OS_IOS_SIMULATOR 1 -#define CID_OS_MOBILE 1 +#define CMT_OS_IOS 1 +#define CMT_OS_IOS_SIMULATOR 1 +#define CMT_OS_MOBILE 1 #elif TARGET_OS_MAC -#define CID_OS_MAC 1 -#define CID_OS_MACOS 1 -#define CID_OS_OSX 1 +#define CMT_OS_MAC 1 +#define CMT_OS_MACOS 1 +#define CMT_OS_OSX 1 #endif -#define CID_OS_POSIX 1 +#define CMT_OS_POSIX 1 #endif #if defined(__ANDROID__) -#define CID_OS_ANDROID 1 -#define CID_OS_MOBILE 1 -#define CID_OS_POSIX 1 +#define CMT_OS_ANDROID 1 +#define CMT_OS_MOBILE 1 +#define CMT_OS_POSIX 1 #endif #if defined(__linux__) -#define CID_OS_LINUX 1 -#define CID_OS_POSIX 1 +#define CMT_OS_LINUX 1 +#define CMT_OS_POSIX 1 #endif #if defined(_MSC_VER) // Visual C/C++ -#define CID_COMPILER_MSVC 1 -#define CID_MSVC_ATTRIBUTES 1 -#define CID_MSC_VER _MSC_VER +#define CMT_COMPILER_MSVC 1 +#define CMT_MSVC_ATTRIBUTES 1 +#define CMT_MSC_VER _MSC_VER #else -#define CID_MSC_VER 0 +#define CMT_MSC_VER 0 #endif #if defined(__GNUC__) || defined(__clang__) // GCC, Clang -#define CID_COMPILER_GNU 1 -#define CID_GNU_ATTRIBUTES 1 -#define CID_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +#define CMT_COMPILER_GNU 1 +#define CMT_GNU_ATTRIBUTES 1 +#define CMT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) #if __cplusplus >= 201103L || defined __GXX_EXPERIMENTAL_CXX0X__ -#define CID_HAS_GXX_CXX11 1 +#define 
CMT_HAS_GXX_CXX11 1 #endif #else -#define CID_GCC_VERSION 0 +#define CMT_GCC_VERSION 0 #endif #if defined(__INTEL_COMPILER) // Intel Compiler -#define CID_COMPILER_INTEL 1 -#define CID_ICC_VERSION __INTEL_COMPILER +#define CMT_COMPILER_INTEL 1 +#define CMT_ICC_VERSION __INTEL_COMPILER #elif defined(__ICL) -#define CID_COMPILER_INTEL 1 -#define CID_ICC_VERSION __ICL +#define CMT_COMPILER_INTEL 1 +#define CMT_ICC_VERSION __ICL #else -#define CID_ICC_VERSION 0 +#define CMT_ICC_VERSION 0 #endif #if defined(__clang__) // Clang -#define CID_COMPILER_CLANG 1 -#ifndef CID_GNU_ATTRIBUTES -#define CID_GNU_ATTRIBUTES 1 +#define CMT_COMPILER_CLANG 1 +#ifndef CMT_GNU_ATTRIBUTES +#define CMT_GNU_ATTRIBUTES 1 #endif #endif -#if defined(CID_GNU_ATTRIBUTES) +#if defined(CMT_GNU_ATTRIBUTES) -#define CID_NODEBUG +#define CMT_NODEBUG // __attribute__((__nodebug__)) -#define CID_INLINE __inline__ __attribute__((__always_inline__)) -#define CID_INTRIN CID_INLINE CID_NODEBUG -#define CID_INLINE_MEMBER __attribute__((__always_inline__)) -#define CID_INLINE_LAMBDA CID_INLINE_MEMBER -#define CID_NOINLINE __attribute__((__noinline__)) -#define CID_FLATTEN __attribute__((__flatten__)) -#define CID_RESTRICT __restrict__ +#define CMT_INLINE __inline__ __attribute__((__always_inline__)) +#define CMT_INTRIN CMT_INLINE CMT_NODEBUG +#define CMT_INLINE_MEMBER __attribute__((__always_inline__)) +#define CMT_INLINE_LAMBDA CMT_INLINE_MEMBER +#define CMT_NOINLINE __attribute__((__noinline__)) +#define CMT_FLATTEN __attribute__((__flatten__)) +#define CMT_RESTRICT __restrict__ -#elif defined(CID_MSVC_ATTRIBUTES) +#elif defined(CMT_MSVC_ATTRIBUTES) -#define CID_NODEBUG -#define CID_INLINE inline __forceinline -#define CID_INTRIN CID_INLINE CID_NODEBUG -#define CID_INLINE_MEMBER __forceinline -#define CID_INLINE_LAMBDA -#define CID_NOINLINE __declspec(noinline) -#define CID_FLATTEN -#define CID_RESTRICT __restrict +#define CMT_NODEBUG +#define CMT_INLINE inline __forceinline +#define CMT_INTRIN CMT_INLINE CMT_NODEBUG +#define CMT_INLINE_MEMBER __forceinline +#define CMT_INLINE_LAMBDA +#define CMT_NOINLINE __declspec(noinline) +#define CMT_FLATTEN +#define CMT_RESTRICT __restrict #endif -#define CID_INLINE_STATIC CID_INLINE static +#define CMT_INLINE_STATIC CMT_INLINE static -#define CID_EXTERN_C extern "C" +#define CMT_EXTERN_C extern "C" -#define CID_PUBLIC_C CID_EXTERN_C CID_NOINLINE +#define CMT_PUBLIC_C CMT_EXTERN_C CMT_NOINLINE -#define CID_ALWAYS_INLINE_STATIC CID_ALWAYS_INLINE static +#define CMT_ALWAYS_INLINE_STATIC CMT_ALWAYS_INLINE static -#ifdef CID_ARCH_x86 -#ifdef CID_OS_WIN -#define CID_CDECL __cdecl +#ifdef CMT_ARCH_x86 +#ifdef CMT_OS_WIN +#define CMT_CDECL __cdecl #else -#define CID_CDECL __attribute__((cdecl)) +#define CMT_CDECL __attribute__((cdecl)) #endif #else -#define CID_CDECL +#define CMT_CDECL #endif -#ifdef CID_OS_WIN -#if defined(CID_MSVC_ATTRIBUTES) -#define CID_DLL_EXPORT __declspec(dllexport) -#define CID_DLL_IMPORT __declspec(dllimport) +#ifdef CMT_OS_WIN +#if defined(CMT_MSVC_ATTRIBUTES) +#define CMT_DLL_EXPORT __declspec(dllexport) +#define CMT_DLL_IMPORT __declspec(dllimport) #else -#define CID_DLL_EXPORT __attribute__((dllexport)) -#define CID_DLL_IMPORT __attribute__((dllimport)) +#define CMT_DLL_EXPORT __attribute__((dllexport)) +#define CMT_DLL_IMPORT __attribute__((dllimport)) #endif #else -#define CID_DLL_EXPORT -#define CID_DLL_IMPORT +#define CMT_DLL_EXPORT +#define CMT_DLL_IMPORT #endif #ifdef __has_builtin -#define CID_HAS_BUILTIN(builtin) __has_builtin(builtin) +#define 
CMT_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else +#define CMT_HAS_BUILTIN(builtin) 0 +#endif + +#if CMT_HAS_BUILTIN(CMT_ASSUME) +#define CMT_ASSUME(x) __builtin_assume(x) +#else +#define CMT_ASSUME(x) \ + do \ + { \ + } while (0) +#endif + +#if CMT_HAS_BUILTIN(CMT_ASSUME) +#define CMT_ASSUME_ALIGNED(x, a) __builtin_assume_aligned(x, a) #else -#define CID_HAS_BUILTIN(builtin) 0 +#define CMT_ASSUME_ALIGNED(x, a) x #endif #ifdef __has_feature -#define CID_HAS_FEATURE(feature) __has_feature(feature) +#define CMT_HAS_FEATURE(feature) __has_feature(feature) #else -#define CID_HAS_FEATURE(feature) 0 +#define CMT_HAS_FEATURE(feature) 0 #endif #ifdef __has_extension -#define CID_HAS_EXTENSION(extension) __has_extension(extension) +#define CMT_HAS_EXTENSION(extension) __has_extension(extension) #else -#define CID_HAS_EXTENSION(extension) 0 +#define CMT_HAS_EXTENSION(extension) 0 #endif #ifdef __has_attribute -#define CID_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#define CMT_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) #else -#define CID_HAS_ATTRIBUTE(attribute) 0 +#define CMT_HAS_ATTRIBUTE(attribute) 0 #endif #ifdef __has_warning -#define CID_HAS_WARNING(warning) __has_warning(warning) +#define CMT_HAS_WARNING(warning) __has_warning(warning) #else -#define CID_HAS_WARNING(warning) 0 +#define CMT_HAS_WARNING(warning) 0 #endif -#define CID_HAS_VARIADIC_TEMPLATES \ - (CID_HAS_FEATURE(cxx_variadic_templates) || (CID_GCC_VERSION >= 404 && CID_HAS_GXX_CXX11) || \ - CID_MSC_VER >= 1800) +#define CMT_HAS_VARIADIC_TEMPLATES \ + (CMT_HAS_FEATURE(cxx_variadic_templates) || (CMT_GCC_VERSION >= 404 && CMT_HAS_GXX_CXX11) || \ + CMT_MSC_VER >= 1800) -#ifdef CID_BUILDING_DLL -#define CID_C_API CID_DLL_EXPORT +#ifdef CMT_BUILDING_DLL +#define CMT_C_API CMT_DLL_EXPORT #else -#define CID_C_API CID_DLL_IMPORT +#define CMT_C_API CMT_DLL_IMPORT #endif -#if __cplusplus >= 201103L || CID_MSC_VER >= 1900 || CID_HAS_FEATURE(cxx_constexpr) -#define CID_HAS_CONSTEXPR 1 +#if __cplusplus >= 201103L || CMT_MSC_VER >= 1900 || CMT_HAS_FEATURE(cxx_constexpr) +#define CMT_HAS_CONSTEXPR 1 #endif -#if __cpp_constexpr >= 201304 || CID_HAS_FEATURE(cxx_constexpr) -#define CID_HAS_FULL_CONSTEXPR 1 +#if __cpp_constexpr >= 201304 || CMT_HAS_FEATURE(cxx_constexpr) +#define CMT_HAS_FULL_CONSTEXPR 1 #endif -#if CID_HAS_CONSTEXPR -#define CID_CONSTEXPR constexpr +#if CMT_HAS_CONSTEXPR +#define CMT_CONSTEXPR constexpr #else -#define CID_CONSTEXPR +#define CMT_CONSTEXPR #endif -#if CID_HAS_FEATURE(cxx_noexcept) || (CID_GCC_VERSION >= 408 && CID_HAS_GXX_CXX11) || CID_MSC_VER >= 1900 -#define CID_HAS_NOEXCEPT 1 +#if CMT_HAS_FEATURE(cxx_noexcept) || (CMT_GCC_VERSION >= 408 && CMT_HAS_GXX_CXX11) || CMT_MSC_VER >= 1900 +#define CMT_HAS_NOEXCEPT 1 #endif -#if CID_HAS_NOEXCEPT -#define CID_NOEXCEPT noexcept +#if CMT_HAS_NOEXCEPT +#define CMT_NOEXCEPT noexcept #else -#define CID_NOEXCEPT +#define CMT_NOEXCEPT #endif -#if CID_COMPILER_GNU && !defined(__EXCEPTIONS) -#define CID_HAS_EXCEPTIONS 0 +#if CMT_COMPILER_GNU && !defined(__EXCEPTIONS) +#define CMT_HAS_EXCEPTIONS 0 #endif -#if CID_COMPILER_MSVC && !_HAS_EXCEPTIONS -#define CID_HAS_EXCEPTIONS 0 +#if CMT_COMPILER_MSVC && !_HAS_EXCEPTIONS +#define CMT_HAS_EXCEPTIONS 0 #endif -#ifndef CID_HAS_EXCEPTIONS -#define CID_HAS_EXCEPTIONS 1 +#ifndef CMT_HAS_EXCEPTIONS +#define CMT_HAS_EXCEPTIONS 1 #endif #if __has_include(<assert.h>) #include <assert.h> -#define CID_HAS_ASSERT_H 1 +#define CMT_HAS_ASSERT_H 1 #endif -#ifndef CID_THROW -#if CID_HAS_EXCEPTIONS -#define CID_THROW(x) throw x 
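Editor's note (illustrative sketch, not part of this commit): the CMT_HAS_* probes above wrap __has_builtin / __has_feature / __has_attribute / __has_warning so they expand to 0 on compilers that lack them, and CMT_CONSTEXPR, CMT_NOEXCEPT and CMT_ASSUME_ALIGNED degrade to no-ops on the same basis. A hypothetical sketch of the intended pattern:

    #include <kfr/cident.h>

    // constexpr and noexcept are applied only where the compiler supports them;
    // CMT_ASSUME_ALIGNED is a plain pass-through when __builtin_assume_aligned
    // is unavailable, so the cast below is valid either way.
    CMT_CONSTEXPR int half(int x) CMT_NOEXCEPT { return x / 2; }

    inline float* aligned_view(float* p)
    {
        return static_cast<float*>(CMT_ASSUME_ALIGNED(p, 32));
    }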
+#ifndef CMT_THROW +#if CMT_HAS_EXCEPTIONS +#define CMT_THROW(x) throw x #else -#ifdef CID_HAS_ASSERT_H -#define CID_THROW(x) assert(false) +#ifdef CMT_HAS_ASSERT_H +#define CMT_THROW(x) assert(false) #else -#define CID_THROW(x) abort() +#define CMT_THROW(x) abort() #endif #endif #endif -#if __cplusplus >= 201103L || CID_MSC_VER >= 1900 || CID_HAS_FEATURE(cxx_constexpr) +#if __cplusplus >= 201103L || CMT_MSC_VER >= 1900 || CMT_HAS_FEATURE(cxx_constexpr) #include <cstdint> namespace cid @@ -372,21 +401,42 @@ constexpr inline static size_t arraysize(const T (&)[N]) noexcept } } -#define CID_ARRAYSIZE(arr) ::cid::arraysize(arr) -#elif CID_COMPILER_MSVC -#define CID_ARRAYSIZE(arr) _countof(arr) +#define CMT_ARRAYSIZE(arr) ::cid::arraysize(arr) +#elif CMT_COMPILER_MSVC +#define CMT_ARRAYSIZE(arr) _countof(arr) #elif __cplusplus >= 199711L && \ (defined(__INTEL_COMPILER) || defined(__clang__) || \ (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)))) template <typename T, size_t N> char (&COUNTOF_REQUIRES_ARRAY_ARGUMENT(T (&)[N]))[N]; -#define CID_ARRAYSIZE(x) sizeof(COUNTOF_REQUIRES_ARRAY_ARGUMENT(x)) +#define CMT_ARRAYSIZE(x) sizeof(COUNTOF_REQUIRES_ARRAY_ARGUMENT(x)) +#else +#define CMT_ARRAYSIZE(arr) sizeof(arr) / sizeof(arr[0]) +#endif + +#ifdef CMT_COMPILER_MSVC +#define CMT_FUNC_SIGNATURE __FUNCSIG__ #else -#define CID_ARRAYSIZE(arr) sizeof(arr) / sizeof(arr[0]) +#define CMT_FUNC_SIGNATURE __PRETTY_FUNCTION__ +#endif + +#if CMT_COMPILER_CLANG +#define CMT_LOOP_NOUNROLL \ + _Pragma("clang loop vectorize( disable )") _Pragma("clang loop interleave( disable )") \ + _Pragma("clang loop unroll( disable )") + +#define CMT_LOOP_UNROLL _Pragma("clang loop unroll( full )") +#define CMT_VEC_CC __attribute__((vectorcall)) +#else +#define CMT_LOOP_NOUNROLL +#define CMT_LOOP_UNROLL +#ifdef CMT_COMPILER_MSVC +#define CMT_VEC_CC __vectorcall +#endif #endif -#ifdef CID_COMPILER_MSVC -#define CID_FUNC_SIGNATURE __FUNCSIG__ +#if defined(CMT_GNU_ATTRIBUTES) +#define CMT_FAST_CC __attribute__((fastcall)) #else -#define CID_FUNC_SIGNATURE __PRETTY_FUNCTION__ +#define CMT_FAST_CC __fastcall #endif diff --git a/include/kfr/cometa.hpp b/include/kfr/cometa.hpp @@ -22,6 +22,18 @@ using pvoid = void*; template <typename...> using void_t = void; +// Workaround for GCC 4.8 +template <typename T> +constexpr const T& const_max(const T& x, const T& y) +{ + return x > y ? x : y; +} +template <typename T> +constexpr const T& const_min(const T& x, const T& y) +{ + return x < y ? 
x : y; +} + namespace details { constexpr inline bool args_or() { return false; } @@ -135,10 +147,12 @@ constexpr size_t typeindex() template <typename T> struct compound_type_traits { - constexpr static size_t width = 1; - using subtype = T; - using deep_subtype = T; - constexpr static bool is_scalar = true; + constexpr static size_t width = 1; + constexpr static size_t deep_width = width; + using subtype = T; + using deep_subtype = T; + constexpr static size_t depth = 0; + constexpr static bool is_scalar = true; template <typename U> using rebind = U; @@ -166,10 +180,12 @@ using deep_rebind = typename compound_type_traits<T>::template deep_rebind<SubTy template <typename T> struct compound_type_traits<std::pair<T, T>> { - constexpr static size_t width = 2; - using subtype = T; - using deep_subtype = cometa::deep_subtype<T>; - constexpr static bool is_scalar = false; + constexpr static size_t width = 2; + constexpr static size_t deep_width = width * compound_type_traits<T>::width; + using subtype = T; + using deep_subtype = cometa::deep_subtype<T>; + constexpr static bool is_scalar = false; + constexpr static size_t depth = cometa::compound_type_traits<T>::depth + 1; template <typename U> using rebind = std::pair<U, U>; @@ -744,7 +760,7 @@ inline auto call_if_callable(Fn&& fn) template <typename Fn, typename... Args> inline auto bind_func(Fn&& fn, Args&&... args) { - return [=]() CID_INLINE_LAMBDA { return fn(details::call_if_callable(std::forward<Args>(args))...); }; + return [=]() CMT_INLINE_LAMBDA { return fn(details::call_if_callable(std::forward<Args>(args))...); }; } template <typename T> @@ -880,7 +896,7 @@ using identity = typename details::identity_impl<T>::type; struct swallow { template <typename... T> - CID_INTRIN constexpr swallow(T&&...) noexcept + CMT_INTRIN constexpr swallow(T&&...) 
noexcept { } }; @@ -905,24 +921,24 @@ struct carray<T, 1> static constexpr size_t size() noexcept { return 1; } template <size_t index> - CID_INTRIN constexpr T& get(csize_t<index>) noexcept + CMT_INTRIN constexpr T& get(csize_t<index>) noexcept { static_assert(index == 0, "carray: Array index is out of range"); return val; } template <size_t index> - CID_INTRIN constexpr const T& get(csize_t<index>) const noexcept + CMT_INTRIN constexpr const T& get(csize_t<index>) const noexcept { static_assert(index == 0, "carray: Array index is out of range"); return val; } template <size_t index> - CID_INTRIN constexpr T& get() noexcept + CMT_INTRIN constexpr T& get() noexcept { return get(csize<index>); } template <size_t index> - CID_INTRIN constexpr const T& get() const noexcept + CMT_INTRIN constexpr const T& get() const noexcept { return get(csize<index>); } @@ -960,39 +976,39 @@ struct carray : carray<T, N - 1> constexpr carray(const carray&) noexcept = default; constexpr carray(carray&&) noexcept = default; static constexpr size_t size() noexcept { return N; } - CID_INTRIN constexpr T& get(csize_t<N - 1>) noexcept { return val; } + CMT_INTRIN constexpr T& get(csize_t<N - 1>) noexcept { return val; } template <size_t index> - CID_INTRIN constexpr T& get(csize_t<index>) noexcept + CMT_INTRIN constexpr T& get(csize_t<index>) noexcept { return carray<T, N - 1>::get(csize<index>); } template <size_t index> - CID_INTRIN constexpr T& get() noexcept + CMT_INTRIN constexpr T& get() noexcept { return get(csize<index>); } - CID_INTRIN constexpr const T& get(csize_t<N - 1>) const noexcept { return val; } + CMT_INTRIN constexpr const T& get(csize_t<N - 1>) const noexcept { return val; } template <size_t index> - CID_INTRIN constexpr const T& get(csize_t<index>) const noexcept + CMT_INTRIN constexpr const T& get(csize_t<index>) const noexcept { return carray<T, N - 1>::get(csize<index>); } template <size_t index> - CID_INTRIN constexpr const T& get() const noexcept + CMT_INTRIN constexpr const T& get() const noexcept { return get(csize<index>); } - CID_INTRIN constexpr const T* front() const noexcept { return carray<T, N - 1>::front(); } - CID_INTRIN constexpr T* front() noexcept { return carray<T, N - 1>::front(); } - CID_INTRIN constexpr const T* back() const noexcept { return val; } - CID_INTRIN constexpr T* back() noexcept { return val; } - CID_INTRIN constexpr const T* begin() const noexcept { return carray<T, N - 1>::begin(); } - CID_INTRIN constexpr const T* end() const noexcept { return &val + 1; } - CID_INTRIN constexpr T* begin() noexcept { return carray<T, N - 1>::begin(); } - CID_INTRIN constexpr T* end() noexcept { return &val + 1; } - CID_INTRIN constexpr const T* data() const noexcept { return begin(); } - CID_INTRIN constexpr T* data() noexcept { return begin(); } - CID_INTRIN constexpr bool empty() const noexcept { return false; } + CMT_INTRIN constexpr const T* front() const noexcept { return carray<T, N - 1>::front(); } + CMT_INTRIN constexpr T* front() noexcept { return carray<T, N - 1>::front(); } + CMT_INTRIN constexpr const T* back() const noexcept { return val; } + CMT_INTRIN constexpr T* back() noexcept { return val; } + CMT_INTRIN constexpr const T* begin() const noexcept { return carray<T, N - 1>::begin(); } + CMT_INTRIN constexpr const T* end() const noexcept { return &val + 1; } + CMT_INTRIN constexpr T* begin() noexcept { return carray<T, N - 1>::begin(); } + CMT_INTRIN constexpr T* end() noexcept { return &val + 1; } + CMT_INTRIN constexpr const T* data() const noexcept { 
return begin(); } + CMT_INTRIN constexpr T* data() noexcept { return begin(); } + CMT_INTRIN constexpr bool empty() const noexcept { return false; } private: T val; }; @@ -1001,7 +1017,7 @@ private: struct fn_##fn \ { \ template <typename... Args> \ - CID_INLINE_MEMBER decltype(fn(std::declval<Args>()...)) operator()(Args&&... args) const \ + CMT_INLINE_MEMBER decltype(fn(std::declval<Args>()...)) operator()(Args&&... args) const \ { \ return fn(std::forward<Args>(args)...); \ } \ @@ -1014,7 +1030,7 @@ private: struct fn_##fn \ { \ template <typename... Args> \ - CID_INLINE_MEMBER decltype(fn<CMT_ESC tpl_args>(std::declval<Args>()...)) operator()( \ + CMT_INLINE_MEMBER decltype(fn<CMT_ESC tpl_args>(std::declval<Args>()...)) operator()( \ Args&&... args) const \ { \ return fn<CMT_ESC tpl_args>(std::forward<Args>(args)...); \ @@ -1156,19 +1172,19 @@ template <typename T> using value_type_of = typename decay<T>::value_type; template <typename T, typename Fn> -CID_INTRIN void cforeach(cvals_t<T>, Fn&&) +CMT_INTRIN void cforeach(cvals_t<T>, Fn&&) { } template <typename T, T v0, T... values, typename Fn> -CID_INTRIN void cforeach(cvals_t<T, v0, values...>, Fn&& fn) +CMT_INTRIN void cforeach(cvals_t<T, v0, values...>, Fn&& fn) { fn(cval<T, v0>); cforeach(cvals_t<T, values...>(), std::forward<Fn>(fn)); } template <typename T, typename Fn, CMT_ENABLE_IF(has_begin_end<T>::value)> -CID_INTRIN void cforeach(T&& list, Fn&& fn) +CMT_INTRIN void cforeach(T&& list, Fn&& fn) { for (const auto& v : list) { @@ -1177,7 +1193,7 @@ CID_INTRIN void cforeach(T&& list, Fn&& fn) } template <typename T, size_t N, typename Fn> -CID_INTRIN void cforeach(const T (&array)[N], Fn&& fn) +CMT_INTRIN void cforeach(const T (&array)[N], Fn&& fn) { for (size_t i = 0; i < N; i++) { @@ -1188,38 +1204,38 @@ CID_INTRIN void cforeach(const T (&array)[N], Fn&& fn) namespace details { template <typename... Ts, typename Fn, size_t... indices> -CID_INTRIN void cforeach_tuple_impl(const std::tuple<Ts...>& tuple, Fn&& fn, csizes_t<indices...>) +CMT_INTRIN void cforeach_tuple_impl(const std::tuple<Ts...>& tuple, Fn&& fn, csizes_t<indices...>) { swallow{ (fn(std::get<indices>(tuple)), void(), 0)... }; } template <typename T0, typename... types, typename Fn, size_t... indices> -CID_INTRIN void cforeach_types_impl(ctypes_t<T0, types...>, Fn&& fn, csizes_t<indices...>) +CMT_INTRIN void cforeach_types_impl(ctypes_t<T0, types...>, Fn&& fn, csizes_t<indices...>) { swallow{ (fn(ctype<type_of<details::get_nth_type<indices, T0, types...>>>), void(), 0)... }; } } template <typename... Ts, typename Fn> -CID_INTRIN void cforeach(ctypes_t<Ts...> types, Fn&& fn) +CMT_INTRIN void cforeach(ctypes_t<Ts...> types, Fn&& fn) { details::cforeach_types_impl(types, std::forward<Fn>(fn), csizeseq<sizeof...(Ts)>); } template <typename... 
Ts, typename Fn> -CID_INTRIN void cforeach(const std::tuple<Ts...>& tuple, Fn&& fn) +CMT_INTRIN void cforeach(const std::tuple<Ts...>& tuple, Fn&& fn) { details::cforeach_tuple_impl(tuple, std::forward<Fn>(fn), csizeseq<sizeof...(Ts)>); } template <typename A0, typename A1, typename Fn> -CID_INTRIN void cforeach(A0&& a0, A1&& a1, Fn&& fn) +CMT_INTRIN void cforeach(A0&& a0, A1&& a1, Fn&& fn) { cforeach(std::forward<A0>(a0), [&](auto v0) { cforeach(std::forward<A1>(a1), [&](auto v1) { fn(v0, v1); }); }); } template <typename A0, typename A1, typename A2, typename Fn> -CID_INTRIN void cforeach(A0&& a0, A1&& a1, A2&& a2, Fn&& fn) +CMT_INTRIN void cforeach(A0&& a0, A1&& a1, A2&& a2, Fn&& fn) { cforeach(std::forward<A0>(a0), [&](auto v0) { cforeach(std::forward<A1>(a1), @@ -1228,13 +1244,13 @@ CID_INTRIN void cforeach(A0&& a0, A1&& a1, A2&& a2, Fn&& fn) } template <typename T, typename Fn, typename DefFn = fn_noop, typename CmpFn = fn_is_equal> -CID_INTRIN decltype(auto) cswitch(cvals_t<T>, identity<T>, Fn&&, DefFn&& deffn = DefFn(), CmpFn&& = CmpFn()) +CMT_INTRIN decltype(auto) cswitch(cvals_t<T>, identity<T>, Fn&&, DefFn&& deffn = DefFn(), CmpFn&& = CmpFn()) { return deffn(); } template <typename T, T v0, T... values, typename Fn, typename DefFn = fn_noop, typename CmpFn = fn_is_equal> -CID_INTRIN decltype(auto) cswitch(cvals_t<T, v0, values...>, identity<T> value, Fn&& fn, +CMT_INTRIN decltype(auto) cswitch(cvals_t<T, v0, values...>, identity<T> value, Fn&& fn, DefFn&& deffn = DefFn(), CmpFn&& cmpfn = CmpFn()) { if (cmpfn(value, v0)) @@ -1249,19 +1265,19 @@ CID_INTRIN decltype(auto) cswitch(cvals_t<T, v0, values...>, identity<T> value, } template <typename TrueFn, typename FalseFn = fn_noop> -CID_INTRIN decltype(auto) cif(cbool_t<true>, TrueFn&& truefn, FalseFn&& = FalseFn()) +CMT_INTRIN decltype(auto) cif(cbool_t<true>, TrueFn&& truefn, FalseFn&& = FalseFn()) { return truefn(cbool<true>); } template <typename TrueFn, typename FalseFn = fn_noop> -CID_INTRIN decltype(auto) cif(cbool_t<false>, TrueFn&&, FalseFn&& falsefn = FalseFn()) +CMT_INTRIN decltype(auto) cif(cbool_t<false>, TrueFn&&, FalseFn&& falsefn = FalseFn()) { return falsefn(cbool<false>); } template <typename T, T start, T stop, typename BodyFn> -CID_INTRIN decltype(auto) cfor(cval_t<T, start>, cval_t<T, stop>, BodyFn&& bodyfn) +CMT_INTRIN decltype(auto) cfor(cval_t<T, start>, cval_t<T, stop>, BodyFn&& bodyfn) { return cforeach(cvalrange<T, start, stop>, std::forward<BodyFn>(bodyfn)); } @@ -1316,20 +1332,20 @@ struct virtual_function { virtual Result operator()(Args... args) = 0; virtual virtual_function* make_copy() const = 0; - CID_INTRIN virtual ~virtual_function() = default; + CMT_INTRIN virtual ~virtual_function() = default; }; template <typename Fn, typename Result, typename... Args> struct virtual_function_impl : virtual_function<Result, Args...> { public: - CID_INTRIN virtual_function_impl(const Fn& fn) : fn(fn) {} - CID_INTRIN Result operator()(Args... args) override final { return fn(args...); } - CID_INTRIN virtual_function<Result, Args...>* make_copy() const override final + CMT_INTRIN virtual_function_impl(const Fn& fn) : fn(fn) {} + CMT_INTRIN Result operator()(Args... 
args) override final { return fn(args...); } + CMT_INTRIN virtual_function<Result, Args...>* make_copy() const override final { return new virtual_function_impl{ fn }; } - CID_INTRIN ~virtual_function_impl() {} + CMT_INTRIN ~virtual_function_impl() {} private: Fn fn; @@ -1347,13 +1363,13 @@ struct func_filter<Result(Args...)> }; template <typename T> -constexpr CID_INTRIN T return_val() noexcept +constexpr CMT_INTRIN T return_val() noexcept { return {}; } template <> -constexpr CID_INTRIN void return_val<void>() noexcept +constexpr CMT_INTRIN void return_val<void>() noexcept { } } @@ -1381,16 +1397,16 @@ struct function<Result(Args...)> return *this; } - CID_INTRIN function() : fn(nullptr) {} - CID_INTRIN function(std::nullptr_t) : fn(nullptr) {} + CMT_INTRIN function() : fn(nullptr) {} + CMT_INTRIN function(std::nullptr_t) : fn(nullptr) {} template <typename Func> - CID_INTRIN function(const Func& x) + CMT_INTRIN function(const Func& x) : fn(new details::virtual_function_impl<typename details::func_filter<Func>::type, Result, Args...>( x)) { } function(const this_t& other) : fn(other.fn ? other.fn->make_copy() : nullptr) {} - CID_INTRIN function& operator=(const this_t& other) + CMT_INTRIN function& operator=(const this_t& other) { if ((&other != this) && (other.fn)) { @@ -1400,14 +1416,14 @@ struct function<Result(Args...)> } return *this; } - CID_INTRIN function& operator=(std::nullptr_t) + CMT_INTRIN function& operator=(std::nullptr_t) { delete fn; fn = nullptr; return *this; } template <typename Fn> - CID_INTRIN function& operator=(const Fn& x) + CMT_INTRIN function& operator=(const Fn& x) { using FnImpl = details::virtual_function_impl<typename details::func_filter<Fn>::type, Result, Args...>; @@ -1416,24 +1432,24 @@ struct function<Result(Args...)> fn = temp; return *this; } - CID_INTRIN Result operator()(Args... args) const + CMT_INTRIN Result operator()(Args... args) const { if (fn) return (*fn)(args...); else return details::return_val<Result>(); } - CID_INTRIN explicit operator bool() const noexcept { return !!fn; } + CMT_INTRIN explicit operator bool() const noexcept { return !!fn; } - CID_INTRIN ~function() { delete fn; } + CMT_INTRIN ~function() { delete fn; } private: details::virtual_function<Result, Args...>* fn; }; template <typename Ret, typename... Args, typename T, typename Fn, typename DefFn = fn_noop> -CID_INLINE function<Ret(Args...)> cdispatch(cvals_t<T>, identity<T>, Fn&&, DefFn&& deffn = DefFn()) +CMT_INLINE function<Ret(Args...)> cdispatch(cvals_t<T>, identity<T>, Fn&&, DefFn&& deffn = DefFn()) { - return [=](Args... args) CID_INLINE_MEMBER -> Ret { return deffn(std::forward<Args>(args)...); }; + return [=](Args... args) CMT_INLINE_MEMBER -> Ret { return deffn(std::forward<Args>(args)...); }; } template <typename Ret, typename... Args, typename T, T v0, T... values, typename Fn, @@ -1444,7 +1460,7 @@ inline function<Ret(Args...)> cdispatch(cvals_t<T, v0, values...>, identity<T> v if (value == v0) { return [=](Args... args) - CID_INLINE_MEMBER -> Ret { return fn(cval<T, v0>, std::forward<Args>(args)...); }; + CMT_INLINE_MEMBER -> Ret { return fn(cval<T, v0>, std::forward<Args>(args)...); }; } else { @@ -1462,7 +1478,7 @@ inline size_t cfind(cvals_t<T, values...>, identity<T> value) } template <typename Fn, typename... Args> -CID_NOINLINE static result_of<Fn(Args...)> noinline(Fn&& fn, Args&&... args) +CMT_NOINLINE static result_of<Fn(Args...)> noinline(Fn&& fn, Args&&... 
args) { return fn(std::forward<Args>(args)...); } @@ -1471,7 +1487,7 @@ template <typename Fn> struct fn_noinline { template <typename... Args> - CID_INTRIN result_of<Fn(Args...)> operator()(Args&&... args) const + CMT_INTRIN result_of<Fn(Args...)> operator()(Args&&... args) const { return noinline(Fn{}, std::forward<Args>(args)...); } @@ -1479,7 +1495,7 @@ struct fn_noinline template <typename... Args, typename Fn, typename Ret = decltype(std::declval<Fn>()(std::declval<Args>()...)), typename NonMemFn = Ret (*)(Fn*, Args...)> -CID_INTRIN NonMemFn make_nonmember(const Fn&) +CMT_INTRIN NonMemFn make_nonmember(const Fn&) { return [](Fn* fn, Args... args) -> Ret { return fn->operator()(std::forward<Args>(args)...); }; } @@ -1515,9 +1531,9 @@ inline const char* type_name() noexcept { constexpr size_t prefix = details::strlen("const char *cometa::type_name() [T = "); constexpr size_t postfix = details::strlen("]"); - constexpr size_t length = sizeof(CID_FUNC_SIGNATURE) - 1 - prefix - postfix; + constexpr size_t length = sizeof(CMT_FUNC_SIGNATURE) - 1 - prefix - postfix; static const std::array<char, length + 1> name = - details::gettypename_impl(CID_FUNC_SIGNATURE + prefix, csizeseq<length>); + details::gettypename_impl(CMT_FUNC_SIGNATURE + prefix, csizeseq<length>); return name.data(); } @@ -1728,14 +1744,14 @@ struct autocast_impl { const Tfrom value; template <typename T> - CID_INTRIN constexpr operator T() const noexcept + CMT_INTRIN constexpr operator T() const noexcept { return static_cast<T>(value); } }; template <typename Tfrom> -CID_INTRIN constexpr autocast_impl<Tfrom> autocast(const Tfrom& value) noexcept +CMT_INTRIN constexpr autocast_impl<Tfrom> autocast(const Tfrom& value) noexcept { return { value }; } diff --git a/include/kfr/cometa/string.hpp b/include/kfr/cometa/string.hpp @@ -7,7 +7,7 @@ #include <utility> #pragma clang diagnostic push -#if CID_HAS_WARNING("-Wformat-security") +#if CMT_HAS_WARNING("-Wformat-security") #pragma clang diagnostic ignored "-Wformat-security" #pragma clang diagnostic ignored "-Wused-but-marked-unused" #endif @@ -16,7 +16,7 @@ namespace cometa { template <typename... Args> -CID_INLINE std::string as_string(const Args&... args); +CMT_INLINE std::string as_string(const Args&... args); template <typename T> constexpr inline const T& repr(const T& value) @@ -46,13 +46,13 @@ namespace details { template <size_t N, size_t... indices> -CID_INLINE constexpr cstring<N> make_cstring_impl(const char (&str)[N], csizes_t<indices...>) +CMT_INLINE constexpr cstring<N> make_cstring_impl(const char (&str)[N], csizes_t<indices...>) { return { { str[indices]..., 0 } }; } template <size_t N1, size_t N2, size_t... indices> -CID_INLINE constexpr cstring<N1 - 1 + N2 - 1 + 1> concat_str_impl(const cstring<N1>& str1, +CMT_INLINE constexpr cstring<N1 - 1 + N2 - 1 + 1> concat_str_impl(const cstring<N1>& str1, const cstring<N2>& str2, csizes_t<indices...>) { @@ -60,7 +60,7 @@ CID_INLINE constexpr cstring<N1 - 1 + N2 - 1 + 1> concat_str_impl(const cstring< return { { (indices < L1 ? str1[indices] : str2[indices - L1])..., 0 } }; } template <size_t N1, size_t N2, typename... 
Args> -CID_INLINE constexpr cstring<N1 - 1 + N2 - 1 + 1> concat_str_impl(const cstring<N1>& str1, +CMT_INLINE constexpr cstring<N1 - 1 + N2 - 1 + 1> concat_str_impl(const cstring<N1>& str1, const cstring<N2>& str2) { return concat_str_impl(str1, str2, csizeseq<N1 - 1 + N2 - 1>); @@ -77,29 +77,29 @@ cstring<N1 - Nfrom + Nto> str_replace_impl(size_t pos, const cstring<N1>& str, c } } -CID_INLINE constexpr cstring<1> concat_cstring() { return { { 0 } }; } +CMT_INLINE constexpr cstring<1> concat_cstring() { return { { 0 } }; } template <size_t N1> -CID_INLINE constexpr cstring<N1> concat_cstring(const cstring<N1>& str1) +CMT_INLINE constexpr cstring<N1> concat_cstring(const cstring<N1>& str1) { return str1; } template <size_t N1, size_t N2, typename... Args> -CID_INLINE constexpr auto concat_cstring(const cstring<N1>& str1, const cstring<N2>& str2, +CMT_INLINE constexpr auto concat_cstring(const cstring<N1>& str1, const cstring<N2>& str2, const Args&... args) { return details::concat_str_impl(str1, concat_cstring(str2, args...)); } template <size_t N> -CID_INLINE constexpr cstring<N> make_cstring(const char (&str)[N]) +CMT_INLINE constexpr cstring<N> make_cstring(const char (&str)[N]) { return details::make_cstring_impl(str, csizeseq<N - 1>); } template <char... chars> -CID_INLINE constexpr cstring<sizeof...(chars) + 1> make_cstring(cchars_t<chars...>) +CMT_INLINE constexpr cstring<sizeof...(chars) + 1> make_cstring(cchars_t<chars...>) { return { { chars..., 0 } }; } @@ -152,99 +152,99 @@ constexpr auto itoa() } template <typename T, char t, int width, int prec, CMT_ENABLE_IF(width < 0 && prec >= 0)> -CID_INLINE constexpr auto value_fmt_arg(ctype_t<fmt_t<T, t, width, prec>>) +CMT_INLINE constexpr auto value_fmt_arg(ctype_t<fmt_t<T, t, width, prec>>) { return concat_cstring(make_cstring("."), itoa<prec>()); } template <typename T, char t, int width, int prec, CMT_ENABLE_IF(width >= 0 && prec < 0)> -CID_INLINE constexpr auto value_fmt_arg(ctype_t<fmt_t<T, t, width, prec>>) +CMT_INLINE constexpr auto value_fmt_arg(ctype_t<fmt_t<T, t, width, prec>>) { return itoa<width>(); } template <typename T, char t, int width, int prec, CMT_ENABLE_IF(width < 0 && prec < 0)> -CID_INLINE constexpr auto value_fmt_arg(ctype_t<fmt_t<T, t, width, prec>>) +CMT_INLINE constexpr auto value_fmt_arg(ctype_t<fmt_t<T, t, width, prec>>) { return make_cstring(""); } template <typename T, char t, int width, int prec, CMT_ENABLE_IF(width >= 0 && prec >= 0)> -CID_INLINE constexpr auto value_fmt_arg(ctype_t<fmt_t<T, t, width, prec>>) +CMT_INLINE constexpr auto value_fmt_arg(ctype_t<fmt_t<T, t, width, prec>>) { return concat_cstring(itoa<width>(), make_cstring("."), itoa<prec>()); } -CID_INLINE constexpr auto value_fmt(ctype_t<bool>) { return make_cstring("s"); } -CID_INLINE constexpr auto value_fmt(ctype_t<std::string>) { return make_cstring("s"); } -CID_INLINE constexpr auto value_fmt(ctype_t<char>) { return make_cstring("d"); } -CID_INLINE constexpr auto value_fmt(ctype_t<signed char>) { return make_cstring("d"); } -CID_INLINE constexpr auto value_fmt(ctype_t<unsigned char>) { return make_cstring("d"); } -CID_INLINE constexpr auto value_fmt(ctype_t<short>) { return make_cstring("d"); } -CID_INLINE constexpr auto value_fmt(ctype_t<unsigned short>) { return make_cstring("d"); } -CID_INLINE constexpr auto value_fmt(ctype_t<int>) { return make_cstring("d"); } -CID_INLINE constexpr auto value_fmt(ctype_t<long>) { return make_cstring("ld"); } -CID_INLINE constexpr auto value_fmt(ctype_t<long long>) { return make_cstring("lld"); } 
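Editor's note (illustrative sketch, not part of this commit): the value_fmt overloads above map each C++ type to its printf conversion ("d", "ld", "g", "s", ...); build_fmt, further down in this diff, substitutes those conversions for "{}" placeholders so the cometa formatting helpers stay consistent with the argument types. A small usage sketch, assuming the printfmt and format functions shown later in this diff:

    #include <kfr/cometa/string.hpp>

    int main()
    {
        // Each "{}" is replaced by the specifier value_fmt selects for the
        // argument type: int -> "%d", double -> "%g", unsigned int -> "%u".
        cometa::printfmt("samples: {}, peak: {}\n", 1024, 0.97);
        const std::string text = cometa::format("rate = {}", 44100u);
        cometa::printfmt("{}\n", text);
        return 0;
    }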
-CID_INLINE constexpr auto value_fmt(ctype_t<unsigned int>) { return make_cstring("u"); } -CID_INLINE constexpr auto value_fmt(ctype_t<unsigned long>) { return make_cstring("lu"); } -CID_INLINE constexpr auto value_fmt(ctype_t<unsigned long long>) { return make_cstring("llu"); } -CID_INLINE constexpr auto value_fmt(ctype_t<float>) { return make_cstring("g"); } -CID_INLINE constexpr auto value_fmt(ctype_t<double>) { return make_cstring("g"); } -CID_INLINE constexpr auto value_fmt(ctype_t<long double>) { return make_cstring("Lg"); } -CID_INLINE constexpr auto value_fmt(ctype_t<const char*>) { return make_cstring("s"); } -CID_INLINE constexpr auto value_fmt(ctype_t<char*>) { return make_cstring("s"); } -CID_INLINE constexpr auto value_fmt(ctype_t<void*>) { return make_cstring("p"); } -CID_INLINE constexpr auto value_fmt(ctype_t<const void*>) { return make_cstring("p"); } +CMT_INLINE constexpr auto value_fmt(ctype_t<bool>) { return make_cstring("s"); } +CMT_INLINE constexpr auto value_fmt(ctype_t<std::string>) { return make_cstring("s"); } +CMT_INLINE constexpr auto value_fmt(ctype_t<char>) { return make_cstring("d"); } +CMT_INLINE constexpr auto value_fmt(ctype_t<signed char>) { return make_cstring("d"); } +CMT_INLINE constexpr auto value_fmt(ctype_t<unsigned char>) { return make_cstring("d"); } +CMT_INLINE constexpr auto value_fmt(ctype_t<short>) { return make_cstring("d"); } +CMT_INLINE constexpr auto value_fmt(ctype_t<unsigned short>) { return make_cstring("d"); } +CMT_INLINE constexpr auto value_fmt(ctype_t<int>) { return make_cstring("d"); } +CMT_INLINE constexpr auto value_fmt(ctype_t<long>) { return make_cstring("ld"); } +CMT_INLINE constexpr auto value_fmt(ctype_t<long long>) { return make_cstring("lld"); } +CMT_INLINE constexpr auto value_fmt(ctype_t<unsigned int>) { return make_cstring("u"); } +CMT_INLINE constexpr auto value_fmt(ctype_t<unsigned long>) { return make_cstring("lu"); } +CMT_INLINE constexpr auto value_fmt(ctype_t<unsigned long long>) { return make_cstring("llu"); } +CMT_INLINE constexpr auto value_fmt(ctype_t<float>) { return make_cstring("g"); } +CMT_INLINE constexpr auto value_fmt(ctype_t<double>) { return make_cstring("g"); } +CMT_INLINE constexpr auto value_fmt(ctype_t<long double>) { return make_cstring("Lg"); } +CMT_INLINE constexpr auto value_fmt(ctype_t<const char*>) { return make_cstring("s"); } +CMT_INLINE constexpr auto value_fmt(ctype_t<char*>) { return make_cstring("s"); } +CMT_INLINE constexpr auto value_fmt(ctype_t<void*>) { return make_cstring("p"); } +CMT_INLINE constexpr auto value_fmt(ctype_t<const void*>) { return make_cstring("p"); } template <char... 
chars> -CID_INLINE constexpr auto value_fmt(ctype_t<cchars_t<chars...>>) +CMT_INLINE constexpr auto value_fmt(ctype_t<cchars_t<chars...>>) { return concat_cstring(make_cstring("s"), make_cstring(cchars<chars...>)); } template <typename T> -CID_INLINE constexpr auto value_fmt(ctype_t<ctype_t<T>>) +CMT_INLINE constexpr auto value_fmt(ctype_t<ctype_t<T>>) { return make_cstring("s"); } template <typename T, int width, int prec> -CID_INLINE constexpr auto value_fmt(ctype_t<fmt_t<T, static_cast<char>(-1), width, prec>> fmt) +CMT_INLINE constexpr auto value_fmt(ctype_t<fmt_t<T, static_cast<char>(-1), width, prec>> fmt) { return concat_cstring(value_fmt_arg(fmt), value_fmt(ctype<repr_type<T>>)); } template <typename T, char t, int width, int prec> -CID_INLINE constexpr auto value_fmt(ctype_t<fmt_t<T, t, width, prec>> fmt) +CMT_INLINE constexpr auto value_fmt(ctype_t<fmt_t<T, t, width, prec>> fmt) { return concat_cstring(value_fmt_arg(fmt), cstring<2>{ { t, 0 } }); } template <char... chars> -CID_INLINE const char* pack_value(const cchars_t<chars...>&) +CMT_INLINE const char* pack_value(const cchars_t<chars...>&) { return ""; } template <typename Arg> -CID_INLINE const Arg& pack_value(const Arg& value) +CMT_INLINE const Arg& pack_value(const Arg& value) { return value; } -CID_INLINE double pack_value(float value) { return static_cast<double>(value); } -CID_INLINE auto pack_value(bool value) { return value ? "true" : "false"; } -CID_INLINE auto pack_value(const std::string& value) { return value.c_str(); } +CMT_INLINE double pack_value(float value) { return static_cast<double>(value); } +CMT_INLINE auto pack_value(bool value) { return value ? "true" : "false"; } +CMT_INLINE auto pack_value(const std::string& value) { return value.c_str(); } template <typename T> -CID_INLINE const char* pack_value(ctype_t<T>) +CMT_INLINE const char* pack_value(ctype_t<T>) { return type_name<T>(); } template <typename T, char t, int width, int prec> -CID_INLINE auto pack_value(const fmt_t<T, t, width, prec>& value) +CMT_INLINE auto pack_value(const fmt_t<T, t, width, prec>& value) { return pack_value(repr(value.value)); } template <size_t N1, size_t Nnew, size_t... indices> -CID_INLINE constexpr cstring<N1 - 3 + Nnew> fmt_replace_impl(const cstring<N1>& str, +CMT_INLINE constexpr cstring<N1 - 3 + Nnew> fmt_replace_impl(const cstring<N1>& str, const cstring<Nnew>& newfmt, csizes_t<indices...>) { @@ -279,7 +279,7 @@ CID_INLINE constexpr cstring<N1 - 3 + Nnew> fmt_replace_impl(const cstring<N1>& } template <size_t N1, size_t Nto> -CID_INLINE constexpr cstring<N1 - 3 + Nto> fmt_replace(const cstring<N1>& str, const cstring<Nto>& newfmt) +CMT_INLINE constexpr cstring<N1 - 3 + Nto> fmt_replace(const cstring<N1>& str, const cstring<Nto>& newfmt) { return fmt_replace_impl(str, newfmt, csizeseq<N1 - 3 + Nto - 1>); } @@ -295,10 +295,10 @@ inline std::string replace_one(const std::string& str, const std::string& from, return r; } -CID_INLINE const std::string& build_fmt(const std::string& str, ctypes_t<>) { return str; } +CMT_INLINE const std::string& build_fmt(const std::string& str, ctypes_t<>) { return str; } template <typename Arg, typename... 
Args> -CID_INLINE auto build_fmt(const std::string& str, ctypes_t<Arg, Args...>) +CMT_INLINE auto build_fmt(const std::string& str, ctypes_t<Arg, Args...>) { constexpr auto fmt = value_fmt(ctype<decay<Arg>>); return build_fmt(replace_one(str, "{}", "%" + std::string(fmt.data())), ctypes<Args...>); @@ -306,13 +306,13 @@ CID_INLINE auto build_fmt(const std::string& str, ctypes_t<Arg, Args...>) } template <char t, int width = -1, int prec = -1, typename T> -CID_INLINE details::fmt_t<T, t, width, prec> fmt(const T& value) +CMT_INLINE details::fmt_t<T, t, width, prec> fmt(const T& value) { return { value }; } template <int width = -1, int prec = -1, typename T> -CID_INLINE details::fmt_t<T, static_cast<char>(-1), width, prec> fmtwidth(const T& value) +CMT_INLINE details::fmt_t<T, static_cast<char>(-1), width, prec> fmtwidth(const T& value) { return { value }; } @@ -358,7 +358,7 @@ template <char... chars> struct print_t { template <typename... Args> - CID_INLINE void operator()(const Args&... args) + CMT_INLINE void operator()(const Args&... args) { constexpr auto format_str = build_fmt_str(cchars<chars...>, ctypes<repr_type<Args>...>); @@ -373,7 +373,7 @@ constexpr format_t<chars...> operator""_format() } template <typename Char, Char... chars> -constexpr CID_INLINE print_t<chars...> operator""_print() +constexpr CMT_INLINE print_t<chars...> operator""_print() { return {}; } @@ -381,28 +381,28 @@ constexpr CID_INLINE print_t<chars...> operator""_print() #pragma clang diagnostic pop template <typename... Args> -CID_INLINE void printfmt(const std::string& fmt, const Args&... args) +CMT_INLINE void printfmt(const std::string& fmt, const Args&... args) { const auto format_str = details::build_fmt(fmt, ctypes<repr_type<Args>...>); std::printf(format_str.data(), details::pack_value(repr(args))...); } template <typename... Args> -CID_INLINE void fprintfmt(FILE* f, const std::string& fmt, const Args&... args) +CMT_INLINE void fprintfmt(FILE* f, const std::string& fmt, const Args&... args) { const auto format_str = details::build_fmt(fmt, ctypes<repr_type<Args>...>); std::fprintf(f, format_str.data(), details::pack_value(repr(args))...); } template <typename... Args> -CID_INLINE int snprintfmt(char* str, size_t size, const std::string& fmt, const Args&... args) +CMT_INLINE int snprintfmt(char* str, size_t size, const std::string& fmt, const Args&... args) { const auto format_str = details::build_fmt(fmt, ctypes<repr_type<Args>...>); return std::snprintf(str, size, format_str.data(), details::pack_value(repr(args))...); } template <typename... Args> -CID_INLINE std::string format(const std::string& fmt, const Args&... args) +CMT_INLINE std::string format(const std::string& fmt, const Args&... args) { std::string result; const auto format_str = details::build_fmt(fmt, ctypes<repr_type<Args>...>); @@ -416,7 +416,7 @@ CID_INLINE std::string format(const std::string& fmt, const Args&... args) } template <typename... Args> -CID_INLINE void print(const Args&... args) +CMT_INLINE void print(const Args&... args) { constexpr auto format_str = concat_cstring( concat_cstring(make_cstring("%"), details::value_fmt(ctype<decay<repr_type<Args>>>))...); @@ -424,7 +424,7 @@ CID_INLINE void print(const Args&... args) } template <typename... Args> -CID_INLINE void println(const Args&... args) +CMT_INLINE void println(const Args&... 
args) { constexpr auto format_str = concat_cstring( concat_cstring(make_cstring("%"), details::value_fmt(ctype<decay<repr_type<Args>>>))..., @@ -433,7 +433,7 @@ CID_INLINE void println(const Args&... args) } template <typename... Args> -CID_INLINE std::string as_string(const Args&... args) +CMT_INLINE std::string as_string(const Args&... args) { std::string result; constexpr auto format_str = concat_cstring( diff --git a/include/kfr/dft.hpp b/include/kfr/dft.hpp @@ -0,0 +1,31 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "base.hpp" + +#include "dft/bitrev.hpp" +#include "dft/conv.hpp" +#include "dft/fft.hpp" +#include "dft/ft.hpp" +#include "dft/reference_dft.hpp" diff --git a/include/kfr/dft/bitrev.hpp b/include/kfr/dft/bitrev.hpp @@ -85,7 +85,7 @@ KFR_INTRIN void fft_reorder_swap(T* inout, size_t i) template <size_t log2n, size_t bitrev, typename T> KFR_INTRIN void fft_reorder_swap_two(T* inout, size_t i, size_t j) { - __builtin_assume(i != j); + CMT_ASSUME(i != j); using cxx = cvec<T, 16>; constexpr size_t N = 1 << log2n; constexpr size_t N4 = 2 * N / 4; @@ -102,7 +102,7 @@ KFR_INTRIN void fft_reorder_swap_two(T* inout, size_t i, size_t j) template <size_t log2n, size_t bitrev, typename T> KFR_INTRIN void fft_reorder_swap(T* inout, size_t i, size_t j) { - __builtin_assume(i != j); + CMT_ASSUME(i != j); using cxx = cvec<T, 16>; constexpr size_t N = 1 << log2n; constexpr size_t N4 = 2 * N / 4; @@ -259,7 +259,7 @@ void cwrite_reordered(T* out, cvec<T, 16> value, size_t N4, cbool_t<use_br2>) template <typename T, bool use_br2> KFR_INTRIN void fft_reorder_swap_n4(T* inout, size_t i, size_t j, size_t N4, cbool_t<use_br2>) { - __builtin_assume(i != j); + CMT_ASSUME(i != j); const cvec<T, 16> vi = cread_group<4, 4, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + i), N4); const cvec<T, 16> vj = cread_group<4, 4, fft_reorder_aligned>(ptr_cast<complex<T>>(inout + j), N4); cwrite_reordered(inout + j, vi, N4, cbool<use_br2>); diff --git a/include/kfr/dft/conv.hpp b/include/kfr/dft/conv.hpp @@ -31,7 +31,7 @@ #include "fft.hpp" #pragma clang diagnostic push -#if CID_HAS_WARNING("-Wshadow") +#if CMT_HAS_WARNING("-Wshadow") #pragma clang diagnostic ignored "-Wshadow" #endif diff --git a/include/kfr/dft/fft.hpp b/include/kfr/dft/fft.hpp @@ -35,7 +35,7 @@ #include "ft.hpp" #pragma clang diagnostic push -#if CID_HAS_WARNING("-Wshadow") +#if CMT_HAS_WARNING("-Wshadow") #pragma clang diagnostic ignored "-Wshadow" #endif @@ -65,7 +65,7 @@ protected: }; #pragma clang diagnostic push -#if CID_HAS_WARNING("-Wassume") +#if CMT_HAS_WARNING("-Wassume") #pragma clang 
diagnostic ignored "-Wassume" #endif @@ -194,7 +194,7 @@ KFR_SINTRIN void radix4_body(size_t N, csize_t<width>, ctrue_t, cbool_t<splitout } template <typename T> -KFR_NOINLINE cvec<T, 1> calculate_twiddle(size_t n, size_t size) +CMT_NOINLINE cvec<T, 1> calculate_twiddle(size_t n, size_t size) { if (n == 0) { @@ -214,9 +214,9 @@ KFR_NOINLINE cvec<T, 1> calculate_twiddle(size_t n, size_t size) } else { - double kth = c_pi<double, 2> * (n / static_cast<double>(size)); - double tcos = +kfr::cos(kth); - double tsin = -kfr::sin(kth); + fbase kth = c_pi<fbase, 2> * (n / static_cast<fbase>(size)); + fbase tcos = +kfr::cos(kth); + fbase tsin = -kfr::sin(kth); return make_vector(static_cast<T>(tcos), static_cast<T>(tsin)); } } @@ -226,7 +226,7 @@ KFR_SINTRIN void initialize_twiddles_impl(complex<T>*& twiddle, size_t nn, size_ bool split_format) { vec<T, 2 * width> result = T(); - KFR_LOOP_UNROLL + CMT_LOOP_UNROLL for (size_t i = 0; i < width; i++) { const cvec<T, 1> r = calculate_twiddle<T>(nn + nnstep * i, size); @@ -241,10 +241,10 @@ KFR_SINTRIN void initialize_twiddles_impl(complex<T>*& twiddle, size_t nn, size_ } template <typename T, size_t width> -KFR_NOINLINE void initialize_twiddles(complex<T>*& twiddle, size_t stage_size, size_t size, bool split_format) +CMT_NOINLINE void initialize_twiddles(complex<T>*& twiddle, size_t stage_size, size_t size, bool split_format) { size_t nnstep = size / stage_size; - KFR_LOOP_NOUNROLL + CMT_LOOP_NOUNROLL for (size_t n = 0; n < stage_size / 4; n += width) { initialize_twiddles_impl<T, width>(twiddle, n * nnstep * 1, nnstep * 1, size, split_format); @@ -256,7 +256,7 @@ KFR_NOINLINE void initialize_twiddles(complex<T>*& twiddle, size_t stage_size, s template <typename T> KFR_SINTRIN void prefetch_one(const complex<T>* in) { -#ifdef CID_ARCH_X86 +#ifdef CMT_ARCH_X86 __builtin_prefetch(ptr_cast<void>(in), 0, _MM_HINT_T0); #else __builtin_prefetch(ptr_cast<void>(in)); @@ -266,7 +266,7 @@ KFR_SINTRIN void prefetch_one(const complex<T>* in) template <typename T> KFR_SINTRIN void prefetch_four(size_t stride, const complex<T>* in) { -#ifdef CID_ARCH_X86 +#ifdef CMT_ARCH_X86 __builtin_prefetch(ptr_cast<void>(in), 0, _MM_HINT_T0); __builtin_prefetch(ptr_cast<void>(in + stride), 0, _MM_HINT_T0); __builtin_prefetch(ptr_cast<void>(in + stride * 2), 0, _MM_HINT_T0); @@ -288,12 +288,12 @@ KFR_SINTRIN cfalse_t radix4_pass(Ntype N, size_t blocks, csize_t<width>, cbool_t constexpr static size_t prefetch_offset = width * 8; const auto N4 = N / csize<4>; const auto N43 = N4 * csize<3>; - __builtin_assume(blocks > 0); - __builtin_assume(N > 0); - __builtin_assume(N4 > 0); - KFR_LOOP_NOUNROLL for (size_t b = 0; b < blocks; b++) + CMT_ASSUME(blocks > 0); + CMT_ASSUME(N > 0); + CMT_ASSUME(N4 > 0); + CMT_LOOP_NOUNROLL for (size_t b = 0; b < blocks; b++) { -#pragma clang loop unroll_count(default_unroll_count) +#pragma clang loop unroll_count(2) for (size_t n2 = 0; n2 < N4; n2 += width) { if (prefetch) @@ -315,7 +315,7 @@ KFR_SINTRIN ctrue_t radix4_pass(csize_t<32>, size_t blocks, csize_t<width>, cfal cbool_t<use_br2>, cbool_t<prefetch>, cbool_t<inverse>, cbool_t<aligned>, complex<T>* out, const complex<T>*, const complex<T>*& /*twiddle*/) { - __builtin_assume(blocks > 0); + CMT_ASSUME(blocks > 0); constexpr static size_t prefetch_offset = 32 * 4; for (size_t b = 0; b < blocks; b++) { @@ -352,7 +352,7 @@ KFR_SINTRIN ctrue_t radix4_pass(csize_t<8>, size_t blocks, csize_t<width>, cfals cbool_t<use_br2>, cbool_t<prefetch>, cbool_t<inverse>, cbool_t<aligned>, complex<T>* out, const 
complex<T>*, const complex<T>*& /*twiddle*/) { - __builtin_assume(blocks > 0); + CMT_ASSUME(blocks > 0); constexpr static size_t prefetch_offset = width * 16; for (size_t b = 0; b < blocks; b += 2) { @@ -377,7 +377,7 @@ KFR_SINTRIN ctrue_t radix4_pass(csize_t<16>, size_t blocks, csize_t<width>, cfal cbool_t<use_br2>, cbool_t<prefetch>, cbool_t<inverse>, cbool_t<aligned>, complex<T>* out, const complex<T>*, const complex<T>*& /*twiddle*/) { - __builtin_assume(blocks > 0); + CMT_ASSUME(blocks > 0); constexpr static size_t prefetch_offset = width * 4; #pragma clang loop unroll_count(2) for (size_t b = 0; b < blocks; b += 2) @@ -409,8 +409,8 @@ KFR_SINTRIN ctrue_t radix4_pass(csize_t<4>, size_t blocks, csize_t<width>, cfals complex<T>* out, const complex<T>*, const complex<T>*& /*twiddle*/) { constexpr static size_t prefetch_offset = width * 4; - __builtin_assume(blocks > 0); - KFR_LOOP_NOUNROLL + CMT_ASSUME(blocks > 0); + CMT_LOOP_NOUNROLL for (size_t b = 0; b < blocks; b += 4) { if (prefetch) @@ -453,8 +453,8 @@ protected: if (splitin) in = out; const size_t stage_size = this->stage_size; - __builtin_assume(stage_size >= 2048); - __builtin_assume(stage_size % 2048 == 0); + CMT_ASSUME(stage_size >= 2048); + CMT_ASSUME(stage_size % 2048 == 0); radix4_pass(stage_size, 1, csize<width>, ctrue, cbool<splitin>, cbool<!is_even>, cbool<prefetch>, cbool<inverse>, cbool<aligned>, out, in, twiddle); } @@ -836,14 +836,14 @@ struct dft_plan const size_t log2n = ilog2(size); cswitch(csizes<1, 2, 3, 4, 5, 6, 7, 8>, log2n, [&](auto log2n) { - add_stage<internal::fft_specialization_t<T, val_of(log2n), false>::template type>( - size, type); + add_stage<internal::fft_specialization_t<T, val_of(decltype(log2n)()), + false>::template type>(size, type); }, [&]() { cswitch(cfalse_true, is_even(log2n), [&](auto is_even) { make_fft(size, type, is_even, ctrue); - add_stage<internal::fft_reorder_stage_impl_t<T, val_of(is_even)>::template type>( - size, type); + add_stage<internal::fft_reorder_stage_impl_t< + T, val_of(decltype(is_even)())>::template type>(size, type); }); }); initialize(type); diff --git a/include/kfr/dft/ft.hpp b/include/kfr/dft/ft.hpp @@ -41,18 +41,18 @@ namespace internal { template <typename T, size_t N, KFR_ENABLE_IF(N >= 2)> -KFR_INLINE vec<T, N> cmul_impl(vec<T, N> x, vec<T, N> y) +CMT_INLINE vec<T, N> cmul_impl(vec<T, N> x, vec<T, N> y) { return subadd(x * dupeven(y), swap<2>(x) * dupodd(y)); } template <typename T, size_t N, KFR_ENABLE_IF(N > 2)> -KFR_INLINE vec<T, N> cmul_impl(vec<T, N> x, vec<T, 2> y) +CMT_INLINE vec<T, N> cmul_impl(vec<T, N> x, vec<T, 2> y) { vec<T, N> yy = resize<N>(y); return cmul_impl(x, yy); } template <typename T, size_t N, KFR_ENABLE_IF(N > 2)> -KFR_INLINE vec<T, N> cmul_impl(vec<T, 2> x, vec<T, N> y) +CMT_INLINE vec<T, N> cmul_impl(vec<T, 2> x, vec<T, N> y) { vec<T, N> xx = resize<N>(x); return cmul_impl(xx, y); @@ -60,24 +60,24 @@ KFR_INLINE vec<T, N> cmul_impl(vec<T, 2> x, vec<T, N> y) /// Complex Multiplication template <typename T, size_t N1, size_t N2> -KFR_INLINE vec<T, std::max(N1, N2)> cmul(vec<T, N1> x, vec<T, N2> y) +CMT_INLINE vec<T, const_max(N1, N2)> cmul(vec<T, N1> x, vec<T, N2> y) { return internal::cmul_impl(x, y); } KFR_FN(cmul) template <typename T, size_t N, KFR_ENABLE_IF(N >= 2)> -KFR_INLINE vec<T, N> cmul_conj(vec<T, N> x, vec<T, N> y) +CMT_INLINE vec<T, N> cmul_conj(vec<T, N> x, vec<T, N> y) { return swap<2>(subadd(swap<2>(x) * cdupreal(y), x * cdupimag(y))); } template <typename T, size_t N, KFR_ENABLE_IF(N >= 2)> -KFR_INLINE vec<T, N> 
cmul_2conj(vec<T, N> in0, vec<T, N> in1, vec<T, N> tw) +CMT_INLINE vec<T, N> cmul_2conj(vec<T, N> in0, vec<T, N> in1, vec<T, N> tw) { return (in0 + in1) * cdupreal(tw) + swap<2>(cnegimag(in0 - in1)) * cdupimag(tw); } template <typename T, size_t N, KFR_ENABLE_IF(N >= 2)> -KFR_INLINE void cmul_2conj(vec<T, N>& out0, vec<T, N>& out1, vec<T, 2> in0, vec<T, 2> in1, vec<T, N> tw) +CMT_INLINE void cmul_2conj(vec<T, N>& out0, vec<T, N>& out1, vec<T, 2> in0, vec<T, 2> in1, vec<T, N> tw) { const vec<T, N> twr = cdupreal(tw); const vec<T, N> twi = cdupimag(tw); @@ -89,13 +89,13 @@ KFR_INLINE void cmul_2conj(vec<T, N>& out0, vec<T, N>& out1, vec<T, 2> in0, vec< out1 += sumtw - diftw; } template <typename T, size_t N, KFR_ENABLE_IF(N > 2)> -KFR_INLINE vec<T, N> cmul_conj(vec<T, N> x, vec<T, 2> y) +CMT_INLINE vec<T, N> cmul_conj(vec<T, N> x, vec<T, 2> y) { vec<T, N> yy = resize<N>(y); return cmul_conj(x, yy); } template <typename T, size_t N, KFR_ENABLE_IF(N > 2)> -KFR_INLINE vec<T, N> cmul_conj(vec<T, 2> x, vec<T, N> y) +CMT_INLINE vec<T, N> cmul_conj(vec<T, 2> x, vec<T, N> y) { vec<T, N> xx = resize<N>(x); return cmul_conj(xx, y); @@ -103,67 +103,70 @@ KFR_INLINE vec<T, N> cmul_conj(vec<T, 2> x, vec<T, N> y) KFR_FN(cmul_conj) KFR_FN(cmul_2conj) +template <typename T, size_t N> +using cvec = vec<T, N * 2>; + template <size_t N, bool A = false, typename T> -KFR_INLINE cvec<T, N> cread(const complex<T>* src) +CMT_INLINE cvec<T, N> cread(const complex<T>* src) { return internal_read_write::read<N * 2, A>(ptr_cast<T>(src)); } template <size_t N, bool A = false, typename T> -KFR_INLINE void cwrite(complex<T>* dest, cvec<T, N> value) +CMT_INLINE void cwrite(complex<T>* dest, cvec<T, N> value) { return internal_read_write::write<A>(ptr_cast<T>(dest), value); } template <size_t count, size_t N, size_t stride, bool A, typename T, size_t... indices> -KFR_INLINE cvec<T, count * N> cread_group_impl(const complex<T>* src, csizes_t<indices...>) +CMT_INLINE cvec<T, count * N> cread_group_impl(const complex<T>* src, csizes_t<indices...>) { return concat(read<N * 2, A>(ptr_cast<T>(src + stride * indices))...); } template <size_t count, size_t N, size_t stride, bool A, typename T, size_t... indices> -KFR_INLINE void cwrite_group_impl(complex<T>* dest, cvec<T, count * N> value, csizes_t<indices...>) +CMT_INLINE void cwrite_group_impl(complex<T>* dest, cvec<T, count * N> value, csizes_t<indices...>) { swallow{ (write<A>(ptr_cast<T>(dest + stride * indices), slice<indices * N * 2, N * 2>(value)), 0)... }; } template <size_t count, size_t N, bool A, typename T, size_t... indices> -KFR_INLINE cvec<T, count * N> cread_group_impl(const complex<T>* src, size_t stride, csizes_t<indices...>) +CMT_INLINE cvec<T, count * N> cread_group_impl(const complex<T>* src, size_t stride, csizes_t<indices...>) { return concat(read<N * 2, A>(ptr_cast<T>(src + stride * indices))...); } template <size_t count, size_t N, bool A, typename T, size_t... indices> -KFR_INLINE void cwrite_group_impl(complex<T>* dest, size_t stride, cvec<T, count * N> value, +CMT_INLINE void cwrite_group_impl(complex<T>* dest, size_t stride, cvec<T, count * N> value, csizes_t<indices...>) { swallow{ (write<A>(ptr_cast<T>(dest + stride * indices), slice<indices * N * 2, N * 2>(value)), 0)... 
}; } template <size_t count, size_t N, size_t stride, bool A = false, typename T> -KFR_INLINE cvec<T, count * N> cread_group(const complex<T>* src) +CMT_INLINE cvec<T, count * N> cread_group(const complex<T>* src) { return cread_group_impl<count, N, stride, A>(src, csizeseq<count>); } template <size_t count, size_t N, size_t stride, bool A = false, typename T> -KFR_INLINE void cwrite_group(complex<T>* dest, cvec<T, count * N> value) +CMT_INLINE void cwrite_group(complex<T>* dest, cvec<T, count * N> value) { return cwrite_group_impl<count, N, stride, A>(dest, value, csizeseq<count>); } template <size_t count, size_t N, bool A = false, typename T> -KFR_INLINE cvec<T, count * N> cread_group(const complex<T>* src, size_t stride) +CMT_INLINE cvec<T, count * N> cread_group(const complex<T>* src, size_t stride) { return cread_group_impl<count, N, A>(src, stride, csizeseq<count>); } template <size_t count, size_t N, bool A = false, typename T> -KFR_INLINE void cwrite_group(complex<T>* dest, size_t stride, cvec<T, count * N> value) +CMT_INLINE void cwrite_group(complex<T>* dest, size_t stride, cvec<T, count * N> value) { return cwrite_group_impl<count, N, A>(dest, stride, value, csizeseq<count>); } template <size_t N, bool A = false, bool split = false, typename T> -KFR_INLINE cvec<T, N> cread_split(const complex<T>* src) +CMT_INLINE cvec<T, N> cread_split(const complex<T>* src) { cvec<T, N> temp = internal_read_write::read<N * 2, A>(ptr_cast<T>(src)); if (split) @@ -172,7 +175,7 @@ KFR_INLINE cvec<T, N> cread_split(const complex<T>* src) } template <size_t N, bool A = false, bool split = false, typename T> -KFR_INLINE void cwrite_split(complex<T>* dest, cvec<T, N> value) +CMT_INLINE void cwrite_split(complex<T>* dest, cvec<T, N> value) { if (split) value = interleavehalfs(value); @@ -250,13 +253,13 @@ inline void cwrite_split<4, true, true, f64>(complex<f64>* dest, cvec<f64, 4> x) } template <size_t N, size_t stride, typename T, size_t... Indices> -KFR_INLINE cvec<T, N> cgather_helper(const complex<T>* base, csizes_t<Indices...>) +CMT_INLINE cvec<T, N> cgather_helper(const complex<T>* base, csizes_t<Indices...>) { return concat(ref_cast<cvec<T, 1>>(base[Indices * stride])...); } template <size_t N, size_t stride, typename T> -KFR_INLINE cvec<T, N> cgather(const complex<T>* base) +CMT_INLINE cvec<T, N> cgather(const complex<T>* base) { if (stride == 1) { @@ -266,7 +269,7 @@ KFR_INLINE cvec<T, N> cgather(const complex<T>* base) return cgather_helper<N, stride, T>(base, csizeseq<N>); } -KFR_INLINE size_t cgather_next(size_t& index, size_t stride, size_t size, size_t) +CMT_INLINE size_t cgather_next(size_t& index, size_t stride, size_t size, size_t) { size_t temp = index; index += stride; @@ -274,7 +277,7 @@ KFR_INLINE size_t cgather_next(size_t& index, size_t stride, size_t size, size_t index -= size; return temp; } -KFR_INLINE size_t cgather_next(size_t& index, size_t stride, size_t) +CMT_INLINE size_t cgather_next(size_t& index, size_t stride, size_t) { size_t temp = index; index += stride; @@ -282,45 +285,45 @@ KFR_INLINE size_t cgather_next(size_t& index, size_t stride, size_t) } template <size_t N, typename T, size_t... 
Indices> -KFR_INLINE cvec<T, N> cgather_helper(const complex<T>* base, size_t& index, size_t stride, +CMT_INLINE cvec<T, N> cgather_helper(const complex<T>* base, size_t& index, size_t stride, csizes_t<Indices...>) { return concat(ref_cast<cvec<T, 1>>(base[cgather_next(index, stride, Indices)])...); } template <size_t N, typename T> -KFR_INLINE cvec<T, N> cgather(const complex<T>* base, size_t& index, size_t stride) +CMT_INLINE cvec<T, N> cgather(const complex<T>* base, size_t& index, size_t stride) { return cgather_helper<N, T>(base, index, stride, csizeseq<N>); } template <size_t N, typename T> -KFR_INLINE cvec<T, N> cgather(const complex<T>* base, size_t stride) +CMT_INLINE cvec<T, N> cgather(const complex<T>* base, size_t stride) { size_t index = 0; return cgather_helper<N, T>(base, index, stride, csizeseq<N>); } template <size_t N, typename T, size_t... Indices> -KFR_INLINE cvec<T, N> cgather_helper(const complex<T>* base, size_t& index, size_t stride, size_t size, +CMT_INLINE cvec<T, N> cgather_helper(const complex<T>* base, size_t& index, size_t stride, size_t size, csizes_t<Indices...>) { return concat(ref_cast<cvec<T, 1>>(base[cgather_next(index, stride, size, Indices)])...); } template <size_t N, typename T> -KFR_INLINE cvec<T, N> cgather(const complex<T>* base, size_t& index, size_t stride, size_t size) +CMT_INLINE cvec<T, N> cgather(const complex<T>* base, size_t& index, size_t stride, size_t size) { return cgather_helper<N, T>(base, index, stride, size, csizeseq<N>); } template <size_t N, size_t stride, typename T, size_t... Indices> -KFR_INLINE void cscatter_helper(complex<T>* base, cvec<T, N> value, csizes_t<Indices...>) +CMT_INLINE void cscatter_helper(complex<T>* base, cvec<T, N> value, csizes_t<Indices...>) { swallow{ (cwrite<1>(base + Indices * stride, slice<Indices * 2, 2>(value)), 0)... }; } template <size_t N, size_t stride, typename T> -KFR_INLINE void cscatter(complex<T>* base, cvec<T, N> value) +CMT_INLINE void cscatter(complex<T>* base, cvec<T, N> value) { if (stride == 1) { @@ -333,31 +336,29 @@ KFR_INLINE void cscatter(complex<T>* base, cvec<T, N> value) } template <size_t N, typename T, size_t... Indices> -KFR_INLINE void cscatter_helper(complex<T>* base, size_t stride, cvec<T, N> value, csizes_t<Indices...>) +CMT_INLINE void cscatter_helper(complex<T>* base, size_t stride, cvec<T, N> value, csizes_t<Indices...>) { swallow{ (cwrite<1>(base + Indices * stride, slice<Indices * 2, 2>(value)), 0)... 
}; } template <size_t N, typename T> -KFR_INLINE void cscatter(complex<T>* base, size_t stride, cvec<T, N> value) +CMT_INLINE void cscatter(complex<T>* base, size_t stride, cvec<T, N> value) { return cscatter_helper<N, T>(base, stride, value, csizeseq<N>); } template <size_t groupsize = 1, typename T, size_t N, typename IT> -KFR_INLINE vec<T, N * 2 * groupsize> cgather(const complex<T>* base, vec<IT, N> offset) +CMT_INLINE vec<T, N * 2 * groupsize> cgather(const complex<T>* base, vec<IT, N> offset) { return gather_helper<2 * groupsize>(ptr_cast<T>(base), offset, csizeseq<N>); } template <size_t groupsize = 1, typename T, size_t N, typename IT> -KFR_INLINE void cscatter(complex<T>* base, vec<IT, N> offset, vec<T, N * 2 * groupsize> value) +CMT_INLINE void cscatter(complex<T>* base, vec<IT, N> offset, vec<T, N * 2 * groupsize> value) { return scatter_helper<2 * groupsize>(ptr_cast<T>(base), offset, value, csizeseq<N>); } -constexpr size_t default_unroll_count = 2; - template <typename T> KFR_INTRIN void transpose4x8(cvec<T, 8> z0, cvec<T, 8> z1, cvec<T, 8> z2, cvec<T, 8> z3, cvec<T, 4>& w0, cvec<T, 4>& w1, cvec<T, 4>& w2, cvec<T, 4>& w3, cvec<T, 4>& w4, cvec<T, 4>& w5, @@ -438,15 +439,15 @@ constexpr KFR_INTRIN T chsign(T x) template <typename T, size_t N, size_t size, size_t start, size_t step, bool inverse = false, size_t... indices> -constexpr KFR_INTRIN cvec<T, N> get_fixed_twiddle_helper(std::integer_sequence<size_t, indices...>) +constexpr KFR_INTRIN cvec<T, N> get_fixed_twiddle_helper(csizes_t<indices...>) { return make_vector((indices & 1 ? chsign<inverse>(-sin_using_table<T>(size, (indices / 2 * step + start))) : cos_using_table<T>(size, (indices / 2 * step + start)))...); } template <typename T, size_t width, size_t... indices> -constexpr KFR_INTRIN cvec<T, width> get_fixed_twiddle_helper(std::integer_sequence<size_t, indices...>, - size_t size, size_t start, size_t step) +constexpr KFR_INTRIN cvec<T, width> get_fixed_twiddle_helper(csizes_t<indices...>, size_t size, size_t start, + size_t step) { return make_vector((indices & 1 ? -sin_using_table<T>(size, indices / 2 * step + start) : cos_using_table<T>(size, indices / 2 * step + start))...); @@ -455,14 +456,13 @@ constexpr KFR_INTRIN cvec<T, width> get_fixed_twiddle_helper(std::integer_sequen template <typename T, size_t width, size_t size, size_t start, size_t step, bool inverse = false> constexpr KFR_INTRIN cvec<T, width> get_fixed_twiddle() { - return get_fixed_twiddle_helper<T, width, size, start, step, inverse>( - std::make_index_sequence<width * 2>()); + return get_fixed_twiddle_helper<T, width, size, start, step, inverse>(csizeseq<width * 2>); } template <typename T, size_t width> constexpr KFR_INTRIN cvec<T, width> get_fixed_twiddle(size_t size, size_t start, size_t step = 0) { - return get_fixed_twiddle_helper<T, width>(std::make_index_sequence<width * 2>(), start, step, size); + return get_fixed_twiddle_helper<T, width>(csizeseq<width * 2>, start, step, size); } template <typename T, size_t N, size_t size, size_t start, size_t step = 0, bool inverse = false> @@ -480,7 +480,7 @@ constexpr cvec<T, N> twiddleimagmask() #pragma clang diagnostic pop template <typename T, size_t N> -KFR_NOINLINE static vec<T, N> cossin_conj(vec<T, N> x) +CMT_NOINLINE static vec<T, N> cossin_conj(vec<T, N> x) { return cconj(cossin(x)); } @@ -1277,9 +1277,8 @@ KFR_INTRIN vec<T, N> mul_tw(cbool_t<true>, vec<T, N> x, const complex<T>* twiddl // Non-final template <typename T, size_t width, size_t radix, bool inverse, size_t... 
I> -KFR_INTRIN void butterfly_helper(std::index_sequence<I...>, size_t i, csize_t<width>, csize_t<radix>, - cbool_t<inverse>, complex<T>* out, const complex<T>* in, - const complex<T>* tw, size_t stride) +KFR_INTRIN void butterfly_helper(csizes_t<I...>, size_t i, csize_t<width>, csize_t<radix>, cbool_t<inverse>, + complex<T>* out, const complex<T>* in, const complex<T>* tw, size_t stride) { carray<cvec<T, width>, radix> inout; @@ -1294,8 +1293,8 @@ KFR_INTRIN void butterfly_helper(std::index_sequence<I...>, size_t i, csize_t<wi // Final template <typename T, size_t width, size_t radix, bool inverse, size_t... I> -KFR_INTRIN void butterfly_helper(std::index_sequence<I...>, size_t i, csize_t<width>, csize_t<radix>, - cbool_t<inverse>, complex<T>* out, const complex<T>* in, size_t stride) +KFR_INTRIN void butterfly_helper(csizes_t<I...>, size_t i, csize_t<width>, csize_t<radix>, cbool_t<inverse>, + complex<T>* out, const complex<T>* in, size_t stride) { carray<cvec<T, width>, radix> inout; @@ -1310,8 +1309,7 @@ KFR_INTRIN void butterfly_helper(std::index_sequence<I...>, size_t i, csize_t<wi template <size_t width, size_t radix, typename... Args> KFR_INTRIN void butterfly(size_t i, csize_t<width>, csize_t<radix>, Args&&... args) { - butterfly_helper(std::make_index_sequence<radix>(), i, csize<width>, csize<radix>, - std::forward<Args>(args)...); + butterfly_helper(csizeseq<radix>, i, csize<width>, csize<radix>, std::forward<Args>(args)...); } template <typename... Args> @@ -1321,7 +1319,7 @@ KFR_INTRIN void butterfly_cycle(size_t&, size_t, csize_t<0>, Args&&...) template <size_t width, typename... Args> KFR_INTRIN void butterfly_cycle(size_t& i, size_t count, csize_t<width>, Args&&... args) { - KFR_LOOP_NOUNROLL + CMT_LOOP_NOUNROLL for (; i < count / width * width; i += width) butterfly(i, csize<width>, std::forward<Args>(args)...); butterfly_cycle(i, count, csize<width / 2>, std::forward<Args>(args)...); @@ -1330,7 +1328,7 @@ KFR_INTRIN void butterfly_cycle(size_t& i, size_t count, csize_t<width>, Args&&. template <size_t width, typename... Args> KFR_INTRIN void butterflies(size_t count, csize_t<width>, Args&&... 
args) { - __builtin_assume(count > 0); + CMT_ASSUME(count > 0); size_t i = 0; butterfly_cycle(i, count, csize<width>, std::forward<Args>(args)...); } @@ -1345,14 +1343,14 @@ KFR_INTRIN void generic_butterfly_cycle(csize_t<width>, size_t radix, cbool_t<in const complex<T>* in, Tstride ostride, size_t halfradix, size_t halfradix_sqr, const complex<T>* twiddle, size_t i) { - KFR_LOOP_NOUNROLL + CMT_LOOP_NOUNROLL for (; i < halfradix / width * width; i += width) { const cvec<T, 1> in0 = cread<1>(in); cvec<T, width> sum0 = resize<2 * width>(in0); cvec<T, width> sum1 = sum0; - KFR_LOOP_NOUNROLL + CMT_LOOP_NOUNROLL for (size_t j = 0; j < halfradix; j++) { const cvec<T, 1> ina = cread<1>(in + (1 + j)); @@ -1386,17 +1384,17 @@ template <size_t width, typename T, bool inverse, typename Tstride = csize_t<1>> KFR_INTRIN void generic_butterfly_w(size_t radix, cbool_t<inverse>, complex<T>* out, const complex<T>* in, const complex<T>* twiddle, Tstride ostride = Tstride{}) { - __builtin_assume(radix > 0); + CMT_ASSUME(radix > 0); { cvec<T, width> sum = T(); size_t j = 0; - KFR_LOOP_NOUNROLL + CMT_LOOP_NOUNROLL for (; j < radix / width * width; j += width) { sum += cread<width>(in + j); } cvec<T, 1> sums = T(); - KFR_LOOP_NOUNROLL + CMT_LOOP_NOUNROLL for (; j < radix; j++) { sums += cread<1>(in + j); @@ -1405,7 +1403,7 @@ KFR_INTRIN void generic_butterfly_w(size_t radix, cbool_t<inverse>, complex<T>* } const size_t halfradix = radix / 2; const size_t halfradix_sqr = halfradix * halfradix; - __builtin_assume(halfradix > 0); + CMT_ASSUME(halfradix > 0); size_t i = 0; generic_butterfly_cycle(csize<width>, radix, cbool<inverse>, out, in, ostride, halfradix, halfradix_sqr, @@ -1424,10 +1422,10 @@ KFR_INTRIN void generic_butterfly(size_t radix, cbool_t<inverse>, complex<T>* ou constexpr size_t width = vector_width<T, cpu_t::native>; cswitch(csizes<11>, radix, - [&](auto radix_) KFR_INLINE_LAMBDA { - generic_butterfly_w<width>(val_of(radix_), cbool<inverse>, out, in, twiddle, ostride); + [&](auto radix_) CMT_INLINE_LAMBDA { + generic_butterfly_w<width>(decltype(radix_)(), cbool<inverse>, out, in, twiddle, ostride); }, - [&]() KFR_INLINE_LAMBDA { + [&]() CMT_INLINE_LAMBDA { generic_butterfly_w<width>(radix, cbool<inverse>, out, in, twiddle, ostride); }); } diff --git a/include/kfr/dsp.hpp b/include/kfr/dsp.hpp @@ -0,0 +1,43 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
+ */ +#pragma once + +#include "base.hpp" + +#include "dsp/biquad.hpp" +#include "dsp/biquad_design.hpp" +#include "dsp/dcremove.hpp" +#include "dsp/fir.hpp" +#include "dsp/fir_design.hpp" +#include "dsp/fracdelay.hpp" +#include "dsp/goertzel.hpp" +#include "dsp/impulse.hpp" +#include "dsp/interpolation.hpp" +#include "dsp/mixdown.hpp" +#include "dsp/oscillators.hpp" +#include "dsp/resample.hpp" +#include "dsp/speaker.hpp" +#include "dsp/units.hpp" +#include "dsp/waveshaper.hpp" +#include "dsp/weighting.hpp" +#include "dsp/window.hpp" diff --git a/include/kfr/dsp/biquad.hpp b/include/kfr/dsp/biquad.hpp @@ -44,6 +44,15 @@ enum class biquad_type template <typename T> struct biquad_params { + template <typename U> + constexpr biquad_params(const biquad_params<U>& bq) noexcept : a0(static_cast<T>(bq.a0)), + a1(static_cast<T>(bq.a1)), + a2(static_cast<T>(bq.a2)), + b0(static_cast<T>(bq.b0)), + b1(static_cast<T>(bq.b1)), + b2(static_cast<T>(bq.b2)) + { + } constexpr static bool is_pod = true; static_assert(std::is_floating_point<T>::value, "T must be a floating point type"); @@ -74,99 +83,92 @@ struct biquad_params namespace internal { -template <cpu_t cpu = cpu_t::native> -struct in_biquad +template <typename T, size_t filters, KFR_ARCH_DEP> +struct biquad_block { -private: -public: - template <typename T, size_t filters> - struct biquad_block - { - vec<T, filters> s1; - vec<T, filters> s2; - vec<T, filters> a1; - vec<T, filters> a2; - vec<T, filters> b0; - vec<T, filters> b1; - vec<T, filters> b2; + vec<T, filters> s1; + vec<T, filters> s2; + vec<T, filters> a1; + vec<T, filters> a2; + vec<T, filters> b0; + vec<T, filters> b1; + vec<T, filters> b2; - vec<T, filters> out; - biquad_block() : s1(0), s2(0), a1(0), a2(0), b0(1), b1(0), b2(0), out(0) {} - biquad_block(const biquad_params<T>* bq, size_t count) : s1(0), s2(0), out(0) + vec<T, filters> out; + biquad_block() : s1(0), s2(0), a1(0), a2(0), b0(1), b1(0), b2(0), out(0) {} + biquad_block(const biquad_params<T>* bq, size_t count) : s1(0), s2(0), out(0) + { + count = count > filters ? filters : count; + for (size_t i = 0; i < count; i++) { - count = count > filters ? 
filters : count; - for (size_t i = 0; i < count; i++) - { - a1(i) = bq[i].a1; - a2(i) = bq[i].a2; - b0(i) = bq[i].b0; - b1(i) = bq[i].b1; - b2(i) = bq[i].b2; - } - for (size_t i = count; i < filters; i++) - { - a1(i) = T(0); - a2(i) = T(0); - b0(i) = T(1); - b1(i) = T(0); - b2(i) = T(0); - } + a1(i) = bq[i].a1; + a2(i) = bq[i].a2; + b0(i) = bq[i].b0; + b1(i) = bq[i].b1; + b2(i) = bq[i].b2; } - - template <size_t count> - biquad_block(const biquad_params<T> (&bq)[count]) : biquad_block(bq, count) + for (size_t i = count; i < filters; i++) { - static_assert(count <= filters, "count > filters"); + a1(i) = T(0); + a2(i) = T(0); + b0(i) = T(1); + b1(i) = T(0); + b2(i) = T(0); } - }; + } - template <size_t filters, typename T, typename E1> - struct expression_biquads : public expression<E1> + template <size_t count> + biquad_block(const biquad_params<T> (&bq)[count]) : biquad_block(bq, count) { - using value_type = T; + static_assert(count <= filters, "count > filters"); + } +}; - expression_biquads(const biquad_block<T, filters>& bq, E1&& e1) - : expression<E1>(std::forward<E1>(e1)), bq(bq) - { - } - template <size_t width> - KFR_INTRIN vec<T, width> operator()(cinput_t, size_t index, vec_t<T, width> t) const - { - const vec<T, width> in = this->argument_first(index, t); - vec<T, width> out; +template <size_t filters, typename T, typename E1, KFR_ARCH_DEP> +struct expression_biquads : public expression<E1> +{ + using value_type = T; - KFR_LOOP_UNROLL - for (size_t i = 0; i < width; i++) - { - bq.out = process(bq, insertleft(in[i], bq.out)); - out(i) = bq.out[filters - 1]; - } + expression_biquads(const biquad_block<T, filters>& bq, E1&& e1) + : expression<E1>(std::forward<E1>(e1)), bq(bq) + { + } + template <size_t width> + KFR_INTRIN vec<T, width> operator()(cinput_t, size_t index, vec_t<T, width> t) const + { + const vec<T, width> in = this->argument_first(index, t); + vec<T, width> out; - return out; - } - KFR_SINTRIN vec<T, filters> process(biquad_block<T, filters>& bq, vec<T, filters> in) + CMT_LOOP_UNROLL + for (size_t i = 0; i < width; i++) { - const vec<T, filters> out = bq.b0 * in + bq.s1; - bq.s1 = bq.s2 + bq.b1 * in - bq.a1 * out; - bq.s2 = bq.b2 * in - bq.a2 * out; - return out; + bq.out = process(bq, insertleft(in[i], bq.out)); + out(i) = bq.out[filters - 1]; } - mutable biquad_block<T, filters> bq; - }; + + return out; + } + KFR_SINTRIN vec<T, filters> process(biquad_block<T, filters>& bq, vec<T, filters> in) + { + const vec<T, filters> out = bq.b0 * in + bq.s1; + bq.s1 = bq.s2 + bq.b1 * in - bq.a1 * out; + bq.s2 = bq.b2 * in - bq.a2 * out; + return out; + } + mutable biquad_block<T, filters> bq; }; } template <typename T, typename E1> -KFR_INLINE internal::in_biquad<>::expression_biquads<1, T, internal::arg<E1>> biquad( - const biquad_params<T>& bq, E1&& e1) +CMT_INLINE internal::expression_biquads<1, T, internal::arg<E1>> biquad(const biquad_params<T>& bq, E1&& e1) { const biquad_params<T> bqs[1] = { bq }; - return internal::in_biquad<>::expression_biquads<1, T, internal::arg<E1>>(bqs, std::forward<E1>(e1)); + return internal::expression_biquads<1, T, internal::arg<E1>>(bqs, std::forward<E1>(e1)); } template <size_t filters, typename T, typename E1> -KFR_INLINE internal::in_biquad<>::expression_biquads<filters, T, internal::arg<E1>> biquad( +CMT_INLINE internal::expression_biquads<filters, T, internal::arg<E1>> biquad( const biquad_params<T> (&bq)[filters], E1&& e1) { - return internal::in_biquad<>::expression_biquads<filters, T, internal::arg<E1>>(bq, std::forward<E1>(e1)); + 
return internal::expression_biquads<filters, T, internal::arg<E1>>(bq, std::forward<E1>(e1)); } } diff --git a/include/kfr/dsp/biquad_design.hpp b/include/kfr/dsp/biquad_design.hpp @@ -28,8 +28,8 @@ namespace kfr { -template <typename T> -KFR_INLINE biquad_params<T> biquad_allpass(T frequency, T Q) +template <typename T = fbase> +CMT_INLINE biquad_params<T> biquad_allpass(identity<T> frequency, identity<T> Q) { const T alpha = std::sin(frequency) / 2.0 * Q; const T cs = std::cos(frequency); @@ -43,8 +43,8 @@ KFR_INLINE biquad_params<T> biquad_allpass(T frequency, T Q) return { b0, b1, b2, a0, a1, a2 }; } -template <typename T> -KFR_INLINE biquad_params<T> biquad_lowpass(T frequency, T Q) +template <typename T = fbase> +CMT_INLINE biquad_params<T> biquad_lowpass(identity<T> frequency, identity<T> Q) { const T K = std::tan(c_pi<T, 1> * frequency); const T K2 = K * K; @@ -57,8 +57,8 @@ KFR_INLINE biquad_params<T> biquad_lowpass(T frequency, T Q) return { 1.0, b1, b2, a0, a1, a2 }; } -template <typename T> -KFR_INLINE biquad_params<T> biquad_highpass(T frequency, T Q) +template <typename T = fbase> +CMT_INLINE biquad_params<T> biquad_highpass(identity<T> frequency, identity<T> Q) { const T K = std::tan(c_pi<T, 1> * frequency); const T K2 = K * K; @@ -71,8 +71,8 @@ KFR_INLINE biquad_params<T> biquad_highpass(T frequency, T Q) return { 1.0, b1, b2, a0, a1, a2 }; } -template <typename T> -KFR_INLINE biquad_params<T> biquad_bandpass(T frequency, T Q) +template <typename T = fbase> +CMT_INLINE biquad_params<T> biquad_bandpass(identity<T> frequency, identity<T> Q) { const T K = std::tan(c_pi<T, 1> * frequency); const T K2 = K * K; @@ -85,8 +85,8 @@ KFR_INLINE biquad_params<T> biquad_bandpass(T frequency, T Q) return { 1.0, b1, b2, a0, a1, a2 }; } -template <typename T> -KFR_INLINE biquad_params<T> biquad_notch(T frequency, T Q) +template <typename T = fbase> +CMT_INLINE biquad_params<T> biquad_notch(identity<T> frequency, identity<T> Q) { const T K = std::tan(c_pi<T, 1> * frequency); const T K2 = K * K; @@ -99,8 +99,8 @@ KFR_INLINE biquad_params<T> biquad_notch(T frequency, T Q) return { 1.0, b1, b2, a0, a1, a2 }; } -template <typename T> -KFR_INLINE biquad_params<T> biquad_peak(T frequency, T Q, T gain) +template <typename T = fbase> +CMT_INLINE biquad_params<T> biquad_peak(identity<T> frequency, identity<T> Q, identity<T> gain) { biquad_params<T> result; const T K = std::tan(c_pi<T, 1> * frequency); @@ -130,8 +130,8 @@ KFR_INLINE biquad_params<T> biquad_peak(T frequency, T Q, T gain) return result; } -template <typename T> -KFR_INLINE biquad_params<T> biquad_lowshelf(T frequency, T gain) +template <typename T = fbase> +CMT_INLINE biquad_params<T> biquad_lowshelf(identity<T> frequency, identity<T> gain) { biquad_params<T> result; const T K = std::tan(c_pi<T, 1> * frequency); @@ -161,8 +161,8 @@ KFR_INLINE biquad_params<T> biquad_lowshelf(T frequency, T gain) return result; } -template <typename T> -KFR_INLINE biquad_params<T> biquad_highshelf(T frequency, T gain) +template <typename T = fbase> +CMT_INLINE biquad_params<T> biquad_highshelf(identity<T> frequency, identity<T> gain) { biquad_params<T> result; const T K = std::tan(c_pi<T, 1> * frequency); diff --git a/include/kfr/dsp/dcremove.hpp b/include/kfr/dsp/dcremove.hpp @@ -0,0 +1,37 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, 
either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "biquad.hpp" +#include "biquad_design.hpp" + +namespace kfr +{ + +template <typename T, typename E1> +CMT_INLINE internal::expression_biquads<1, T, internal::arg<E1>> dcremove(E1&& e1, double cutoff = 0.00025) +{ + const biquad_params<T> bqs[1] = { biquad_highpass(cutoff, 0.5) }; + return internal::expression_biquads<1, T, internal::arg<E1>>(bqs, std::forward<E1>(e1)); +} +} diff --git a/include/kfr/dsp/fir.hpp b/include/kfr/dsp/fir.hpp @@ -38,7 +38,7 @@ using fir_taps = univector<T, Size>; namespace internal { -template <size_t tapcount, typename T, typename E1> +template <size_t tapcount, typename T, typename E1, KFR_ARCH_DEP> struct expression_short_fir : expression<E1> { static_assert(is_poweroftwo(tapcount), "tapcount must be a power of two"); @@ -47,8 +47,12 @@ struct expression_short_fir : expression<E1> : expression<E1>(std::forward<E1>(e1)), taps(taps), delayline(0) { } + expression_short_fir(E1&& e1, const array_ref<const T>& taps) + : expression<E1>(std::forward<E1>(e1)), taps(taps), delayline(0) + { + } template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const + CMT_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const { vec<T, N> in = cast<T>(this->argument_first(index, x)); @@ -63,7 +67,7 @@ struct expression_short_fir : expression<E1> mutable vec<T, tapcount - 1> delayline; }; -template <typename T, typename E1> +template <typename T, typename E1, KFR_ARCH_DEP> struct expression_fir : expression<E1> { expression_fir(E1&& e1, const array_ref<const T>& taps) @@ -71,14 +75,14 @@ struct expression_fir : expression<E1> { } template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const + CMT_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N> x) const { const size_t tapcount = taps.size(); const vec<T, N> input = cast<T>(this->argument_first(index, x)); vec<T, N> output; size_t cursor = delayline_cursor; - KFR_LOOP_NOUNROLL + CMT_LOOP_NOUNROLL for (size_t i = 0; i < N; i++) { delayline.ringbuf_write(cursor, input[i]); @@ -95,12 +99,12 @@ struct expression_fir : expression<E1> } template <typename T, typename E1, size_t Tag> -KFR_INLINE internal::expression_fir<T, E1> fir(E1&& e1, const univector<T, Tag>& taps) +CMT_INLINE internal::expression_fir<T, E1> fir(E1&& e1, const univector<T, Tag>& taps) { return internal::expression_fir<T, E1>(std::forward<E1>(e1), taps.ref()); } template <typename T, size_t TapCount, typename E1> -KFR_INLINE internal::expression_short_fir<TapCount, T, E1> short_fir(E1&& e1, +CMT_INLINE internal::expression_short_fir<TapCount, T, E1> short_fir(E1&& e1, const univector<T, TapCount>& taps) { static_assert(TapCount >= 1 && TapCount < 16, "Use short_fir only for small FIR filters"); diff --git 
a/include/kfr/dsp/fir_design.hpp b/include/kfr/dsp/fir_design.hpp @@ -120,25 +120,25 @@ KFR_I_FN(fir_bandpass) KFR_I_FN(fir_bandstop) template <typename T, size_t Tag> -KFR_INLINE void fir_lowpass(univector<T, Tag>& taps, identity<T> cutoff, const expression_pointer<T>& window, +CMT_INLINE void fir_lowpass(univector<T, Tag>& taps, identity<T> cutoff, const expression_pointer<T>& window, bool normalize = true) { return intrinsics::fir_lowpass(taps.slice(), cutoff, window, normalize); } template <typename T, size_t Tag> -KFR_INLINE void fir_highpass(univector<T, Tag>& taps, identity<T> cutoff, const expression_pointer<T>& window, +CMT_INLINE void fir_highpass(univector<T, Tag>& taps, identity<T> cutoff, const expression_pointer<T>& window, bool normalize = true) { return intrinsics::fir_highpass(taps.slice(), cutoff, window, normalize); } template <typename T, size_t Tag> -KFR_INLINE void fir_bandpass(univector<T, Tag>& taps, identity<T> frequency1, identity<T> frequency2, +CMT_INLINE void fir_bandpass(univector<T, Tag>& taps, identity<T> frequency1, identity<T> frequency2, const expression_pointer<T>& window, bool normalize = true) { return intrinsics::fir_bandpass(taps.slice(), frequency1, frequency2, window, normalize); } template <typename T, size_t Tag> -KFR_INLINE void fir_bandstop(univector<T, Tag>& taps, identity<T> frequency1, identity<T> frequency2, +CMT_INLINE void fir_bandstop(univector<T, Tag>& taps, identity<T> frequency1, identity<T> frequency2, const expression_pointer<T>& window, bool normalize = true) { return intrinsics::fir_bandstop(taps.slice(), frequency1, frequency2, window, normalize); diff --git a/include/kfr/dsp/fracdelay.hpp b/include/kfr/dsp/fracdelay.hpp @@ -28,12 +28,10 @@ namespace kfr { template <typename T, typename E1> -KFR_INLINE internal::expression_short_fir<2, T, E1> fracdelay(E1&& e1, T delay) +CMT_INLINE internal::expression_short_fir<2, T, E1> fracdelay(E1&& e1, T delay) { if (delay < 0) delay = 0; - if (delay > 1) - delay = fract(delay); univector<T, 2> taps({ 1 - delay, delay }); return internal::expression_short_fir<2, T, E1>(std::forward<E1>(e1), taps.ref()); } diff --git a/include/kfr/dsp/goertzel.hpp b/include/kfr/dsp/goertzel.hpp @@ -32,95 +32,86 @@ namespace kfr namespace internal { -template <cpu_t c = cpu_t::native, cpu_t cc = c> -struct in_goertzel : in_sin_cos<cc> +template <typename T, KFR_ARCH_DEP> +struct expression_goertzel : output_expression { -private: - using in_sin_cos<cc>::sin; - using in_sin_cos<cc>::cos; - -public: - template <typename T> - struct expression_goertzel : output_expression + expression_goertzel(complex<T>& result, T omega) + : result(result), omega(omega), coeff(2 * cos(omega)), q0(), q1(), q2() { - expression_goertzel(complex<T>& result, identity<T> omega) - : result(result), omega(omega), coeff(2 * cos(omega)), q0(), q1(), q2() - { - } - ~expression_goertzel() - { - result.real(q1 - q2 * cos(omega)); - result.imag(q2 * sin(omega)); - } - template <typename U, size_t N> - KFR_INLINE void operator()(coutput_t, size_t index, const vec<U, N>& x) + } + ~expression_goertzel() + { + result.real(q1 - q2 * cos(omega)); + result.imag(q2 * sin(omega)); + } + template <typename U, size_t N> + CMT_INLINE void operator()(coutput_t, size_t index, const vec<U, N>& x) + { + vec<T, N> in = x; + CMT_LOOP_UNROLL + for (size_t i = 0; i < N; i++) { - vec<T, N> in = cast<T>(x); - KFR_LOOP_UNROLL - for (size_t i = 0; i < N; i++) - { - q0 = coeff * q1 - q2 + in[i]; - q2 = q1; - q1 = q0; - } + q0 = coeff * q1 - q2 + in[i]; + q2 = q1; + 
q1 = q0; } - complex<T>& result; - const T omega; - const T coeff; - T q0; - T q1; - T q2; - }; + } + complex<T>& result; + const T omega; + const T coeff; + T q0; + T q1; + T q2; +}; - template <typename T, size_t width> - struct expression_parallel_goertzel : output_expression +template <typename T, size_t width> +struct expression_parallel_goertzel : output_expression +{ + expression_parallel_goertzel(complex<T> result[], vec<T, width> omega) + : result(result), omega(omega), coeff(cos(omega)), q0(), q1(), q2() { - expression_parallel_goertzel(complex<T> result[], vec<T, width> omega) - : result(result), omega(omega), coeff(cos(omega)), q0(), q1(), q2() - { - } - ~expression_parallel_goertzel() - { - const vec<T, width> re = q1 - q2 * cos(omega); - const vec<T, width> im = q2 * sin(omega); - for (size_t i = 0; i < width; i++) - { - result[i].real(re[i]); - result[i].imag(im[i]); - } - } - template <typename U, size_t N> - KFR_INLINE void operator()(coutput_t, size_t index, const vec<U, N>& x) + } + ~expression_parallel_goertzel() + { + const vec<T, width> re = q1 - q2 * cos(omega); + const vec<T, width> im = q2 * sin(omega); + for (size_t i = 0; i < width; i++) { - const vec<T, N> in = cast<T>(x); - KFR_LOOP_UNROLL - for (size_t i = 0; i < N; i++) - { - q0 = coeff * q1 - q2 + in[i]; - q2 = q1; - q1 = q0; - } + result[i].real(re[i]); + result[i].imag(im[i]); } - complex<T> result[]; - const vec<T, width> omega; - const vec<T, width> coeff; - vec<T, width> q0; - vec<T, width> q1; - vec<T, width> q2; - }; - - template <typename T> - KFR_SINTRIN expression_goertzel<T> goertzel(complex<T>& result, identity<T> omega) - { - return expression_goertzel<T>(result, omega); } - - template <typename T, size_t width> - KFR_SINTRIN expression_parallel_goertzel<T, width> goertzel(complex<T> (&result)[width], - const T (&omega)[width]) + template <typename U, size_t N> + CMT_INLINE void operator()(coutput_t, size_t index, const vec<U, N>& x) { - return expression_parallel_goertzel<T, width>(result, read<width>(omega)); + const vec<T, N> in = x; + CMT_LOOP_UNROLL + for (size_t i = 0; i < N; i++) + { + q0 = coeff * q1 - q2 + in[i]; + q2 = q1; + q1 = q0; + } } + complex<T> result[]; + const vec<T, width> omega; + const vec<T, width> coeff; + vec<T, width> q0; + vec<T, width> q1; + vec<T, width> q2; +}; }; + +template <typename T> +KFR_SINTRIN internal::expression_goertzel<T> goertzel(complex<T>& result, identity<T> omega) +{ + return internal::expression_goertzel<T>(result, omega); +} + +template <typename T, size_t width> +KFR_SINTRIN internal::expression_parallel_goertzel<T, width> goertzel(complex<T> (&result)[width], + const T (&omega)[width]) +{ + return internal::expression_parallel_goertzel<T, width>(result, read<width>(omega)); } } diff --git a/include/kfr/dsp/interpolation.hpp b/include/kfr/dsp/interpolation.hpp @@ -32,7 +32,7 @@ namespace kfr template <typename T, typename M> KFR_SINTRIN T nearest(M mu, T x1, T x2) { - return native::select(mu < M(0.5), x1, x2); + return select(mu < M(0.5), x1, x2); } template <typename T, typename M> @@ -44,7 +44,7 @@ KFR_SINTRIN T linear(M mu, T x1, T x2) template <typename T, typename M> KFR_SINTRIN T cosine(M mu, T x1, T x2) { - return mix((M(1) - native::fastcos(mu * c_pi<T>)) * M(0.5), x1, x2); + return mix((M(1) - fastcos(mu * c_pi<T>)) * M(0.5), x1, x2); } template <typename T, typename M> diff --git a/include/kfr/dsp/mixdown.hpp b/include/kfr/dsp/mixdown.hpp @@ -0,0 +1,62 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is 
part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. + */ +#pragma once + +#include "../base.hpp" + +namespace kfr +{ + +template <typename... E> +internal::expression_function<fn_add, E...> mixdown(E&&... e) +{ + return internal::expression_function<fn_add, E...>(fn_add(), std::forward<E>(e)...); +} + +namespace internal +{ +struct stereo_matrix +{ + template <typename T, size_t N> + CMT_INLINE vec<vec<T, 2>, N> operator()(const vec<vec<T, 2>, N>& x) const + { + return process(x, csizeseq<N>); + } + template <typename T, size_t N, size_t... indices> + CMT_INLINE vec<vec<T, 2>, N> process(const vec<vec<T, 2>, N>& x, csizes_t<indices...>) const + { + return vec<vec<T, 2>, N>(hadd(transpose(x[indices] * matrix))...); + } + const f64x2x2 matrix; +}; +} + +template <typename Left, typename Right, + typename Result = internal::expression_function< + internal::stereo_matrix, internal::expression_pack<internal::arg<Left>, internal::arg<Right>>>> +Result mixdown_stereo(Left&& left, Right&& right, const f64x2x2& matrix) +{ + return Result(internal::stereo_matrix{ matrix }, + pack(std::forward<Left>(left), std::forward<Right>(right))); +} +} diff --git a/include/kfr/dsp/resample.hpp b/include/kfr/dsp/resample.hpp @@ -22,190 +22,4 @@ */ #pragma once -#include "../base/function.hpp" -#include "../base/memory.hpp" -#include "../base/reduce.hpp" -#include "../base/vec.hpp" -#include "window.hpp" - -namespace kfr -{ -namespace resample_quality -{ -constexpr csize_t<4> draft{}; -constexpr csize_t<6> low{}; -constexpr csize_t<8> normal{}; -constexpr csize_t<10> high{}; -} - -namespace internal -{ -template <typename T1, typename T2> -KFR_SINTRIN T1 resample_blackman(T1 n, T2 a) -{ - const T1 a0 = (1 - a) * 0.5; - const T1 a1 = 0.5; - const T1 a2 = a * 0.5; - n = n * c_pi<T1, 2>; - return a0 - a1 * cos(n) + a2 * cos(2 * n); -} - -template <typename T, size_t quality> -struct resampler -{ - using itype = i64; - - constexpr static itype depth = static_cast<itype>(1 << (quality + 1)); - - resampler(itype interpolation_factor, itype decimation_factor, T scale = T(1), T cutoff = 0.49) - : input_position(0), output_position(0) - { - const i64 gcf = gcd(interpolation_factor, decimation_factor); - interpolation_factor /= gcf; - decimation_factor /= gcf; - - taps = depth * interpolation_factor; - order = size_t(depth * interpolation_factor - 1); - - this->interpolation_factor = interpolation_factor; - this->decimation_factor = decimation_factor; - - const itype halftaps = taps / 2; - filter = univector<T>(size_t(taps), T()); - delay = univector<T>(size_t(depth), T()); - - cutoff = cutoff / std::max(decimation_factor, interpolation_factor); - - for (itype j = 0, jj = 0; j < taps; j++) - 
{ - filter[size_t(j)] = scale * 2 * interpolation_factor * cutoff * - sinc((jj - halftaps) * cutoff * c_pi<T, 2>) * - resample_blackman(T(jj) / T(taps - 1), T(0.16)); - jj += size_t(interpolation_factor); - if (jj >= taps) - jj = jj - taps + 1; - } - - const T s = reciprocal(sum(filter)) * interpolation_factor; - filter = filter * s; - } - KFR_INLINE size_t operator()(T* dest, size_t zerosize) - { - size_t outputsize = 0; - const itype srcsize = itype(zerosize); - - for (size_t i = 0;; i++) - { - const itype ii = itype(i) + output_position; - const itype workindex = ii * (decimation_factor); - const itype workindex_rem = workindex % (interpolation_factor); - const itype start = workindex_rem ? (interpolation_factor)-workindex_rem : 0; - itype srcindex = workindex / (interpolation_factor); - srcindex = workindex_rem ? srcindex + 1 : srcindex; - const univector_ref<T> tap_ptr = filter.slice(static_cast<size_t>(start * depth)); - srcindex = srcindex - (depth - 1); - - if (srcindex + depth >= input_position + srcsize) - break; - outputsize++; - - if (dest) - { - if (srcindex >= input_position) - { - dest[i] = T(0); - } - else - { - const itype prev_count = input_position - srcindex; - dest[i] = dotproduct(delay.slice(size_t(depth - prev_count)), tap_ptr); - } - } - } - if (srcsize >= depth) - { - delay = zeros(); - } - else - { - delay.slice(0, size_t(depth - srcsize)) = delay.slice(size_t(srcsize)); - delay.slice(size_t(depth - srcsize)) = zeros(); - } - - input_position += srcsize; - output_position += outputsize; - return outputsize; - } - KFR_INLINE size_t operator()(T* dest, univector_ref<const T> src) - { - size_t outputsize = 0; - const itype srcsize = itype(src.size()); - - for (size_t i = 0;; i++) - { - const itype ii = itype(i) + output_position; - const itype workindex = ii * (decimation_factor); - const itype workindex_rem = workindex % (interpolation_factor); - const itype start = workindex_rem ? (interpolation_factor)-workindex_rem : 0; - itype srcindex = workindex / (interpolation_factor); - srcindex = workindex_rem ? 
srcindex + 1 : srcindex; - const univector_ref<T> tap_ptr = filter.slice(static_cast<size_t>(start * depth)); - srcindex = srcindex - (depth - 1); - - if (srcindex + depth >= input_position + srcsize) - break; - outputsize++; - - if (dest) - { - if (srcindex >= input_position) - { - dest[i] = dotproduct(src.slice(size_t(srcindex - input_position), size_t(depth)), - tap_ptr /*, depth*/); - } - else - { - const itype prev_count = input_position - srcindex; - dest[i] = - dotproduct(delay.slice(size_t(depth - prev_count)), - tap_ptr /*, size_t(prev_count)*/) + - dotproduct( - src, tap_ptr.slice(size_t(prev_count), - size_t(depth - prev_count)) /*, size_t(depth - prev_count)*/); - } - } - } - if (srcsize >= depth) - { - delay = src.slice(size_t(srcsize - depth)); - } - else - { - delay.slice(0, size_t(depth - srcsize)) = delay.slice(size_t(srcsize)); - delay.slice(size_t(depth - srcsize)) = src; - } - - input_position += srcsize; - output_position += outputsize; - return outputsize; - } - itype taps; - size_t order; - itype interpolation_factor; - itype decimation_factor; - univector<T> filter; - univector<T> delay; - itype input_position; - itype output_position; -}; -} - -template <typename T, size_t quality> -inline internal::resampler<T, quality> resampler(csize_t<quality>, size_t interpolation_factor, - size_t decimation_factor, T scale = T(1), T cutoff = 0.49) -{ - using itype = typename internal::resampler<T, quality>::itype; - return internal::resampler<T, quality>(itype(interpolation_factor), itype(decimation_factor), scale, - cutoff); -} -} +#include "sample_rate_conversion.hpp" diff --git a/include/kfr/dsp/sample_rate_conversion.hpp b/include/kfr/dsp/sample_rate_conversion.hpp @@ -0,0 +1,227 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
+ */ +#pragma once + +#include "../base/function.hpp" +#include "../base/memory.hpp" +#include "../base/reduce.hpp" +#include "../base/vec.hpp" +#include "window.hpp" + +namespace kfr +{ +namespace sample_rate_conversion_quality +{ +constexpr csize_t<4> draft{}; +constexpr csize_t<6> low{}; +constexpr csize_t<8> normal{}; +constexpr csize_t<10> high{}; +} + +namespace resample_quality = sample_rate_conversion_quality; + +namespace internal +{ +template <typename T1, typename T2> +KFR_SINTRIN T1 sample_rate_converter_blackman(T1 n, T2 a) +{ + const T1 a0 = (1 - a) * 0.5; + const T1 a1 = 0.5; + const T1 a2 = a * 0.5; + n = n * c_pi<T1, 2>; + return a0 - a1 * cos(n) + a2 * cos(2 * n); +} + +template <typename T, size_t quality, KFR_ARCH_DEP> +struct sample_rate_converter +{ + using itype = i64; + + constexpr static itype depth = static_cast<itype>(1 << (quality + 1)); + + sample_rate_converter(itype interpolation_factor, itype decimation_factor, T scale = T(1), + T cutoff = 0.49) + : input_position(0), output_position(0) + { + const i64 gcf = gcd(interpolation_factor, decimation_factor); + interpolation_factor /= gcf; + decimation_factor /= gcf; + + taps = depth * interpolation_factor; + order = size_t(depth * interpolation_factor - 1); + + this->interpolation_factor = interpolation_factor; + this->decimation_factor = decimation_factor; + + const itype halftaps = taps / 2; + filter = univector<T>(size_t(taps), T()); + delay = univector<T>(size_t(depth), T()); + + cutoff = cutoff / std::max(decimation_factor, interpolation_factor); + + for (itype j = 0, jj = 0; j < taps; j++) + { + filter[size_t(j)] = scale * 2 * interpolation_factor * cutoff * + sinc((jj - halftaps) * cutoff * c_pi<T, 2>) * + sample_rate_converter_blackman(T(jj) / T(taps - 1), T(0.16)); + jj += size_t(interpolation_factor); + if (jj >= taps) + jj = jj - taps + 1; + } + + const T s = reciprocal(sum(filter)) * interpolation_factor; + filter = filter * s; + } + CMT_INLINE size_t operator()(T* dest, size_t zerosize) + { + size_t outputsize = 0; + const itype srcsize = itype(zerosize); + + for (size_t i = 0;; i++) + { + const itype ii = itype(i) + output_position; + const itype workindex = ii * (decimation_factor); + const itype workindex_rem = workindex % (interpolation_factor); + const itype start = workindex_rem ? (interpolation_factor)-workindex_rem : 0; + itype srcindex = workindex / (interpolation_factor); + srcindex = workindex_rem ? srcindex + 1 : srcindex; + const univector_ref<T> tap_ptr = filter.slice(static_cast<size_t>(start * depth)); + srcindex = srcindex - (depth - 1); + + if (srcindex + depth >= input_position + srcsize) + break; + outputsize++; + + if (dest) + { + if (srcindex >= input_position) + { + dest[i] = T(0); + } + else + { + const itype prev_count = input_position - srcindex; + dest[i] = dotproduct(delay.slice(size_t(depth - prev_count)), tap_ptr); + } + } + } + if (srcsize >= depth) + { + delay = zeros(); + } + else + { + delay.slice(0, size_t(depth - srcsize)) = delay.slice(size_t(srcsize)); + delay.slice(size_t(depth - srcsize)) = zeros(); + } + + input_position += srcsize; + output_position += outputsize; + return outputsize; + } + CMT_INLINE size_t operator()(T* dest, univector_ref<const T> src) + { + size_t outputsize = 0; + const itype srcsize = itype(src.size()); + + for (size_t i = 0;; i++) + { + const itype ii = itype(i) + output_position; + const itype workindex = ii * (decimation_factor); + const itype workindex_rem = workindex % (interpolation_factor); + const itype start = workindex_rem ? 
(interpolation_factor)-workindex_rem : 0; + itype srcindex = workindex / (interpolation_factor); + srcindex = workindex_rem ? srcindex + 1 : srcindex; + const univector_ref<T> tap_ptr = filter.slice(static_cast<size_t>(start * depth)); + srcindex = srcindex - (depth - 1); + + if (srcindex + depth >= input_position + srcsize) + break; + outputsize++; + + if (dest) + { + if (srcindex >= input_position) + { + dest[i] = dotproduct(src.slice(size_t(srcindex - input_position), size_t(depth)), + tap_ptr /*, depth*/); + } + else + { + const itype prev_count = input_position - srcindex; + dest[i] = + dotproduct(delay.slice(size_t(depth - prev_count)), + tap_ptr /*, size_t(prev_count)*/) + + dotproduct( + src, tap_ptr.slice(size_t(prev_count), + size_t(depth - prev_count)) /*, size_t(depth - prev_count)*/); + } + } + } + if (srcsize >= depth) + { + delay = src.slice(size_t(srcsize - depth)); + } + else + { + delay.slice(0, size_t(depth - srcsize)) = delay.slice(size_t(srcsize)); + delay.slice(size_t(depth - srcsize)) = src; + } + + input_position += srcsize; + output_position += outputsize; + return outputsize; + } + itype taps; + size_t order; + itype interpolation_factor; + itype decimation_factor; + univector<T> filter; + univector<T> delay; + itype input_position; + itype output_position; +}; +} + +template <typename T, size_t quality> +inline internal::sample_rate_converter<T, quality> sample_rate_converter(csize_t<quality>, + size_t interpolation_factor, + size_t decimation_factor, + T scale = T(1), T cutoff = 0.49) +{ + using itype = typename internal::sample_rate_converter<T, quality>::itype; + return internal::sample_rate_converter<T, quality>(itype(interpolation_factor), itype(decimation_factor), + scale, cutoff); +} + +// Deprecated in 0.9.2 +template <typename T, size_t quality> +inline internal::sample_rate_converter<T, quality> resampler(csize_t<quality>, size_t interpolation_factor, + size_t decimation_factor, T scale = T(1), + T cutoff = 0.49) +{ + using itype = typename internal::sample_rate_converter<T, quality>::itype; + return internal::sample_rate_converter<T, quality>(itype(interpolation_factor), itype(decimation_factor), + scale, cutoff); +} +} diff --git a/include/kfr/dsp/units.hpp b/include/kfr/dsp/units.hpp @@ -41,7 +41,7 @@ namespace intrinsics template <typename T, typename TF = ftype<T>> KFR_SINTRIN TF amp_to_dB(T amp) { - return log(cast<subtype<TF>>(amp)) * subtype<TF>(8.6858896380650365530225783783322); + return log(static_cast<TF>(amp)) * subtype<TF>(8.6858896380650365530225783783322); // return T( 20.0 ) * log10( level ); } diff --git a/include/kfr/dsp/waveshaper.hpp b/include/kfr/dsp/waveshaper.hpp @@ -22,21 +22,20 @@ */ #pragma once -#include "../base/abs.hpp" +#include "../base/clamp.hpp" #include "../base/hyperbolic.hpp" -#include "../base/min_max.hpp" namespace kfr { template <typename E1> inline auto waveshaper_hardclip(E1&& input, double clip_level) { - return native::clamp(input, -clip_level, +clip_level); + return clamp(input, -clip_level, +clip_level); } template <typename E1> inline auto waveshaper_tanh(E1&& input, double saturation) { - return native::tanh(saturation * input) * (native::coth(saturation)); + return tanh(saturation * input) * (coth(saturation)); } } diff --git a/include/kfr/dsp/window.hpp b/include/kfr/dsp/window.hpp @@ -122,7 +122,7 @@ struct expression_rectangular : input_expression { } template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + CMT_INLINE vec<U, N> operator()(cinput_t, 
size_t index, vec_t<U, N>) const { using UI = utype<U>; const vec<UI, N> i = enumerate(vec<UI, N>()) + cast<UI>(index); @@ -144,10 +144,10 @@ struct expression_triangular : input_expression { } template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + CMT_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { constexpr vec_t<T, N> y{}; - return cast<U>(1 - abs(linspace(cinput, index, y))); + return 1 - abs(linspace(cinput, index, y)); } size_t size() const { return m_size; } @@ -166,10 +166,10 @@ struct expression_bartlett : input_expression { } template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + CMT_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { constexpr vec_t<T, N> y{}; - return cast<U>(1 - abs(linspace(cinput, index, y))); + return 1 - abs(linspace(cinput, index, y)); } size_t size() const { return m_size; } @@ -188,10 +188,10 @@ struct expression_cosine : input_expression { } template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + CMT_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { constexpr vec_t<T, N> y{}; - return cast<U>(sin(c_pi<T> * linspace(cinput, index, y))); + return sin(c_pi<T> * linspace(cinput, index, y)); } size_t size() const { return m_size; } @@ -210,10 +210,10 @@ struct expression_hann : input_expression { } template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + CMT_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { constexpr vec_t<T, N> y{}; - return cast<U>(T(0.5) * (T(1) - cos(c_pi<T, 2> * linspace(cinput, index, y)))); + return T(0.5) * (T(1) - cos(c_pi<T, 2> * linspace(cinput, index, y))); } size_t size() const { return m_size; } @@ -232,11 +232,11 @@ struct expression_bartlett_hann : input_expression { } template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + CMT_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { constexpr vec_t<T, N> y{}; const vec<T, N> xx = linspace(cinput, index, y); - return cast<U>(T(0.62) - T(0.48) * abs(xx - T(0.5)) + T(0.38) * cos(c_pi<T, 2> * (xx - T(0.5)))); + return T(0.62) - T(0.48) * abs(xx - T(0.5)) + T(0.38) * cos(c_pi<T, 2> * (xx - T(0.5))); } size_t size() const { return m_size; } @@ -255,10 +255,10 @@ struct expression_hamming : input_expression { } template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + CMT_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { constexpr vec_t<T, N> y{}; - return cast<U>(alpha - (1.0 - alpha) * (cos(c_pi<T, 2> * linspace(cinput, index, y)))); + return alpha - (1.0 - alpha) * (cos(c_pi<T, 2> * linspace(cinput, index, y))); } size_t size() const { return m_size; } @@ -278,11 +278,11 @@ struct expression_bohman : input_expression { } template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + CMT_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { constexpr vec_t<T, N> y{}; const vec<U, N> n = abs(linspace(cinput, index, y)); - return cast<U>((T(1) - n) * cos(c_pi<T> * n) + (T(1) / c_pi<T>)*sin(c_pi<T> * n)); + return (T(1) - n) * cos(c_pi<T> * n) + (T(1) / c_pi<T>)*sin(c_pi<T> * n); } size_t size() const { return m_size; } @@ -301,11 +301,11 @@ struct 
expression_blackman : input_expression { } template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + CMT_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { constexpr vec_t<T, N> y{}; const vec<T, N> n = linspace(cinput, index, y); - return cast<U>(a0 - a1 * cos(c_pi<T, 2> * n) + a2 * cos(c_pi<T, 4> * n)); + return a0 - a1 * cos(c_pi<T, 2> * n) + a2 * cos(c_pi<T, 4> * n); } size_t size() const { return m_size; } @@ -325,12 +325,12 @@ struct expression_blackman_harris : input_expression { } template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + CMT_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { constexpr vec_t<T, N> y{}; const vec<T, N> n = linspace(cinput, index, y) * c_pi<T, 2>; - return cast<U>(T(0.35875) - T(0.48829) * cos(n) + T(0.14128) * cos(2 * n) - T(0.01168) * cos(3 * n)); + return T(0.35875) - T(0.48829) * cos(n) + T(0.14128) * cos(2 * n) - T(0.01168) * cos(3 * n); } size_t size() const { return m_size; } @@ -350,10 +350,10 @@ struct expression_kaiser : input_expression { } template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + CMT_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { constexpr vec_t<T, N> y{}; - return cast<U>(modzerobessel(beta * sqrt(1 - sqr(linspace(cinput, index, y)))) * m); + return modzerobessel(beta * sqrt(1 - sqr(linspace(cinput, index, y)))) * m; } size_t size() const { return m_size; } @@ -374,7 +374,7 @@ struct expression_flattop : input_expression { } template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + CMT_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { constexpr vec_t<T, N> y{}; const vec<T, N> n = linspace(cinput, index, y) * c_pi<T, 2>; @@ -383,7 +383,7 @@ struct expression_flattop : input_expression constexpr T a2 = 1.29; constexpr T a3 = 0.388; constexpr T a4 = 0.028; - return cast<U>(a0 - a1 * cos(n) + a2 * cos(2 * n) - a3 * cos(3 * n) + a4 * cos(4 * n)); + return a0 - a1 * cos(n) + a2 * cos(2 * n) - a3 * cos(3 * n) + a4 * cos(4 * n); } size_t size() const { return m_size; } @@ -402,10 +402,10 @@ struct expression_gaussian : input_expression { } template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + CMT_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { constexpr vec_t<T, N> y{}; - return cast<U>(exp(-0.5 * sqr(alpha * linspace(cinput, index, y)))); + return exp(-0.5 * sqr(alpha * linspace(cinput, index, y))); } size_t size() const { return m_size; } @@ -425,10 +425,10 @@ struct expression_lanczos : input_expression { } template <typename U, size_t N> - KFR_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const + CMT_INLINE vec<U, N> operator()(cinput_t, size_t index, vec_t<U, N>) const { constexpr vec_t<T, N> y{}; - return cast<U>(sinc(linspace(cinput, index, y))); + return sinc(linspace(cinput, index, y)); } size_t size() const { return m_size; } @@ -462,87 +462,88 @@ KFR_WINDOW_BY_TYPE(kaiser) KFR_WINDOW_BY_TYPE(flattop) KFR_WINDOW_BY_TYPE(gaussian) KFR_WINDOW_BY_TYPE(lanczos) +#undef KFR_WINDOW_BY_TYPE } -KFR_INLINE internal::expression_rectangular<fbase> window_rectangular(size_t size) +CMT_INLINE internal::expression_rectangular<fbase> window_rectangular(size_t size) { return internal::expression_rectangular<fbase>(size, fbase()); } 
template <typename T = fbase> -KFR_INLINE internal::expression_triangular<T> window_triangular(size_t size, ctype_t<T> = ctype_t<T>()) +CMT_INLINE internal::expression_triangular<T> window_triangular(size_t size, ctype_t<T> = ctype_t<T>()) { return internal::expression_triangular<T>(size); } template <typename T = fbase> -KFR_INLINE internal::expression_bartlett<T> window_bartlett(size_t size, ctype_t<T> = ctype_t<T>()) +CMT_INLINE internal::expression_bartlett<T> window_bartlett(size_t size, ctype_t<T> = ctype_t<T>()) { return internal::expression_bartlett<T>(size); } template <typename T = fbase> -KFR_INLINE internal::expression_cosine<T> window_cosine(size_t size, ctype_t<T> = ctype_t<T>()) +CMT_INLINE internal::expression_cosine<T> window_cosine(size_t size, ctype_t<T> = ctype_t<T>()) { return internal::expression_cosine<T>(size); } template <typename T = fbase> -KFR_INLINE internal::expression_hann<T> window_hann(size_t size, ctype_t<T> = ctype_t<T>()) +CMT_INLINE internal::expression_hann<T> window_hann(size_t size, ctype_t<T> = ctype_t<T>()) { return internal::expression_hann<T>(size); } template <typename T = fbase> -KFR_INLINE internal::expression_bartlett_hann<T> window_bartlett_hann(size_t size, ctype_t<T> = ctype_t<T>()) +CMT_INLINE internal::expression_bartlett_hann<T> window_bartlett_hann(size_t size, ctype_t<T> = ctype_t<T>()) { return internal::expression_bartlett_hann<T>(size); } template <typename T = fbase> -KFR_INLINE internal::expression_hamming<T> window_hamming(size_t size, T alpha = 0.54, +CMT_INLINE internal::expression_hamming<T> window_hamming(size_t size, identity<T> alpha = 0.54, ctype_t<T> = ctype_t<T>()) { return internal::expression_hamming<T>(size, alpha); } template <typename T = fbase> -KFR_INLINE internal::expression_bohman<T> window_bohman(size_t size, ctype_t<T> = ctype_t<T>()) +CMT_INLINE internal::expression_bohman<T> window_bohman(size_t size, ctype_t<T> = ctype_t<T>()) { return internal::expression_bohman<T>(size); } template <typename T = fbase> -KFR_INLINE internal::expression_blackman<T> window_blackman( - size_t size, T alpha = 0.16, window_symmetry symmetry = window_symmetry::symmetric, +CMT_INLINE internal::expression_blackman<T> window_blackman( + size_t size, identity<T> alpha = 0.16, window_symmetry symmetry = window_symmetry::symmetric, ctype_t<T> = ctype_t<T>()) { return internal::expression_blackman<T>(size, alpha, symmetry); } template <typename T = fbase> -KFR_INLINE internal::expression_blackman_harris<T> window_blackman_harris( +CMT_INLINE internal::expression_blackman_harris<T> window_blackman_harris( size_t size, window_symmetry symmetry = window_symmetry::symmetric, ctype_t<T> = ctype_t<T>()) { return internal::expression_blackman_harris<T>(size, T(), symmetry); } template <typename T = fbase> -KFR_INLINE internal::expression_kaiser<T> window_kaiser(size_t size, T beta = T(0.5), +CMT_INLINE internal::expression_kaiser<T> window_kaiser(size_t size, identity<T> beta = T(0.5), ctype_t<T> = ctype_t<T>()) { return internal::expression_kaiser<T>(size, beta); } template <typename T = fbase> -KFR_INLINE internal::expression_flattop<T> window_flattop(size_t size, ctype_t<T> = ctype_t<T>()) +CMT_INLINE internal::expression_flattop<T> window_flattop(size_t size, ctype_t<T> = ctype_t<T>()) { return internal::expression_flattop<T>(size); } template <typename T = fbase> -KFR_INLINE internal::expression_gaussian<T> window_gaussian(size_t size, T alpha = 2.5, +CMT_INLINE internal::expression_gaussian<T> window_gaussian(size_t size, identity<T> 
alpha = 2.5, ctype_t<T> = ctype_t<T>()) { return internal::expression_gaussian<T>(size, alpha); } template <typename T = fbase> -KFR_INLINE internal::expression_lanczos<T> window_lanczos(size_t size, ctype_t<T> = ctype_t<T>()) +CMT_INLINE internal::expression_lanczos<T> window_lanczos(size_t size, ctype_t<T> = ctype_t<T>()) { return internal::expression_lanczos<T>(size); } template <typename T = fbase, window_type type, typename window_expr = typename internal::window_by_type<type>::template type<T>> -KFR_NOINLINE window_expr window(size_t size, cval_t<window_type, type>, T win_param = T(), +CMT_NOINLINE window_expr window(size_t size, cval_t<window_type, type>, identity<T> win_param = T(), window_symmetry symmetry = window_symmetry::symmetric, ctype_t<T> = ctype_t<T>()) { @@ -550,7 +551,7 @@ KFR_NOINLINE window_expr window(size_t size, cval_t<window_type, type>, T win_pa } template <typename T = fbase> -KFR_NOINLINE expression_pointer<T> window(size_t size, window_type type, T win_param, +CMT_NOINLINE expression_pointer<T> window(size_t size, window_type type, identity<T> win_param, window_symmetry symmetry = window_symmetry::symmetric, ctype_t<T> = ctype_t<T>()) { @@ -561,7 +562,7 @@ KFR_NOINLINE expression_pointer<T> window(size_t size, window_type type, T win_p window_type::flattop, window_type::gaussian, window_type::lanczos>, type, [=](auto win) { - constexpr window_type window = val_of(win); + constexpr window_type window = val_of(decltype(win)()); return to_pointer<T>( typename internal::window_by_type<window>::template type<T>(size, win_param, symmetry)); }, diff --git a/include/kfr/io.hpp b/include/kfr/io.hpp @@ -0,0 +1,30 @@ +/** + * Copyright (C) 2016 D Levin (http://www.kfrlib.com) + * This file is part of KFR + * + * KFR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * KFR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with KFR. + * + * If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + * Buying a commercial license is mandatory as soon as you develop commercial activities without + * disclosing the source code of your own applications. + * See http://www.kfrlib.com for details. 
+ */ +#pragma once + +#include "base.hpp" + +#include "io/audiofile.hpp" +#include "io/file.hpp" +#include "io/python_plot.hpp" +#include "io/tostring.hpp" diff --git a/include/kfr/io/file.hpp b/include/kfr/io/file.hpp @@ -85,7 +85,7 @@ struct expression_file_writer : expression_file_base, output_expression { if (position != index) fseeko(file, static_cast<off_t>(index * sizeof(T)), SEEK_SET); - const vec<T, N> output = cast<T>(value); + const vec<T, N> output = value; fwrite(output.data(), sizeof(T), output.size(), file); position = index + N; } @@ -104,7 +104,7 @@ struct expression_file_reader : expression_file_base, input_expression vec<T, N> input = qnan; fread(input.data(), sizeof(T), input.size(), file); position = index + N; - return cast<U>(input); + return input; } mutable size_t position = 0; }; diff --git a/include/kfr/io/python_plot.hpp b/include/kfr/io/python_plot.hpp @@ -25,7 +25,7 @@ #include "../cometa/string.hpp" #include <cstdlib> -#ifdef KFR_OS_WIN +#ifdef CMT_OS_WIN #include <direct.h> #define cross_getcwd _getcwd #else @@ -37,6 +37,10 @@ namespace kfr { namespace internal { +#pragma clang diagnostic push +#if CMT_HAS_WARNING("-Wdeprecated-declarations") +#pragma clang diagnostic ignored "-Wdeprecated-declarations" +#endif void python(const std::string& name, const std::string& code) { @@ -46,7 +50,7 @@ void python(const std::string& name, const std::string& code) cross_getcwd(curdir, arraysize(curdir)); filename = curdir; } -#ifdef KFR_OS_WIN +#ifdef CMT_OS_WIN const char* slash = "\\"; #else const char* slash = "/"; @@ -58,6 +62,7 @@ void python(const std::string& name, const std::string& code) fclose(f); std::system(("python \"" + filename + "\"").c_str()); } +#pragma clang diagnostic pop } static std::string concat_args() { return {}; } diff --git a/include/kfr/io/tostring.hpp b/include/kfr/io/tostring.hpp @@ -29,6 +29,8 @@ namespace cometa { +inline std::string repr(kfr::cpu_t v); + template <typename T> inline std::string repr(const kfr::complex<T>& v); @@ -91,6 +93,8 @@ inline std::string repr(const kfr::complex<T>& v) return as_string(v.real()) + " + " + as_string(v.imag()) + "j"; } +inline std::string repr(kfr::cpu_t v) { return kfr::cpu_name(v); } + template <typename T> inline std::string repr(const T* source, size_t N) { @@ -99,7 +103,7 @@ inline std::string repr(const T* source, size_t N) { if (i > 0) { - if (i % details::number_columns == 0) + if (i % details::number_columns == 0 || kfr::is_vec<T>::value) str += "\n"; else str += " "; diff --git a/include/kfr/math.hpp b/include/kfr/math.hpp @@ -22,25 +22,4 @@ */ #pragma once -#include "base/vec.hpp" - -#include "base/abs.hpp" -#include "base/asin_acos.hpp" -#include "base/atan.hpp" -#include "base/complex.hpp" -#include "base/constants.hpp" -#include "base/digitreverse.hpp" -#include "base/gamma.hpp" -#include "base/hyperbolic.hpp" -#include "base/log_exp.hpp" -#include "base/logical.hpp" -#include "base/min_max.hpp" -#include "base/operators.hpp" -#include "base/read_write.hpp" -#include "base/round.hpp" -#include "base/saturation.hpp" -#include "base/select.hpp" -#include "base/shuffle.hpp" -#include "base/sin_cos.hpp" -#include "base/sqrt.hpp" -#include "base/tan.hpp" +#include "base.hpp" diff --git a/include/kfr/version.hpp b/include/kfr/version.hpp @@ -29,7 +29,7 @@ namespace kfr { static std::string library_version() { - return "KFR " + std::string(version_string) + " " + CID_STRINGIFY(KFR_ARCH_NAME) + + return "KFR " + std::string(version_string) + " " + CMT_STRINGIFY(CMT_ARCH_NAME) + 
bitness_const(" 32-bit", " 64-bit"); } } diff --git a/sources.cmake b/sources.cmake @@ -1,97 +1,88 @@ -# Copyright (C) 2016 D Levin (http://www.kfrlib.com) -# This file is part of KFR -# -# KFR is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# KFR is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with KFR. +# Auto-generated file. Do not edit +# Use update-sources.py set( KFR_SRC ${PROJECT_SOURCE_DIR}/include/kfr/all.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/cometa.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dft.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/io.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/math.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/version.hpp ${PROJECT_SOURCE_DIR}/include/kfr/cident.h ${PROJECT_SOURCE_DIR}/include/kfr/base/abs.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/asin_acos.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/atan.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/basic_expressions.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/clamp.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/compiletime.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/complex.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/constants.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/conversion.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/cpuid.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/cpuid_auto.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/digitreverse.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/expression.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/function.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/gamma.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/base/log_exp.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/generators.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/hyperbolic.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/logical.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/log_exp.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/memory.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/min_max.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/modzerobessel.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/operators.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/pointer.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/random.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/read_write.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/reduce.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/round.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/saturation.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/select.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/shuffle.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/sin_cos.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/base/specializations.i - ${PROJECT_SOURCE_DIR}/include/kfr/base/hyperbolic.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/small_buffer.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/sort.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/sqrt.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/tan.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/types.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/univector.hpp ${PROJECT_SOURCE_DIR}/include/kfr/base/vec.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/base/intrinsics.h + ${PROJECT_SOURCE_DIR}/include/kfr/base/kfr.h + 
${PROJECT_SOURCE_DIR}/include/kfr/base/specializations.i + ${PROJECT_SOURCE_DIR}/include/kfr/cometa/string.hpp ${PROJECT_SOURCE_DIR}/include/kfr/data/bitrev.hpp ${PROJECT_SOURCE_DIR}/include/kfr/data/sincos.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dft/bitrev.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dft/conv.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dft/fft.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dft/ft.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dft/reference_dft.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/dft/conv.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/base/cpuid.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/base/cpuid_auto.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dsp/biquad.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dsp/biquad_design.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/dsp/impulse.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/dsp/oscillators.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/dsp/units.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/dcremove.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dsp/fir.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dsp/fir_design.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dsp/fracdelay.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dsp/goertzel.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/impulse.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dsp/interpolation.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/mixdown.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/oscillators.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dsp/resample.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/sample_rate_conversion.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dsp/speaker.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/dsp/units.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dsp/waveshaper.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dsp/weighting.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dsp/window.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/base/basic_expressions.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/base/conversion.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/base/generators.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/base/pointer.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/base/reduce.hpp ${PROJECT_SOURCE_DIR}/include/kfr/io/audiofile.hpp ${PROJECT_SOURCE_DIR}/include/kfr/io/file.hpp ${PROJECT_SOURCE_DIR}/include/kfr/io/python_plot.hpp ${PROJECT_SOURCE_DIR}/include/kfr/io/tostring.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/math.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/base/compiletime.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/base/random.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/base/small_buffer.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/base/sort.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/version.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/base/kfr.h - ${PROJECT_SOURCE_DIR}/include/kfr/base/intrinsics.h - ${PROJECT_SOURCE_DIR}/include/kfr/cometa.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/cometa/string.hpp - - ${PROJECT_SOURCE_DIR}/tests/testo/testo.hpp - ${PROJECT_SOURCE_DIR}/tests/testo/print_colored.hpp ) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt @@ -1,35 +1,48 @@ # Copyright (C) 2016 D Levin (http://www.kfrlib.com) # This file is part of KFR -# +# # KFR is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. -# +# # KFR is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with KFR. 
-cmake_minimum_required(VERSION 3.0) +cmake_minimum_required(VERSION 2.8) if (NOT MSVC) add_compile_options(-fno-exceptions -fno-rtti -ftemplate-backtrace-limit=0) + link_libraries(${STD_LIB} pthread m) +endif () + +include_directories(../include) + +if (NOT ARM) + add_executable(multiarch multiarch.cpp multiarch_fir_sse2.cpp multiarch_fir_avx.cpp ${KFR_SRC}) + set_source_files_properties(multiarch_fir_sse2.cpp PROPERTIES COMPILE_FLAGS "-mno-avx -mno-sse3 -msse2") + set_source_files_properties(multiarch_fir_avx.cpp PROPERTIES COMPILE_FLAGS -mavx) +endif () + +if (NOT MSVC) if (NOT ARCH_FLAGS) add_compile_options(-march=native) else () set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARCH_FLAGS}") endif () - link_libraries(stdc++ pthread m) else () add_compile_options(/arch:AVX) endif () -include_directories(../include) +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/tests/cmake/") + +find_package(MPFR) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/tests/cmake/") @@ -40,8 +53,8 @@ add_executable(intrinsic_test intrinsic_test.cpp ${KFR_SRC}) add_executable(dft_test dft_test.cpp ${KFR_SRC}) add_executable(conv_test conv_test.cpp ${KFR_SRC}) if (MPFR_FOUND) + include_directories(${MPFR_INCLUDE_DIR}) add_executable(transcendental_test transcendental_test.cpp ${KFR_SRC}) - target_include_directories(transcendental_test PRIVATE ${MPFR_INCLUDE_DIR}) target_link_libraries(transcendental_test ${MPFR_LIBRARIES}) endif () add_executable(fracdelay_test fracdelay_test.cpp ${KFR_SRC}) @@ -50,25 +63,29 @@ add_executable(complex_test complex_test.cpp ${KFR_SRC}) add_executable(vec_test vec_test.cpp ${KFR_SRC}) add_executable(stat_test stat_test.cpp ${KFR_SRC}) -enable_testing() +if (NOT IOS) + enable_testing() -add_test(NAME basic_vector_test - COMMAND ${PROJECT_BINARY_DIR}/tests/basic_vector_test) -add_test(NAME intrinsic_test - COMMAND ${PROJECT_BINARY_DIR}/tests/intrinsic_test) -add_test(NAME fracdelay_test - COMMAND ${PROJECT_BINARY_DIR}/tests/fracdelay_test) -add_test(NAME conv_test - COMMAND ${PROJECT_BINARY_DIR}/tests/conv_test) -if (MPFR_FOUND) - add_test(NAME transcendental_test - COMMAND ${PROJECT_BINARY_DIR}/tests/transcendental_test) + add_test(NAME basic_vector_test + COMMAND ${PROJECT_BINARY_DIR}/tests/basic_vector_test) + add_test(NAME intrinsic_test + COMMAND ${PROJECT_BINARY_DIR}/tests/intrinsic_test) + add_test(NAME fracdelay_test + COMMAND ${PROJECT_BINARY_DIR}/tests/fracdelay_test) + add_test(NAME conv_test + COMMAND ${PROJECT_BINARY_DIR}/tests/conv_test) + if (MPFR_FOUND) + add_test(NAME transcendental_test + COMMAND ${PROJECT_BINARY_DIR}/tests/transcendental_test) + endif () + add_test(NAME complex_test + COMMAND ${PROJECT_BINARY_DIR}/tests/complex_test) + add_test(NAME vec_test + COMMAND ${PROJECT_BINARY_DIR}/tests/vec_test) + add_test(NAME stat_test + COMMAND ${PROJECT_BINARY_DIR}/tests/stat_test) + add_test(NAME multiarch + COMMAND ${PROJECT_BINARY_DIR}/tests/multiarch) + add_test(NAME dft_test + COMMAND ${PROJECT_BINARY_DIR}/tests/dft_test) endif () -add_test(NAME complex_test - COMMAND ${PROJECT_BINARY_DIR}/tests/complex_test) -add_test(NAME vec_test - COMMAND ${PROJECT_BINARY_DIR}/tests/vec_test) -add_test(NAME stat_test - COMMAND ${PROJECT_BINARY_DIR}/tests/stat_test) -add_test(NAME dft_test - COMMAND ${PROJECT_BINARY_DIR}/tests/dft_test) diff --git a/tests/complex_test.cpp b/tests/complex_test.cpp @@ -44,17 +44,17 @@ TEST(complex_vector) TEST(complex_cast) { - const vec<f32, 4> v1 = subcast<f32>(make_vector(c32{ 0, 1 }, c32{ 2, 3 })); + const 
vec<f32, 4> v1 = bitcast<f32>(make_vector(c32{ 0, 1 }, c32{ 2, 3 })); CHECK(v1(0) == 0.f); CHECK(v1(1) == 1.f); CHECK(v1(2) == 2.f); CHECK(v1(3) == 3.f); - const vec<c32, 1> v2 = subcast<c32>(make_vector(1.f, 2.f)); + const vec<c32, 1> v2 = bitcast<c32>(make_vector(1.f, 2.f)); CHECK(v2(0) == 1.f); CHECK(v2(1) == 2.f); - const vec<c32, 2> v3 = cast<c32>(make_vector(1.f, 2.f)); + const vec<c32, 2> v3 = make_vector(1.f, 2.f); CHECK(v3(0) == 1.f); CHECK(v3(1) == 0.f); CHECK(v3(2) == 2.f); @@ -101,6 +101,15 @@ TEST(complex_math) CHECK(cexp(c32{ 1.f, 1.f }) == c32{ 1.4686939399158849, 2.2873552871788423 }); CHECK(cexp2(c32{ 1.f, 1.f }) == c32{ 1.5384778027279442, 1.2779225526272695 }); CHECK(cexp10(c32{ 1.f, 1.f }) == c32{ -6.682015101903131, 7.439803369574931 }); + +#ifdef KFR_NATIVE_F64 + CHECK(csin(c64{ 1.f, 1.f }) == c64{ 1.2984575814159773, 0.634963914784736 }); + CHECK(ccos(c64{ 1.f, 1.f }) == c64{ 0.8337300251311489, -0.9888977057628651 }); + CHECK(csinh(c64{ 1.f, 1.f }) == c64{ 0.634963914784736, 1.2984575814159773 }); + CHECK(ccosh(c64{ 1.f, 1.f }) == c64{ 0.8337300251311489, 0.9888977057628651 }); + CHECK(clog(c64{ 1.f, 1.f }) == c64{ 0.34657359027997264, 0.7853981633974483 }); + CHECK(cexp(c64{ 1.f, 1.f }) == c64{ 1.4686939399158849, 2.2873552871788423 }); +#endif } TEST(complex_read_write) @@ -168,10 +177,12 @@ int main(int argc, char** argv) { println(library_version()); +#ifdef CMT_ARCH_SSE2 static_assert(vector_width<f32, cpu_t::sse2> == 4, ""); static_assert(vector_width<c32, cpu_t::sse2> == 2, ""); static_assert(vector_width<i32, cpu_t::sse2> == 4, ""); static_assert(vector_width<complex<i32>, cpu_t::sse2> == 2, ""); +#endif static_assert(is_numeric<vec<complex<float>, 4>>::value, ""); static_assert(is_numeric_args<vec<complex<float>, 4>>::value, ""); diff --git a/tests/conv_test.cpp b/tests/conv_test.cpp @@ -19,11 +19,11 @@ using namespace kfr; TEST(test_convolve) { - univector<double, 5> a({ 1, 2, 3, 4, 5 }); - univector<double, 5> b({ 0.25, 0.5, 1.0, 0.5, 0.25 }); - univector<double> c = convolve(a, b); + univector<fbase, 5> a({ 1, 2, 3, 4, 5 }); + univector<fbase, 5> b({ 0.25, 0.5, 1.0, 0.5, 0.25 }); + univector<fbase> c = convolve(a, b); CHECK(c.size() == 9); - CHECK(rms(c - univector<double>({ 0.25, 1., 2.75, 5., 7.5, 8.5, 7.75, 3.5, 1.25 })) < 0.0001); + CHECK(rms(c - univector<fbase>({ 0.25, 1., 2.75, 5., 7.5, 8.5, 7.75, 3.5, 1.25 })) < 0.0001); } int main(int argc, char** argv) diff --git a/tests/dft_test.cpp b/tests/dft_test.cpp @@ -22,12 +22,18 @@ using namespace kfr; +#ifdef KFR_NATIVE_F64 +constexpr ctypes_t<float, double> float_types{}; +#else +constexpr ctypes_t<float> float_types{}; +#endif + TEST(fft_accuracy) { testo::active_test()->show_progress = true; random_bit_generator gen(2247448713, 915890490, 864203735, 2982561); - testo::matrix(named("type") = ctypes<float, double>, // + testo::matrix(named("type") = float_types, // named("inverse") = std::make_tuple(false, true), // named("log2(size)") = make_range(1, 21), // [&gen](auto type, bool inverse, size_t log2size) { diff --git a/tests/empty_test.cpp b/tests/empty_test.cpp @@ -1,4 +1,4 @@ -#include <kfr/math.hpp> +#include <kfr/all.hpp> using namespace kfr; diff --git a/tests/intrinsic_test.cpp b/tests/intrinsic_test.cpp @@ -11,39 +11,52 @@ using namespace kfr; -constexpr ctypes_t<i8x1, i16x1, i32x1, i64x1, // - i8x2, i16x2, i32x2, i64x2, // - i8x4, i16x4, i32x4, i64x4, // - i8x8, i16x8, i32x8, i64x8, // - i8x16, i16x16, i32x16, i64x16, // - i8x3, i16x3, i32x3, i64x3 // +constexpr ctypes_t<i8x1, i8x2, i8x4, 
i8x8, i8x16, i8x3, // + i16x1, i16x2, i16x4, i16x8, i16x16, i16x3, // + i32x1, i32x2, i32x4, i32x8, i32x16, i32x3 // +#ifdef KFR_NATIVE_I64 + , + i64x1, i64x2, i64x4, i64x8, i64x16, i64x3 // +#endif > signed_types{}; -constexpr ctypes_t<u8x1, u16x1, u32x1, u64x1, // - u8x2, u16x2, u32x2, u64x2, // - u8x4, u16x4, u32x4, u64x4, // - u8x8, u16x8, u32x8, u64x8, // - u8x16, u16x16, u32x16, u64x16, // - u8x3, u16x3, u32x3, u64x3 // +constexpr ctypes_t<u8x1, u8x2, u8x4, u8x8, u8x16, u8x3, // + u16x1, u16x2, u16x4, u16x8, u16x16, u16x3, // + u32x1, u32x2, u32x4, u32x8, u32x16, u32x3 // +#ifdef KFR_NATIVE_I64 + , + u64x1, u64x2, u64x4, u64x8, u64x16, u64x3 // +#endif > unsigned_types{}; -constexpr ctypes_t<f32x1, f64x1, // - f32x2, f64x2, // - f32x4, f64x4, // - f32x8, f64x8, // - f32x16, f64x16, // - f32x3, f64x3 // +constexpr ctypes_t<f32x1, f32x2, f32x4, f32x8, f32x16, f32x3 // +#ifdef KFR_NATIVE_F64 + , + f64x1, f64x2, f64x4, f64x8, f64x16, f64x3 // +#endif > float_types{}; -constexpr ctypes_t<u8x1, i8x1, u16x1, i16x1, u32x1, i32x1, u64x1, i64x1, f32x1, f64x1, // - u8x2, i8x2, u16x2, i16x2, u32x2, i32x2, u64x2, i64x2, f32x2, f64x2, // - u8x4, i8x4, u16x4, i16x4, u32x4, i32x4, u64x4, i64x4, f32x4, f64x4, // - u8x8, i8x8, u16x8, i16x8, u32x8, i32x8, u64x8, i64x8, f32x8, f64x8, // - u8x16, i8x16, u16x16, i16x16, u32x16, i32x16, u64x16, i64x16, f32x16, f64x16, // - u8x3, i8x3, u16x3, i16x3, u32x3, i32x3, u64x3, i64x3, f32x3, f64x3 // +constexpr ctypes_t<i8x1, i8x2, i8x4, i8x8, i8x16, i8x3, // + i16x1, i16x2, i16x4, i16x8, i16x16, i16x3, // + i32x1, i32x2, i32x4, i32x8, i32x16, i32x3, // +#ifdef KFR_NATIVE_I64 + + i64x1, i64x2, i64x4, i64x8, i64x16, i64x3, // +#endif + u8x1, u8x2, u8x4, u8x8, u8x16, u8x3, // + u16x1, u16x2, u16x4, u16x8, u16x16, u16x3, // + u32x1, u32x2, u32x4, u32x8, u32x16, u32x3, // +#ifdef KFR_NATIVE_I64 + u64x1, u64x2, u64x4, u64x8, u64x16, u64x3, // +#endif + f32x1, f32x2, f32x4, f32x8, f32x16, f32x3 // +#ifdef KFR_NATIVE_F64 + , + f64x1, f64x2, f64x4, f64x8, f64x16, f64x3 // +#endif > all_types{}; @@ -145,13 +158,13 @@ TEST(intrin_abs) TEST(intrin_sqrt) { - testo::assert_is_same<decltype(kfr::sqrt(9)), double>(); - testo::assert_is_same<decltype(kfr::sqrt(make_vector(9))), f64x1>(); - testo::assert_is_same<decltype(kfr::sqrt(make_vector(9, 25))), f64x2>(); + testo::assert_is_same<decltype(kfr::sqrt(9)), fbase>(); + testo::assert_is_same<decltype(kfr::sqrt(make_vector(9))), vec<fbase, 1>>(); + testo::assert_is_same<decltype(kfr::sqrt(make_vector(9, 25))), vec<fbase, 2>>(); CHECK(kfr::sqrt(9) == 3.0); CHECK(kfr::sqrt(-9) == qnan); - CHECK(kfr::sqrt(make_vector(9)) == make_vector(3.0)); - CHECK(kfr::sqrt(make_vector(-9)) == make_vector(qnan)); + CHECK(kfr::sqrt(make_vector(9)) == make_vector<fbase>(3.0)); + CHECK(kfr::sqrt(make_vector(-9)) == make_vector<fbase>(qnan)); testo::matrix(named("type") = float_types, named("value") = std::vector<int>{ 0, 2, 65536 }, [](auto type, int value) { using T = type_of<decltype(type)>; @@ -180,8 +193,8 @@ TEST(intrin_round) CHECK(kfr::fract(100) == 0); testo::matrix(named("type") = float_types, - named("value") = std::vector<double>{ -1.51, -1.49, 0.0, +1.49, +1.51 }, - [](auto type, double value) { + named("value") = std::vector<fbase>{ -1.51, -1.49, 0.0, +1.49, +1.51 }, + [](auto type, fbase value) { using T = type_of<decltype(type)>; using Tsub = subtype<T>; const T x(value); @@ -201,10 +214,9 @@ TEST(intrin_min_max) CHECK(min(pack(1, 2, 3), 2) == pack(1, 2, 2)); CHECK(min(pack(1., 2., 3.), 2) == pack(1., 2., 2.)); - 
testo::matrix(named("type") = float_types, - named("value") = - std::vector<std::pair<double, double>>{ { -100, +100 }, { infinity, 0.0 } }, - [](auto type, std::pair<double, double> value) { + testo::matrix(named("type") = float_types, + named("value") = std::vector<std::pair<fbase, fbase>>{ { -100, +100 }, { infinity, 0.0 } }, + [](auto type, std::pair<fbase, fbase> value) { using T = type_of<decltype(type)>; using Tsub = subtype<T>; const T x(value.first); diff --git a/tests/multiarch.cpp b/tests/multiarch.cpp @@ -0,0 +1,56 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016 D Levin + * See LICENSE.txt for details + */ + +#include <kfr/io/tostring.hpp> + +#include "testo/testo.hpp" +#include <kfr/dsp.hpp> + +using namespace kfr; + +cpu_t fir_sse2(univector<double, 0> data, univector<double, 4>& taps); +cpu_t fir_avx(univector<double, 0> data, univector<double, 4>& taps); + +TEST(test_fir_sse2) +{ + univector<double, 8> data = counter(); + univector<double, 4> taps({ 0.5, 1.0, 1.0, 0.5 }); + cpu_t c = fir_sse2(data, taps); + CHECK(c == cpu_t::sse2); + CHECK(data[0] == 0); + CHECK(data[1] == 0.5); + CHECK(data[2] == 2); + CHECK(data[3] == 4.5); + CHECK(data[4] == 7.5); + CHECK(data[5] == 10.5); + CHECK(data[6] == 13.5); + CHECK(data[7] == 16.5); +} + +TEST(test_fir_avx) +{ + if (get_cpu() >= cpu_t::avx1) + { + univector<double, 8> data = counter(); + univector<double, 4> taps({ 0.5, 1.0, 1.0, 0.5 }); + cpu_t c = fir_avx(data, taps); + CHECK(c == cpu_t::avx); + CHECK(data[0] == 0); + CHECK(data[1] == 0.5); + CHECK(data[2] == 2); + CHECK(data[3] == 4.5); + CHECK(data[4] == 7.5); + CHECK(data[5] == 10.5); + CHECK(data[6] == 13.5); + CHECK(data[7] == 16.5); + } + else + { + println("No AVX"); + } +} + +int main(int argc, char** argv) { return testo::run_all("", true); } diff --git a/tests/multiarch_fir_avx.cpp b/tests/multiarch_fir_avx.cpp @@ -0,0 +1,18 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016 D Levin + * See LICENSE.txt for details + */ + +#include <kfr/dsp.hpp> +#include <kfr/io/tostring.hpp> +#include <kfr/version.hpp> + +using namespace kfr; + +cpu_t fir_avx(univector<double, 0> data, univector<double, 4>& taps) +{ + println(library_version()); + data = short_fir(data, taps); + return cpu_t::native; +} diff --git a/tests/multiarch_fir_sse2.cpp b/tests/multiarch_fir_sse2.cpp @@ -0,0 +1,18 @@ +/** + * KFR (http://kfrlib.com) + * Copyright (C) 2016 D Levin + * See LICENSE.txt for details + */ + +#include <kfr/dsp.hpp> +#include <kfr/io/tostring.hpp> +#include <kfr/version.hpp> + +using namespace kfr; + +cpu_t fir_sse2(univector<double, 0> data, univector<double, 4>& taps) +{ + println(library_version()); + data = short_fir(data, taps); + return cpu_t::native; +} diff --git a/tests/testo/testo.hpp b/tests/testo/testo.hpp @@ -372,7 +372,7 @@ struct test_case void check(bool result, const std::string& value, const char* expr) { - subtests.push_back(subtest{ result, format("{} | {}", padleft(22, expr), value), comment }); + subtests.push_back(subtest{ result, as_string(padleft(22, expr), " | ", value), comment }); result ? 
success++ : failed++; if (show_progress) { @@ -393,8 +393,7 @@ struct test_case void check(comparison<Op, L, R> comparison, const char* expr) { bool result = comparison(); - check(result, format("{} {} {}", as_string(comparison.left), Op::op(), as_string(comparison.right)), - expr); + check(result, as_string(comparison.left, " ", Op::op(), " ", comparison.right), expr); } template <typename L> @@ -409,7 +408,8 @@ struct test_case comment = text; if (show_progress) { - printfmt("\n{}:\n", comment); + println(); + println(comment, ":"); } } @@ -469,22 +469,22 @@ template <typename Arg0, typename Fn> void matrix(named_arg<Arg0>&& arg0, Fn&& fn) { cforeach(std::forward<Arg0>(arg0.value), [&](auto v0) { - active_test()->set_comment(format("{} = {}", arg0.name, v0)); + active_test()->set_comment(as_string(arg0.name, " = ", v0)); fn(v0); }); if (active_test()->show_progress) - printfmt("\n"); + println(); } template <typename Arg0, typename Arg1, typename Fn> void matrix(named_arg<Arg0>&& arg0, named_arg<Arg1>&& arg1, Fn&& fn) { cforeach(std::forward<Arg0>(arg0.value), std::forward<Arg1>(arg1.value), [&](auto v0, auto v1) { - active_test()->set_comment(format("{} = {}, {} = {}", arg0.name, v0, arg1.name, v1)); + active_test()->set_comment(as_string(arg0.name, " = ", v0, ", ", arg1.name, " = ", v1)); fn(v0, v1); }); if (active_test()->show_progress) - printfmt("\n"); + println(); } template <typename Arg0, typename Arg1, typename Arg2, typename Fn> @@ -493,11 +493,11 @@ void matrix(named_arg<Arg0>&& arg0, named_arg<Arg1>&& arg1, named_arg<Arg2>&& ar cforeach(std::forward<Arg0>(arg0.value), std::forward<Arg1>(arg1.value), std::forward<Arg2>(arg2.value), [&](auto v0, auto v1, auto v2) { active_test()->set_comment( - format("{} = {}, {} = {}, {} = {}", arg0.name, v0, arg1.name, v1, arg2.name, v2)); + as_string(arg0.name, " = ", v0, ", ", arg1.name, " = ", v1, ", ", arg2.name, " = ", v2)); fn(v0, v1, v2); }); if (active_test()->show_progress) - printfmt("\n"); + println(); } static int run_all(const std::string& name = std::string(), bool show_successful = false) @@ -545,7 +545,7 @@ void assert_is_same_decay() #define TESTO_TEST(name) \ void test_function_##name(); \ ::testo::test_case test_case_##name(&test_function_##name, #name); \ - void CID_NOINLINE test_function_##name() + void CMT_NOINLINE test_function_##name() #define TESTO_DTEST(name) \ template <typename> \ diff --git a/tests/vec_test.cpp b/tests/vec_test.cpp @@ -7,6 +7,7 @@ #include <kfr/io/tostring.hpp> #include "testo/testo.hpp" +#include <kfr/dsp/mixdown.hpp> #include <kfr/math.hpp> using namespace kfr; @@ -64,7 +65,7 @@ TEST(vec_apply) CHECK(apply(fn_sqr(), make_vector(1, 2, 3, 4, 5)) == make_vector(1, 4, 9, 16, 25)); } -#ifdef CID_ARCH_SSE +#ifdef CMT_ARCH_SSE TEST(vec_tovec) { const __m128 x = _mm_set_ps(4.f, 3.f, 2.f, 1.f); @@ -132,4 +133,80 @@ TEST(vec_conv) testo::assert_is_same<decltype(min(pack(1.0, 2.0, 3.0), pack(1, 2, 3))), f64x3>(); } +TEST(vec_matrix) +{ + using i32x2x2 = vec<vec<int, 2>, 2>; + const i32x2x2 m22{ i32x2{ 1, 2 }, i32x2{ 3, 4 } }; + CHECK(m22 * 10 == i32x2x2{ i32x2{ 10, 20 }, i32x2{ 30, 40 } }); + + CHECK(m22 * i32x2{ -1, 100 } == i32x2x2{ i32x2{ -1, 200 }, i32x2{ -3, 400 } }); + + i32x2 xy{ 10, 20 }; + i32x2x2 m{ i32x2{ 1, 2 }, i32x2{ 3, 4 } }; + xy = hadd(xy * m); + CHECK(xy == i32x2{ 40, 120 }); + + i32x2 xy2{ 10, 20 }; + xy2 = hadd(transpose(xy2 * m)); + CHECK(xy2 == i32x2{ 50, 110 }); +} + +TEST(vec_is_convertible) +{ + static_assert(std::is_convertible<float, f32x4>::value, ""); + 
static_assert(std::is_convertible<float, f64x8>::value, ""); + static_assert(std::is_convertible<float, u8x3>::value, ""); + + static_assert(std::is_convertible<u16x4, i32x4>::value, ""); + static_assert(!std::is_convertible<u16x4, i32x3>::value, ""); + static_assert(!std::is_convertible<u16x1, u16x16>::value, ""); + + static_assert(std::is_convertible<float, complex<float>>::value, ""); + static_assert(std::is_convertible<float, complex<double>>::value, ""); + static_assert(std::is_convertible<short, complex<double>>::value, ""); + + static_assert(std::is_convertible<complex<float>, vec<complex<float>, 4>>::value, ""); + static_assert(!std::is_convertible<vec<complex<float>, 1>, vec<complex<float>, 4>>::value, ""); + + static_assert(std::is_convertible<vec<complex<float>, 2>, vec<complex<double>, 2>>::value, ""); + static_assert(std::is_convertible<vec<vec<float, 4>, 2>, vec<vec<double, 4>, 2>>::value, ""); + + CHECK(static_cast<f32x4>(4.f) == f32x4{ 4.f, 4.f, 4.f, 4.f }); + CHECK(static_cast<f64x8>(4.f) == f64x8{ 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0 }); + CHECK(static_cast<u8x3>(4.f) == u8x3{ 4, 4, 4 }); + + CHECK(static_cast<i32x4>(u16x4{ 1, 2, 3, 4 }) == i32x4{ 1, 2, 3, 4 }); + + CHECK(static_cast<complex<float>>(10.f) == complex<float>{ 10.f, 0.f }); + CHECK(static_cast<complex<double>>(10.f) == complex<double>{ 10., 0. }); + CHECK(static_cast<complex<double>>(static_cast<short>(10)) == complex<double>{ 10., 0. }); + + CHECK(static_cast<vec<complex<float>, 4>>(complex<float>{ 1.f, 2.f }) == + vec<complex<float>, 4>{ c32{ 1.f, 2.f }, c32{ 1.f, 2.f }, c32{ 1.f, 2.f }, c32{ 1.f, 2.f } }); + + CHECK(static_cast<vec<complex<double>, 2>>(vec<complex<float>, 2>{ c32{ 1.f, 2.f }, c32{ 1.f, 2.f } }) == + vec<complex<double>, 2>{ c64{ 1., 2. }, c64{ 1., 2. } }); + + CHECK(static_cast<vec<vec<double, 4>, 2>>(vec<vec<float, 4>, 2>{ + vec<float, 4>{ 1.f, 2.f, 3.f, 4.f }, vec<float, 4>{ 11.f, 22.f, 33.f, 44.f } }) == + vec<vec<double, 4>, 2>{ vec<double, 4>{ 1., 2., 3., 4. }, vec<double, 4>{ 11., 22., 33., 44. } }); +} + +TEST(vec_pack_expr) +{ + const univector<float, 20> v1 = 1 + counter(); + const univector<float, 20> v2 = v1 * 11; + const univector<f32x2, 20> v3 = pack(v1, v2); + CHECK(v3[0] == f32x2{ 1, 11 }); + CHECK(v3[1] == f32x2{ 2, 22 }); + CHECK(v3[18] == f32x2{ 19, 209 }); + CHECK(v3[19] == f32x2{ 20, 220 }); + + const univector<f32x2, 20> v4 = bind_expression(fn_reverse(), v3); + CHECK(v4[0] == f32x2{ 11, 1 }); + CHECK(v4[1] == f32x2{ 22, 2 }); + CHECK(v4[18] == f32x2{ 209, 19 }); + CHECK(v4[19] == f32x2{ 220, 20 }); +} + int main(int argc, char** argv) { return testo::run_all("", true); } diff --git a/update-sources.py b/update-sources.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python +from __future__ import print_function + +import fnmatch +import os +import subprocess +import sys +import glob + +path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'include') + +masks = ['*.hpp', '*.h', '*.i'] + +filenames = [] +for root, dirnames, files in os.walk(path, path): + for mask in masks: + for filename in fnmatch.filter(files, mask): + filenames.append(os.path.relpath(os.path.join(root, filename), path).replace('\\','/')) + +cmake = """ +# Auto-generated file. Do not edit +# Use update-sources.py + +set( + KFR_SRC + """ + "\n ".join(['${PROJECT_SOURCE_DIR}/include/' + f for f in filenames]) + """ +) +""" + +with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'sources.cmake'), "w") as f: + f.write(cmake)
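A minimal usage sketch of the sample_rate_converter API introduced above (C++, illustrative only: the 48 kHz to 44.1 kHz ratio, the block length and the counter() test signal are assumptions, not taken from this commit; passing the univector directly relies on its implicit conversion to univector_ref<const T>):

// Sketch of calling the renamed sample-rate-conversion factory; assumptions noted above.
#include <kfr/dsp.hpp>

using namespace kfr;

size_t downsample_block_example()
{
    // Block must exceed the converter's internal depth (1 << (quality + 1), here 512)
    // for the first call to produce any output; counter() is just a placeholder signal.
    const univector<double, 4096> input = counter();
    univector<double, 4096> output = zeros(); // oversized; the return value gives the valid length

    // New factory name; the quality constants now live in sample_rate_conversion_quality
    // (resample_quality is kept as a namespace alias).
    auto src = sample_rate_converter<double>(sample_rate_conversion_quality::normal, 44100, 48000);

    // operator()(T* dest, univector_ref<const T> src) returns the number of output samples written.
    return src(output.data(), input);
}

The deprecated resampler() factory kept above takes the same arguments, so code written against the old name continues to compile.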