kfr

Fast, modern C++ DSP framework, FFT, Sample Rate Conversion, FIR/IIR/Biquad Filters (SSE, AVX, AVX-512, ARM NEON)
Log | Files | Refs | README

commit 0ce746165bf1fafa8814719662b6699a96b0371c
parent e90962b729d8777c92bace4468f57b6589e6d892
Author: d.levin256@gmail.com <d.levin256@gmail.com>
Date:   Tue, 26 Nov 2019 09:45:29 +0000

C API: refactoring, filters

Diffstat:
M CMakeLists.txt | 4 ++--
A capi/CMakeLists.txt | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A capi/capi.cpp | 340 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D dft/CMakeLists.txt | 107 -------------------------------------------------------------------------------
M include/kfr/base/expression.hpp | 6 +++---
M include/kfr/base/filter.hpp | 7 ++++---
M include/kfr/base/pointer.hpp | 25 +++++++++++--------------
A include/kfr/capi.h | 243 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M include/kfr/cident.h | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M include/kfr/dft/convolution.hpp | 8 ++++++--
D include/kfr/dft/dft_c.h | 139 -------------------------------------------------------------------------------
M include/kfr/dft/fft.hpp | 44 ++------------------------------------------
M include/kfr/dft/impl/convolution-impl.cpp | 22 ++++++++++++++++------
M include/kfr/dft/impl/dft-fft.hpp | 2 --
D include/kfr/dft/impl/dft-src.cpp | 159 -------------------------------------------------------------------------------
M include/kfr/dsp/biquad.hpp | 10 +++++++---
M include/kfr/dsp/fir.hpp | 12 ++++++++----
A include/kfr/dsp/impl/dsp-impl.cpp | 29 +++++++++++++++++++++++++++++
M sources.cmake | 2 +-
19 files changed, 877 insertions(+), 487 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt @@ -187,8 +187,8 @@ if (ENABLE_DFT) target_compile_definitions(kfr_dft PUBLIC -DKFR_DFT_NO_NPo2) endif () - if (ENABLE_DFT_MULTIARCH) - add_subdirectory(dft) + if (ENABLE_CAPI_BUILD) + add_subdirectory(capi) endif () endif() diff --git a/capi/CMakeLists.txt b/capi/CMakeLists.txt @@ -0,0 +1,100 @@ +# Copyright (C) 2016 D Levin (http://www.kfrlib.com) +# This file is part of KFR +# +# KFR is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# KFR is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with KFR. + + +cmake_minimum_required(VERSION 3.1) + +if (MSVC) + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd") +endif () + +if(APPLE) + add_compile_options(-mmacosx-version-min=10.9) +endif() + +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/bin) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/bin) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/lib) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/lib) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/lib) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/lib) + +add_custom_target(kfr_multiarch_dft) + +add_library(kfr_capi_all INTERFACE) +target_link_libraries(kfr_capi_all INTERFACE kfr) +if (NOT WIN32) + add_library(kfr_capi_all_pic INTERFACE) + target_link_libraries(kfr_capi_all_pic INTERFACE kfr) +endif () + +function(add_c_library ARCH) + add_library(kfr_capi_${ARCH} 
STATIC
+        ${KFR_DFT_SRC}
+        ${CMAKE_CURRENT_SOURCE_DIR}/../include/kfr/dsp/impl/dsp-impl.cpp
+    )
+    target_link_libraries(kfr_capi_${ARCH} kfr)
+    target_set_arch(kfr_capi_${ARCH} PRIVATE ${ARCH})
+    target_compile_options(kfr_capi_${ARCH} PRIVATE -Xclang -ffast-math)
+    add_dependencies(kfr_multiarch_dft kfr_capi_${ARCH})
+    target_link_libraries(kfr_capi_all INTERFACE kfr_capi_${ARCH})
+
+    if (NOT WIN32) # position-independent twin; kfr_capi links kfr_capi_all_pic on non-Windows platforms
+        add_library(kfr_capi_${ARCH}_pic STATIC ${KFR_DFT_SRC} ${CMAKE_CURRENT_SOURCE_DIR}/../include/kfr/dsp/impl/dsp-impl.cpp) # must compile the same sources as the non-PIC variant above
+        set_property(TARGET kfr_capi_${ARCH}_pic PROPERTY POSITION_INDEPENDENT_CODE 1)
+        target_link_libraries(kfr_capi_${ARCH}_pic kfr)
+        target_set_arch(kfr_capi_${ARCH}_pic PRIVATE ${ARCH})
+        target_compile_options(kfr_capi_${ARCH}_pic PRIVATE -Xclang -ffast-math)
+
+        target_link_libraries(kfr_capi_all_pic INTERFACE kfr_capi_${ARCH}_pic)
+    endif()
+endfunction()
+
+add_c_library(sse2)
+add_c_library(sse3)
+add_c_library(ssse3)
+add_c_library(sse41)
+add_c_library(avx)
+add_c_library(avx2)
+add_c_library(avx512)
+
+add_library(kfr_capi SHARED
+    ${PROJECT_SOURCE_DIR}/capi/capi.cpp)
+target_compile_definitions(kfr_capi PRIVATE
+    -DKFR_DFT_MULTI=1
+    -DCMT_MULTI=1
+    -DCMT_MULTI_ENABLED_SSE2=1
+    -DCMT_MULTI_ENABLED_SSE3=1
+    -DCMT_MULTI_ENABLED_SSSE3=1
+    -DCMT_MULTI_ENABLED_SSE41=1
+    -DCMT_MULTI_ENABLED_AVX=1
+    -DCMT_MULTI_ENABLED_AVX2=1
+    -DCMT_MULTI_ENABLED_AVX512=1
+    -DKFR_BUILDING_DLL=1)
+
+target_set_arch(kfr_capi PRIVATE sse2)
+
+if (WIN32)
+    target_link_libraries(kfr_capi PRIVATE kfr kfr_capi_all)
+else ()
+    target_link_libraries(kfr_capi PRIVATE kfr kfr_capi_all_pic)
+
+    if (APPLE)
+    else ()
+        set_property(TARGET kfr_capi APPEND PROPERTY LINK_LIBRARIES -nodefaultlibs -Wl,-Bdynamic -lm -lc -Wl,-Bstatic -lstdc++ -lgcc -s) # libm/libc dynamic, libstdc++/libgcc static
+    endif ()
+endif()
diff --git a/capi/capi.cpp b/capi/capi.cpp
@@ -0,0 +1,340 @@
+/** @addtogroup dft
+ *  @{
+ */
+/*
+  Copyright (C) 2016 D Levin (https://www.kfrlib.com)
+  This file is part of KFR
+
+  KFR is free software: you can redistribute it and/or modify
+  it under the terms of the GNU
General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + KFR is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with KFR. + + If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + Buying a commercial license is mandatory as soon as you develop commercial activities without + disclosing the source code of your own applications. + See https://www.kfrlib.com for details. + */ + +#include <kfr/capi.h> +#include <kfr/dft.hpp> +#include <kfr/dsp.hpp> + +namespace kfr +{ + +extern "C" +{ +#define KFR_ENABLED_ARCHS "sse2,sse3,ssse3,sse4.1,avx,avx2,avx512" + const char* kfr_version_string() + { + return "KFR " KFR_VERSION_STRING KFR_DEBUG_STR " " KFR_ENABLED_ARCHS " " CMT_ARCH_BITNESS_NAME + " (" CMT_COMPILER_FULL_NAME "/" CMT_OS_NAME ")" KFR_BUILD_DETAILS_1 KFR_BUILD_DETAILS_2; + } + uint32_t kfr_version() { return KFR_VERSION; } + const char* kfr_enabled_archs() { return KFR_ENABLED_ARCHS; } + int kfr_current_arch() { return static_cast<int>(get_cpu()); } + + void* kfr_allocate(size_t size) { return internal_generic::aligned_malloc(size, KFR_DEFAULT_ALIGNMENT); } + void* kfr_allocate_aligned(size_t size, size_t alignment) + { + return internal_generic::aligned_malloc(size, alignment); + } + void kfr_deallocate(void* ptr) { return internal_generic::aligned_free(ptr); } + size_t kfr_allocated_size(void* ptr) { return internal_generic::aligned_size(ptr); } + + void* kfr_add_ref(void* ptr) + { + internal_generic::aligned_add_ref(ptr); + return ptr; + } + void kfr_release(void* ptr) { internal_generic::aligned_release(ptr); } + + void* kfr_reallocate(void* ptr, size_t new_size) + { + return 
internal_generic::aligned_reallocate(ptr, new_size, KFR_DEFAULT_ALIGNMENT); + } + void* kfr_reallocate_aligned(void* ptr, size_t new_size, size_t alignment) + { + return internal_generic::aligned_reallocate(ptr, new_size, alignment); + } + + KFR_DFT_PLAN_F32* kfr_dft_create_plan_f32(size_t size) + { + if (size < 2) + return nullptr; + if (size > 16777216) + return nullptr; + return reinterpret_cast<KFR_DFT_PLAN_F32*>(new kfr::dft_plan<float>(cpu_t::runtime, size)); + } + KFR_DFT_PLAN_F64* kfr_dft_create_plan_f64(size_t size) + { + if (size < 2) + return nullptr; + if (size > 16777216) + return nullptr; + return reinterpret_cast<KFR_DFT_PLAN_F64*>(new kfr::dft_plan<double>(cpu_t::runtime, size)); + } + + void kfr_dft_dump_f32(KFR_DFT_PLAN_F32* plan) { reinterpret_cast<kfr::dft_plan<float>*>(plan)->dump(); } + void kfr_dft_dump_f64(KFR_DFT_PLAN_F64* plan) { reinterpret_cast<kfr::dft_plan<double>*>(plan)->dump(); } + + size_t kfr_dft_get_size_f32(KFR_DFT_PLAN_F32* plan) + { + return reinterpret_cast<kfr::dft_plan<float>*>(plan)->size; + } + size_t kfr_dft_get_size_f64(KFR_DFT_PLAN_F64* plan) + { + return reinterpret_cast<kfr::dft_plan<double>*>(plan)->size; + } + + size_t kfr_dft_get_temp_size_f32(KFR_DFT_PLAN_F32* plan) + { + return reinterpret_cast<kfr::dft_plan<float>*>(plan)->temp_size; + } + size_t kfr_dft_get_temp_size_f64(KFR_DFT_PLAN_F64* plan) + { + return reinterpret_cast<kfr::dft_plan<double>*>(plan)->temp_size; + } + + void kfr_dft_execute_f32(KFR_DFT_PLAN_F32* plan, kfr_c32* out, const kfr_c32* in, uint8_t* temp) + { + reinterpret_cast<kfr::dft_plan<float>*>(plan)->execute( + reinterpret_cast<kfr::complex<float>*>(out), reinterpret_cast<const kfr::complex<float>*>(in), + temp, kfr::cfalse); + } + void kfr_dft_execute_f64(KFR_DFT_PLAN_F64* plan, kfr_c64* out, const kfr_c64* in, uint8_t* temp) + { + reinterpret_cast<kfr::dft_plan<double>*>(plan)->execute( + reinterpret_cast<kfr::complex<double>*>(out), reinterpret_cast<const kfr::complex<double>*>(in), + 
temp, kfr::cfalse);
+    }
+    void kfr_dft_execute_inverse_f32(KFR_DFT_PLAN_F32* plan, kfr_c32* out, const kfr_c32* in, uint8_t* temp)
+    {
+        reinterpret_cast<kfr::dft_plan<float>*>(plan)->execute(
+            reinterpret_cast<kfr::complex<float>*>(out), reinterpret_cast<const kfr::complex<float>*>(in),
+            temp, kfr::ctrue);
+    }
+    void kfr_dft_execute_inverse_f64(KFR_DFT_PLAN_F64* plan, kfr_c64* out, const kfr_c64* in, uint8_t* temp)
+    {
+        reinterpret_cast<kfr::dft_plan<double>*>(plan)->execute(
+            reinterpret_cast<kfr::complex<double>*>(out), reinterpret_cast<const kfr::complex<double>*>(in),
+            temp, kfr::ctrue);
+    }
+
+    void kfr_dft_delete_plan_f32(KFR_DFT_PLAN_F32* plan)
+    {
+        delete reinterpret_cast<kfr::dft_plan<float>*>(plan);
+    }
+    void kfr_dft_delete_plan_f64(KFR_DFT_PLAN_F64* plan)
+    {
+        delete reinterpret_cast<kfr::dft_plan<double>*>(plan);
+    }
+
+    // Real DFT plans: every handle below is a kfr::dft_plan_real<T> allocated by kfr_dft_real_create_plan_*
+
+    KFR_DFT_REAL_PLAN_F32* kfr_dft_real_create_plan_f32(size_t size, KFR_DFT_PACK_FORMAT pack_format)
+    {
+        if (size < 4)
+            return nullptr;
+        if (size > 16777216)
+            return nullptr;
+        return reinterpret_cast<KFR_DFT_REAL_PLAN_F32*>(
+            new kfr::dft_plan_real<float>(cpu_t::runtime, size, static_cast<dft_pack_format>(pack_format)));
+    }
+    KFR_DFT_REAL_PLAN_F64* kfr_dft_real_create_plan_f64(size_t size, KFR_DFT_PACK_FORMAT pack_format)
+    {
+        if (size < 4)
+            return nullptr;
+        if (size > 16777216)
+            return nullptr;
+        return reinterpret_cast<KFR_DFT_REAL_PLAN_F64*>(
+            new kfr::dft_plan_real<double>(cpu_t::runtime, size, static_cast<dft_pack_format>(pack_format)));
+    }
+
+    void kfr_dft_real_dump_f32(KFR_DFT_REAL_PLAN_F32* plan)
+    {
+        reinterpret_cast<kfr::dft_plan_real<float>*>(plan)->dump();
+    }
+    void kfr_dft_real_dump_f64(KFR_DFT_REAL_PLAN_F64* plan)
+    {
+        reinterpret_cast<kfr::dft_plan_real<double>*>(plan)->dump();
+    }
+
+    size_t kfr_dft_real_get_size_f32(KFR_DFT_REAL_PLAN_F32* plan)
+    {
+        return reinterpret_cast<kfr::dft_plan_real<float>*>(plan)->size; // cast back to the type the handle was created as
+    }
+    size_t kfr_dft_real_get_size_f64(KFR_DFT_REAL_PLAN_F64* plan)
+    {
+        return reinterpret_cast<kfr::dft_plan_real<double>*>(plan)->size; // cast back to the type the handle was created as
+    }
+
+    size_t kfr_dft_real_get_temp_size_f32(KFR_DFT_REAL_PLAN_F32* plan)
+    {
+        return reinterpret_cast<kfr::dft_plan_real<float>*>(plan)->temp_size; // same cast as the other real-plan functions
+    }
+    size_t kfr_dft_real_get_temp_size_f64(KFR_DFT_REAL_PLAN_F64* plan)
+    {
+        return reinterpret_cast<kfr::dft_plan_real<double>*>(plan)->temp_size; // same cast as the other real-plan functions
+    }
+
+    void kfr_dft_real_execute_f32(KFR_DFT_REAL_PLAN_F32* plan, kfr_c32* out, const float* in, uint8_t* temp)
+    {
+        reinterpret_cast<kfr::dft_plan_real<float>*>(plan)->execute(
+            reinterpret_cast<kfr::complex<float>*>(out), in, temp);
+    }
+    void kfr_dft_real_execute_f64(KFR_DFT_REAL_PLAN_F64* plan, kfr_c64* out, const double* in, uint8_t* temp)
+    {
+        reinterpret_cast<kfr::dft_plan_real<double>*>(plan)->execute(
+            reinterpret_cast<kfr::complex<double>*>(out), in, temp);
+    }
+    void kfr_dft_real_execute_inverse_f32(KFR_DFT_REAL_PLAN_F32* plan, float* out, const kfr_c32* in,
+                                          uint8_t* temp)
+    {
+        reinterpret_cast<kfr::dft_plan_real<float>*>(plan)->execute(
+            out, reinterpret_cast<const kfr::complex<float>*>(in), temp);
+    }
+    void kfr_dft_real_execute_inverse_f64(KFR_DFT_REAL_PLAN_F64* plan, double* out, const kfr_c64* in,
+                                          uint8_t* temp)
+    {
+        reinterpret_cast<kfr::dft_plan_real<double>*>(plan)->execute(
+            out, reinterpret_cast<const kfr::complex<double>*>(in), temp);
+    }
+
+    void kfr_dft_real_delete_plan_f32(KFR_DFT_REAL_PLAN_F32* plan)
+    {
+        delete reinterpret_cast<kfr::dft_plan_real<float>*>(plan);
+    }
+    void kfr_dft_real_delete_plan_f64(KFR_DFT_REAL_PLAN_F64* plan)
+    {
+        delete reinterpret_cast<kfr::dft_plan_real<double>*>(plan);
+    }
+
+    // Discrete Cosine Transform
+
+    KFR_DCT_PLAN_F32* kfr_dct_create_plan_f32(size_t size)
+    {
+        if (size < 4)
+            return nullptr;
+        if (size > 16777216)
+            return nullptr;
+        return reinterpret_cast<KFR_DCT_PLAN_F32*>(new kfr::dct_plan<float>(cpu_t::runtime, size));
+    }
+    KFR_DCT_PLAN_F64* kfr_dct_create_plan_f64(size_t size)
+    {
+        if (size < 4)
+            return nullptr;
+        if (size >
16777216)
+            return nullptr;
+        return reinterpret_cast<KFR_DCT_PLAN_F64*>(new kfr::dct_plan<double>(cpu_t::runtime, size));
+    }
+
+    void kfr_dct_dump_f32(KFR_DCT_PLAN_F32* plan) { reinterpret_cast<kfr::dct_plan<float>*>(plan)->dump(); }
+    void kfr_dct_dump_f64(KFR_DCT_PLAN_F64* plan) { reinterpret_cast<kfr::dct_plan<double>*>(plan)->dump(); }
+
+    size_t kfr_dct_get_size_f32(KFR_DCT_PLAN_F32* plan)
+    {
+        return reinterpret_cast<kfr::dct_plan<float>*>(plan)->size; // handle was created as dct_plan<float>
+    }
+    size_t kfr_dct_get_size_f64(KFR_DCT_PLAN_F64* plan)
+    {
+        return reinterpret_cast<kfr::dct_plan<double>*>(plan)->size; // handle was created as dct_plan<double>
+    }
+
+    size_t kfr_dct_get_temp_size_f32(KFR_DCT_PLAN_F32* plan)
+    {
+        return reinterpret_cast<kfr::dct_plan<float>*>(plan)->temp_size; // handle was created as dct_plan<float>
+    }
+    size_t kfr_dct_get_temp_size_f64(KFR_DCT_PLAN_F64* plan)
+    {
+        return reinterpret_cast<kfr::dct_plan<double>*>(plan)->temp_size; // handle was created as dct_plan<double>
+    }
+
+    void kfr_dct_execute_f32(KFR_DCT_PLAN_F32* plan, float* out, const float* in, uint8_t* temp)
+    {
+        reinterpret_cast<kfr::dct_plan<float>*>(plan)->execute(out, in, temp);
+    }
+    void kfr_dct_execute_f64(KFR_DCT_PLAN_F64* plan, double* out, const double* in, uint8_t* temp)
+    {
+        reinterpret_cast<kfr::dct_plan<double>*>(plan)->execute(out, in, temp);
+    }
+    void kfr_dct_execute_inverse_f32(KFR_DCT_PLAN_F32* plan, float* out, const float* in, uint8_t* temp)
+    {
+        reinterpret_cast<kfr::dct_plan<float>*>(plan)->execute(out, in, temp, true); // NOTE(review): assumes dct_plan::execute takes a trailing `inverse` flag - confirm in fft.hpp
+    }
+    void kfr_dct_execute_inverse_f64(KFR_DCT_PLAN_F64* plan, double* out, const double* in, uint8_t* temp)
+    {
+        reinterpret_cast<kfr::dct_plan<double>*>(plan)->execute(out, in, temp, true); // previously identical to the forward call
+    }
+
+    void kfr_dct_delete_plan_f32(KFR_DCT_PLAN_F32* plan)
+    {
+        delete reinterpret_cast<kfr::dct_plan<float>*>(plan);
+    }
+    void kfr_dct_delete_plan_f64(KFR_DCT_PLAN_F64* plan)
+    {
+        delete reinterpret_cast<kfr::dct_plan<double>*>(plan);
+    }
+
+    // Filters
+
+    KFR_FILTER_F32* kfr_filter_create_fir_plan_f32(const kfr_f32* taps, size_t size)
+    {
+        return
reinterpret_cast<KFR_FILTER_F32*>(make_fir_filter<float>(make_univector(taps, size))); + } + KFR_FILTER_F64* kfr_filter_create_fir_plan_f64(const kfr_f64* taps, size_t size) + { + return reinterpret_cast<KFR_FILTER_F64*>(make_fir_filter<double>(make_univector(taps, size))); + } + + KFR_FILTER_F32* kfr_filter_create_convolution_plan_f32(const kfr_f32* taps, size_t size, + size_t block_size) + { + return reinterpret_cast<KFR_FILTER_F32*>( + make_convolve_filter<float>(make_univector(taps, size), block_size ? block_size : 1024)); + } + KFR_FILTER_F64* kfr_filter_create_convolution_plan_f64(const kfr_f64* taps, size_t size, + size_t block_size) + { + return reinterpret_cast<KFR_FILTER_F64*>( + make_convolve_filter<double>(make_univector(taps, size), block_size ? block_size : 1024)); + } + + KFR_FILTER_F32* kfr_filter_create_iir_plan_f32(const kfr_f32* sos, size_t sos_count) + { + if (sos_count < 1 || sos_count > 64) + return nullptr; + return reinterpret_cast<KFR_FILTER_F32*>( + make_biquad_filter<float, 64>(reinterpret_cast<const biquad_params<float>*>(sos), sos_count)); + } + KFR_FILTER_F64* kfr_filter_create_iir_plan_f64(const kfr_f64* sos, size_t sos_count) + { + if (sos_count < 1 || sos_count > 64) + return nullptr; + return reinterpret_cast<KFR_FILTER_F64*>( + make_biquad_filter<double, 64>(reinterpret_cast<const biquad_params<double>*>(sos), sos_count)); + } + + void kfr_filter_process_f32(KFR_FILTER_F32* plan, kfr_f32* output, const kfr_f32* input, size_t size) + { + reinterpret_cast<filter<float>*>(plan)->apply(output, input, size); + } + void kfr_filter_process_f64(KFR_FILTER_F64* plan, kfr_f64* output, const kfr_f64* input, size_t size) + { + reinterpret_cast<filter<double>*>(plan)->apply(output, input, size); + } + + void kfr_filter_delete_plan_f32(KFR_FILTER_F32* plan) { delete reinterpret_cast<filter<f32>*>(plan); } + void kfr_filter_delete_plan_f64(KFR_FILTER_F64* plan) { delete reinterpret_cast<filter<f64>*>(plan); } +} + +} // namespace kfr diff --git 
a/dft/CMakeLists.txt b/dft/CMakeLists.txt @@ -1,107 +0,0 @@ -# Copyright (C) 2016 D Levin (http://www.kfrlib.com) -# This file is part of KFR -# -# KFR is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# KFR is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with KFR. - - -cmake_minimum_required(VERSION 3.1) - -if (MSVC) - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd") -endif () - -if(APPLE) - add_compile_options(-mmacosx-version-min=10.9) -endif() - -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/bin) -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/bin) -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/lib) -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/lib) -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/lib) -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/lib) - -if (MSVC) - set(LIBABI "vc${MSVC_TOOLSET_VERSION}") -elseif (WIN32) - set(LIBABI "mingw") -elseif (APPLE) - set(LIBABI "apple") -elseif ("${CMAKE_SYSTEM}" MATCHES "Linux") - set(LIBABI "linux") -else () - set(LIBABI "unknown") - message(WARNING "KFR DFT: Unknown ABI") -endif () - -if (CMAKE_SIZEOF_VOID_P EQUAL 8) - set(LIBABI "${LIBABI}-x64") -else () - set(LIBABI "${LIBABI}-x32") -endif () - -if (CMAKE_BUILD_TYPE STREQUAL Debug) - set(LIBABI "${LIBABI}-d") -endif () - -message(STATUS "LIBABI = ${LIBABI}") - -add_custom_target(kfr_multiarch_dft) - -add_library(kfr_multidft INTERFACE) 
-target_link_libraries(kfr_multidft INTERFACE kfr) -if (NOT WIN32) - add_library(kfr_multidft_pic INTERFACE) - target_link_libraries(kfr_multidft_pic INTERFACE kfr) -endif () - -function(add_dft_library ARCH) - add_library(kfr_dft_${LIBABI}_${ARCH} STATIC ${KFR_DFT_SRC}) - target_link_libraries(kfr_dft_${LIBABI}_${ARCH} kfr) - target_set_arch(kfr_dft_${LIBABI}_${ARCH} PRIVATE ${ARCH}) - target_compile_options(kfr_dft_${LIBABI}_${ARCH} PRIVATE -Xclang -ffast-math) - add_dependencies(kfr_multiarch_dft kfr_dft_${LIBABI}_${ARCH}) - target_link_libraries(kfr_multidft INTERFACE kfr_dft_${LIBABI}_${ARCH}) - - if (NOT WIN32) - add_library(kfr_dft_${LIBABI}_${ARCH}_pic STATIC ${KFR_DFT_SRC}) - set_property(TARGET kfr_dft_${LIBABI}_${ARCH}_pic PROPERTY POSITION_INDEPENDENT_CODE 1) - target_link_libraries(kfr_dft_${LIBABI}_${ARCH}_pic kfr) - target_set_arch(kfr_dft_${LIBABI}_${ARCH}_pic PRIVATE ${ARCH}) - target_compile_options(kfr_dft_${LIBABI}_${ARCH}_pic PRIVATE -Xclang -ffast-math) - - target_link_libraries(kfr_multidft_pic INTERFACE kfr_dft_${LIBABI}_${ARCH}_pic) - endif() -endfunction() - -add_dft_library(sse2) -add_dft_library(sse41) -add_dft_library(avx) -add_dft_library(avx2) -add_dft_library(avx512) - -add_library(kfrdft SHARED ${PROJECT_SOURCE_DIR}/include/kfr/dft/impl/dft-src.cpp) -target_compile_definitions(kfrdft PRIVATE -DKFR_DFT_MULTI=1 -DKFR_BUILDING_DLL=1) - -if (WIN32) - target_link_libraries(kfrdft PRIVATE kfr kfr_multidft) -else () - target_link_libraries(kfrdft PRIVATE kfr kfr_multidft_pic) - - if (APPLE) - else () - set_property(TARGET kfrdft APPEND PROPERTY LINK_LIBRARIES -nodefaultlibs -Wl,-Bdynamic -lm -lc -Wl,-Bstatic -lstdc++ -lgcc -s) - endif () -endif() diff --git a/include/kfr/base/expression.hpp b/include/kfr/base/expression.hpp @@ -55,9 +55,6 @@ struct complex; #endif #endif -inline namespace CMT_ARCH_NAME -{ - constexpr size_t inout_context_size = 16; struct coutput_context @@ -142,6 +139,9 @@ constexpr inline bool is_numeric = 
is_number<deep_subtype<T>>; template <typename... Ts> constexpr inline bool is_numeric_args = (is_numeric<Ts> && ...); +inline namespace CMT_ARCH_NAME +{ + #ifdef KFR_TESTING namespace internal { diff --git a/include/kfr/base/filter.hpp b/include/kfr/base/filter.hpp @@ -32,8 +32,6 @@ namespace kfr { -inline namespace CMT_ARCH_NAME -{ /// @brief Abstract base class for filters with one argument. Mainly for DSP template <typename T> @@ -131,12 +129,16 @@ protected: expression_pointer<T> filter_expr; }; +inline namespace CMT_ARCH_NAME +{ + /// @brief Converts expression with placeholder to filter. Placeholder and filter must have the same type template <typename E, typename T = value_type_of<E>> KFR_INTRINSIC expression_filter<T> to_filter(E&& e) { return expression_filter<T>(to_pointer(std::move(e))); } +} // namespace CMT_ARCH_NAME /// @brief Converts expression with placeholder to filter. Placeholder and filter must have the same type template <typename T, typename E> @@ -145,5 +147,4 @@ KFR_INTRINSIC expression_filter<T> to_filter(expression_pointer<T>&& e) return expression_filter<T>(std::move(e)); } -} // namespace CMT_ARCH_NAME } // namespace kfr diff --git a/include/kfr/base/pointer.hpp b/include/kfr/base/pointer.hpp @@ -31,15 +31,16 @@ namespace kfr { -inline namespace CMT_ARCH_NAME -{ - -template <typename T> -constexpr size_t maximum_expression_width = vector_width<T> * 2; template <typename T, bool enable_resource = true> struct expression_pointer; +template <typename T> +constexpr size_t maximum_expression_width = vector_width_for<T, cpu_t::highest> * 2; + +inline namespace CMT_ARCH_NAME +{ + namespace internal { @@ -47,6 +48,7 @@ template <typename Expression, typename T, size_t key = 0> KFR_INTRINSIC bool invoke_substitute(Expression& expr, expression_pointer<T>&& new_pointer, csize_t<key> = {}); } +} // namespace CMT_ARCH_NAME template <typename T, size_t N = maximum_expression_width<T>> struct expression_vtable : expression_vtable<T, N / 2> @@ -182,6 
+184,9 @@ private: std::shared_ptr<expression_resource> resource; }; +inline namespace CMT_ARCH_NAME +{ + namespace internal { @@ -277,19 +282,11 @@ KFR_INTRINSIC bool substitute(expression_pointer<T>& expr, expression_pointer<T> namespace internal { -KFR_INTRINSIC bool var_or() { return false; } - -template <typename... Args> -KFR_INTRINSIC bool var_or(bool b, Args... args) -{ - return b || var_or(args...); -} - template <typename... Args, typename T, size_t key, size_t... indices> KFR_INTRINSIC bool substitute(internal::expression_with_arguments<Args...>& expr, expression_pointer<T>&& new_pointer, csize_t<key>, csizes_t<indices...>) { - return var_or(substitute(std::get<indices>(expr.args), std::move(new_pointer), csize_t<key>())...); + return (substitute(std::get<indices>(expr.args), std::move(new_pointer), csize_t<key>()) || ...); } } // namespace internal diff --git a/include/kfr/capi.h b/include/kfr/capi.h @@ -0,0 +1,243 @@ +/** @addtogroup dft + * @{ + */ +/* + Copyright (C) 2016 D Levin (https://www.kfrlib.com) + This file is part of KFR + + KFR is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + KFR is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with KFR. + + If GPL is not suitable for your project, you must purchase a commercial license to use KFR. + Buying a commercial license is mandatory as soon as you develop commercial activities without + disclosing the source code of your own applications. + See https://www.kfrlib.com for details. 
+ */ +#pragma once + +#include <stddef.h> +#include <stdint.h> +#if defined __STDC_IEC_559_COMPLEX__ && !defined KFR_NO_C_COMPLEX_TYPES +#include <complex.h> +#endif + +#if defined(_M_IX86) || defined(__i386__) || defined(_M_X64) || defined(__x86_64__) +#define KFR_ARCH_IS_X86 1 +#elif defined(__arm__) || defined(__arm64__) || defined(_M_ARM) || defined(__aarch64__) +#define KFR_ARCH_IS_ARM 1 +#endif + +#if defined(_M_X64) || defined(__x86_64__) +#define KFR_CDECL +#else +#ifdef _WIN32 +#define KFR_CDECL __cdecl +#elif defined KFR_ARCH_IS_X86 +#define KFR_CDECL __attribute__((__cdecl__)) +#else +#define KFR_CDECL +#endif +#endif + +#ifdef _WIN32 +#ifdef KFR_BUILDING_DLL +#define KFR_API_SPEC KFR_CDECL __declspec(dllexport) +#else +#define KFR_API_SPEC KFR_CDECL __declspec(dllimport) +#endif +#else +#define KFR_API_SPEC KFR_CDECL +#endif + +#ifdef __cplusplus +extern "C" +{ +#endif + + enum + { + KFR_ARCH_X86 = 0, + KFR_ARCH_SSE2 = 1, + KFR_ARCH_SSE3 = 2, + KFR_ARCH_SSSE3 = 3, + KFR_ARCH_SSE41 = 4, + KFR_ARCH_SSE42 = 5, + KFR_ARCH_AVX = 6, + KFR_ARCH_AVX2 = 7, + KFR_ARCH_AVX512 = 8, + }; + + KFR_API_SPEC const char* kfr_version_string(); + KFR_API_SPEC uint32_t kfr_version(); + KFR_API_SPEC const char* kfr_enabled_archs(); + KFR_API_SPEC int kfr_current_arch(); + + typedef float kfr_f32; + typedef double kfr_f64; +#if defined __STDC_IEC_559_COMPLEX__ && !defined KFR_NO_C_COMPLEX_TYPES + typedef float _Complex kfr_c32; + typedef double _Complex kfr_c64; +#else +typedef float kfr_c32; +typedef double kfr_c64; +#endif + typedef size_t kfr_size_t; + typedef int32_t kfr_int32_t; + +#define KFR_OPAQUE_STRUCT(NAME) \ + typedef struct NAME \ + { \ + int opaque; \ + } NAME; + + KFR_OPAQUE_STRUCT(KFR_DFT_PLAN_F32) + KFR_OPAQUE_STRUCT(KFR_DFT_PLAN_F64) + + KFR_OPAQUE_STRUCT(KFR_DFT_REAL_PLAN_F32) + KFR_OPAQUE_STRUCT(KFR_DFT_REAL_PLAN_F64) + + KFR_OPAQUE_STRUCT(KFR_DCT_PLAN_F32) + KFR_OPAQUE_STRUCT(KFR_DCT_PLAN_F64) + + KFR_OPAQUE_STRUCT(KFR_FILTER_F32) + 
KFR_OPAQUE_STRUCT(KFR_FILTER_F64)
+
+    KFR_OPAQUE_STRUCT(KFR_FILTER_C32)
+    KFR_OPAQUE_STRUCT(KFR_FILTER_C64)
+
+    // Memory allocation (declared with KFR_API_SPEC like the rest of the API so the symbols are exported/imported on Windows)
+
+#define KFR_DEFAULT_ALIGNMENT 64
+
+    KFR_API_SPEC void* kfr_allocate(size_t size);
+    KFR_API_SPEC void* kfr_allocate_aligned(size_t size, size_t alignment);
+    KFR_API_SPEC void* kfr_reallocate(void* ptr, size_t new_size);
+    KFR_API_SPEC void* kfr_reallocate_aligned(void* ptr, size_t new_size, size_t alignment);
+    KFR_API_SPEC void* kfr_add_ref(void* ptr);
+    KFR_API_SPEC void kfr_release(void* ptr);
+    KFR_API_SPEC void kfr_deallocate(void* ptr);
+    KFR_API_SPEC size_t kfr_allocated_size(void* ptr);
+
+    typedef enum KFR_DFT_PACK_FORMAT
+    {
+        Perm = 0,
+        CCs  = 1
+    } KFR_DFT_PACK_FORMAT;
+
+    // Complex DFT plans
+
+    KFR_API_SPEC KFR_DFT_PLAN_F32* kfr_dft_create_plan_f32(size_t size);
+    KFR_API_SPEC KFR_DFT_PLAN_F64* kfr_dft_create_plan_f64(size_t size);
+
+    KFR_API_SPEC void kfr_dft_dump_f32(KFR_DFT_PLAN_F32* plan);
+    KFR_API_SPEC void kfr_dft_dump_f64(KFR_DFT_PLAN_F64* plan);
+
+    KFR_API_SPEC size_t kfr_dft_get_size_f32(KFR_DFT_PLAN_F32* plan);
+    KFR_API_SPEC size_t kfr_dft_get_size_f64(KFR_DFT_PLAN_F64* plan);
+
+    KFR_API_SPEC size_t kfr_dft_get_temp_size_f32(KFR_DFT_PLAN_F32* plan);
+    KFR_API_SPEC size_t kfr_dft_get_temp_size_f64(KFR_DFT_PLAN_F64* plan);
+
+    KFR_API_SPEC void kfr_dft_execute_f32(KFR_DFT_PLAN_F32* plan, kfr_c32* out, const kfr_c32* in,
+                                          uint8_t* temp);
+    KFR_API_SPEC void kfr_dft_execute_f64(KFR_DFT_PLAN_F64* plan, kfr_c64* out, const kfr_c64* in,
+                                          uint8_t* temp);
+
+    KFR_API_SPEC void kfr_dft_execute_inverse_f32(KFR_DFT_PLAN_F32* plan, kfr_c32* out, const kfr_c32* in,
+                                                  uint8_t* temp);
+    KFR_API_SPEC void kfr_dft_execute_inverse_f64(KFR_DFT_PLAN_F64* plan, kfr_c64* out, const kfr_c64* in,
+                                                  uint8_t* temp);
+
+    KFR_API_SPEC void kfr_dft_delete_plan_f32(KFR_DFT_PLAN_F32* plan);
+    KFR_API_SPEC void kfr_dft_delete_plan_f64(KFR_DFT_PLAN_F64* plan);
+
+    // Real DFT plans
+
+    KFR_API_SPEC KFR_DFT_REAL_PLAN_F32* kfr_dft_real_create_plan_f32(size_t size,
+                                                                     KFR_DFT_PACK_FORMAT pack_format);
+    KFR_API_SPEC
KFR_DFT_REAL_PLAN_F64* kfr_dft_real_create_plan_f64(size_t size, + KFR_DFT_PACK_FORMAT pack_format); + + KFR_API_SPEC void kfr_dft_real_dump_f32(KFR_DFT_REAL_PLAN_F32* plan); + KFR_API_SPEC void kfr_dft_real_dump_f64(KFR_DFT_REAL_PLAN_F64* plan); + + KFR_API_SPEC size_t kfr_dft_real_get_size_f32(KFR_DFT_REAL_PLAN_F32* plan); + KFR_API_SPEC size_t kfr_dft_real_get_size_f64(KFR_DFT_REAL_PLAN_F64* plan); + + KFR_API_SPEC size_t kfr_dft_real_get_temp_size_f32(KFR_DFT_REAL_PLAN_F32* plan); + KFR_API_SPEC size_t kfr_dft_real_get_temp_size_f64(KFR_DFT_REAL_PLAN_F64* plan); + + KFR_API_SPEC void kfr_dft_real_execute_f32(KFR_DFT_REAL_PLAN_F32* plan, kfr_c32* out, const kfr_f32* in, + uint8_t* temp); + KFR_API_SPEC void kfr_dft_real_execute_f64(KFR_DFT_REAL_PLAN_F64* plan, kfr_c64* out, const kfr_f64* in, + uint8_t* temp); + + KFR_API_SPEC void kfr_dft_real_execute_inverse_f32(KFR_DFT_REAL_PLAN_F32* plan, kfr_f32* out, + const kfr_c32* in, uint8_t* temp); + KFR_API_SPEC void kfr_dft_real_execute_inverse_f64(KFR_DFT_REAL_PLAN_F64* plan, kfr_f64* out, + const kfr_c64* in, uint8_t* temp); + + KFR_API_SPEC void kfr_dft_real_delete_plan_f32(KFR_DFT_REAL_PLAN_F32* plan); + KFR_API_SPEC void kfr_dft_real_delete_plan_f64(KFR_DFT_REAL_PLAN_F64* plan); + + // Discrete Cosine Transform type II plans + + KFR_API_SPEC KFR_DCT_PLAN_F32* kfr_dct_create_plan_f32(size_t size); + KFR_API_SPEC KFR_DCT_PLAN_F64* kfr_dct_create_plan_f64(size_t size); + + KFR_API_SPEC void kfr_dct_dump_f32(KFR_DCT_PLAN_F32* plan); + KFR_API_SPEC void kfr_dct_dump_f64(KFR_DCT_PLAN_F64* plan); + + KFR_API_SPEC size_t kfr_dct_get_size_f32(KFR_DCT_PLAN_F32* plan); + KFR_API_SPEC size_t kfr_dct_get_size_f64(KFR_DCT_PLAN_F64* plan); + + KFR_API_SPEC size_t kfr_dct_get_temp_size_f32(KFR_DCT_PLAN_F32* plan); + KFR_API_SPEC size_t kfr_dct_get_temp_size_f64(KFR_DCT_PLAN_F64* plan); + + KFR_API_SPEC void kfr_dct_execute_f32(KFR_DCT_PLAN_F32* plan, kfr_f32* out, const kfr_f32* in, + uint8_t* temp); + KFR_API_SPEC void 
kfr_dct_execute_f64(KFR_DCT_PLAN_F64* plan, kfr_f64* out, const kfr_f64* in, + uint8_t* temp); + + KFR_API_SPEC void kfr_dct_execute_inverse_f32(KFR_DCT_PLAN_F32* plan, kfr_f32* out, const kfr_f32* in, + uint8_t* temp); + KFR_API_SPEC void kfr_dct_execute_inverse_f64(KFR_DCT_PLAN_F64* plan, kfr_f64* out, const kfr_f64* in, + uint8_t* temp); + + KFR_API_SPEC void kfr_dct_delete_plan_f32(KFR_DCT_PLAN_F32* plan); + KFR_API_SPEC void kfr_dct_delete_plan_f64(KFR_DCT_PLAN_F64* plan); + + // Filters: FIR, IIR + + KFR_API_SPEC KFR_FILTER_F32* kfr_filter_create_fir_plan_f32(const kfr_f32* taps, size_t size); + KFR_API_SPEC KFR_FILTER_F64* kfr_filter_create_fir_plan_f64(const kfr_f64* taps, size_t size); + + KFR_API_SPEC KFR_FILTER_F32* kfr_filter_create_convolution_plan_f32(const kfr_f32* taps, size_t size, + size_t block_size); + KFR_API_SPEC KFR_FILTER_F64* kfr_filter_create_convolution_plan_f64(const kfr_f64* taps, size_t size, + size_t block_size); + + KFR_API_SPEC KFR_FILTER_F32* kfr_filter_create_iir_plan_f32(const kfr_f32* sos, size_t sos_count); + KFR_API_SPEC KFR_FILTER_F64* kfr_filter_create_iir_plan_f64(const kfr_f64* sos, size_t sos_count); + + KFR_API_SPEC void kfr_filter_process_f32(KFR_FILTER_F32* plan, kfr_f32* output, const kfr_f32* input, + size_t size); + KFR_API_SPEC void kfr_filter_process_f64(KFR_FILTER_F64* plan, kfr_f64* output, const kfr_f64* input, + size_t size); + + KFR_API_SPEC void kfr_filter_delete_plan_f32(KFR_FILTER_F32* plan); + KFR_API_SPEC void kfr_filter_delete_plan_f64(KFR_FILTER_F64* plan); + +#ifdef __cplusplus +} +#endif diff --git a/include/kfr/cident.h b/include/kfr/cident.h @@ -651,3 +651,107 @@ extern char* gets(char* __s); #define CMT_NARGS2(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, ...) _10 #define CMT_NARGS(...) CMT_NARGS2(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + +#ifdef CMT_MULTI_ENABLED_AVX512 +#define CMT_IF_ENABLED_AVX512(...) __VA_ARGS__ +#else +#define CMT_IF_ENABLED_AVX512(...) 
+#endif + +#ifdef CMT_MULTI_ENABLED_AVX2 +#define CMT_IF_ENABLED_AVX2(...) __VA_ARGS__ +#else +#define CMT_IF_ENABLED_AVX2(...) +#endif + +#ifdef CMT_MULTI_ENABLED_AVX +#define CMT_IF_ENABLED_AVX(...) __VA_ARGS__ +#else +#define CMT_IF_ENABLED_AVX(...) +#endif + +#ifdef CMT_MULTI_ENABLED_SSE42 +#define CMT_IF_ENABLED_SSE42(...) __VA_ARGS__ +#else +#define CMT_IF_ENABLED_SSE42(...) +#endif + +#ifdef CMT_MULTI_ENABLED_SSE41 +#define CMT_IF_ENABLED_SSE41(...) __VA_ARGS__ +#else +#define CMT_IF_ENABLED_SSE41(...) +#endif + +#ifdef CMT_MULTI_ENABLED_SSSE3 +#define CMT_IF_ENABLED_SSSE3(...) __VA_ARGS__ +#else +#define CMT_IF_ENABLED_SSSE3(...) +#endif + +#ifdef CMT_MULTI_ENABLED_SSE3 +#define CMT_IF_ENABLED_SSE3(...) __VA_ARGS__ +#else +#define CMT_IF_ENABLED_SSE3(...) +#endif + +#ifdef CMT_MULTI_ENABLED_SSE2 +#define CMT_IF_ENABLED_SSE2(...) __VA_ARGS__ +#else +#define CMT_IF_ENABLED_SSE2(...) +#endif + +#define CMT_IF_IS_AVX512(...) +#define CMT_IF_IS_AVX2(...) +#define CMT_IF_IS_AVX(...) +#define CMT_IF_IS_SSE42(...) +#define CMT_IF_IS_SSE41(...) +#define CMT_IF_IS_SSSE3(...) +#define CMT_IF_IS_SSE3(...) +#define CMT_IF_IS_SSE2(...) + +#if defined CMT_ARCH_AVX512 +#undef CMT_IF_IS_AVX512 +#define CMT_IF_IS_AVX512(...) __VA_ARGS__ +#elif defined CMT_ARCH_AVX2 +#undef CMT_IF_IS_AVX2 +#define CMT_IF_IS_AVX2(...) __VA_ARGS__ +#elif defined CMT_ARCH_AVX +#undef CMT_IF_IS_AVX +#define CMT_IF_IS_AVX(...) __VA_ARGS__ +#elif defined CMT_ARCH_SSE42 +#undef CMT_IF_IS_SSE42 +#define CMT_IF_IS_SSE42(...) __VA_ARGS__ +#elif defined CMT_ARCH_SSE41 +#undef CMT_IF_IS_SSE41 +#define CMT_IF_IS_SSE41(...) __VA_ARGS__ +#elif defined CMT_ARCH_SSSE3 +#undef CMT_IF_IS_SSSE3 +#define CMT_IF_IS_SSSE3(...) __VA_ARGS__ +#elif defined CMT_ARCH_SSE3 +#undef CMT_IF_IS_SSE3 +#define CMT_IF_IS_SSE3(...) __VA_ARGS__ +#elif defined CMT_ARCH_SSE2 +#undef CMT_IF_IS_SSE2 +#define CMT_IF_IS_SSE2(...) __VA_ARGS__ +#endif + +#ifdef CMT_MULTI +#define CMT_MULTI_PROTO(...) 
\ + inline namespace CMT_ARCH_NAME \ + { \ + __VA_ARGS__ \ + } \ + CMT_IF_ENABLED_SSE2(CMT_IF_IS_SSE2(inline) namespace sse2{ __VA_ARGS__ }) \ + CMT_IF_ENABLED_SSE3(CMT_IF_IS_SSE3(inline) namespace sse3{ __VA_ARGS__ }) \ + CMT_IF_ENABLED_SSSE3(CMT_IF_IS_SSSE3(inline) namespace ssse3{ __VA_ARGS__ }) \ + CMT_IF_ENABLED_SSE41(CMT_IF_IS_SSE41(inline) namespace sse41{ __VA_ARGS__ }) \ + CMT_IF_ENABLED_AVX(CMT_IF_IS_AVX(inline) namespace avx{ __VA_ARGS__ }) \ + CMT_IF_ENABLED_AVX2(CMT_IF_IS_AVX2(inline) namespace avx2{ __VA_ARGS__ }) \ + CMT_IF_ENABLED_AVX512(CMT_IF_IS_AVX512(inline) namespace avx512{ __VA_ARGS__ }) +#else +#define CMT_MULTI_PROTO(...) \ + inline namespace CMT_ARCH_NAME \ + { \ + __VA_ARGS__ \ + } +#endif +\ No newline at end of file diff --git a/include/kfr/dft/convolution.hpp b/include/kfr/dft/convolution.hpp @@ -82,8 +82,8 @@ class convolve_filter : public filter<T> { public: explicit convolve_filter(size_t size, size_t block_size = 1024); - explicit convolve_filter(const univector<T>& data, size_t block_size = 1024); - void set_data(const univector<T>& data); + explicit convolve_filter(const univector_ref<const T>& data, size_t block_size = 1024); + void set_data(const univector_ref<const T>& data); protected: void process_expression(T* dest, const expression_pointer<T>& src, size_t size) final @@ -108,5 +108,9 @@ protected: size_t position; }; } // namespace CMT_ARCH_NAME + +CMT_MULTI_PROTO(template <typename T> + filter<T>* make_convolve_filter(const univector_ref<const T>& taps, size_t block_size);) + } // namespace kfr CMT_PRAGMA_GNU(GCC diagnostic pop) diff --git a/include/kfr/dft/dft_c.h b/include/kfr/dft/dft_c.h @@ -1,139 +0,0 @@ -/** @addtogroup dft - * @{ - */ -/* - Copyright (C) 2016 D Levin (https://www.kfrlib.com) - This file is part of KFR - - KFR is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, 
or - (at your option) any later version. - - KFR is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with KFR. - - If GPL is not suitable for your project, you must purchase a commercial license to use KFR. - Buying a commercial license is mandatory as soon as you develop commercial activities without - disclosing the source code of your own applications. - See https://www.kfrlib.com for details. - */ -#pragma once - -#include <stddef.h> -#include <stdint.h> - -#if defined(_M_IX86) || defined(__i386__) || defined(_M_X64) || defined(__x86_64__) -#define KFR_ARCH_IS_X86 1 -#elif defined(__arm__) || defined(__arm64__) || defined(_M_ARM) || defined(__aarch64__) -#define KFR_ARCH_IS_ARM 1 -#endif - -#if defined(_M_X64) || defined(__x86_64__) -#define KFR_CDECL -#else -#ifdef _WIN32 -#define KFR_CDECL __cdecl -#elif defined KFR_ARCH_IS_X86 -#define KFR_CDECL __attribute__((__cdecl__)) -#else -#define KFR_CDECL -#endif -#endif - -#ifdef _WIN32 -#ifdef KFR_BUILDING_DLL -#define KFR_API_SPEC KFR_CDECL __declspec(dllexport) -#else -#define KFR_API_SPEC KFR_CDECL __declspec(dllimport) -#endif -#else -#define KFR_API_SPEC KFR_CDECL -#endif - -#ifdef __cplusplus -extern "C" -{ -#endif - - typedef struct KFR_DFT_PLAN_F32 - { - size_t size; - size_t temp_size; - } KFR_DFT_PLAN_F32; - typedef struct KFR_DFT_PLAN_F64 - { - size_t size; - size_t temp_size; - } KFR_DFT_PLAN_F64; - - typedef struct KFR_DFT_REAL_PLAN_F32 - { - size_t dummy; - size_t temp_size; - size_t size; - } KFR_DFT_REAL_PLAN_F32; - typedef struct KFR_DFT_REAL_PLAN_F64 - { - size_t dummy; - size_t temp_size; - size_t size; - } KFR_DFT_REAL_PLAN_F64; - - typedef enum KFR_DFT_PACK_FORMAT - { - Perm = 0, - CCs = 1 - } KFR_DFT_PACK_FORMAT; - - // Complex DFT plans - - 
KFR_API_SPEC KFR_DFT_PLAN_F32* kfr_dft_create_plan_f32(size_t size); - KFR_API_SPEC KFR_DFT_PLAN_F64* kfr_dft_create_plan_f64(size_t size); - - KFR_API_SPEC void kfr_dft_dump_f32(KFR_DFT_PLAN_F32* plan); - KFR_API_SPEC void kfr_dft_dump_f64(KFR_DFT_PLAN_F64* plan); - - KFR_API_SPEC void kfr_dft_execute_f32(KFR_DFT_PLAN_F32* plan, float* out, const float* in, uint8_t* temp); - KFR_API_SPEC void kfr_dft_execute_f64(KFR_DFT_PLAN_F64* plan, double* out, const double* in, - uint8_t* temp); - - KFR_API_SPEC void kfr_dft_execute_inverse_f32(KFR_DFT_PLAN_F32* plan, float* out, const float* in, - uint8_t* temp); - KFR_API_SPEC void kfr_dft_execute_inverse_f64(KFR_DFT_PLAN_F64* plan, double* out, const double* in, - uint8_t* temp); - - KFR_API_SPEC void kfr_dft_delete_plan_f32(KFR_DFT_PLAN_F32* plan); - KFR_API_SPEC void kfr_dft_delete_plan_f64(KFR_DFT_PLAN_F64* plan); - - // Real DFT plans - - KFR_API_SPEC KFR_DFT_REAL_PLAN_F32* kfr_dft_create_real_plan_f32(size_t size, - KFR_DFT_PACK_FORMAT pack_format); - KFR_API_SPEC KFR_DFT_REAL_PLAN_F64* kfr_dft_create_real_plan_f64(size_t size, - KFR_DFT_PACK_FORMAT pack_format); - - KFR_API_SPEC void kfr_dft_dump_real_f32(KFR_DFT_REAL_PLAN_F32* plan); - KFR_API_SPEC void kfr_dft_dump_real_f64(KFR_DFT_REAL_PLAN_F64* plan); - - KFR_API_SPEC void kfr_dft_execute_real_f32(KFR_DFT_REAL_PLAN_F32* plan, float* out, const float* in, - uint8_t* temp); - KFR_API_SPEC void kfr_dft_execute_real_f64(KFR_DFT_REAL_PLAN_F64* plan, double* out, const double* in, - uint8_t* temp); - - KFR_API_SPEC void kfr_dft_execute_real_inverse_f32(KFR_DFT_REAL_PLAN_F32* plan, float* out, - const float* in, uint8_t* temp); - KFR_API_SPEC void kfr_dft_execute_real_inverse_f64(KFR_DFT_REAL_PLAN_F64* plan, double* out, - const double* in, uint8_t* temp); - - KFR_API_SPEC void kfr_dft_delete_real_plan_f32(KFR_DFT_REAL_PLAN_F32* plan); - KFR_API_SPEC void kfr_dft_delete_real_plan_f64(KFR_DFT_REAL_PLAN_F64* plan); - -#ifdef __cplusplus -} -#endif diff --git 
a/include/kfr/dft/fft.hpp b/include/kfr/dft/fft.hpp @@ -127,48 +127,8 @@ struct dft_stage; template <typename T> using dft_stage_ptr = std::unique_ptr<dft_stage<T>>; -inline namespace CMT_ARCH_NAME -{ -template <typename T> -void dft_initialize(dft_plan<T>& plan); -template <typename T> -void dft_real_initialize(dft_plan_real<T>& plan); -} // namespace CMT_ARCH_NAME - -#ifdef KFR_DFT_MULTI - -#define KFR_DFT_PROTO(arch) \ - namespace arch \ - { \ - template <typename T> \ - void dft_initialize(dft_plan<T>& plan); \ - template <typename T> \ - void dft_real_initialize(dft_plan_real<T>& plan); \ - } - -#if !CMT_ARCH_IS_AVX512 -KFR_DFT_PROTO(avx512) -#endif -#if !CMT_ARCH_IS_AVX2 -KFR_DFT_PROTO(avx2) -#endif -#if !CMT_ARCH_IS_AVX -KFR_DFT_PROTO(avx) -#endif -#if !CMT_ARCH_IS_SSE41 -KFR_DFT_PROTO(sse41) -#endif -#if !CMT_ARCH_IS_SSSE3 -KFR_DFT_PROTO(ssse3) -#endif -#if !CMT_ARCH_IS_SSE3 -KFR_DFT_PROTO(sse3) -#endif -#if !CMT_ARCH_IS_SSE2 -KFR_DFT_PROTO(sse2) -#endif - -#endif +CMT_MULTI_PROTO(template <typename T> void dft_initialize(dft_plan<T>& plan);) +CMT_MULTI_PROTO(template <typename T> void dft_real_initialize(dft_plan_real<T>& plan);) /// @brief Class for performing DFT/FFT template <typename T> diff --git a/include/kfr/dft/impl/convolution-impl.cpp b/include/kfr/dft/impl/convolution-impl.cpp @@ -89,7 +89,7 @@ convolve_filter<T>::convolve_filter(size_t size, size_t block_size) } template <typename T> -convolve_filter<T>::convolve_filter(const univector<T>& data, size_t block_size) +convolve_filter<T>::convolve_filter(const univector_ref<const T>& data, size_t block_size) : size(data.size()), block_size(next_poweroftwo(block_size)), fft(2 * next_poweroftwo(block_size), dft_pack_format::Perm), temp(fft.temp_size), segments((data.size() + next_poweroftwo(block_size) - 1) / next_poweroftwo(block_size)), @@ -100,7 +100,7 @@ convolve_filter<T>::convolve_filter(const univector<T>& data, size_t block_size) } template <typename T> -void 
convolve_filter<T>::set_data(const univector<T>& data) +void convolve_filter<T>::set_data(const univector_ref<const T>& data) { univector<T> input(fft.size); const T ifftsize = reciprocal(T(fft.size)); @@ -177,9 +177,9 @@ template univector<float> autocorrelate<float>(const univector_ref<const float>& template convolve_filter<float>::convolve_filter(size_t, size_t); -template convolve_filter<float>::convolve_filter(const univector<float>&, size_t); +template convolve_filter<float>::convolve_filter(const univector_ref<const float>&, size_t); -template void convolve_filter<float>::set_data(const univector<float>&); +template void convolve_filter<float>::set_data(const univector_ref<const float>&); template void convolve_filter<float>::process_buffer(float* output, const float* input, size_t size); @@ -197,10 +197,20 @@ template univector<double> autocorrelate<double>(const univector_ref<const doubl template convolve_filter<double>::convolve_filter(size_t, size_t); -template convolve_filter<double>::convolve_filter(const univector<double>&, size_t); +template convolve_filter<double>::convolve_filter(const univector_ref<const double>&, size_t); -template void convolve_filter<double>::set_data(const univector<double>&); +template void convolve_filter<double>::set_data(const univector_ref<const double>&); template void convolve_filter<double>::process_buffer(double* output, const double* input, size_t size); + +template <typename T> +filter<T>* make_convolve_filter(const univector_ref<const T>& taps, size_t block_size) +{ + return new convolve_filter<T>(taps, block_size); +} + +template filter<float>* make_convolve_filter(const univector_ref<const float>&, size_t); +template filter<double>* make_convolve_filter(const univector_ref<const double>&, size_t); + } // namespace CMT_ARCH_NAME } // namespace kfr diff --git a/include/kfr/dft/impl/dft-fft.hpp b/include/kfr/dft/impl/dft-fft.hpp @@ -25,8 +25,6 @@ */ #pragma once -#include "../dft_c.h" - #include 
"../../base/basic_expressions.hpp" #include "../../math/complex_math.hpp" #include "../../testo/assert.hpp" diff --git a/include/kfr/dft/impl/dft-src.cpp b/include/kfr/dft/impl/dft-src.cpp @@ -1,159 +0,0 @@ -/** @addtogroup dft - * @{ - */ -/* - Copyright (C) 2016 D Levin (https://www.kfrlib.com) - This file is part of KFR - - KFR is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - KFR is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with KFR. - - If GPL is not suitable for your project, you must purchase a commercial license to use KFR. - Buying a commercial license is mandatory as soon as you develop commercial activities without - disclosing the source code of your own applications. - See https://www.kfrlib.com for details. 
- */ - -#include "../dft_c.h" -#include "../fft.hpp" - -namespace kfr -{ - -extern "C" -{ - -#ifdef KFR_DFT_MULTI -#define KFR_CPU_ARG cpu_t::runtime, -#else -#define KFR_CPU_ARG -#endif - - KFR_DFT_PLAN_F32* kfr_dft_create_plan_f32(size_t size) - { - if (size < 2) - return nullptr; - if (size > 16777216) - return nullptr; - return reinterpret_cast<KFR_DFT_PLAN_F32*>(new kfr::dft_plan<float>(KFR_CPU_ARG size)); - } - KFR_DFT_PLAN_F64* kfr_dft_create_plan_f64(size_t size) - { - if (size < 2) - return nullptr; - if (size > 16777216) - return nullptr; - return reinterpret_cast<KFR_DFT_PLAN_F64*>(new kfr::dft_plan<double>(KFR_CPU_ARG size)); - } - - void kfr_dft_dump_f32(KFR_DFT_PLAN_F32* plan) { reinterpret_cast<kfr::dft_plan<float>*>(plan)->dump(); } - void kfr_dft_dump_f64(KFR_DFT_PLAN_F64* plan) { reinterpret_cast<kfr::dft_plan<double>*>(plan)->dump(); } - - void kfr_dft_execute_f32(KFR_DFT_PLAN_F32* plan, float* out, const float* in, uint8_t* temp) - { - reinterpret_cast<kfr::dft_plan<float>*>(plan)->execute( - reinterpret_cast<kfr::complex<float>*>(out), reinterpret_cast<const kfr::complex<float>*>(in), - temp, kfr::cfalse); - } - void kfr_dft_execute_f64(KFR_DFT_PLAN_F64* plan, double* out, const double* in, uint8_t* temp) - { - reinterpret_cast<kfr::dft_plan<double>*>(plan)->execute( - reinterpret_cast<kfr::complex<double>*>(out), reinterpret_cast<const kfr::complex<double>*>(in), - temp, kfr::cfalse); - } - void kfr_dft_execute_inverse_f32(KFR_DFT_PLAN_F32* plan, float* out, const float* in, uint8_t* temp) - { - reinterpret_cast<kfr::dft_plan<float>*>(plan)->execute( - reinterpret_cast<kfr::complex<float>*>(out), reinterpret_cast<const kfr::complex<float>*>(in), - temp, kfr::ctrue); - } - void kfr_dft_execute_inverse_f64(KFR_DFT_PLAN_F64* plan, double* out, const double* in, uint8_t* temp) - { - reinterpret_cast<kfr::dft_plan<double>*>(plan)->execute( - reinterpret_cast<kfr::complex<double>*>(out), reinterpret_cast<const kfr::complex<double>*>(in), - temp, 
kfr::ctrue); - } - - void kfr_dft_delete_plan_f32(KFR_DFT_PLAN_F32* plan) - { - delete reinterpret_cast<kfr::dft_plan<float>*>(plan); - } - void kfr_dft_delete_plan_f64(KFR_DFT_PLAN_F64* plan) - { - delete reinterpret_cast<kfr::dft_plan<double>*>(plan); - } - - // Real DFT plans - - KFR_DFT_REAL_PLAN_F32* kfr_dft_create_real_plan_f32(size_t size, KFR_DFT_PACK_FORMAT pack_format) - { - if (size < 4) - return nullptr; - if (size > 16777216) - return nullptr; - return reinterpret_cast<KFR_DFT_REAL_PLAN_F32*>( - new kfr::dft_plan_real<float>(KFR_CPU_ARG size, static_cast<dft_pack_format>(pack_format))); - } - KFR_DFT_REAL_PLAN_F64* kfr_dft_create_real_plan_f64(size_t size, KFR_DFT_PACK_FORMAT pack_format) - { - if (size < 4) - return nullptr; - if (size > 16777216) - return nullptr; - return reinterpret_cast<KFR_DFT_REAL_PLAN_F64*>( - new kfr::dft_plan_real<double>(KFR_CPU_ARG size, static_cast<dft_pack_format>(pack_format))); - } - - KFR_API_SPEC void kfr_dft_dump_real_f32(KFR_DFT_REAL_PLAN_F32* plan) - { - reinterpret_cast<kfr::dft_plan_real<float>*>(plan)->dump(); - } - KFR_API_SPEC void kfr_dft_dump_real_f64(KFR_DFT_REAL_PLAN_F64* plan) - { - reinterpret_cast<kfr::dft_plan_real<double>*>(plan)->dump(); - } - - void kfr_dft_execute_real_f32(KFR_DFT_REAL_PLAN_F32* plan, float* out, const float* in, uint8_t* temp) - { - reinterpret_cast<kfr::dft_plan_real<float>*>(plan)->execute( - reinterpret_cast<kfr::complex<float>*>(out), in, temp); - } - void kfr_dft_execute_real_f64(KFR_DFT_REAL_PLAN_F64* plan, double* out, const double* in, uint8_t* temp) - { - reinterpret_cast<kfr::dft_plan_real<double>*>(plan)->execute( - reinterpret_cast<kfr::complex<double>*>(out), in, temp); - } - void kfr_dft_execute_real_inverse_f32(KFR_DFT_REAL_PLAN_F32* plan, float* out, const float* in, - uint8_t* temp) - { - reinterpret_cast<kfr::dft_plan_real<float>*>(plan)->execute( - out, reinterpret_cast<const kfr::complex<float>*>(in), temp); - } - void 
kfr_dft_execute_real_inverse__f64(KFR_DFT_REAL_PLAN_F64* plan, double* out, const double* in, - uint8_t* temp, KFR_DFT_PACK_FORMAT pack_format) - { - reinterpret_cast<kfr::dft_plan_real<double>*>(plan)->execute( - out, reinterpret_cast<const kfr::complex<double>*>(in), temp); - } - - void kfr_dft_delete_real_plan_f32(KFR_DFT_REAL_PLAN_F32* plan) - { - delete reinterpret_cast<kfr::dft_plan_real<float>*>(plan); - } - void kfr_dft_delete_real_plan_f64(KFR_DFT_REAL_PLAN_F64* plan) - { - delete reinterpret_cast<kfr::dft_plan_real<double>*>(plan); - } -} - -} // namespace kfr diff --git a/include/kfr/dsp/biquad.hpp b/include/kfr/dsp/biquad.hpp @@ -47,9 +47,6 @@ enum class biquad_type highshelf }; -inline namespace CMT_ARCH_NAME -{ - /** * @brief Structure for holding biquad filter coefficients. */ @@ -93,6 +90,9 @@ struct biquad_params biquad_params<T> normalized_all() const { return normalized_a0().normalized_b0(); } }; +inline namespace CMT_ARCH_NAME +{ + namespace internal { template <typename T, size_t filters, KFR_ARCH_DEP> @@ -341,5 +341,9 @@ public: { } }; + } // namespace CMT_ARCH_NAME + +CMT_MULTI_PROTO(template <typename T, size_t maxfiltercount> + filter<T>* make_biquad_filter(const biquad_params<T>* bq, size_t count);) } // namespace kfr diff --git a/include/kfr/dsp/fir.hpp b/include/kfr/dsp/fir.hpp @@ -193,12 +193,12 @@ short_fir(E1&& e1, const univector<T, TapCount>& taps) } template <typename T, typename U = T> -class filter_fir : public filter<U> +class fir_filter : public filter<U> { public: - filter_fir(const array_ref<const T>& taps) : state(taps) {} + fir_filter(const univector_ref<const T>& taps) : state(taps) {} - void set_taps(const array_ref<const T>& taps) { state = fir_state<T, U>(taps); } + void set_taps(const univector_ref<const T>& taps) { state = fir_state<T, U>(taps); } /// Reset internal filter state void reset() final @@ -222,6 +222,10 @@ private: }; template <typename T, typename U = T> -using fir_filter = filter_fir<T, U>; +using 
filter_fir = fir_filter<T, U>; + } // namespace CMT_ARCH_NAME + +CMT_MULTI_PROTO(template <typename U, typename T> + filter<U>* make_fir_filter(const univector_ref<const T>& taps);) } // namespace kfr diff --git a/include/kfr/dsp/impl/dsp-impl.cpp b/include/kfr/dsp/impl/dsp-impl.cpp @@ -0,0 +1,28 @@ +#include "../biquad.hpp" +#include "../fir.hpp" + +namespace kfr +{ +inline namespace CMT_ARCH_NAME +{ +template <typename U, typename T> +filter<U>* make_fir_filter(const univector_ref<const T>& taps) +{ + return new fir_filter<T, U>(taps); +} + +template filter<float>* make_fir_filter<float, float>(const univector_ref<const float>&); +template filter<double>* make_fir_filter<double, double>(const univector_ref<const double>&); +template filter<float>* make_fir_filter<float, double>(const univector_ref<const double>&); + +template <typename T, size_t maxfiltercount> +KFR_FUNCTION filter<T>* make_biquad_filter(const biquad_params<T>* bq, size_t count) +{ + return new biquad_filter<T, maxfiltercount>(bq, count); +} + +template filter<float>* make_biquad_filter<float, 64>(const biquad_params<float>* bq, size_t count); +template filter<double>* make_biquad_filter<double, 64>(const biquad_params<double>* bq, size_t count); + +} // namespace CMT_ARCH_NAME +} // namespace kfr +\ No newline at end of file diff --git a/sources.cmake b/sources.cmake @@ -15,6 +15,7 @@ set( ${PROJECT_SOURCE_DIR}/include/kfr/runtime.hpp ${PROJECT_SOURCE_DIR}/include/kfr/simd.hpp ${PROJECT_SOURCE_DIR}/include/kfr/version.hpp + ${PROJECT_SOURCE_DIR}/include/kfr/capi.h ${PROJECT_SOURCE_DIR}/include/kfr/cident.h ${PROJECT_SOURCE_DIR}/include/kfr/kfr.h ${PROJECT_SOURCE_DIR}/include/kfr/base/basic_expressions.hpp @@ -45,7 +46,6 @@ set( ${PROJECT_SOURCE_DIR}/include/kfr/dft/convolution.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dft/fft.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dft/reference_dft.hpp - ${PROJECT_SOURCE_DIR}/include/kfr/dft/dft_c.h ${PROJECT_SOURCE_DIR}/include/kfr/dft/data/bitrev.hpp 
${PROJECT_SOURCE_DIR}/include/kfr/dft/data/sincos.hpp ${PROJECT_SOURCE_DIR}/include/kfr/dft/impl/bitrev.hpp