diff --git a/Project.toml b/Project.toml index 4eb2e76..61d618b 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "FFTW" uuid = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" -version = "1.8.1" +version = "1.9.0" [deps] AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c" diff --git a/src/FFTW.jl b/src/FFTW.jl index 4366ee7..1a9352f 100644 --- a/src/FFTW.jl +++ b/src/FFTW.jl @@ -16,7 +16,7 @@ export dct, idct, dct!, idct!, plan_dct, plan_idct, plan_dct!, plan_idct! include("providers.jl") -function __init__() +function initialize_library_paths() # If someone is trying to set the provider via the old environment variable, warn them that they # should instead use `set_provider!()` instead. if haskey(ENV, "JULIA_FFTW_PROVIDER") @@ -27,14 +27,36 @@ function __init__() # libfftw3{,f} refs at runtime, since we may have relocated and # changed the path to the library since the last time we precompiled. @static if fftw_provider == "fftw" - libfftw3[] = FFTW_jll.libfftw3_path - libfftw3f[] = FFTW_jll.libfftw3f_path + libfftw3_path[] = FFTW_jll.libfftw3_path + libfftw3f_path[] = FFTW_jll.libfftw3f_path fftw_init_threads() end @static if fftw_provider == "mkl" - libfftw3[] = MKL_jll.libmkl_rt_path - libfftw3f[] = MKL_jll.libmkl_rt_path + libfftw3_path[] = MKL_jll.libmkl_rt_path + libfftw3f_path[] = MKL_jll.libmkl_rt_path end + return nothing +end + +if VERSION >= v"1.12.0-beta1.29" + const initialize_library_paths_once = OncePerProcess{Nothing}() do + initialize_library_paths() + return + end + function libfftw3() + initialize_library_paths_once() + return libfftw3_path[] + end + function libfftw3f() + initialize_library_paths_once() + return libfftw3f_path[] + end +else + function __init__() + initialize_library_paths() + end + libfftw3() = libfftw3_path[] + libfftw3f() = libfftw3f_path[] end # most FFTW calls other than fftw_execute should be protected by a lock to be thread-safe diff --git a/src/fft.jl b/src/fft.jl index 4be9e51..6739744 100644 --- a/src/fft.jl +++ b/src/fft.jl @@ -56,7 +56,7 @@ function plan_r2r end ## FFT: Implement fft by calling fftw. const version = VersionNumber(split(unsafe_string(cglobal( - (:fftw_version,libfftw3[]), UInt8)), ['-', ' '])[2]) + (:fftw_version,libfftw3()), UInt8)), ['-', ' '])[2]) ## Direction of FFT @@ -141,32 +141,32 @@ alignment_of(A::FakeArray) = Int32(0) @exclusive function export_wisdom(fname::AbstractString) f = ccall(:fopen, Ptr{Cvoid}, (Cstring,Cstring), fname, :w) systemerror("could not open wisdom file $fname for writing", f == C_NULL) - ccall((:fftw_export_wisdom_to_file,libfftw3[]), Cvoid, (Ptr{Cvoid},), f) + ccall((:fftw_export_wisdom_to_file,libfftw3()), Cvoid, (Ptr{Cvoid},), f) ccall(:fputs, Int32, (Ptr{UInt8},Ptr{Cvoid}), " "^256, f) # no NUL, hence no Cstring - ccall((:fftwf_export_wisdom_to_file,libfftw3f[]), Cvoid, (Ptr{Cvoid},), f) + ccall((:fftwf_export_wisdom_to_file,libfftw3f()), Cvoid, (Ptr{Cvoid},), f) ccall(:fclose, Cvoid, (Ptr{Cvoid},), f) end @exclusive function import_wisdom(fname::AbstractString) f = ccall(:fopen, Ptr{Cvoid}, (Cstring,Cstring), fname, :r) systemerror("could not open wisdom file $fname for reading", f == C_NULL) - if ccall((:fftw_import_wisdom_from_file,libfftw3[]),Int32,(Ptr{Cvoid},),f)==0|| - ccall((:fftwf_import_wisdom_from_file,libfftw3f[]),Int32,(Ptr{Cvoid},),f)==0 + if ccall((:fftw_import_wisdom_from_file,libfftw3()),Int32,(Ptr{Cvoid},),f)==0|| + ccall((:fftwf_import_wisdom_from_file,libfftw3f()),Int32,(Ptr{Cvoid},),f)==0 error("failed to import wisdom from $fname") end ccall(:fclose, Cvoid, (Ptr{Cvoid},), f) end @exclusive function import_system_wisdom() - if ccall((:fftw_import_system_wisdom,libfftw3[]), Int32, ()) == 0 || - ccall((:fftwf_import_system_wisdom,libfftw3f[]), Int32, ()) == 0 + if ccall((:fftw_import_system_wisdom,libfftw3()), Int32, ()) == 0 || + ccall((:fftwf_import_system_wisdom,libfftw3f()), Int32, ()) == 0 error("failed to import system wisdom") end end @exclusive function forget_wisdom() - ccall((:fftw_forget_wisdom,libfftw3[]), Cvoid, ()) - ccall((:fftwf_forget_wisdom,libfftw3f[]), Cvoid, ()) + ccall((:fftw_forget_wisdom,libfftw3()), Cvoid, ()) + ccall((:fftwf_forget_wisdom,libfftw3f()), Cvoid, ()) end # Threads @@ -176,15 +176,15 @@ function _set_num_threads(num_threads::Integer) @static if fftw_provider == "mkl" _last_num_threads[] = num_threads end - ccall((:fftw_plan_with_nthreads,libfftw3[]), Cvoid, (Int32,), num_threads) - ccall((:fftwf_plan_with_nthreads,libfftw3f[]), Cvoid, (Int32,), num_threads) + ccall((:fftw_plan_with_nthreads,libfftw3()), Cvoid, (Int32,), num_threads) + ccall((:fftwf_plan_with_nthreads,libfftw3f()), Cvoid, (Int32,), num_threads) end @exclusive set_num_threads(num_threads::Integer) = _set_num_threads(num_threads) function get_num_threads() @static if fftw_provider == "fftw" - ccall((:fftw_planner_nthreads,libfftw3[]), Cint, ()) + ccall((:fftw_planner_nthreads,libfftw3()), Cint, ()) else _last_num_threads[] end @@ -211,9 +211,9 @@ const NO_TIMELIMIT = -1.0 # from fftw3.h # only call these when fftwlock is held: unsafe_set_timelimit(precision::fftwTypeDouble,seconds) = - ccall((:fftw_set_timelimit,libfftw3[]), Cvoid, (Float64,), seconds) + ccall((:fftw_set_timelimit,libfftw3()), Cvoid, (Float64,), seconds) unsafe_set_timelimit(precision::fftwTypeSingle,seconds) = - ccall((:fftwf_set_timelimit,libfftw3f[]), Cvoid, (Float64,), seconds) + ccall((:fftwf_set_timelimit,libfftw3f()), Cvoid, (Float64,), seconds) @exclusive set_timelimit(precision, seconds) = unsafe_set_timelimit(precision, seconds) # Array alignment mod 16: @@ -234,9 +234,9 @@ unsafe_set_timelimit(precision::fftwTypeSingle,seconds) = convert(Int32, convert(Int64, pointer(A)) % 16) else alignment_of(A::StridedArray{T}) where {T<:fftwDouble} = - ccall((:fftw_alignment_of, libfftw3[]), Int32, (Ptr{T},), A) + ccall((:fftw_alignment_of, libfftw3()), Int32, (Ptr{T},), A) alignment_of(A::StridedArray{T}) where {T<:fftwSingle} = - ccall((:fftwf_alignment_of, libfftw3f[]), Int32, (Ptr{T},), A) + ccall((:fftwf_alignment_of, libfftw3f()), Int32, (Ptr{T},), A) end # FFTWPlan (low-level) @@ -320,9 +320,9 @@ unsafe_convert(::Type{PlanPtr}, p::FFTWPlan) = p.plan # these functions should only be called while the fftwlock is held unsafe_destroy_plan(@nospecialize(plan::FFTWPlan{<:fftwDouble})) = - ccall((:fftw_destroy_plan,libfftw3[]), Cvoid, (PlanPtr,), plan) + ccall((:fftw_destroy_plan,libfftw3()), Cvoid, (PlanPtr,), plan) unsafe_destroy_plan(@nospecialize(plan::FFTWPlan{<:fftwSingle})) = - ccall((:fftwf_destroy_plan,libfftw3f[]), Cvoid, (PlanPtr,), plan) + ccall((:fftwf_destroy_plan,libfftw3f()), Cvoid, (PlanPtr,), plan) const deferred_destroy_lock = ReentrantLock() # lock protecting the deferred_destroy_plans list const deferred_destroy_plans = FFTWPlan[] @@ -388,19 +388,19 @@ end ################################################################################################# cost(plan::FFTWPlan{<:fftwDouble}) = - ccall((:fftw_cost,libfftw3[]), Float64, (PlanPtr,), plan) + ccall((:fftw_cost,libfftw3()), Float64, (PlanPtr,), plan) cost(plan::FFTWPlan{<:fftwSingle}) = - ccall((:fftwf_cost,libfftw3f[]), Float64, (PlanPtr,), plan) + ccall((:fftwf_cost,libfftw3f()), Float64, (PlanPtr,), plan) @exclusive function arithmetic_ops(plan::FFTWPlan{<:fftwDouble}) add, mul, fma = Ref(0.0), Ref(0.0), Ref(0.0) - ccall((:fftw_flops,libfftw3[]), Cvoid, + ccall((:fftw_flops,libfftw3()), Cvoid, (PlanPtr,Ref{Float64},Ref{Float64},Ref{Float64}), plan, add, mul, fma) return (round(Int64, add[]), round(Int64, mul[]), round(Int64, fma[])) end @exclusive function arithmetic_ops(plan::FFTWPlan{<:fftwSingle}) add, mul, fma = Ref(0.0), Ref(0.0), Ref(0.0) - ccall((:fftwf_flops,libfftw3f[]), Cvoid, + ccall((:fftwf_flops,libfftw3f()), Cvoid, (PlanPtr,Ref{Float64},Ref{Float64},Ref{Float64}), plan, add, mul, fma) return (round(Int64, add[]), round(Int64, mul[]), round(Int64, fma[])) end @@ -431,9 +431,9 @@ const has_sprint_plan = version >= v"3.3.4" && fftw_provider == "fftw" @static if has_sprint_plan sprint_plan_(plan::FFTWPlan{<:fftwDouble}) = - ccall((:fftw_sprint_plan,libfftw3[]), Ptr{UInt8}, (PlanPtr,), plan) + ccall((:fftw_sprint_plan,libfftw3()), Ptr{UInt8}, (PlanPtr,), plan) sprint_plan_(plan::FFTWPlan{<:fftwSingle}) = - ccall((:fftwf_sprint_plan,libfftw3f[]), Ptr{UInt8}, (PlanPtr,), plan) + ccall((:fftwf_sprint_plan,libfftw3f()), Ptr{UInt8}, (PlanPtr,), plan) function sprint_plan(plan::FFTWPlan) p = sprint_plan_(plan) str = unsafe_string(p) @@ -515,49 +515,49 @@ _colmajorstrides(p) = () # Execute unsafe_execute!(plan::FFTWPlan{<:fftwDouble}) = - ccall((:fftw_execute,libfftw3[]), Cvoid, (PlanPtr,), plan) + ccall((:fftw_execute,libfftw3()), Cvoid, (PlanPtr,), plan) unsafe_execute!(plan::FFTWPlan{<:fftwSingle}) = - ccall((:fftwf_execute,libfftw3f[]), Cvoid, (PlanPtr,), plan) + ccall((:fftwf_execute,libfftw3f()), Cvoid, (PlanPtr,), plan) unsafe_execute!(plan::cFFTWPlan{T}, X::StridedArray{T}, Y::StridedArray{T}) where {T<:fftwDouble} = - ccall((:fftw_execute_dft,libfftw3[]), Cvoid, + ccall((:fftw_execute_dft,libfftw3()), Cvoid, (PlanPtr,Ptr{T},Ptr{T}), plan, X, Y) unsafe_execute!(plan::cFFTWPlan{T}, X::StridedArray{T}, Y::StridedArray{T}) where {T<:fftwSingle} = - ccall((:fftwf_execute_dft,libfftw3f[]), Cvoid, + ccall((:fftwf_execute_dft,libfftw3f()), Cvoid, (PlanPtr,Ptr{T},Ptr{T}), plan, X, Y) unsafe_execute!(plan::rFFTWPlan{Float64,FORWARD}, X::StridedArray{Float64}, Y::StridedArray{Complex{Float64}}) = - ccall((:fftw_execute_dft_r2c,libfftw3[]), Cvoid, + ccall((:fftw_execute_dft_r2c,libfftw3()), Cvoid, (PlanPtr,Ptr{Float64},Ptr{Complex{Float64}}), plan, X, Y) unsafe_execute!(plan::rFFTWPlan{Float32,FORWARD}, X::StridedArray{Float32}, Y::StridedArray{Complex{Float32}}) = - ccall((:fftwf_execute_dft_r2c,libfftw3f[]), Cvoid, + ccall((:fftwf_execute_dft_r2c,libfftw3f()), Cvoid, (PlanPtr,Ptr{Float32},Ptr{Complex{Float32}}), plan, X, Y) unsafe_execute!(plan::rFFTWPlan{Complex{Float64},BACKWARD}, X::StridedArray{Complex{Float64}}, Y::StridedArray{Float64}) = - ccall((:fftw_execute_dft_c2r,libfftw3[]), Cvoid, + ccall((:fftw_execute_dft_c2r,libfftw3()), Cvoid, (PlanPtr,Ptr{Complex{Float64}},Ptr{Float64}), plan, X, Y) unsafe_execute!(plan::rFFTWPlan{Complex{Float32},BACKWARD}, X::StridedArray{Complex{Float32}}, Y::StridedArray{Float32}) = - ccall((:fftwf_execute_dft_c2r,libfftw3f[]), Cvoid, + ccall((:fftwf_execute_dft_c2r,libfftw3f()), Cvoid, (PlanPtr,Ptr{Complex{Float32}},Ptr{Float32}), plan, X, Y) unsafe_execute!(plan::r2rFFTWPlan{T}, X::StridedArray{T}, Y::StridedArray{T}) where {T<:fftwDouble} = - ccall((:fftw_execute_r2r,libfftw3[]), Cvoid, + ccall((:fftw_execute_r2r,libfftw3()), Cvoid, (PlanPtr,Ptr{T},Ptr{T}), plan, X, Y) unsafe_execute!(plan::r2rFFTWPlan{T}, X::StridedArray{T}, Y::StridedArray{T}) where {T<:fftwSingle} = - ccall((:fftwf_execute_r2r,libfftw3f[]), Cvoid, + ccall((:fftwf_execute_r2r,libfftw3f()), Cvoid, (PlanPtr,Ptr{T},Ptr{T}), plan, X, Y) # NOTE ON GC (garbage collection): @@ -654,7 +654,7 @@ for (Tr,Tc,fftw,lib) in ((:Float64,:(Complex{Float64}),"fftw",:libfftw3), unsafe_set_timelimit($Tr, timelimit) R = isa(region, Tuple) ? region : copy(region) dims, howmany = dims_howmany(X, Y, size(X), R) - plan = ccall(($(string(fftw,"_plan_guru64_dft")),$lib[]), + plan = ccall(($(string(fftw,"_plan_guru64_dft")),$lib()), PlanPtr, (Int32, Ptr{Int}, Int32, Ptr{Int}, Ptr{$Tc}, Ptr{$Tc}, Int32, UInt32), @@ -674,7 +674,7 @@ for (Tr,Tc,fftw,lib) in ((:Float64,:(Complex{Float64}),"fftw",:libfftw3), regionshft = _circshiftmin1(region) # FFTW halves last dim unsafe_set_timelimit($Tr, timelimit) dims, howmany = dims_howmany(X, Y, size(X), regionshft) - plan = ccall(($(string(fftw,"_plan_guru64_dft_r2c")),$lib[]), + plan = ccall(($(string(fftw,"_plan_guru64_dft_r2c")),$lib()), PlanPtr, (Int32, Ptr{Int}, Int32, Ptr{Int}, Ptr{$Tr}, Ptr{$Tc}, UInt32), @@ -694,7 +694,7 @@ for (Tr,Tc,fftw,lib) in ((:Float64,:(Complex{Float64}),"fftw",:libfftw3), regionshft = _circshiftmin1(region) # FFTW halves last dim unsafe_set_timelimit($Tr, timelimit) dims, howmany = dims_howmany(X, Y, size(Y), regionshft) - plan = ccall(($(string(fftw,"_plan_guru64_dft_c2r")),$lib[]), + plan = ccall(($(string(fftw,"_plan_guru64_dft_c2r")),$lib()), PlanPtr, (Int32, Ptr{Int}, Int32, Ptr{Int}, Ptr{$Tc}, Ptr{$Tr}, UInt32), @@ -716,7 +716,7 @@ for (Tr,Tc,fftw,lib) in ((:Float64,:(Complex{Float64}),"fftw",:libfftw3), knd = fix_kinds(region, kinds) unsafe_set_timelimit($Tr, timelimit) dims, howmany = dims_howmany(X, Y, size(X), region) - plan = ccall(($(string(fftw,"_plan_guru64_r2r")),$lib[]), + plan = ccall(($(string(fftw,"_plan_guru64_r2r")),$lib()), PlanPtr, (Int32, Ptr{Int}, Int32, Ptr{Int}, Ptr{$Tr}, Ptr{$Tr}, Ptr{Int32}, UInt32), @@ -744,7 +744,7 @@ for (Tr,Tc,fftw,lib) in ((:Float64,:(Complex{Float64}),"fftw",:libfftw3), howmany[2:3, :] .*= 2 end howmany = [howmany [2,1,1]] # append loop over real/imag parts - plan = ccall(($(string(fftw,"_plan_guru64_r2r")),$lib[]), + plan = ccall(($(string(fftw,"_plan_guru64_r2r")),$lib()), PlanPtr, (Int32, Ptr{Int}, Int32, Ptr{Int}, Ptr{$Tc}, Ptr{$Tc}, Ptr{Int32}, UInt32), diff --git a/src/providers.jl b/src/providers.jl index 58817b1..826ab48 100644 --- a/src/providers.jl +++ b/src/providers.jl @@ -23,8 +23,8 @@ const fftw_provider = get_provider() # We'll initialize `libfftw3` here (in the conditionals below), and # it will get overwritten again in `__init__()`. This allows us to # `ccall` at build time, and also be relocatable for PackageCompiler. -const libfftw3 = Ref{String}() -const libfftw3f = Ref{String}() +const libfftw3_path = Ref{String}() +const libfftw3f_path = Ref{String}() """ set_provider!(provider; export_prefs::Bool = false) @@ -48,8 +48,8 @@ end # If we're using fftw_jll, load it in @static if fftw_provider == "fftw" import FFTW_jll - libfftw3[] = FFTW_jll.libfftw3_path - libfftw3f[] = FFTW_jll.libfftw3f_path + libfftw3_path[] = FFTW_jll.libfftw3_path + libfftw3f_path[] = FFTW_jll.libfftw3f_path # callback function that FFTW uses to launch `num` parallel # tasks (FFTW/fftw3#175): @@ -66,16 +66,19 @@ end # (Previously, we called fftw_cleanup, but this invalidated existing # plans, causing Base Julia issue #19892.) function fftw_init_threads() - stat = ccall((:fftw_init_threads, libfftw3[]), Int32, ()) - statf = ccall((:fftwf_init_threads, libfftw3f[]), Int32, ()) + # We de-reference libfftw3(f)_path directly in this function instead of using + # `libfftw3(f)()` to avoid the circular dependency and thus a stack overflow. This + # function is only called after the path references are initialized. + stat = ccall((:fftw_init_threads, libfftw3_path[]), Int32, ()) + statf = ccall((:fftwf_init_threads, libfftw3f_path[]), Int32, ()) if stat == 0 || statf == 0 error("could not initialize FFTW threads") end if nthreads() > 1 cspawnloop = @cfunction(spawnloop, Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Cint, Ptr{Cvoid})) - ccall((:fftw_threads_set_callback, libfftw3[]), Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}), cspawnloop, C_NULL) - ccall((:fftwf_threads_set_callback, libfftw3f[]), Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}), cspawnloop, C_NULL) + ccall((:fftw_threads_set_callback, libfftw3_path[]), Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}), cspawnloop, C_NULL) + ccall((:fftwf_threads_set_callback, libfftw3f_path[]), Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}), cspawnloop, C_NULL) end end end @@ -83,7 +86,7 @@ end # If we're using MKL, load it in and set library paths appropriately. @static if fftw_provider == "mkl" import MKL_jll - libfftw3[] = MKL_jll.libmkl_rt_path - libfftw3f[] = MKL_jll.libmkl_rt_path + libfftw3_path[] = MKL_jll.libmkl_rt_path + libfftw3f_path[] = MKL_jll.libmkl_rt_path const _last_num_threads = Ref(Cint(1)) end diff --git a/test/runtests.jl b/test/runtests.jl index 6b158ac..7b00570 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -528,7 +528,7 @@ end # fftw_provider == "fftw" end # check whether FFTW on this architecture has nontrivial alignment requirements - nontrivial_alignment = FFTW.fftw_provider == "fftw" && ccall((:fftwf_alignment_of, FFTW.libfftw3f[]), Int32, (Int,), 8) != 0 + nontrivial_alignment = FFTW.fftw_provider == "fftw" && ccall((:fftwf_alignment_of, FFTW.libfftw3f()), Int32, (Int,), 8) != 0 if nontrivial_alignment @test_throws ArgumentError plan_rfft(Array{Float32}(undef, 32)) * view(A, 2:33) @test_throws ArgumentError plan_fft(Array{Complex{Float32}}(undef, 32)) * view(Ac, 2:33)