From 7190fac9677e1ccbbed17aa88342755cb6c3eb1e Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Wed, 4 Jun 2025 20:30:23 -0300 Subject: [PATCH 1/9] Make late gc lower handle insertelement of alloca use. (#58637)

This was found in DAECompiler.jl code by @serenity4. He also suggested that writing up how one might go about fixing a bug like this would be useful, so I'll give a quick writeup (this was a very simple bug, so it might not be too interesting).

The original crash looked something like

> %19 = alloca [10 x i64], align 8
> %155 = insertelement <4 x ptr> poison, ptr %19, i32 0
> Unexpected instruction

> [898844] signal 6 (-6): Aborted
> in expression starting at /home/gbaraldi/DAECompiler.jl/test/reflection.jl:28
> pthread_kill at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
> gsignal at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
> abort at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
> RecursivelyVisit, int, State&, std::map >)::::
> at /home/gbaraldi/julia4/src/llvm-late-gc-lowering.cpp:803
> operator() at /home/gbaraldi/julia4/src/llvm-late-gc-lowering.cpp:2560 [inlined]
> PlaceRootsAndUpdateCalls at /home/gbaraldi/julia4/src/llvm-late-gc-lowering.cpp:2576
> runOnFunction at /home/gbaraldi/julia4/src/llvm-late-gc-lowering.cpp:2638
> run at /home/gbaraldi/julia4/src/llvm-late-gc-lowering.cpp:2675
> run at /home/gbaraldi/julia4/usr/include/llvm/IR/PassManagerInternal.h:91

which means it was crashing inside of late-gc-lowering. So the first thing I did was run julia and the same test with LLVM_ASSERTIONS=1 and FORCE_ASSERTIONS=1 to see if LLVM complained about a malformed module, and both were fine.

The next step was trying to get the failing code out for inspection. The easiest way is to do `export JULIA_LLVM_ARGS="--print-before=LateLowerGCFrame --print-module-scope"` and pipe the output to a file. The file is huge, but since it's a crash in LLVM we know that the last module printed is the one we want, and that gave me the IR I wanted.

To verify the failure outside of julia I did `make -C src install-analysis-deps` to install the LLVM machinery (opt, ...), which gets put in the `tools` directory of a julia build. Then I checked whether this crashed outside of julia by doing `./opt -load-pass-plugin=../lib/libjulia-codegen.dylib --passes=LateLowerGCFrame -S test.ll -o tmp3.ll`. This is run from inside the tools dir, so your paths might vary (the -S is so LLVM doesn't generate bitcode). My code did crash, however the reproducer was over 500 lines of IR, which makes it harder to debug and to write a test.

The next step is to minimize the crash by running [`llvm-reduce`](https://llvm.org/docs/CommandGuide/llvm-reduce.html) over it (it's basically creduce, but optimized for LLVM IR), which gave me a 2-line reproducer (in this case apparently just having the insertelement was enough for the pass to fail). One thing to be wary of is that llvm-reduce will usually produce very weird code, so it might be useful to modify the code slightly so it doesn't look odd (it will have unreachable basic blocks and such). After the cleanup, fixing the bug here wasn't interesting, but that doesn't apply generally. Also, always transform your reduced IR into a test to put in llvmpasses.
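For reference, the same workflow condensed into one place (a sketch only: the dump file name and the interestingness script below are made up for illustration, the failing test path is the one from the trace above, and the opt/llvm-reduce commands are run from the `tools` directory of the build):

```julia
# Capture the module handed to LateLowerGCFrame right before the crash.
# The --print-before output goes to stderr, and the last module printed is the
# one the pass aborted on. ignorestatus() keeps run() from throwing when the
# child process dies with SIGABRT.
withenv("JULIA_LLVM_ARGS" => "--print-before=LateLowerGCFrame --print-module-scope") do
    run(pipeline(ignorestatus(`$(Base.julia_cmd()) test/reflection.jl`), stderr = "dump.ll"))
end

# Then, outside of julia, from the tools/ directory (after `make -C src install-analysis-deps`):
#   ./opt -load-pass-plugin=../lib/libjulia-codegen.dylib --passes=LateLowerGCFrame -S test.ll -o tmp3.ll
#   llvm-reduce --test=./still-crashes.sh test.ll   # still-crashes.sh is an interestingness
#                                                   # script you write (exit 0 while opt still aborts)
```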
(cherry picked from commit 906d3482d318868e7d3eef6b1db97ef5bedd6f82) --- src/llvm-late-gc-lowering.cpp | 2 +- test/llvmpasses/late-lower-gc.ll | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 46214666c5d36..25ec6959eddf1 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -778,7 +778,7 @@ void RecursivelyVisit(callback f, Value *V) { if (isa(TheUser) || isa(TheUser) || isa(TheUser) || isa(TheUser) || // TODO: should these be removed from this list? isa(TheUser) || isa(TheUser) || - isa(TheUser) || // ICmpEQ/ICmpNE can be used with ptr types + isa(TheUser) || isa(TheUser)|| // ICmpEQ/ICmpNE can be used with ptr types isa(TheUser) || isa(TheUser)) continue; if (isa(TheUser) || isa(TheUser) || isa(TheUser)) { diff --git a/test/llvmpasses/late-lower-gc.ll b/test/llvmpasses/late-lower-gc.ll index bdb10e97093b2..6d5c42f50fcde 100644 --- a/test/llvmpasses/late-lower-gc.ll +++ b/test/llvmpasses/late-lower-gc.ll @@ -199,6 +199,20 @@ define void @decayar([2 x {} addrspace(10)* addrspace(11)*] %ar) { ; CHECK: %r = call i32 @callee_root(ptr addrspace(10) %l0, ptr addrspace(10) %l1) ; CHECK: call void @julia.pop_gc_frame(ptr %gcframe) +define swiftcc ptr addrspace(10) @insert_element(ptr swiftself %0) { +; CHECK-LABEL: @insert_element + %2 = alloca [10 x i64], i32 1, align 8 +; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 10) +; CHECK: [[gc_slot_addr_:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0) +; CHECK: call void @julia.push_gc_frame(ptr %gcframe, i32 10) + call void null(ptr sret([2 x [5 x ptr addrspace(10)]]) %2, ptr null, ptr addrspace(11) null, ptr null) + %4 = insertelement <4 x ptr> zeroinitializer, ptr %2, i32 0 +; CHECK: [[gc_slot_addr_:%.*]] = insertelement <4 x ptr> zeroinitializer, ptr [[gc_slot_addr_:%.*]], i32 0 +; CHECK: call void @julia.pop_gc_frame(ptr %gcframe) + ret ptr addrspace(10) null +} + + !0 = !{i64 0, i64 23} !1 = !{!1} !2 = !{!7} ; scope list From 6846af76eb4da22d7198b737de07895447e9585a Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 16 Jul 2025 14:23:38 -0400 Subject: [PATCH 2/9] Fix LLVM TaskDispatcher implementation issues (#58950) Fixes #58229 (LLVM JITLink stack overflow issue) I tried submitting this promise/future implementation upstream (https://github.com/llvm/llvm-project/compare/main...vtjnash:llvm-project:jn/cowait-jit) so that I would not need to duplicate nearly as much code here to fix this bug, but upstream is currently opposed to fixing this bug and instead insists it is preferable for each downstream project to implement this fix themselves adding extra maintenance burden for us for now. Sigh. 
(cherry picked from commit 00351da179b7de6cd8a28f229580c96603387f0b) --- src/Makefile | 2 +- src/jitlayers.cpp | 23 +- src/llvm-julia-task-dispatcher.h | 465 +++++++++++++++++++++++++++++++ 3 files changed, 474 insertions(+), 16 deletions(-) create mode 100644 src/llvm-julia-task-dispatcher.h diff --git a/src/Makefile b/src/Makefile index e859acc765354..6dac1524145ef 100644 --- a/src/Makefile +++ b/src/Makefile @@ -349,7 +349,7 @@ $(BUILDDIR)/gc-alloc-profiler.o $(BUILDDIR)/gc-alloc-profiler.dbg.obj: $(SRCDIR) $(BUILDDIR)/gc-page-profiler.o $(BUILDDIR)/gc-page-profiler.dbg.obj: $(SRCDIR)/gc-page-profiler.h $(BUILDDIR)/init.o $(BUILDDIR)/init.dbg.obj: $(SRCDIR)/builtin_proto.h $(BUILDDIR)/interpreter.o $(BUILDDIR)/interpreter.dbg.obj: $(SRCDIR)/builtin_proto.h -$(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/llvm-codegen-shared.h +$(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/llvm-julia-task-dispatcher.h $(BUILDDIR)/jltypes.o $(BUILDDIR)/jltypes.dbg.obj: $(SRCDIR)/builtin_proto.h $(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT): $(SRCDIR)/llvm-codegen-shared.h $(BUILDDIR)/julia_version.h $(BUILDDIR)/llvm-alloc-helpers.o $(BUILDDIR)/llvm-alloc-helpers.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-alloc-helpers.h diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 3ea95ea42f596..a44da64941985 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -6,6 +6,7 @@ #include #include "llvm/IR/Mangler.h" +#include #include #include #include @@ -46,6 +47,7 @@ using namespace llvm; #include "jitlayers.h" #include "julia_assert.h" #include "processor.h" +#include "llvm-julia-task-dispatcher.h" #if JL_LLVM_VERSION >= 180000 # include @@ -666,17 +668,8 @@ static void jl_compile_codeinst_now(jl_code_instance_t *codeinst) if (!decls.specFunctionObject.empty()) NewDefs.push_back(decls.specFunctionObject); } - // Split batches to avoid stack overflow in the JIT linker. - // FIXME: Patch ORCJITs InPlaceTaskDispatcher to not recurse on task dispatches but - // push the tasks to a queue to be drained later. This avoids the stackoverflow caused by recursion - // in the linker when compiling a large number of functions at once. 
- SmallVector Addrs; - for (size_t i = 0; i < NewDefs.size(); i += 1000) { - auto end = std::min(i + 1000, NewDefs.size()); - SmallVector batch(NewDefs.begin() + i, NewDefs.begin() + end); - auto AddrsBatch = jl_ExecutionEngine->findSymbols(batch); - Addrs.append(AddrsBatch); - } + auto Addrs = jl_ExecutionEngine->findSymbols(NewDefs); + size_t nextaddr = 0; for (auto &this_code : linkready) { auto it = invokenames.find(this_code); @@ -1841,7 +1834,7 @@ llvm::DataLayout jl_create_datalayout(TargetMachine &TM) { JuliaOJIT::JuliaOJIT() : TM(createTargetMachine()), DL(jl_create_datalayout(*TM)), - ES(cantFail(orc::SelfExecutorProcessControl::Create())), + ES(cantFail(orc::SelfExecutorProcessControl::Create(nullptr, std::make_unique<::JuliaTaskDispatcher>()))), GlobalJD(ES.createBareJITDylib("JuliaGlobals")), JD(ES.createBareJITDylib("JuliaOJIT")), ExternalJD(ES.createBareJITDylib("JuliaExternal")), @@ -2098,7 +2091,7 @@ SmallVector JuliaOJIT::findSymbols(ArrayRef Names) Unmangled[NonOwningSymbolStringPtr(Mangled)] = Unmangled.size(); Exports.add(std::move(Mangled)); } - SymbolMap Syms = cantFail(ES.lookup(orc::makeJITDylibSearchOrder(ArrayRef(&JD)), std::move(Exports))); + SymbolMap Syms = cantFail(::safelookup(ES, orc::makeJITDylibSearchOrder(ArrayRef(&JD)), std::move(Exports))); SmallVector Addrs(Names.size()); for (auto it : Syms) { Addrs[Unmangled.at(orc::NonOwningSymbolStringPtr(it.first))] = it.second.getAddress().getValue(); @@ -2110,7 +2103,7 @@ Expected JuliaOJIT::findSymbol(StringRef Name, bool ExportedS { orc::JITDylib* SearchOrders[3] = {&JD, &GlobalJD, &ExternalJD}; ArrayRef SearchOrder = ArrayRef(&SearchOrders[0], ExportedSymbolsOnly ? 3 : 1); - auto Sym = ES.lookup(SearchOrder, Name); + auto Sym = ::safelookup(ES, SearchOrder, Name); return Sym; } @@ -2123,7 +2116,7 @@ Expected JuliaOJIT::findExternalJDSymbol(StringRef Name, bool { orc::JITDylib* SearchOrders[3] = {&ExternalJD, &GlobalJD, &JD}; ArrayRef SearchOrder = ArrayRef(&SearchOrders[0], ExternalJDOnly ? 1 : 3); - auto Sym = ES.lookup(SearchOrder, getMangledName(Name)); + auto Sym = ::safelookup(ES, SearchOrder, getMangledName(Name)); return Sym; } diff --git a/src/llvm-julia-task-dispatcher.h b/src/llvm-julia-task-dispatcher.h new file mode 100644 index 0000000000000..dd4037378b6b6 --- /dev/null +++ b/src/llvm-julia-task-dispatcher.h @@ -0,0 +1,465 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +namespace { + +using namespace llvm::orc; + +template struct future_value_storage { + // Union disables default construction/destruction semantics, allowing us to + // use placement new/delete for precise control over value lifetime + union { + U value_; + }; + + future_value_storage() {} + ~future_value_storage() {} +}; + +template <> struct future_value_storage { + // No value_ member for void +}; + +struct JuliaTaskDispatcher : public TaskDispatcher { + /// Forward declarations + class future_base; + void dispatch(std::unique_ptr T) override; + void shutdown() override; + void work_until(future_base &F); +private: + /// C++ does not support non-static thread_local variables, so this needs to + /// store both the task and the associated dispatcher queue so that shutdown + /// can wait for the correct tasks to finish. 
+ thread_local static SmallVector, JuliaTaskDispatcher*>> TaskQueue; + std::mutex DispatchMutex; + std::condition_variable WorkFinishedCV; + SmallVector WaitingFutures; + +public: + +/// @name ORC Promise/Future Classes +/// +/// ORC-aware promise/future implementation that integrates with the +/// TaskDispatcher system to allow efficient cooperative multitasking while +/// waiting for results (with certain limitations on what can be awaited). +/// Together they provide building blocks for a full async/await-like runtime +/// for llvm that supports multiple threads. +/// +/// Unlike std::promise/std::future alone, these classes can help dispatch other +/// tasks while waiting, preventing deadlocks and improving overall system +/// throughput. They have a similar API, though with some important differences +/// and some features simply not currently implemented. +/// +/// @{ + +template class promise; +template class future; + +/// Status for future/promise state +enum class FutureStatus : uint8_t { NotReady = 0, Ready = 1 }; + +/// @} + +/// Type-erased base class for futures, generally for scheduler use to avoid +/// needing virtual dispatches +class future_base { +public: + /// Check if the future is now ready with a value (precondition: get_promise() + /// must have been called) + bool ready() const { + if (!valid()) + report_fatal_error("ready() called before get_promise()"); + return state_->status_.load(std::memory_order_acquire) == FutureStatus::Ready; + } + + /// Check if the future is in a valid state (not moved-from and get_promise() called) + bool valid() const { return state_ != nullptr; } + + /// Wait for the future to be ready, helping with task dispatch + void wait(JuliaTaskDispatcher &D) { + // Keep helping with task dispatch until our future is ready + if (!ready()) { + D.work_until(*this); + if (state_->status_.load(std::memory_order_relaxed) != FutureStatus::Ready) + report_fatal_error( + "work_until() returned without this future being ready"); + } + } + +protected: + struct state_base { + std::atomic status_{FutureStatus::NotReady}; + }; + + future_base(state_base *state) : state_(state) {} + future_base() = default; + + /// Only allow deleting the future once it is invalid + ~future_base() { + if (state_) + report_fatal_error("get() must be called before future destruction (ensuring promise::set_value memory is valid)"); + } + + // Move constructor and assignment + future_base(future_base &&other) noexcept : state_(other.state_) { + other.state_ = nullptr; + } + + future_base &operator=(future_base &&other) noexcept { + if (this != &other) { + this->~future_base(); + state_ = other.state_; + other.state_ = nullptr; + } + return *this; + } + + state_base *state_; +}; + +/// TaskDispatcher-aware future class for cooperative await. +/// +/// @tparam T The type of value this future will provide. Use void for futures +/// that +/// signal completion without providing a value. +/// +/// This future implementation is similar to `std::future`, so most code can +/// transition to it easily. However, it differs from `std::future` in a few +/// key ways to be aware of: +/// - No exception support (or the overhead for it). +/// - The future is created before the promise, then the promise is created +/// from the future. +/// - The future is in an invalid state until get_promise() has been called. 
+/// - Waiting operations (get(&D), wait(&D)) help dispatch other tasks while +/// blocked, requiring an additional argument of which TaskDispatcher object +/// of where all associated work will be scheduled. +/// - While `wait` may be called multiple times and on multiple threads, all of +/// them must have returned before calling `get` on exactly one thread. +/// - Must call get() exactly once before destruction (enforced with +/// `report_fatal_error`) after each call to `get_promise`. Internal state is +/// freed when `get` returns, and allocated when `get_promise` is called. +/// +/// Other notable features, in common with `std::future`: +/// - Supports both value types and void specialization through the same +/// interface. +/// - Thread-safe through atomic operations. +/// - Provides acquire-release ordering with `std::promise::set_value()`. +/// - Concurrent access to any method (including to `ready`) on multiple threads +/// is not allowed. +/// - Holding any locks while calling `get()` is likely to lead to deadlock. +/// +/// @warning Users should avoid borrowing references to futures. References may +/// go out of scope and break the uniqueness contract, which may break the +/// soundness of the types. Always use move semantics or pass by value. + +template class future : public future_base { +public: + future() : future_base(nullptr) {} + future(const future &) = delete; + future &operator=(const future &) = delete; + future(future &&) = default; + future &operator=(future &&) = default; + + /// Get the value, helping with task dispatch while waiting. + /// This will destroy the underlying value, so this must be called exactly + /// once, which returns the future to the initial state. + T get(JuliaTaskDispatcher &D) { + if (!valid()) + report_fatal_error("get() must only be called once, after get_promise()"); + wait(D); + auto state_ = static_cast(this->state_); + this->state_ = nullptr; + return take_value(state_); + } + + /// Get the associated promise (must only be called once) + promise get_promise() { + if (valid()) + report_fatal_error("get_promise() can only be called once"); + auto state_ = new state(); + this->state_ = state_; + return promise(state_); + } + +private: + friend class promise; + + // Template the state struct with EBCO so that future has no wasted + // overhead for the value. The declaration of future_value_storage is far + // above here since GCC doesn't implement it properly when nested. + struct state : future_base::state_base, future_value_storage {}; + + template + typename std::enable_if::value, U>::type take_value(state *state_) { + T result = std::move(state_->value_); + state_->value_.~T(); + delete state_; + return result; + } + + template + typename std::enable_if::value, U>::type take_value(state *state_) { + delete state_; + } +}; + +/// TaskDispatcher-aware promise class that provides values to associated +/// futures. +/// +/// @tparam T The type of value this promise will provide. Use void for promises +/// that +/// signal completion without providing a value. +/// +/// This promise implementation provides the value-setting side of the +/// promise/future pair and integrates with the ORC TaskDispatcher system. Key +/// characteristics: +/// - Created from a future via get_promise() rather than creating the future from the promise. +/// - Must call get_future() on the thread that created it (it can be passed to another thread, but do not borrow a reference and use that to mutate it from another thread). 
+/// - Must call set_value() exactly once per `get_promise()` call to provide the result. +/// - Thread-safe from set_value to get. +/// - Move-only semantics to prevent accidental copying. +/// +/// The `promise` can usually be passed to another thread in one of two ways: +/// - With move semantics: +/// * `[P = F.get_promise()] () { P.set_value(); }` +/// * `[P = std::move(P)] () { P.set_value(); }` +/// * Advantages: clearer where `P` is owned, automatic deadlock detection +/// on destruction, +/// easier memory management if the future is returned from the function. +/// - By reference: +/// * `[&P] () { P.set_value(); }` +/// * Advantages: simpler memory management if the future is consumed in the +/// same function. +/// * Disadvantages: more difficult memory management if the future is +/// returned from the function, no deadlock detection. +/// +/// @warning Users should avoid borrowing references to promises. References may +/// go out of scope and break the uniqueness contract, which may break the +/// soundness of the types. Always use move semantics or pass by value. +/// +/// @par Error Handling: +/// The promise/future system uses report_fatal_error() for misuse: +/// - Calling set_value() more than once. +/// - Destroying a future without calling get(). +/// - Calling get() more than once on a future. +/// +/// @par Thread Safety: +/// - Each promise/future must only be accessed by one thread, as concurrent +/// calls to the API functions may result in crashes. +/// - Multiple threads can safely access different promise/future pairs. +/// - set_value() and get() operations are atomic and thread-safe. +/// - Move operations should only be performed by a single thread. +template class promise { + friend class future; + +public: + promise() : state_(nullptr) {} + + ~promise() { + // Assert proper promise lifecycle: ensure set_value was called if promise was valid. + // This can catch deadlocks where a promise is created but set_value() is + // never called, though only if the promise is moved from instead of + // borrowed from the frame with the future. + // Empty promises (state_ == nullptr) are allowed to be destroyed without calling set_value. + } + + promise(const promise &) = delete; + promise &operator=(const promise &) = delete; + + promise(promise &&other) noexcept + : state_(other.state_) { + other.state_ = nullptr; + } + + promise &operator=(promise &&other) noexcept { + if (this != &other) { + this->~promise(); + state_ = other.state_; + other.state_ = nullptr; + } + return *this; + } + + + /// Set the value (must only be called once) + // In C++20, this std::conditional weirdness can probably be replaced just + // with requires. It ensures that we don't try to define a method for `void&`, + // but that if the user calls set_value(v) for any value v that they get a + // member function error, instead of no member named 'value_'. 
+ template + void + set_value(const typename std::conditional::value, + std::nullopt_t, T>::type &value) const { + assert(state_ && "set_value() can only be called once"); + new (&state_->value_) T(value); + state_->status_.store(FutureStatus::Ready, std::memory_order_release); + state_ = nullptr; + } + + template + void set_value(typename std::conditional::value, + std::nullopt_t, T>::type &&value) const { + assert(state_ && "set_value() can only be called once"); + new (&state_->value_) T(std::move(value)); + state_->status_.store(FutureStatus::Ready, std::memory_order_release); + state_ = nullptr; + } + + template + typename std::enable_if::value, void>::type + set_value(const std::nullopt_t &value) = delete; + + template + typename std::enable_if::value, void>::type + set_value(std::nullopt_t &&value) = delete; + + template + typename std::enable_if::value, void>::type set_value() const { + assert(state_ && "set_value() can only be called once"); + state_->status_.store(FutureStatus::Ready, std::memory_order_release); + state_ = nullptr; + } + + /// Swap with another promise + void swap(promise &other) noexcept { + using std::swap; + swap(state_, other.state_); + } + +private: + explicit promise(typename future::state *state) + : state_(state) {} + + mutable typename future::state *state_; +}; + +}; // class JuliaTaskDispatcher + +thread_local SmallVector, JuliaTaskDispatcher *>> JuliaTaskDispatcher::TaskQueue; + +void JuliaTaskDispatcher::dispatch(std::unique_ptr T) { + TaskQueue.push_back(std::pair(std::move(T), this)); +} + +void JuliaTaskDispatcher::shutdown() { + // Keep processing until no tasks belonging to this dispatcher remain + while (true) { + // Check if any task belongs to this dispatcher + auto it = std::find_if( + TaskQueue.begin(), TaskQueue.end(), + [this](const auto &TaskPair) { return TaskPair.second == this; }); + + // If no tasks belonging to this dispatcher, we're done + if (it == TaskQueue.end()) + return; + + // Create a future/promise pair to wait for completion of this task + future taskFuture; + // Replace the task with a GenericNamedTask that wraps the original task + // with a notification of completion that this thread can work_until. 
+ auto originalTask = std::move(it->first); + it->first = makeGenericNamedTask( + [originalTask = std::move(originalTask), + taskPromise = taskFuture.get_promise()]() { + originalTask->run(); + taskPromise.set_value(); + }, + "Shutdown task marker"); + + // Wait for the task to complete + taskFuture.get(*this); + } +} + +void JuliaTaskDispatcher::work_until(future_base &F) { + while (!F.ready()) { + // First, process any tasks in our local queue + // Process in LIFO order (most recently added first) to avoid deadlocks + // when tasks have dependencies on each other + while (!TaskQueue.empty()) { + { + auto TaskPair = std::move(TaskQueue.back()); + TaskQueue.pop_back(); + TaskPair.first->run(); + } + + // Notify any threads that might be waiting for work to complete + { + std::lock_guard Lock(DispatchMutex); + bool ShouldNotify = llvm::any_of( + WaitingFutures, [](future_base *F) { return F->ready(); }); + if (ShouldNotify) { + WaitingFutures.clear(); + WorkFinishedCV.notify_all(); + } + } + + // Check if our future is now ready + if (F.ready()) + return; + } + + // If we get here, our queue is empty but the future isn't ready + // We need to wait for other threads to finish work that should complete our + // future + { + std::unique_lock Lock(DispatchMutex); + WaitingFutures.push_back(&F); + WorkFinishedCV.wait(Lock, [&F]() { return F.ready(); }); + } + } +} + +} // End namespace + +namespace std { +template +void swap(::JuliaTaskDispatcher::promise &lhs, ::JuliaTaskDispatcher::promise &rhs) noexcept { + lhs.swap(rhs); +} +} // End namespace std + +// n.b. this actually is sometimes a safepoint +Expected +safelookup(ExecutionSession &ES, + const JITDylibSearchOrder &SearchOrder, + SymbolLookupSet Symbols, LookupKind K = LookupKind::Static, + SymbolState RequiredState = SymbolState::Ready, + RegisterDependenciesFunction RegisterDependencies = NoDependenciesToRegister) JL_NOTSAFEPOINT { + JuliaTaskDispatcher::future> PromisedFuture; + auto NotifyComplete = [PromisedResult = PromisedFuture.get_promise()](Expected R) { + PromisedResult.set_value(std::move(R)); + }; + ES.lookup(K, SearchOrder, std::move(Symbols), RequiredState, + std::move(NotifyComplete), RegisterDependencies); + return PromisedFuture.get(static_cast(ES.getExecutorProcessControl().getDispatcher())); +} + +Expected +safelookup(ExecutionSession &ES, + const JITDylibSearchOrder &SearchOrder, + SymbolStringPtr Name, + SymbolState RequiredState = SymbolState::Ready) JL_NOTSAFEPOINT { + SymbolLookupSet Names({Name}); + + if (auto ResultMap = safelookup(ES, SearchOrder, std::move(Names), LookupKind::Static, + RequiredState, NoDependenciesToRegister)) { + assert(ResultMap->size() == 1 && "Unexpected number of results"); + assert(ResultMap->count(Name) && "Missing result for symbol"); + return std::move(ResultMap->begin()->second); + } else + return ResultMap.takeError(); +} + +Expected +safelookup(ExecutionSession &ES, + ArrayRef SearchOrder, SymbolStringPtr Name, + SymbolState RequiredState = SymbolState::Ready) JL_NOTSAFEPOINT { + return safelookup(ES, makeJITDylibSearchOrder(SearchOrder), Name, RequiredState); +} + +Expected +safelookup(ExecutionSession &ES, + ArrayRef SearchOrder, StringRef Name, + SymbolState RequiredState = SymbolState::Ready) JL_NOTSAFEPOINT { + return safelookup(ES, SearchOrder, ES.intern(Name), RequiredState); +} From c9bfbbc005231b8e7a4bc32c99b15d3a5060fb3a Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Thu, 17 Jul 2025 13:19:33 -0400 Subject: [PATCH 3/9] Fix use-after-free in FileWatching (#59017) We 
observe an abort on Windows on Revise master CI, where a free'd handle is passed to jl_close_uv. The root cause is that uv_fseventscb_file called uvfinalize earlier, but did not set the handle to NULL, so when the actual finalizer ran later, it would see corrupted state. (cherry picked from commit b45b429b93966e992c25a101c896e72738799ca4) --- stdlib/FileWatching/src/FileWatching.jl | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/stdlib/FileWatching/src/FileWatching.jl b/stdlib/FileWatching/src/FileWatching.jl index ebfdd9c8fea6b..ddaf36dfd33a4 100644 --- a/stdlib/FileWatching/src/FileWatching.jl +++ b/stdlib/FileWatching/src/FileWatching.jl @@ -516,17 +516,19 @@ end function uvfinalize(uv::Union{FileMonitor, FolderMonitor}) iolock_begin() - if uv.handle != C_NULL - disassociate_julia_struct(uv) # close (and free) without notify - ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), uv.handle) + handle = @atomicswap :monotonic uv.handle = C_NULL + if handle != C_NULL + disassociate_julia_struct(handle) # close (and free) without notify + ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), handle) end iolock_end() end function close(t::Union{FileMonitor, FolderMonitor}) iolock_begin() - if t.handle != C_NULL - ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t.handle) + handle = t.handle + if handle != C_NULL + ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), handle) end iolock_end() end From 2bfc0f2cac802b4fa8fcd2379bfd2a582108100c Mon Sep 17 00:00:00 2001 From: JonasIsensee Date: Fri, 18 Jul 2025 22:28:54 +0200 Subject: [PATCH 4/9] Bugfix: Use Base.aligned_sizeof instead of sizeof in Mmap.mmap (#58998) fix #58982 (cherry picked from commit c30199fe17aa24f77530bd64f7762546c15e2941) --- stdlib/Mmap/src/Mmap.jl | 8 ++++---- stdlib/Mmap/test/runtests.jl | 15 ++++++++++++++- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/stdlib/Mmap/src/Mmap.jl b/stdlib/Mmap/src/Mmap.jl index 7d57bf053940d..c527123d51bb8 100644 --- a/stdlib/Mmap/src/Mmap.jl +++ b/stdlib/Mmap/src/Mmap.jl @@ -187,16 +187,16 @@ like HDF5 (which can be used with memory-mapping). """ function mmap(io::IO, ::Type{Array{T,N}}=Vector{UInt8}, - dims::NTuple{N,Integer}=(div(filesize(io)-position(io),sizeof(T)),), + dims::NTuple{N,Integer}=(div(filesize(io)-position(io),Base.aligned_sizeof(T)),), offset::Integer=position(io); grow::Bool=true, shared::Bool=true) where {T,N} # check inputs isopen(io) || throw(ArgumentError("$io must be open to mmap")) isbitstype(T) || throw(ArgumentError("unable to mmap $T; must satisfy isbitstype(T) == true")) - len = sizeof(T) + len = Base.aligned_sizeof(T) for l in dims len, overflow = Base.Checked.mul_with_overflow(promote(len, l)...) - overflow && throw(ArgumentError("requested size prod($((sizeof(T), dims...))) too large, would overflow typeof(size(T)) == $(typeof(len))")) + overflow && throw(ArgumentError("requested size prod($((len, dims...))) too large, would overflow typeof(size(T)) == $(typeof(len))")) end len >= 0 || throw(ArgumentError("requested size must be ≥ 0, got $len")) len == 0 && return Array{T}(undef, ntuple(x->0,Val(N))) @@ -267,7 +267,7 @@ end mmap(file::AbstractString, ::Type{T}=Vector{UInt8}, - dims::NTuple{N,Integer}=(div(filesize(file),sizeof(eltype(T))),), + dims::NTuple{N,Integer}=(div(filesize(file),Base.aligned_sizeof(eltype(T))),), offset::Integer=Int64(0); grow::Bool=true, shared::Bool=true) where {T<:Array,N} = open(io->mmap(io, T, dims, offset; grow=grow, shared=shared), file, isfile(file) ? 
"r" : "w+")::Array{eltype(T),N} diff --git a/stdlib/Mmap/test/runtests.jl b/stdlib/Mmap/test/runtests.jl index 03e4b48d95f7a..224664e5faaa2 100644 --- a/stdlib/Mmap/test/runtests.jl +++ b/stdlib/Mmap/test/runtests.jl @@ -44,7 +44,7 @@ s = open(file) @test length(@inferred mmap(s, Vector{Int8}, 12, 0; grow=false)) == 12 @test length(@inferred mmap(s, Vector{Int8}, 12, 0; shared=false)) == 12 close(s) -@test_throws ErrorException mmap(file, Vector{Ref}) # must be bit-type +@test_throws ArgumentError mmap(file, Vector{Ref}) # must be bit-type GC.gc(); GC.gc() s = open(f->f,file,"w") @@ -341,6 +341,19 @@ end GC.gc() rm(file) +@testset "test for #58982 - mmap with primitive types" begin + file = tempname() + primitive type PrimType9Bytes 9*8 end + arr = Vector{PrimType9Bytes}(undef, 2) + write(file, arr) + m = mmap(file, Vector{PrimType9Bytes}) + @test length(m) == 2 + @test m[1] == arr[1] + @test m[2] == arr[2] + finalize(m); m = nothing; GC.gc() + rm(file) +end + @testset "Docstrings" begin @test isempty(Docs.undocumented_names(Mmap)) end From ab66f187e5d6c16742f7edd1dc856d8db79464bf Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Tue, 22 Jul 2025 16:08:27 -0400 Subject: [PATCH 5/9] stored method interference graph (#58948) Store full method interference relationship graph in interferences field of Method to avoid expensive morespecific calls during dispatch. This provides significant performance improvements: - Replace method comparisons with precomputed interference lookup. - Optimize ml_matches minmax computation using interference lookups. - Optimize sort_mlmatches for large return sets by iterating over interferences instead of all matching methods. - Add method_morespecific_via_interferences in both C and Julia. This representation may exclude some edges that are implied by transitivity since sort_mlmatches will ensure the correct result by following strong edges. Ambiguous edges are guaranteed to be checkable without recursion. Also fix a variety of bugs along the way: - Builtins signature would cause them to try to discard all other methods during `sort_mlmatches`. - Some ambiguities were over-estimated, which now are improved upon. - Setting lim==-1 now gives the same limited list of methods as lim>0, since that is actually faster now than attempting to give the unsorted list. This provides a better fix to #53814 than #57837 and fixes #58766. - Reverts recent METHOD_SIG_LATEST_HAS_NOTMORESPECIFIC attempt (though not the whole commit), since I found a significant problem with any usage of that bit during testing: it only tracks methods that intersect with a target, but new methods do not necessarily intersect with any existing target. This provides a decent performance improvement to `methods` calls, which implies a decent speed up to package loading also (e.g. ModelingToolkit loads in about 4 seconds instead of 5 seconds). 
(cherry picked from commit 59a7bb3184feee32227aa15d5c39aa59270ad8e5) --- base/Base.jl | 2 + base/errorshow.jl | 8 +- base/methodshow.jl | 8 +- base/runtime_internals.jl | 10 +- base/staticdata.jl | 215 +++++++--- src/gf.c | 856 ++++++++++++++++++-------------------- src/jltypes.c | 14 +- src/julia.h | 1 + src/julia_internal.h | 3 +- src/method.c | 1 + src/staticdata.c | 2 - src/staticdata_utils.c | 4 + stdlib/Test/src/Test.jl | 1 - test/ambiguous.jl | 40 +- test/core.jl | 2 +- 15 files changed, 638 insertions(+), 529 deletions(-) diff --git a/base/Base.jl b/base/Base.jl index 6d798d69014ea..f8905bf49ce79 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -264,7 +264,9 @@ include("uuid.jl") include("pkgid.jl") include("toml_parser.jl") include("linking.jl") +module StaticData include("staticdata.jl") +end include("loading.jl") # misc useful functions & macros diff --git a/base/errorshow.jl b/base/errorshow.jl index 9df363a8c5e9f..e771597d7b1e0 100644 --- a/base/errorshow.jl +++ b/base/errorshow.jl @@ -441,6 +441,7 @@ function show_method_candidates(io::IO, ex::MethodError, kwargs=[]) line_score = Int[] # These functions are special cased to only show if first argument is matched. special = f === convert || f === getindex || f === setindex! + f isa Core.Builtin && return # `methods` isn't very useful for a builtin funcs = Tuple{Any,Vector{Any}}[(f, arg_types_param)] # An incorrect call method produces a MethodError for convert. @@ -448,7 +449,7 @@ function show_method_candidates(io::IO, ex::MethodError, kwargs=[]) # pool MethodErrors for these two functions. if f === convert && !isempty(arg_types_param) at1 = arg_types_param[1] - if isType(at1) && !has_free_typevars(at1) + if isType(at1) && !has_free_typevars(at1) && at1.parameters[1] isa Type push!(funcs, (at1.parameters[1], arg_types_param[2:end])) end end @@ -470,8 +471,8 @@ function show_method_candidates(io::IO, ex::MethodError, kwargs=[]) end sig0 = sig0::DataType s1 = sig0.parameters[1] - if sig0 === Tuple || !isa(func, rewrap_unionall(s1, method.sig)) - # function itself doesn't match or is a builtin + if !isa(func, rewrap_unionall(s1, method.sig)) + # function itself doesn't match continue else print(iob, " ") @@ -616,6 +617,7 @@ function show_method_candidates(io::IO, ex::MethodError, kwargs=[]) println(io) # extra newline for spacing to stacktrace end end + nothing end # In case the line numbers in the source code have changed since the code was compiled, diff --git a/base/methodshow.jl b/base/methodshow.jl index dd391a46d170a..cae0e9d809225 100644 --- a/base/methodshow.jl +++ b/base/methodshow.jl @@ -78,7 +78,7 @@ end # NOTE: second argument is deprecated and is no longer used function kwarg_decl(m::Method, kwtype = nothing) - if m.sig !== Tuple # OpaqueClosure or Builtin + if !(m.sig === Tuple || m.sig <: Tuple{Core.Builtin, Vararg}) # OpaqueClosure or Builtin kwtype = typeof(Core.kwcall) sig = rewrap_unionall(Tuple{kwtype, NamedTuple, (unwrap_unionall(m.sig)::DataType).parameters...}, m.sig) kwli = ccall(:jl_methtable_lookup, Any, (Any, UInt), sig, get_world_counter()) @@ -216,8 +216,7 @@ show(io::IO, ::MIME"text/plain", m::Method; kwargs...) 
= show_method(io, m; kwar function show_method(io::IO, m::Method; modulecolor = :light_black, digit_align_width = 1) tv, decls, file, line = arg_decl_parts(m) - sig = unwrap_unionall(m.sig) - if sig === Tuple + if m.sig <: Tuple{Core.Builtin, Vararg} # Builtin print(io, m.name, "(...)") file = "none" @@ -420,8 +419,7 @@ end function show(io::IO, ::MIME"text/html", m::Method) tv, decls, file, line = arg_decl_parts(m, true) sig = unwrap_unionall(m.sig) - if sig === Tuple - # Builtin + if sig <: Tuple{Core.Builtin, Vararg} print(io, m.name, "(...) in ", parentmodule(m)) return end diff --git a/base/runtime_internals.jl b/base/runtime_internals.jl index f48b4734a7b8b..7a06a8fcbdb75 100644 --- a/base/runtime_internals.jl +++ b/base/runtime_internals.jl @@ -1330,15 +1330,7 @@ end function matches_to_methods(ms::Array{Any,1}, tn::Core.TypeName, mod) # Lack of specialization => a comprehension triggers too many invalidations via _collect, so collect the methods manually ms = Method[(ms[i]::Core.MethodMatch).method for i in 1:length(ms)] - # Remove shadowed methods with identical type signatures - prev = nothing - filter!(ms) do m - l = prev - repeated = (l isa Method && m.sig == l.sig) - prev = m - return !repeated - end - # Remove methods not part of module (after removing shadowed methods) + # Remove methods not part of module mod === nothing || filter!(ms) do m return parentmodule(m) ∈ mod end diff --git a/base/staticdata.jl b/base/staticdata.jl index 9f7ef67743906..5a8ea39c8a4c9 100644 --- a/base/staticdata.jl +++ b/base/staticdata.jl @@ -1,9 +1,7 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -module StaticData - using .Core: CodeInstance, MethodInstance -using .Base: JLOptions, Compiler, get_world_counter, _methods_by_ftype, get_methodtable +using .Base: JLOptions, Compiler, get_world_counter, _methods_by_ftype, get_methodtable, morespecific const WORLD_AGE_REVALIDATION_SENTINEL::UInt = 1 const _jl_debug_method_invalidation = Ref{Union{Nothing,Vector{Any}}}(nothing) @@ -167,6 +165,7 @@ function verify_method(codeinst::CodeInstance, stack::Vector{CodeInstance}, visi elseif maxworld == Base.get_require_world() # if no new worlds were allocated since serializing the base module, then no new validation is worth doing right now either else + matches = [] j = 1 while j ≤ length(callees) local min_valid2::UInt, max_valid2::UInt @@ -177,14 +176,14 @@ function verify_method(codeinst::CodeInstance, stack::Vector{CodeInstance}, visi end if edge isa MethodInstance sig = edge.specTypes - min_valid2, max_valid2, matches = verify_call(sig, callees, j, 1, world, true) + min_valid2, max_valid2 = verify_call(sig, callees, j, 1, world, true, matches) j += 1 elseif edge isa Int sig = callees[j+1] # Handle negative counts (fully_covers=false) nmatches = abs(edge) fully_covers = edge > 0 - min_valid2, max_valid2, matches = verify_call(sig, callees, j+2, nmatches, world, fully_covers) + min_valid2, max_valid2 = verify_call(sig, callees, j+2, nmatches, world, fully_covers, matches) j += 2 + nmatches edge = sig elseif edge isa Core.Binding @@ -201,7 +200,6 @@ function verify_method(codeinst::CodeInstance, stack::Vector{CodeInstance}, visi min_valid2 = 1 max_valid2 = 0 end - matches = nothing else callee = callees[j+1] if callee isa Core.MethodTable # skip the legacy edge (missing backedge) @@ -216,7 +214,7 @@ function verify_method(codeinst::CodeInstance, stack::Vector{CodeInstance}, visi else meth = callee::Method end - min_valid2, max_valid2, matches = verify_invokesig(edge, meth, 
world) + min_valid2, max_valid2 = verify_invokesig(edge, meth, world, matches) j += 2 end if minworld < min_valid2 @@ -227,7 +225,7 @@ function verify_method(codeinst::CodeInstance, stack::Vector{CodeInstance}, visi end invalidations = _jl_debug_method_invalidation[] if max_valid2 ≠ typemax(UInt) && invalidations !== nothing - push!(invalidations, edge, "insert_backedges_callee", codeinst, matches) + push!(invalidations, edge, "insert_backedges_callee", codeinst, copy(matches)) end if max_valid2 == 0 && invalidations === nothing break @@ -291,44 +289,172 @@ function verify_method(codeinst::CodeInstance, stack::Vector{CodeInstance}, visi return 0, minworld, maxworld end -function verify_call(@nospecialize(sig), expecteds::Core.SimpleVector, i::Int, n::Int, world::UInt, fully_covers::Bool) +function get_method_from_edge(@nospecialize t) + if t isa Method + return t + else + if t isa CodeInstance + t = get_ci_mi(t)::MethodInstance + else + t = t::MethodInstance + end + return t.def::Method + end +end + +# Check if method2 is in method1's interferences set +# Returns true if method2 is found (meaning !morespecific(method1, method2)) +function method_in_interferences(method2::Method, method1::Method) + interferences = method1.interferences + for k = 1:length(interferences) + isassigned(interferences, k) || break + interference_method = interferences[k]::Method + if interference_method === method2 + return true + end + end + return false +end + +# Check if method1 is more specific than method2 via the interference graph +function method_morespecific_via_interferences(method1::Method, method2::Method) + if method1 === method2 + return false + end + ms = method_in_interferences_recursive(method1, method2, IdSet{Method}()) + # slow check: @assert ms === morespecific(method1, method2) || typeintersect(method1.sig, method2.sig) === Union{} || typeintersect(method2.sig, method1.sig) === Union{} + return ms +end + +# Returns true if method1 is in method2's interferences (meaning !morespecific(method2, method1)) +function method_in_interferences_recursive(method1::Method, method2::Method, visited::IdSet{Method}) + if method_in_interferences(method2, method1) + return false + end + if method_in_interferences(method1, method2) + return true + end + + # Recursively check through interference graph + method2 in visited && return false + push!(visited, method2) + interferences = method2.interferences + for k = 1:length(interferences) + isassigned(interferences, k) || break + method3 = interferences[k]::Method + if method_in_interferences(method2, method3) + continue # only follow edges to morespecific methods in search of the morespecific target (skip ambiguities) + end + if method_in_interferences_recursive(method1, method3, visited) + return true # found method1 in the interference graph + end + end + + return false +end + +function verify_call(@nospecialize(sig), expecteds::Core.SimpleVector, i::Int, n::Int, world::UInt, fully_covers::Bool, matches::Vector{Any}) # verify that these edges intersect with the same methods as before mi = nothing - if n == 1 + expected_deleted = false + for j = 1:n + t = expecteds[i+j-1] + meth = get_method_from_edge(t) + if iszero(meth.dispatch_status & METHOD_SIG_LATEST_WHICH) + expected_deleted = true + break + end + end + if expected_deleted + if _jl_debug_method_invalidation[] === nothing && world == get_world_counter() + return UInt(1), UInt(0) + end + elseif n == 1 # first, fast-path a check if the expected method simply dominates its sig anyways # so the result of 
ml_matches is already simply known - let t = expecteds[i], meth, minworld, maxworld, result - if t isa Method - meth = t - else + let t = expecteds[i], meth, minworld, maxworld + meth = get_method_from_edge(t) + if !(t isa Method) if t isa CodeInstance mi = get_ci_mi(t)::MethodInstance else mi = t::MethodInstance end - meth = mi.def::Method - # Fast path is legal when fully_covers=true OR when METHOD_SIG_LATEST_HAS_NOTMORESPECIFIC is unset - if (fully_covers || iszero(meth.dispatch_status & METHOD_SIG_LATEST_HAS_NOTMORESPECIFIC)) && - !iszero(mi.dispatch_status & METHOD_SIG_LATEST_ONLY) + # Fast path is legal when fully_covers=true + if fully_covers && !iszero(mi.dispatch_status & METHOD_SIG_LATEST_ONLY) minworld = meth.primary_world @assert minworld ≤ world maxworld = typemax(UInt) - result = Any[] # result is unused - return minworld, maxworld, result + return minworld, maxworld end end - # Fast path is legal when fully_covers=true OR when METHOD_SIG_LATEST_HAS_NOTMORESPECIFIC is unset - if fully_covers || iszero(meth.dispatch_status & METHOD_SIG_LATEST_HAS_NOTMORESPECIFIC) - if !iszero(meth.dispatch_status & METHOD_SIG_LATEST_ONLY) - minworld = meth.primary_world - @assert minworld ≤ world - maxworld = typemax(UInt) - result = Any[] # result is unused - return minworld, maxworld, result + # Fast path is legal when fully_covers=true + if fully_covers && !iszero(meth.dispatch_status & METHOD_SIG_LATEST_ONLY) + minworld = meth.primary_world + @assert minworld ≤ world + maxworld = typemax(UInt) + return minworld, maxworld + end + end + elseif n > 1 + # Try the interference set fast path: check if all interference sets are covered by expecteds + interference_fast_path_success = fully_covers + # If it didn't fail yet, then check that all interference methods are either expected, or not applicable. 
+ if interference_fast_path_success + local interference_minworld::UInt = 1 + for j = 1:n + meth = get_method_from_edge(expecteds[i+j-1]) + if interference_minworld < meth.primary_world + interference_minworld = meth.primary_world + end + interferences = meth.interferences + for k = 1:length(interferences) + isassigned(interferences, k) || break # no more entries + interference_method = interferences[k]::Method + if iszero(interference_method.dispatch_status & METHOD_SIG_LATEST_WHICH) + # detected a deleted interference_method, so need the full lookup to compute minworld + interference_fast_path_success = false + break + end + world < interference_method.primary_world && break # this and later entries are for a future world + local found_in_expecteds = false + for j = 1:n + if interference_method === get_method_from_edge(expecteds[i+j-1]) + found_in_expecteds = true + break + end + end + if !found_in_expecteds + ti = typeintersect(sig, interference_method.sig) + if !(ti === Union{}) + # try looking for a different expected method that fully covers this interference_method anyways over their intersection + for j = 1:n + meth2 = get_method_from_edge(expecteds[i+j-1]) + if method_morespecific_via_interferences(meth2, interference_method) && ti <: meth2.sig + found_in_expecteds = true + break + end + end + if !found_in_expecteds + meth2 = get_method_from_edge(expecteds[i]) + interference_fast_path_success = false + break + end + end + end + end + if !interference_fast_path_success + break end end + if interference_fast_path_success + # All interference sets are covered by expecteds, can return success + @assert interference_minworld ≤ world + maxworld = typemax(UInt) + return interference_minworld, maxworld + end end - end + end # next, compare the current result of ml_matches to the old result lim = _jl_debug_method_invalidation[] !== nothing ? 
Int(typemax(Int32)) : n minworld = Ref{UInt}(1) @@ -336,6 +462,7 @@ function verify_call(@nospecialize(sig), expecteds::Core.SimpleVector, i::Int, n has_ambig = Ref{Int32}(0) result = _methods_by_ftype(sig, nothing, lim, world, #=ambig=#false, minworld, maxworld, has_ambig) if result === nothing + empty!(matches) maxworld[] = 0 else # setdiff!(result, expected) @@ -348,17 +475,7 @@ function verify_call(@nospecialize(sig), expecteds::Core.SimpleVector, i::Int, n local found = false for j = 1:n t = expecteds[i+j-1] - if t isa Method - meth = t - else - if t isa CodeInstance - t = get_ci_mi(t)::MethodInstance - else - t = t::MethodInstance - end - meth = t.def::Method - end - if match.method == meth + if match.method == get_method_from_edge(t) found = true break end @@ -377,12 +494,13 @@ function verify_call(@nospecialize(sig), expecteds::Core.SimpleVector, i::Int, n end if maxworld[] ≠ typemax(UInt) && _jl_debug_method_invalidation[] !== nothing resize!(result, ins) + copy!(matches, result) end end if maxworld[] == typemax(UInt) && mi isa MethodInstance ccall(:jl_promote_mi_to_current, Cvoid, (Any, UInt, UInt), mi, minworld[], world) end - return minworld[], maxworld[], result + return minworld[], maxworld[] end # fast-path dispatch_status bit definitions (false indicates unknown) @@ -390,13 +508,11 @@ end const METHOD_SIG_LATEST_WHICH = 0x1 # true indicates this method would be returned as the only result from `methods` when calling `method.sig` in the current latest world const METHOD_SIG_LATEST_ONLY = 0x2 -# true indicates there exists some other method that is not more specific than this one in the current latest world (which might be more fully covering) -const METHOD_SIG_LATEST_HAS_NOTMORESPECIFIC = 0x8 -function verify_invokesig(@nospecialize(invokesig), expected::Method, world::UInt) +function verify_invokesig(@nospecialize(invokesig), expected::Method, world::UInt, matches::Vector{Any}) @assert invokesig isa Type local minworld::UInt, maxworld::UInt - matched = nothing + empty!(matches) if invokesig === expected.sig && !iszero(expected.dispatch_status & METHOD_SIG_LATEST_WHICH) # the invoke match is `expected` for `expected->sig`, unless `expected` is replaced minworld = expected.primary_world @@ -413,14 +529,13 @@ function verify_invokesig(@nospecialize(invokesig), expected::Method, world::UIn if matched === nothing maxworld = 0 else - matched = Any[matched.method] - if matched[] !== expected + matched = matched.method + push!(matches, matched) + if matched !== expected maxworld = 0 end end end end - return minworld, maxworld, matched + return minworld, maxworld end - -end # module StaticData diff --git a/src/gf.c b/src/gf.c index b0c7a65ea8f99..0757614b390db 100644 --- a/src/gf.c +++ b/src/gf.c @@ -302,27 +302,27 @@ JL_DLLEXPORT jl_value_t *jl_methtable_lookup(jl_value_t *type, size_t world) jl_method_t *jl_mk_builtin_func(jl_datatype_t *dt, jl_sym_t *sname, jl_fptr_args_t fptr) JL_GC_DISABLED { - jl_method_t *m = jl_new_method_uninit(jl_core_module); + jl_value_t *params[2]; + params[0] = dt->name->wrapper; + params[1] = jl_tparam0(jl_anytuple_type); + jl_datatype_t *tuptyp = (jl_datatype_t*)jl_apply_tuple_type_v(params, 2); + + jl_typemap_entry_t *newentry = NULL; + jl_method_t *m = NULL; + JL_GC_PUSH3(&m, &newentry, &tuptyp); + + m = jl_new_method_uninit(jl_core_module); m->name = sname; m->module = jl_core_module; m->isva = 1; m->nargs = 2; jl_atomic_store_relaxed(&m->primary_world, 1); jl_atomic_store_relaxed(&m->dispatch_status, METHOD_SIG_LATEST_ONLY | 
METHOD_SIG_LATEST_WHICH); - m->sig = (jl_value_t*)jl_anytuple_type; + m->sig = (jl_value_t*)tuptyp; m->slot_syms = jl_an_empty_string; m->nospecialize = 0; m->nospecialize = ~m->nospecialize; - jl_typemap_entry_t *newentry = NULL; - jl_datatype_t *tuptyp = NULL; - JL_GC_PUSH3(&m, &newentry, &tuptyp); - - jl_value_t *params[2]; - params[0] = dt->name->wrapper; - params[1] = jl_tparam0(jl_anytuple_type); - tuptyp = (jl_datatype_t*)jl_apply_tuple_type_v(params, 2); - jl_method_instance_t *mi = jl_get_specialized(m, (jl_value_t*)tuptyp, jl_emptysvec); jl_atomic_store_relaxed(&m->unspecialized, mi); jl_gc_wb(m, mi); @@ -1881,11 +1881,12 @@ struct matches_env { static int get_intersect_visitor(jl_typemap_entry_t *oldentry, struct typemap_intersection_env *closure0) { struct matches_env *closure = container_of(closure0, struct matches_env, match); + jl_method_t *oldmethod = oldentry->func.method; assert(oldentry != closure->newentry && "entry already added"); assert(jl_atomic_load_relaxed(&oldentry->min_world) <= jl_atomic_load_relaxed(&closure->newentry->min_world) && "old method cannot be newer than new method"); - assert(jl_atomic_load_relaxed(&oldentry->max_world) != jl_atomic_load_relaxed(&closure->newentry->min_world) && "method cannot be added at the same time as method deleted"); + //assert(jl_atomic_load_relaxed(&oldentry->max_world) != jl_atomic_load_relaxed(&closure->newentry->min_world) && "method cannot be added at the same time as method deleted"); + assert((jl_atomic_load_relaxed(&oldentry->max_world) == ~(size_t)0)); // don't need to consider other similar methods if this oldentry will always fully intersect with them and dominates all of them - jl_method_t *oldmethod = oldentry->func.method; if (closure->match.issubty // e.g. jl_subtype(closure->newentry.sig, oldentry->sig) && jl_subtype(oldmethod->sig, (jl_value_t*)closure->newentry->sig)) { // e.g. jl_type_equal(closure->newentry->sig, oldentry->sig) if (closure->replaced == NULL || jl_atomic_load_relaxed(&closure->replaced->min_world) < jl_atomic_load_relaxed(&oldentry->min_world)) @@ -1893,7 +1894,10 @@ static int get_intersect_visitor(jl_typemap_entry_t *oldentry, struct typemap_in } if (closure->shadowed == NULL) closure->shadowed = (jl_value_t*)jl_alloc_vec_any(0); - if (closure->match.issubty) { // this should be rarely true (in fact, get_intersect_visitor should be rarely true), but might as well skip the rest of the scan fast anyways since we can + // This should be rarely true (in fact, get_intersect_visitor should be + // rarely true), but might as well skip the rest of the scan fast anyways + // since we can. 
+ if (closure->match.issubty) { int only = jl_atomic_load_relaxed(&oldmethod->dispatch_status) & METHOD_SIG_LATEST_ONLY; if (only) { size_t len = jl_array_nrows(closure->shadowed); @@ -2397,7 +2401,6 @@ struct invalidate_mt_env { jl_typemap_entry_t *newentry; jl_array_t *shadowed; size_t max_world; - int invalidated; }; static int invalidate_mt_cache(jl_typemap_entry_t *oldentry, void *closure0) { @@ -2439,7 +2442,6 @@ static int invalidate_mt_cache(jl_typemap_entry_t *oldentry, void *closure0) JL_GC_POP(); } jl_atomic_store_relaxed(&oldentry->max_world, env->max_world); - env->invalidated = 1; } } return 1; @@ -2584,7 +2586,7 @@ JL_DLLEXPORT void jl_method_table_disable(jl_method_t *method) jl_error("Method changes have been disabled via a call to disable_new_worlds."); int enabled = jl_atomic_load_relaxed(&methodentry->max_world) == ~(size_t)0; if (enabled) { - // Narrow the world age on the method to make it uncallable + // Narrow the world age on the method to make it uncallable size_t world = jl_atomic_load_relaxed(&jl_world_counter); assert(method == methodentry->func.method); jl_atomic_store_relaxed(&method->dispatch_status, 0); @@ -2592,7 +2594,7 @@ JL_DLLEXPORT void jl_method_table_disable(jl_method_t *method) jl_atomic_store_relaxed(&methodentry->max_world, world); jl_method_table_invalidate(method, world); jl_atomic_store_release(&jl_world_counter, world + 1); - } + } JL_UNLOCK(&world_counter_lock); if (!enabled) jl_errorf("Method of %s already disabled", jl_symbol_name(method->name)); @@ -2621,6 +2623,132 @@ jl_typemap_entry_t *jl_method_table_add(jl_methtable_t *mt, jl_method_t *method, return newentry; } +static int has_key(jl_genericmemory_t *keys, jl_value_t *key) +{ + for (size_t l = keys->length, i = 0; i < l; i++) { + jl_value_t *k = jl_genericmemory_ptr_ref(keys, i); + if (k == NULL) + return 0; + if (jl_genericmemory_ptr_ref(keys, i) == key) + return 1; + } + return 0; +} + +// Check if m2 is in m1's interferences set, which means !morespecific(m1, m2) +static int method_in_interferences(jl_method_t *m2, jl_method_t *m1) +{ + return has_key(jl_atomic_load_relaxed(&m1->interferences), (jl_value_t*)m2); +} + +// Find the index of a method in the method match array +static int find_method_in_matches(jl_array_t *t, jl_method_t *method) +{ + size_t len = jl_array_nrows(t); + for (size_t i = 0; i < len; i++) { + jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(t, i); + if (matc->method == method) + return i; + } + return -1; +} + +// Recursively check if any method in interferences covers the given type signature +static int check_interferences_covers(jl_method_t *m, jl_value_t *ti, jl_array_t *t, arraylist_t *visited, arraylist_t *recursion_stack) +{ + // Check if we're already visiting this method (cycle detection and memoization) + for (size_t i = 0; i < recursion_stack->len; i++) + if (recursion_stack->items[i] == (void*)m) + return 0; + + // Add this method to the recursion stack + arraylist_push(recursion_stack, (void*)m); + + jl_genericmemory_t *interferences = jl_atomic_load_relaxed(&m->interferences); + for (size_t i = 0; i < interferences->length; i++) { + jl_method_t *m2 = (jl_method_t*)jl_genericmemory_ptr_ref(interferences, i); + if (m2 == NULL) + continue; + int idx = find_method_in_matches(t, m2); + if (idx < 0) + continue; + if (method_in_interferences(m, m2)) + continue; // ambiguous + assert(visited->items[idx] != (void*)0); + if (visited->items[idx] != (void*)1) + continue; // part of the same SCC cycle (handled by ambiguity later) + if 
(jl_subtype(ti, m2->sig)) + return 1; + // Recursively check m2's interferences since m2 is more specific + if (check_interferences_covers(m2, ti, t, visited, recursion_stack)) + return 1; + } + return 0; +} + +static int check_fully_ambiguous(jl_method_t *m, jl_value_t *ti, jl_array_t *t, int include_ambiguous, int *has_ambiguity) +{ + jl_genericmemory_t *interferences = jl_atomic_load_relaxed(&m->interferences); + for (size_t i = 0; i < interferences->length; i++) { + jl_method_t *m2 = (jl_method_t*)jl_genericmemory_ptr_ref(interferences, i); + if (m2 == NULL) + continue; + int idx = find_method_in_matches(t, m2); + if (idx < 0) + continue; + if (!method_in_interferences(m, m2)) + continue; + *has_ambiguity = 1; + if (!include_ambiguous && jl_subtype(ti, m2->sig)) + return 1; + } + return 0; +} + +// Recursively check if target_method is in the interferences of (morespecific than) start_method, but not the reverse +static int method_in_interferences_recursive(jl_method_t *target_method, jl_method_t *start_method, arraylist_t *seen) +{ + // Check direct interferences first + if (method_in_interferences(start_method, target_method)) + return 0; + if (method_in_interferences(target_method, start_method)) + return 1; + + // Check if we're already visiting this method (cycle prevention and memoization) + for (size_t i = 0; i < seen->len; i++) { + if (seen->items[i] == (void*)start_method) + return 0; + } + arraylist_push(seen, (void*)start_method); + + // Recursively check interferences + jl_genericmemory_t *interferences = jl_atomic_load_relaxed(&start_method->interferences); + for (size_t i = 0; i < interferences->length; i++) { + jl_method_t *interference_method = (jl_method_t*)jl_genericmemory_ptr_ref(interferences, i); + if (interference_method == NULL) + continue; + if (method_in_interferences(start_method, interference_method)) + continue; // only follow edges to morespecific methods in search of morespecific target (skip ambiguities) + if (method_in_interferences_recursive(target_method, interference_method, seen)) + return 1; + } + + return 0; +} + +static int method_morespecific_via_interferences(jl_method_t *target_method, jl_method_t *start_method) +{ + if (target_method == start_method) + return 0; + arraylist_t seen; + arraylist_new(&seen, 0); + int result = method_in_interferences_recursive(target_method, start_method, &seen); + arraylist_free(&seen); + //assert(result == jl_method_morespecific(target_method, start_method) || jl_has_empty_intersection(target_method->sig, start_method->sig) || jl_has_empty_intersection(start_method->sig, target_method->sig)); + return result; +} + + void jl_method_table_activate(jl_typemap_entry_t *newentry) { JL_TIMING(ADD_METHOD, ADD_METHOD); @@ -2644,48 +2772,100 @@ void jl_method_table_activate(jl_typemap_entry_t *newentry) jl_value_t *loctag = NULL; // debug info for invalidation jl_value_t *isect = NULL; jl_value_t *isect2 = NULL; - jl_value_t *isect3 = NULL; - JL_GC_PUSH6(&oldvalue, &oldmi, &loctag, &isect, &isect2, &isect3); + jl_genericmemory_t *interferences = NULL; + JL_GC_PUSH6(&oldvalue, &oldmi, &loctag, &isect, &isect2, &interferences); jl_typemap_entry_t *replaced = NULL; - // then check what entries we replaced + // Check what entries this intersects with in the prior world. 
oldvalue = get_intersect_matches(jl_atomic_load_relaxed(&mt->defs), newentry, &replaced, max_world); + jl_method_t *const *d; + size_t j, n; + if (oldvalue == NULL) { + d = NULL; + n = 0; + } + else { + assert(jl_is_array(oldvalue)); + d = (jl_method_t**)jl_array_ptr_data(oldvalue); + n = jl_array_nrows(oldvalue); + oldmi = jl_alloc_vec_any(0); + } + // These get updated from their state stored in the cache files, since content in cache files gets added "all at once". int invalidated = 0; int dispatch_bits = METHOD_SIG_LATEST_WHICH; // Always set LATEST_WHICH // Check precompiled dispatch status bits int precompiled_status = jl_atomic_load_relaxed(&method->dispatch_status); if (!(precompiled_status & METHOD_SIG_PRECOMPILE_MANY)) + // This will store whether this method is currently the only result that would be returned from `ml_matches` given `sig`. dispatch_bits |= METHOD_SIG_LATEST_ONLY; // Tentatively set, will be cleared if not applicable - if (precompiled_status & METHOD_SIG_PRECOMPILE_HAS_NOTMORESPECIFIC) - dispatch_bits |= METHOD_SIG_LATEST_HAS_NOTMORESPECIFIC; - if (replaced) { - oldvalue = (jl_value_t*)replaced; - jl_method_t *m = replaced->func.method; - invalidated = 1; - method_overwrite(newentry, m); - // This is an optimized version of below, given we know the type-intersection is exact - jl_method_table_invalidate(m, max_world); - int m_dispatch = jl_atomic_load_relaxed(&m->dispatch_status); - // Clear METHOD_SIG_LATEST_ONLY and METHOD_SIG_LATEST_WHICH bits, only keeping NOTMORESPECIFIC - jl_atomic_store_relaxed(&m->dispatch_status, m_dispatch & METHOD_SIG_LATEST_HAS_NOTMORESPECIFIC); - // Edge case: don't set dispatch_bits |= METHOD_SIG_LATEST_HAS_NOTMORESPECIFIC unconditionally since `m` is not an visible method for invalidations - dispatch_bits |= (m_dispatch & METHOD_SIG_LATEST_HAS_NOTMORESPECIFIC); - if (!(m_dispatch & METHOD_SIG_LATEST_ONLY)) - dispatch_bits &= ~METHOD_SIG_LATEST_ONLY; - } - else { - jl_method_t *const *d; - size_t j, n; - if (oldvalue == NULL) { + // Holds the set of all intersecting methods not more specific than this one. + // Note: this set may be incomplete (may exclude methods whose intersection + // is covered by another method that is morespecific than both, causing them + // to have no relevant type intersection for sorting).
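// For intuition about the `interferences` set described above: in the simplest case it
// links mutually ambiguous definitions, where neither method is more specific than the
// other, so each method ends up recorded in the other's set. A minimal Julia sketch of
// such a pair (hypothetical names, not taken from the patch's tests):
//
//     g_demo(x::Integer, y) = 1
//     g_demo(x, y::Integer) = 2
//
// Both signatures intersect over (Integer, Integer) and neither is more specific than
// the other, so after the second definition each method holds the other in
// `interferences`, and a call such as g_demo(1, 2) is reported as ambiguous.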
+ interferences = (jl_genericmemory_t*)jl_atomic_load_relaxed(&method->interferences); + if (oldvalue) { + assert(n > 0); + if (replaced) { + oldvalue = (jl_value_t*)replaced; + jl_method_t *m = replaced->func.method; + invalidated = 1; + method_overwrite(newentry, m); + // This is an optimized version of below, given we know the type-intersection is exact + jl_method_table_invalidate(m, max_world); + int m_dispatch = jl_atomic_load_relaxed(&m->dispatch_status); + // Clear METHOD_SIG_LATEST_ONLY and METHOD_SIG_LATEST_WHICH bits + jl_atomic_store_relaxed(&m->dispatch_status, 0); + if (!(m_dispatch & METHOD_SIG_LATEST_ONLY)) + dispatch_bits &= ~METHOD_SIG_LATEST_ONLY; + // Take over the interference list from the replaced method + jl_genericmemory_t *m_interferences = jl_atomic_load_relaxed(&m->interferences); + if (interferences->length == 0) { + interferences = jl_genericmemory_copy(m_interferences); + } + else { + for (size_t i = 0; i < m_interferences->length; i++) { + jl_value_t *k = jl_genericmemory_ptr_ref(m_interferences, i); + if (k && !has_key(interferences, (jl_value_t*)k)) { + ssize_t idx; + interferences = jl_idset_put_key(interferences, (jl_value_t*)k, &idx); + } + } + } + ssize_t idx; + m_interferences = jl_idset_put_key(m_interferences, (jl_value_t*)method, &idx); + jl_atomic_store_release(&m->interferences, m_interferences); + jl_gc_wb(m, m_interferences); + for (j = 0; j < n; j++) { + jl_method_t *m2 = d[j]; + if (m2 && method_in_interferences(m, m2)) { + jl_genericmemory_t *m2_interferences = jl_atomic_load_relaxed(&m2->interferences); + ssize_t idx; + m2_interferences = jl_idset_put_key(m2_interferences, (jl_value_t*)method, &idx); + jl_atomic_store_release(&m2->interferences, m2_interferences); + jl_gc_wb(m2, m2_interferences); + } + } + loctag = jl_atomic_load_relaxed(&m->specializations); // use loctag for a gcroot + _Atomic(jl_method_instance_t*) *data; + size_t l; + if (jl_is_svec(loctag)) { + data = (_Atomic(jl_method_instance_t*)*)jl_svec_data(loctag); + l = jl_svec_len(loctag); + } + else { + data = (_Atomic(jl_method_instance_t*)*) &loctag; + l = 1; + } + for (size_t i = 0; i < l; i++) { + jl_method_instance_t *mi = jl_atomic_load_relaxed(&data[i]); + if ((jl_value_t*)mi == jl_nothing) + continue; + jl_array_ptr_1d_push(oldmi, (jl_value_t*)mi); + } d = NULL; n = 0; } else { - assert(jl_is_array(oldvalue)); - d = (jl_method_t**)jl_array_ptr_data(oldvalue); - n = jl_array_nrows(oldvalue); - - oldmi = jl_alloc_vec_any(0); char *morespec = (char*)alloca(n); // Compute all morespec values upfront for (j = 0; j < n; j++) @@ -2699,16 +2879,27 @@ void jl_method_table_activate(jl_typemap_entry_t *newentry) if (morespec[j] || ambig) { // !morespecific(new, old) dispatch_bits &= ~METHOD_SIG_LATEST_ONLY; - m_dispatch |= METHOD_SIG_LATEST_HAS_NOTMORESPECIFIC; + // Add the old method to this interference set + ssize_t idx; + if (!has_key(interferences, (jl_value_t*)m)) + interferences = jl_idset_put_key(interferences, (jl_value_t*)m, &idx); } if (!morespec[j]) { // !morespecific(old, new) - dispatch_bits |= METHOD_SIG_LATEST_HAS_NOTMORESPECIFIC; m_dispatch &= ~METHOD_SIG_LATEST_ONLY; + // Add the new method to its interference set + jl_genericmemory_t *m_interferences = jl_atomic_load_relaxed(&m->interferences); + ssize_t idx; + m_interferences = jl_idset_put_key(m_interferences, (jl_value_t*)method, &idx); + jl_atomic_store_release(&m->interferences, m_interferences); + jl_gc_wb(m, m_interferences); } + // Add methods that intersect but are not more specific to interference 
list jl_atomic_store_relaxed(&m->dispatch_status, m_dispatch); if (morespec[j]) continue; + + // Now examine if this caused any invalidations. loctag = jl_atomic_load_relaxed(&m->specializations); // use loctag for a gcroot _Atomic(jl_method_instance_t*) *data; size_t l; @@ -2724,12 +2915,11 @@ void jl_method_table_activate(jl_typemap_entry_t *newentry) jl_method_instance_t *mi = jl_atomic_load_relaxed(&data[i]); if ((jl_value_t*)mi == jl_nothing) continue; - isect3 = jl_type_intersection(m->sig, (jl_value_t*)mi->specTypes); - if (jl_type_intersection2(type, isect3, &isect, &isect2)) { + if (jl_type_intersection2(type, mi->specTypes, &isect, &isect2)) { // Replacing a method--see if this really was the selected method previously - // over the intersection (not ambiguous) and the new method will be selected now (morespec_is). + // over the intersection (not ambiguous) and the new method will be selected now (morespec). // TODO: this only checks pair-wise for ambiguities, but the ambiguities could arise from the interaction of multiple methods - // and thus might miss a case where we introduce an ambiguity between two existing methods + // and thus might miss a case where we introduce an ambiguity between two existing methods // We could instead work to sort this into 3 groups `morespecific .. ambiguous .. lesspecific`, with `type` in ambiguous, // such that everything in `morespecific` dominates everything in `ambiguous`, and everything in `ambiguous` dominates everything in `lessspecific` // And then compute where each isect falls, and whether it changed group--necessitating invalidation--or not. @@ -2738,9 +2928,10 @@ void jl_method_table_activate(jl_typemap_entry_t *newentry) // call invalidate_backedges(mi, max_world, "jl_method_table_insert"); // but ignore invoke-type edges int invalidatedmi = _invalidate_dispatch_backedges(mi, type, m, d, n, replaced_dispatch, ambig, max_world, morespec); - if (replaced_dispatch) + if (replaced_dispatch) { jl_atomic_store_relaxed(&mi->dispatch_status, 0); - jl_array_ptr_1d_push(oldmi, (jl_value_t*)mi); + jl_array_ptr_1d_push(oldmi, (jl_value_t*)mi); + } if (_jl_debug_method_invalidation && invalidatedmi) { jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)mi); loctag = jl_cstr_to_string("jl_method_table_insert"); @@ -2748,49 +2939,64 @@ void jl_method_table_activate(jl_typemap_entry_t *newentry) } invalidated |= invalidatedmi; } + // TODO: do we have any interesting cases left where isect3 is useful? + //jl_value_t *isect3 = NULL; + //jl_value_t *isect4 = NULL; + //jl_value_t *isect5 = NULL; + //JL_GC_PUSH3(&isect3, &isect4, &isect5); + //isect3 = jl_type_intersection(m->sig, (jl_value_t*)mi->specTypes); + //jl_type_intersection2(type, isect3, &isect4, &isect5); + //if (!jl_types_equal(isect, isect4) && (!isect2 || !jl_types_equal(isect2, isect4)) && + // (!isect5 || (!jl_types_equal(isect, isect5) && (!isect2 || !jl_types_equal(isect2, isect5))))) { + // jl_(type); + // jl_(mi->specTypes); + // jl_(m->sig); + //} + //JL_GC_POP(); + isect = NULL; + isect2 = NULL; } } } + } - jl_methcache_t *mc = jl_method_table->cache; - JL_LOCK(&mc->writelock); - struct _typename_invalidate_backedge typename_env = {type, &isect, &isect2, d, n, max_world, invalidated}; - if (!jl_foreach_top_typename_for(_typename_invalidate_backedges, type, 1, &typename_env)) { - // if the new method cannot be split into exact backedges, scan the whole table for anything that might be affected - jl_genericmemory_t *allbackedges = jl_method_table->backedges; - for
(size_t i = 0, n = allbackedges->length; i < n; i += 2) { - jl_value_t *tn = jl_genericmemory_ptr_ref(allbackedges, i); - jl_value_t *backedges = jl_genericmemory_ptr_ref(allbackedges, i+1); - if (tn && tn != jl_nothing && backedges) - _typename_invalidate_backedges((jl_typename_t*)tn, 0, &typename_env); - } - } - invalidated |= typename_env.invalidated; - if (oldmi && jl_array_nrows(oldmi)) { - // search mc->cache and leafcache and drop anything that might overlap with the new method - // this is very cheap, so we don't mind being fairly conservative at over-approximating this - struct invalidate_mt_env mt_cache_env; - mt_cache_env.max_world = max_world; - mt_cache_env.shadowed = oldmi; - mt_cache_env.newentry = newentry; - mt_cache_env.invalidated = 0; - - jl_typemap_visitor(jl_atomic_load_relaxed(&mc->cache), invalidate_mt_cache, (void*)&mt_cache_env); - jl_genericmemory_t *leafcache = jl_atomic_load_relaxed(&mc->leafcache); - size_t i, l = leafcache->length; - for (i = 1; i < l; i += 2) { - jl_value_t *entry = jl_genericmemory_ptr_ref(leafcache, i); - if (entry) { - while (entry != jl_nothing) { - invalidate_mt_cache((jl_typemap_entry_t*)entry, (void*)&mt_cache_env); - entry = (jl_value_t*)jl_atomic_load_relaxed(&((jl_typemap_entry_t*)entry)->next); - } + jl_methcache_t *mc = jl_method_table->cache; + JL_LOCK(&mc->writelock); + struct _typename_invalidate_backedge typename_env = {type, &isect, &isect2, d, n, max_world, invalidated}; + if (!jl_foreach_top_typename_for(_typename_invalidate_backedges, type, 1, &typename_env)) { + // if the new method cannot be split into exact backedges, scan the whole table for anything that might be affected + jl_genericmemory_t *allbackedges = jl_method_table->backedges; + for (size_t i = 0, n = allbackedges->length; i < n; i += 2) { + jl_value_t *tn = jl_genericmemory_ptr_ref(allbackedges, i); + jl_value_t *backedges = jl_genericmemory_ptr_ref(allbackedges, i+1); + if (tn && tn != jl_nothing && backedges) + _typename_invalidate_backedges((jl_typename_t*)tn, 0, &typename_env); + } + } + invalidated |= typename_env.invalidated; + if (oldmi && jl_array_nrows(oldmi)) { + // drop leafcache and search mc->cache and drop anything that might overlap with the new method + // this is very cheap, so we don't mind being very conservative at over-approximating this + struct invalidate_mt_env mt_cache_env; + mt_cache_env.max_world = max_world; + mt_cache_env.shadowed = oldmi; + mt_cache_env.newentry = newentry; + + jl_typemap_visitor(jl_atomic_load_relaxed(&mc->cache), invalidate_mt_cache, (void*)&mt_cache_env); + jl_genericmemory_t *leafcache = jl_atomic_load_relaxed(&mc->leafcache); + size_t i, l = leafcache->length; + for (i = 1; i < l; i += 2) { + jl_value_t *entry = jl_genericmemory_ptr_ref(leafcache, i); + if (entry) { + while (entry != jl_nothing) { + jl_atomic_store_relaxed(&((jl_typemap_entry_t*)entry)->max_world, max_world); + entry = (jl_value_t*)jl_atomic_load_relaxed(&((jl_typemap_entry_t*)entry)->next); } } - invalidated |= mt_cache_env.invalidated; } - JL_UNLOCK(&mc->writelock); + jl_atomic_store_relaxed(&mc->leafcache, (jl_genericmemory_t*)jl_an_empty_memory_any); } + JL_UNLOCK(&mc->writelock); if (invalidated && _jl_debug_method_invalidation) { jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)method); loctag = jl_cstr_to_string("jl_method_table_insert"); @@ -2798,6 +3004,8 @@ void jl_method_table_activate(jl_typemap_entry_t *newentry) } jl_atomic_store_relaxed(&newentry->max_world, ~(size_t)0); 
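// The block above drops the leafcache wholesale and narrows max_world on any typemap
// cache entries that might overlap the new method; this leans on the world-age
// mechanism, since inserting a method advances the global world counter and entries
// capped at the old world become invisible to later callers. A rough Julia-level
// sketch of that effect (f_demo is a hypothetical function; Base.get_world_counter is
// the real query):
//
//     f_demo(x) = 1
//     w0 = Base.get_world_counter()
//     f_demo(x::Int) = 2          # method insertion bumps the world counter
//     @assert Base.get_world_counter() > w0
//
// Any cache entry whose max_world was narrowed to w0 is skipped by lookups running in
// the newer world.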
jl_atomic_store_relaxed(&method->dispatch_status, dispatch_bits); // TODO: this should be sequenced fully after the world counter store + jl_atomic_store_release(&method->interferences, interferences); + jl_gc_wb(method, interferences); JL_GC_POP(); } @@ -4238,16 +4446,9 @@ static int ml_matches_visitor(jl_typemap_entry_t *ml, struct typemap_intersectio return 1; } -static int ml_mtable_visitor(jl_methtable_t *mt, void *closure0) -{ - struct typemap_intersection_env* env = (struct typemap_intersection_env*)closure0; - return jl_typemap_intersection_visitor(jl_atomic_load_relaxed(&mt->defs), 0, env); -} - // Visit the candidate methods, starting from t[idx], to determine a possible valid sort ordering, // where every morespecific method appears before any method which it has a common -// intersection with but is not partly ambiguous with (ambiguity is transitive, particularly -// if lim==-1, although morespecific is not transitive). +// intersection with but is not partly ambiguous with (ambiguity is not transitive, since morespecific is not transitive). // Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable // Inputs: // * `t`: the array of vertexes (method matches) @@ -4255,260 +4456,118 @@ static int ml_mtable_visitor(jl_methtable_t *mt, void *closure0) // * `visited`: the state of the algorithm for each vertex in `t`: either 1 if we visited it already or 1+depth if we are visiting it now // * `stack`: the state of the algorithm for the current vertex (up to length equal to `t`): the list of all vertexes currently in the depth-first path or in the current SCC // * `result`: the output of the algorithm, a sorted list of vertexes (up to length `lim`) -// * `allambig`: a list of all vertexes with an ambiguity (up to length equal to `t`), discovered while running the rest of the algorithm +// * `recursion_stack`: an array for temporary use // * `lim`: either -1 for unlimited matches, or the maximum length for `result` before returning failure (return -1). -// If specified as -1, this will return extra matches that would have been elided from the list because they were already covered by an earlier match. -// This gives a sort of maximal set of matching methods (up to the first minmax method). -// If specified as -1, the sorting will also include all "weak" edges (every ambiguous pair) which will create much larger ambiguity cycles, -// resulting in a less accurate sort order and much less accurate `*has_ambiguity` result. 
// * `include_ambiguous`: whether to filter out fully ambiguous matches from `result` // * `*has_ambiguity`: whether the algorithm does not need to compute if there is an unresolved ambiguity // * `*found_minmax`: whether there is a minmax method already found, so future fully_covers matches should be ignored // Outputs: -// * `*has_ambiguity`: whether the caller should check if there remains an unresolved ambiguity (in `allambig`) +// * `*has_ambiguity`: whether there are any ambiguities that mean the sort order is not exact // Returns: // * -1: too many matches for lim, other outputs are undefined // * 0: the child(ren) have been added to the output // * 1+: the children are part of this SCC (up to this depth) // TODO: convert this function into an iterative call, rather than recursive -static int sort_mlmatches(jl_array_t *t, size_t idx, arraylist_t *visited, arraylist_t *stack, arraylist_t *result, arraylist_t *allambig, int lim, int include_ambiguous, int *has_ambiguity, int *found_minmax) +static int sort_mlmatches(jl_array_t *t, size_t idx, arraylist_t *visited, arraylist_t *stack, arraylist_t *result, arraylist_t *recursion_stack, int lim, int include_ambiguous, int *has_ambiguity, int *found_minmax) { size_t cycle = (size_t)visited->items[idx]; if (cycle != 0) return cycle - 1; // depth remaining - jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(t, idx); - jl_method_t *m = matc->method; - jl_value_t *ti = (jl_value_t*)matc->spec_types; - int subt = matc->fully_covers != NOT_FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m->sig) - // first check if this new method is actually already fully covered by an - // existing match and we can just ignore this entry quickly - size_t result_len = 0; - if (subt) { - if (*found_minmax == 2) - visited->items[idx] = (void*)1; - } - else if (lim != -1) { - for (; result_len < result->len; result_len++) { - size_t idx2 = (size_t)result->items[result_len]; - jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(t, idx2); - jl_method_t *m2 = matc2->method; - if (jl_subtype(ti, m2->sig)) { - if (include_ambiguous) { - if (!jl_method_morespecific(m2, m)) - continue; - } - visited->items[idx] = (void*)1; - break; - } - } - } - if ((size_t)visited->items[idx] == 1) - return 0; arraylist_push(stack, (void*)idx); size_t depth = stack->len; - visited->items[idx] = (void*)(1 + depth); - cycle = depth; - int addambig = 0; - int mayexclude = 0; - // First visit all "strong" edges where the child is definitely better. - // This likely won't hit any cycles, but might (because morespecific is not transitive). - // Along the way, record if we hit any ambiguities-we may need to track those later. 
- for (size_t childidx = 0; childidx < jl_array_nrows(t); childidx++) { - if (childidx == idx) - continue; - int child_cycle = (size_t)visited->items[childidx]; - if (child_cycle == 1) - continue; // already handled - if (child_cycle != 0 && child_cycle - 1 >= cycle) - continue; // already part of this cycle - jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(t, childidx); - jl_method_t *m2 = matc2->method; - int subt2 = matc2->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m2->sig) - // TODO: we could change this to jl_has_empty_intersection(ti, (jl_value_t*)matc2->spec_types); - // since we only care about sorting of the intersections the user asked us about - if (!subt2 && jl_has_empty_intersection(m2->sig, m->sig)) - continue; - int msp = jl_method_morespecific(m, m2); - int msp2 = !msp && jl_method_morespecific(m2, m); - if (!msp) { - if (subt || !include_ambiguous || (lim != -1 && msp2)) { - if (subt2 || ((lim != -1 || (!include_ambiguous && !msp2)) && jl_subtype((jl_value_t*)ti, m2->sig))) { - // this may be filtered out as fully intersected, if applicable later - mayexclude = 1; - } - } - if (!msp2) { - addambig = 1; // record there is a least one previously-undetected ambiguity that may need to be investigated later (between m and m2) - } - } - if (lim == -1 ? msp : !msp2) // include only strong or also weak edges, depending on whether the result size is limited - continue; - // m2 is (lim!=-1 ? better : not-worse), so attempt to visit it first - // if limited, then we want to visit only better edges, because that results in finding k best matches quickest - // if not limited, then we want to visit all edges, since that results in finding the largest SCC cycles, which requires doing the fewest intersections - child_cycle = sort_mlmatches(t, childidx, visited, stack, result, allambig, lim, include_ambiguous, has_ambiguity, found_minmax); - if (child_cycle == -1) - return -1; - if (child_cycle && child_cycle < cycle) { + { // scope block + visited->items[idx] = (void*)(1 + depth); + jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(t, idx); + jl_method_t *m = matc->method; + jl_value_t *ti = (jl_value_t*)matc->spec_types; + int subt = matc->fully_covers != NOT_FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m->sig) + jl_genericmemory_t *interferences = jl_atomic_load_relaxed(&m->interferences); + cycle = depth; + // Iterate over the interferences set to get the morespecific methods + for (size_t i = 0; i < interferences->length; i++) { + jl_method_t *m2 = (jl_method_t*)jl_genericmemory_ptr_ref(interferences, i); + if (m2 == NULL) + continue; + int childidx = find_method_in_matches(t, m2); + if (childidx < 0 || (size_t)childidx == idx) + continue; + int child_cycle = (size_t)visited->items[childidx]; + if (child_cycle == 1) + continue; // already handled + if (child_cycle != 0 && child_cycle - 1 >= cycle) + continue; // already part of this cycle + if (method_in_interferences(m, m2)) + continue; + // m2 is morespecific, so attempt to visit it first + child_cycle = sort_mlmatches(t, childidx, visited, stack, result, recursion_stack, lim, include_ambiguous, has_ambiguity, found_minmax); + if (child_cycle == -1) + return -1; // record the cycle will resolve at depth "cycle" - cycle = child_cycle; - } - if (stack->len == depth) { - // if this child resolved without hitting a cycle, then there is - // some probability that this method is already fully covered now - // (same check as before), and we can delete this vertex 
now without - // anyone noticing (too much) - if (subt) { - if (*found_minmax == 2) - visited->items[idx] = (void*)1; - } - else if (lim != -1) { - for (; result_len < result->len; result_len++) { - size_t idx2 = (size_t)result->items[result_len]; - jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(t, idx2); - jl_method_t *m2 = matc2->method; - if (jl_subtype(ti, m2->sig)) { - if (include_ambiguous) { - if (!jl_method_morespecific(m2, m)) - continue; - } - visited->items[idx] = (void*)1; - break; - } - } - } - if ((size_t)visited->items[idx] == 1) { - // n.b. cycle might be < depth, if we had a cycle with a child - // idx, but since we are on the top of the stack, nobody - // observed that and so we are content to ignore this - size_t childidx = (size_t)arraylist_pop(stack); - assert(childidx == idx); (void)childidx; - assert(!subt || *found_minmax == 2); - return 0; - } + if (child_cycle && child_cycle < cycle) + cycle = child_cycle; } - } - if (matc->fully_covers == NOT_FULLY_COVERS && addambig) - arraylist_push(allambig, (void*)idx); - if (cycle != depth) - return cycle; - result_len = result->len; - if (stack->len == depth) { - // Found one "best" method to add right now. But we might exclude it if - // we determined earlier that we had that option. - if (mayexclude) { - if (!subt || *found_minmax == 2) + // There is some probability that this method is already fully covered + // now, and we can delete this vertex now without anyone noticing. + if (subt && *found_minmax) { + if (*found_minmax == 2) visited->items[idx] = (void*)1; } + else if (check_interferences_covers(m, ti, t, visited, recursion_stack)) { + visited->items[idx] = (void*)1; + } + else if (check_fully_ambiguous(m, ti, t, include_ambiguous, has_ambiguity)) { + visited->items[idx] = (void*)1; + } + + // If there were no cycles hit either, then we can potentially delete all of its edges too. + if ((size_t)visited->items[idx] == 1 && stack->len == depth) { + // n.b. cycle might be < depth, if we had a cycle with a child + // idx, but since we are on the top of the stack, nobody + // observed that and so we are content to ignore this + size_t childidx = (size_t)arraylist_pop(stack); + assert(childidx == idx); (void)childidx; + return 0; + } + if (cycle != depth) + return cycle; } - else { - // We have a set of ambiguous methods. Record that. - // This is greatly over-approximated for lim==-1 - *has_ambiguity = 1; - // If we followed weak edges above, then this also fully closed the ambiguity cycle - if (lim == -1) - addambig = 0; - // If we're only returning possible matches, now filter out this method - // if its intersection is fully ambiguous in this SCC group. 
- // This is a repeat of the "first check", now that we have completed the cycle analysis + // If this is in an SCC group, do some additional checks before returning or setting has_ambiguity + if (depth != stack->len) { + int scc_count = 0; for (size_t i = depth - 1; i < stack->len; i++) { size_t childidx = (size_t)stack->items[i]; - jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(t, childidx); - jl_value_t *ti = (jl_value_t*)matc->spec_types; - int subt = matc->fully_covers != NOT_FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m->sig) - if ((size_t)visited->items[childidx] == 1) { - assert(subt); - continue; - } - assert(visited->items[childidx] == (void*)(2 + i)); - // if we only followed strong edges before above - // check also if this set has an unresolved ambiguity missing from it - if (lim != -1 && !addambig) { - for (size_t j = 0; j < allambig->len; j++) { - if ((size_t)allambig->items[j] == childidx) { - addambig = 1; - break; - } - } - } - // always remove fully_covers matches after the first minmax ambiguity group is handled - if (subt) { - if (*found_minmax) - visited->items[childidx] = (void*)1; + if (visited->items[childidx] == (void*)1) continue; - } - else if (lim != -1) { - // when limited, don't include this match if it was covered by an earlier one - for (size_t result_len = 0; result_len < result->len; result_len++) { - size_t idx2 = (size_t)result->items[result_len]; - jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(t, idx2); - jl_method_t *m2 = matc2->method; - if (jl_subtype(ti, m2->sig)) { - if (include_ambiguous) { - if (!jl_method_morespecific(m2, m)) - continue; - } - visited->items[childidx] = (void*)1; - break; - } - } - } - } - if (!include_ambiguous && lim == -1) { - for (size_t i = depth - 1; i < stack->len; i++) { - size_t childidx = (size_t)stack->items[i]; - if ((size_t)visited->items[childidx] == 1) - continue; - jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(t, childidx); - jl_method_t *m = matc->method; - jl_value_t *ti = (jl_value_t*)matc->spec_types; - for (size_t j = depth - 1; j < stack->len; j++) { - if (i == j) - continue; - size_t idx2 = (size_t)stack->items[j]; - jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(t, idx2); - jl_method_t *m2 = matc2->method; - int subt2 = matc2->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m2->sig) - // if their intersection contributes to the ambiguity cycle - // and the contribution of m is fully ambiguous with the portion of the cycle from m2 - if (subt2 || jl_subtype((jl_value_t*)ti, m2->sig)) { - // but they aren't themselves simply ordered (here - // we don't consider that a third method might be - // disrupting that ordering and just consider them - // pairwise to keep this simple). 
- if (!jl_method_morespecific(m, m2) && !jl_method_morespecific(m2, m)) { - visited->items[childidx] = (void*)-1; - break; - } - } - } - } + scc_count++; } + if (scc_count > 1) + *has_ambiguity = 1; } // copy this cycle into the results for (size_t i = depth - 1; i < stack->len; i++) { size_t childidx = (size_t)stack->items[i]; + jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(t, childidx); + int subt = matc->fully_covers != NOT_FULLY_COVERS; + if (subt && *found_minmax) + visited->items[childidx] = (void*)1; if ((size_t)visited->items[childidx] == 1) continue; - if ((size_t)visited->items[childidx] != -1) { - assert(visited->items[childidx] == (void*)(2 + i)); - visited->items[childidx] = (void*)-1; - if (lim == -1 || result->len < lim) - arraylist_push(result, (void*)childidx); - else - return -1; - } + assert(visited->items[childidx] == (void*)(2 + i)); + visited->items[childidx] = (void*)1; + if (lim == -1 || result->len < lim) + arraylist_push(result, (void*)childidx); + else + return -1; } // now finally cleanup the stack while (stack->len >= depth) { size_t childidx = (size_t)arraylist_pop(stack); // always remove fully_covers matches after the first minmax ambiguity group is handled - //jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(t, childidx); - if (matc->fully_covers != NOT_FULLY_COVERS && !addambig) + jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(t, childidx); + int subt = matc->fully_covers == FULLY_COVERS; + if (subt && *found_minmax == 1) *found_minmax = 2; - if (visited->items[childidx] != (void*)-1) - continue; - visited->items[childidx] = (void*)1; + assert(visited->items[childidx] == (void*)1); } return 0; } @@ -4620,7 +4679,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, jl_methcache_t *mc, } } // then scan everything - if (!ml_mtable_visitor(mt, &env.match) && env.t == jl_an_empty_vec_any) { + if (!jl_typemap_intersection_visitor(jl_atomic_load_relaxed(&mt->defs), 0, &env.match) && env.t == jl_an_empty_vec_any) { JL_GC_POP(); // if we return early without returning methods, set only the min/max valid collected from matching *min_valid = env.match.min_valid; @@ -4634,43 +4693,31 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, jl_methcache_t *mc, env.match.ti = NULL; env.matc = NULL; env.match.env = NULL; search.env = NULL; size_t i, j, len = jl_array_nrows(env.t); jl_method_match_t *minmax = NULL; - int minmax_ambig = 0; - int all_subtypes = 1; + int any_subtypes = 0; if (len > 1) { // first try to pre-process the results to find the most specific - // result that fully covers the input, since we can do this in linear - // time, and the rest is O(n^2) + // result that fully covers the input, since we can do this in O(n^2) + // time, and the rest is O(n^3) // - first find a candidate for the best of these method results for (i = 0; i < len; i++) { jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i); if (matc->fully_covers == FULLY_COVERS) { + any_subtypes = 1; jl_method_t *m = matc->method; - if (minmax != NULL) { - jl_method_t *minmaxm = minmax->method; - if (jl_method_morespecific(minmaxm, m)) + for (j = 0; j < len; j++) { + if (i == j) continue; + jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(env.t, j); + if (matc2->fully_covers == FULLY_COVERS) { + jl_method_t *m2 = matc2->method; + if (!method_morespecific_via_interferences(m, m2)) + break; + } } - minmax = matc; - } - else { - all_subtypes = 0; - } - } - // - then see if it dominated all of the other choices - if 
(minmax != NULL) { - for (i = 0; i < len; i++) { - jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i); - if (matc == minmax) + if (j == len) { + // Found the minmax method + minmax = matc; break; - if (matc->fully_covers == FULLY_COVERS) { - jl_method_t *m = matc->method; - jl_method_t *minmaxm = minmax->method; - if (!jl_method_morespecific(minmaxm, m)) { - minmax_ambig = 1; - minmax = NULL; - has_ambiguity = 1; - break; - } } } } @@ -4683,24 +4730,31 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, jl_methcache_t *mc, // cost much extra and is less likely to help us hit a fast path // (we will look for this later, when we compute ambig_groupid, for // correctness) - if (!all_subtypes && minmax != NULL) { - jl_method_t *minmaxm = minmax->method; - all_subtypes = 1; + int all_subtypes = any_subtypes; + if (any_subtypes) { + jl_method_t *minmaxm = NULL; + if (minmax != NULL) + minmaxm = minmax->method; for (i = 0; i < len; i++) { jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i); if (matc->fully_covers != FULLY_COVERS) { jl_method_t *m = matc->method; - if (jl_method_morespecific(minmaxm, m)) - matc->fully_covers = SENTINEL; // put a sentinel value here for sorting - else - all_subtypes = 0; + if (minmaxm) { + if (method_morespecific_via_interferences(minmaxm, m)) { + matc->fully_covers = SENTINEL; // put a sentinel value here for sorting + continue; + } + if (method_in_interferences(minmaxm, m)) // !morespecific(m, minmaxm) + has_ambiguity = 1; + } + all_subtypes = 0; } } } // - now we might have a fast-return here, if we see that // we've already processed all of the possible outputs if (all_subtypes) { - if (minmax_ambig) { + if (minmax == NULL) { if (!include_ambiguous) { len = 0; env.t = jl_an_empty_vec_any; @@ -4711,7 +4765,6 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, jl_methcache_t *mc, } } else { - assert(minmax != NULL); jl_array_ptr_set(env.t, 0, minmax); jl_array_del_end((jl_array_t*)env.t, len - 1); len = 1; @@ -4724,20 +4777,23 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, jl_methcache_t *mc, } } if (len > 1) { - arraylist_t stack, visited, result, allambig; + arraylist_t stack, visited, result, recursion_stack; arraylist_new(&result, lim != -1 && lim < len ? lim : len); arraylist_new(&stack, 0); arraylist_new(&visited, len); - arraylist_new(&allambig, len); + arraylist_new(&recursion_stack, len); arraylist_grow(&visited, len); memset(visited.items, 0, len * sizeof(size_t)); // if we had a minmax method (any subtypes), now may now be able to // quickly cleanup some of methods int found_minmax = 0; - if (minmax != NULL) + if (has_ambiguity) + found_minmax = 1; + else if (minmax != NULL) found_minmax = 2; - else if (minmax_ambig && !include_ambiguous) + else if (any_subtypes && !include_ambiguous) found_minmax = 1; + has_ambiguity = 0; if (ambig == NULL) // if we don't care about the result, set it now so we won't bother attempting to compute it accurately later has_ambiguity = 1; for (i = 0; i < len; i++) { @@ -4748,9 +4804,9 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, jl_methcache_t *mc, // by visiting it and it might be a bit costly continue; } - int child_cycle = sort_mlmatches((jl_array_t*)env.t, i, &visited, &stack, &result, &allambig, lim == -1 || minmax == NULL ? lim : lim - 1, include_ambiguous, &has_ambiguity, &found_minmax); + int child_cycle = sort_mlmatches((jl_array_t*)env.t, i, &visited, &stack, &result, &recursion_stack, lim == -1 || minmax == NULL ? 
lim : lim - 1, include_ambiguous, &has_ambiguity, &found_minmax); if (child_cycle == -1) { - arraylist_free(&allambig); + arraylist_free(&recursion_stack); arraylist_free(&visited); arraylist_free(&stack); arraylist_free(&result); @@ -4761,89 +4817,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, jl_methcache_t *mc, assert(stack.len == 0); assert(visited.items[i] == (void*)1); } - // now compute whether there were ambiguities left in this cycle - if (has_ambiguity == 0 && allambig.len > 0) { - if (lim == -1) { - // lim is over-approximated, so has_ambiguities is too - has_ambiguity = 1; - } - else { - // go back and find the additional ambiguous methods and temporary add them to the stack - // (potentially duplicating them from lower on the stack to here) - jl_value_t *ti = NULL; - jl_value_t *isect2 = NULL; - JL_GC_PUSH2(&ti, &isect2); - for (size_t i = 0; i < allambig.len; i++) { - size_t idx = (size_t)allambig.items[i]; - jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, idx); - jl_method_t *m = matc->method; - int subt = matc->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m->sig) - for (size_t idx2 = 0; idx2 < jl_array_nrows(env.t); idx2++) { - if (idx2 == idx) - continue; - // laborious test, checking for existence and coverage of another method (m3) - // outside of the ambiguity group that dominates any ambiguous methods, - // and means we can ignore this for has_ambiguity - // (has_ambiguity is overestimated for lim==-1, since we don't compute skipped matches either) - // n.b. even if we skipped them earlier, they still might - // contribute to the ambiguities (due to lock of transitivity of - // morespecific over subtyping) - // TODO: we could improve this result by checking if the removal of some - // edge earlier means that this subgraph is now well-ordered and then be - // allowed to ignore these vertexes entirely here - jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(env.t, idx2); - jl_method_t *m2 = matc2->method; - int subt2 = matc2->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m2->sig) - if (subt) { - ti = (jl_value_t*)matc2->spec_types; - isect2 = NULL; - } - else if (subt2) { - ti = (jl_value_t*)matc->spec_types; - isect2 = NULL; - } - else { - jl_type_intersection2((jl_value_t*)matc->spec_types, (jl_value_t*)matc2->spec_types, &ti, &isect2); - } - // if their intersection contributes to the ambiguity cycle - if (ti == jl_bottom_type) - continue; - // and they aren't themselves simply ordered - if (jl_method_morespecific(m, m2) || jl_method_morespecific(m2, m)) - continue; - // now look for a third method m3 that dominated these and that fully covered this intersection already - size_t k; - for (k = 0; k < result.len; k++) { - size_t idx3 = (size_t)result.items[k]; - if (idx3 == idx || idx3 == idx2) { - has_ambiguity = 1; - break; - } - jl_method_match_t *matc3 = (jl_method_match_t*)jl_array_ptr_ref(env.t, idx3); - jl_method_t *m3 = matc3->method; - if ((jl_subtype(ti, m3->sig) || (isect2 && jl_subtype(isect2, m3->sig))) - && jl_method_morespecific(m3, m) && jl_method_morespecific(m3, m2)) { - //if (jl_subtype(matc->spec_types, ti) || jl_subtype(matc->spec_types, matc3->m3->sig)) - // // check if it covered not only this intersection, but all intersections with matc - // // if so, we do not need to check all of them separately - // j = len; - break; - } - } - if (k == result.len) - has_ambiguity = 1; - isect2 = NULL; - ti = NULL; - if (has_ambiguity) - break; - } - if 
(has_ambiguity) - break; - } - JL_GC_POP(); - } - } - arraylist_free(&allambig); + arraylist_free(&recursion_stack); arraylist_free(&visited); arraylist_free(&stack); for (j = 0; j < result.len; j++) { diff --git a/src/jltypes.c b/src/jltypes.c index d30adbd4b72fa..28a8afc10216e 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -3537,12 +3537,13 @@ void jl_init_types(void) JL_GC_DISABLED jl_method_type = jl_new_datatype(jl_symbol("Method"), core, jl_any_type, jl_emptysvec, - jl_perm_symsvec(32, + jl_perm_symsvec(33, "name", "module", "file", "line", "dispatch_status", // atomic + "interferences", // atomic "primary_world", // atomic "sig", "specializations", // !const @@ -3570,12 +3571,13 @@ void jl_init_types(void) JL_GC_DISABLED "constprop", "max_varargs", "purity"), - jl_svec(32, + jl_svec(33, jl_symbol_type, jl_module_type, jl_symbol_type, jl_int32_type, jl_uint8_type, + jl_memory_any_type, jl_ulong_type, jl_type_type, jl_any_type, // union(jl_simplevector_type, jl_method_instance_type), @@ -3605,9 +3607,9 @@ void jl_init_types(void) JL_GC_DISABLED jl_uint16_type), jl_emptysvec, 0, 1, 10); - //const static uint32_t method_constfields[1] = { 0b0 }; // (1<<0)|(1<<1)|(1<<2)|(1<<3)|(1<<6)|(1<<9)|(1<<10)|(1<<17)|(1<<21)|(1<<22)|(1<<23)|(1<<24)|(1<<25)|(1<<26)|(1<<27)|(1<<28)|(1<<29)|(1<<30); + //const static uint32_t method_constfields[] = { 0b0, 0b0 }; // (1<<0)|(1<<1)|(1<<2)|(1<<3)|(1<<6)|(1<<9)|(1<<10)|(1<<17)|(1<<21)|(1<<22)|(1<<23)|(1<<24)|(1<<25)|(1<<26)|(1<<27)|(1<<28)|(1<<29)|(1<<30); //jl_method_type->name->constfields = method_constfields; - const static uint32_t method_atomicfields[1] = { 0x10000030 }; // (1<<4)|(1<<5)||(1<<28) + const static uint32_t method_atomicfields[] = { 0x20000070, 0x0 }; // (1<<4)|(1<<5)|(1<<6)|(1<<29) jl_method_type->name->atomicfields = method_atomicfields; jl_method_instance_type = @@ -3634,7 +3636,7 @@ void jl_init_types(void) JL_GC_DISABLED jl_emptysvec, 0, 1, 3); // These fields should be constant, but Serialization wants to mutate them in initialization - //const static uint32_t method_instance_constfields[1] = { 0b0000111 }; // fields 1, 2, 3 + //const static uint32_t method_instance_constfields[1] = { 0b00000111 }; // fields 1, 2, 3 const static uint32_t method_instance_atomicfields[1] = { 0b11010000 }; // fields 5, 7, 8 //Fields 4 and 5 must be protected by method->write_lock, and thus all operations on jl_method_instance_t are threadsafe. 
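// Cross-checking the method_atomicfields mask above: the atomic fields of Method are
// now the 5th, 6th, and 7th symbols ("dispatch_status", "interferences",
// "primary_world"), i.e. bits 4-6, plus the field that previously sat at bit 28 and
// shifts to bit 29 because "interferences" was inserted before it. That gives
// (1<<4)|(1<<5)|(1<<6) = 0x70 and (1<<29) = 0x20000000, so the first word is
// 0x20000070; the second word (0x0) is needed because Method now has 33 fields, one
// more than a single 32-bit mask can describe.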
//jl_method_instance_type->name->constfields = method_instance_constfields; @@ -3854,7 +3856,7 @@ void jl_init_types(void) JL_GC_DISABLED jl_svecset(jl_methcache_type->types, 2, jl_long_type); // voidpointer jl_svecset(jl_methcache_type->types, 3, jl_long_type); // uint32_t plus alignment jl_svecset(jl_methtable_type->types, 3, jl_module_type); - jl_svecset(jl_method_type->types, 13, jl_method_instance_type); + jl_svecset(jl_method_type->types, 14, jl_method_instance_type); //jl_svecset(jl_debuginfo_type->types, 0, jl_method_instance_type); // union(jl_method_instance_type, jl_method_type, jl_symbol_type) jl_svecset(jl_method_instance_type->types, 4, jl_code_instance_type); jl_svecset(jl_code_instance_type->types, 19, jl_voidpointer_type); diff --git a/src/julia.h b/src/julia.h index 2d8eecbf358f2..094a518f9f86d 100644 --- a/src/julia.h +++ b/src/julia.h @@ -333,6 +333,7 @@ typedef struct _jl_method_t { jl_sym_t *file; int32_t line; _Atomic(uint8_t) dispatch_status; // bits defined in staticdata.jl + _Atomic(jl_genericmemory_t*) interferences; // set of intersecting methods not more specific _Atomic(size_t) primary_world; // method's type signature. redundant with TypeMapEntry->specTypes diff --git a/src/julia_internal.h b/src/julia_internal.h index 42f77c6a59f72..e9fc24dda22ca 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -677,8 +677,6 @@ typedef union { #define METHOD_SIG_LATEST_WHICH 0b0001 #define METHOD_SIG_LATEST_ONLY 0b0010 #define METHOD_SIG_PRECOMPILE_MANY 0b0100 -#define METHOD_SIG_LATEST_HAS_NOTMORESPECIFIC 0b1000 // indicates there exists some other method that is not more specific than this one -#define METHOD_SIG_PRECOMPILE_HAS_NOTMORESPECIFIC 0b10000 // precompiled version of METHOD_SIG_LATEST_HAS_NOTMORESPECIFIC JL_DLLEXPORT jl_code_instance_t *jl_engine_reserve(jl_method_instance_t *m, jl_value_t *owner); JL_DLLEXPORT void jl_engine_fulfill(jl_code_instance_t *ci, jl_code_info_t *src); @@ -1748,6 +1746,7 @@ JL_DLLEXPORT jl_value_t *jl_have_fma(jl_value_t *a); JL_DLLEXPORT int jl_stored_inline(jl_value_t *el_type); JL_DLLEXPORT jl_value_t *(jl_array_data_owner)(jl_array_t *a); JL_DLLEXPORT jl_array_t *jl_array_copy(jl_array_t *ary); +JL_DLLEXPORT jl_genericmemory_t *jl_genericmemory_copy(jl_genericmemory_t *mem); JL_DLLEXPORT uintptr_t jl_object_id_(uintptr_t tv, jl_value_t *v) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_set_next_task(jl_task_t *task) JL_NOTSAFEPOINT; diff --git a/src/method.c b/src/method.c index a04035086df90..8220178964333 100644 --- a/src/method.c +++ b/src/method.c @@ -1008,6 +1008,7 @@ JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module) m->nargs = 0; jl_atomic_store_relaxed(&m->primary_world, ~(size_t)0); jl_atomic_store_relaxed(&m->dispatch_status, 0); + jl_atomic_store_relaxed(&m->interferences, (jl_genericmemory_t*)jl_an_empty_memory_any); m->is_for_opaque_closure = 0; m->nospecializeinfer = 0; jl_atomic_store_relaxed(&m->did_scan_source, 0); diff --git a/src/staticdata.c b/src/staticdata.c index 88304c1eb6f7f..5d575188ab5bf 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -1837,8 +1837,6 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED int new_dispatch_status = 0; if (!(dispatch_status & METHOD_SIG_LATEST_ONLY)) new_dispatch_status |= METHOD_SIG_PRECOMPILE_MANY; - if (dispatch_status & METHOD_SIG_LATEST_HAS_NOTMORESPECIFIC) - new_dispatch_status |= METHOD_SIG_PRECOMPILE_HAS_NOTMORESPECIFIC; jl_atomic_store_relaxed(&newm->dispatch_status, new_dispatch_status); arraylist_push(&s->fixup_objs, 
(void*)reloc_offset); } diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c index 0b8cfc1cf4ebd..e676eabbddad3 100644 --- a/src/staticdata_utils.c +++ b/src/staticdata_utils.c @@ -726,7 +726,11 @@ static void jl_activate_methods(jl_array_t *external, jl_array_t *internal, size } for (i = 0; i < l; i++) { jl_typemap_entry_t *entry = (jl_typemap_entry_t*)jl_array_ptr_ref(external, i); + //uint64_t t0 = uv_hrtime(); jl_method_table_activate(entry); + //jl_printf(JL_STDERR, "%f ", (double)(uv_hrtime() - t0) / 1e6); + //jl_static_show(JL_STDERR, entry->func.value); + //jl_printf(JL_STDERR, "\n"); } } } diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl index 20a1df8e773bd..b50107cec3a29 100644 --- a/stdlib/Test/src/Test.jl +++ b/stdlib/Test/src/Test.jl @@ -2143,7 +2143,6 @@ function detect_ambiguities(mods::Module...; end function examine(mt::Core.MethodTable) for m in Base.MethodList(mt) - m.sig == Tuple && continue # ignore Builtins is_in_mods(parentmodule(m), recursive, mods) || continue world = Base.get_world_counter() ambig = Ref{Int32}(0) diff --git a/test/ambiguous.jl b/test/ambiguous.jl index 0f29817e74dd5..df3ba89e3eb2a 100644 --- a/test/ambiguous.jl +++ b/test/ambiguous.jl @@ -19,7 +19,7 @@ include("testenv.jl") @test length(methods(ambig, (Int, Int))) == 1 @test length(methods(ambig, (UInt8, Int))) == 0 -@test length(Base.methods_including_ambiguous(ambig, (UInt8, Int))) == 3 +@test length(Base.methods_including_ambiguous(ambig, (UInt8, Int))) == 2 @test ambig("hi", "there") == 1 @test ambig(3.1, 3.2) == 5 @@ -42,7 +42,6 @@ let err = try errstr = String(take!(io)) @test occursin(" ambig(x, y::Integer)\n @ $curmod_str", errstr) @test occursin(" ambig(x::Integer, y)\n @ $curmod_str", errstr) - @test occursin(" ambig(x::Number, y)\n @ $curmod_str", errstr) @test occursin("Possible fix, define\n ambig(::Integer, ::Integer)", errstr) end @@ -160,7 +159,7 @@ ambig(::Signed, ::Int) = 3 ambig(::Int, ::Signed) = 4 end ambs = detect_ambiguities(Ambig48312) -@test length(ambs) == 4 +@test length(ambs) == 1 # only ambiguous over (Int, Int), which is 3 or 4 module UnboundAmbig55868 module B @@ -287,7 +286,7 @@ end @test isempty(methods(Ambig8.f, (Int,))) @test isempty(methods(Ambig8.g, (Int,))) for f in (Ambig8.f, Ambig8.g) - @test length(methods(f, (Integer,))) == 2 # 1 is also acceptable + @test length(methods(f, (Integer,))) == 2 # 3 is also acceptable @test length(methods(f, (Signed,))) == 1 # 2 is also acceptable @test length(Base.methods_including_ambiguous(f, (Signed,))) == 2 @test f(0x00) == 1 @@ -413,7 +412,7 @@ let has_ambig = Ref(Int32(0)) ms = Base._methods_by_ftype(Tuple{typeof(fnoambig), Any, Any}, nothing, 4, Base.get_world_counter(), false, Ref(typemin(UInt)), Ref(typemax(UInt)), has_ambig) @test ms isa Vector @test length(ms) == 4 - @test has_ambig[] == 0 + @test has_ambig[] == 1 # 0 is better, but expensive and probably unnecessary to compute end # issue #11407 @@ -459,15 +458,38 @@ struct U55231{P} end struct V55231{P} end U55231(::V55231) = nothing (::Type{T})(::V55231) where {T<:U55231} = nothing -@test length(methods(U55231)) == 2 +@test length(methods(U55231)) == 1 U55231(a, b) = nothing -@test length(methods(U55231)) == 3 +@test length(methods(U55231)) == 2 struct S55231{P} end struct T55231{P} end (::Type{T})(::T55231) where {T<:S55231} = nothing S55231(::T55231) = nothing -@test length(methods(S55231)) == 2 +@test length(methods(S55231)) == 1 S55231(a, b) = nothing -@test length(methods(S55231)) == 3 +@test length(methods(S55231)) == 2 + +ambig10() = 
1 +ambig10(a::Vararg{Any}) = 2 +ambig10(a::Vararg{Union{Int32,Int64}}) = 6 +ambig10(a::Vararg{Matrix}) = 4 +ambig10(a::Vararg{Number}) = 7 +ambig10(a::Vararg{N}) where {N<:Number} = 5 +let ambig = Ref{Int32}(0) + ms = Base._methods_by_ftype(Tuple{typeof(ambig10), Vararg}, nothing, -1, Base.get_world_counter(), false, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig) + @test ms isa Vector + @test length(ms) == 6 + @test_broken ambig[] == 0 +end +let ambig = Ref{Int32}(0) + ms = Base._methods_by_ftype(Tuple{typeof(ambig10), Vararg{Number}}, nothing, -1, Base.get_world_counter(), false, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig) + @test ms isa Vector + @test length(ms) == 4 + @test_broken ambig[] == 0 + @test ms[1].method === which(ambig10, ()) + @test ms[2].method === which(ambig10, (Vararg{Union{Int32, Int64}},)) + @test ms[3].method === which(ambig10, Tuple{Vararg{N}} where N<:Number,) + @test ms[4].method === which(ambig10, (Vararg{Number},)) +end nothing diff --git a/test/core.jl b/test/core.jl index 4fdd3540b3ef7..28c496cedee3a 100644 --- a/test/core.jl +++ b/test/core.jl @@ -36,7 +36,7 @@ end for (T, c) in ( (Core.CodeInfo, []), (Core.CodeInstance, [:next, :min_world, :max_world, :inferred, :edges, :debuginfo, :ipo_purity_bits, :invoke, :specptr, :specsigflags, :precompile, :time_compile]), - (Core.Method, [:primary_world, :did_scan_source, :dispatch_status]), + (Core.Method, [:primary_world, :did_scan_source, :dispatch_status, :interferences]), (Core.MethodInstance, [:cache, :flags, :dispatch_status]), (Core.MethodTable, [:defs]), (Core.MethodCache, [:leafcache, :cache, :var""]), From b92bdc2fc47a106f40f266735d490fdd2c0aad9e Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Fri, 25 Jul 2025 14:58:30 -0400 Subject: [PATCH 6/9] Revert "Fix LLVM TaskDispatcher implementation issues (#58950)" This reverts commit 6846af76eb4da22d7198b737de07895447e9585a. 
--- src/Makefile | 2 +- src/jitlayers.cpp | 23 +- src/llvm-julia-task-dispatcher.h | 465 ------------------------------- 3 files changed, 16 insertions(+), 474 deletions(-) delete mode 100644 src/llvm-julia-task-dispatcher.h diff --git a/src/Makefile b/src/Makefile index 6dac1524145ef..e859acc765354 100644 --- a/src/Makefile +++ b/src/Makefile @@ -349,7 +349,7 @@ $(BUILDDIR)/gc-alloc-profiler.o $(BUILDDIR)/gc-alloc-profiler.dbg.obj: $(SRCDIR) $(BUILDDIR)/gc-page-profiler.o $(BUILDDIR)/gc-page-profiler.dbg.obj: $(SRCDIR)/gc-page-profiler.h $(BUILDDIR)/init.o $(BUILDDIR)/init.dbg.obj: $(SRCDIR)/builtin_proto.h $(BUILDDIR)/interpreter.o $(BUILDDIR)/interpreter.dbg.obj: $(SRCDIR)/builtin_proto.h -$(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/llvm-julia-task-dispatcher.h +$(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/llvm-codegen-shared.h $(BUILDDIR)/jltypes.o $(BUILDDIR)/jltypes.dbg.obj: $(SRCDIR)/builtin_proto.h $(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT): $(SRCDIR)/llvm-codegen-shared.h $(BUILDDIR)/julia_version.h $(BUILDDIR)/llvm-alloc-helpers.o $(BUILDDIR)/llvm-alloc-helpers.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-alloc-helpers.h diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index a44da64941985..3ea95ea42f596 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -6,7 +6,6 @@ #include #include "llvm/IR/Mangler.h" -#include #include #include #include @@ -47,7 +46,6 @@ using namespace llvm; #include "jitlayers.h" #include "julia_assert.h" #include "processor.h" -#include "llvm-julia-task-dispatcher.h" #if JL_LLVM_VERSION >= 180000 # include @@ -668,8 +666,17 @@ static void jl_compile_codeinst_now(jl_code_instance_t *codeinst) if (!decls.specFunctionObject.empty()) NewDefs.push_back(decls.specFunctionObject); } - auto Addrs = jl_ExecutionEngine->findSymbols(NewDefs); - + // Split batches to avoid stack overflow in the JIT linker. + // FIXME: Patch ORCJITs InPlaceTaskDispatcher to not recurse on task dispatches but + // push the tasks to a queue to be drained later. This avoids the stackoverflow caused by recursion + // in the linker when compiling a large number of functions at once. 
+ SmallVector Addrs; + for (size_t i = 0; i < NewDefs.size(); i += 1000) { + auto end = std::min(i + 1000, NewDefs.size()); + SmallVector batch(NewDefs.begin() + i, NewDefs.begin() + end); + auto AddrsBatch = jl_ExecutionEngine->findSymbols(batch); + Addrs.append(AddrsBatch); + } size_t nextaddr = 0; for (auto &this_code : linkready) { auto it = invokenames.find(this_code); @@ -1834,7 +1841,7 @@ llvm::DataLayout jl_create_datalayout(TargetMachine &TM) { JuliaOJIT::JuliaOJIT() : TM(createTargetMachine()), DL(jl_create_datalayout(*TM)), - ES(cantFail(orc::SelfExecutorProcessControl::Create(nullptr, std::make_unique<::JuliaTaskDispatcher>()))), + ES(cantFail(orc::SelfExecutorProcessControl::Create())), GlobalJD(ES.createBareJITDylib("JuliaGlobals")), JD(ES.createBareJITDylib("JuliaOJIT")), ExternalJD(ES.createBareJITDylib("JuliaExternal")), @@ -2091,7 +2098,7 @@ SmallVector JuliaOJIT::findSymbols(ArrayRef Names) Unmangled[NonOwningSymbolStringPtr(Mangled)] = Unmangled.size(); Exports.add(std::move(Mangled)); } - SymbolMap Syms = cantFail(::safelookup(ES, orc::makeJITDylibSearchOrder(ArrayRef(&JD)), std::move(Exports))); + SymbolMap Syms = cantFail(ES.lookup(orc::makeJITDylibSearchOrder(ArrayRef(&JD)), std::move(Exports))); SmallVector Addrs(Names.size()); for (auto it : Syms) { Addrs[Unmangled.at(orc::NonOwningSymbolStringPtr(it.first))] = it.second.getAddress().getValue(); @@ -2103,7 +2110,7 @@ Expected JuliaOJIT::findSymbol(StringRef Name, bool ExportedS { orc::JITDylib* SearchOrders[3] = {&JD, &GlobalJD, &ExternalJD}; ArrayRef SearchOrder = ArrayRef(&SearchOrders[0], ExportedSymbolsOnly ? 3 : 1); - auto Sym = ::safelookup(ES, SearchOrder, Name); + auto Sym = ES.lookup(SearchOrder, Name); return Sym; } @@ -2116,7 +2123,7 @@ Expected JuliaOJIT::findExternalJDSymbol(StringRef Name, bool { orc::JITDylib* SearchOrders[3] = {&ExternalJD, &GlobalJD, &JD}; ArrayRef SearchOrder = ArrayRef(&SearchOrders[0], ExternalJDOnly ? 1 : 3); - auto Sym = ::safelookup(ES, SearchOrder, getMangledName(Name)); + auto Sym = ES.lookup(SearchOrder, getMangledName(Name)); return Sym; } diff --git a/src/llvm-julia-task-dispatcher.h b/src/llvm-julia-task-dispatcher.h deleted file mode 100644 index dd4037378b6b6..0000000000000 --- a/src/llvm-julia-task-dispatcher.h +++ /dev/null @@ -1,465 +0,0 @@ -// This file is a part of Julia. License is MIT: https://julialang.org/license - -namespace { - -using namespace llvm::orc; - -template struct future_value_storage { - // Union disables default construction/destruction semantics, allowing us to - // use placement new/delete for precise control over value lifetime - union { - U value_; - }; - - future_value_storage() {} - ~future_value_storage() {} -}; - -template <> struct future_value_storage { - // No value_ member for void -}; - -struct JuliaTaskDispatcher : public TaskDispatcher { - /// Forward declarations - class future_base; - void dispatch(std::unique_ptr T) override; - void shutdown() override; - void work_until(future_base &F); -private: - /// C++ does not support non-static thread_local variables, so this needs to - /// store both the task and the associated dispatcher queue so that shutdown - /// can wait for the correct tasks to finish. 
- thread_local static SmallVector, JuliaTaskDispatcher*>> TaskQueue; - std::mutex DispatchMutex; - std::condition_variable WorkFinishedCV; - SmallVector WaitingFutures; - -public: - -/// @name ORC Promise/Future Classes -/// -/// ORC-aware promise/future implementation that integrates with the -/// TaskDispatcher system to allow efficient cooperative multitasking while -/// waiting for results (with certain limitations on what can be awaited). -/// Together they provide building blocks for a full async/await-like runtime -/// for llvm that supports multiple threads. -/// -/// Unlike std::promise/std::future alone, these classes can help dispatch other -/// tasks while waiting, preventing deadlocks and improving overall system -/// throughput. They have a similar API, though with some important differences -/// and some features simply not currently implemented. -/// -/// @{ - -template class promise; -template class future; - -/// Status for future/promise state -enum class FutureStatus : uint8_t { NotReady = 0, Ready = 1 }; - -/// @} - -/// Type-erased base class for futures, generally for scheduler use to avoid -/// needing virtual dispatches -class future_base { -public: - /// Check if the future is now ready with a value (precondition: get_promise() - /// must have been called) - bool ready() const { - if (!valid()) - report_fatal_error("ready() called before get_promise()"); - return state_->status_.load(std::memory_order_acquire) == FutureStatus::Ready; - } - - /// Check if the future is in a valid state (not moved-from and get_promise() called) - bool valid() const { return state_ != nullptr; } - - /// Wait for the future to be ready, helping with task dispatch - void wait(JuliaTaskDispatcher &D) { - // Keep helping with task dispatch until our future is ready - if (!ready()) { - D.work_until(*this); - if (state_->status_.load(std::memory_order_relaxed) != FutureStatus::Ready) - report_fatal_error( - "work_until() returned without this future being ready"); - } - } - -protected: - struct state_base { - std::atomic status_{FutureStatus::NotReady}; - }; - - future_base(state_base *state) : state_(state) {} - future_base() = default; - - /// Only allow deleting the future once it is invalid - ~future_base() { - if (state_) - report_fatal_error("get() must be called before future destruction (ensuring promise::set_value memory is valid)"); - } - - // Move constructor and assignment - future_base(future_base &&other) noexcept : state_(other.state_) { - other.state_ = nullptr; - } - - future_base &operator=(future_base &&other) noexcept { - if (this != &other) { - this->~future_base(); - state_ = other.state_; - other.state_ = nullptr; - } - return *this; - } - - state_base *state_; -}; - -/// TaskDispatcher-aware future class for cooperative await. -/// -/// @tparam T The type of value this future will provide. Use void for futures -/// that -/// signal completion without providing a value. -/// -/// This future implementation is similar to `std::future`, so most code can -/// transition to it easily. However, it differs from `std::future` in a few -/// key ways to be aware of: -/// - No exception support (or the overhead for it). -/// - The future is created before the promise, then the promise is created -/// from the future. -/// - The future is in an invalid state until get_promise() has been called. 
-/// - Waiting operations (get(&D), wait(&D)) help dispatch other tasks while -/// blocked, requiring an additional argument of which TaskDispatcher object -/// of where all associated work will be scheduled. -/// - While `wait` may be called multiple times and on multiple threads, all of -/// them must have returned before calling `get` on exactly one thread. -/// - Must call get() exactly once before destruction (enforced with -/// `report_fatal_error`) after each call to `get_promise`. Internal state is -/// freed when `get` returns, and allocated when `get_promise` is called. -/// -/// Other notable features, in common with `std::future`: -/// - Supports both value types and void specialization through the same -/// interface. -/// - Thread-safe through atomic operations. -/// - Provides acquire-release ordering with `std::promise::set_value()`. -/// - Concurrent access to any method (including to `ready`) on multiple threads -/// is not allowed. -/// - Holding any locks while calling `get()` is likely to lead to deadlock. -/// -/// @warning Users should avoid borrowing references to futures. References may -/// go out of scope and break the uniqueness contract, which may break the -/// soundness of the types. Always use move semantics or pass by value. - -template class future : public future_base { -public: - future() : future_base(nullptr) {} - future(const future &) = delete; - future &operator=(const future &) = delete; - future(future &&) = default; - future &operator=(future &&) = default; - - /// Get the value, helping with task dispatch while waiting. - /// This will destroy the underlying value, so this must be called exactly - /// once, which returns the future to the initial state. - T get(JuliaTaskDispatcher &D) { - if (!valid()) - report_fatal_error("get() must only be called once, after get_promise()"); - wait(D); - auto state_ = static_cast(this->state_); - this->state_ = nullptr; - return take_value(state_); - } - - /// Get the associated promise (must only be called once) - promise get_promise() { - if (valid()) - report_fatal_error("get_promise() can only be called once"); - auto state_ = new state(); - this->state_ = state_; - return promise(state_); - } - -private: - friend class promise; - - // Template the state struct with EBCO so that future has no wasted - // overhead for the value. The declaration of future_value_storage is far - // above here since GCC doesn't implement it properly when nested. - struct state : future_base::state_base, future_value_storage {}; - - template - typename std::enable_if::value, U>::type take_value(state *state_) { - T result = std::move(state_->value_); - state_->value_.~T(); - delete state_; - return result; - } - - template - typename std::enable_if::value, U>::type take_value(state *state_) { - delete state_; - } -}; - -/// TaskDispatcher-aware promise class that provides values to associated -/// futures. -/// -/// @tparam T The type of value this promise will provide. Use void for promises -/// that -/// signal completion without providing a value. -/// -/// This promise implementation provides the value-setting side of the -/// promise/future pair and integrates with the ORC TaskDispatcher system. Key -/// characteristics: -/// - Created from a future via get_promise() rather than creating the future from the promise. -/// - Must call get_future() on the thread that created it (it can be passed to another thread, but do not borrow a reference and use that to mutate it from another thread). 
-/// - Must call set_value() exactly once per `get_promise()` call to provide the result. -/// - Thread-safe from set_value to get. -/// - Move-only semantics to prevent accidental copying. -/// -/// The `promise` can usually be passed to another thread in one of two ways: -/// - With move semantics: -/// * `[P = F.get_promise()] () { P.set_value(); }` -/// * `[P = std::move(P)] () { P.set_value(); }` -/// * Advantages: clearer where `P` is owned, automatic deadlock detection -/// on destruction, -/// easier memory management if the future is returned from the function. -/// - By reference: -/// * `[&P] () { P.set_value(); }` -/// * Advantages: simpler memory management if the future is consumed in the -/// same function. -/// * Disadvantages: more difficult memory management if the future is -/// returned from the function, no deadlock detection. -/// -/// @warning Users should avoid borrowing references to promises. References may -/// go out of scope and break the uniqueness contract, which may break the -/// soundness of the types. Always use move semantics or pass by value. -/// -/// @par Error Handling: -/// The promise/future system uses report_fatal_error() for misuse: -/// - Calling set_value() more than once. -/// - Destroying a future without calling get(). -/// - Calling get() more than once on a future. -/// -/// @par Thread Safety: -/// - Each promise/future must only be accessed by one thread, as concurrent -/// calls to the API functions may result in crashes. -/// - Multiple threads can safely access different promise/future pairs. -/// - set_value() and get() operations are atomic and thread-safe. -/// - Move operations should only be performed by a single thread. -template class promise { - friend class future; - -public: - promise() : state_(nullptr) {} - - ~promise() { - // Assert proper promise lifecycle: ensure set_value was called if promise was valid. - // This can catch deadlocks where a promise is created but set_value() is - // never called, though only if the promise is moved from instead of - // borrowed from the frame with the future. - // Empty promises (state_ == nullptr) are allowed to be destroyed without calling set_value. - } - - promise(const promise &) = delete; - promise &operator=(const promise &) = delete; - - promise(promise &&other) noexcept - : state_(other.state_) { - other.state_ = nullptr; - } - - promise &operator=(promise &&other) noexcept { - if (this != &other) { - this->~promise(); - state_ = other.state_; - other.state_ = nullptr; - } - return *this; - } - - - /// Set the value (must only be called once) - // In C++20, this std::conditional weirdness can probably be replaced just - // with requires. It ensures that we don't try to define a method for `void&`, - // but that if the user calls set_value(v) for any value v that they get a - // member function error, instead of no member named 'value_'. 
- template - void - set_value(const typename std::conditional::value, - std::nullopt_t, T>::type &value) const { - assert(state_ && "set_value() can only be called once"); - new (&state_->value_) T(value); - state_->status_.store(FutureStatus::Ready, std::memory_order_release); - state_ = nullptr; - } - - template - void set_value(typename std::conditional::value, - std::nullopt_t, T>::type &&value) const { - assert(state_ && "set_value() can only be called once"); - new (&state_->value_) T(std::move(value)); - state_->status_.store(FutureStatus::Ready, std::memory_order_release); - state_ = nullptr; - } - - template - typename std::enable_if::value, void>::type - set_value(const std::nullopt_t &value) = delete; - - template - typename std::enable_if::value, void>::type - set_value(std::nullopt_t &&value) = delete; - - template - typename std::enable_if::value, void>::type set_value() const { - assert(state_ && "set_value() can only be called once"); - state_->status_.store(FutureStatus::Ready, std::memory_order_release); - state_ = nullptr; - } - - /// Swap with another promise - void swap(promise &other) noexcept { - using std::swap; - swap(state_, other.state_); - } - -private: - explicit promise(typename future::state *state) - : state_(state) {} - - mutable typename future::state *state_; -}; - -}; // class JuliaTaskDispatcher - -thread_local SmallVector, JuliaTaskDispatcher *>> JuliaTaskDispatcher::TaskQueue; - -void JuliaTaskDispatcher::dispatch(std::unique_ptr T) { - TaskQueue.push_back(std::pair(std::move(T), this)); -} - -void JuliaTaskDispatcher::shutdown() { - // Keep processing until no tasks belonging to this dispatcher remain - while (true) { - // Check if any task belongs to this dispatcher - auto it = std::find_if( - TaskQueue.begin(), TaskQueue.end(), - [this](const auto &TaskPair) { return TaskPair.second == this; }); - - // If no tasks belonging to this dispatcher, we're done - if (it == TaskQueue.end()) - return; - - // Create a future/promise pair to wait for completion of this task - future taskFuture; - // Replace the task with a GenericNamedTask that wraps the original task - // with a notification of completion that this thread can work_until. 
- auto originalTask = std::move(it->first); - it->first = makeGenericNamedTask( - [originalTask = std::move(originalTask), - taskPromise = taskFuture.get_promise()]() { - originalTask->run(); - taskPromise.set_value(); - }, - "Shutdown task marker"); - - // Wait for the task to complete - taskFuture.get(*this); - } -} - -void JuliaTaskDispatcher::work_until(future_base &F) { - while (!F.ready()) { - // First, process any tasks in our local queue - // Process in LIFO order (most recently added first) to avoid deadlocks - // when tasks have dependencies on each other - while (!TaskQueue.empty()) { - { - auto TaskPair = std::move(TaskQueue.back()); - TaskQueue.pop_back(); - TaskPair.first->run(); - } - - // Notify any threads that might be waiting for work to complete - { - std::lock_guard Lock(DispatchMutex); - bool ShouldNotify = llvm::any_of( - WaitingFutures, [](future_base *F) { return F->ready(); }); - if (ShouldNotify) { - WaitingFutures.clear(); - WorkFinishedCV.notify_all(); - } - } - - // Check if our future is now ready - if (F.ready()) - return; - } - - // If we get here, our queue is empty but the future isn't ready - // We need to wait for other threads to finish work that should complete our - // future - { - std::unique_lock Lock(DispatchMutex); - WaitingFutures.push_back(&F); - WorkFinishedCV.wait(Lock, [&F]() { return F.ready(); }); - } - } -} - -} // End namespace - -namespace std { -template -void swap(::JuliaTaskDispatcher::promise &lhs, ::JuliaTaskDispatcher::promise &rhs) noexcept { - lhs.swap(rhs); -} -} // End namespace std - -// n.b. this actually is sometimes a safepoint -Expected -safelookup(ExecutionSession &ES, - const JITDylibSearchOrder &SearchOrder, - SymbolLookupSet Symbols, LookupKind K = LookupKind::Static, - SymbolState RequiredState = SymbolState::Ready, - RegisterDependenciesFunction RegisterDependencies = NoDependenciesToRegister) JL_NOTSAFEPOINT { - JuliaTaskDispatcher::future> PromisedFuture; - auto NotifyComplete = [PromisedResult = PromisedFuture.get_promise()](Expected R) { - PromisedResult.set_value(std::move(R)); - }; - ES.lookup(K, SearchOrder, std::move(Symbols), RequiredState, - std::move(NotifyComplete), RegisterDependencies); - return PromisedFuture.get(static_cast(ES.getExecutorProcessControl().getDispatcher())); -} - -Expected -safelookup(ExecutionSession &ES, - const JITDylibSearchOrder &SearchOrder, - SymbolStringPtr Name, - SymbolState RequiredState = SymbolState::Ready) JL_NOTSAFEPOINT { - SymbolLookupSet Names({Name}); - - if (auto ResultMap = safelookup(ES, SearchOrder, std::move(Names), LookupKind::Static, - RequiredState, NoDependenciesToRegister)) { - assert(ResultMap->size() == 1 && "Unexpected number of results"); - assert(ResultMap->count(Name) && "Missing result for symbol"); - return std::move(ResultMap->begin()->second); - } else - return ResultMap.takeError(); -} - -Expected -safelookup(ExecutionSession &ES, - ArrayRef SearchOrder, SymbolStringPtr Name, - SymbolState RequiredState = SymbolState::Ready) JL_NOTSAFEPOINT { - return safelookup(ES, makeJITDylibSearchOrder(SearchOrder), Name, RequiredState); -} - -Expected -safelookup(ExecutionSession &ES, - ArrayRef SearchOrder, StringRef Name, - SymbolState RequiredState = SymbolState::Ready) JL_NOTSAFEPOINT { - return safelookup(ES, SearchOrder, ES.intern(Name), RequiredState); -} From 0dfb7c9fc72a4cee0789e33eb2951237793ba93f Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Fri, 25 Jul 2025 15:43:48 -0400 Subject: [PATCH 7/9] Fix llvmpasses --- 
test/llvmpasses/late-lower-gc.ll | 1 + 1 file changed, 1 insertion(+) diff --git a/test/llvmpasses/late-lower-gc.ll b/test/llvmpasses/late-lower-gc.ll index 6d5c42f50fcde..0073d4be09289 100644 --- a/test/llvmpasses/late-lower-gc.ll +++ b/test/llvmpasses/late-lower-gc.ll @@ -201,6 +201,7 @@ define void @decayar([2 x {} addrspace(10)* addrspace(11)*] %ar) { define swiftcc ptr addrspace(10) @insert_element(ptr swiftself %0) { ; CHECK-LABEL: @insert_element + %v2 = call ptr @julia.get_pgcstack() %2 = alloca [10 x i64], i32 1, align 8 ; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 10) ; CHECK: [[gc_slot_addr_:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0) From 1adfbc3523bc58199daac1e7a9b992fb13917591 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Wed, 23 Jul 2025 06:44:34 -0400 Subject: [PATCH 8/9] remove a testset from MMAP that might cause CI to now fail on Windows (#59062) (cherry picked from commit 4718f43b5afe34ae7aa12d60755a817f0e4f6808) --- stdlib/Mmap/test/runtests.jl | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/stdlib/Mmap/test/runtests.jl b/stdlib/Mmap/test/runtests.jl index 224664e5faaa2..0c34c49d400a7 100644 --- a/stdlib/Mmap/test/runtests.jl +++ b/stdlib/Mmap/test/runtests.jl @@ -341,18 +341,18 @@ end GC.gc() rm(file) -@testset "test for #58982 - mmap with primitive types" begin - file = tempname() - primitive type PrimType9Bytes 9*8 end - arr = Vector{PrimType9Bytes}(undef, 2) - write(file, arr) - m = mmap(file, Vector{PrimType9Bytes}) - @test length(m) == 2 - @test m[1] == arr[1] - @test m[2] == arr[2] - finalize(m); m = nothing; GC.gc() - rm(file) -end +# test for #58982 - mmap with primitive types +file = tempname() +primitive type PrimType9Bytes 9*8 end +arr = Vector{PrimType9Bytes}(undef, 2) +write(file, arr) +m = mmap(file, Vector{PrimType9Bytes}) +@test length(m) == 2 +@test m[1] == arr[1] +@test m[2] == arr[2] +finalize(m); m = nothing; GC.gc() +rm(file) + @testset "Docstrings" begin @test isempty(Docs.undocumented_names(Mmap)) From 227ff673657b598d196119920047cbbe5a311644 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Thu, 24 Jul 2025 11:54:35 -0400 Subject: [PATCH 9/9] build: Also pass -fno-strict-aliasing for C++ (#59066) As diagnosed by Andrew Pinski (https://github.com/JuliaLang/julia/issues/58466#issuecomment-3105141193), we are not respecting strict aliasing currently. We turn this off for C, but the flag appears to be missing for C++. Looks like it's been that way ever since that flag was first added to our build system (#484). We should probably consider running TypeSanitizer over our code base to see if we can make our code correct under strict aliasing as compilers are increasingly taking advantage of it. Fixes #58466 (cherry picked from commit 095e75304306484cb189249c3d21cf376afb9f6f) --- Make.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Make.inc b/Make.inc index 466e97cc43c4f..32c3cd4595a20 100644 --- a/Make.inc +++ b/Make.inc @@ -555,7 +555,7 @@ JCPPFLAGS_COMMON := -fasynchronous-unwind-tables JCPPFLAGS_CLANG := $(JCPPFLAGS_COMMON) -mllvm -enable-tail-merge=0 JCPPFLAGS_GCC := $(JCPPFLAGS_COMMON) -fno-tree-tail-merge -JCXXFLAGS_COMMON := -pipe $(fPIC) -fno-rtti -std=c++17 -Wformat -Wformat-security +JCXXFLAGS_COMMON := -pipe $(fPIC) -fno-rtti -std=c++17 -Wformat -Wformat-security -fno-strict-aliasing JCXXFLAGS_CLANG := $(JCXXFLAGS_COMMON) -pedantic JCXXFLAGS_GCC := $(JCXXFLAGS_COMMON) -fno-gnu-unique
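To make the rationale above concrete: under strict aliasing the optimizer may assume that pointers to unrelated types never refer to the same object, so the C-style type punning common in runtime code can be miscompiled at higher optimization levels unless -fno-strict-aliasing is passed. A minimal illustrative sketch, not taken from the Julia sources:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // UB under strict aliasing: a float object is read through an unrelated
    // uint32_t lvalue, so an optimizer that assumes the two pointers cannot
    // alias is free to reorder or drop the load. -fno-strict-aliasing makes
    // GCC/Clang tolerate this pattern.
    uint32_t bits_punned(float f) {
        return *reinterpret_cast<uint32_t *>(&f);
    }

    // Well-defined alternative: copy the object representation with memcpy.
    uint32_t bits_memcpy(float f) {
        uint32_t u;
        std::memcpy(&u, &f, sizeof u);
        return u;
    }

    int main() {
        std::printf("%08x %08x\n", (unsigned)bits_punned(1.0f), (unsigned)bits_memcpy(1.0f));
        return 0;
    }

On mainstream compilers both functions typically lower to the same single move, which is why migrating code to the memcpy form (for example with TypeSanitizer's help, as suggested above) is the longer-term fix, with -fno-strict-aliasing as the safety net in the meantime.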