@@ -108,7 +108,7 @@ function print_with_code(preprint, postprint, io::IO, src::CodeInfo)
108108 :displaysize => displaysize (io),
109109 :SOURCE_SLOTNAMES => Base. sourceinfo_slotnames (src))
110110 used = BitSet ()
111- cfg = Core . Compiler . compute_basic_blocks (src. code)
111+ cfg = compute_basic_blocks (src. code)
112112 for stmt in src. code
113113 Core. Compiler. scan_ssa_use! (push!, used, stmt)
114114 end
@@ -629,8 +629,7 @@ function lines_required!(isrequired::AbstractVector{Bool}, objs, src::CodeInfo,
629629 objs = add_requests! (isrequired, objs, edges, norequire)
630630
631631 # Compute basic blocks, which we'll use to make sure we mark necessary control-flow
632- cfg = Core. Compiler. compute_basic_blocks (src. code) # needed for control-flow analysis
633- domtree = construct_domtree (cfg. blocks)
632+ cfg = compute_basic_blocks (src. code) # needed for control-flow analysis
634633 postdomtree = construct_postdomtree (cfg. blocks)
635634
636635 # We'll mostly use generic graph traversal to discover all the lines we need,
@@ -651,14 +650,18 @@ function lines_required!(isrequired::AbstractVector{Bool}, objs, src::CodeInfo,
651650
652651 # Add control-flow
653652 changed |= add_loops! (isrequired, cfg)
654- changed |= add_control_flow! (isrequired, cfg, domtree , postdomtree)
653+ changed |= add_control_flow! (isrequired, src, cfg , postdomtree)
655654
656655 # So far, everything is generic graph traversal. Now we add some domain-specific information
657656 changed |= add_typedefs! (isrequired, src, edges, typedefs, norequire)
658657 changed |= add_inplace! (isrequired, src, edges, norequire)
659658
660659 iter += 1 # just for diagnostics
661660 end
661+
662+ # now mark the active goto nodes
663+ add_active_gotos! (isrequired, src, cfg, postdomtree)
664+
662665 return isrequired
663666end
664667
@@ -752,48 +755,140 @@ function add_loops!(isrequired, cfg)
752755 return changed
753756end
754757
755- function add_control_flow! (isrequired, cfg, domtree, postdomtree)
756- changed, _changed = false , true
757- blocks = cfg. blocks
758- nblocks = length (blocks)
759- while _changed
760- _changed = false
761- for (ibb, bb) in enumerate (blocks)
762- r = rng (bb)
763- if any (view (isrequired, r))
764- # Walk up the dominators
765- jbb = ibb
766- while jbb != 1
767- jdbb = domtree. idoms_bb[jbb]
768- dbb = blocks[jdbb]
769- # Check the successors; if jbb doesn't post-dominate, mark the last statement
770- for s in dbb. succs
771- if ! postdominates (postdomtree, jbb, s)
772- idxlast = rng (dbb)[end ]
773- _changed |= ! isrequired[idxlast]
774- isrequired[idxlast] = true
775- break
776- end
777- end
778- jbb = jdbb
758+ using Core: CodeInfo
759+ using Core. Compiler: CFG, BasicBlock, compute_basic_blocks
760+
# The goal of this function is to request concretization of the minimal control flow
# necessary to evaluate the statements whose concretization has already been requested.
# The basic algorithm is based on what was proposed in [^Wei84]. If there is even one active
# block among the blocks reachable from a conditional branch up to its successors' nearest
# common post-dominator (referred to as 𝑰𝑵𝑭𝑳 in the paper), it is necessary to follow
# that conditional branch and execute the code. Otherwise, execution can be short-circuited
# from the conditional branch to the nearest common post-dominator.
#
# COMBAK: It is important to note that in Julia's intermediate code representation (`CodeInfo`),
# "short-circuiting" a specific code region is not a simple task. Simply ignoring the path
# to the post-dominator does not guarantee fall-through to the post-dominator. Therefore,
# a more careful implementation is required for this aspect.
#
# Arguments:
# - `isrequired`: per-statement flags, indexed by statement number; mutated in place.
# - `src`: the `CodeInfo` whose `code` holds the statements described by `cfg`.
# - `cfg`: the control-flow graph of `src.code`.
# - `postdomtree`: post-dominator tree, forwarded to `is_conditional_block_active`.
#
# Returns `true` if at least one flag in `isrequired` was newly set, `false` otherwise.
#
# [^Wei84]: M. Weiser, "Program Slicing," IEEE Transactions on Software Engineering, 10, pages 352-357, July 1984.
function add_control_flow!(isrequired, src::CodeInfo, cfg::CFG, postdomtree)
    changed = false
    for bb in cfg.blocks # forward traversal
        # Only two-way branches are decided here. Blocks without successors have nothing
        # to mark, and single-successor terminators are deliberately left unmarked:
        # `GotoNode`s are marked later.
        length(bb.succs) == 2 || continue
        termidx = bb.stmts[end]
        @assert is_conditional_terminator(src.code[termidx]) "invalid IR"
        # When the branch is inactive we mark nothing, i.e. we fall through to the
        # post-dominator block by short-circuiting all statements in between.
        if is_conditional_block_active(isrequired, bb, cfg, postdomtree) && !isrequired[termidx]
            isrequired[termidx] = true
            changed = true
        end
    end
    return changed
end
803+
# Return `true` when `stmt` is a terminator that can transfer control to two different
# places: a `GotoIfNot`, or an "enter" statement. The latter is recognized as an
# `EnterNode` when that name is defined in this module, and as an `Expr(:enter, ...)`
# otherwise; the choice is made once, at macro-expansion time.
function is_conditional_terminator(@nospecialize stmt)
    stmt isa GotoIfNot && return true
    @static if @isdefined(EnterNode)
        return stmt isa EnterNode
    else
        return isexpr(stmt, :enter)
    end
end
806+
# Decide whether the conditional branch terminating `bb` has to be followed.
#
# The branch is considered active when any block of its 𝑰𝑵𝑭𝑳 set, other than the
# post-dominator block itself, contains at least one statement flagged in `isrequired`.
function is_conditional_block_active(isrequired, bb::BasicBlock, cfg::CFG, postdomtree)
    return visit_𝑰𝑵𝑭𝑳_blocks(bb, cfg, postdomtree) do postdominator::Int, 𝑰𝑵𝑭𝑳::BitSet
        return any(𝑰𝑵𝑭𝑳) do blk
            # the post-dominator block is never inspected (checked before indexing)
            blk != postdominator && any(view(isrequired, cfg.blocks[blk].stmts))
        end
    end
end
820+
# Call `func(postdominator, 𝑰𝑵𝑭𝑳)` for the two-way branch that ends `bb` and return its
# result. `postdominator` is the nearest common dominator of the two successors in
# `postdomtree`; 𝑰𝑵𝑭𝑳 is the union of the block sets that `reachable_blocks` yields from
# each successor towards `postdominator`.
function visit_𝑰𝑵𝑭𝑳_blocks(func, bb::BasicBlock, cfg::CFG, postdomtree)
    first_succ, second_succ = bb.succs
    postdominator = nearest_common_dominator(postdomtree, first_succ, second_succ)
    𝑰𝑵𝑭𝑳 = reachable_blocks(cfg, first_succ, postdominator)
    union!(𝑰𝑵𝑭𝑳, reachable_blocks(cfg, second_succ, postdominator))
    return func(postdominator, 𝑰𝑵𝑭𝑳)
end
827+
# Collect the indices of the blocks reachable from `from_bb` by following `succs` edges,
# without walking past `to_bb`.
#
# The result is a `BitSet` that always contains `from_bb`. When `to_bb != from_bb` it
# also always contains `to_bb`, whether or not `to_bb` is actually reachable.
function reachable_blocks(cfg, from_bb::Int, to_bb::Int)
    seen = BitSet((from_bb,))
    from_bb == to_bb && return seen
    # Pre-marking the target as seen is what keeps the walk from crossing it.
    push!(seen, to_bb)
    pending = Int[from_bb]
    while !isempty(pending)
        current = pop!(pending)
        for succ in cfg.blocks[current].succs
            succ in seen && continue
            push!(seen, succ)
            push!(pending, succ)
        end
    end
    return seen
end
846+
# Mark the `GotoNode` terminators of all blocks that are not dead.
#
# Dead blocks are obtained from `compute_dead_blocks` for the current `isrequired`. For
# every other block with exactly one successor, the last statement is flagged in
# `isrequired` if it is a `GotoNode`.
#
# Arguments:
# - `isrequired`: per-statement flags, indexed by statement number; mutated in place.
# - `src`: the `CodeInfo` whose `code` holds the statements described by `cfg`.
# - `cfg`: the control-flow graph of `src.code`.
# - `postdomtree`: post-dominator tree, forwarded to `compute_dead_blocks`.
#
# Returns `true` only if at least one flag was newly set; a goto that was already
# flagged does not count as a change.
function add_active_gotos!(isrequired, src::CodeInfo, cfg::CFG, postdomtree)
    dead_blocks = compute_dead_blocks(isrequired, src, cfg, postdomtree)
    changed = false
    for (bbidx, bb) in enumerate(cfg.blocks)
        bbidx in dead_blocks && continue
        length(bb.succs) == 1 || continue
        termidx = bb.stmts[end]
        src.code[termidx] isa GotoNode || continue
        if !isrequired[termidx]
            isrequired[termidx] = true
            changed = true
        end
    end
    return changed
end
864+
865+ # find dead blocks using the same approach as `add_control_flow!`, for the converged `isrequired`
# Find dead blocks using the same approach as `add_control_flow!`, for the converged
# `isrequired`.
#
# For every block with two successors, the 𝑰𝑵𝑭𝑳 set of its conditional branch (minus the
# post-dominator block) is inspected. If none of those blocks contains a statement
# flagged in `isrequired`, all of them are added to the result.
#
# Arguments:
# - `isrequired`: per-statement flags, indexed by statement number; only read here.
# - `src`: the `CodeInfo` whose `code` holds the statements described by `cfg`.
# - `cfg`: the control-flow graph of `src.code`.
# - `postdomtree`: post-dominator tree, forwarded to `visit_𝑰𝑵𝑭𝑳_blocks`.
#
# Returns a `BitSet` of block indices.
function compute_dead_blocks(isrequired, src::CodeInfo, cfg::CFG, postdomtree)
    dead_blocks = BitSet()
    for bb in cfg.blocks
        length(bb.succs) == 2 || continue
        termidx = bb.stmts[end]
        @assert is_conditional_terminator(src.code[termidx]) "invalid IR"
        visit_𝑰𝑵𝑭𝑳_blocks(bb, cfg, postdomtree) do postdominator::Int, 𝑰𝑵𝑭𝑳::BitSet
            # The post-dominator block is neither inspected nor ever reported as dead,
            # so drop it before looking at the region.
            delete!(𝑰𝑵𝑭𝑳, postdominator)
            is_active = any(𝑰𝑵𝑭𝑳) do blk
                any(view(isrequired, cfg.blocks[blk].stmts))
            end
            is_active || union!(dead_blocks, 𝑰𝑵𝑭𝑳)
            return nothing
        end
    end
    return dead_blocks
end
798893
799894# Do a traveral of "numbered" predecessors and find statement ranges and names of type definitions
0 commit comments