Skip to content

Commit a9569b4

Browse files
authored
Improve parsing of links with type disambiguation that include generics (#1338)
* Improve parsing of links with type disambiguation that include generics rdar://160232871 * Add documentation for the private type signature string scanner API
1 parent 9aabe36 commit a9569b4

File tree

2 files changed

+238
-15
lines changed

2 files changed

+238
-15
lines changed

Sources/SwiftDocC/Infrastructure/Link Resolution/PathHierarchy+TypeSignature.swift

Lines changed: 153 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
This source file is part of the Swift.org open source project
33

4-
Copyright (c) 2023-2024 Apple Inc. and the Swift project authors
4+
Copyright (c) 2023-2025 Apple Inc. and the Swift project authors
55
Licensed under Apache License v2.0 with Runtime Library Exception
66

77
See https://swift.org/LICENSE.txt for license information
@@ -522,49 +522,121 @@ extension PathHierarchy.PathParser {
522522

523523
// MARK: Scanning a substring
524524

525+
/// A file-private, low-level string scanner type that's only designed for parsing type signature based disambiguation suffixes in authored links.
526+
///
527+
/// ## Correct usage
528+
///
529+
/// The higher-level methods like ``scanReturnTypes()``, ``scanArguments()``, ``scanTuple()``, or ``scanValue()`` make assumptions about the scanner's content and current state.
530+
/// For example:
531+
/// - ``scanReturnTypes()`` knows that return types are specified after any parameter types and requires that the caller has already scanned the parameter types and advanced past the `"->"` separator.
532+
/// It's the caller's (`parseTypeSignatureDisambiguation(pathComponent:)` above) responsibility to do these things correctly.
533+
/// Similarly, it's the caller's responsibility to advance past the `"-"` prefix, verify that the scanner points to an open parenthesis character (`(`), and advance past that character before calling ``scanArguments()`` to scan the parameter types.
534+
/// Failing to do either of these things will result in unexpected parsed disambiguation that DocC will fail to find a match for.
535+
/// - Both ``scanArguments()`` and ``scanTuple()`` expect that the disambiguation portion of the authored link has a balanced number of open and close parentheses (`(` and `)`).
536+
/// If the authored link contains unbalanced parentheses, then the disambiguation isn't valid and the scanner will return a parsed value that DocC will fail to find a match for.
537+
/// - ``scanValue()`` expects that the disambiguation portion of the authored link has a balanced number of open and close angle brackets (`<` and `>`).
538+
/// If the authored link contains unbalanced angle brackets, then the disambiguation isn't valid and the scanner will return a parsed value that DocC will fail to find a match for.
525539
private struct StringScanner: ~Copyable {
526540
private var remaining: Substring
527541

528542
init(_ original: Substring) {
529543
remaining = original
530544
}
531545

532-
func peek() -> Character? {
546+
/// Returns the next character _without_ advancing the scanner
547+
private func peek() -> Character? {
533548
remaining.first
534549
}
535550

536-
mutating func take() -> Character {
551+
/// Advances the scanner and returns the scanned character.
552+
private mutating func take() -> Character {
537553
remaining.removeFirst()
538554
}
539555

556+
/// Advances the scanner by `count` elements and returns the scanned substring.
540557
mutating func take(_ count: Int) -> Substring {
541558
defer { remaining = remaining.dropFirst(count) }
542559
return remaining.prefix(count)
543560
}
544561

545-
mutating func takeAll() -> Substring {
562+
/// Advances the scanner to the end and returns the scanned substring.
563+
private mutating func takeAll() -> Substring {
546564
defer { remaining.removeAll() }
547565
return remaining
548566
}
549567

550-
mutating func scan(until predicate: (Character) -> Bool) -> Substring? {
568+
/// Advances the scanner up to the first character that satisfies the given `predicate` and returns the scanned substring.
569+
///
570+
/// If the scanner doesn't contain any characters that satisfy the given `predicate`, then this method returns `nil` _without_ advancing the scanner.
571+
///
572+
/// For example, consider a scanner that has already advanced 4 characters into the string `"One,Two,Three"`
573+
/// ```
574+
/// One,Two,Three
575+
/// ^
576+
/// ```
577+
/// Calling `scanner.scan(until: \.isNumber)` returns `nil` without advancing the scanner because none of the (remaining) characters is a number.
578+
///
579+
/// Calling `scanner.scan(until: { $0 == "," })` advances the scanner by 3 additional characters, returning the scanned `"Two"` substring.
580+
/// ```
581+
/// One,Two,Three
582+
/// ^
583+
/// ```
584+
private mutating func scan(until predicate: (Character) -> Bool) -> Substring? {
551585
guard let index = remaining.firstIndex(where: predicate) else {
552586
return nil
553587
}
554588
defer { remaining = remaining[index...] }
555589
return remaining[..<index]
556590
}
557591

592+
/// Advances the scanner up to and past the first character that satisfies the given `predicate` and returns the scanned substring.
593+
///
594+
/// If the scanner doesn't contain any characters that satisfy the given `predicate`, then this method returns `nil` _without_ advancing the scanner.
595+
///
596+
/// For example, consider a scanner that has already advanced 4 characters into the string `"One,Two,Three"`
597+
/// ```
598+
/// One,Two,Three
599+
/// ^
600+
/// ```
601+
/// Calling `scanner.scan(past: \.isNumber)` returns `nil` without advancing the scanner because none of the (remaining) characters is a number.
602+
///
603+
/// Calling `scanner.scan(past: { $0 == "," })` advances the scanner by 4 additional characters, returning the scanned `"Two,"` substring.
604+
/// ```
605+
/// One,Two,Three
606+
/// ^
607+
/// ```
608+
private mutating func scan(past predicate: (Character) -> Bool) -> Substring? {
609+
guard let beforeIndex = remaining.firstIndex(where: predicate) else {
610+
return nil
611+
}
612+
let index = remaining.index(after: beforeIndex)
613+
defer { remaining = remaining[index...] }
614+
return remaining[..<index]
615+
}
616+
617+
/// A Boolean value indicating whether the scanner has reached the end.
558618
var isAtEnd: Bool {
559619
remaining.isEmpty
560620
}
561621

622+
/// Returns a Boolean value indicating whether the substring at the scanner's current position begins with the specified prefix.
562623
func hasPrefix(_ prefix: String) -> Bool {
563624
remaining.hasPrefix(prefix)
564625
}
565626

566627
// MARK: Parsing argument types by scanning
567628

629+
/// Scans the remainder of the scanner's contents as the individual elements of a tuple return type,
630+
/// or as a single return type if the scanner's current position isn't an open parenthesis (`(`).
631+
///
632+
/// For example, consider a scanner that has already advanced 8 characters into the string `"-(One)->(Two,Three)"`
633+
/// ```
634+
/// -(One)->(Two,Three)
635+
/// ^
636+
/// ```
637+
/// Because the scanner's current position is an open parenthesis (`(`), the scanner advances all the way to the end and returns `["Two", "Three"]` representing two elements in the tuple return value.
638+
///
639+
/// - Note: The scanner expects that the caller has already scanned any parameter types and advanced past the `"->"` separator.
568640
mutating func scanReturnTypes() -> [Substring] {
569641
if peek() == "(" {
570642
_ = take() // the leading parenthesis
@@ -573,7 +645,20 @@ private struct StringScanner: ~Copyable {
573645
return [takeAll()]
574646
}
575647
}
576-
648+
649+
/// Scans the list of individual parameter type names as if the scanner's current position was 1 past the open parenthesis (`(`) of a tuple.
650+
///
651+
/// For example, consider a scanner that has already advanced 2 characters into the string `"-(One,(A,B))->(Two)"`
652+
/// ```
653+
/// -(One,(A,B))->(Two)
654+
/// ^
655+
/// ```
656+
/// The scanner parses two parameter types---`"One"` and `"(A,B)"`---before the parentheses balance out, advancing its position to one after the arguments list's closing parenthesis (`)`).
657+
/// ```
658+
/// -(One,(A,B))->(Two)
659+
/// ^
660+
/// ```
661+
/// - Note: The scanner expects that the caller has already advanced past the open parenthesis (`(`) that begins the list of parameter types.
577662
mutating func scanArguments() -> [Substring] {
578663
guard peek() != ")" else {
579664
_ = take() // drop the ")"
@@ -591,11 +676,23 @@ private struct StringScanner: ~Copyable {
591676
return arguments
592677
}
593678

594-
mutating func scanArgument() -> Substring? {
679+
/// Scans a single type name, representing either a scalar value (such as `One`) or a nested tuple (such as `(A,B)`).
680+
///
681+
/// For example, consider a scanner that has already advanced 6 characters into the string `"-(One,(A,B))->(Two)"`
682+
/// ```
683+
/// -(One,(A,B))->(Two)
684+
/// ^
685+
/// ```
686+
/// Because the value starts with an opening parenthesis (`(`), the scanner advances until the parentheses balance out, returning `"(A,B)"`.
687+
/// ```
688+
/// -(One,(A,B))->(Two)
689+
/// ^
690+
/// ```
691+
private mutating func scanArgument() -> Substring? {
595692
guard peek() == "(" else {
596693
// If the argument doesn't start with "(" it can be neither a tuple nor a closure type.
597694
// In this case, scan until the next argument (",") or the end of the arguments (")")
598-
return scan(until: { $0 == "," || $0 == ")" }) ?? takeAll()
695+
return scanValue() ?? takeAll()
599696
}
600697

601698
guard var argumentString = scanTuple() else {
@@ -611,7 +708,7 @@ private struct StringScanner: ~Copyable {
611708

612709
guard peek() == "(" else {
613710
// This closure type has a simple return type.
614-
guard let returnValue = scan(until: { $0 == "," || $0 == ")" }) else {
711+
guard let returnValue = scanValue() else {
615712
return nil
616713
}
617714
return argumentString + returnValue
@@ -622,7 +719,20 @@ private struct StringScanner: ~Copyable {
622719
return argumentString + returnValue
623720
}
624721

625-
mutating func scanTuple() -> Substring? {
722+
/// Scans a nested tuple as a single substring.
723+
///
724+
/// For example, consider a scanner that has already advanced 6 characters into the string `"-(One,(A,B))->(Two)"`
725+
/// ```
726+
/// -(One,(A,B))->(Two)
727+
/// ^
728+
/// ```
729+
/// Because the value starts with an opening parenthesis (`(`), the scanner advances until the parentheses balance out, returning `"(A,B)"`.
730+
/// ```
731+
/// -(One,(A,B))->(Two)
732+
/// ^
733+
/// ```
734+
/// - Note: The scanner expects that the caller has already advanced to the open parenthesis (`(`) that's the start of the nested tuple.
735+
private mutating func scanTuple() -> Substring? {
626736
assert(peek() == "(", "The caller should have checked that this is a tuple")
627737

628738
// The tuple may contain any number of nested tuples. Keep track of the open and close parenthesis while scanning.
@@ -632,13 +742,41 @@ private struct StringScanner: ~Copyable {
632742
depth += 1
633743
return false // keep scanning
634744
}
635-
if depth > 0 {
636-
if $0 == ")" {
637-
depth -= 1
638-
}
745+
else if $0 == ")" {
746+
depth -= 1
747+
return depth == 0 // stop only if we've reached a balanced number of parenthesis
748+
}
749+
return false // keep scanning
750+
}
751+
752+
return scan(past: predicate)
753+
}
754+
755+
/// Scans a single type name.
756+
///
757+
/// For example, consider a scanner that has already advanced 2 characters into the string `"-(One<A,B>,Two)"`
758+
/// ```
759+
/// -(One<A,B>,Two)
760+
/// ^
761+
/// ```
762+
/// Because the value contains generics (`<A,B>`), the scanner advances until the angle brackets balance out, returning `"One<A,B>"`.
763+
/// ```
764+
/// -(One<A,B>,Two)
765+
/// ^
766+
/// ```
767+
private mutating func scanValue() -> Substring? {
768+
// The value may contain any number of nested generics. Keep track of the open and close angle brackets while scanning.
769+
var depth = 0
770+
let predicate: (Character) -> Bool = {
771+
if $0 == "<" {
772+
depth += 1
773+
return false // keep scanning
774+
}
775+
else if $0 == ">" {
776+
depth -= 1
639777
return false // keep scanning
640778
}
641-
return $0 == "," || $0 == ")"
779+
return depth == 0 && ($0 == "," || $0 == ")")
642780
}
643781
return scan(until: predicate)
644782
}

0 commit comments

Comments
 (0)