Skip to content

Commit c0150a0

Browse files
committed
refactor: refactor ParsePlaceholders for conciseness and adaptability
- avoid panic with corrupted placeholder - support any run spans between delimiters - update `template.docx` to include corrupted and nested cases - lay foundation for a template engine to be introduced closes #29
1 parent fe742ed commit c0150a0

File tree

5 files changed

+69
-210
lines changed

5 files changed

+69
-210
lines changed

placeholder.go

Lines changed: 63 additions & 194 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package docx
33
import (
44
"fmt"
55
"log"
6-
"regexp"
76
"strings"
87
)
98

@@ -20,13 +19,6 @@ func ChangeOpenCloseDelimiter(openDelimiter, closeDelimiter rune) {
2019
CloseDelimiter = closeDelimiter
2120
}
2221

23-
var (
24-
// OpenDelimiterRegex is used to quickly match the opening delimiter and find its positions.
25-
OpenDelimiterRegex = regexp.MustCompile(string(OpenDelimiter))
26-
// CloseDelimiterRegex is used to quickly match the closing delimiter and find its positions.
27-
CloseDelimiterRegex = regexp.MustCompile(string(CloseDelimiter))
28-
)
29-
3022
// PlaceholderMap is the type used to map the placeholder keys (without delimiters) to the replacement values
3123
type PlaceholderMap map[string]interface{}
3224

@@ -73,208 +65,85 @@ func (p Placeholder) Valid() bool {
7365
// ParsePlaceholders will, given the document run positions and the bytes, parse out all placeholders including
7466
// their fragments.
7567
func ParsePlaceholders(runs DocumentRuns, docBytes []byte) (placeholders []*Placeholder, err error) {
76-
// tmp vars used to preserve state across iterations
77-
unclosedPlaceholder := new(Placeholder)
78-
hasOpenPlaceholder := false
79-
68+
// Use stack to trace the delimiter pair
69+
stack := []*PlaceholderFragment{}
8070
for _, run := range runs.WithText() {
81-
runText := run.GetText(docBytes)
82-
83-
openDelimPositions := OpenDelimiterRegex.FindAllStringIndex(runText, -1)
84-
closeDelimPositions := CloseDelimiterRegex.FindAllStringIndex(runText, -1)
85-
86-
// FindAllStringIndex returns a [][]int whereas the nested []int has only 2 keys (0 and 1)
87-
// We're only interested in the first key as that one indicates the position of the delimiter
88-
delimPositions := func(positions [][]int) []int {
89-
var pos []int
90-
for _, position := range positions {
91-
pos = append(pos, position[0])
92-
}
93-
return pos
94-
}
95-
96-
// index all delimiters
97-
openPos := delimPositions(openDelimPositions)
98-
closePos := delimPositions(closeDelimPositions)
99-
100-
// In case there are the same amount of open and close delimiters.
101-
// Here we will have three different sub-cases.
102-
// Case 1 (default):
103-
// '{foo}{bar}' which is the simplest case to handle
104-
//
105-
// Case 2 (special):
106-
// '}foo{bar}foo{' which can easily be detected by checking if 'openPos > endPos'.
107-
// That case can only be valid if there is an unclosed placeholder in a previous run.
108-
// If there is no unclosed placeholder, then there is some form of user error (e.g. '{baz}}foo{bar}').
109-
// We can also be sure that the first close and the last open delimiters are wrong, all the other ones
110-
// in between will be correct, given the len(openPos)==len(closePos) premise.
111-
// We're ignoring the case in which the user might've entered '}foo}bar{foo{' and went full derp-mode.
112-
//
113-
// Case 3 (nested):
114-
// '{foo{bar}foo}' aka placeholder-nesting, which is actually not going to be supported
115-
// but needs to be detected and handled anyway. TODO handle nestings
116-
if (len(openPos) == len(closePos)) && len(openPos) != 0 {
117-
118-
// isSpecialCase checks if, for all found delimiters, startPos > endPos is true (case 2)
119-
isSpecialCase := func() bool {
120-
for i := 0; i < len(openPos); i++ {
121-
start := openPos[i]
122-
end := closePos[i] + 1 // +1 is required to include the closing delimiter in the text
123-
if start > end {
124-
return true
125-
}
126-
}
127-
return false
71+
hasDelimiter := false
72+
runRune := []rune(run.GetText(docBytes))
73+
for i := 0; i < len(runRune); i++ {
74+
// There is an open delimiter in the run, thus create a partial placeholder fragment
75+
if runRune[i] == OpenDelimiter {
76+
hasDelimiter = true
77+
stack = append(stack, NewPlaceholderFragment(Position{int64(i), -1}, run))
78+
continue
12879
}
12980

130-
// isNestedCase checks if, there are >1 OpenDelimiters before the first CloseDelimiter
131-
// if there is only 1 openPos, this cannot be true (we already know that it's not 0
132-
isNestedCase := func() bool {
133-
if len(openPos) == 1 {
134-
return false
81+
if runRune[i] == CloseDelimiter {
82+
// There is a close delimiter in the run, 3 scenarios may happen:
83+
// 1) The stack is empty, no open delimiter can match this close delimiter,
84+
// this must be a corrupted placeholder, we log the error and skip
85+
if len(stack) == 0 {
86+
log.Printf(
87+
"detected unmatched close delimiter in run %d \"%s\", index %d, skipping \n",
88+
run.ID, run.GetText(docBytes), i,
89+
)
90+
continue
13591
}
136-
if openPos[0] < closePos[0] &&
137-
openPos[1] < closePos[0] {
138-
return true
139-
}
140-
return false
141-
}
14292

143-
// handle case 2
144-
if isSpecialCase() {
145-
146-
// handle the easy part (everything between the culprit first '}' and last '{' in the example of '}foo{bar}foo{')
147-
validOpenPos := openPos[:len(openPos)-1]
148-
validClosePos := closePos[1:]
149-
placeholders = append(placeholders, assembleFullPlaceholders(run, validOpenPos, validClosePos)...)
150-
151-
// extract the first open and last close delimiter positions as they are the one causing issues.
152-
lastOpenPos := openPos[len(openPos)-1]
153-
firstClosePos := closePos[0]
154-
155-
// we MUST be having an unclosedPlaceholder or the user made a typo like double-closing ('{foo}}{bar')
156-
if !hasOpenPlaceholder {
157-
return nil, fmt.Errorf("unexpected %c in run %d \"%s\"), missing preceeding %c", CloseDelimiter, run.ID, run.GetText(docBytes), OpenDelimiter)
93+
// 2) The stack is not empty, so we pop the most recent partial fragment and pair it with this close delimiter:
94+
hasDelimiter = true
95+
fragment := stack[len(stack)-1]
96+
stack = stack[:len(stack)-1]
97+
if run == fragment.Run {
98+
// a) The close delimiter is in the same run as the open delimiter, then we take
99+
// the partial fragment from the top of the stack, and complete its end position, to make a
100+
// complete placeholder with only 1 fragment.
101+
// e.g., run like:
102+
// foo{bar}baz
103+
// foo{bar}baz{qux}bbb
104+
fragment.Position.End = int64(i) + 1
105+
placeholders = append(placeholders, &Placeholder{Fragments: []*PlaceholderFragment{fragment}})
106+
} else {
107+
// b) There are some span runs between the run of open and close delimiter, then we first
108+
// take the partial fragment from the top of the stack, and its end position must be the end of
109+
// that run. Then we create span fragments, with its length set to the run length. Finally, we
110+
// create the fragment that includes the close delimiter, with its start position set to 0, and
111+
// end position set to the position of the close delimiter.
112+
// e.g., run like (here | is the run boundary):
113+
// foo{bar|}baz => {bar}
114+
// foo{bar|abc|}baz => {barabc}
115+
// foo{bar|abc|def|}baz => {barabcdef}
116+
// foo{bar|{bc|d}ef|}baz => {bar{bcd}ef} {bcd}
117+
fragment.Position.End = int64(len(fragment.Run.GetText(docBytes)))
118+
fragments := []*PlaceholderFragment{fragment}
119+
for _, srun := range fragment.SpanRun {
120+
fragments = append(
121+
fragments,
122+
NewPlaceholderFragment(Position{0, int64(len(srun.GetText(docBytes)))}, srun),
123+
)
124+
}
125+
fragments = append(fragments, NewPlaceholderFragment(Position{0, int64(i) + 1}, run))
126+
placeholders = append(placeholders, &Placeholder{Fragments: fragments})
158127
}
159-
160-
// everything up to firstClosePos belongs to the currently open placeholder
161-
fragment := NewPlaceholderFragment(0, Position{0, int64(firstClosePos) + 1}, run)
162-
unclosedPlaceholder.Fragments = append(unclosedPlaceholder.Fragments, fragment)
163-
placeholders = append(placeholders, unclosedPlaceholder)
164-
165-
// a new, unclosed, placeholder starts at lastOpenPos
166-
fragment = NewPlaceholderFragment(0, Position{int64(lastOpenPos), int64(len(runText))}, run)
167-
unclosedPlaceholder = new(Placeholder)
168-
unclosedPlaceholder.Fragments = append(unclosedPlaceholder.Fragments, fragment)
169-
hasOpenPlaceholder = true
170-
171-
continue
172-
}
173-
174-
// there are multiple ways to handle this
175-
// - error
176-
// - cut out
177-
// - skip the run (that's what we do because we're lazy bums)
178-
if isNestedCase() {
179-
log.Printf("detected nested placeholder in run %d \"%s\", skipping \n", run.ID, run.GetText(docBytes))
180128
continue
181129
}
182-
183-
// case 1, assemble and continue
184-
placeholders = append(placeholders, assembleFullPlaceholders(run, openPos, closePos)...)
185-
continue
186130
}
187-
188-
// More open than closing delimiters, e.g. '{foo}{bar'
189-
// this can only mean that a placeholder is left unclosed after this run
190-
// For the length this means that (len(openPos) + 1) == len(closePos)
191-
// So we can be sure that the last position in openPos is the opening tag of the
192-
// unclosed placeholder.
193-
if len(openPos) > len(closePos) {
194-
// merge full placeholders in the run, leaving out the last openPos since
195-
// we know that the one is left over and must be handled separately below
196-
placeholders = append(placeholders, assembleFullPlaceholders(run, openPos[:len(openPos)-1], closePos)...)
197-
198-
// add the unclosed part of the placeholder to a tmp placeholder var
199-
unclosedOpenPos := openPos[len(openPos)-1]
200-
fragment := NewPlaceholderFragment(0, Position{int64(unclosedOpenPos), int64(len(runText))}, run)
201-
unclosedPlaceholder.Fragments = append(unclosedPlaceholder.Fragments, fragment)
202-
hasOpenPlaceholder = true
203-
continue
204-
}
205-
206-
// More closing than opening delimiters, e.g. '}{foo}'
207-
// this can only mean that there must be an unclosed placeholder which
208-
// is closed in this run.
209-
if len(openPos) < len(closePos) {
210-
// merge full placeholders in the run, leaving out the last closePos since
211-
// we know that the one is left over and must be handled separately below
212-
placeholders = append(placeholders, assembleFullPlaceholders(run, openPos, closePos[:len(closePos)-1])...)
213-
214-
// there is only a closePos and no open pos
215-
if len(closePos) == 1 {
216-
fragment := NewPlaceholderFragment(0, Position{0, int64(int64(closePos[0]) + 1)}, run)
217-
unclosedPlaceholder.Fragments = append(unclosedPlaceholder.Fragments, fragment)
218-
placeholders = append(placeholders, unclosedPlaceholder)
219-
unclosedPlaceholder = new(Placeholder)
220-
hasOpenPlaceholder = false
221-
continue
222-
}
223-
continue
224-
}
225-
226-
// No placeholders at all.
227-
// The run is only relevant if there is an unclosed placeholder from a previous run.
228-
// In that case it means that the full run-text belongs to the placeholder.
229-
// For example, if a placeholder has three fragments in total, this represents fragment 2 (see below)
230-
// 1) '{foo'
231-
// 2) 'bar-'
232-
// 3) '-baz}
233-
if len(openPos) == 0 && len(closePos) == 0 {
234-
if hasOpenPlaceholder {
235-
fragment := NewPlaceholderFragment(0, Position{0, int64(len(runText))}, run)
236-
unclosedPlaceholder.Fragments = append(unclosedPlaceholder.Fragments, fragment)
131+
if !hasDelimiter {
132+
// If a run has no delimiter, it must be a span run. Thus we add the run to all the partial fragments that
133+
// have not been closed.
134+
for i := 0; i < len(stack); i++ {
135+
stack[i].SpanRun = append(stack[i].SpanRun, run)
237136
continue
238137
}
239138
}
240139
}
241140

242-
// Make sure that we're dealing with valid and proper placeholders only.
243-
// Everything else may cause issues like out of bounds errors or any other sort of weird things.
244-
// Here we will also assemble the final list of placeholders and return only the valid ones.
245-
var validPlaceholders []*Placeholder
246-
for _, placeholder := range placeholders {
247-
if !placeholder.Valid() {
248-
continue
249-
}
250-
251-
// in order to catch false positives, ensure that all placeholders have BOTH delimiters
252-
text := placeholder.Text(docBytes)
253-
if !strings.ContainsRune(text, OpenDelimiter) ||
254-
!strings.ContainsRune(text, CloseDelimiter) {
255-
continue
256-
}
257-
258-
// placeholder is valid
259-
validPlaceholders = append(validPlaceholders, placeholder)
141+
// Warn the user that there are some unmatched open delimiters (a.k.a. corrupted placeholders) left in the stack
142+
for _, fragment := range stack {
143+
log.Printf("detected unmatched open delimiter in run %d \"%s\", index %d, skipping \n", fragment.Run.ID, fragment.Run.GetText(docBytes), fragment.Position.Start)
260144
}
261-
return validPlaceholders, nil
262-
}
263145

264-
// assembleFullPlaceholders will extract all complete placeholders inside the run given a open and close position.
265-
// The open and close positions are the positions of the Delimiters which must already be known at this point.
266-
// openPos and closePos are expected to be symmetrical (e.g. same length).
267-
// Example: openPos := []int{10,20,30}; closePos := []int{13, 23, 33} resulting in 3 fragments (10,13),(20,23),(30,33)
268-
// The n-th elements inside openPos and closePos must be matching delimiter positions.
269-
func assembleFullPlaceholders(run *Run, openPos, closePos []int) (placeholders []*Placeholder) {
270-
for i := 0; i < len(openPos); i++ {
271-
start := openPos[i]
272-
end := closePos[i] + 1 // +1 is required to include the closing delimiter in the text
273-
fragment := NewPlaceholderFragment(0, Position{int64(start), int64(end)}, run)
274-
p := &Placeholder{Fragments: []*PlaceholderFragment{fragment}}
275-
placeholders = append(placeholders, p)
276-
}
277-
return placeholders
146+
return placeholders, nil
278147
}
279148

280149
// AddPlaceholderDelimiter will wrap the given string with OpenDelimiter and CloseDelimiter.

placeholder_fragment.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,17 @@ var (
1313
type PlaceholderFragment struct {
1414
ID int // ID is used to identify the fragments globally.
1515
Position Position // Position of the actual fragment within the run text. 0 == (Run.Text.OpenTag.End + 1)
16-
Number int // numbering fragments for ease of use. Numbering is scoped to placeholders.
17-
Run *Run
16+
//Number int // numbering fragments for ease of use. Numbering is scoped to placeholders.
17+
Run *Run
18+
SpanRun []*Run
1819
}
1920

2021
// NewPlaceholderFragment returns an initialized PlaceholderFragment with a new, auto-incremented, ID.
21-
func NewPlaceholderFragment(number int, pos Position, run *Run) *PlaceholderFragment {
22+
func NewPlaceholderFragment(pos Position, run *Run) *PlaceholderFragment {
2223
return &PlaceholderFragment{
2324
ID: NewFragmentID(),
24-
Position: pos,
25-
Number: number,
2625
Run: run,
26+
Position: pos,
2727
}
2828
}
2929

placeholder_test.go

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,3 @@ func TestParsePlaceholders(t *testing.T) {
4646
}
4747
}
4848
}
49-
50-
func TestPlaceholder_AssembleFullPlaceholders(t *testing.T) {
51-
expectedCount := 2
52-
openPos := []int{10, 18}
53-
closePos := []int{17, 25}
54-
55-
placeholders := assembleFullPlaceholders(&Run{}, openPos, closePos)
56-
if len(placeholders) != expectedCount {
57-
t.Errorf("not all full placeholders were parsed, want=%d, have=%d", expectedCount, len(placeholders))
58-
}
59-
}

replace_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ func TestReplacer_Replace(t *testing.T) {
1818
"mixed-key.separator_styles#": "mixed-key.separator_styles#",
1919
"yet-another_placeholder": "yet-another_placeholder",
2020
"foo": "foo",
21+
"nested": "nested",
2122
}
2223

2324
doc, err := Open("./test/template.docx")

test/template.docx

348 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)