@@ -3,7 +3,6 @@ package docx
33import (
44 "fmt"
55 "log"
6- "regexp"
76 "strings"
87)
98
@@ -20,13 +19,6 @@ func ChangeOpenCloseDelimiter(openDelimiter, closeDelimiter rune) {
2019 CloseDelimiter = closeDelimiter
2120}
2221
23- var (
24- // OpenDelimiterRegex is used to quickly match the opening delimiter and find its positions.
25- OpenDelimiterRegex = regexp .MustCompile (string (OpenDelimiter ))
26- // CloseDelimiterRegex is used to quickly match the closing delimiter and find its positions.
27- CloseDelimiterRegex = regexp .MustCompile (string (CloseDelimiter ))
28- )
29-
3022// PlaceholderMap is the type used to map the placeholder keys (without delimiters) to the replacement values
3123type PlaceholderMap map [string ]interface {}
3224
@@ -73,208 +65,85 @@ func (p Placeholder) Valid() bool {
7365// ParsePlaceholders will, given the document run positions and the bytes, parse out all placeholders including
7466// their fragments.
7567func ParsePlaceholders (runs DocumentRuns , docBytes []byte ) (placeholders []* Placeholder , err error ) {
76- // tmp vars used to preserve state across iterations
77- unclosedPlaceholder := new (Placeholder )
78- hasOpenPlaceholder := false
79-
68+ // Use stack to trace the delimiter pair
69+ stack := []* PlaceholderFragment {}
8070 for _ , run := range runs .WithText () {
81- runText := run .GetText (docBytes )
82-
83- openDelimPositions := OpenDelimiterRegex .FindAllStringIndex (runText , - 1 )
84- closeDelimPositions := CloseDelimiterRegex .FindAllStringIndex (runText , - 1 )
85-
86- // FindAllStringIndex returns a [][]int whereas the nested []int has only 2 keys (0 and 1)
87- // We're only interested in the first key as that one indicates the position of the delimiter
88- delimPositions := func (positions [][]int ) []int {
89- var pos []int
90- for _ , position := range positions {
91- pos = append (pos , position [0 ])
92- }
93- return pos
94- }
95-
96- // index all delimiters
97- openPos := delimPositions (openDelimPositions )
98- closePos := delimPositions (closeDelimPositions )
99-
100- // In case there are the same amount of open and close delimiters.
101- // Here we will have three different sub-cases.
102- // Case 1 (default):
103- // '{foo}{bar}' which is the simplest case to handle
104- //
105- // Case 2 (special):
106- // '}foo{bar}foo{' which can easily be detected by checking if 'openPos > endPos'.
107- // That case can only be valid if there is an unclosed placeholder in a previous run.
108- // If there is no unclosed placeholder, then there is some form of user error (e.g. '{baz}}foo{bar}').
109- // We can also be sure that the first close and the last open delimiters are wrong, all the other ones
110- // in between will be correct, given the len(openPos)==len(closePos) premise.
111- // We're ignoring the case in which the user might've entered '}foo}bar{foo{' and went full derp-mode.
112- //
113- // Case 3 (nested):
114- // '{foo{bar}foo}' aka placeholder-nesting, which is actually not going to be supported
115- // but needs to be detected and handled anyway. TODO handle nestings
116- if (len (openPos ) == len (closePos )) && len (openPos ) != 0 {
117-
118- // isSpecialCase checks if, for all found delimiters, startPos > endPos is true (case 2)
119- isSpecialCase := func () bool {
120- for i := 0 ; i < len (openPos ); i ++ {
121- start := openPos [i ]
122- end := closePos [i ] + 1 // +1 is required to include the closing delimiter in the text
123- if start > end {
124- return true
125- }
126- }
127- return false
71+ hasDelimiter := false
72+ runRune := []rune (run .GetText (docBytes ))
73+ for i := 0 ; i < len (runRune ); i ++ {
74+ // There is an open delimiter in the run, thus create a partial placeholder fragment
75+ if runRune [i ] == OpenDelimiter {
76+ hasDelimiter = true
77+ stack = append (stack , NewPlaceholderFragment (Position {int64 (i ), - 1 }, run ))
78+ continue
12879 }
12980
130- // isNestedCase checks if, there are >1 OpenDelimiters before the first CloseDelimiter
131- // if there is only 1 openPos, this cannot be true (we already know that it's not 0)
132- isNestedCase := func () bool {
133- if len (openPos ) == 1 {
134- return false
81+ if runRune [i ] == CloseDelimiter {
82+ // There is a close delimiter in the run, 3 scenarios may happen:
83+ // 1) The stack is empty, no open delimiter can match this close delimiter,
84+ // this must be a corrupted placeholder, we log the error and skip
85+ if len (stack ) == 0 {
86+ log .Printf (
87+ "detected unmatched close delimiter in run %d \" %s\" , index %d, skipping \n " ,
88+ run .ID , run .GetText (docBytes ), i ,
89+ )
90+ continue
13591 }
136- if openPos [0 ] < closePos [0 ] &&
137- openPos [1 ] < closePos [0 ] {
138- return true
139- }
140- return false
141- }
14292
143- // handle case 2
144- if isSpecialCase () {
145-
146- // handle the easy part (everything between the culprit first '}' and last '{' in the example of '}foo{bar}foo{')
147- validOpenPos := openPos [:len (openPos )- 1 ]
148- validClosePos := closePos [1 :]
149- placeholders = append (placeholders , assembleFullPlaceholders (run , validOpenPos , validClosePos )... )
150-
151- // extract the last open and first close delimiter positions as they are the ones causing issues.
152- lastOpenPos := openPos [len (openPos )- 1 ]
153- firstClosePos := closePos [0 ]
154-
155- // we MUST be having an unclosedPlaceholder or the user made a typo like double-closing ('{foo}}{bar')
156- if ! hasOpenPlaceholder {
157- return nil , fmt .Errorf ("unexpected %c in run %d \" %s\" ), missing preceeding %c" , CloseDelimiter , run .ID , run .GetText (docBytes ), OpenDelimiter )
93+ // 2) The stack is not empty, so the fragment on top of the stack is the matching open delimiter.
94+ hasDelimiter = true
95+ fragment := stack [len (stack )- 1 ]
96+ stack = stack [:len (stack )- 1 ]
97+ if run == fragment .Run {
98+ // a) The close delimiter is in the same run as the open delimiter, then we take
99+ // the partial fragment from the top of the stack, and complete its end position, to make a
100+ // complete placeholder with only 1 fragment.
101+ // e.g., run like:
102+ // foo{bar}baz
103+ // foo{bar}baz{qux}bbb
104+ fragment .Position .End = int64 (i ) + 1
105+ placeholders = append (placeholders , & Placeholder {Fragments : []* PlaceholderFragment {fragment }})
106+ } else {
107+ // b) There are some span runs between the run of open and close delimiter, then we first
108+ // take the partial fragment from the top of the stack, and its end position must be the end of
109+ // that run. Then we create span fragments, with its length set to the run length. Finally, we
110+ // create the fragment that includes the close delimiter, with its start position set to 0, and
111+ // end position set to the position of the close delimiter.
112+ // e.g., run like (here | is the run boundary):
113+ // foo{bar|}baz => {bar}
114+ // foo{bar|abc|}baz => {barabc}
115+ // foo{bar|abc|def|}baz => {barabcdef}
116+ // foo{bar|{bc|d}ef|}baz => {bar{bcd}ef} {bcd}
117+ fragment .Position .End = int64 (len (fragment .Run .GetText (docBytes )))
118+ fragments := []* PlaceholderFragment {fragment }
119+ for _ , srun := range fragment .SpanRun {
120+ fragments = append (
121+ fragments ,
122+ NewPlaceholderFragment (Position {0 , int64 (len (srun .GetText (docBytes )))}, srun ),
123+ )
124+ }
125+ fragments = append (fragments , NewPlaceholderFragment (Position {0 , int64 (i ) + 1 }, run ))
126+ placeholders = append (placeholders , & Placeholder {Fragments : fragments })
158127 }
159-
160- // everything up to firstClosePos belongs to the currently open placeholder
161- fragment := NewPlaceholderFragment (0 , Position {0 , int64 (firstClosePos ) + 1 }, run )
162- unclosedPlaceholder .Fragments = append (unclosedPlaceholder .Fragments , fragment )
163- placeholders = append (placeholders , unclosedPlaceholder )
164-
165- // a new, unclosed, placeholder starts at lastOpenPos
166- fragment = NewPlaceholderFragment (0 , Position {int64 (lastOpenPos ), int64 (len (runText ))}, run )
167- unclosedPlaceholder = new (Placeholder )
168- unclosedPlaceholder .Fragments = append (unclosedPlaceholder .Fragments , fragment )
169- hasOpenPlaceholder = true
170-
171- continue
172- }
173-
174- // there are multiple ways to handle this
175- // - error
176- // - cut out
177- // - skip the run (that's what we do because we're lazy bums)
178- if isNestedCase () {
179- log .Printf ("detected nested placeholder in run %d \" %s\" , skipping \n " , run .ID , run .GetText (docBytes ))
180128 continue
181129 }
182-
183- // case 1, assemble and continue
184- placeholders = append (placeholders , assembleFullPlaceholders (run , openPos , closePos )... )
185- continue
186130 }
187-
188- // More open than closing delimiters, e.g. '{foo}{bar'
189- // this can only mean that a placeholder is left unclosed after this run
190- // For the length this means that len(openPos) == (len(closePos) + 1)
191- // So we can be sure that the last position in openPos is the opening tag of the
192- // unclosed placeholder.
193- if len (openPos ) > len (closePos ) {
194- // merge full placeholders in the run, leaving out the last openPos since
195- // we know that the one is left over and must be handled separately below
196- placeholders = append (placeholders , assembleFullPlaceholders (run , openPos [:len (openPos )- 1 ], closePos )... )
197-
198- // add the unclosed part of the placeholder to a tmp placeholder var
199- unclosedOpenPos := openPos [len (openPos )- 1 ]
200- fragment := NewPlaceholderFragment (0 , Position {int64 (unclosedOpenPos ), int64 (len (runText ))}, run )
201- unclosedPlaceholder .Fragments = append (unclosedPlaceholder .Fragments , fragment )
202- hasOpenPlaceholder = true
203- continue
204- }
205-
206- // More closing than opening delimiters, e.g. '}{foo}'
207- // this can only mean that there must be an unclosed placeholder which
208- // is closed in this run.
209- if len (openPos ) < len (closePos ) {
210- // merge full placeholders in the run, leaving out the last closePos since
211- // we know that the one is left over and must be handled separately below
212- placeholders = append (placeholders , assembleFullPlaceholders (run , openPos , closePos [:len (closePos )- 1 ])... )
213-
214- // there is only a closePos and no open pos
215- if len (closePos ) == 1 {
216- fragment := NewPlaceholderFragment (0 , Position {0 , int64 (int64 (closePos [0 ]) + 1 )}, run )
217- unclosedPlaceholder .Fragments = append (unclosedPlaceholder .Fragments , fragment )
218- placeholders = append (placeholders , unclosedPlaceholder )
219- unclosedPlaceholder = new (Placeholder )
220- hasOpenPlaceholder = false
221- continue
222- }
223- continue
224- }
225-
226- // No placeholders at all.
227- // The run is only relevant if there is an unclosed placeholder from a previous run.
228- // In that case it means that the full run-text belongs to the placeholder.
229- // For example, if a placeholder has three fragments in total, this represents fragment 2 (see below)
230- // 1) '{foo'
231- // 2) 'bar-'
232- // 3) '-baz}
233- if len (openPos ) == 0 && len (closePos ) == 0 {
234- if hasOpenPlaceholder {
235- fragment := NewPlaceholderFragment (0 , Position {0 , int64 (len (runText ))}, run )
236- unclosedPlaceholder .Fragments = append (unclosedPlaceholder .Fragments , fragment )
131+ if ! hasDelimiter {
132+ // If a run has no delimiter, it must be a span run. Thus we add the run to all the partial fragments that
133+ // have not been closed.
134+ for i := 0 ; i < len (stack ); i ++ {
135+ stack [i ].SpanRun = append (stack [i ].SpanRun , run )
237136 continue
238137 }
239138 }
240139 }
241140
242- // Make sure that we're dealing with valid and proper placeholders only.
243- // Everything else may cause issues like out of bounds errors or any other sort of weird things.
244- // Here we will also assemble the final list of placeholders and return only the valid ones.
245- var validPlaceholders []* Placeholder
246- for _ , placeholder := range placeholders {
247- if ! placeholder .Valid () {
248- continue
249- }
250-
251- // in order to catch false positives, ensure that all placeholders have BOTH delimiters
252- text := placeholder .Text (docBytes )
253- if ! strings .ContainsRune (text , OpenDelimiter ) ||
254- ! strings .ContainsRune (text , CloseDelimiter ) {
255- continue
256- }
257-
258- // placeholder is valid
259- validPlaceholders = append (validPlaceholders , placeholder )
141+ // Warn user there are some unmatched open delimiters (a.k.a corrupted placeholders) left in the stack
142+ for _ , fragment := range stack {
143+ log .Printf ("detected unmatched open delimiter in run %d \" %s\" , index %d, skipping \n " , fragment .Run .ID , fragment .Run .GetText (docBytes ), fragment .Position .Start )
260144 }
261- return validPlaceholders , nil
262- }
263145
264- // assembleFullPlaceholders will extract all complete placeholders inside the run given a open and close position.
265- // The open and close positions are the positions of the Delimiters which must already be known at this point.
266- // openPos and closePos are expected to be symmetrical (e.g. same length).
267- // Example: openPos := []int{10,20,30}; closePos := []int{13, 23, 33} resulting in 3 fragments (10,13),(20,23),(30,33)
268- // The n-th elements inside openPos and closePos must be matching delimiter positions.
269- func assembleFullPlaceholders (run * Run , openPos , closePos []int ) (placeholders []* Placeholder ) {
270- for i := 0 ; i < len (openPos ); i ++ {
271- start := openPos [i ]
272- end := closePos [i ] + 1 // +1 is required to include the closing delimiter in the text
273- fragment := NewPlaceholderFragment (0 , Position {int64 (start ), int64 (end )}, run )
274- p := & Placeholder {Fragments : []* PlaceholderFragment {fragment }}
275- placeholders = append (placeholders , p )
276- }
277- return placeholders
146+ return placeholders , nil
278147}
279148
280149// AddPlaceholderDelimiter will wrap the given string with OpenDelimiter and CloseDelimiter.
0 commit comments