Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
257 changes: 63 additions & 194 deletions placeholder.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package docx
import (
"fmt"
"log"
"regexp"
"strings"
)

Expand All @@ -20,13 +19,6 @@ func ChangeOpenCloseDelimiter(openDelimiter, closeDelimiter rune) {
CloseDelimiter = closeDelimiter
}

var (
// OpenDelimiterRegex is used to quickly match the opening delimiter and find it'str positions.
OpenDelimiterRegex = regexp.MustCompile(string(OpenDelimiter))
// CloseDelimiterRegex is used to quickly match the closing delimiter and find it'str positions.
CloseDelimiterRegex = regexp.MustCompile(string(CloseDelimiter))
)

// PlaceholderMap is the type used to map the placeholder keys (without delimiters) to the replacement values
type PlaceholderMap map[string]interface{}

Expand Down Expand Up @@ -73,208 +65,85 @@ func (p Placeholder) Valid() bool {
// ParsePlaceholders will, given the document run positions and the bytes, parse out all placeholders including
// their fragments.
func ParsePlaceholders(runs DocumentRuns, docBytes []byte) (placeholders []*Placeholder, err error) {
// tmp vars used to preserve state across iterations
unclosedPlaceholder := new(Placeholder)
hasOpenPlaceholder := false

// Use stack to trace the delimiter pair
stack := []*PlaceholderFragment{}
for _, run := range runs.WithText() {
runText := run.GetText(docBytes)

openDelimPositions := OpenDelimiterRegex.FindAllStringIndex(runText, -1)
closeDelimPositions := CloseDelimiterRegex.FindAllStringIndex(runText, -1)

// FindAllStringIndex returns a [][]int whereas the nested []int has only 2 keys (0 and 1)
// We're only interested in the first key as that one indicates the position of the delimiter
delimPositions := func(positions [][]int) []int {
var pos []int
for _, position := range positions {
pos = append(pos, position[0])
}
return pos
}

// index all delimiters
openPos := delimPositions(openDelimPositions)
closePos := delimPositions(closeDelimPositions)

// In case there are the same amount of open and close delimiters.
// Here we will have three three different sub-cases.
// Case 1 (default):
// '{foo}{bar}' which is the simplest case to handle
//
// Case 2 (special):
// '}foo{bar}foo{' which can easily be detected by checking if 'openPos > endPos'.
// That case can only be valid if there is an unclosed placeholder in a previous run.
// If there is no unclosed placeholder, then there is some form of user error (e.g. '{baz}}foo{bar}').
// We can also be sure that the first close and the last open delimiters are wrong, all the other ones
// in between will be correct, given the len(openPos)==len(closePos) premise.
// We're ignoring the case in which the user might've entered '}foo}bar{foo{' and went full derp-mode.
//
// Case 3 (nested):
// '{foo{bar}foo}' aka placeholder-nesting, which is acatually not going to be supported
// but needs to be detected and handled anyway. TODO handle nestings
if (len(openPos) == len(closePos)) && len(openPos) != 0 {

// isSpecialCase checks if, for all found delimiters, startPos > endPos is true (case 2)
isSpecialCase := func() bool {
for i := 0; i < len(openPos); i++ {
start := openPos[i]
end := closePos[i] + 1 // +1 is required to include the closing delimiter in the text
if start > end {
return true
}
}
return false
hasDelimiter := false
runRune := []rune(run.GetText(docBytes))
for i := 0; i < len(runRune); i++ {
// There is an open delimiter in the run, thus create a partial placeholder fragment
if runRune[i] == OpenDelimiter {
hasDelimiter = true
stack = append(stack, NewPlaceholderFragment(Position{int64(i), -1}, run))
continue
}

// isNestedCase checks if, there are >1 OpenDelimiters before the first CloseDelimiter
// if there is only 1 openPos, this cannot be true (we already know that it's not 0
isNestedCase := func() bool {
if len(openPos) == 1 {
return false
if runRune[i] == CloseDelimiter {
// There is a close delimiter in the run, 3 scenarios may happen:
// 1) The stack is empty, no open delimiter can match this close delimiter,
// this must be a corrupted placeholder, we log the error and skip
if len(stack) == 0 {
log.Printf(
"detected unmatched close delimiter in run %d \"%s\", index %d, skipping \n",
run.ID, run.GetText(docBytes), i,
)
continue
}
if openPos[0] < closePos[0] &&
openPos[1] < closePos[0] {
return true
}
return false
}

// handle case 2
if isSpecialCase() {

// handle the easy part (everything between the the culprit first '}' and last '{' in the example of '}foo{bar}foo{'
validOpenPos := openPos[:len(openPos)-1]
validClosePos := closePos[1:]
placeholders = append(placeholders, assembleFullPlaceholders(run, validOpenPos, validClosePos)...)

// extract the first open and last close delimiter positions as they are the one causing issues.
lastOpenPos := openPos[len(openPos)-1]
firstClosePos := closePos[0]

// we MUST be having an unclosedPlaceholder or the user made a typo like double-closing ('{foo}}{bar')
if !hasOpenPlaceholder {
return nil, fmt.Errorf("unexpected %c in run %d \"%s\"), missing preceeding %c", CloseDelimiter, run.ID, run.GetText(docBytes), OpenDelimiter)
// 2) The stack is not empty,
hasDelimiter = true
fragment := stack[len(stack)-1]
stack = stack[:len(stack)-1]
if run == fragment.Run {
// a) The close delimiter is in the same run as the open delimiter, then we take
// the partial fragment from the top of the stack, and complete its end position, to make a
// complete placeholder with only 1 fragment.
// e.g., run like:
// foo{bar}baz
// foo{bar}baz{qux}bbb
fragment.Position.End = int64(i) + 1
placeholders = append(placeholders, &Placeholder{Fragments: []*PlaceholderFragment{fragment}})
} else {
// b) There are some span runs between the run of open and close delimiter, then we first
// take the partial fragment from the top of the stack, and its end position must be the end of
// that run. Then we create span fragments, with its length set to the run length. Finally, we
// create the fragment that includes the close delimiter, with its start position set to 0, and
// end position set to the position of the close delimiter.
// e.g., run like (here | is the run boundary):
// foo{bar|}baz => {bar}
// foo{bar|abc|}baz => {barabc}
// foo{bar|abc|def|}baz => {barabcdef}
// foo{bar|{bc|d}ef|}baz => {bar{bcd}ef} {bcd}
fragment.Position.End = int64(len(fragment.Run.GetText(docBytes)))
fragments := []*PlaceholderFragment{fragment}
for _, srun := range fragment.SpanRun {
fragments = append(
fragments,
NewPlaceholderFragment(Position{0, int64(len(srun.GetText(docBytes)))}, srun),
)
}
fragments = append(fragments, NewPlaceholderFragment(Position{0, int64(i) + 1}, run))
placeholders = append(placeholders, &Placeholder{Fragments: fragments})
}

// everything up to firstClosePos belongs to the currently open placeholder
fragment := NewPlaceholderFragment(0, Position{0, int64(firstClosePos) + 1}, run)
unclosedPlaceholder.Fragments = append(unclosedPlaceholder.Fragments, fragment)
placeholders = append(placeholders, unclosedPlaceholder)

// a new, unclosed, placeholder starts at lastOpenPos
fragment = NewPlaceholderFragment(0, Position{int64(lastOpenPos), int64(len(runText))}, run)
unclosedPlaceholder = new(Placeholder)
unclosedPlaceholder.Fragments = append(unclosedPlaceholder.Fragments, fragment)
hasOpenPlaceholder = true

continue
}

// there are multiple ways to handle this
// - error
// - cut out
// - skip the run (that's what we do because we're lazy bums)
if isNestedCase() {
log.Printf("detected nested placeholder in run %d \"%s\", skipping \n", run.ID, run.GetText(docBytes))
continue
}

// case 1, assemble and continue
placeholders = append(placeholders, assembleFullPlaceholders(run, openPos, closePos)...)
continue
}

// More open than closing delimiters, e.g. '{foo}{bar'
// this can only mean that a placeholder is left unclosed after this run
// For the length this means that (len(openPos) + 1) == len(closePos)
// So we can be sure that the last position in openPos is the opening tag of the
// unclosed placeholder.
if len(openPos) > len(closePos) {
// merge full placeholders in the run, leaving out the last openPos since
// we know that the one is left over and must be handled separately below
placeholders = append(placeholders, assembleFullPlaceholders(run, openPos[:len(openPos)-1], closePos)...)

// add the unclosed part of the placeholder to a tmp placeholder var
unclosedOpenPos := openPos[len(openPos)-1]
fragment := NewPlaceholderFragment(0, Position{int64(unclosedOpenPos), int64(len(runText))}, run)
unclosedPlaceholder.Fragments = append(unclosedPlaceholder.Fragments, fragment)
hasOpenPlaceholder = true
continue
}

// More closing than opening delimiters, e.g. '}{foo}'
// this can only mean that there must be an unclosed placeholder which
// is closed in this run.
if len(openPos) < len(closePos) {
// merge full placeholders in the run, leaving out the last closePos since
// we know that the one is left over and must be handled separately below
placeholders = append(placeholders, assembleFullPlaceholders(run, openPos, closePos[:len(closePos)-1])...)

// there is only a closePos and no open pos
if len(closePos) == 1 {
fragment := NewPlaceholderFragment(0, Position{0, int64(int64(closePos[0]) + 1)}, run)
unclosedPlaceholder.Fragments = append(unclosedPlaceholder.Fragments, fragment)
placeholders = append(placeholders, unclosedPlaceholder)
unclosedPlaceholder = new(Placeholder)
hasOpenPlaceholder = false
continue
}
continue
}

// No placeholders at all.
// The run is only relevant if there is an unclosed placeholder from a previous run.
// In that case it means that the full run-text belongs to the placeholder.
// For example, if a placeholder has three fragments in total, this represents fragment 2 (see below)
// 1) '{foo'
// 2) 'bar-'
// 3) '-baz}
if len(openPos) == 0 && len(closePos) == 0 {
if hasOpenPlaceholder {
fragment := NewPlaceholderFragment(0, Position{0, int64(len(runText))}, run)
unclosedPlaceholder.Fragments = append(unclosedPlaceholder.Fragments, fragment)
if !hasDelimiter {
// If a run has no delimiter, it must be a span run. Thus we add the run to all the partial framents that
// has not been closed.
for i := 0; i < len(stack); i++ {
stack[i].SpanRun = append(stack[i].SpanRun, run)
continue
}
}
}

// Make sure that we're dealing with valid and proper placeholders only.
// Everything else may cause issues like out of bounds errors or any other sort of weird things.
// Here we will also assemble the final list of placeholders and return only the valid ones.
var validPlaceholders []*Placeholder
for _, placeholder := range placeholders {
if !placeholder.Valid() {
continue
}

// in order to catch false positives, ensure that all placeholders have BOTH delimiters
text := placeholder.Text(docBytes)
if !strings.ContainsRune(text, OpenDelimiter) ||
!strings.ContainsRune(text, CloseDelimiter) {
continue
}

// placeholder is valid
validPlaceholders = append(validPlaceholders, placeholder)
// Warn user there are some unmatched open delimiters (a.k.a corrupted placeholders) left in the stack
for _, fragment := range stack {
log.Printf("detected unmatched open delimiter in run %d \"%s\", index %d, skipping \n", fragment.Run.ID, fragment.Run.GetText(docBytes), fragment.Position.Start)
}
return validPlaceholders, nil
}

// assembleFullPlaceholders will extract all complete placeholders inside the run given a open and close position.
// The open and close positions are the positions of the Delimiters which must already be known at this point.
// openPos and closePos are expected to be symmetrical (e.g. same length).
// Example: openPos := []int{10,20,30}; closePos := []int{13, 23, 33} resulting in 3 fragments (10,13),(20,23),(30,33)
// The n-th elements inside openPos and closePos must be matching delimiter positions.
func assembleFullPlaceholders(run *Run, openPos, closePos []int) (placeholders []*Placeholder) {
for i := 0; i < len(openPos); i++ {
start := openPos[i]
end := closePos[i] + 1 // +1 is required to include the closing delimiter in the text
fragment := NewPlaceholderFragment(0, Position{int64(start), int64(end)}, run)
p := &Placeholder{Fragments: []*PlaceholderFragment{fragment}}
placeholders = append(placeholders, p)
}
return placeholders
return placeholders, nil
}

// AddPlaceholderDelimiter will wrap the given string with OpenDelimiter and CloseDelimiter.
Expand Down
10 changes: 5 additions & 5 deletions placeholder_fragment.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,17 @@ var (
type PlaceholderFragment struct {
ID int // ID is used to identify the fragments globally.
Position Position // Position of the actual fragment within the run text. 0 == (Run.Text.OpenTag.End + 1)
Number int // numbering fragments for ease of use. Numbering is scoped to placeholders.
Run *Run
//Number int // numbering fragments for ease of use. Numbering is scoped to placeholders.
Run *Run
SpanRun []*Run
}

// NewPlaceholderFragment returns an initialized PlaceholderFragment with a new, auto-incremented, ID.
func NewPlaceholderFragment(number int, pos Position, run *Run) *PlaceholderFragment {
func NewPlaceholderFragment(pos Position, run *Run) *PlaceholderFragment {
return &PlaceholderFragment{
ID: NewFragmentID(),
Position: pos,
Number: number,
Run: run,
Position: pos,
}
}

Expand Down
11 changes: 0 additions & 11 deletions placeholder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,3 @@ func TestParsePlaceholders(t *testing.T) {
}
}
}

func TestPlaceholder_AssembleFullPlaceholders(t *testing.T) {
expectedCount := 2
openPos := []int{10, 18}
closePos := []int{17, 25}

placeholders := assembleFullPlaceholders(&Run{}, openPos, closePos)
if len(placeholders) != expectedCount {
t.Errorf("not all full placeholders were parsed, want=%d, have=%d", expectedCount, len(placeholders))
}
}
1 change: 1 addition & 0 deletions replace_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ func TestReplacer_Replace(t *testing.T) {
"mixed-key.separator_styles#": "mixed-key.separator_styles#",
"yet-another_placeholder": "yet-another_placeholder",
"foo": "foo",
"nested": "nested",
}

doc, err := Open("./test/template.docx")
Expand Down
Binary file modified test/template.docx
Binary file not shown.
Loading