diff --git a/matchtree.go b/matchtree.go index fad840c0e..e74c3641c 100644 --- a/matchtree.go +++ b/matchtree.go @@ -239,6 +239,7 @@ func (t *symbolRegexpMatchTree) matches(cp *contentProvider, cost int, known map type symbolSubstrMatchTree struct { *substrMatchTree + exact bool patternSize uint32 fileEndRunes []uint32 fileEndSymbol []uint32 @@ -292,12 +293,19 @@ func (t *symbolSubstrMatchTree) prepare(doc uint32) { continue } - if end <= sections[secIdx].End { - t.current[0].symbol = true - t.current[0].symbolIdx = uint32(secIdx) - trimmed = append(trimmed, t.current[0]) + if end > sections[secIdx].End { + t.current = t.current[1:] + continue + } + + if t.exact && !(start == sections[secIdx].Start && end == sections[secIdx].End) { + t.current = t.current[1:] + continue } + t.current[0].symbol = true + t.current[0].symbolIdx = uint32(secIdx) + trimmed = append(trimmed, t.current[0]) t.current = t.current[1:] } t.current = trimmed @@ -983,7 +991,12 @@ func (d *indexData) newMatchTree(q query.Q, opt matchTreeOpt) (matchTree, error) optCopy := opt optCopy.DisableWordMatchOptimization = true - subMT, err := d.newMatchTree(s.Expr, optCopy) + expr, wasAnchored := s.Expr, false + if regexpExpr, ok := expr.(*query.Regexp); ok { + expr, wasAnchored = stripAnchors(regexpExpr) + } + + subMT, err := d.newMatchTree(expr, optCopy) if err != nil { return nil, err } @@ -991,6 +1004,7 @@ func (d *indexData) newMatchTree(q query.Q, opt matchTreeOpt) (matchTree, error) if substr, ok := subMT.(*substrMatchTree); ok { return &symbolSubstrMatchTree{ substrMatchTree: substr, + exact: wasAnchored, patternSize: uint32(utf8.RuneCountInString(substr.query.Pattern)), fileEndRunes: d.fileEndRunes, fileEndSymbol: d.fileEndSymbol, @@ -1256,3 +1270,37 @@ func pruneMatchTree(mt matchTree) (matchTree, error) { } return mt, err } + +func stripAnchors(in *query.Regexp) (out *query.Regexp, stripped bool) { + stripRegexpAnchors := func(in *syntax.Regexp) (out *syntax.Regexp, stripped bool) { + if in.Op != syntax.OpConcat { + return out, false + } + + if len(in.Sub) < 3 { + return out, false + } + + firstOp, lastOp := in.Sub[0].Op, in.Sub[len(in.Sub)-1].Op + + if firstOp != syntax.OpBeginLine && firstOp != syntax.OpBeginText { + return out, false + } + if lastOp != syntax.OpEndLine && lastOp != syntax.OpEndText { + return out, false + } + + inCopy := *in + inCopy.Sub = in.Sub[1 : len(in.Sub)-1] // remove the first and last ops, which are the anchors + return &inCopy, true + } + + newRegexp, stripped := stripRegexpAnchors(in.Regexp) + if !stripped { + return in, false + } + + inCopy := *in + inCopy.Regexp = newRegexp + return &inCopy, true +}