From adea6af35eb25cd984b0e82efd03cd12656231ea Mon Sep 17 00:00:00 2001
From: Chuck Grindel
Date: Sun, 6 Nov 2022 11:34:29 -0700
Subject: [PATCH 01/18] Upgraded to 1.19 in go.mod
---
go.mod | 4 +++-
go.sum | 15 +++++++++++++++
2 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/go.mod b/go.mod
index b344236..fe80d0b 100644
--- a/go.mod
+++ b/go.mod
@@ -1,6 +1,6 @@
module github.com/ekalinin/github-markdown-toc.go
-go 1.17
+go 1.19
require gopkg.in/alecthomas/kingpin.v2 v2.2.4
@@ -14,4 +14,6 @@ require (
github.com/mattn/go-isatty v0.0.14 // indirect
github.com/sergi/go-diff v1.2.0 // indirect
github.com/stretchr/testify v1.7.0 // indirect
+ golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect
+ golang.org/x/tools v0.2.0 // indirect
)
diff --git a/go.sum b/go.sum
index 93e1e05..d0ccf99 100644
--- a/go.sum
+++ b/go.sum
@@ -24,8 +24,23 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 h1:VLliZ0d+/avPrXXH+OakdXhpJuEoBZuwh1m2j7U6Iug=
+golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
+golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c h1:F1jZWGFhYfh0Ci55sIpILtKKK8p3i2/krTr0H1rg74I=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/tools v0.2.0 h1:G6AHpWxTMGY1KyEYoAQ5WTtIekUUvDNjan3ugu60JvE=
+golang.org/x/tools v0.2.0/go.mod h1:y4OqIKeOV/fWJetJ8bXPU1sEVniLMIyDAZWeHdV+NTA=
+golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/alecthomas/kingpin.v2 v2.2.4 h1:CC8tJ/xljioKrK6ii3IeWVXU4Tw7VB+LbjZBJaBxN50=
gopkg.in/alecthomas/kingpin.v2 v2.2.4/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
From b0bea08dd575bf690865d116ec12b33a8fffa85b Mon Sep 17 00:00:00 2001
From: Chuck Grindel
Date: Sun, 6 Nov 2022 13:32:27 -0700
Subject: [PATCH 02/18] Save work before introducing html tokenizer
---
cmd/gh-md-toc/main.go | 13 +++++++++++--
ghdoc.go | 8 +-------
ghdoc_test.go | 2 +-
headerfinder.go | 38 ++++++++++++++++++++++++++++++++++++++
headerfinder_test.go | 16 ++++++++++++++++
5 files changed, 67 insertions(+), 10 deletions(-)
create mode 100644 headerfinder.go
create mode 100644 headerfinder_test.go
diff --git a/cmd/gh-md-toc/main.go b/cmd/gh-md-toc/main.go
index f09b9ba..f085b9b 100644
--- a/cmd/gh-md-toc/main.go
+++ b/cmd/gh-md-toc/main.go
@@ -3,6 +3,7 @@ package main
import (
"fmt"
"io"
+ "log"
"os"
"gopkg.in/alecthomas/kingpin.v2"
@@ -48,10 +49,11 @@ func main() {
for _, p := range *paths {
ghdoc := ghtoc.NewGHDoc(p, absPathsInToc, *startDepth, *depth, !*noEscape, *token, *indent, *debug)
+ getFn := func(ch chan *ghtoc.GHToc, ghdoc *ghtoc.GHDoc) { ch <- ghdoc.GetToc() }
if *serial {
- ch <- ghdoc.GetToc()
+ getFn(ch, ghdoc)
} else {
- go func(path string) { ch <- ghdoc.GetToc() }(p)
+ go getFn(ch, ghdoc)
}
}
@@ -62,8 +64,15 @@ func main() {
fmt.Println()
}
+ // DEBUG BEGIN
+ log.Printf("*** CHUCK: pathsCount: %+#v", pathsCount)
+ // DEBUG END
+
for i := 1; i <= pathsCount; i++ {
toc := <-ch
+ // DEBUG BEGIN
+ log.Printf("*** CHUCK: in loop toc: %+#v", toc)
+ // DEBUG END
// #14, check if there's really TOC?
if toc != nil {
check(toc.Print(os.Stdout))
diff --git a/ghdoc.go b/ghdoc.go
index a7ed8e0..bbc72d3 100644
--- a/ghdoc.go
+++ b/ghdoc.go
@@ -6,7 +6,6 @@ import (
"log"
"net/url"
"os"
- "regexp"
"strconv"
"strings"
)
@@ -141,12 +140,7 @@ func (doc *GHDoc) GrabToc() *GHToc {
doc.d("GrabToc: start, html size: " + strconv.Itoa(len(doc.html)))
defer doc.d("GrabToc: done.")
- re := `(?si)[1-6])>\s*` +
- `[^"]*)"[^>]*>\s*` +
- `.*?(?P.*?)README in another language
+ README in another language
`,
AbsPaths: false,
Depth: 0,
diff --git a/headerfinder.go b/headerfinder.go
new file mode 100644
index 0000000..d27e005
--- /dev/null
+++ b/headerfinder.go
@@ -0,0 +1,38 @@
+package ghtoc
+
+import "regexp"
+
+// const _headerRegexpStr = `(?si)[1-6])>\s*` +
+// `[^"]*)"[^>]*>\s*` +
+// `.*?(?P.*?)[1-6])>\s*` +
+ `[^"]*)"\s.*` +
+ `[^>]*>` +
+ `.*?(?P.*?)[1-6])>\s*` +
+ `[^"]*)"[^>]*>\s*` +
+ `.*?(?P.*?)Document Title
+`
+
+func TestHeaderRegexp(t *testing.T) {
+ r := newHeaderRegexp()
+
+ results := r.FindAllStringSubmatch(singleHdr, -1)
+ if len(results) != 1 {
+ t.Errorf("Expected a single header. %+#v", results)
+ }
+}
From 8e7bc67f290abb1def7433dd54fd64a45df3a3e8 Mon Sep 17 00:00:00 2001
From: Chuck Grindel
Date: Mon, 7 Nov 2022 09:14:47 -0700
Subject: [PATCH 03/18] Save work with html parser
---
ghdoc_test.go | 2 +-
go.mod | 7 +-
go.sum | 19 +----
headerfinder.go | 164 ++++++++++++++++++++++++++++++++++++++-----
headerfinder_test.go | 20 ++++--
5 files changed, 169 insertions(+), 43 deletions(-)
diff --git a/ghdoc_test.go b/ghdoc_test.go
index f67be12..4b0d3dd 100644
--- a/ghdoc_test.go
+++ b/ghdoc_test.go
@@ -30,7 +30,7 @@ func TestGrabTocOneRow(t *testing.T) {
}
doc := &GHDoc{
html: `
- README in another language
+ README in another language
`,
AbsPaths: false,
Depth: 0,
diff --git a/go.mod b/go.mod
index fe80d0b..720d191 100644
--- a/go.mod
+++ b/go.mod
@@ -2,7 +2,10 @@ module github.com/ekalinin/github-markdown-toc.go
go 1.19
-require gopkg.in/alecthomas/kingpin.v2 v2.2.4
+require (
+ golang.org/x/net v0.1.0
+ gopkg.in/alecthomas/kingpin.v2 v2.2.4
+)
require (
github.com/alecthomas/assert v0.0.0-20170929043011-405dbfeb8e38 // indirect
@@ -14,6 +17,4 @@ require (
github.com/mattn/go-isatty v0.0.14 // indirect
github.com/sergi/go-diff v1.2.0 // indirect
github.com/stretchr/testify v1.7.0 // indirect
- golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect
- golang.org/x/tools v0.2.0 // indirect
)
diff --git a/go.sum b/go.sum
index d0ccf99..c7c02d4 100644
--- a/go.sum
+++ b/go.sum
@@ -24,23 +24,10 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
-golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
-golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
-golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 h1:VLliZ0d+/avPrXXH+OakdXhpJuEoBZuwh1m2j7U6Iug=
-golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
-golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
-golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
-golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
-golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
-golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c h1:F1jZWGFhYfh0Ci55sIpILtKKK8p3i2/krTr0H1rg74I=
+golang.org/x/net v0.1.0 h1:hZ/3BUoy5aId7sCpA/Tc5lt8DkFgdVS2onTpJsZ/fl0=
+golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
-golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
-golang.org/x/tools v0.2.0 h1:G6AHpWxTMGY1KyEYoAQ5WTtIekUUvDNjan3ugu60JvE=
-golang.org/x/tools v0.2.0/go.mod h1:y4OqIKeOV/fWJetJ8bXPU1sEVniLMIyDAZWeHdV+NTA=
-golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U=
gopkg.in/alecthomas/kingpin.v2 v2.2.4 h1:CC8tJ/xljioKrK6ii3IeWVXU4Tw7VB+LbjZBJaBxN50=
gopkg.in/alecthomas/kingpin.v2 v2.2.4/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
diff --git a/headerfinder.go b/headerfinder.go
index d27e005..f1c7519 100644
--- a/headerfinder.go
+++ b/headerfinder.go
@@ -1,27 +1,42 @@
package ghtoc
-import "regexp"
+import (
+ "io"
+ "log"
+ "regexp"
+ "strings"
-// const _headerRegexpStr = `(?si)[1-6])>\s*` +
-// `[^"]*)"[^>]*>\s*` +
-// `.*?(?P.*?)[1-6])>\s*` +
- `[^"]*)"\s.*` +
- `[^>]*>` +
+ `[^"]*)"[^>]*>\s*` +
`.*?(?P.*?)[1-6])>\s*` +
- `[^"]*)"[^>]*>\s*` +
- `.*?(?P.*?)[1-6])>\s*` +
+// `[^"]*)"\s.*` +
+// `[^>]*>` +
+// `.*?(?P.*?)[1-6])>\s*` +
+// `[^"]*)"[^>]*>\s*` +
+// `.*?(?P.*?)Document Title
`
-func TestHeaderRegexp(t *testing.T) {
- r := newHeaderRegexp()
+// func TestHeaderRegexp(t *testing.T) {
+// r := newHeaderRegexp()
+// results := r.FindAllStringSubmatch(singleHdr, -1)
+// if len(results) != 1 {
+// t.Errorf("Expected a single header. %+#v", results)
+// }
+// }
- results := r.FindAllStringSubmatch(singleHdr, -1)
+func TestFindHeaders(t *testing.T) {
+ results := findHeadersInString(singleHdr)
+ // DEBUG BEGIN
+ log.Printf("*** CHUCK: results: %+#v", results)
+ // DEBUG END
if len(results) != 1 {
t.Errorf("Expected a single header. %+#v", results)
}
From 84262ecd4271350759e583c04daf12455a61925b Mon Sep 17 00:00:00 2001
From: Chuck Grindel
Date: Mon, 7 Nov 2022 09:19:42 -0700
Subject: [PATCH 04/18] Switched to struct instead of pointer for return
---
headerfinder.go | 21 ++++++++++-----------
1 file changed, 10 insertions(+), 11 deletions(-)
diff --git a/headerfinder.go b/headerfinder.go
index f1c7519..f9f4ef1 100644
--- a/headerfinder.go
+++ b/headerfinder.go
@@ -59,13 +59,13 @@ type Header struct {
Name string
}
-func findHeadersInString(str string) []*Header {
+func findHeadersInString(str string) []Header {
r := strings.NewReader(str)
return findHeaders(r)
}
-func findHeaders(r io.Reader) []*Header {
- hdrs := make([]*Header, 0)
+func findHeaders(r io.Reader) []Header {
+ hdrs := make([]Header, 0)
tokenizer := html.NewTokenizer(r)
for {
tt := tokenizer.Next()
@@ -82,8 +82,7 @@ func findHeaders(r io.Reader) []*Header {
// log.Printf("*** CHUCK: default t.DataAtom: %+#v", t.DataAtom)
// DEBUG END
- hdr := createHeader(tokenizer, t)
- if hdr != nil {
+ if hdr, ok := createHeader(tokenizer, t); ok {
hdrs = append(hdrs, hdr)
}
}
@@ -112,10 +111,10 @@ func getHxDepth(dataAtom atom.Atom) HxDepth {
return InvalidDepth
}
-func createHeader(tokenizer *html.Tokenizer, token html.Token) *Header {
+func createHeader(tokenizer *html.Tokenizer, token html.Token) (Header, bool) {
hxDepth := getHxDepth(token.DataAtom)
if hxDepth == InvalidDepth {
- return nil
+ return Header{}, false
}
var href, name string
@@ -125,7 +124,7 @@ func createHeader(tokenizer *html.Tokenizer, token html.Token) *Header {
t := tokenizer.Token()
switch t.Type {
case html.ErrorToken:
- return nil
+ return Header{}, false
case html.StartTagToken:
tokenDepth++
if t.DataAtom == atom.A {
@@ -133,17 +132,17 @@ func createHeader(tokenizer *html.Tokenizer, token html.Token) *Header {
href = hrefAttr.Val
} else {
// Expected to find href attribute
- return nil
+ return Header{}, false
}
}
case html.EndTagToken:
// If we encountered the matching end tag for the Hx, then we are done
if t.DataAtom == token.DataAtom {
- return &Header{
+ return Header{
Depth: hxDepth,
Name: name,
Href: href,
- }
+ }, true
}
tokenDepth--
case html.TextToken:
From 936143477ba200ce69e4bc923f4335d88b1f4200 Mon Sep 17 00:00:00 2001
From: Chuck Grindel
Date: Mon, 7 Nov 2022 09:47:28 -0700
Subject: [PATCH 05/18] Add testify. Initial tests pass
---
go.mod | 6 ++++-
go.sum | 3 +++
headerfinder.go | 16 ++++++--------
headerfinder_test.go | 52 +++++++++++++++++++++++++++++++++-----------
4 files changed, 54 insertions(+), 23 deletions(-)
diff --git a/go.mod b/go.mod
index 720d191..67a976f 100644
--- a/go.mod
+++ b/go.mod
@@ -3,6 +3,7 @@ module github.com/ekalinin/github-markdown-toc.go
go 1.19
require (
+ github.com/stretchr/testify v1.7.0
golang.org/x/net v0.1.0
gopkg.in/alecthomas/kingpin.v2 v2.2.4
)
@@ -14,7 +15,10 @@ require (
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc // indirect
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
+ github.com/kr/pretty v0.1.0 // indirect
github.com/mattn/go-isatty v0.0.14 // indirect
+ github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/sergi/go-diff v1.2.0 // indirect
- github.com/stretchr/testify v1.7.0 // indirect
+ gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect
+ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect
)
diff --git a/go.sum b/go.sum
index c7c02d4..3d2a333 100644
--- a/go.sum
+++ b/go.sum
@@ -11,8 +11,10 @@ github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRF
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y=
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
@@ -31,6 +33,7 @@ golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U=
gopkg.in/alecthomas/kingpin.v2 v2.2.4 h1:CC8tJ/xljioKrK6ii3IeWVXU4Tw7VB+LbjZBJaBxN50=
gopkg.in/alecthomas/kingpin.v2 v2.2.4/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
diff --git a/headerfinder.go b/headerfinder.go
index f9f4ef1..d69b61b 100644
--- a/headerfinder.go
+++ b/headerfinder.go
@@ -75,13 +75,6 @@ func findHeaders(r io.Reader) []Header {
return hdrs
case html.StartTagToken:
t := tokenizer.Token()
-
- // DEBUG BEGIN
- log.Printf("*** CHUCK: default t: %+#v", t)
- // log.Printf("*** CHUCK: default t.Type: %+#v", t.Type)
- // log.Printf("*** CHUCK: default t.DataAtom: %+#v", t.DataAtom)
- // DEBUG END
-
if hdr, ok := createHeader(tokenizer, t); ok {
hdrs = append(hdrs, hdr)
}
@@ -118,10 +111,15 @@ func createHeader(tokenizer *html.Tokenizer, token html.Token) (Header, bool) {
}
var href, name string
- tokenDepth := 0
+ // Start at 1 because we are inside the Hx tag
+ tokenDepth := 1
for {
tokenizer.Next()
t := tokenizer.Token()
+ // DEBUG BEGIN
+ log.Printf("*** CHUCK: createHeader t: %+#v", t)
+ log.Printf("*** CHUCK: createHeader tokenDepth: %+#v", tokenDepth)
+ // DEBUG END
switch t.Type {
case html.ErrorToken:
return Header{}, false
@@ -147,7 +145,7 @@ func createHeader(tokenizer *html.Tokenizer, token html.Token) (Header, bool) {
tokenDepth--
case html.TextToken:
if tokenDepth == 1 {
- name = t.Data
+ name = strings.TrimSpace(t.Data)
}
}
}
diff --git a/headerfinder_test.go b/headerfinder_test.go
index 53ba71b..76e62b0 100644
--- a/headerfinder_test.go
+++ b/headerfinder_test.go
@@ -3,26 +3,52 @@ package ghtoc
import (
"log"
"testing"
+
+ "github.com/stretchr/testify/assert"
)
-const singleHdr = `
+const singleH1 = `
Document Title
`
-// func TestHeaderRegexp(t *testing.T) {
-// r := newHeaderRegexp()
-// results := r.FindAllStringSubmatch(singleHdr, -1)
-// if len(results) != 1 {
-// t.Errorf("Expected a single header. %+#v", results)
+const singleH2 = `
+
+
+
+
+ Interesting Section
+
+`
+
+// func assertHeaderEqual(t *testing.T, expected, actual Header) {
+// if actual != expected {
+// t.Errorf("Unexpected header value. actual: %+#v, expected: %+#v", actual, expected)
// }
// }
func TestFindHeaders(t *testing.T) {
- results := findHeadersInString(singleHdr)
- // DEBUG BEGIN
- log.Printf("*** CHUCK: results: %+#v", results)
- // DEBUG END
- if len(results) != 1 {
- t.Errorf("Expected a single header. %+#v", results)
- }
+ t.Run("single H1", func(t *testing.T) {
+ // DEBUG BEGIN
+ log.Printf("*** CHUCK: ===========")
+ // DEBUG END
+ results := findHeadersInString(singleH1)
+ assert.Len(t, results, 1)
+ assert.Equal(
+ t,
+ Header{Depth: 0, Href: "#document-title", Name: "Document Title"},
+ results[0],
+ )
+ })
+ t.Run("single H2", func(t *testing.T) {
+ // DEBUG BEGIN
+ log.Printf("*** CHUCK: ===========")
+ // DEBUG END
+ results := findHeadersInString(singleH2)
+ assert.Len(t, results, 1)
+ assert.Equal(
+ t,
+ Header{Depth: 1, Href: "#interesting-section", Name: "Interesting Section"},
+ results[0],
+ )
+ })
}
From 2d6ce40e92972db9ed756d25dd069f37c620d889 Mon Sep 17 00:00:00 2001
From: Chuck Grindel
Date: Mon, 7 Nov 2022 10:04:11 -0700
Subject: [PATCH 06/18] Added multiple section test
---
headerfinder_test.go | 50 +++++++++++++++++++++++++++++++++-----------
1 file changed, 38 insertions(+), 12 deletions(-)
diff --git a/headerfinder_test.go b/headerfinder_test.go
index 76e62b0..35950b9 100644
--- a/headerfinder_test.go
+++ b/headerfinder_test.go
@@ -1,7 +1,6 @@
package ghtoc
import (
- "log"
"testing"
"github.com/stretchr/testify/assert"
@@ -20,17 +19,18 @@ const singleH2 = `
`
-// func assertHeaderEqual(t *testing.T, expected, actual Header) {
-// if actual != expected {
-// t.Errorf("Unexpected header value. actual: %+#v, expected: %+#v", actual, expected)
-// }
-// }
+const multipleSections = `
+Document Title
+Hi
+First Section
+Some Text
+First Subsection
+Second Section
+Second Subsection
+`
func TestFindHeaders(t *testing.T) {
t.Run("single H1", func(t *testing.T) {
- // DEBUG BEGIN
- log.Printf("*** CHUCK: ===========")
- // DEBUG END
results := findHeadersInString(singleH1)
assert.Len(t, results, 1)
assert.Equal(
@@ -40,9 +40,6 @@ func TestFindHeaders(t *testing.T) {
)
})
t.Run("single H2", func(t *testing.T) {
- // DEBUG BEGIN
- log.Printf("*** CHUCK: ===========")
- // DEBUG END
results := findHeadersInString(singleH2)
assert.Len(t, results, 1)
assert.Equal(
@@ -51,4 +48,33 @@ func TestFindHeaders(t *testing.T) {
results[0],
)
})
+ t.Run("multiple sections", func(t *testing.T) {
+ results := findHeadersInString(multipleSections)
+ assert.Len(t, results, 5)
+ assert.Equal(
+ t,
+ Header{Depth: 0, Href: "#document-title", Name: "Document Title"},
+ results[0],
+ )
+ assert.Equal(
+ t,
+ Header{Depth: 1, Href: "#first-section", Name: "First Section"},
+ results[1],
+ )
+ assert.Equal(
+ t,
+ Header{Depth: 2, Href: "#first-subsection", Name: "First Subsection"},
+ results[2],
+ )
+ assert.Equal(
+ t,
+ Header{Depth: 1, Href: "#second-section", Name: "Second Section"},
+ results[3],
+ )
+ assert.Equal(
+ t,
+ Header{Depth: 3, Href: "#second-subsection", Name: "Second Subsection"},
+ results[4],
+ )
+ })
}
From 14d68a47e71957619d5b8ad7210632860812e0f3 Mon Sep 17 00:00:00 2001
From: Chuck Grindel
Date: Mon, 7 Nov 2022 10:05:25 -0700
Subject: [PATCH 07/18] Clean up: remove debug logging from createHeader
---
headerfinder.go | 5 -----
1 file changed, 5 deletions(-)
diff --git a/headerfinder.go b/headerfinder.go
index d69b61b..30b9ef0 100644
--- a/headerfinder.go
+++ b/headerfinder.go
@@ -2,7 +2,6 @@ package ghtoc
import (
"io"
- "log"
"regexp"
"strings"
@@ -116,10 +115,6 @@ func createHeader(tokenizer *html.Tokenizer, token html.Token) (Header, bool) {
for {
tokenizer.Next()
t := tokenizer.Token()
- // DEBUG BEGIN
- log.Printf("*** CHUCK: createHeader t: %+#v", t)
- log.Printf("*** CHUCK: createHeader tokenDepth: %+#v", tokenDepth)
- // DEBUG END
switch t.Type {
case html.ErrorToken:
return Header{}, false
From ccaa8f9f3b8952cb304fde01693c79abc07e6f44 Mon Sep 17 00:00:00 2001
From: Chuck Grindel
Date: Mon, 7 Nov 2022 11:09:55 -0700
Subject: [PATCH 08/18] Collect name from parts
---
ghdoc.go | 130 +++++++++++++++++++++++++++----------------
headerfinder.go | 59 ++++----------------
headerfinder_test.go | 19 +++++++
3 files changed, 113 insertions(+), 95 deletions(-)
diff --git a/ghdoc.go b/ghdoc.go
index bbc72d3..5dae117 100644
--- a/ghdoc.go
+++ b/ghdoc.go
@@ -140,64 +140,100 @@ func (doc *GHDoc) GrabToc() *GHToc {
doc.d("GrabToc: start, html size: " + strconv.Itoa(len(doc.html)))
defer doc.d("GrabToc: done.")
- r := headerRegexp()
listIndentation := generateListIndentation(doc.Indent)
toc := GHToc{}
- minHeaderNum := 6
- var groups []map[string]string
- doc.d("GrabToc: matching ...")
- for idx, match := range r.FindAllStringSubmatch(doc.html, -1) {
- doc.d("GrabToc: match #" + strconv.Itoa(idx) + " ...")
- group := make(map[string]string)
- // fill map for groups
- for i, name := range r.SubexpNames() {
- if i == 0 || name == "" {
- continue
- }
- doc.d("GrabToc: process group: " + name + ": " + match[i] + " ...")
- group[name] = removeStuff(match[i])
- }
- // update minimum header number
- n, _ := strconv.Atoi(group["num"])
- if n < minHeaderNum {
- minHeaderNum = n
- }
- groups = append(groups, group)
+ for _, hdr := range findHeadersInString(doc.html) {
+ toc = append(toc, doc.tocEntry(listIndentation(), hdr))
}
- var tmpSection string
- doc.d("GrabToc: processing groups ...")
- doc.d("Including starting from level " + strconv.Itoa(doc.StartDepth))
- for _, group := range groups {
- // format result
- n, _ := strconv.Atoi(group["num"])
- if n <= doc.StartDepth {
- continue
- }
- if doc.Depth > 0 && n > doc.Depth {
- continue
- }
+ return &toc
+}
- link, _ := url.QueryUnescape(group["href"])
- if doc.AbsPaths {
- link = doc.Path + link
- }
+func (doc *GHDoc) tocEntry(indent string, hdr Header) string {
+ // TODO(chuck): Calculate the repeat count with the doc.StartDepth
+ return strings.Repeat(indent, int(hdr.Depth)) + "* " +
+ "[" + doc.tocName(hdr.Name) + "]" +
+ "(" + doc.tocLink(hdr.Href) + ")"
+}
- tmpSection = removeStuff(group["name"])
- if doc.Escape {
- tmpSection = EscapeSpecChars(tmpSection)
- }
- tocItem := strings.Repeat(listIndentation(), n-minHeaderNum-doc.StartDepth) + "* " +
- "[" + tmpSection + "]" +
- "(" + link + ")"
- //fmt.Println(tocItem)
- toc = append(toc, tocItem)
+func (doc *GHDoc) tocName(name string) string {
+ if doc.Escape {
+ return EscapeSpecChars(name)
}
+ return name
+}
- return &toc
+func (doc *GHDoc) tocLink(href string) string {
+ link, _ := url.QueryUnescape(href)
+ if doc.AbsPaths {
+ link = doc.Path + link
+ }
+ return link
}
+//func (doc *GHDoc) GrabToc() *GHToc {
+// doc.d("GrabToc: start, html size: " + strconv.Itoa(len(doc.html)))
+// defer doc.d("GrabToc: done.")
+
+// r := headerRegexp()
+// listIndentation := generateListIndentation(doc.Indent)
+
+// toc := GHToc{}
+// minHeaderNum := 6
+// var groups []map[string]string
+// doc.d("GrabToc: matching ...")
+// for idx, match := range r.FindAllStringSubmatch(doc.html, -1) {
+// doc.d("GrabToc: match #" + strconv.Itoa(idx) + " ...")
+// group := make(map[string]string)
+// // fill map for groups
+// for i, name := range r.SubexpNames() {
+// if i == 0 || name == "" {
+// continue
+// }
+// doc.d("GrabToc: process group: " + name + ": " + match[i] + " ...")
+// group[name] = removeStuff(match[i])
+// }
+// // update minimum header number
+// n, _ := strconv.Atoi(group["num"])
+// if n < minHeaderNum {
+// minHeaderNum = n
+// }
+// groups = append(groups, group)
+// }
+
+// var tmpSection string
+// doc.d("GrabToc: processing groups ...")
+// doc.d("Including starting from level " + strconv.Itoa(doc.StartDepth))
+// for _, group := range groups {
+// // format result
+// n, _ := strconv.Atoi(group["num"])
+// if n <= doc.StartDepth {
+// continue
+// }
+// if doc.Depth > 0 && n > doc.Depth {
+// continue
+// }
+
+// link, _ := url.QueryUnescape(group["href"])
+// if doc.AbsPaths {
+// link = doc.Path + link
+// }
+
+// tmpSection = removeStuff(group["name"])
+// if doc.Escape {
+// tmpSection = EscapeSpecChars(tmpSection)
+// }
+// tocItem := strings.Repeat(listIndentation(), n-minHeaderNum-doc.StartDepth) + "* " +
+// "[" + tmpSection + "]" +
+// "(" + link + ")"
+// //fmt.Println(tocItem)
+// toc = append(toc, tocItem)
+// }
+
+// return &toc
+//}
+
// GetToc return GHToc for a document
func (doc *GHDoc) GetToc() *GHToc {
if err := doc.Convert2HTML(); err != nil {
diff --git a/headerfinder.go b/headerfinder.go
index 30b9ef0..37a1cde 100644
--- a/headerfinder.go
+++ b/headerfinder.go
@@ -2,7 +2,6 @@ package ghtoc
import (
"io"
- "regexp"
"strings"
"golang.org/x/net/html"
@@ -15,42 +14,6 @@ type HxDepth int
// InvalidDepth designates that the data atom is not a valid Hx.
const InvalidDepth HxDepth = -1
-const _headerRegexpStr = `(?si)[1-6])>\s*` +
- `[^"]*)"[^>]*>\s*` +
- `.*?(?P.*?)[1-6])>\s*` +
-// `[^"]*)"\s.*` +
-// `[^>]*>` +
-// `.*?(?P.*?)[1-6])>\s*` +
-// `[^"]*)"[^>]*>\s*` +
-// `.*?(?P.*?)
Date: Mon, 7 Nov 2022 12:24:55 -0700
Subject: [PATCH 09/18] Fixing bugs: filter TOC entries by StartDepth and Depth in GrabToc
---
ghdoc.go | 18 +++++++++++++++++-
ghdoc_test.go | 10 +++++++++-
headerfinder.go | 4 ++++
3 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/ghdoc.go b/ghdoc.go
index 5dae117..d4fbe32 100644
--- a/ghdoc.go
+++ b/ghdoc.go
@@ -142,9 +142,25 @@ func (doc *GHDoc) GrabToc() *GHToc {
listIndentation := generateListIndentation(doc.Indent)
+ minDepth := doc.StartDepth
+ var maxDepth int
+ if doc.Depth > 0 {
+ maxDepth = doc.Depth - 1
+ } else {
+ maxDepth = int(MaxHxDepth)
+ }
+
toc := GHToc{}
for _, hdr := range findHeadersInString(doc.html) {
- toc = append(toc, doc.tocEntry(listIndentation(), hdr))
+ // DEBUG BEGIN
+ log.Printf("*** CHUCK: GrabToc hdr: %+#v", hdr)
+ log.Printf("*** CHUCK: GrabToc minDepth: %+#v", minDepth)
+ log.Printf("*** CHUCK: GrabToc maxDepth: %+#v", maxDepth)
+ // DEBUG END
+ hDepth := int(hdr.Depth)
+ if hDepth >= minDepth && hDepth <= maxDepth {
+ toc = append(toc, doc.tocEntry(listIndentation(), hdr))
+ }
}
return &toc
diff --git a/ghdoc_test.go b/ghdoc_test.go
index 4b0d3dd..1c12012 100644
--- a/ghdoc_test.go
+++ b/ghdoc_test.go
@@ -180,8 +180,16 @@ func TestGrabTocDepth(t *testing.T) {
Depth: 1,
Indent: 2,
}
+ // DEBUG BEGIN
+ log.Printf("*** CHUCK: ==========")
+ // DEBUG END
toc := *doc.GrabToc()
-
+ // DEBUG BEGIN
+ log.Printf("*** CHUCK toc: ")
+ for idx, item := range toc {
+ log.Printf("*** CHUCK %d: %+#v", idx, item)
+ }
+ // DEBUG END
for i := 0; i <= len(tocExpected)-1; i++ {
if toc[i] != tocExpected[i] {
t.Error("Res :", toc[i], "\nExpected :", tocExpected[i])
diff --git a/headerfinder.go b/headerfinder.go
index 37a1cde..4b4d2fb 100644
--- a/headerfinder.go
+++ b/headerfinder.go
@@ -14,6 +14,10 @@ type HxDepth int
// InvalidDepth designates that the data atom is not a valid Hx.
const InvalidDepth HxDepth = -1
+// MaxHxDepth is the maximum HxDepth value.
+// H6 is the last Hx tag (5 = 6 - 1)
+const MaxHxDepth HxDepth = 5
+
// Header represents an HTML header
type Header struct {
Depth HxDepth
From 976cfb3c49429caec28220112ec1dd2e1755183f Mon Sep 17 00:00:00 2001
From: Chuck Grindel
Date: Mon, 7 Nov 2022 12:27:21 -0700
Subject: [PATCH 10/18] No more panics
---
ghdoc_test.go | 23 +++++++++++------------
1 file changed, 11 insertions(+), 12 deletions(-)
diff --git a/ghdoc_test.go b/ghdoc_test.go
index 1c12012..ff072fe 100644
--- a/ghdoc_test.go
+++ b/ghdoc_test.go
@@ -180,16 +180,7 @@ func TestGrabTocDepth(t *testing.T) {
Depth: 1,
Indent: 2,
}
- // DEBUG BEGIN
- log.Printf("*** CHUCK: ==========")
- // DEBUG END
toc := *doc.GrabToc()
- // DEBUG BEGIN
- log.Printf("*** CHUCK toc: ")
- for idx, item := range toc {
- log.Printf("*** CHUCK %d: %+#v", idx, item)
- }
- // DEBUG END
for i := 0; i <= len(tocExpected)-1; i++ {
if toc[i] != tocExpected[i] {
t.Error("Res :", toc[i], "\nExpected :", tocExpected[i])
@@ -219,7 +210,7 @@ func TestGrabTocStartDepth(t *testing.T) {
Blabla...
-The command foo3 is even betterer
+The command foo3 is even betterer
Blabla...
@@ -235,7 +226,7 @@ func TestGrabTocStartDepth(t *testing.T) {
Blabla...
-The command bar3 is even betterer
+The command bar3 is even betterer
Blabla...
`, AbsPaths: false,
@@ -243,8 +234,16 @@ func TestGrabTocStartDepth(t *testing.T) {
StartDepth: 1,
Indent: 2,
}
+ // DEBUG BEGIN
+ log.Printf("*** CHUCK: =======")
+ // DEBUG END
toc := *doc.GrabToc()
-
+ // DEBUG BEGIN
+ log.Printf("*** CHUCK toc: ")
+ for idx, item := range toc {
+ log.Printf("*** CHUCK %d: %+#v", idx, item)
+ }
+ // DEBUG END
for i := 0; i <= len(tocExpected)-1; i++ {
if toc[i] != tocExpected[i] {
t.Error("Res :", toc[i], "\nExpected :", tocExpected[i])
From a9295d7fcebf9aa14a17ca8591fa3a0ccafc666f Mon Sep 17 00:00:00 2001
From: Chuck Grindel
Date: Mon, 7 Nov 2022 12:32:17 -0700
Subject: [PATCH 11/18] Adjust indent depth by StartDepth
---
ghdoc.go | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/ghdoc.go b/ghdoc.go
index d4fbe32..61297f0 100644
--- a/ghdoc.go
+++ b/ghdoc.go
@@ -167,8 +167,8 @@ func (doc *GHDoc) GrabToc() *GHToc {
}
func (doc *GHDoc) tocEntry(indent string, hdr Header) string {
- // TODO(chuck): Calculate the repeat count with the doc.StartDepth
- return strings.Repeat(indent, int(hdr.Depth)) + "* " +
+ indentDepth := int(hdr.Depth) - doc.StartDepth
+ return strings.Repeat(indent, indentDepth) + "* " +
"[" + doc.tocName(hdr.Name) + "]" +
"(" + doc.tocLink(hdr.Href) + ")"
}
From 33c0c0e59af8c504753f6385e9de3fdbe28a3c2b Mon Sep 17 00:00:00 2001
From: Chuck Grindel
Date: Mon, 7 Nov 2022 12:45:48 -0700
Subject: [PATCH 12/18] Fixed indent depth
---
ghdoc.go | 23 +++++++++++++++++++----
ghdoc_test.go | 12 +++---------
2 files changed, 22 insertions(+), 13 deletions(-)
diff --git a/ghdoc.go b/ghdoc.go
index 61297f0..657a5f9 100644
--- a/ghdoc.go
+++ b/ghdoc.go
@@ -150,8 +150,17 @@ func (doc *GHDoc) GrabToc() *GHToc {
maxDepth = int(MaxHxDepth)
}
+ hdrs := findHeadersInString(doc.html)
+
+ minHxDepth := MaxHxDepth
+ for _, hdr := range hdrs {
+ if hdr.Depth < minHxDepth {
+ minHxDepth = hdr.Depth
+ }
+ }
+
toc := GHToc{}
- for _, hdr := range findHeadersInString(doc.html) {
+ for _, hdr := range hdrs {
// DEBUG BEGIN
log.Printf("*** CHUCK: GrabToc hdr: %+#v", hdr)
log.Printf("*** CHUCK: GrabToc minDepth: %+#v", minDepth)
@@ -159,7 +168,14 @@ func (doc *GHDoc) GrabToc() *GHToc {
// DEBUG END
hDepth := int(hdr.Depth)
if hDepth >= minDepth && hDepth <= maxDepth {
- toc = append(toc, doc.tocEntry(listIndentation(), hdr))
+ indentDepth := int(hdr.Depth) - int(minHxDepth) - doc.StartDepth
+ // DEBUG BEGIN
+ log.Printf("*** CHUCK: GrabToc minHxDepth: %+#v", minHxDepth)
+ log.Printf("*** CHUCK: GrabToc doc.StartDepth: %+#v", doc.StartDepth)
+ log.Printf("*** CHUCK: GrabToc indentDepth: %+#v", indentDepth)
+ // DEBUG END
+ indent := strings.Repeat(listIndentation(), indentDepth)
+ toc = append(toc, doc.tocEntry(indent, hdr))
}
}
@@ -167,8 +183,7 @@ func (doc *GHDoc) GrabToc() *GHToc {
}
func (doc *GHDoc) tocEntry(indent string, hdr Header) string {
- indentDepth := int(hdr.Depth) - doc.StartDepth
- return strings.Repeat(indent, indentDepth) + "* " +
+ return indent + "* " +
"[" + doc.tocName(hdr.Name) + "]" +
"(" + doc.tocLink(hdr.Href) + ")"
}
diff --git a/ghdoc_test.go b/ghdoc_test.go
index ff072fe..a66d879 100644
--- a/ghdoc_test.go
+++ b/ghdoc_test.go
@@ -95,6 +95,9 @@ For example:
Depth: 0,
Indent: 2,
}
+ // DEBUG BEGIN
+ log.Printf("*** CHUCK: ========")
+ // DEBUG END
toc := *doc.GrabToc()
for i := 0; i <= len(tocExpected)-1; i++ {
if toc[i] != tocExpected[i] {
@@ -234,16 +237,7 @@ func TestGrabTocStartDepth(t *testing.T) {
StartDepth: 1,
Indent: 2,
}
- // DEBUG BEGIN
- log.Printf("*** CHUCK: =======")
- // DEBUG END
toc := *doc.GrabToc()
- // DEBUG BEGIN
- log.Printf("*** CHUCK toc: ")
- for idx, item := range toc {
- log.Printf("*** CHUCK %d: %+#v", idx, item)
- }
- // DEBUG END
for i := 0; i <= len(tocExpected)-1; i++ {
if toc[i] != tocExpected[i] {
t.Error("Res :", toc[i], "\nExpected :", tocExpected[i])
From 208fe5890204a61b2a5004340984531bd10789f0 Mon Sep 17 00:00:00 2001
From: Chuck Grindel
Date: Mon, 7 Nov 2022 12:46:19 -0700
Subject: [PATCH 13/18] Clean up
---
ghdoc.go | 10 ----------
ghdoc_test.go | 3 ---
2 files changed, 13 deletions(-)
diff --git a/ghdoc.go b/ghdoc.go
index 657a5f9..ed4e123 100644
--- a/ghdoc.go
+++ b/ghdoc.go
@@ -161,19 +161,9 @@ func (doc *GHDoc) GrabToc() *GHToc {
toc := GHToc{}
for _, hdr := range hdrs {
- // DEBUG BEGIN
- log.Printf("*** CHUCK: GrabToc hdr: %+#v", hdr)
- log.Printf("*** CHUCK: GrabToc minDepth: %+#v", minDepth)
- log.Printf("*** CHUCK: GrabToc maxDepth: %+#v", maxDepth)
- // DEBUG END
hDepth := int(hdr.Depth)
if hDepth >= minDepth && hDepth <= maxDepth {
indentDepth := int(hdr.Depth) - int(minHxDepth) - doc.StartDepth
- // DEBUG BEGIN
- log.Printf("*** CHUCK: GrabToc minHxDepth: %+#v", minHxDepth)
- log.Printf("*** CHUCK: GrabToc doc.StartDepth: %+#v", doc.StartDepth)
- log.Printf("*** CHUCK: GrabToc indentDepth: %+#v", indentDepth)
- // DEBUG END
indent := strings.Repeat(listIndentation(), indentDepth)
toc = append(toc, doc.tocEntry(indent, hdr))
}
diff --git a/ghdoc_test.go b/ghdoc_test.go
index a66d879..1913695 100644
--- a/ghdoc_test.go
+++ b/ghdoc_test.go
@@ -95,9 +95,6 @@ For example:
Depth: 0,
Indent: 2,
}
- // DEBUG BEGIN
- log.Printf("*** CHUCK: ========")
- // DEBUG END
toc := *doc.GrabToc()
for i := 0; i <= len(tocExpected)-1; i++ {
if toc[i] != tocExpected[i] {
From 0af18b6a4d271b6fa4763745340d13080739433d Mon Sep 17 00:00:00 2001
From: Chuck Grindel
Date: Mon, 7 Nov 2022 12:53:59 -0700
Subject: [PATCH 14/18] Clean up
---
ghdoc.go | 65 +++----------------------------------------------
headerfinder.go | 1 -
2 files changed, 3 insertions(+), 63 deletions(-)
diff --git a/ghdoc.go b/ghdoc.go
index ed4e123..8179c66 100644
--- a/ghdoc.go
+++ b/ghdoc.go
@@ -152,6 +152,8 @@ func (doc *GHDoc) GrabToc() *GHToc {
hdrs := findHeadersInString(doc.html)
+ // Determine the min depth represented by the slice of headers. For example, if a document only
+ // has H2 tags and no H1 tags, we want the H2 TOC entries to not have an indent.
minHxDepth := MaxHxDepth
for _, hdr := range hdrs {
if hdr.Depth < minHxDepth {
@@ -159,6 +161,7 @@ func (doc *GHDoc) GrabToc() *GHToc {
}
}
+ // Populate the toc with entries
toc := GHToc{}
for _, hdr := range hdrs {
hDepth := int(hdr.Depth)
@@ -193,68 +196,6 @@ func (doc *GHDoc) tocLink(href string) string {
return link
}
-//func (doc *GHDoc) GrabToc() *GHToc {
-// doc.d("GrabToc: start, html size: " + strconv.Itoa(len(doc.html)))
-// defer doc.d("GrabToc: done.")
-
-// r := headerRegexp()
-// listIndentation := generateListIndentation(doc.Indent)
-
-// toc := GHToc{}
-// minHeaderNum := 6
-// var groups []map[string]string
-// doc.d("GrabToc: matching ...")
-// for idx, match := range r.FindAllStringSubmatch(doc.html, -1) {
-// doc.d("GrabToc: match #" + strconv.Itoa(idx) + " ...")
-// group := make(map[string]string)
-// // fill map for groups
-// for i, name := range r.SubexpNames() {
-// if i == 0 || name == "" {
-// continue
-// }
-// doc.d("GrabToc: process group: " + name + ": " + match[i] + " ...")
-// group[name] = removeStuff(match[i])
-// }
-// // update minimum header number
-// n, _ := strconv.Atoi(group["num"])
-// if n < minHeaderNum {
-// minHeaderNum = n
-// }
-// groups = append(groups, group)
-// }
-
-// var tmpSection string
-// doc.d("GrabToc: processing groups ...")
-// doc.d("Including starting from level " + strconv.Itoa(doc.StartDepth))
-// for _, group := range groups {
-// // format result
-// n, _ := strconv.Atoi(group["num"])
-// if n <= doc.StartDepth {
-// continue
-// }
-// if doc.Depth > 0 && n > doc.Depth {
-// continue
-// }
-
-// link, _ := url.QueryUnescape(group["href"])
-// if doc.AbsPaths {
-// link = doc.Path + link
-// }
-
-// tmpSection = removeStuff(group["name"])
-// if doc.Escape {
-// tmpSection = EscapeSpecChars(tmpSection)
-// }
-// tocItem := strings.Repeat(listIndentation(), n-minHeaderNum-doc.StartDepth) + "* " +
-// "[" + tmpSection + "]" +
-// "(" + link + ")"
-// //fmt.Println(tocItem)
-// toc = append(toc, tocItem)
-// }
-
-// return &toc
-//}
-
// GetToc return GHToc for a document
func (doc *GHDoc) GetToc() *GHToc {
if err := doc.Convert2HTML(); err != nil {
diff --git a/headerfinder.go b/headerfinder.go
index 4b4d2fb..685fce8 100644
--- a/headerfinder.go
+++ b/headerfinder.go
@@ -37,7 +37,6 @@ func findHeaders(r io.Reader) []Header {
tt := tokenizer.Next()
switch tt {
case html.ErrorToken:
- // TODO(chuck): Check if this is io.EOF?
return hdrs
case html.StartTagToken:
t := tokenizer.Token()
From d14602d6350cbd59768430e5fcf4df027182a03c Mon Sep 17 00:00:00 2001
From: Chuck Grindel
Date: Mon, 7 Nov 2022 12:59:32 -0700
Subject: [PATCH 15/18] Clean up
---
cmd/gh-md-toc/main.go | 8 --------
1 file changed, 8 deletions(-)
diff --git a/cmd/gh-md-toc/main.go b/cmd/gh-md-toc/main.go
index f085b9b..25ed41f 100644
--- a/cmd/gh-md-toc/main.go
+++ b/cmd/gh-md-toc/main.go
@@ -3,7 +3,6 @@ package main
import (
"fmt"
"io"
- "log"
"os"
"gopkg.in/alecthomas/kingpin.v2"
@@ -64,15 +63,8 @@ func main() {
fmt.Println()
}
- // DEBUG BEGIN
- log.Printf("*** CHUCK: pathsCount: %+#v", pathsCount)
- // DEBUG END
-
for i := 1; i <= pathsCount; i++ {
toc := <-ch
- // DEBUG BEGIN
- log.Printf("*** CHUCK: in loop toc: %+#v", toc)
- // DEBUG END
// #14, check if there's really TOC?
if toc != nil {
check(toc.Print(os.Stdout))
From d29c1845e7b4ff69761941ff4cccfe767286b8b2 Mon Sep 17 00:00:00 2001
From: Chuck Grindel
Date: Mon, 7 Nov 2022 13:09:02 -0700
Subject: [PATCH 16/18] Implemented missing test
---
headerfinder.go | 6 +++---
headerfinder_test.go | 3 ++-
2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/headerfinder.go b/headerfinder.go
index 685fce8..32c5bc8 100644
--- a/headerfinder.go
+++ b/headerfinder.go
@@ -48,7 +48,7 @@ func findHeaders(r io.Reader) []Header {
}
func getHxDepth(dataAtom atom.Atom) HxDepth {
- depths := []atom.Atom{
+ hxAtoms := []atom.Atom{
atom.H1,
atom.H2,
atom.H3,
@@ -56,8 +56,8 @@ func getHxDepth(dataAtom atom.Atom) HxDepth {
atom.H5,
atom.H6,
}
- for depth, v := range depths {
- if dataAtom == v {
+ for depth, hxAtom := range hxAtoms {
+ if dataAtom == hxAtom {
return HxDepth(depth)
}
}
diff --git a/headerfinder_test.go b/headerfinder_test.go
index aa4bc54..36c3d96 100644
--- a/headerfinder_test.go
+++ b/headerfinder_test.go
@@ -94,6 +94,7 @@ func TestFindAttribute(t *testing.T) {
assert.Equal(t, spaceGreeting, attr)
})
t.Run("attribute does not exist", func(t *testing.T) {
- t.Error("IMPLEMENT ME!")
+ _, ok := findAttribute(attrs, "", "doesnotexist")
+ assert.False(t, ok)
})
}
From 563f2322eacc2fc3ea5be26d14c6f4d12076f87c Mon Sep 17 00:00:00 2001
From: Chuck Grindel
Date: Mon, 7 Nov 2022 13:11:51 -0700
Subject: [PATCH 17/18] Add test for getHxDepth
---
headerfinder_test.go | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/headerfinder_test.go b/headerfinder_test.go
index 36c3d96..c8ada93 100644
--- a/headerfinder_test.go
+++ b/headerfinder_test.go
@@ -5,6 +5,7 @@ import (
"github.com/stretchr/testify/assert"
"golang.org/x/net/html"
+ "golang.org/x/net/html/atom"
)
const singleH1 = `
@@ -98,3 +99,13 @@ func TestFindAttribute(t *testing.T) {
assert.False(t, ok)
})
}
+
+func TestGetHxDepth(t *testing.T) {
+ assert.Equal(t, HxDepth(0), getHxDepth(atom.H1))
+ assert.Equal(t, HxDepth(1), getHxDepth(atom.H2))
+ assert.Equal(t, HxDepth(2), getHxDepth(atom.H3))
+ assert.Equal(t, HxDepth(3), getHxDepth(atom.H4))
+ assert.Equal(t, HxDepth(4), getHxDepth(atom.H5))
+ assert.Equal(t, HxDepth(5), getHxDepth(atom.H6))
+ assert.Equal(t, InvalidDepth, getHxDepth(atom.A))
+}
From a9410b62be9a8629b9aaa9053d2b7217485ee64c Mon Sep 17 00:00:00 2001
From: Chuck Grindel
Date: Tue, 8 Nov 2022 12:28:56 -0700
Subject: [PATCH 18/18] Update README about TOC parsing
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 3ecb6b9..ca93176 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@ The advantages of this implementation:
* no dependencies (no need curl, wget, awk, etc.)
* cross-platform (support for Windows, Mac OS, etc.)
- * regexp for parsing TOC
+ * `golang.org/x/net/html` for parsing TOC
* parallel processing of multiple documents