From adea6af35eb25cd984b0e82efd03cd12656231ea Mon Sep 17 00:00:00 2001 From: Chuck Grindel Date: Sun, 6 Nov 2022 11:34:29 -0700 Subject: [PATCH 01/18] Upgraded to 1.19 in go.mod --- go.mod | 4 +++- go.sum | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index b344236..fe80d0b 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/ekalinin/github-markdown-toc.go -go 1.17 +go 1.19 require gopkg.in/alecthomas/kingpin.v2 v2.2.4 @@ -14,4 +14,6 @@ require ( github.com/mattn/go-isatty v0.0.14 // indirect github.com/sergi/go-diff v1.2.0 // indirect github.com/stretchr/testify v1.7.0 // indirect + golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect + golang.org/x/tools v0.2.0 // indirect ) diff --git a/go.sum b/go.sum index 93e1e05..d0ccf99 100644 --- a/go.sum +++ b/go.sum @@ -24,8 +24,23 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 h1:VLliZ0d+/avPrXXH+OakdXhpJuEoBZuwh1m2j7U6Iug= +golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c h1:F1jZWGFhYfh0Ci55sIpILtKKK8p3i2/krTr0H1rg74I= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.2.0 h1:G6AHpWxTMGY1KyEYoAQ5WTtIekUUvDNjan3ugu60JvE= +golang.org/x/tools v0.2.0/go.mod h1:y4OqIKeOV/fWJetJ8bXPU1sEVniLMIyDAZWeHdV+NTA= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/alecthomas/kingpin.v2 v2.2.4 h1:CC8tJ/xljioKrK6ii3IeWVXU4Tw7VB+LbjZBJaBxN50= gopkg.in/alecthomas/kingpin.v2 v2.2.4/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= From b0bea08dd575bf690865d116ec12b33a8fffa85b Mon Sep 17 00:00:00 2001 From: Chuck Grindel Date: Sun, 6 Nov 2022 13:32:27 -0700 Subject: [PATCH 02/18] Save work before introducing html tokenizer --- cmd/gh-md-toc/main.go | 13 +++++++++++-- ghdoc.go | 8 +------- ghdoc_test.go | 2 +- headerfinder.go | 38 ++++++++++++++++++++++++++++++++++++++ headerfinder_test.go | 16 ++++++++++++++++ 5 files changed, 67 insertions(+), 10 deletions(-) create mode 100644 headerfinder.go create mode 100644 headerfinder_test.go diff --git a/cmd/gh-md-toc/main.go b/cmd/gh-md-toc/main.go index f09b9ba..f085b9b 100644 --- a/cmd/gh-md-toc/main.go +++ b/cmd/gh-md-toc/main.go @@ -3,6 +3,7 @@ package main import ( "fmt" "io" + "log" "os" "gopkg.in/alecthomas/kingpin.v2" @@ -48,10 +49,11 @@ func main() { for _, p := range *paths { ghdoc := ghtoc.NewGHDoc(p, absPathsInToc, *startDepth, *depth, !*noEscape, *token, *indent, *debug) + getFn := func(ch chan *ghtoc.GHToc, ghdoc *ghtoc.GHDoc) { ch <- ghdoc.GetToc() } if *serial { - ch <- ghdoc.GetToc() + getFn(ch, ghdoc) } else { - go func(path string) { ch <- ghdoc.GetToc() }(p) + go getFn(ch, ghdoc) } } @@ -62,8 +64,15 @@ func main() { fmt.Println() } + // DEBUG BEGIN + log.Printf("*** CHUCK: pathsCount: %+#v", pathsCount) + // DEBUG END + for i := 1; i <= pathsCount; i++ { toc := <-ch + // DEBUG BEGIN + log.Printf("*** CHUCK: in loop toc: %+#v", toc) + // DEBUG END // #14, check if there's really TOC? if toc != nil { check(toc.Print(os.Stdout)) diff --git a/ghdoc.go b/ghdoc.go index a7ed8e0..bbc72d3 100644 --- a/ghdoc.go +++ b/ghdoc.go @@ -6,7 +6,6 @@ import ( "log" "net/url" "os" - "regexp" "strconv" "strings" ) @@ -141,12 +140,7 @@ func (doc *GHDoc) GrabToc() *GHToc { doc.d("GrabToc: start, html size: " + strconv.Itoa(len(doc.html))) defer doc.d("GrabToc: done.") - re := `(?si)[1-6])>\s*` + - `]*>\s*` + - `.*?(?P.*?)README in another language +

README in another language

`, AbsPaths: false, Depth: 0, diff --git a/headerfinder.go b/headerfinder.go new file mode 100644 index 0000000..d27e005 --- /dev/null +++ b/headerfinder.go @@ -0,0 +1,38 @@ +package ghtoc + +import "regexp" + +// const _headerRegexpStr = `(?si)[1-6])>\s*` + +// `]*>\s*` + +// `.*?(?P.*?)[1-6])>\s*` + + `]*>` + + `.*?(?P.*?)[1-6])>\s*` + + `]*>\s*` + + `.*?(?P.*?)Document Title +` + +func TestHeaderRegexp(t *testing.T) { + r := newHeaderRegexp() + + results := r.FindAllStringSubmatch(singleHdr, -1) + if len(results) != 1 { + t.Errorf("Expected a single header. %+#v", results) + } +} From 8e7bc67f290abb1def7433dd54fd64a45df3a3e8 Mon Sep 17 00:00:00 2001 From: Chuck Grindel Date: Mon, 7 Nov 2022 09:14:47 -0700 Subject: [PATCH 03/18] Save work with html parser --- ghdoc_test.go | 2 +- go.mod | 7 +- go.sum | 19 +---- headerfinder.go | 164 ++++++++++++++++++++++++++++++++++++++----- headerfinder_test.go | 20 ++++-- 5 files changed, 169 insertions(+), 43 deletions(-) diff --git a/ghdoc_test.go b/ghdoc_test.go index f67be12..4b0d3dd 100644 --- a/ghdoc_test.go +++ b/ghdoc_test.go @@ -30,7 +30,7 @@ func TestGrabTocOneRow(t *testing.T) { } doc := &GHDoc{ html: ` -

README in another language

+

README in another language

`, AbsPaths: false, Depth: 0, diff --git a/go.mod b/go.mod index fe80d0b..720d191 100644 --- a/go.mod +++ b/go.mod @@ -2,7 +2,10 @@ module github.com/ekalinin/github-markdown-toc.go go 1.19 -require gopkg.in/alecthomas/kingpin.v2 v2.2.4 +require ( + golang.org/x/net v0.1.0 + gopkg.in/alecthomas/kingpin.v2 v2.2.4 +) require ( github.com/alecthomas/assert v0.0.0-20170929043011-405dbfeb8e38 // indirect @@ -14,6 +17,4 @@ require ( github.com/mattn/go-isatty v0.0.14 // indirect github.com/sergi/go-diff v1.2.0 // indirect github.com/stretchr/testify v1.7.0 // indirect - golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect - golang.org/x/tools v0.2.0 // indirect ) diff --git a/go.sum b/go.sum index d0ccf99..c7c02d4 100644 --- a/go.sum +++ b/go.sum @@ -24,23 +24,10 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 h1:VLliZ0d+/avPrXXH+OakdXhpJuEoBZuwh1m2j7U6Iug= -golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= -golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c h1:F1jZWGFhYfh0Ci55sIpILtKKK8p3i2/krTr0H1rg74I= +golang.org/x/net v0.1.0 h1:hZ/3BUoy5aId7sCpA/Tc5lt8DkFgdVS2onTpJsZ/fl0= +golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.2.0 h1:G6AHpWxTMGY1KyEYoAQ5WTtIekUUvDNjan3ugu60JvE= -golang.org/x/tools v0.2.0/go.mod h1:y4OqIKeOV/fWJetJ8bXPU1sEVniLMIyDAZWeHdV+NTA= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U= gopkg.in/alecthomas/kingpin.v2 v2.2.4 h1:CC8tJ/xljioKrK6ii3IeWVXU4Tw7VB+LbjZBJaBxN50= gopkg.in/alecthomas/kingpin.v2 v2.2.4/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/headerfinder.go b/headerfinder.go index d27e005..f1c7519 100644 --- a/headerfinder.go +++ b/headerfinder.go @@ -1,27 +1,42 @@ package ghtoc -import "regexp" +import ( + "io" + "log" + "regexp" + "strings" -// const _headerRegexpStr = `(?si)[1-6])>\s*` + -// `]*>\s*` + -// `.*?(?P.*?)[1-6])>\s*` + - `]*>` + + `]*>\s*` + `.*?(?P.*?)[1-6])>\s*` + - `]*>\s*` + - `.*?(?P.*?)[1-6])>\s*` + +// `]*>` + +// `.*?(?P.*?)[1-6])>\s*` + +// `]*>\s*` + +// `.*?(?P.*?)Document Title ` -func TestHeaderRegexp(t *testing.T) { - r := newHeaderRegexp() +// func TestHeaderRegexp(t *testing.T) { +// r := newHeaderRegexp() +// results := r.FindAllStringSubmatch(singleHdr, -1) +// if len(results) != 1 { +// t.Errorf("Expected a single header. %+#v", results) +// } +// } - results := r.FindAllStringSubmatch(singleHdr, -1) +func TestFindHeaders(t *testing.T) { + results := findHeadersInString(singleHdr) + // DEBUG BEGIN + log.Printf("*** CHUCK: results: %+#v", results) + // DEBUG END if len(results) != 1 { t.Errorf("Expected a single header. %+#v", results) } From 84262ecd4271350759e583c04daf12455a61925b Mon Sep 17 00:00:00 2001 From: Chuck Grindel Date: Mon, 7 Nov 2022 09:19:42 -0700 Subject: [PATCH 04/18] Switched to struct instead of pointer for return --- headerfinder.go | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/headerfinder.go b/headerfinder.go index f1c7519..f9f4ef1 100644 --- a/headerfinder.go +++ b/headerfinder.go @@ -59,13 +59,13 @@ type Header struct { Name string } -func findHeadersInString(str string) []*Header { +func findHeadersInString(str string) []Header { r := strings.NewReader(str) return findHeaders(r) } -func findHeaders(r io.Reader) []*Header { - hdrs := make([]*Header, 0) +func findHeaders(r io.Reader) []Header { + hdrs := make([]Header, 0) tokenizer := html.NewTokenizer(r) for { tt := tokenizer.Next() @@ -82,8 +82,7 @@ func findHeaders(r io.Reader) []*Header { // log.Printf("*** CHUCK: default t.DataAtom: %+#v", t.DataAtom) // DEBUG END - hdr := createHeader(tokenizer, t) - if hdr != nil { + if hdr, ok := createHeader(tokenizer, t); ok { hdrs = append(hdrs, hdr) } } @@ -112,10 +111,10 @@ func getHxDepth(dataAtom atom.Atom) HxDepth { return InvalidDepth } -func createHeader(tokenizer *html.Tokenizer, token html.Token) *Header { +func createHeader(tokenizer *html.Tokenizer, token html.Token) (Header, bool) { hxDepth := getHxDepth(token.DataAtom) if hxDepth == InvalidDepth { - return nil + return Header{}, false } var href, name string @@ -125,7 +124,7 @@ func createHeader(tokenizer *html.Tokenizer, token html.Token) *Header { t := tokenizer.Token() switch t.Type { case html.ErrorToken: - return nil + return Header{}, false case html.StartTagToken: tokenDepth++ if t.DataAtom == atom.A { @@ -133,17 +132,17 @@ func createHeader(tokenizer *html.Tokenizer, token html.Token) *Header { href = hrefAttr.Val } else { // Expected to find href attribute - return nil + return Header{}, false } } case html.EndTagToken: // If we encountered the matching end tag for the Hx, then we are done if t.DataAtom == token.DataAtom { - return &Header{ + return Header{ Depth: hxDepth, Name: name, Href: href, - } + }, true } tokenDepth-- case html.TextToken: From 936143477ba200ce69e4bc923f4335d88b1f4200 Mon Sep 17 00:00:00 2001 From: Chuck Grindel Date: Mon, 7 Nov 2022 09:47:28 -0700 Subject: [PATCH 05/18] Add testify. Initial tests pass --- go.mod | 6 ++++- go.sum | 3 +++ headerfinder.go | 16 ++++++-------- headerfinder_test.go | 52 +++++++++++++++++++++++++++++++++----------- 4 files changed, 54 insertions(+), 23 deletions(-) diff --git a/go.mod b/go.mod index 720d191..67a976f 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/ekalinin/github-markdown-toc.go go 1.19 require ( + github.com/stretchr/testify v1.7.0 golang.org/x/net v0.1.0 gopkg.in/alecthomas/kingpin.v2 v2.2.4 ) @@ -14,7 +15,10 @@ require ( github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc // indirect github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf // indirect github.com/davecgh/go-spew v1.1.1 // indirect + github.com/kr/pretty v0.1.0 // indirect github.com/mattn/go-isatty v0.0.14 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect github.com/sergi/go-diff v1.2.0 // indirect - github.com/stretchr/testify v1.7.0 // indirect + gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect + gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect ) diff --git a/go.sum b/go.sum index c7c02d4..3d2a333 100644 --- a/go.sum +++ b/go.sum @@ -11,8 +11,10 @@ github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRF github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= @@ -31,6 +33,7 @@ golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U= gopkg.in/alecthomas/kingpin.v2 v2.2.4 h1:CC8tJ/xljioKrK6ii3IeWVXU4Tw7VB+LbjZBJaBxN50= gopkg.in/alecthomas/kingpin.v2 v2.2.4/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/headerfinder.go b/headerfinder.go index f9f4ef1..d69b61b 100644 --- a/headerfinder.go +++ b/headerfinder.go @@ -75,13 +75,6 @@ func findHeaders(r io.Reader) []Header { return hdrs case html.StartTagToken: t := tokenizer.Token() - - // DEBUG BEGIN - log.Printf("*** CHUCK: default t: %+#v", t) - // log.Printf("*** CHUCK: default t.Type: %+#v", t.Type) - // log.Printf("*** CHUCK: default t.DataAtom: %+#v", t.DataAtom) - // DEBUG END - if hdr, ok := createHeader(tokenizer, t); ok { hdrs = append(hdrs, hdr) } @@ -118,10 +111,15 @@ func createHeader(tokenizer *html.Tokenizer, token html.Token) (Header, bool) { } var href, name string - tokenDepth := 0 + // Start at 1 because we are inside the Hx tag + tokenDepth := 1 for { tokenizer.Next() t := tokenizer.Token() + // DEBUG BEGIN + log.Printf("*** CHUCK: createHeader t: %+#v", t) + log.Printf("*** CHUCK: createHeader tokenDepth: %+#v", tokenDepth) + // DEBUG END switch t.Type { case html.ErrorToken: return Header{}, false @@ -147,7 +145,7 @@ func createHeader(tokenizer *html.Tokenizer, token html.Token) (Header, bool) { tokenDepth-- case html.TextToken: if tokenDepth == 1 { - name = t.Data + name = strings.TrimSpace(t.Data) } } } diff --git a/headerfinder_test.go b/headerfinder_test.go index 53ba71b..76e62b0 100644 --- a/headerfinder_test.go +++ b/headerfinder_test.go @@ -3,26 +3,52 @@ package ghtoc import ( "log" "testing" + + "github.com/stretchr/testify/assert" ) -const singleHdr = ` +const singleH1 = `

Document Title

` -// func TestHeaderRegexp(t *testing.T) { -// r := newHeaderRegexp() -// results := r.FindAllStringSubmatch(singleHdr, -1) -// if len(results) != 1 { -// t.Errorf("Expected a single header. %+#v", results) +const singleH2 = ` +

+ + Interesting Section +

+` + +// func assertHeaderEqual(t *testing.T, expected, actual Header) { +// if actual != expected { +// t.Errorf("Unexpected header value. actual: %+#v, expected: %+#v", actual, expected) // } // } func TestFindHeaders(t *testing.T) { - results := findHeadersInString(singleHdr) - // DEBUG BEGIN - log.Printf("*** CHUCK: results: %+#v", results) - // DEBUG END - if len(results) != 1 { - t.Errorf("Expected a single header. %+#v", results) - } + t.Run("single H1", func(t *testing.T) { + // DEBUG BEGIN + log.Printf("*** CHUCK: ===========") + // DEBUG END + results := findHeadersInString(singleH1) + assert.Len(t, results, 1) + assert.Equal( + t, + Header{Depth: 0, Href: "#document-title", Name: "Document Title"}, + results[0], + ) + }) + t.Run("single H2", func(t *testing.T) { + // DEBUG BEGIN + log.Printf("*** CHUCK: ===========") + // DEBUG END + results := findHeadersInString(singleH2) + assert.Len(t, results, 1) + assert.Equal( + t, + Header{Depth: 1, Href: "#interesting-section", Name: "Interesting Section"}, + results[0], + ) + }) } From 2d6ce40e92972db9ed756d25dd069f37c620d889 Mon Sep 17 00:00:00 2001 From: Chuck Grindel Date: Mon, 7 Nov 2022 10:04:11 -0700 Subject: [PATCH 06/18] Added multiple section test --- headerfinder_test.go | 50 +++++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/headerfinder_test.go b/headerfinder_test.go index 76e62b0..35950b9 100644 --- a/headerfinder_test.go +++ b/headerfinder_test.go @@ -1,7 +1,6 @@ package ghtoc import ( - "log" "testing" "github.com/stretchr/testify/assert" @@ -20,17 +19,18 @@ const singleH2 = ` ` -// func assertHeaderEqual(t *testing.T, expected, actual Header) { -// if actual != expected { -// t.Errorf("Unexpected header value. actual: %+#v, expected: %+#v", actual, expected) -// } -// } +const multipleSections = ` +

Document Title

+Hi +

First Section

+Some Text +

First Subsection

+

Second Section

+

Second Subsection

+` func TestFindHeaders(t *testing.T) { t.Run("single H1", func(t *testing.T) { - // DEBUG BEGIN - log.Printf("*** CHUCK: ===========") - // DEBUG END results := findHeadersInString(singleH1) assert.Len(t, results, 1) assert.Equal( @@ -40,9 +40,6 @@ func TestFindHeaders(t *testing.T) { ) }) t.Run("single H2", func(t *testing.T) { - // DEBUG BEGIN - log.Printf("*** CHUCK: ===========") - // DEBUG END results := findHeadersInString(singleH2) assert.Len(t, results, 1) assert.Equal( @@ -51,4 +48,33 @@ func TestFindHeaders(t *testing.T) { results[0], ) }) + t.Run("multiple sections", func(t *testing.T) { + results := findHeadersInString(multipleSections) + assert.Len(t, results, 5) + assert.Equal( + t, + Header{Depth: 0, Href: "#document-title", Name: "Document Title"}, + results[0], + ) + assert.Equal( + t, + Header{Depth: 1, Href: "#first-section", Name: "First Section"}, + results[1], + ) + assert.Equal( + t, + Header{Depth: 2, Href: "#first-subsection", Name: "First Subsection"}, + results[2], + ) + assert.Equal( + t, + Header{Depth: 1, Href: "#second-section", Name: "Second Section"}, + results[3], + ) + assert.Equal( + t, + Header{Depth: 3, Href: "#second-subsection", Name: "Second Subsection"}, + results[4], + ) + }) } From 14d68a47e71957619d5b8ad7210632860812e0f3 Mon Sep 17 00:00:00 2001 From: Chuck Grindel Date: Mon, 7 Nov 2022 10:05:25 -0700 Subject: [PATCH 07/18] Clean up --- headerfinder.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/headerfinder.go b/headerfinder.go index d69b61b..30b9ef0 100644 --- a/headerfinder.go +++ b/headerfinder.go @@ -2,7 +2,6 @@ package ghtoc import ( "io" - "log" "regexp" "strings" @@ -116,10 +115,6 @@ func createHeader(tokenizer *html.Tokenizer, token html.Token) (Header, bool) { for { tokenizer.Next() t := tokenizer.Token() - // DEBUG BEGIN - log.Printf("*** CHUCK: createHeader t: %+#v", t) - log.Printf("*** CHUCK: createHeader tokenDepth: %+#v", tokenDepth) - // DEBUG END switch t.Type { case html.ErrorToken: return Header{}, false From ccaa8f9f3b8952cb304fde01693c79abc07e6f44 Mon Sep 17 00:00:00 2001 From: Chuck Grindel Date: Mon, 7 Nov 2022 11:09:55 -0700 Subject: [PATCH 08/18] Collect name from parts --- ghdoc.go | 130 +++++++++++++++++++++++++++---------------- headerfinder.go | 59 ++++---------------- headerfinder_test.go | 19 +++++++ 3 files changed, 113 insertions(+), 95 deletions(-) diff --git a/ghdoc.go b/ghdoc.go index bbc72d3..5dae117 100644 --- a/ghdoc.go +++ b/ghdoc.go @@ -140,64 +140,100 @@ func (doc *GHDoc) GrabToc() *GHToc { doc.d("GrabToc: start, html size: " + strconv.Itoa(len(doc.html))) defer doc.d("GrabToc: done.") - r := headerRegexp() listIndentation := generateListIndentation(doc.Indent) toc := GHToc{} - minHeaderNum := 6 - var groups []map[string]string - doc.d("GrabToc: matching ...") - for idx, match := range r.FindAllStringSubmatch(doc.html, -1) { - doc.d("GrabToc: match #" + strconv.Itoa(idx) + " ...") - group := make(map[string]string) - // fill map for groups - for i, name := range r.SubexpNames() { - if i == 0 || name == "" { - continue - } - doc.d("GrabToc: process group: " + name + ": " + match[i] + " ...") - group[name] = removeStuff(match[i]) - } - // update minimum header number - n, _ := strconv.Atoi(group["num"]) - if n < minHeaderNum { - minHeaderNum = n - } - groups = append(groups, group) + for _, hdr := range findHeadersInString(doc.html) { + toc = append(toc, doc.tocEntry(listIndentation(), hdr)) } - var tmpSection string - doc.d("GrabToc: processing groups ...") - doc.d("Including starting from level " + strconv.Itoa(doc.StartDepth)) - for _, group := range groups { - // format result - n, _ := strconv.Atoi(group["num"]) - if n <= doc.StartDepth { - continue - } - if doc.Depth > 0 && n > doc.Depth { - continue - } + return &toc +} - link, _ := url.QueryUnescape(group["href"]) - if doc.AbsPaths { - link = doc.Path + link - } +func (doc *GHDoc) tocEntry(indent string, hdr Header) string { + // TODO(chuck): Calculate the repeat count with the doc.StartDepth + return strings.Repeat(indent, int(hdr.Depth)) + "* " + + "[" + doc.tocName(hdr.Name) + "]" + + "(" + doc.tocLink(hdr.Href) + ")" +} - tmpSection = removeStuff(group["name"]) - if doc.Escape { - tmpSection = EscapeSpecChars(tmpSection) - } - tocItem := strings.Repeat(listIndentation(), n-minHeaderNum-doc.StartDepth) + "* " + - "[" + tmpSection + "]" + - "(" + link + ")" - //fmt.Println(tocItem) - toc = append(toc, tocItem) +func (doc *GHDoc) tocName(name string) string { + if doc.Escape { + return EscapeSpecChars(name) } + return name +} - return &toc +func (doc *GHDoc) tocLink(href string) string { + link, _ := url.QueryUnescape(href) + if doc.AbsPaths { + link = doc.Path + link + } + return link } +//func (doc *GHDoc) GrabToc() *GHToc { +// doc.d("GrabToc: start, html size: " + strconv.Itoa(len(doc.html))) +// defer doc.d("GrabToc: done.") + +// r := headerRegexp() +// listIndentation := generateListIndentation(doc.Indent) + +// toc := GHToc{} +// minHeaderNum := 6 +// var groups []map[string]string +// doc.d("GrabToc: matching ...") +// for idx, match := range r.FindAllStringSubmatch(doc.html, -1) { +// doc.d("GrabToc: match #" + strconv.Itoa(idx) + " ...") +// group := make(map[string]string) +// // fill map for groups +// for i, name := range r.SubexpNames() { +// if i == 0 || name == "" { +// continue +// } +// doc.d("GrabToc: process group: " + name + ": " + match[i] + " ...") +// group[name] = removeStuff(match[i]) +// } +// // update minimum header number +// n, _ := strconv.Atoi(group["num"]) +// if n < minHeaderNum { +// minHeaderNum = n +// } +// groups = append(groups, group) +// } + +// var tmpSection string +// doc.d("GrabToc: processing groups ...") +// doc.d("Including starting from level " + strconv.Itoa(doc.StartDepth)) +// for _, group := range groups { +// // format result +// n, _ := strconv.Atoi(group["num"]) +// if n <= doc.StartDepth { +// continue +// } +// if doc.Depth > 0 && n > doc.Depth { +// continue +// } + +// link, _ := url.QueryUnescape(group["href"]) +// if doc.AbsPaths { +// link = doc.Path + link +// } + +// tmpSection = removeStuff(group["name"]) +// if doc.Escape { +// tmpSection = EscapeSpecChars(tmpSection) +// } +// tocItem := strings.Repeat(listIndentation(), n-minHeaderNum-doc.StartDepth) + "* " + +// "[" + tmpSection + "]" + +// "(" + link + ")" +// //fmt.Println(tocItem) +// toc = append(toc, tocItem) +// } + +// return &toc +//} + // GetToc return GHToc for a document func (doc *GHDoc) GetToc() *GHToc { if err := doc.Convert2HTML(); err != nil { diff --git a/headerfinder.go b/headerfinder.go index 30b9ef0..37a1cde 100644 --- a/headerfinder.go +++ b/headerfinder.go @@ -2,7 +2,6 @@ package ghtoc import ( "io" - "regexp" "strings" "golang.org/x/net/html" @@ -15,42 +14,6 @@ type HxDepth int // InvalidDepth designates that the data atom is not a valid Hx. const InvalidDepth HxDepth = -1 -const _headerRegexpStr = `(?si)[1-6])>\s*` + - `]*>\s*` + - `.*?(?P.*?)[1-6])>\s*` + -// `]*>` + -// `.*?(?P.*?)[1-6])>\s*` + -// `]*>\s*` + -// `.*?(?P.*?) Date: Mon, 7 Nov 2022 12:24:55 -0700 Subject: [PATCH 09/18] Fixing bugs --- ghdoc.go | 18 +++++++++++++++++- ghdoc_test.go | 10 +++++++++- headerfinder.go | 4 ++++ 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/ghdoc.go b/ghdoc.go index 5dae117..d4fbe32 100644 --- a/ghdoc.go +++ b/ghdoc.go @@ -142,9 +142,25 @@ func (doc *GHDoc) GrabToc() *GHToc { listIndentation := generateListIndentation(doc.Indent) + minDepth := doc.StartDepth + var maxDepth int + if doc.Depth > 0 { + maxDepth = doc.Depth - 1 + } else { + maxDepth = int(MaxHxDepth) + } + toc := GHToc{} for _, hdr := range findHeadersInString(doc.html) { - toc = append(toc, doc.tocEntry(listIndentation(), hdr)) + // DEBUG BEGIN + log.Printf("*** CHUCK: GrabToc hdr: %+#v", hdr) + log.Printf("*** CHUCK: GrabToc minDepth: %+#v", minDepth) + log.Printf("*** CHUCK: GrabToc maxDepth: %+#v", maxDepth) + // DEBUG END + hDepth := int(hdr.Depth) + if hDepth >= minDepth && hDepth <= maxDepth { + toc = append(toc, doc.tocEntry(listIndentation(), hdr)) + } } return &toc diff --git a/ghdoc_test.go b/ghdoc_test.go index 4b0d3dd..1c12012 100644 --- a/ghdoc_test.go +++ b/ghdoc_test.go @@ -180,8 +180,16 @@ func TestGrabTocDepth(t *testing.T) { Depth: 1, Indent: 2, } + // DEBUG BEGIN + log.Printf("*** CHUCK: ==========") + // DEBUG END toc := *doc.GrabToc() - + // DEBUG BEGIN + log.Printf("*** CHUCK toc: ") + for idx, item := range toc { + log.Printf("*** CHUCK %d: %+#v", idx, item) + } + // DEBUG END for i := 0; i <= len(tocExpected)-1; i++ { if toc[i] != tocExpected[i] { t.Error("Res :", toc[i], "\nExpected :", tocExpected[i]) diff --git a/headerfinder.go b/headerfinder.go index 37a1cde..4b4d2fb 100644 --- a/headerfinder.go +++ b/headerfinder.go @@ -14,6 +14,10 @@ type HxDepth int // InvalidDepth designates that the data atom is not a valid Hx. const InvalidDepth HxDepth = -1 +// MaxHxDepth is the maximum HxDepth value. +// H6 is the last Hx tag (5 = 6 - 1) +const MaxHxDepth HxDepth = 5 + // Header represents an HTML header type Header struct { Depth HxDepth From 976cfb3c49429caec28220112ec1dd2e1755183f Mon Sep 17 00:00:00 2001 From: Chuck Grindel Date: Mon, 7 Nov 2022 12:27:21 -0700 Subject: [PATCH 10/18] No more panics --- ghdoc_test.go | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/ghdoc_test.go b/ghdoc_test.go index 1c12012..ff072fe 100644 --- a/ghdoc_test.go +++ b/ghdoc_test.go @@ -180,16 +180,7 @@ func TestGrabTocDepth(t *testing.T) { Depth: 1, Indent: 2, } - // DEBUG BEGIN - log.Printf("*** CHUCK: ==========") - // DEBUG END toc := *doc.GrabToc() - // DEBUG BEGIN - log.Printf("*** CHUCK toc: ") - for idx, item := range toc { - log.Printf("*** CHUCK %d: %+#v", idx, item) - } - // DEBUG END for i := 0; i <= len(tocExpected)-1; i++ { if toc[i] != tocExpected[i] { t.Error("Res :", toc[i], "\nExpected :", tocExpected[i]) @@ -219,7 +210,7 @@ func TestGrabTocStartDepth(t *testing.T) {

Blabla...

-The command foo3 is even betterer

+The command foo3 is even betterer

Blabla...

@@ -235,7 +226,7 @@ func TestGrabTocStartDepth(t *testing.T) {

Blabla...

-The command bar3 is even betterer

+The command bar3 is even betterer

Blabla...

`, AbsPaths: false, @@ -243,8 +234,16 @@ func TestGrabTocStartDepth(t *testing.T) { StartDepth: 1, Indent: 2, } + // DEBUG BEGIN + log.Printf("*** CHUCK: =======") + // DEBUG END toc := *doc.GrabToc() - + // DEBUG BEGIN + log.Printf("*** CHUCK toc: ") + for idx, item := range toc { + log.Printf("*** CHUCK %d: %+#v", idx, item) + } + // DEBUG END for i := 0; i <= len(tocExpected)-1; i++ { if toc[i] != tocExpected[i] { t.Error("Res :", toc[i], "\nExpected :", tocExpected[i]) From a9295d7fcebf9aa14a17ca8591fa3a0ccafc666f Mon Sep 17 00:00:00 2001 From: Chuck Grindel Date: Mon, 7 Nov 2022 12:32:17 -0700 Subject: [PATCH 11/18] Adjust indent depth by StartDepth --- ghdoc.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ghdoc.go b/ghdoc.go index d4fbe32..61297f0 100644 --- a/ghdoc.go +++ b/ghdoc.go @@ -167,8 +167,8 @@ func (doc *GHDoc) GrabToc() *GHToc { } func (doc *GHDoc) tocEntry(indent string, hdr Header) string { - // TODO(chuck): Calculate the repeat count with the doc.StartDepth - return strings.Repeat(indent, int(hdr.Depth)) + "* " + + indentDepth := int(hdr.Depth) - doc.StartDepth + return strings.Repeat(indent, indentDepth) + "* " + "[" + doc.tocName(hdr.Name) + "]" + "(" + doc.tocLink(hdr.Href) + ")" } From 33c0c0e59af8c504753f6385e9de3fdbe28a3c2b Mon Sep 17 00:00:00 2001 From: Chuck Grindel Date: Mon, 7 Nov 2022 12:45:48 -0700 Subject: [PATCH 12/18] Fixed indent depth --- ghdoc.go | 23 +++++++++++++++++++---- ghdoc_test.go | 12 +++--------- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/ghdoc.go b/ghdoc.go index 61297f0..657a5f9 100644 --- a/ghdoc.go +++ b/ghdoc.go @@ -150,8 +150,17 @@ func (doc *GHDoc) GrabToc() *GHToc { maxDepth = int(MaxHxDepth) } + hdrs := findHeadersInString(doc.html) + + minHxDepth := MaxHxDepth + for _, hdr := range hdrs { + if hdr.Depth < minHxDepth { + minHxDepth = hdr.Depth + } + } + toc := GHToc{} - for _, hdr := range findHeadersInString(doc.html) { + for _, hdr := range hdrs { // DEBUG BEGIN log.Printf("*** CHUCK: GrabToc hdr: %+#v", hdr) log.Printf("*** CHUCK: GrabToc minDepth: %+#v", minDepth) @@ -159,7 +168,14 @@ func (doc *GHDoc) GrabToc() *GHToc { // DEBUG END hDepth := int(hdr.Depth) if hDepth >= minDepth && hDepth <= maxDepth { - toc = append(toc, doc.tocEntry(listIndentation(), hdr)) + indentDepth := int(hdr.Depth) - int(minHxDepth) - doc.StartDepth + // DEBUG BEGIN + log.Printf("*** CHUCK: GrabToc minHxDepth: %+#v", minHxDepth) + log.Printf("*** CHUCK: GrabToc doc.StartDepth: %+#v", doc.StartDepth) + log.Printf("*** CHUCK: GrabToc indentDepth: %+#v", indentDepth) + // DEBUG END + indent := strings.Repeat(listIndentation(), indentDepth) + toc = append(toc, doc.tocEntry(indent, hdr)) } } @@ -167,8 +183,7 @@ func (doc *GHDoc) GrabToc() *GHToc { } func (doc *GHDoc) tocEntry(indent string, hdr Header) string { - indentDepth := int(hdr.Depth) - doc.StartDepth - return strings.Repeat(indent, indentDepth) + "* " + + return indent + "* " + "[" + doc.tocName(hdr.Name) + "]" + "(" + doc.tocLink(hdr.Href) + ")" } diff --git a/ghdoc_test.go b/ghdoc_test.go index ff072fe..a66d879 100644 --- a/ghdoc_test.go +++ b/ghdoc_test.go @@ -95,6 +95,9 @@ For example:

Depth: 0, Indent: 2, } + // DEBUG BEGIN + log.Printf("*** CHUCK: ========") + // DEBUG END toc := *doc.GrabToc() for i := 0; i <= len(tocExpected)-1; i++ { if toc[i] != tocExpected[i] { @@ -234,16 +237,7 @@ func TestGrabTocStartDepth(t *testing.T) { StartDepth: 1, Indent: 2, } - // DEBUG BEGIN - log.Printf("*** CHUCK: =======") - // DEBUG END toc := *doc.GrabToc() - // DEBUG BEGIN - log.Printf("*** CHUCK toc: ") - for idx, item := range toc { - log.Printf("*** CHUCK %d: %+#v", idx, item) - } - // DEBUG END for i := 0; i <= len(tocExpected)-1; i++ { if toc[i] != tocExpected[i] { t.Error("Res :", toc[i], "\nExpected :", tocExpected[i]) From 208fe5890204a61b2a5004340984531bd10789f0 Mon Sep 17 00:00:00 2001 From: Chuck Grindel Date: Mon, 7 Nov 2022 12:46:19 -0700 Subject: [PATCH 13/18] Clean up --- ghdoc.go | 10 ---------- ghdoc_test.go | 3 --- 2 files changed, 13 deletions(-) diff --git a/ghdoc.go b/ghdoc.go index 657a5f9..ed4e123 100644 --- a/ghdoc.go +++ b/ghdoc.go @@ -161,19 +161,9 @@ func (doc *GHDoc) GrabToc() *GHToc { toc := GHToc{} for _, hdr := range hdrs { - // DEBUG BEGIN - log.Printf("*** CHUCK: GrabToc hdr: %+#v", hdr) - log.Printf("*** CHUCK: GrabToc minDepth: %+#v", minDepth) - log.Printf("*** CHUCK: GrabToc maxDepth: %+#v", maxDepth) - // DEBUG END hDepth := int(hdr.Depth) if hDepth >= minDepth && hDepth <= maxDepth { indentDepth := int(hdr.Depth) - int(minHxDepth) - doc.StartDepth - // DEBUG BEGIN - log.Printf("*** CHUCK: GrabToc minHxDepth: %+#v", minHxDepth) - log.Printf("*** CHUCK: GrabToc doc.StartDepth: %+#v", doc.StartDepth) - log.Printf("*** CHUCK: GrabToc indentDepth: %+#v", indentDepth) - // DEBUG END indent := strings.Repeat(listIndentation(), indentDepth) toc = append(toc, doc.tocEntry(indent, hdr)) } diff --git a/ghdoc_test.go b/ghdoc_test.go index a66d879..1913695 100644 --- a/ghdoc_test.go +++ b/ghdoc_test.go @@ -95,9 +95,6 @@ For example:

Depth: 0, Indent: 2, } - // DEBUG BEGIN - log.Printf("*** CHUCK: ========") - // DEBUG END toc := *doc.GrabToc() for i := 0; i <= len(tocExpected)-1; i++ { if toc[i] != tocExpected[i] { From 0af18b6a4d271b6fa4763745340d13080739433d Mon Sep 17 00:00:00 2001 From: Chuck Grindel Date: Mon, 7 Nov 2022 12:53:59 -0700 Subject: [PATCH 14/18] Clean up --- ghdoc.go | 65 +++---------------------------------------------- headerfinder.go | 1 - 2 files changed, 3 insertions(+), 63 deletions(-) diff --git a/ghdoc.go b/ghdoc.go index ed4e123..8179c66 100644 --- a/ghdoc.go +++ b/ghdoc.go @@ -152,6 +152,8 @@ func (doc *GHDoc) GrabToc() *GHToc { hdrs := findHeadersInString(doc.html) + // Determine the min depth represented by the slice of headers. For example, if a document only + // has H2 tags and no H1 tags. We want the H2 TOC entries to not have an indent. minHxDepth := MaxHxDepth for _, hdr := range hdrs { if hdr.Depth < minHxDepth { @@ -159,6 +161,7 @@ func (doc *GHDoc) GrabToc() *GHToc { } } + // Populate the toc with entries toc := GHToc{} for _, hdr := range hdrs { hDepth := int(hdr.Depth) @@ -193,68 +196,6 @@ func (doc *GHDoc) tocLink(href string) string { return link } -//func (doc *GHDoc) GrabToc() *GHToc { -// doc.d("GrabToc: start, html size: " + strconv.Itoa(len(doc.html))) -// defer doc.d("GrabToc: done.") - -// r := headerRegexp() -// listIndentation := generateListIndentation(doc.Indent) - -// toc := GHToc{} -// minHeaderNum := 6 -// var groups []map[string]string -// doc.d("GrabToc: matching ...") -// for idx, match := range r.FindAllStringSubmatch(doc.html, -1) { -// doc.d("GrabToc: match #" + strconv.Itoa(idx) + " ...") -// group := make(map[string]string) -// // fill map for groups -// for i, name := range r.SubexpNames() { -// if i == 0 || name == "" { -// continue -// } -// doc.d("GrabToc: process group: " + name + ": " + match[i] + " ...") -// group[name] = removeStuff(match[i]) -// } -// // update minimum header number -// n, _ := strconv.Atoi(group["num"]) -// if n < minHeaderNum { -// minHeaderNum = n -// } -// groups = append(groups, group) -// } - -// var tmpSection string -// doc.d("GrabToc: processing groups ...") -// doc.d("Including starting from level " + strconv.Itoa(doc.StartDepth)) -// for _, group := range groups { -// // format result -// n, _ := strconv.Atoi(group["num"]) -// if n <= doc.StartDepth { -// continue -// } -// if doc.Depth > 0 && n > doc.Depth { -// continue -// } - -// link, _ := url.QueryUnescape(group["href"]) -// if doc.AbsPaths { -// link = doc.Path + link -// } - -// tmpSection = removeStuff(group["name"]) -// if doc.Escape { -// tmpSection = EscapeSpecChars(tmpSection) -// } -// tocItem := strings.Repeat(listIndentation(), n-minHeaderNum-doc.StartDepth) + "* " + -// "[" + tmpSection + "]" + -// "(" + link + ")" -// //fmt.Println(tocItem) -// toc = append(toc, tocItem) -// } - -// return &toc -//} - // GetToc return GHToc for a document func (doc *GHDoc) GetToc() *GHToc { if err := doc.Convert2HTML(); err != nil { diff --git a/headerfinder.go b/headerfinder.go index 4b4d2fb..685fce8 100644 --- a/headerfinder.go +++ b/headerfinder.go @@ -37,7 +37,6 @@ func findHeaders(r io.Reader) []Header { tt := tokenizer.Next() switch tt { case html.ErrorToken: - // TODO(chuck): Check if this is io.EOF? return hdrs case html.StartTagToken: t := tokenizer.Token() From d14602d6350cbd59768430e5fcf4df027182a03c Mon Sep 17 00:00:00 2001 From: Chuck Grindel Date: Mon, 7 Nov 2022 12:59:32 -0700 Subject: [PATCH 15/18] Clean up --- cmd/gh-md-toc/main.go | 8 -------- 1 file changed, 8 deletions(-) diff --git a/cmd/gh-md-toc/main.go b/cmd/gh-md-toc/main.go index f085b9b..25ed41f 100644 --- a/cmd/gh-md-toc/main.go +++ b/cmd/gh-md-toc/main.go @@ -3,7 +3,6 @@ package main import ( "fmt" "io" - "log" "os" "gopkg.in/alecthomas/kingpin.v2" @@ -64,15 +63,8 @@ func main() { fmt.Println() } - // DEBUG BEGIN - log.Printf("*** CHUCK: pathsCount: %+#v", pathsCount) - // DEBUG END - for i := 1; i <= pathsCount; i++ { toc := <-ch - // DEBUG BEGIN - log.Printf("*** CHUCK: in loop toc: %+#v", toc) - // DEBUG END // #14, check if there's really TOC? if toc != nil { check(toc.Print(os.Stdout)) From d29c1845e7b4ff69761941ff4cccfe767286b8b2 Mon Sep 17 00:00:00 2001 From: Chuck Grindel Date: Mon, 7 Nov 2022 13:09:02 -0700 Subject: [PATCH 16/18] Implemented missing test --- headerfinder.go | 6 +++--- headerfinder_test.go | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/headerfinder.go b/headerfinder.go index 685fce8..32c5bc8 100644 --- a/headerfinder.go +++ b/headerfinder.go @@ -48,7 +48,7 @@ func findHeaders(r io.Reader) []Header { } func getHxDepth(dataAtom atom.Atom) HxDepth { - depths := []atom.Atom{ + hxAtoms := []atom.Atom{ atom.H1, atom.H2, atom.H3, @@ -56,8 +56,8 @@ func getHxDepth(dataAtom atom.Atom) HxDepth { atom.H5, atom.H6, } - for depth, v := range depths { - if dataAtom == v { + for depth, hxAtom := range hxAtoms { + if dataAtom == hxAtom { return HxDepth(depth) } } diff --git a/headerfinder_test.go b/headerfinder_test.go index aa4bc54..36c3d96 100644 --- a/headerfinder_test.go +++ b/headerfinder_test.go @@ -94,6 +94,7 @@ func TestFindAttribute(t *testing.T) { assert.Equal(t, spaceGreeting, attr) }) t.Run("attribute does not exist", func(t *testing.T) { - t.Error("IMPLEMENT ME!") + _, ok := findAttribute(attrs, "", "doesnotexist") + assert.False(t, ok) }) } From 563f2322eacc2fc3ea5be26d14c6f4d12076f87c Mon Sep 17 00:00:00 2001 From: Chuck Grindel Date: Mon, 7 Nov 2022 13:11:51 -0700 Subject: [PATCH 17/18] Add test for getHxDepth --- headerfinder_test.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/headerfinder_test.go b/headerfinder_test.go index 36c3d96..c8ada93 100644 --- a/headerfinder_test.go +++ b/headerfinder_test.go @@ -5,6 +5,7 @@ import ( "github.com/stretchr/testify/assert" "golang.org/x/net/html" + "golang.org/x/net/html/atom" ) const singleH1 = ` @@ -98,3 +99,13 @@ func TestFindAttribute(t *testing.T) { assert.False(t, ok) }) } + +func TestGetHxDepth(t *testing.T) { + assert.Equal(t, HxDepth(0), getHxDepth(atom.H1)) + assert.Equal(t, HxDepth(1), getHxDepth(atom.H2)) + assert.Equal(t, HxDepth(2), getHxDepth(atom.H3)) + assert.Equal(t, HxDepth(3), getHxDepth(atom.H4)) + assert.Equal(t, HxDepth(4), getHxDepth(atom.H5)) + assert.Equal(t, HxDepth(5), getHxDepth(atom.H6)) + assert.Equal(t, InvalidDepth, getHxDepth(atom.A)) +} From a9410b62be9a8629b9aaa9053d2b7217485ee64c Mon Sep 17 00:00:00 2001 From: Chuck Grindel Date: Tue, 8 Nov 2022 12:28:56 -0700 Subject: [PATCH 18/18] Update README about TOC parsing --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3ecb6b9..ca93176 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ The advantages of this implementation: * no dependencies (no need curl, wget, awk, etc.) * cross-platform (support for Windows, Mac OS, etc.) - * regexp for parsing TOC + * `golang.org/x/net/html` for parsing TOC * parallel processing of multiple documents