Skip to content

Commit 12e9d34

Browse files
committed
adding todo shortcut and functions for scraping HTML pages
1 parent 0cd7750 commit 12e9d34

File tree

4 files changed

+75
-1
lines changed

4 files changed

+75
-1
lines changed

go.mod

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ require (
5050
github.com/tebeka/snowball v0.3.0 // indirect
5151
github.com/tecbot/gorocksdb v0.0.0-20191019123150-400c56251341 // indirect
5252
go.etcd.io/bbolt v1.3.3 // indirect
53-
golang.org/x/net v0.0.0-20190923162816-aa69164e4478 // indirect
53+
golang.org/x/net v0.0.0-20190923162816-aa69164e4478
5454
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect
55+
gopkg.in/xmlpath.v2 v2.0.0-20150820204837-860cbeca3ebc
5556
)

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,8 @@ gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4=
208208
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
209209
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
210210
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
211+
gopkg.in/xmlpath.v2 v2.0.0-20150820204837-860cbeca3ebc h1:LMEBgNcZUqXaP7evD1PZcL6EcDVa2QOFuI+cqM3+AJM=
212+
gopkg.in/xmlpath.v2 v2.0.0-20150820204837-860cbeca3ebc/go.mod h1:N8UOSI6/c2yOpa/XDz3KVUiegocTziPiqNkeNTMiG1k=
211213
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
212214
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
213215
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

simpleapp.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,10 @@ func (app *SimpleApp) GetInputCaptureFunc() func(event *tcell.EventKey) *tcell.E
194194
NewDocFlow("url", app)
195195
defer app.Draw()
196196
return nil
197+
case tcell.KeyCtrlT:
198+
NewDocFlow("todo", app)
199+
defer app.Draw()
200+
return nil
197201
case tcell.KeyCtrlC:
198202
app.Exit()
199203
default:

tools.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11
package minidoc
22

33
import (
4+
"bytes"
45
"fmt"
6+
"golang.org/x/net/html"
57
"io/ioutil"
68
"net/http"
79
"os"
810
"os/exec"
11+
"strings"
912
"syscall"
1013
"time"
14+
15+
xmlpath "gopkg.in/xmlpath.v2"
1116
)
1217

1318
func OpenFileIfNoneExist(filepath, content string) error {
@@ -161,3 +166,65 @@ func HTTPGet(url string) ([]byte, error) {
161166

162167
return data, err
163168
}
169+
170+
// ScreenScrape hits the given URL and screen scrape then return dom like object for searching
171+
func ScreenScrape(url string) (*xmlpath.Node, error) {
172+
173+
client := http.Client{
174+
Timeout: 3 * time.Second,
175+
}
176+
177+
resp, err := client.Get(url)
178+
if err != nil {
179+
return nil, err
180+
}
181+
defer resp.Body.Close()
182+
if resp.StatusCode != 200 {
183+
return nil, fmt.Errorf("failed")
184+
}
185+
186+
pageContent, err := ioutil.ReadAll(resp.Body)
187+
188+
reader := strings.NewReader(string(pageContent))
189+
root, err := html.Parse(reader)
190+
if err != nil {
191+
log.Fatal(err)
192+
}
193+
194+
var b bytes.Buffer
195+
html.Render(&b, root)
196+
fixedHTML := b.String()
197+
198+
reader = strings.NewReader(fixedHTML)
199+
xmlroot, xmlerr := xmlpath.ParseHTML(reader)
200+
201+
if xmlerr != nil {
202+
log.Fatal(xmlerr)
203+
}
204+
205+
return xmlroot, nil
206+
}
207+
208+
// SearchByXPath will walk down the node and children using xpath expression
209+
func SearchByXPath(context *xmlpath.Node, xpath string) []*xmlpath.Node {
210+
path := xmlpath.MustCompile(xpath)
211+
212+
nodes := make([]*xmlpath.Node, 0, 100)
213+
214+
iter := path.Iter(context)
215+
for iter.Next() {
216+
nodes = append(nodes, iter.Node())
217+
}
218+
219+
return nodes
220+
}
221+
222+
// XPathGet xpath get by index
223+
func XPathGet(context *xmlpath.Node, xpath string, index int) string {
224+
nodes := SearchByXPath(context, xpath)
225+
if index >= len(nodes) {
226+
fmt.Println("failed to get ", xpath, " index:", index)
227+
return ""
228+
}
229+
return strings.TrimSpace(nodes[index].String())
230+
}

0 commit comments

Comments
 (0)