init
This commit is contained in:
58
extractor/youtube/util.go
Normal file
58
extractor/youtube/util.go
Normal file
@@ -0,0 +1,58 @@
|
||||
package youtube
|
||||
|
||||
import (
|
||||
"golang.org/x/net/html"
|
||||
|
||||
"net/http"
|
||||
)
|
||||
|
||||
func getHTMLScriptFunc(url string, readCodeLineByLine bool, codeFunc func(code string) bool) error {
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
z := html.NewTokenizer(resp.Body)
|
||||
isScript := false
|
||||
|
||||
for {
|
||||
tt := z.Next()
|
||||
|
||||
switch tt {
|
||||
case html.ErrorToken:
|
||||
return z.Err()
|
||||
case html.TextToken:
|
||||
if codeFunc != nil && isScript {
|
||||
t := string(z.Text())
|
||||
if readCodeLineByLine {
|
||||
// NOTE: a bufio line scanner doesn't work (bufio.Scanner: token too long); maybe this is a bug
|
||||
// Iterate over each line in the script
|
||||
ls := 0 // line start
|
||||
le := 0 // line end
|
||||
for ls < len(t) {
|
||||
if le == len(t) || t[le] == '\n' {
|
||||
ln := t[ls:le]
|
||||
|
||||
if !codeFunc(ln) {
|
||||
return nil
|
||||
}
|
||||
|
||||
ls = le + 1
|
||||
}
|
||||
le++
|
||||
}
|
||||
} else {
|
||||
if !codeFunc(t) {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
case html.StartTagToken, html.EndTagToken:
|
||||
tn, _ := z.TagName()
|
||||
if string(tn) == "script" {
|
||||
isScript = tt == html.StartTagToken
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user