This commit is contained in:
r4
2022-09-20 00:54:22 +02:00
commit 4195b20e65
33 changed files with 6397 additions and 0 deletions

View File

@@ -0,0 +1,7 @@
package builtins
import (
_ "git.nobrain.org/r4/dischord/extractor/spotify"
_ "git.nobrain.org/r4/dischord/extractor/youtube"
_ "git.nobrain.org/r4/dischord/extractor/ytdl"
)

208
extractor/extractor.go Normal file
View File

@@ -0,0 +1,208 @@
package extractor
import (
"errors"
"fmt"
"reflect"
"time"
)
// Sentinel errors returned by the package-level Extract, Search and Suggest
// functions.
var (
	// ErrNoSearchResults is returned by Extract when the fallback search
	// succeeds but yields no results.
	ErrNoSearchResults = errors.New("no search results")
	// ErrNoSearchProvider is returned by Search when no searcher has been
	// registered.
	ErrNoSearchProvider = errors.New("no search provider available")
	// ErrNoSuggestionProvider is returned by Suggest when no suggestor has
	// been registered.
	ErrNoSuggestionProvider = errors.New("no search suggestion provider available")
)
// Package-level registries, appended to by AddExtractor, AddSearcher and
// AddSuggestor.
var (
// providers holds every registered provider regardless of its role; it is
// what DefaultConfig iterates over.
providers []provider
// extractors, searchers and suggestors hold the providers by role, in
// registration order.
extractors []extractor
searchers []searcher
suggestors []suggestor
// defaultConfig is returned by DefaultConfig when non-nil.
// NOTE(review): nothing in this file assigns it - confirm whether it is set
// elsewhere or is a leftover.
defaultConfig Config
)
// Extract resolves input into playable data.
//
// The configuration is type-checked first. The input is then handed to the
// first registered extractor whose Matches method accepts it; errors from that
// extractor are wrapped in *Error. If no extractor matches, input is treated
// as a search query and only the first search result is returned;
// ErrNoSearchResults is returned when the search comes back empty.
func Extract(cfg Config, input string) ([]Data, error) {
	if err := cfg.CheckTypes(); err != nil {
		return nil, err
	}

	for _, ex := range extractors {
		if !ex.Matches(cfg[ex.name], input) {
			continue
		}
		results, err := ex.Extract(cfg[ex.name], input)
		if err != nil {
			return nil, &Error{ex.name, err}
		}
		return results, nil
	}

	// No extractor claimed the input: fall back to a search.
	found, err := Search(cfg, input)
	switch {
	case err != nil:
		return nil, err
	case len(found) == 0:
		return nil, ErrNoSearchResults
	}
	return []Data{found[0]}, nil
}
// Search runs input as a query against the first registered searcher.
//
// The configuration is type-checked first. Errors from the searcher are
// wrapped in *Error; ErrNoSearchProvider is returned when no searcher is
// registered. Only the first registered searcher is ever consulted.
func Search(cfg Config, input string) ([]Data, error) {
	if err := cfg.CheckTypes(); err != nil {
		return nil, err
	}
	if len(searchers) == 0 {
		return nil, ErrNoSearchProvider
	}

	first := searchers[0]
	results, err := first.Search(cfg[first.name], input)
	if err != nil {
		return nil, &Error{first.name, err}
	}
	return results, nil
}
// Suggest asks the first registered suggestor for search suggestions for
// input.
//
// The configuration is type-checked first. Errors from the suggestor are
// wrapped in *Error; ErrNoSuggestionProvider is returned when no suggestor is
// registered. Only the first registered suggestor is ever consulted.
func Suggest(cfg Config, input string) ([]string, error) {
	if err := cfg.CheckTypes(); err != nil {
		return nil, err
	}
	if len(suggestors) == 0 {
		return nil, ErrNoSuggestionProvider
	}

	first := suggestors[0]
	suggestions, err := first.Suggest(cfg[first.name], input)
	if err != nil {
		return nil, &Error{first.name, err}
	}
	return suggestions, nil
}
// Error wraps an error returned by a provider together with the name the
// provider was registered under.
type Error struct {
	ProviderName string // name passed to AddExtractor/AddSearcher/AddSuggestor
	Err          error  // underlying error returned by the provider
}

// Error implements the error interface, prefixing the underlying message with
// the provider name.
func (e *Error) Error() string {
	return "extractor[" + e.ProviderName + "]: " + e.Err.Error()
}

// Unwrap returns the underlying provider error so that errors.Is and
// errors.As can match sentinel errors through the wrapper.
func (e *Error) Unwrap() error {
	return e.Err
}
// provider pairs a registered Provider with the name it was registered under.
type provider struct {
Provider
name string
}
// extractor pairs a registered Extractor with the name it was registered
// under; the name is also the key used to look up its ProviderConfig.
type extractor struct {
Extractor
name string
}
// searcher pairs a registered Searcher with the name it was registered under.
type searcher struct {
Searcher
name string
}
// suggestor pairs a registered Suggestor with the name it was registered
// under.
type suggestor struct {
Suggestor
name string
}
// Config maps a provider's registered name to that provider's configuration.
type Config map[string]ProviderConfig

// DefaultConfig returns the package-level defaultConfig when it is non-nil;
// otherwise it builds a fresh Config by asking every registered provider for
// its default configuration.
func DefaultConfig() Config {
	if defaultConfig != nil {
		return defaultConfig
	}

	built := make(Config)
	for _, p := range providers {
		built[p.name] = p.DefaultConfig()
	}
	return built
}
// CheckTypes verifies that every value in cfg has the same dynamic type as the
// corresponding value in the default configuration.
//
// It returns an error if a provider's config is nil, and a *ConfigTypeError
// for the first key whose type differs from the default. Keys or providers
// that do not exist in the defaults have a nil expected type and therefore
// only pass when their value is nil as well.
func (cfg Config) CheckTypes() error {
	// Build the defaults once up front: DefaultConfig may reconstruct every
	// provider's configuration on each call, which is wasteful to repeat for
	// every key.
	defaults := DefaultConfig()

	for provider, pCfg := range cfg {
		if pCfg == nil {
			return fmt.Errorf("extractor config for %v is nil", provider)
		}
		pDefaults := defaults[provider]
		for k, v := range pCfg {
			got, expected := reflect.TypeOf(v), reflect.TypeOf(pDefaults[k])
			if got != expected {
				return &ConfigTypeError{
					Provider: provider,
					Key:      k,
					Expected: expected,
					Got:      got,
				}
			}
		}
	}
	return nil
}
// ConfigTypeError reports a configuration value whose type differs from the
// type of the corresponding default value.
type ConfigTypeError struct {
	Provider string       // registered provider name
	Key      string       // configuration key within that provider
	Expected reflect.Type // type of the default value; nil if there is none
	Got      reflect.Type // type of the supplied value; nil for a nil value
}

// Error implements the error interface.
//
// Types are rendered with reflect.Type.String rather than Name, because Name
// returns the empty string for unnamed types such as slices, maps and
// pointers, which would make the message unreadable.
func (e *ConfigTypeError) Error() string {
	expectedName := "nil"
	if e.Expected != nil {
		expectedName = e.Expected.String()
	}
	gotName := "nil"
	if e.Got != nil {
		gotName = e.Got.String()
	}
	return "extractor config type error: " + e.Provider + "." + e.Key + ": expected " + expectedName + " but got " + gotName
}
// ProviderConfig holds one provider's settings, keyed by setting name.
type ProviderConfig map[string]any
// Provider is the base interface of everything that can be registered with
// this package; it supplies the provider's default settings.
type Provider interface {
DefaultConfig() ProviderConfig
}
// Extractor is a Provider that can turn an input it recognizes into Data.
type Extractor interface {
Provider
// Matches reports whether this extractor wants to handle input.
Matches(cfg ProviderConfig, input string) bool
// Extract returns the data for input.
Extract(cfg ProviderConfig, input string) ([]Data, error)
}
// AddExtractor registers e under name, both as a general provider (so its
// defaults appear in DefaultConfig) and as an extractor consulted by Extract.
func AddExtractor(name string, e Extractor) {
	extractors = append(extractors, extractor{Extractor: e, name: name})
	providers = append(providers, provider{Provider: e, name: name})
}
// Searcher is a Provider that can answer free-text search queries.
type Searcher interface {
Provider
// Search returns the results for the query input.
Search(cfg ProviderConfig, input string) ([]Data, error)
}
// AddSearcher registers s under name, both as a general provider (so its
// defaults appear in DefaultConfig) and as a searcher available to Search.
func AddSearcher(name string, s Searcher) {
	searchers = append(searchers, searcher{Searcher: s, name: name})
	providers = append(providers, provider{Provider: s, name: name})
}
// Suggestor is a Provider that can offer search suggestions for a partial
// query.
type Suggestor interface {
Provider
// Suggest returns suggestions for input.
Suggest(cfg ProviderConfig, input string) ([]string, error)
}
// AddSuggestor registers s under name, both as a general provider (so its
// defaults appear in DefaultConfig) and as a suggestor available to Suggest.
func AddSuggestor(name string, s Suggestor) {
	suggestors = append(suggestors, suggestor{Suggestor: s, name: name})
	providers = append(providers, provider{Provider: s, name: name})
}
// Data describes a single playable item as returned by extractors and
// searchers.
type Data struct {
// Each instance of this struct should be reconstructable by calling
// Extract() on the SourceUrl
// String values are "" if not present
SourceUrl string
StreamUrl string // may expire, see Expires
Title string
PlaylistUrl string
PlaylistTitle string
Description string
Uploader string
Duration int // in seconds; -1 if unknown
Expires time.Time // when StreamUrl expires
OfficialArtist bool // only for sites that have non-music (e.g. YouTube); search results only
}

220
extractor/extractor_test.go Normal file
View File

@@ -0,0 +1,220 @@
package extractor_test
import (
"git.nobrain.org/r4/dischord/extractor"
_ "git.nobrain.org/r4/dischord/extractor/builtins"
"net/http"
"net/url"
"strings"
"testing"
)
// extractorTestCfg is the default configuration shared by the tests in this
// file.
var extractorTestCfg = extractor.DefaultConfig()
// validYtStreamUrl reports whether strmUrl looks like a YouTube stream URL
// (https, a *.googlevideo.com host, the /videoplayback path and both the
// "expire" and "id" query parameters) and whether an HTTP GET on it answers
// with status 200. The network request is only made once the URL shape has
// been accepted.
func validYtStreamUrl(strmUrl string) bool {
	parsed, err := url.Parse(strmUrl)
	if err != nil {
		return false
	}
	query, err := url.ParseQuery(parsed.RawQuery)
	if err != nil {
		return false
	}

	// Reject anything that does not have the expected shape.
	switch {
	case parsed.Scheme != "https",
		!strings.HasSuffix(parsed.Host, ".googlevideo.com"),
		parsed.Path != "/videoplayback",
		!query.Has("expire"),
		!query.Has("id"):
		return false
	}

	resp, err := http.Get(strmUrl)
	if err != nil {
		return false
	}
	defer resp.Body.Close()
	return resp.StatusCode == 200
}
// verifySearchResult fails the test unless the first entry of data has
// targetUrl as its SourceUrl and extracting that URL yields exactly one
// result whose StreamUrl passes validYtStreamUrl.
func verifySearchResult(t *testing.T, data []extractor.Data, targetUrl string) {
if len(data) == 0 {
t.Fatalf("Expected search results but got none")
}
first := data[0]
if first.SourceUrl != targetUrl {
t.Fatalf("Invalid search result: expected '%v' but got '%v'", targetUrl, first.SourceUrl)
}
// Re-extract the hit to make sure the search result is actually playable.
strmData, err := extractor.Extract(extractorTestCfg, first.SourceUrl)
if err != nil {
t.Fatalf("Error retrieving video data: %v", err)
}
if len(strmData) != 1 {
t.Fatalf("Expected exactly one extraction result")
}
if !validYtStreamUrl(strmData[0].StreamUrl) {
t.Fatalf("Invalid YouTube stream URL: got '%v'", strmData[0].StreamUrl)
}
}
// TestSearch checks that a specific query returns the expected video as its
// first search result and that the result is playable.
func TestSearch(t *testing.T) {
// NOTE(review): the result and error of this Spotify extraction are
// discarded; it looks like a debugging leftover - confirm before removing.
extractor.Extract(extractorTestCfg, "https://open.spotify.com/track/22z9GL53FudbuFJqa43Nzj")
data, err := extractor.Search(extractorTestCfg, "nilered turns water into wine like jesus")
if err != nil {
t.Fatalf("Error searching YouTube: %v", err)
}
verifySearchResult(t, data, "https://www.youtube.com/watch?v=tAU0FX1d044")
}
// TestSearchPlaylist checks that a query aimed at a specific playlist returns
// that playlist as the first search result.
func TestSearchPlaylist(t *testing.T) {
	data, err := extractor.Search(extractorTestCfg, "instant regret clicking this playlist epic donut dude")
	if err != nil {
		t.Fatalf("Error searching YouTube: %v", err)
	}
	if len(data) == 0 {
		t.Fatalf("Expected search results but got none")
	}
	target := "https://www.youtube.com/playlist?list=PLv3TTBr1W_9tppikBxAE_G6qjWdBljBHJ"
	if data[0].PlaylistUrl != target {
		// Report the field that was actually compared.
		t.Fatalf("Invalid search result: expected '%v' but got '%v'", target, data[0].PlaylistUrl)
	}
}
// TestSearchSuggestions checks that asking for suggestions for "a" succeeds
// and returns at least one suggestion.
func TestSearchSuggestions(t *testing.T) {
sug, err := extractor.Suggest(extractorTestCfg, "a")
if err != nil {
t.Fatalf("Error: %v", err)
}
if len(sug) == 0 {
t.Fatalf("Function didn't return any suggestions")
}
}
// TestSearchIntegrityWeirdCharacters checks that an input full of
// punctuation, non-ASCII characters and percent signs still resolves to
// exactly one result through Extract.
func TestSearchIntegrityWeirdCharacters(t *testing.T) {
data, err := extractor.Extract(extractorTestCfg, "test lol | # !@#%&(*)!&*!äöfáßö®©œæ %% %3 %32")
if err != nil {
t.Fatalf("Error searching YouTube: %v", err)
}
if len(data) != 1 {
t.Fatalf("Expected exactly one URL but got %v", len(data))
}
}
// TestYoutubeMusicVideo checks that extracting a YouTube watch URL yields
// exactly one result with a working stream URL.
func TestYoutubeMusicVideo(t *testing.T) {
data, err := extractor.Extract(extractorTestCfg, "https://www.youtube.com/watch?v=dQw4w9WgXcQ")
if err != nil {
t.Fatalf("Error searching YouTube: %v", err)
}
if len(data) != 1 {
t.Fatalf("Expected exactly one URL but got %v", len(data))
}
if !validYtStreamUrl(data[0].StreamUrl) {
t.Fatalf("Invalid YouTube stream URL: got '%v'", data[0].StreamUrl)
}
}
// TestYoutubeMusicVideoMulti runs TestYoutubeMusicVideo ten times in a row.
func TestYoutubeMusicVideoMulti(t *testing.T) {
for i := 0; i < 10; i++ {
TestYoutubeMusicVideo(t)
}
}
// TestYoutubePlaylist checks playlist extraction in both modes of the YouTube
// extractor's "require-direct-playlist-url" setting: when it is enabled, a
// watch URL carrying a list parameter yields only the single video while a
// direct playlist URL yields the whole playlist; with the default
// configuration the watch URL yields the whole playlist as well.
func TestYoutubePlaylist(t *testing.T) {
	cfg := extractor.DefaultConfig()
	// The provider is registered as "youtube" and the setting is a bool;
	// using any other provider name would write into a nil map and panic.
	cfg["youtube"]["require-direct-playlist-url"] = true
	watchUrl := "https://www.youtube.com/watch?v=jdUXfsMTv7o&list=PLdImBTpIvHA1xN1Dfw2Ec5NQ5d-LF3ZP5"
	pUrl := "https://www.youtube.com/playlist?list=PLdImBTpIvHA1xN1Dfw2Ec5NQ5d-LF3ZP5"

	// Direct playlist URL required: the watch URL is a single video.
	data, err := extractor.Extract(cfg, watchUrl)
	if err != nil {
		t.Fatalf("Error: %v", err)
	}
	if len(data) != 1 {
		t.Fatalf("Expected only a single video")
	}
	if data[0].PlaylistTitle != "" {
		t.Fatalf("Did not expect a playlist")
	}

	// Direct playlist URL required: the playlist URL is the full playlist.
	data, err = extractor.Extract(cfg, pUrl)
	if err != nil {
		t.Fatalf("Error: %v", err)
	}
	if len(data) != 14 {
		t.Fatalf("Invalid playlist item count: got '%v'", len(data))
	}

	// Default configuration: the watch URL is treated as the full playlist.
	data, err = extractor.Extract(extractorTestCfg, watchUrl)
	if err != nil {
		t.Fatalf("Error: %v", err)
	}
	if len(data) != 14 {
		t.Fatalf("Invalid playlist item count: got '%v'", len(data))
	}
	if data[0].Title != "Why I use Linux" {
		t.Fatalf("Invalid title of first item: got '%v'", data[0].Title)
	}
	if data[0].Duration != 70 {
		t.Fatalf("Invalid duration of first item: got '%v'", data[0].Duration)
	}
}
// TestSpotifyTrack checks that a Spotify track URL resolves to exactly one
// result with the expected title and artists and a working YouTube stream
// URL.
func TestSpotifyTrack(t *testing.T) {
data, err := extractor.Extract(extractorTestCfg, "https://open.spotify.com/track/7HjaeqTHY6QlwPY0MEjuMF")
if err != nil {
t.Fatalf("Error: %v", err)
}
if len(data) != 1 {
t.Fatalf("Expected exactly one URL but got %v", len(data))
}
if data[0].Title != "Infected Mushroom, Ninet Tayeb - Black Velvet" {
t.Fatalf("Invalid song title: %v", data[0].Title)
}
if data[0].Uploader != "Infected Mushroom, Ninet Tayeb" {
t.Fatalf("Invalid artists: %v", data[0].Uploader)
}
if !validYtStreamUrl(data[0].StreamUrl) {
t.Fatalf("Invalid YouTube stream URL: got '%v'", data[0].StreamUrl)
}
}
// TestSpotifyAlbum checks that a Spotify album URL resolves to its 11 tracks
// and spot-checks the titles and artists of the first two.
func TestSpotifyAlbum(t *testing.T) {
data, err := extractor.Extract(extractorTestCfg, "https://open.spotify.com/album/6YEjK95sgoXQn1yGbYjHsp")
if err != nil {
t.Fatalf("Error: %v", err)
}
if len(data) != 11 {
t.Fatalf("Expected exactly 11 tracks but got %v", len(data))
}
if data[0].Title != "Infected Mushroom, Ninet Tayeb - Black Velvet" {
t.Fatalf("Invalid title of first item: got '%v'", data[0].Title)
}
if data[0].Uploader != "Infected Mushroom, Ninet Tayeb" {
t.Fatalf("Invalid artists in first item: %v", data[0].Uploader)
}
if data[1].Title != "Infected Mushroom - While I'm in the Mood" {
t.Fatalf("Invalid title of second item: got '%v'", data[1].Title)
}
}
// TestYoutubeDl checks that a SoundCloud set URL resolves to its 14 tracks
// with the expected first two titles and playlist title.
// NOTE(review): presumably this is served by the ytdl extractor, which is not
// part of this section - confirm.
func TestYoutubeDl(t *testing.T) {
data, err := extractor.Extract(extractorTestCfg, "https://soundcloud.com/pendulum/sets/hold-your-colour-1")
if err != nil {
t.Fatalf("Error: %v", err)
}
if len(data) != 14 {
t.Fatalf("Invalid playlist item count: got '%v'", len(data))
}
if data[0].Title != "Prelude" {
t.Fatalf("Invalid title of first item: got '%v'", data[0].Title)
}
if data[1].Title != "Slam" {
t.Fatalf("Invalid title of second item: got '%v'", data[1].Title)
}
if data[0].PlaylistTitle != "Hold Your Colour" {
t.Fatalf("Invalid playlist title: got '%v'", data[0].PlaylistTitle)
}
}

View File

@@ -0,0 +1,96 @@
package spotify
import (
"git.nobrain.org/r4/dischord/extractor"
"git.nobrain.org/r4/dischord/extractor/youtube"
"errors"
"net/url"
"strings"
)
// init registers the Spotify extractor under the name "spotify".
func init() {
extractor.AddExtractor("spotify", NewExtractor())
}
// matchType classifies the kind of Spotify resource a URL points to.
type matchType int

const (
	matchTypeNone matchType = iota // not a supported Spotify URL
	matchTypeTrack
	matchTypeAlbum
	matchTypePlaylist
)

var (
	// ErrInvalidInput is returned by Extract for input that is not a
	// supported Spotify URL.
	ErrInvalidInput = errors.New("invalid input")
)

// matches parses input as a Spotify URL and returns the resource ID together
// with the kind of resource. Only http(s) URLs on open.spotify.com whose path
// has exactly the form "/<kind>/<id>" with kind being "track", "album" or
// "playlist" are accepted; everything else yields ("", matchTypeNone).
func matches(input string) (string, matchType) {
	parsed, err := url.Parse(input)
	if err != nil {
		return "", matchTypeNone
	}
	if parsed.Scheme != "http" && parsed.Scheme != "https" {
		return "", matchTypeNone
	}
	if parsed.Host != "open.spotify.com" {
		return "", matchTypeNone
	}

	// The path must be exactly two segments after the leading slash.
	if !strings.HasPrefix(parsed.Path, "/") {
		return "", matchTypeNone
	}
	kind, id, found := strings.Cut(parsed.Path[1:], "/")
	if !found || strings.Contains(id, "/") {
		return "", matchTypeNone
	}

	kinds := map[string]matchType{
		"track":    matchTypeTrack,
		"album":    matchTypeAlbum,
		"playlist": matchTypePlaylist,
	}
	if mt, ok := kinds[kind]; ok {
		return id, mt
	}
	return "", matchTypeNone
}
// Extractor resolves Spotify URLs. Track metadata comes from the Spotify API
// and the actual audio stream is looked up on YouTube through the embedded
// YouTube searcher and extractor.
type Extractor struct {
// YouTube searcher and the config passed to it when locating a track.
ytSearcher *youtube.Searcher
ytSearcherConfig extractor.ProviderConfig
// YouTube extractor and the config passed to it when fetching the stream.
ytExtractor *youtube.Extractor
ytExtractorConfig extractor.ProviderConfig
// token is the Spotify API token, refreshed via updateApiToken.
token apiToken
}
// NewExtractor returns a Spotify extractor wired up with a fresh YouTube
// searcher and extractor, each using its own default configuration.
func NewExtractor() *Extractor {
	searcher := &youtube.Searcher{}
	ytExtractor := &youtube.Extractor{}
	return &Extractor{
		ytSearcher:        searcher,
		ytSearcherConfig:  searcher.DefaultConfig(),
		ytExtractor:       ytExtractor,
		ytExtractorConfig: ytExtractor.DefaultConfig(),
	}
}
// DefaultConfig returns an empty configuration; the Spotify extractor has no
// settings.
func (e *Extractor) DefaultConfig() extractor.ProviderConfig {
return extractor.ProviderConfig{}
}
// Matches reports whether input is a Spotify track, album or playlist URL.
// cfg is not used.
func (e *Extractor) Matches(cfg extractor.ProviderConfig, input string) bool {
_, m := matches(input)
return m != matchTypeNone
}
// Extract resolves a Spotify track, album or playlist URL. A track yields a
// single-element slice; albums and playlists yield one entry per track.
// ErrInvalidInput is returned when input is not a supported Spotify URL.
// cfg is not used.
func (e *Extractor) Extract(cfg extractor.ProviderConfig, input string) ([]extractor.Data, error) {
	id, kind := matches(input)
	if kind == matchTypeAlbum {
		return getAlbum(e, id)
	}
	if kind == matchTypePlaylist {
		return getPlaylist(e, id)
	}
	if kind != matchTypeTrack {
		return nil, ErrInvalidInput
	}

	track, err := getTrack(e, id)
	if err != nil {
		return nil, err
	}
	return []extractor.Data{track}, nil
}

View File

@@ -0,0 +1,378 @@
package spotify
import (
"git.nobrain.org/r4/dischord/extractor"
exutil "git.nobrain.org/r4/dischord/extractor/util"
"encoding/json"
"errors"
"net/http"
"strings"
"time"
)
// Errors returned by the Spotify API helpers in this file.
var (
// NOTE(review): not returned by any code visible in this section - confirm
// where it is used.
ErrGettingSessionData = errors.New("unable to get session data")
// Returned by getTrack when the API response lists no artists.
ErrInvalidTrackData = errors.New("invalid track data")
// Returned by getTrack when the YouTube search has no results.
ErrTrackNotFound = errors.New("unable to find track on YouTube")
// Returned by getTrack when YouTube extraction does not yield exactly one
// result.
ErrUnableToGetYoutubeStream = errors.New("unable to get YouTube stream")
// Returned when a Spotify API response cannot be decoded as JSON.
ErrDecodingApiResponse = errors.New("error decoding API response")
)
// iDist returns the distance between two integers, i.e. the larger one minus
// the smaller one.
func iDist(a, b int) int {
	hi, lo := b, a
	if a > b {
		hi, lo = a, b
	}
	return hi - lo
}
// containsIgnoreCase reports whether substr occurs in s when both are
// compared in upper case.
func containsIgnoreCase(s, substr string) bool {
	haystack := strings.ToUpper(s)
	needle := strings.ToUpper(substr)
	return strings.Index(haystack, needle) >= 0
}
// sessionData is the subset of the JSON session object embedded in the
// Spotify web page that updateApiToken decodes.
type sessionData struct {
AccessToken string `json:"accessToken"`
AccessTokenExpirationTimestampMs int64 `json:"accessTokenExpirationTimestampMs"`
}
// apiToken is a Spotify API bearer token together with its expiry time.
type apiToken struct {
token string
expires time.Time
}
// updateApiToken makes sure token holds a Spotify API token that has not yet
// expired.
//
// If the current token is still valid nothing happens. Otherwise the Spotify
// web player page is fetched and the session JSON embedded in one of its
// script tags is decoded. An error is returned when the page cannot be
// scanned, when the session JSON cannot be parsed, or - as
// ErrGettingSessionData - when the session data carries no access token, so
// that an empty token is never stored as if it were valid.
func updateApiToken(token *apiToken) error {
	if time.Now().Before(token.expires) {
		// Token already up-to-date
		return nil
	}

	// Get new token
	var data sessionData
	var parseErr error
	err := exutil.GetHTMLScriptFunc("https://open.spotify.com", false, func(code string) bool {
		if !strings.HasPrefix(code, "{\"accessToken\":\"") {
			// Not the session script; keep scanning.
			return true
		}
		// Parse session data; stop scanning either way.
		if err := json.Unmarshal([]byte(code), &data); err != nil {
			parseErr = err
		}
		return false
	})
	if err != nil {
		return err
	}
	if parseErr != nil {
		return parseErr
	}
	if data.AccessToken == "" {
		return ErrGettingSessionData
	}

	*token = apiToken{
		token:   data.AccessToken,
		expires: time.UnixMilli(data.AccessTokenExpirationTimestampMs),
	}
	return nil
}
// trackData is the subset of a Spotify API track object used by this package.
type trackData struct {
	Artists []struct {
		Name string `json:"name"`
	} `json:"artists"`
	DurationMs   int `json:"duration_ms"`
	ExternalUrls struct {
		Spotify string `json:"spotify"`
	} `json:"external_urls"`
	Name string `json:"name"`
}

// artistsString returns the artist names joined by ", ", or "" when the track
// has no artists.
func (d trackData) artistsString() string {
	var joined string
	for idx := range d.Artists {
		if idx > 0 {
			joined += ", "
		}
		joined += d.Artists[idx].Name
	}
	return joined
}

// titleString returns the display title in the form "<artists> - <name>".
func (d trackData) titleString() string {
	artists := d.artistsString()
	return artists + " - " + d.Name
}
// getTrack resolves a Spotify track ID to playable data.
//
// It refreshes the API token if necessary, fetches the track's metadata from
// the Spotify API, searches YouTube for "<name> - <artists>", scores every
// search result and extracts the stream of the best-scoring one. The returned
// Data carries the Spotify URL, title and artists together with the YouTube
// stream URL, duration and expiry.
//
// Errors: any error from the token refresh, request construction, HTTP
// request, YouTube search or YouTube extraction is returned as is;
// ErrDecodingApiResponse if the API response is not valid JSON;
// ErrInvalidTrackData if the response lists no artists; ErrTrackNotFound if
// the search has no results; ErrUnableToGetYoutubeStream if extraction does
// not yield exactly one result.
func getTrack(e *Extractor, trackId string) (extractor.Data, error) {
	if err := updateApiToken(&e.token); err != nil {
		return extractor.Data{}, err
	}

	// Make API request for track info
	req, err := http.NewRequest("GET", "https://api.spotify.com/v1/tracks/"+trackId, nil)
	if err != nil {
		// trackId comes from a user-supplied URL path and may make the request
		// URL invalid; without this check req would be nil below.
		return extractor.Data{}, err
	}
	req.Header.Add("Content-Type", "application/json")
	req.Header.Add("Authorization", "Bearer "+e.token.token)
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return extractor.Data{}, err
	}
	defer resp.Body.Close()

	// Parse track info
	var data trackData
	dec := json.NewDecoder(resp.Body)
	if err := dec.Decode(&data); err != nil {
		return extractor.Data{}, ErrDecodingApiResponse
	}
	if len(data.Artists) == 0 {
		return extractor.Data{}, ErrInvalidTrackData
	}

	// Search for track on YouTube
	results, err := e.ytSearcher.Search(e.ytSearcherConfig, data.Name+" - "+data.artistsString())
	if err != nil {
		return extractor.Data{}, err
	}
	if len(results) == 0 {
		return extractor.Data{}, ErrTrackNotFound
	}

	// Lower is better
	scoreOf := func(ytd extractor.Data, resIdx int) int {
		// This function determines the likelihood of a given YouTube video
		// belonging to the Spotify song.
		// It may look pretty complicated, but here's the gist:
		// - lower scores are better
		// - the general formula is: resIdx - matchAccuracy / penalty
		//   where 'resIdx' is the position in the search results,
		//   'matchAccuracy' is how well the video superficially matches
		//   with the Spotify song (title, artists, duration) and 'penalty'
		//   measures the hints pointing to the current video being the
		//   wrong one (awfully wrong duration, instrumental version, remix
		//   etc.)
		// - if the video is from an official artist channel, that makes the
		//   penalty points even more credible, so they're squared
		// - accuracy and penalty points are multiplicative; this makes them
		//   have exponentially more weight the more they are given
		matchAccuracy := 1.0
		matchPenalty := 1.0
		sqrPenalty := false
		if ytd.OfficialArtist || strings.HasSuffix(ytd.Uploader, " - Topic") {
			matchAccuracy *= 4.0
			sqrPenalty = true
		}
		if containsIgnoreCase(ytd.Title, data.Name) {
			matchAccuracy *= 4.0
		}
		matchingArtists := 0.0
		firstMatches := false
		for i, artist := range data.Artists {
			if containsIgnoreCase(ytd.Uploader, artist.Name) ||
				containsIgnoreCase(ytd.Title, artist.Name) {
				matchingArtists += 1.0
				if i == 0 {
					firstMatches = true
				}
			}
		}
		if firstMatches {
			matchAccuracy *= 2.0
		}
		matchAccuracy *= 2.0 * (matchingArtists / float64(len(data.Artists)))
		durationDist := iDist(ytd.Duration, data.DurationMs/1000)
		if durationDist <= 5 {
			matchAccuracy *= 8.0
		} else if durationDist >= 300 {
			matchPenalty *= 16.0
		}
		spotiArtists := data.artistsString()
		// True if s appears in the YouTube title but in neither the Spotify
		// track name nor its artists, i.e. it hints at a different version.
		onlyYtTitleContains := func(s string) bool {
			return !containsIgnoreCase(data.Name, s) &&
				!containsIgnoreCase(spotiArtists, s) &&
				containsIgnoreCase(ytd.Title, s)
		}
		if onlyYtTitleContains("instrumental") || onlyYtTitleContains("cover") ||
			onlyYtTitleContains("live") || onlyYtTitleContains("album") {
			matchPenalty *= 8.0
		}
		if onlyYtTitleContains("remix") || onlyYtTitleContains("rmx") {
			matchPenalty *= 8.0
		} else if onlyYtTitleContains("mix") {
			matchPenalty *= 6.0
		}
		if onlyYtTitleContains("vip") {
			matchPenalty *= 6.0
		}
		totalPenalty := matchPenalty
		if sqrPenalty {
			totalPenalty *= totalPenalty
		}
		return resIdx - int(matchAccuracy/totalPenalty)
	}

	// Select the result with the lowest (best) score
	lowestIdx := -1
	lowest := 2147483647
	for i, v := range results {
		s := scoreOf(v, i)
		//fmt.Println(i, s, v)
		if s < lowest {
			lowestIdx = i
			lowest = s
		}
	}

	ytData, err := e.ytExtractor.Extract(e.ytExtractorConfig, results[lowestIdx].SourceUrl)
	if err != nil {
		return extractor.Data{}, err
	}
	if len(ytData) != 1 {
		return extractor.Data{}, ErrUnableToGetYoutubeStream
	}

	return extractor.Data{
		SourceUrl: data.ExternalUrls.Spotify,
		StreamUrl: ytData[0].StreamUrl,
		Title:     data.titleString(),
		Uploader:  data.artistsString(),
		Duration:  ytData[0].Duration,
		Expires:   ytData[0].Expires,
	}, nil
}
// playlistData is the subset of a Spotify API playlist object used by
// getPlaylist. Tracks.Next holds the URL of the next page of tracks and is
// empty on the last page.
type playlistData struct {
ExternalUrls struct {
Spotify string `json:"spotify"`
} `json:"external_urls"`
Name string `json:"name"`
Tracks struct {
Items []struct {
Track trackData `json:"track"`
} `json:"items"`
Next string `json:"next"`
} `json:"tracks"`
}
// getPlaylist returns one Data entry per track of the Spotify playlist with
// the given ID, following the API's pagination until no next page is
// reported.
//
// Only metadata is filled in (source URL, title, artists, playlist URL and
// title); no stream URL is resolved here.
//
// Errors from the token refresh, request construction and HTTP requests are
// returned as is; ErrDecodingApiResponse is returned when a response is not
// valid JSON.
func getPlaylist(e *Extractor, playlistId string) ([]extractor.Data, error) {
	if err := updateApiToken(&e.token); err != nil {
		return nil, err
	}

	var data playlistData
	// The first request returns the whole playlist object; follow-up pages
	// only contain the "tracks" part.
	trackOnlyReq := false
	reqUrl := "https://api.spotify.com/v1/playlists/" + playlistId
	var res []extractor.Data
	for {
		// Make API request for playlist info
		req, err := http.NewRequest("GET", reqUrl, nil)
		if err != nil {
			// Without this check req would be nil below.
			return nil, err
		}
		req.Header.Add("Content-Type", "application/json")
		req.Header.Add("Authorization", "Bearer "+e.token.token)
		resp, err := http.DefaultClient.Do(req)
		if err != nil {
			return nil, err
		}

		// Parse playlist info
		dec := json.NewDecoder(resp.Body)
		if trackOnlyReq {
			// JSON decoder doesn't always overwrite the set value
			data.Tracks.Next = ""
			data.Tracks.Items = nil
			err = dec.Decode(&data.Tracks)
		} else {
			err = dec.Decode(&data)
		}
		// Close the body right away instead of deferring: a defer inside this
		// loop would keep every page's body open until the function returns.
		resp.Body.Close()
		if err != nil {
			return nil, ErrDecodingApiResponse
		}

		for _, v := range data.Tracks.Items {
			res = append(res, extractor.Data{
				SourceUrl:     v.Track.ExternalUrls.Spotify,
				Title:         v.Track.titleString(),
				Uploader:      v.Track.artistsString(),
				PlaylistUrl:   data.ExternalUrls.Spotify,
				PlaylistTitle: data.Name,
			})
		}

		if data.Tracks.Next == "" {
			break
		}
		reqUrl = data.Tracks.Next
		trackOnlyReq = true
	}
	return res, nil
}
// albumData is the subset of a Spotify API album object used by getAlbum.
// Unlike playlistData, the track items are track objects directly.
// Tracks.Next holds the URL of the next page of tracks and is empty on the
// last page.
type albumData struct {
ExternalUrls struct {
Spotify string `json:"spotify"`
} `json:"external_urls"`
Name string `json:"name"`
Tracks struct {
Items []trackData `json:"items"`
Next string `json:"next"`
} `json:"tracks"`
}
// getAlbum returns one Data entry per track of the Spotify album with the
// given ID, following the API's pagination until no next page is reported.
//
// Only metadata is filled in (source URL, title, artists, album URL and
// title as playlist URL and title); no stream URL is resolved here.
//
// Errors from the token refresh, request construction and HTTP requests are
// returned as is; ErrDecodingApiResponse is returned when a response is not
// valid JSON.
func getAlbum(e *Extractor, albumId string) ([]extractor.Data, error) {
	// This function is pretty much copied from getPlaylist, with minor
	// modifications
	if err := updateApiToken(&e.token); err != nil {
		return nil, err
	}

	var data albumData
	// The first request returns the whole album object; follow-up pages only
	// contain the "tracks" part.
	trackOnlyReq := false
	reqUrl := "https://api.spotify.com/v1/albums/" + albumId
	var res []extractor.Data
	for {
		// Make API request for album info
		req, err := http.NewRequest("GET", reqUrl, nil)
		if err != nil {
			// Without this check req would be nil below.
			return nil, err
		}
		req.Header.Add("Content-Type", "application/json")
		req.Header.Add("Authorization", "Bearer "+e.token.token)
		resp, err := http.DefaultClient.Do(req)
		if err != nil {
			return nil, err
		}

		// Parse album info
		dec := json.NewDecoder(resp.Body)
		if trackOnlyReq {
			// JSON decoder doesn't always overwrite the set value
			data.Tracks.Next = ""
			data.Tracks.Items = nil
			err = dec.Decode(&data.Tracks)
		} else {
			err = dec.Decode(&data)
		}
		// Close the body right away instead of deferring: a defer inside this
		// loop would keep every page's body open until the function returns.
		resp.Body.Close()
		if err != nil {
			return nil, ErrDecodingApiResponse
		}

		for _, v := range data.Tracks.Items {
			res = append(res, extractor.Data{
				SourceUrl:     v.ExternalUrls.Spotify,
				Title:         v.titleString(),
				Uploader:      v.artistsString(),
				PlaylistUrl:   data.ExternalUrls.Spotify,
				PlaylistTitle: data.Name,
			})
		}

		if data.Tracks.Next == "" {
			break
		}
		reqUrl = data.Tracks.Next
		trackOnlyReq = true
	}
	return res, nil
}

59
extractor/util/util.go Normal file
View File

@@ -0,0 +1,59 @@
package util
import (
"golang.org/x/net/html"
"net/http"
)
// Retrieve JavaScript embedded in HTML
//
// GetHTMLScriptFunc fetches url and calls codeFunc with the text found inside
// <script> elements: once per text token, or once per line of each text token
// when readCodeLineByLine is set. Scanning stops with a nil error as soon as
// codeFunc returns false. If codeFunc never returns false, the tokenizer's
// error is returned once the document has been consumed.
// NOTE(review): per the x/net/html docs that error is presumably io.EOF at a
// normal end of document, so callers see a non-nil error whenever codeFunc
// never stopped the scan - confirm that callers rely on this.
func GetHTMLScriptFunc(url string, readCodeLineByLine bool, codeFunc func(code string) bool) error {
resp, err := http.Get(url)
if err != nil {
return err
}
defer resp.Body.Close()
z := html.NewTokenizer(resp.Body)
// isScript is true while the tokenizer is between <script> and </script>.
isScript := false
for {
tt := z.Next()
switch tt {
case html.ErrorToken:
return z.Err()
case html.TextToken:
if codeFunc != nil && isScript {
t := string(z.Text())
if readCodeLineByLine {
// NOTE: a bufio line scanner doesn't work (bufio.Scanner: token too long); maybe this is a bug
// Iterate over each line in the script
ls := 0 // line start
le := 0 // line end
for ls < len(t) {
// A line ends at a newline or at the end of the text.
if le == len(t) || t[le] == '\n' {
ln := t[ls:le]
if !codeFunc(ln) {
return nil
}
ls = le + 1
}
le++
}
} else {
if !codeFunc(t) {
return nil
}
}
}
case html.StartTagToken, html.EndTagToken:
tn, _ := z.TagName()
if string(tn) == "script" {
isScript = tt == html.StartTagToken
}
}
}
}

View File

@@ -0,0 +1,102 @@
package youtube
import (
"git.nobrain.org/r4/dischord/extractor"
"errors"
"net/url"
)
// init registers the YouTube extractor, searcher and suggestor under the
// names "youtube", "youtube-search" and "youtube-search-suggestions".
func init() {
extractor.AddExtractor("youtube", &Extractor{})
extractor.AddSearcher("youtube-search", &Searcher{})
extractor.AddSuggestor("youtube-search-suggestions", &Suggestor{})
}
// matchType classifies the kind of YouTube resource a URL points to.
type matchType int

const (
	matchTypeNone matchType = iota // not a supported YouTube URL
	matchTypeVideo
	matchTypePlaylist
)

var (
	// ErrInvalidInput is returned by Extract for input that is not a
	// supported YouTube URL.
	ErrInvalidInput = errors.New("invalid input")
)

// matches classifies input as a YouTube video URL, a playlist URL or neither.
//
// Only http(s) URLs with a parseable query string are considered. Any
// youtu.be URL counts as a video. On youtube.com and www.youtube.com only the
// /watch and /playlist paths are accepted; such a URL counts as a playlist if
// it carries a "list" parameter and either points at /playlist or
// requireDirectPlaylistUrl is false, and as a video otherwise.
func matches(requireDirectPlaylistUrl bool, input string) matchType {
	parsed, err := url.Parse(input)
	if err != nil || (parsed.Scheme != "http" && parsed.Scheme != "https") {
		return matchTypeNone
	}
	query, err := url.ParseQuery(parsed.RawQuery)
	if err != nil {
		return matchTypeNone
	}

	if parsed.Host == "youtu.be" {
		return matchTypeVideo
	}
	if parsed.Host != "www.youtube.com" && parsed.Host != "youtube.com" {
		return matchTypeNone
	}

	isPlaylistPath := parsed.Path == "/playlist"
	if !isPlaylistPath && parsed.Path != "/watch" {
		return matchTypeNone
	}
	if query.Has("list") && (isPlaylistPath || !requireDirectPlaylistUrl) {
		return matchTypePlaylist
	}
	return matchTypeVideo
}
// Extractor extracts YouTube videos and playlists.
type Extractor struct {
// decryptor is handed to getVideo, which uses it to decrypt stream
// signatures.
decryptor decryptor
}
// DefaultConfig returns the extractor's default settings.
// "require-direct-playlist-url" (default false) controls whether a watch URL
// that carries a "list" parameter is treated as a single video (true) or as
// the whole playlist (false).
func (e *Extractor) DefaultConfig() extractor.ProviderConfig {
return extractor.ProviderConfig{
"require-direct-playlist-url": false,
}
}
// Matches reports whether input is a YouTube video or playlist URL.
//
// The "require-direct-playlist-url" setting is read with a checked type
// assertion: when the key is missing, cfg is nil or the value is not a bool,
// the default (false) is used instead of panicking.
func (e *Extractor) Matches(cfg extractor.ProviderConfig, input string) bool {
	requireDirect, _ := cfg["require-direct-playlist-url"].(bool)
	return matches(requireDirect, input) != matchTypeNone
}
// Extract resolves a YouTube URL. A video URL yields a single-element slice
// produced by getVideo; a playlist URL yields whatever getPlaylist returns.
// ErrInvalidInput is returned when input is not a supported YouTube URL.
//
// The "require-direct-playlist-url" setting is read with a checked type
// assertion: when the key is missing, cfg is nil or the value is not a bool,
// the default (false) is used instead of panicking.
func (e *Extractor) Extract(cfg extractor.ProviderConfig, input string) ([]extractor.Data, error) {
	requireDirect, _ := cfg["require-direct-playlist-url"].(bool)
	switch matches(requireDirect, input) {
	case matchTypeVideo:
		d, err := getVideo(&e.decryptor, input)
		if err != nil {
			return nil, err
		}
		return []extractor.Data{d}, nil
	case matchTypePlaylist:
		return getPlaylist(input)
	}
	return nil, ErrInvalidInput
}
// Searcher searches YouTube. It carries no state.
type Searcher struct{}
// DefaultConfig returns an empty configuration; the searcher has no settings.
func (s *Searcher) DefaultConfig() extractor.ProviderConfig {
return extractor.ProviderConfig{}
}
// Search returns the results of getSearch for input. cfg is not used.
func (s *Searcher) Search(cfg extractor.ProviderConfig, input string) ([]extractor.Data, error) {
return getSearch(input)
}
// Suggestor provides YouTube search suggestions. It carries no state.
type Suggestor struct{}
// DefaultConfig returns an empty configuration; the suggestor has no
// settings.
func (s *Suggestor) DefaultConfig() extractor.ProviderConfig {
return extractor.ProviderConfig{}
}
// Suggest returns the results of getSearchSuggestions for input. cfg is not
// used.
func (s *Suggestor) Suggest(cfg extractor.ProviderConfig, input string) ([]string, error) {
return getSearchSuggestions(input)
}

58
extractor/youtube/util.go Normal file
View File

@@ -0,0 +1,58 @@
package youtube
import (
"golang.org/x/net/html"
"net/http"
)
// getHTMLScriptFunc fetches url and feeds the text found inside <script>
// elements to codeFunc: once per text token, or once per line of each text
// token when readCodeLineByLine is set. Scanning stops with a nil error as
// soon as codeFunc returns false; otherwise the tokenizer's error is returned
// once it reports an error token.
func getHTMLScriptFunc(url string, readCodeLineByLine bool, codeFunc func(code string) bool) error {
	resp, err := http.Get(url)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	tokenizer := html.NewTokenizer(resp.Body)
	// inScript is true while the tokenizer is between <script> and </script>.
	inScript := false
	for {
		tokType := tokenizer.Next()
		if tokType == html.ErrorToken {
			return tokenizer.Err()
		}
		if tokType == html.StartTagToken || tokType == html.EndTagToken {
			if name, _ := tokenizer.TagName(); string(name) == "script" {
				inScript = tokType == html.StartTagToken
			}
			continue
		}
		if tokType != html.TextToken || codeFunc == nil || !inScript {
			continue
		}

		text := string(tokenizer.Text())
		if !readCodeLineByLine {
			if !codeFunc(text) {
				return nil
			}
			continue
		}

		// NOTE: a bufio line scanner doesn't work (bufio.Scanner: token too long); maybe this is a bug
		// Walk the text by hand; a line ends at a newline or at the end of
		// the text.
		start := 0
		for end := 0; start < len(text); end++ {
			if end == len(text) || text[end] == '\n' {
				if !codeFunc(text[start:end]) {
					return nil
				}
				start = end + 1
			}
		}
	}
}

View File

@@ -0,0 +1,416 @@
package youtube
import (
"git.nobrain.org/r4/dischord/extractor"
exutil "git.nobrain.org/r4/dischord/extractor/util"
"git.nobrain.org/r4/dischord/util"
"encoding/json"
"errors"
"io"
"net/http"
"net/url"
"strconv"
"strings"
"time"
)
// Errors returned by the YouTube video and playlist helpers in this file.
var (
// Returned by getVideo when no adaptive format has an "audio/" MIME type.
ErrNoSuitableFormat = errors.New("no suitable audio-only format found")
// Returned by getVideo when the signature cipher cannot be parsed as a
// query string.
ErrGettingUrlFromSignatureCipher = errors.New("error getting URL from signature cipher")
// Returned by getVideo when none of its attempts produced a working stream
// URL.
ErrDecryptFunctionBroken = errors.New("signature decryptor function is broken (perhaps the extractor is out of date)")
// Returned by getPlaylist when a playlist entry has no byline text.
ErrMalformedJson = errors.New("malformed JSON")
)
// playerData is the subset of the "ytInitialPlayerResponse" JSON object that
// getVideo decodes. Several numeric values are kept as strings here and are
// converted by getVideo where needed.
type playerData struct {
StreamingData struct {
ExpiresInSeconds string `json:"expiresInSeconds"`
Formats []struct {
Url string `json:"url"`
SignatureCipher string `json:"signatureCipher"`
MimeType string `json:"mimeType"`
Bitrate int `json:"bitrate"`
ApproxDurationMs string `json:"approxDurationMs"`
AudioSampleRate string `json:"audioSampleRate"`
AudioChannels int `json:"audioChannels"`
} `json:"formats"`
// AdaptiveFormats is the list getVideo picks its audio stream from.
AdaptiveFormats []struct {
Url string `json:"url"`
SignatureCipher string `json:"signatureCipher"`
MimeType string `json:"mimeType"`
Bitrate int `json:"bitrate"`
ApproxDurationMs string `json:"approxDurationMs"`
AudioSampleRate string `json:"audioSampleRate"`
AudioChannels int `json:"audioChannels"`
} `json:"adaptiveFormats"`
} `json:"streamingData"`
VideoDetails struct {
VideoId string `json:"videoId"`
Title string `json:"title"`
LengthSeconds string `json:"lengthSeconds"`
ShortDescription string `json:"shortDescription"`
Author string `json:"author"`
} `json:"videoDetails"`
}
// getVideo extracts title, uploader, duration and an audio stream URL for the
// YouTube video at vUrl.
//
// Each attempt reads the "ytInitialPlayerResponse" variable from the page,
// picks the adaptive format with an "audio/" MIME type and the highest
// bitrate, and either uses its direct URL or builds one from the signature
// cipher using decryptor. The resulting stream URL is verified with an HTTP
// GET; up to 10 attempts are made until one answers with status 200.
//
// Any error from a single attempt aborts immediately and is returned as is
// (including ErrNoSuitableFormat and ErrGettingUrlFromSignatureCipher).
// ErrDecryptFunctionBroken is returned when all attempts yield a stream URL
// that does not check out.
func getVideo(decryptor *decryptor, vUrl string) (extractor.Data, error) {
// try performs one complete extraction attempt.
try := func() (extractor.Data, error) {
// Get JSON string from YouTube
v, err := getJSVar(vUrl, "ytInitialPlayerResponse")
if err != nil {
return extractor.Data{}, err
}
// Parse player data scraped from YouTube
var data playerData
if err := json.Unmarshal([]byte(v), &data); err != nil {
return extractor.Data{}, err
}
// Get audio format with maximum bitrate
// maxBr is the index of the best format found so far, or -1 if none.
maxBr := -1
for i, f := range data.StreamingData.AdaptiveFormats {
if strings.HasPrefix(f.MimeType, "audio/") {
if maxBr == -1 || f.Bitrate > data.StreamingData.AdaptiveFormats[maxBr].Bitrate {
maxBr = i
}
}
}
if maxBr == -1 {
return extractor.Data{}, ErrNoSuitableFormat
}
// An unparsable duration is reported as unknown (-1) rather than as an
// error.
duration, err := strconv.Atoi(data.VideoDetails.LengthSeconds)
if err != nil {
duration = -1
}
// The expiry, in contrast, is required.
expires, err := strconv.Atoi(data.StreamingData.ExpiresInSeconds)
if err != nil {
return extractor.Data{}, err
}
ft := data.StreamingData.AdaptiveFormats[maxBr]
var resUrl string
if ft.Url != "" {
resUrl = ft.Url
} else {
// For music, YouTube makes getting the resource URL a bit trickier
// The cipher is a query string holding the encrypted signature ("s"),
// the name of the parameter to put it in ("sp") and the base URL ("url").
q, err := url.ParseQuery(ft.SignatureCipher)
if err != nil {
return extractor.Data{}, ErrGettingUrlFromSignatureCipher
}
sig := q.Get("s")
sigParam := q.Get("sp")
baseUrl := q.Get("url")
sigDecrypted, err := decryptor.decrypt(sig)
if err != nil {
return extractor.Data{}, err
}
resUrl = baseUrl + "&" + sigParam + "=" + sigDecrypted
}
return extractor.Data{
SourceUrl: vUrl,
StreamUrl: resUrl,
Title: data.VideoDetails.Title,
Description: data.VideoDetails.ShortDescription,
Uploader: data.VideoDetails.Author,
Duration: duration,
Expires: time.Now().Add(time.Duration(expires) * time.Second),
}, nil
}
// isOk reports whether an HTTP GET on strmUrl answers with status 200.
isOk := func(strmUrl string) bool {
resp, err := http.Get(strmUrl)
if err != nil {
return false
}
defer resp.Body.Close()
return resp.StatusCode == 200
}
// Sometimes we just get an invalid stream URL, and I didn't find anything
// simple to do about it, so we just try the stream URL we get and repeat
// if it's invalid
for tries := 0; tries < 10; tries++ {
data, err := try()
if err != nil {
return extractor.Data{}, err
}
if isOk(data.StreamUrl) {
return data, nil
}
}
return extractor.Data{}, ErrDecryptFunctionBroken
}
// playlistVideoData is the subset of the "ytInitialData" JSON object of a
// watch page that getPlaylist decodes: the playlist title and, for every
// entry of the playlist panel, its video ID, index, title, uploader and
// length.
type playlistVideoData struct {
Contents struct {
TwoColumnWatchNextResults struct {
Playlist struct {
Playlist struct {
Title string `json:"title"`
Contents []struct {
PlaylistPanelVideoRenderer struct {
NavigationEndpoint struct {
WatchEndpoint struct {
VideoId string `json:"videoId"`
Index int `json:"index"`
} `json:"watchEndpoint"`
} `json:"navigationEndpoint"`
Title struct {
SimpleText string `json:"simpleText"`
} `json:"title"`
ShortBylineText struct {
Runs []struct {
Text string `json:"text"` // uploader name
} `json:"runs"`
} `json:"shortBylineText"`
LengthText struct {
SimpleText string `json:"simpleText"`
} `json:"lengthText"`
} `json:"playlistPanelVideoRenderer"`
} `json:"contents"`
} `json:"playlist"`
} `json:"playlist"`
} `json:"twoColumnWatchNextResults"`
} `json:"contents"`
}
// Only gets superficial data, the actual stream URL must be extracted from SourceUrl
//
// getPlaylist returns one Data entry per video of the playlist named by the
// "list" query parameter of pUrl, with source URL, title, uploader, duration
// (-1 if it cannot be parsed) and the playlist's URL and title filled in.
//
// Errors from parsing pUrl, fetching the page variable and decoding its JSON
// are returned as is; ErrMalformedJson is returned when an entry has no
// byline text.
func getPlaylist(pUrl string) ([]extractor.Data, error) {
u, err := url.Parse(pUrl)
if err != nil {
return nil, err
}
q, err := url.ParseQuery(u.RawQuery)
if err != nil {
return nil, err
}
listId := q.Get("list")
// vidId and index identify the last playlist entry seen so far; the first
// request is made with an empty video ID and index 1.
vidId := ""
index := 0
var res []extractor.Data
// This loop uses the playlist sidebar: each video played in the context
// of a playlist loads 100 or so of the following videos' infos, which we
// add to the returned slice; then we take the last retrieved video's infos
// and use its sidebar and so on
for {
vUrl := "https://www.youtube.com/watch?v=" + vidId + "&list=" + listId + "&index=" + strconv.Itoa(index+1)
// Get JSON string from YouTube
v, err := getJSVar(vUrl, "ytInitialData")
if err != nil {
return nil, err
}
// Parse playlist data scraped from YouTube
var data playlistVideoData
if err := json.Unmarshal([]byte(v), &data); err != nil {
return nil, err
}
// added records whether this page contributed at least one new entry; if
// it did not, the end of the playlist has been reached.
added := false
for _, v := range data.Contents.TwoColumnWatchNextResults.Playlist.Playlist.Contents {
vidId = v.PlaylistPanelVideoRenderer.NavigationEndpoint.WatchEndpoint.VideoId
index = v.PlaylistPanelVideoRenderer.NavigationEndpoint.WatchEndpoint.Index
// Only take the entry that comes right after the ones already collected;
// entries seen before are skipped.
if index == len(res) {
srcUrl := "https://www.youtube.com/watch?v=" + vidId
bylineText := v.PlaylistPanelVideoRenderer.ShortBylineText
if len(bylineText.Runs) == 0 {
return nil, ErrMalformedJson
}
uploader := bylineText.Runs[0].Text
length, err := util.ParseDurationSeconds(v.PlaylistPanelVideoRenderer.LengthText.SimpleText)
if err != nil {
length = -1
}
res = append(res, extractor.Data{
SourceUrl: srcUrl,
Title: v.PlaylistPanelVideoRenderer.Title.SimpleText,
PlaylistUrl: "https://www.youtube.com/playlist?list=" + listId,
PlaylistTitle: data.Contents.TwoColumnWatchNextResults.Playlist.Playlist.Title,
Uploader: uploader,
Duration: length,
})
added = true
}
}
if !added {
break
}
}
return res, nil
}
// searchData mirrors the parts of YouTube's ytInitialData JSON that getSearch
// reads from a search results page. Each result item is either a playlist
// (PlaylistRenderer set) or a video (VideoRenderer set); getSearch tells them
// apart by checking which ID is non-empty.
type searchData struct {
	Contents struct {
		TwoColumnSearchResultsRenderer struct {
			PrimaryContents struct {
				SectionListRenderer struct {
					Contents []struct {
						ItemSectionRenderer struct {
							Contents []struct {
								PlaylistRenderer struct {
									PlaylistId string `json:"playlistId"`
									Title      struct {
										SimpleText string `json:"simpleText"`
									} `json:"title"`
								} `json:"playlistRenderer"`
								VideoRenderer struct {
									VideoId string `json:"videoId"`
									Title   struct {
										Runs []struct {
											Text string `json:"text"`
										} `json:"runs"`
									} `json:"title"`
									LongBylineText struct {
										Runs []struct {
											Text string `json:"text"` // uploader name
										} `json:"runs"`
									} `json:"longBylineText"`
									// Video length as text; getSearch passes it to
									// util.ParseDurationSeconds
									LengthText struct {
										SimpleText string `json:"simpleText"`
									} `json:"lengthText"`
									// The first badge's style is compared against
									// "BADGE_STYLE_TYPE_VERIFIED_ARTIST" by getSearch.
									// The tag uses the same camelCase spelling as all
									// other keys instead of relying on encoding/json's
									// case-insensitive fallback.
									OwnerBadges []struct {
										MetadataBadgeRenderer struct {
											Style string `json:"style"`
										} `json:"metadataBadgeRenderer"`
									} `json:"ownerBadges"`
								} `json:"videoRenderer"`
							} `json:"contents"`
						} `json:"itemSectionRenderer"`
					} `json:"contents"`
				} `json:"sectionListRenderer"`
			} `json:"primaryContents"`
		} `json:"twoColumnSearchResultsRenderer"`
	} `json:"contents"`
}
// getSearch scrapes YouTube's search results page for query.
//
// Only superficial data is returned; the actual stream URL must be extracted
// from SourceUrl. Video results carry SourceUrl, Title, Duration (-1 if the
// length text cannot be parsed), Uploader and OfficialArtist; playlist results
// carry only PlaylistUrl and PlaylistTitle. ErrMalformedJson is returned if a
// video result lacks title or uploader text.
func getSearch(query string) ([]extractor.Data, error) {
	// QueryEscape already encodes spaces as "+". Replacing them with "+"
	// beforehand would make QueryEscape emit "%2B", i.e. send literal plus
	// signs instead of spaces.
	queryUrl := "https://www.youtube.com/results?search_query=" + url.QueryEscape(query)

	// Get JSON string from YouTube
	raw, err := getJSVar(queryUrl, "ytInitialData")
	if err != nil {
		return nil, err
	}

	// Parse search data scraped from YouTube
	var data searchData
	if err := json.Unmarshal([]byte(raw), &data); err != nil {
		return nil, err
	}

	var res []extractor.Data
	sections := data.Contents.TwoColumnSearchResultsRenderer.PrimaryContents.SectionListRenderer.Contents
	for _, section := range sections {
		for _, item := range section.ItemSectionRenderer.Contents {
			video, playlist := item.VideoRenderer, item.PlaylistRenderer
			switch {
			case video.VideoId != "":
				if len(video.Title.Runs) == 0 {
					return nil, ErrMalformedJson
				}
				if len(video.LongBylineText.Runs) == 0 {
					return nil, ErrMalformedJson
				}
				length, err := util.ParseDurationSeconds(video.LengthText.SimpleText)
				if err != nil {
					// Unparseable length text: report an unknown duration
					length = -1
				}
				badges := video.OwnerBadges
				res = append(res, extractor.Data{
					SourceUrl:      "https://www.youtube.com/watch?v=" + video.VideoId,
					Title:          video.Title.Runs[0].Text,
					Duration:       length,
					Uploader:       video.LongBylineText.Runs[0].Text,
					OfficialArtist: len(badges) != 0 && badges[0].MetadataBadgeRenderer.Style == "BADGE_STYLE_TYPE_VERIFIED_ARTIST",
				})
			case playlist.PlaylistId != "":
				res = append(res, extractor.Data{
					PlaylistUrl:   "https://www.youtube.com/playlist?list=" + playlist.PlaylistId,
					PlaylistTitle: playlist.Title.SimpleText,
				})
			}
		}
	}
	return res, nil
}
// getSearchSuggestions asks YouTube's suggestion endpoint for completions of
// query and returns the suggested search strings in the order received.
//
// The endpoint answers with a call of the form `window.google.ac.h([...])`.
// After stripping that wrapper, the payload must be a three-element array
// whose second element is a list of three-element arrays, each starting with
// the suggestion text; any other shape yields ErrMalformedJson.
func getSearchSuggestions(query string) ([]string, error) {
	endpoint := "https://suggestqueries-clients6.youtube.com/complete/search?client=youtube&ds=yt&q=" + url.QueryEscape(query)
	resp, err := http.Get(endpoint)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}

	// Strip the function call wrapped around the JSON payload
	payload := strings.TrimPrefix(string(body), "window.google.ac.h(")
	payload = strings.TrimSuffix(payload, ")")

	var parsed []any
	if err := json.Unmarshal([]byte(payload), &parsed); err != nil {
		return nil, err
	}
	if len(parsed) != 3 {
		return nil, ErrMalformedJson
	}
	entries, isList := parsed[1].([]any)
	if !isList {
		return nil, ErrMalformedJson
	}

	var suggestions []string
	for _, entry := range entries {
		fields, isList := entry.([]any)
		if !isList || len(fields) != 3 {
			return nil, ErrMalformedJson
		}
		text, isString := fields[0].(string)
		if !isString {
			return nil, ErrMalformedJson
		}
		suggestions = append(suggestions, text)
	}
	return suggestions, nil
}
// getJSVar fetches the page at url and returns the value assigned to the
// JavaScript variable varName in one of its scripts. The script must start
// with exactly `var someVarName = ` (e.g. var someVarName = {"somekey": "lol"};);
// trailing semicolons are removed from the returned value. If no script
// matches, an empty string is returned without an error.
func getJSVar(url, varName string) (string, error) {
	prefix := "var " + varName + " = "
	value := ""
	// The callback returns false once the variable has been found
	// (presumably this stops the script iteration — confirm against exutil).
	if err := exutil.GetHTMLScriptFunc(url, true, func(script string) bool {
		if !strings.HasPrefix(script, prefix) {
			return true
		}
		value = strings.TrimRight(script[len(prefix):], ";")
		return false
	}); err != nil {
		return "", err
	}
	return value, nil
}

View File

@@ -0,0 +1,245 @@
package youtube
import (
exutil "git.nobrain.org/r4/dischord/extractor/util"
"encoding/json"
"errors"
"io"
"net/http"
"regexp"
"strconv"
"strings"
)
// Errors reported while building the signature decryptor from base.js.
var (
// Returned by getDecryptFunction when decryptFunctionNameRegexp does not match
ErrDecryptGettingFunctionName = errors.New("error getting signature decryption function name")
// Returned by getDecryptFunction when the function body cannot be located
ErrDecryptGettingFunction = errors.New("error getting signature decryption function")
// Returned by getDecryptOps when the operation table or a call to it cannot be parsed
ErrDecryptGettingOpTable = errors.New("error getting signature decryption operation table")
// Returned by updateDecryptor when base.js cannot be located or downloaded
ErrGettingBaseJs = errors.New("unable to get base.js")
)
// decryptorOp is a single step of the signature decryption: fn is applied to
// the string being decrypted (modifying it in place) with arg as its second
// argument.
type decryptorOp struct {
fn func(a *string, b int)
arg int
}
// decryptor holds the signature decryption steps parsed from a specific
// version of base.js. Both fields are filled in by updateDecryptor.
type decryptor struct {
// base.js version ID, used for caching
versionId string
// The actual decryption algorithm can be split up into a list of known
// operations
ops []decryptorOp
}
// decrypt refreshes the decryptor via updateDecryptor and then runs every
// stored operation, in order, over a copy of input. It returns the resulting
// string, or the error reported by updateDecryptor.
func (d *decryptor) decrypt(input string) (string, error) {
	err := updateDecryptor(d)
	if err != nil {
		return "", err
	}
	result := input
	for i := range d.ops {
		d.ops[i].fn(&result, d.ops[i].arg)
	}
	return result, nil
}
// configData is the part of the JSON object passed to ytcfg.set() on the
// YouTube front page that updateDecryptor needs.
type configData struct {
// Path of the player script (base.js); updateDecryptor appends it to
// "https://www.youtube.com"
PlayerJsUrl string `json:"PLAYER_JS_URL"`
}
// updateDecryptor makes sure d holds the decryption operations of the base.js
// version YouTube currently serves.
//
// It reads the player script path (PLAYER_JS_URL) from the ytcfg.set() call on
// the YouTube front page. If that path identifies the version d was built
// from, nothing else happens; otherwise base.js is downloaded, parsed with
// getDecryptOps and stored in d together with the new version ID.
//
// ErrGettingBaseJs is returned if the configuration or base.js cannot be
// obtained; errors from the HTTP request and from getDecryptOps are passed on.
// d is only modified on success.
func updateDecryptor(d *decryptor) error {
	const (
		baseUrl      = "https://www.youtube.com"
		prefix       = "(function() {window.ytplayer={};\nytcfg.set("
		endStr       = ");"
		playerPrefix = "/s/player/"
	)

	// Get base.js path
	var playerJsPath string
	var funcErr error
	err := exutil.GetHTMLScriptFunc(baseUrl, false, func(code string) bool {
		if !strings.HasPrefix(code, prefix) {
			return true
		}
		// Cut out the JSON part
		code = code[len(prefix):]
		end := strings.Index(code, endStr)
		if end == -1 {
			funcErr = ErrGettingBaseJs
			return false
		}
		// Parse config data
		var data configData
		if err := json.Unmarshal([]byte(code[:end]), &data); err != nil {
			funcErr = ErrGettingBaseJs
			return false
		}
		playerJsPath = data.PlayerJsUrl
		return false
	})
	if err != nil {
		return err
	}
	if funcErr != nil {
		// Report the callback's error (err is nil at this point)
		return funcErr
	}
	if playerJsPath == "" {
		return ErrGettingBaseJs
	}

	// Get base.js version ID. It must be derived from the path, not from the
	// absolute URL: the latter never starts with "/s/player/", which would
	// make every version look identical and freeze the decryptor after its
	// first load. Should the path ever have an unexpected layout, the whole
	// path serves as version key so that a changed script is still noticed.
	verId := playerJsPath
	if strings.HasPrefix(playerJsPath, playerPrefix) {
		sp := strings.SplitN(playerJsPath[len(playerPrefix):], "/", 2)
		if len(sp) == 2 && sp[0] != "" {
			verId = sp[0]
		}
	}
	if d.versionId == verId {
		// Decryptor already up-to-date
		return nil
	}

	// Get base.js contents
	resp, err := http.Get(baseUrl + playerJsPath)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return ErrGettingBaseJs
	}

	// Copy contents to buffer
	buf := new(strings.Builder)
	if _, err := io.Copy(buf, resp.Body); err != nil {
		return err
	}

	// Get decryption operations
	ops, err := getDecryptOps(buf.String())
	if err != nil {
		return err
	}

	d.versionId = verId
	d.ops = ops
	return nil
}
// decryptFunctionNameRegexp matches the place in base.js where the signature
// decryption function is called; its only capture group is the function name.
var decryptFunctionNameRegexp = regexp.MustCompile(`[a-zA-Z]*&&\([a-zA-Z]*=([a-zA-Z]*)\(decodeURIComponent\([a-zA-Z]*\)\),[a-zA-Z]*\.set\([a-zA-Z]*,encodeURIComponent\([a-zA-Z]*\)\)\)`)

// getDecryptFunction returns the statements of the signature decryption
// function found in baseJs, i.e. everything between the leading
// `a=a.split("");` and the trailing `;return a.join("")};`.
//
// It returns ErrDecryptGettingFunctionName if the function's name cannot be
// determined and ErrDecryptGettingFunction if its definition cannot be found.
func getDecryptFunction(baseJs string) (string, error) {
	idx := decryptFunctionNameRegexp.FindStringSubmatchIndex(baseJs)
	if len(idx) != 4 {
		return "", ErrDecryptGettingFunctionName
	}
	fnName := baseJs[idx[2]:idx[3]]

	startMatch := fnName + `=function(a){a=a.split("");`
	endMatch := `;return a.join("")};`

	start := strings.Index(baseJs, startMatch)
	if start == -1 {
		return "", ErrDecryptGettingFunction
	}
	fn := baseJs[start+len(startMatch):]

	// The end marker has to be checked on its own; slicing with -1 would panic
	end := strings.Index(fn, endMatch)
	if end == -1 {
		return "", ErrDecryptGettingFunction
	}
	return fn[:end], nil
}
// getDecryptOps translates the signature decryption code found in baseJs into
// a list of Go operations.
//
// The decryption function (see getDecryptFunction) is a sequence of calls of
// the form `Obj.name(a,N)`, where Obj is an object literal declared as
// `var Obj={...};` holding exactly three helpers on three lines: one that
// reverses the signature, one that swaps its first character with another one
// and one that drops leading characters. The returned operations are those
// calls in order, each paired with its numeric argument.
//
// Errors from getDecryptFunction are passed on; anything that does not fit the
// layout described above yields ErrDecryptGettingOpTable.
func getDecryptOps(baseJs string) ([]decryptorOp, error) {
	// Extract main decryptor function JS
	decrFn, err := getDecryptFunction(baseJs)
	if err != nil {
		return nil, err
	}

	// Get decryptor operation JS: the body of the object the calls refer to
	var ops string
	{
		sp := strings.SplitN(decrFn, ".", 2)
		if len(sp) != 2 {
			return nil, ErrDecryptGettingOpTable
		}
		opsObjName := sp[0]
		startMatch := `var ` + opsObjName + `={`
		endMatch := `};`
		start := strings.Index(baseJs, startMatch)
		if start == -1 {
			return nil, ErrDecryptGettingOpTable
		}
		ops = baseJs[start+len(startMatch):]
		// The end marker has to be checked on its own; slicing with -1 would
		// panic
		end := strings.Index(ops, endMatch)
		if end == -1 {
			return nil, ErrDecryptGettingOpTable
		}
		ops = ops[:end]
	}

	// Make a decryptor operation table that associates the operation
	// names with a specific action on an input string
	opTable := make(map[string]func(a *string, b int), 3)
	{
		lns := strings.Split(ops, "\n")
		if len(lns) != 3 {
			return nil, ErrDecryptGettingOpTable
		}
		for _, ln := range lns {
			sp := strings.Split(ln, ":")
			if len(sp) != 2 {
				return nil, ErrDecryptGettingOpTable
			}
			name, fn := sp[0], sp[1]
			switch {
			case strings.HasPrefix(fn, `function(a){a.reverse()}`):
				opTable[name] = func(a *string, _ int) {
					// Reverse a
					r := []rune(*a)
					for i, j := 0, len(r)-1; i < j; i, j = i+1, j-1 {
						r[i], r[j] = r[j], r[i]
					}
					*a = string(r)
				}
			case strings.HasPrefix(fn, `function(a,b){var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c}`):
				opTable[name] = func(a *string, b int) {
					// Swap a[0] and a[b % len(a)]; an empty string or a
					// negative index has nothing sensible to swap and would
					// otherwise panic
					c := []byte(*a)
					if len(c) == 0 || b < 0 {
						return
					}
					i := b % len(c)
					c[0], c[i] = c[i], c[0]
					*a = string(c)
				}
			case strings.HasPrefix(fn, `function(a,b){a.splice(0,b)}`):
				opTable[name] = func(a *string, b int) {
					// Slice off all elements of a up to a[b]; like JS splice,
					// removing more than is there leaves an empty string
					if b <= 0 {
						return
					}
					if b > len(*a) {
						b = len(*a)
					}
					*a = (*a)[b:]
				}
			}
		}
	}

	// Parse all operations in the main decryptor function and return them in
	// order
	var res []decryptorOp
	for _, call := range strings.Split(decrFn, ";") {
		// call looks like `Obj.name(a,N)`
		sp := strings.SplitN(call, ".", 2)
		if len(sp) != 2 {
			return nil, ErrDecryptGettingOpTable
		}
		sp = strings.SplitN(sp[1], "(", 2)
		if len(sp) != 2 {
			return nil, ErrDecryptGettingOpTable
		}
		name := sp[0]
		argS := strings.TrimSuffix(strings.TrimPrefix(sp[1], "a,"), ")")
		arg, err := strconv.Atoi(argS)
		if err != nil {
			return nil, ErrDecryptGettingOpTable
		}
		callableOp, exists := opTable[name]
		if !exists {
			return nil, ErrDecryptGettingOpTable
		}
		res = append(res, decryptorOp{callableOp, arg})
	}
	return res, nil
}

View File

@@ -0,0 +1,35 @@
package ytdl
import (
"git.nobrain.org/r4/dischord/extractor"
"strings"
)
// init registers this package's Extractor with the extractor package under
// the name "youtube-dl".
func init() {
extractor.AddExtractor("youtube-dl", &Extractor{})
}
// Extractor is the youtube-dl backed extractor registered in init. It has no
// state of its own; Extract hands the work to ytdlGet.
type Extractor struct{}
// DefaultConfig returns the default settings of this provider: the
// "youtube-dl-path" option set to "youtube-dl".
func (e *Extractor) DefaultConfig() extractor.ProviderConfig {
	defaults := extractor.ProviderConfig{}
	defaults["youtube-dl-path"] = "youtube-dl"
	return defaults
}
// Matches reports whether input starts with "http://" or "https://". cfg is
// not consulted.
func (e *Extractor) Matches(cfg extractor.ProviderConfig, input string) bool {
	for _, scheme := range [...]string{"http://", "https://"} {
		if strings.HasPrefix(input, scheme) {
			return true
		}
	}
	return false
}
// Extract runs ytdlGet on input, using the program configured under
// "youtube-dl-path" (which must be a string), and gathers everything it
// produces. If ytdlGet reports an error, that error is returned and the
// gathered data is discarded.
func (e *Extractor) Extract(cfg extractor.ProviderConfig, input string) ([]extractor.Data, error) {
	binary := cfg["youtube-dl-path"].(string)
	dataCh, errCh := ytdlGet(binary, input)

	// Drain the data channel first; the error channel is checked once the
	// data channel has been closed
	var collected []extractor.Data
	for item := range dataCh {
		collected = append(collected, item)
	}
	if err, failed := <-errCh; failed {
		return nil, err
	}
	return collected, nil
}

135
extractor/ytdl/ytdl.go Normal file
View File

@@ -0,0 +1,135 @@
package ytdl
import (
"git.nobrain.org/r4/dischord/extractor"
"bufio"
"encoding/json"
"errors"
"os/exec"
"strings"
"time"
)
var (
// Reported by ytdlGet when youtube-dl exits with an error message starting
// with "Unsupported URL: "
ErrUnsupportedUrl = errors.New("unsupported URL")
)
// ytdlMetadata is a very reduced version of the JSON structure returned by
// youtube-dl; ytdlGet decodes one such object per item from the program's
// standard output.
type ytdlMetadata struct {
	Title       string  `json:"title"`
	Extractor   string  `json:"extractor"`
	Duration    float32 `json:"duration"`
	WebpageUrl  string  `json:"webpage_url"`
	Playlist    string  `json:"playlist"`
	Uploader    string  `json:"uploader"`
	Description string  `json:"description"`
	// Available formats; ytdlGet picks the last one whose VCodec is "none"
	Formats []struct {
		Url    string `json:"url"`
		Format string `json:"format"`
		VCodec string `json:"vcodec"`
	} `json:"formats"`
}
// ytdlGet runs `youtubeDLPath -j input` and gradually sends one extractor.Data
// per item reported by youtube-dl through the data channel, using the last
// listed format without a video codec as stream URL (items without such a
// format are skipped). If an error occurs, it is sent through the error
// channel. Both channels are closed after either an error occurs or all items
// have been output, so a caller should drain the data channel and then check
// the error channel.
//
// If youtube-dl fails with an "ERROR: " line on its standard error, that
// message is reported (as ErrUnsupportedUrl for unsupported URLs); otherwise
// the process' exit error is passed on.
func ytdlGet(youtubeDLPath, input string) (<-chan extractor.Data, <-chan error) {
	out := make(chan extractor.Data)
	errch := make(chan error, 1)
	go func() {
		defer close(out)
		defer close(errch)

		// Prepare command for execution
		cmd := exec.Command(youtubeDLPath, "-j", input)
		// Youtube-dl doesn't recognize some chars if LC_ALL=C or not set at all.
		// NOTE(review): this replaces the whole environment (PATH, HOME, proxy
		// settings, ...) rather than extending it — confirm that is intended.
		cmd.Env = []string{"LC_ALL=en_US.UTF-8"}
		stdout, err := cmd.StdoutPipe()
		if err != nil {
			errch <- err
			return
		}
		stderr, err := cmd.StderrPipe()
		if err != nil {
			errch <- err
			return
		}

		// Start youtube-dl
		if err := cmd.Start(); err != nil {
			errch <- err
			return
		}

		// Catch any errors put out by youtube-dl. The goroutine signals
		// completion by closing the channel, so it can never get stuck on a
		// send nobody receives.
		stderrDone := make(chan struct{})
		var ytdlError string
		go func() {
			defer close(stderrDone)
			sc := bufio.NewScanner(stderr)
			for sc.Scan() {
				line := sc.Text()
				if strings.HasPrefix(line, "ERROR: ") {
					ytdlError = strings.TrimPrefix(line, "ERROR: ")
				}
			}
		}()

		// Main JSON decoder loop
		var decodeErr error
		dec := json.NewDecoder(stdout)
		for dec.More() {
			// Read JSON
			var m ytdlMetadata
			if decodeErr = dec.Decode(&m); decodeErr != nil {
				// Nobody will read the remaining output; stop the process so
				// that it cannot block on a full pipe and Wait returns
				_ = cmd.Process.Kill()
				break
			}
			// Extract URL from metadata (the latter formats are always the better with youtube-dl)
			for i := len(m.Formats) - 1; i >= 0; i-- {
				format := m.Formats[i]
				if format.VCodec == "none" {
					out <- extractor.Data{
						SourceUrl:     m.WebpageUrl,
						StreamUrl:     format.Url,
						Title:         m.Title,
						PlaylistTitle: m.Playlist,
						Description:   m.Description,
						Uploader:      m.Uploader,
						Duration:      int(m.Duration),
						Expires:       time.Now().Add(10 * 365 * 24 * time.Hour),
					}
					break
				}
			}
		}

		// Wait closes both pipes, so it must only be called once all reads
		// from them have completed; calling it earlier can cut off output.
		<-stderrDone
		waitErr := cmd.Wait()

		if decodeErr != nil {
			errch <- decodeErr
			return
		}
		if waitErr == nil {
			return
		}
		switch {
		case ytdlError == "":
			errch <- waitErr
		case strings.HasPrefix(ytdlError, "Unsupported URL: "):
			errch <- ErrUnsupportedUrl
		default:
			errch <- errors.New("ytdl: " + ytdlError)
		}
	}()
	return out, errch
}