Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Rumble extractor #1232

Merged
merged 4 commits into from
May 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions .github/workflows/stream_rumble.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# CI workflow for the rumble extractor.
name: rumble

# Triggers: pushes and pull requests that touch the extractor's Go sources or
# this workflow file, plus a weekly scheduled run.
on:
push:
paths:
- "extractors/rumble/*.go"
- ".github/workflows/stream_rumble.yml"
pull_request:
paths:
- "extractors/rumble/*.go"
- ".github/workflows/stream_rumble.yml"
schedule:
# run CI weekly: minute 0, hour 0, every Sunday
- cron: "0 0 * * 0"

jobs:
test:
runs-on: ${{ matrix.os }}
strategy:
matrix:
go: ["1.20"]
os: [ubuntu-latest]
name: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2
- uses: actions/setup-go@v2
with:
go-version: ${{ matrix.go }}

# Runs only the rumble package's tests, with the race detector enabled, a
# 5 minute timeout, and a coverage profile written to coverage.txt.
- name: Test
run: go test -timeout 5m -race -coverpkg=./... -coverprofile=coverage.txt github.com/iawia002/lux/extractors/rumble
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,7 @@ $ lux -j "https://www.bilibili.com/video/av20203945"
| Reddit | <https://www.reddit.com> | ✓ | ✓ | | | | [![reddit](https://github.com/iawia002/lux/actions/workflows/stream_reddit.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_reddit.yml) |
| VKontakte | <https://vk.com> | ✓ | | | | | [![vk](https://github.com/iawia002/lux/actions/workflows/stream_vk.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_vk.yml/) |
| 知乎 | <https://zhihu.com> | ✓ | | | | | [![zhihu](https://github.com/iawia002/lux/actions/workflows/stream_zhihu.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_zhihu.yml/) |
| Rumble | <https://rumble.com> | ✓ | | | | | [![rumble](https://github.com/iawia002/lux/actions/workflows/stream_rumble.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_rumble.yml/) |


## Known issues
Expand Down
1 change: 1 addition & 0 deletions app/register.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
_ "github.com/iawia002/lux/extractors/pornhub"
_ "github.com/iawia002/lux/extractors/qq"
_ "github.com/iawia002/lux/extractors/reddit"
_ "github.com/iawia002/lux/extractors/rumble"
_ "github.com/iawia002/lux/extractors/streamtape"
_ "github.com/iawia002/lux/extractors/tangdou"
_ "github.com/iawia002/lux/extractors/tiktok"
Expand Down
320 changes: 320 additions & 0 deletions extractors/rumble/rumble.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,320 @@
package rumble

import (
"compress/flate"
"compress/gzip"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"path"
"regexp"
"strconv"

"github.com/pkg/errors"

"github.com/iawia002/lux/extractors"
"github.com/iawia002/lux/request"
"github.com/iawia002/lux/utils"
)

// init registers a rumble extractor under the "rumble" key when the package
// is loaded.
func init() {
	rumble := New()
	extractors.Register("rumble", rumble)
}

// extractor is the rumble implementation returned by New as an
// extractors.Extractor; it carries no state.
type extractor struct{}

// New returns a rumble extractor.
func New() extractors.Extractor {
	return new(extractor)
}

// rumbleData is one element of the JSON array found in the page's
// `application/ld+json` script tag (see readPayload).
// In this file only Type (to pick the "VideoObject" entry) and EmbedURL
// (to derive the video ID) are read; the other fields are decoded but unused here.
type rumbleData struct {
Format string `json:"format"`
Name string `json:"name"`
EmbedURL string `json:"embedUrl"`
ThumbnailURL string `json:"thumbnailUrl"`
Type string `json:"@type"`
VideoURL string `json:"videoUrl"`
Quality string `json:"quality"`
}

// Extract is the main function to extract the data.
//
// It downloads the page at url, takes the title from the <title> tag
// (falling back to "rumble video"), reads the embedded JSON payload to find
// the embed URL, derives the video ID from it and finally asks rumble for the
// available streams. It returns a single-element slice on success; any
// failure along the way is returned with a stack trace attached.
func (e *extractor) Extract(url string, option extractors.Options) ([]*extractors.Data, error) {
	res, err := request.Request(http.MethodGet, url, nil, nil)
	if err != nil {
		return nil, errors.WithStack(err)
	}
	defer res.Body.Close() // nolint

	// Pick a decoder matching the response's Content-Encoding.
	var reader io.ReadCloser
	switch res.Header.Get("Content-Encoding") {
	case "gzip":
		// gzip.NewReader reads the header eagerly and can fail; a discarded
		// error here would leave a nil reader behind and panic on first use.
		reader, err = gzip.NewReader(res.Body)
		if err != nil {
			return nil, errors.WithStack(err)
		}
	case "deflate":
		reader = flate.NewReader(res.Body)
	default:
		reader = res.Body
	}
	defer reader.Close() // nolint

	b, err := io.ReadAll(reader)
	if err != nil {
		return nil, errors.WithStack(err)
	}
	html := string(b)

	// Fall back to a generic title when the page has no usable <title> tag.
	title := "rumble video"
	if matchTitle := utils.MatchOneOf(html, `<title>(.+?)</title>`); len(matchTitle) > 1 {
		title = matchTitle[1]
	}

	payload, err := readPayload(html)
	if err != nil {
		return nil, errors.WithStack(err)
	}

	videoID, err := getVideoID(payload.EmbedURL)
	if err != nil {
		return nil, errors.WithStack(err)
	}

	streams, err := fetchVideoQuality(videoID)
	if err != nil {
		return nil, errors.WithStack(err)
	}

	return []*extractors.Data{
		{
			Site:    "Rumble rumble.com",
			Title:   title,
			Type:    extractors.DataTypeVideo,
			Streams: streams,
			URL:     url,
		},
	}, nil
}

// readPayload extracts the JSON array embedded in the page's
// `application/ld+json` script tag and returns its first element whose
// "@type" is "VideoObject".
//
// It returns extractors.ErrURLQueryParamsParseFailed when the script tag is
// not found, the JSON decoding error when the content is not an array of
// objects, and extractors.ErrURLParseFailed when no "VideoObject" entry exists.
func readPayload(html string) (*rumbleData, error) {
	matchPayload := utils.MatchOneOf(html, `\<script\stype="?application\/ld\+json"?\>(.+?)\<\/script>`)
	// The capture group at index 1 is read below, so at least two elements
	// (whole match + group) are required.
	if len(matchPayload) < 2 {
		return nil, errors.WithStack(extractors.ErrURLQueryParamsParseFailed)
	}

	var rumbles []rumbleData
	if err := json.Unmarshal([]byte(matchPayload[1]), &rumbles); err != nil {
		return nil, errors.WithStack(err)
	}

	// Index into the slice so the returned pointer refers to the decoded
	// element itself rather than to a per-iteration copy.
	for i := range rumbles {
		if rumbles[i].Type == "VideoObject" {
			return &rumbles[i], nil
		}
	}

	return nil, errors.WithStack(extractors.ErrURLParseFailed)
}

// getVideoID returns the last path element of embedURL, which is used as the
// video ID when requesting the stream list.
//
// It returns extractors.ErrURLParseFailed when embedURL cannot be parsed or
// when its path holds no usable element.
func getVideoID(embedURL string) (string, error) {
	u, err := url.Parse(embedURL)
	if err != nil {
		return "", errors.WithStack(extractors.ErrURLParseFailed)
	}

	// path.Base yields "." for an empty path and "/" for a path made only of
	// slashes; neither is a video ID, so reject them instead of passing them on.
	id := path.Base(u.Path)
	if id == "." || id == "/" {
		return "", errors.WithStack(extractors.ErrURLParseFailed)
	}

	return id, nil
}

// rumbleResponse is the outer object of the embedJS video response.
// Only its "ua" member is captured, and it is kept as raw JSON so that
// rumbleStreams.UnmarshalJSON can decode each format inside it separately.
type rumbleResponse struct {
Streams *json.RawMessage `json:"ua"`
}

// streamInfo is the metadata shared by every stream entry: the media URL plus
// a "meta" object with bitrate, size, width ("w") and height ("h").
type streamInfo struct {
URL string `json:"url"`
Meta struct {
Bitrate uint16 `json:"bitrate"`
// Size is copied into extractors.Part.Size and extractors.Stream.Size;
// presumably a byte count — TODO confirm against the API.
Size int64 `json:"size"`
Width uint16 `json:"w"`
Height uint16 `json:"h"`
} `json:"meta"`
}

// videoQualities has one slot per quality key ("240" through "2161") of the
// `mp4` and `webm` format objects.
// A key missing from the response leaves its slot zero-valued (empty URL).
// NOTE(review): the meaning of the "2161" key is not explained in this file —
// confirm what it denotes.
type videoQualities struct {
Q240 struct{ streamInfo } `json:"240"`
Q360 struct{ streamInfo } `json:"360"`
Q480 struct{ streamInfo } `json:"480"`
Q720 struct{ streamInfo } `json:"720"`
Q1080 struct{ streamInfo } `json:"1080"`
Q1440 struct{ streamInfo } `json:"1440"`
Q2160 struct{ streamInfo } `json:"2160"`
Q2161 struct{ streamInfo } `json:"2161"`
}

// rumbleStreams is the decoded stream list of a video: fixed-quality `mp4`
// and `webm` entries plus the single "auto" entry of `hls`, whose URL is
// fetched as an m3u8 playlist by makeAllLiveStreams.
// It is filled by its custom UnmarshalJSON, not by the struct tags alone.
type rumbleStreams struct {
FMp4 struct {
videoQualities
} `json:"mp4"`
FWebm struct {
videoQualities
} `json:"webm"`
FHLS struct {
QAuto struct{ streamInfo } `json:"auto"`
} `json:"hls"`
}

// UnmarshalJSON decodes the embedJS video response into rs.
//
// The streams live under the response's "ua" member. Each of its `mp4`,
// `webm` and `hls` members is decoded on its own because, per the API's
// behavior, some of them are either an array or an object; a member that does
// not have the expected object shape is skipped and its fields stay zero.
//
// It returns extractors.ErrURLParseFailed when b is not valid JSON, when "ua"
// is missing or null, or when "ua" is not a JSON object.
func (rs *rumbleStreams) UnmarshalJSON(b []byte) error {
	var resp rumbleResponse
	if err := json.Unmarshal(b, &resp); err != nil {
		return errors.WithStack(extractors.ErrURLParseFailed)
	}
	// A `null` payload or a missing/null "ua" leaves Streams nil;
	// dereferencing it would panic.
	if resp.Streams == nil {
		return errors.WithStack(extractors.ErrURLParseFailed)
	}

	// Get individual stream from the response
	var obj map[string]*json.RawMessage
	if err := json.Unmarshal(*resp.Streams, &obj); err != nil {
		return errors.WithStack(extractors.ErrURLParseFailed)
	}

	// A member that is present but JSON null decodes to a nil pointer, so test
	// the pointer rather than mere key presence. Decode errors are ignored on
	// purpose: an array-shaped member simply means "no streams of this format".
	if v := obj["mp4"]; v != nil {
		_ = json.Unmarshal(*v, &rs.FMp4)
	}
	if v := obj["webm"]; v != nil {
		_ = json.Unmarshal(*v, &rs.FWebm)
	}
	if v := obj["hls"]; v != nil {
		_ = json.Unmarshal(*v, &rs.FHLS)
	}

	return nil
}

// makeAllVODStreams adds the fixed-quality `mp4` streams (keyed by quality)
// and the 480p `webm` stream (keyed "webm") to m.
//
// Only qualities that actually carry a URL are added, so m never receives
// placeholder streams with an empty URL and zero size for qualities the
// response did not contain.
func (rs *rumbleStreams) makeAllVODStreams(m map[string]*extractors.Stream) {
	variants := []struct {
		key     string
		quality string
		ext     string
		info    *streamInfo
	}{
		{"webm", "480", "webm", &rs.FWebm.Q480.streamInfo},
		{"240", "240", "mp4", &rs.FMp4.Q240.streamInfo},
		{"360", "360", "mp4", &rs.FMp4.Q360.streamInfo},
		{"480", "480", "mp4", &rs.FMp4.Q480.streamInfo},
		{"720", "720", "mp4", &rs.FMp4.Q720.streamInfo},
		{"1080", "1080", "mp4", &rs.FMp4.Q1080.streamInfo},
		{"1440", "1440", "mp4", &rs.FMp4.Q1440.streamInfo},
		{"2160", "2160", "mp4", &rs.FMp4.Q2160.streamInfo},
		{"2161", "2161", "mp4", &rs.FMp4.Q2161.streamInfo},
	}

	for _, v := range variants {
		// An empty URL means this quality was absent from the response.
		if v.info.URL == "" {
			continue
		}
		m[v.key] = makeStreamMeta(v.quality, v.ext, v.info)
	}
}

// reResolution captures the resolution number from a playlist URL, ex. _720p/
var reResolution = regexp.MustCompile(`_(\d{3,4})p\/`)

// makeAllLiveStreams adds an "hls" stream to m, built from the `hls.auto`
// playlist.
//
// The URLs listed by that playlist are treated as variant playlists; the one
// with the highest resolution in its URL is chosen (the first one if none
// carries a resolution) and the URLs it lists become the stream's "ts" parts.
//
// It returns an error, leaving m untouched, when there is no HLS URL, when a
// playlist cannot be fetched, or when the first playlist lists no entries.
func (rs *rumbleStreams) makeAllLiveStreams(m map[string]*extractors.Stream) error {
	hls := &rs.FHLS.QAuto.streamInfo
	// Without an HLS URL there is nothing to fetch; bail out before issuing a
	// request that can only fail.
	if hls.URL == "" {
		return errors.WithStack(extractors.ErrURLParseFailed)
	}

	playlists, err := utils.M3u8URLs(hls.URL)
	if err != nil {
		return errors.WithStack(err)
	}

	if len(playlists) == 0 {
		return errors.WithStack(extractors.ErrURLParseFailed)
	}

	// Find the highest resolution
	playlistURL := playlists[0]
	maxRes := 0
	for _, candidate := range playlists {
		matched := reResolution.FindStringSubmatch(candidate)
		if len(matched) == 0 {
			continue
		}
		height, err := strconv.Atoi(matched[1])
		if err != nil {
			continue
		}

		if maxRes < height {
			maxRes = height
			playlistURL = candidate
		}
	}

	tsURLs, err := utils.M3u8URLs(playlistURL)
	if err != nil {
		return errors.WithStack(err)
	}

	// NOTE(review): every part is given the size reported for the whole HLS
	// entry, as before — confirm whether a per-part size is expected here.
	parts := make([]*extractors.Part, 0, len(tsURLs))
	for _, tsURL := range tsURLs {
		parts = append(parts, &extractors.Part{
			URL:  tsURL,
			Size: hls.Meta.Size,
			Ext:  "ts",
		})
	}

	m["hls"] = &extractors.Stream{
		Parts:   parts,
		Size:    hls.Meta.Size,
		Quality: strconv.Itoa(maxRes),
	}

	return nil
}

// fetchVideoQuality requests the formats and qualities available for videoID
// from rumble's embedJS endpoint and returns them as a stream map.
//
// Request, decompression, read and JSON decoding errors are returned with a
// stack trace attached. A failure to build the HLS stream is not an error:
// the map then simply has no "hls" entry.
func fetchVideoQuality(videoID string) (map[string]*extractors.Stream, error) {
	// Escape the ID so characters such as '&' or '#' cannot alter the query.
	reqURL := fmt.Sprintf(`https://rumble.com/embedJS/u3/?request=video&ver=2&v=%s&ext={"ad_count":null}&ad_wt=0`, url.QueryEscape(videoID))

	res, err := request.Request(http.MethodGet, reqURL, nil, nil)
	if err != nil {
		return nil, errors.WithStack(err)
	}
	defer res.Body.Close() // nolint

	// Pick a decoder matching the response's Content-Encoding.
	var reader io.ReadCloser
	switch res.Header.Get("Content-Encoding") {
	case "gzip":
		// gzip.NewReader reads the header eagerly and can fail; a discarded
		// error here would leave a nil reader behind and panic on first use.
		reader, err = gzip.NewReader(res.Body)
		if err != nil {
			return nil, errors.WithStack(err)
		}
	case "deflate":
		reader = flate.NewReader(res.Body)
	default:
		reader = res.Body
	}
	defer reader.Close() // nolint

	b, err := io.ReadAll(reader)
	if err != nil {
		return nil, errors.WithStack(err)
	}

	var rs rumbleStreams
	if err := json.Unmarshal(b, &rs); err != nil {
		return nil, errors.WithStack(err)
	}

	streams := make(map[string]*extractors.Stream, 9)
	rs.makeAllVODStreams(streams)
	// Best effort: the HLS stream is optional, so its error is deliberately
	// dropped and the other streams are still returned.
	_ = rs.makeAllLiveStreams(streams)

	return streams, nil
}

// makeStreamMeta builds a single-part stream of quality q from info: the part
// takes info's URL and the extension ext, and both the part and the stream
// take info's reported size.
func makeStreamMeta(q, ext string, info *streamInfo) *extractors.Stream {
	size := info.Meta.Size

	stream := new(extractors.Stream)
	stream.Quality = q
	stream.Size = size
	stream.Parts = []*extractors.Part{
		{URL: info.URL, Size: size, Ext: ext},
	}
	return stream
}
28 changes: 28 additions & 0 deletions extractors/rumble/rumble_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package rumble

import (
"testing"

"github.com/iawia002/lux/extractors"
"github.com/iawia002/lux/test"
)

// TestRumble runs the extractor against a real rumble.com page and fails when
// extraction returns an error or yields no data.
func TestRumble(t *testing.T) {
	tests := []struct {
		name string
		args test.Args
	}{
		{
			name: "normal test",
			args: test.Args{
				URL:   "https://rumble.com/v24swn0-just-say-yes-to-climate-lockdowns.html",
				Title: "Just Say YES to Climate Lockdowns!",
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// The results used to be discarded, so the test could never fail.
			data, err := New().Extract(tt.args.URL, extractors.Options{})
			if err != nil {
				t.Fatalf("Extract(%q) returned an error: %v", tt.args.URL, err)
			}
			if len(data) == 0 {
				t.Fatalf("Extract(%q) returned no data", tt.args.URL)
			}
			// NOTE(review): tt.args.Title is not compared yet because the live
			// page's <title> text has not been verified against it — confirm
			// and add the assertion.
		})
	}
}