-
Notifications
You must be signed in to change notification settings - Fork 6
/
api.go
124 lines (103 loc) · 3.25 KB
/
api.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
package instagram
import (
"encoding/json"
"errors"
"fmt"
"log"
"net"
"net/http"
"regexp"
"strings"
"time"
browser "github.com/EDDYCJY/fake-useragent"
"github.com/gocolly/colly/v2"
"github.com/omegaatt36/instagramrobot/domain"
)
// Extractor is the implementation used for fetching Instagram media.
type Extractor struct {
// client is the HTTP client handed to the colly collector when a post is fetched.
client *http.Client
}
// NewInstagramFetcher creates a new Extractor and returns it as a
// domain.InstagramFetcher.
//
// The HTTP client it carries limits every request to 10 seconds overall, and
// limits both establishing the connection and the TLS handshake to 5 seconds
// each.
func NewInstagramFetcher() domain.InstagramFetcher {
	dialer := &net.Dialer{Timeout: 5 * time.Second}

	return &Extractor{
		client: &http.Client{
			Timeout: 10 * time.Second,
			Transport: &http.Transport{
				// DialContext replaces the deprecated Transport.Dial hook, so
				// a cancelled or timed-out request also aborts an in-flight
				// dial instead of waiting for the dialer's own timeout.
				DialContext:         dialer.DialContext,
				TLSHandshakeTimeout: 5 * time.Second,
			},
		},
	}
}
// fromEmbedResponse converts an EmbedResponse into a domain.Media.
//
// The top-level short code, media URL, video flag and caption are copied from
// the embed, and one domain.MediaItem is appended for every edge found in the
// embed's slider items.
func fromEmbedResponse(embed EmbedResponse) domain.Media {
	var result domain.Media
	result.ShortCode = embed.Media.ShortCode
	result.URL = embed.ExtractMediaURL()
	result.IsVideo = embed.IsVideo()
	result.Caption = embed.GetCaption()

	for _, edge := range embed.Media.SliderItems.Edges {
		child := &domain.MediaItem{
			IsVideo: edge.Node.IsVideo,
			URL:     edge.Node.ExtractMediaURL(),
		}
		result.Items = append(result.Items, child)
	}

	return result
}
// gqlDataPattern captures the escaped payload that follows the `\"gql_data\":`
// key in an inline script of the embed page, up to the `NavigationMetrics`
// marker. It is compiled once at package scope rather than once per <script>
// element visited.
var gqlDataPattern = regexp.MustCompile(`\\\"gql_data\\\":([\s\S]*)\}\"\}\]\]\,\[\"NavigationMetrics`)

// GetPostWithCode fetches information about a specific Instagram post,
// identified by its unique short code, from the post's captioned embed page.
//
// Two strategies are applied to a single page load:
//  1. decode the "gql_data" JSON found in an inline <script> element;
//  2. fall back to the src attribute of the "img.EmbeddedMediaImage" element,
//     in which case only the cover photo URL is returned.
//
// It returns an error when the HTTP request fails, or when neither strategy
// produced a result. A payload that cannot be decoded is logged and treated
// as "strategy 1 found nothing" instead of terminating the process.
func (repo *Extractor) GetPostWithCode(code string) (domain.Media, error) {
	postURL := fmt.Sprintf("https://www.instagram.com/p/%v/embed/captioned/", code)

	var (
		coverPhoto    string
		embedResponse EmbedResponse
	)

	collector := colly.NewCollector()
	collector.SetClient(repo.client)

	collector.OnHTML("img.EmbeddedMediaImage", func(e *colly.HTMLElement) {
		coverPhoto = e.Attr("src")
	})

	collector.OnHTML("script", func(e *colly.HTMLElement) {
		match := gqlDataPattern.FindStringSubmatch(e.Text)
		if len(match) < 2 {
			return
		}

		// Undo the escaping applied to the captured payload: \" -> ",
		// \\/ -> / and \\ -> \ (in that order).
		s := strings.ReplaceAll(match[1], `\"`, `"`)
		s = strings.ReplaceAll(s, `\\/`, `/`)
		s = strings.ReplaceAll(s, `\\`, `\`)

		// Decode into a scratch value so that a failed decode can never leave
		// a half-populated response behind.
		var parsed EmbedResponse
		if err := json.Unmarshal([]byte(s), &parsed); err != nil {
			// A malformed payload must not kill the whole process; log it and
			// let the cover-photo fallback below take over.
			log.Printf("instagram: cannot decode embedded gql_data for %q: %v", code, err)
			return
		}
		embedResponse = parsed
	})

	collector.OnRequest(func(r *colly.Request) {
		r.Headers.Set("User-Agent", browser.Random())
	})

	if err := collector.Visit(postURL); err != nil {
		return domain.Media{}, fmt.Errorf("failed to send HTTP request to the Instagram: %w", err)
	}

	// Strategy 1: the embedded JSON was found and decoded.
	if !embedResponse.IsEmpty() {
		return fromEmbedResponse(embedResponse), nil
	}

	// Strategy 2: only the cover photo could be located.
	if coverPhoto != "" {
		return domain.Media{
			URL:     coverPhoto,
			Caption: "can only fetch the cover photo",
		}, nil
	}

	// Both strategies came up empty.
	return domain.Media{}, errors.New("failed to fetch the post\nthe page might be \"private\", or\nthe link is completely wrong")
}
// shortCodePattern matches one of the media path segments (p, tv, reel or
// reels/videos) followed by "/" and the short code. The segment must sit at
// the start of the input or directly after a "/", so that unrelated segments
// which merely end in "p" or "tv" (for example "/help/abc") are not mistaken
// for a post link. It is compiled once at package scope.
var shortCodePattern = regexp.MustCompile(`(?:^|/)(p|tv|reel|reels/videos)/([A-Za-z0-9_-]+)`)

// ExtractShortCodeFromLink extracts the media short code from a URL or path.
//
// The short code is the run of letters, digits, "-" and "_" that follows a
// "p/", "tv/", "reel/" or "reels/videos/" path segment. An error is returned
// when the link contains no such segment.
func ExtractShortCodeFromLink(link string) (string, error) {
	// A successful match yields: the whole match, the segment, the short code.
	values := shortCodePattern.FindStringSubmatch(link)
	if len(values) != 3 {
		return "", errors.New("couldn't extract the media short code from the link")
	}

	return values[2], nil
}