Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename, add some docs #50

Merged
merged 1 commit into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions fetch/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ const (

type Client interface {
Get(url string, headers http.Header) (*http.Response, error)
Identifier() resource.FetchClient
Identifier() resource.ClientIdentifier
}

type ClientOption func(*defaultClient) error
Expand Down Expand Up @@ -49,7 +49,7 @@ type defaultClient struct {
httpClient *http.Client
}

func (c defaultClient) Identifier() resource.FetchClient {
func (c defaultClient) Identifier() resource.ClientIdentifier {
return resource.DefaultClient
}

Expand Down
2 changes: 1 addition & 1 deletion internal/headless/fetch_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ func NewChromeClient(ctx context.Context, userAgent string, maxConcurrent int) (
return c, nil
}

func (c client) Identifier() resource.FetchClient {
func (c client) Identifier() resource.ClientIdentifier {
return resource.HeadlessChromium
}

Expand Down
4 changes: 2 additions & 2 deletions internal/server/api/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import (
)

type mockUrlFetcher struct {
fetchMethod resource.FetchClient
fetchMethod resource.ClientIdentifier
}

func (m *mockUrlFetcher) Fetch(url *nurl.URL) (*resource.WebPage, error) {
Expand Down Expand Up @@ -175,7 +175,7 @@ func TestSingleHandler(t *testing.T) {
name string
url string
handler http.HandlerFunc
expectMethod resource.FetchClient
expectMethod resource.ClientIdentifier
}{
{
name: "client",
Expand Down
2 changes: 1 addition & 1 deletion internal/server/version/version.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package version

const (
Commit = "1a12247"
Commit = "0311158"
Tag = "v0.8.6"
RepoURL = "https://github.com/efixler/scrape"
)
2 changes: 1 addition & 1 deletion internal/settings/benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ func populateTestDB(dbh *database.DBHandle, count int) ([]string, error) {
ds := &DomainSettings{
Domain: d,
Sitename: randomString(32),
FetchClient: resource.FetchClient(rand.Intn(3)),
FetchClient: resource.ClientIdentifier(rand.Intn(3)),
UserAgent: ua.UserAgent(randomString(64)),
Headers: map[string]string{
"x-token": randomString(rand.Intn(128) + 127),
Expand Down
10 changes: 5 additions & 5 deletions internal/settings/domain.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ var (
)

type DomainSettings struct {
Domain string `json:"domain,omitempty"`
Sitename string `json:"sitename,omitempty"`
FetchClient resource.FetchClient `json:"fetch_client,omitempty"`
UserAgent ua.UserAgent `json:"user_agent,omitempty"`
Headers MIMEHeader `json:"headers,omitempty"`
Domain string `json:"domain,omitempty"`
Sitename string `json:"sitename,omitempty"`
FetchClient resource.ClientIdentifier `json:"fetch_client,omitempty"`
UserAgent ua.UserAgent `json:"user_agent,omitempty"`
Headers MIMEHeader `json:"headers,omitempty"`
}

// Domain names will be case-folded to lower case.
Expand Down
4 changes: 2 additions & 2 deletions internal/settings/domain_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func TestJSONUnmarshal(t *testing.T) {
data string
expectErr bool
expectSitename string
expectFetchClient resource.FetchClient
expectFetchClient resource.ClientIdentifier
expectUserAgent ua.UserAgent
expectHeaders map[string]string
}{
Expand Down Expand Up @@ -90,7 +90,7 @@ func TestJSONMarshal(t *testing.T) {
expectErr bool
expectJSON string
expectSitename string
expectFetchClient resource.FetchClient
expectFetchClient resource.ClientIdentifier
expectUserAgent ua.UserAgent
expectHeaders map[string]string
}{
Expand Down
2 changes: 1 addition & 1 deletion internal/storage/storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ func (s URLDataStore) Fetch(url *nurl.URL) (*resource.WebPage, error) {
expiryEpoch int64
metadata string
contentText string
fetchMethod resource.FetchClient
fetchMethod resource.ClientIdentifier
)
err = rows.Scan(&canonicalUrl, &parsedUrl, &fetchEpoch, &expiryEpoch, &metadata, &contentText, &fetchMethod)
if err != nil {
Expand Down
3 changes: 3 additions & 0 deletions resource/feed.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@ import (
"github.com/mmcdole/gofeed"
)

// Feed embeds gofeed.Feed and adds a RequestedURL field,
// along with the ItemLinks method.
type Feed struct {
RequestedURL string `json:"requested_url,omitempty"`
gofeed.Feed
}

// ItemLinks returns a slice of links, one for each item in the feed.
func (f Feed) ItemLinks() []string {
rval := make([]string, len(f.Items))
for i, item := range f.Items {
Expand Down
26 changes: 13 additions & 13 deletions resource/fetch_method.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,50 +7,50 @@ import (
"fmt"
)

type FetchClient int
type ClientIdentifier int

const (
Unspecified FetchClient = iota
Unspecified ClientIdentifier = iota
DefaultClient
HeadlessChromium
)

var fetchMethods = map[FetchClient]string{
var fetchClientNames = map[ClientIdentifier]string{
Unspecified: "unspecified",
DefaultClient: "direct",
HeadlessChromium: "chromium-headless",
}

var ErrNoSuchFetchMethod = errors.New("no such FetchMethod")
var ErrNoSuchFetchMethod = errors.New("no such fetch client identifier")

func (f FetchClient) String() string {
if val, ok := fetchMethods[f]; ok {
func (f ClientIdentifier) String() string {
if val, ok := fetchClientNames[f]; ok {
return val
} else {
return "Unknown"
}
}

func (f *FetchClient) UnmarshalText(data []byte) error {
for k, v := range fetchMethods {
func (f *ClientIdentifier) UnmarshalText(data []byte) error {
for k, v := range fetchClientNames {
if v == string(data) {
*f = k
return nil
}
}
return errors.Join(
fmt.Errorf("invalid FetchMethod %q", string(data)),
fmt.Errorf("invalid name %q", string(data)),
ErrNoSuchFetchMethod,
)
}

func (f FetchClient) MarshalText() ([]byte, error) {
if val, ok := fetchMethods[f]; ok {
func (f ClientIdentifier) MarshalText() ([]byte, error) {
if val, ok := fetchClientNames[f]; ok {
return []byte(val), nil
} else {
return []byte(fetchMethods[Unspecified]),
return []byte(fetchClientNames[Unspecified]),
errors.Join(
fmt.Errorf("invalid FetchMethod %q", int(f)),
fmt.Errorf("invalid name %q", int(f)),
ErrNoSuchFetchMethod,
)
}
Expand Down
14 changes: 7 additions & 7 deletions resource/fetch_method_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
func TestFetchMethodString(t *testing.T) {
tests := []struct {
name string
f FetchClient
f ClientIdentifier
want string
}{
{
Expand Down Expand Up @@ -37,11 +37,11 @@ func TestFetchMethodString(t *testing.T) {

func TestUnmarshal(t *testing.T) {
type container struct {
F FetchClient `json:"fetch_method"`
F ClientIdentifier `json:"fetch_method"`
}
tests := []struct {
input string
expectedValue FetchClient
expectedValue ClientIdentifier
expectError bool
}{
{input: "unspecified", expectedValue: Unspecified},
Expand Down Expand Up @@ -69,13 +69,13 @@ func TestMarshal(t *testing.T) {
expectedValue string
expectError bool
}{
{input: 0, expectedValue: fetchMethods[Unspecified]},
{input: 1, expectedValue: fetchMethods[DefaultClient]},
{input: 2, expectedValue: fetchMethods[HeadlessChromium]},
{input: 0, expectedValue: fetchClientNames[Unspecified]},
{input: 1, expectedValue: fetchClientNames[DefaultClient]},
{input: 2, expectedValue: fetchClientNames[HeadlessChromium]},
{input: -1, expectError: true},
}
for _, test := range tests {
fm := FetchClient(test.input)
fm := ClientIdentifier(test.input)
val, err := fm.MarshalText()
if (err != nil) != test.expectError {
t.Errorf("%q expected error %v, got %v", test.input, test.expectError, err)
Expand Down
1 change: 1 addition & 0 deletions resource/url.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ var illegalParams = []string{
"utm_brand",
}

// CleanURL removes utm_ parameters from the URL
func CleanURL(url *nurl.URL) *nurl.URL {
if url == nil {
return nil
Expand Down
46 changes: 23 additions & 23 deletions resource/web_page.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,29 +35,29 @@ func NewWebPage(url nurl.URL) *WebPage {
// Represents a web page that was fetched, including metadata from the page itself,
// text content, and information about the fetch operation.
type WebPage struct { // The page that was requested by the caller
RequestedURL *nurl.URL `json:"-"` // The page that was actually fetched
CanonicalURL *nurl.URL `json:"-"`
OriginalURL string `json:"original_url,omitempty"` // The canonical URL of the page
TTL time.Duration `json:"-"` // Time to live for the resource
FetchTime *time.Time `json:"fetch_time,omitempty"` // When the returned source was fetched
FetchMethod FetchClient `json:"fetch_method,omitempty"` // Method used to fetch the page
Hostname string `json:"hostname,omitempty"` // Hostname of the page
StatusCode int `json:"status_code,omitempty"` // HTTP status code
Error error `json:"error,omitempty"`
Title string `json:"title,omitempty"` // Title of the page
Description string `json:"description,omitempty"` // Description of the page
Sitename string `json:"sitename,omitempty"` // Name of the site
Authors []string `json:"authors,omitempty"` // Authors of the page
Date *time.Time `json:"date,omitempty"` // Date of the page
Categories []string `json:"categories,omitempty"` // Categories of the page
Tags []string `json:"tags,omitempty"` // Tags of the page
Language string `json:"language,omitempty"` // Language of the page
Image string `json:"image,omitempty"` // Image of the page
PageType string `json:"page_type,omitempty"` // Type of the page
License string `json:"license,omitempty"` // License of the page
ID string `json:"id,omitempty"` // ID of the page
Fingerprint string `json:"fingerprint,omitempty"` // Fingerprint of the page
ContentText string `json:"content_text,omitempty"` // Error that occurred during fetching
RequestedURL *nurl.URL `json:"-"` // The page that was requested by the caller
CanonicalURL *nurl.URL `json:"-"` // The page that was actually fetched
OriginalURL string `json:"original_url,omitempty"` // The canonical URL of the page
TTL time.Duration `json:"-"` // Time to live for the resource
FetchTime *time.Time `json:"fetch_time,omitempty"` // When the returned source was fetched
FetchMethod ClientIdentifier `json:"fetch_method,omitempty"` // Method used to fetch the page
Hostname string `json:"hostname,omitempty"` // Hostname of the page
StatusCode int `json:"status_code,omitempty"` // HTTP status code
Error error `json:"error,omitempty"` // Error that occurred during fetching
Title string `json:"title,omitempty"` // Title of the page
Description string `json:"description,omitempty"` // Description of the page
Sitename string `json:"sitename,omitempty"` // Name of the site
Authors []string `json:"authors,omitempty"` // Authors of the page
Date *time.Time `json:"date,omitempty"` // Date of the page
Categories []string `json:"categories,omitempty"` // Categories of the page
Tags []string `json:"tags,omitempty"` // Tags of the page
Language string `json:"language,omitempty"` // Language of the page
Image string `json:"image,omitempty"` // Image of the page
PageType string `json:"page_type,omitempty"` // Type of the page
License string `json:"license,omitempty"` // License of the page
ID string `json:"id,omitempty"` // ID of the page
Fingerprint string `json:"fingerprint,omitempty"` // Fingerprint of the page
ContentText string `json:"content_text,omitempty"` // Text content of the page
skipMap map[skippable]bool
}

Expand Down
2 changes: 1 addition & 1 deletion resource/web_page_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ func TestExpireTime(t *testing.T) {
func TestFetchMethod(t *testing.T) {
tests := []struct {
name string
f FetchClient
f ClientIdentifier
want string
}{
{
Expand Down