Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sitemap support #18407

Merged
merged 14 commits into from
Jun 25, 2022
3 changes: 3 additions & 0 deletions custom/conf/app.example.ini
Original file line number Diff line number Diff line change
Expand Up @@ -1097,6 +1097,9 @@ PATH =
;; Number of items that are displayed in home feed
;FEED_PAGING_NUM = 20
;;
;; Number of items that are displayed in a single subsitemap
;SITEMAP_PAGING_NUM = 20
;;
;; Number of maximum commits displayed in commit graph.
;GRAPH_MAX_COMMIT_NUM = 100
;;
Expand Down
1 change: 1 addition & 0 deletions docs/content/doc/advanced/config-cheat-sheet.en-us.md
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ The following configuration set `Content-Type: application/vnd.android.package-a
- `MEMBERS_PAGING_NUM`: **20**: Number of members that are shown in organization members.
- `FEED_MAX_COMMIT_NUM`: **5**: Number of maximum commits shown in one activity feed.
- `FEED_PAGING_NUM`: **20**: Number of items that are displayed in home feed.
- `SITEMAP_PAGING_NUM`: **20**: Number of items that are displayed in a single subsitemap.
danog marked this conversation as resolved.
Show resolved Hide resolved
- `GRAPH_MAX_COMMIT_NUM`: **100**: Number of maximum commits shown in the commit graph.
- `CODE_COMMENT_LINES`: **4**: Number of lines of code shown for a code comment.
- `DEFAULT_THEME`: **auto**: \[auto, gitea, arc-green\]: Set the default theme for the Gitea install.
Expand Down
2 changes: 2 additions & 0 deletions modules/setting/setting.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ var (
// UI settings
UI = struct {
ExplorePagingNum int
SitemapPagingNum int
danog marked this conversation as resolved.
Show resolved Hide resolved
IssuePagingNum int
RepoSearchPagingNum int
MembersPagingNum int
Expand Down Expand Up @@ -260,6 +261,7 @@ var (
} `ini:"ui.meta"`
}{
ExplorePagingNum: 20,
SitemapPagingNum: 20,
IssuePagingNum: 10,
RepoSearchPagingNum: 10,
MembersPagingNum: 20,
Expand Down
69 changes: 69 additions & 0 deletions modules/sitemap/sitemap.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package sitemap

import (
"bytes"
"encoding/xml"
"fmt"
"io"
"time"
)

// sitemapFileLimit is the largest serialized sitemap, in bytes, that WriteTo
// is willing to emit (50 MiB).
const sitemapFileLimit = 50 << 20

// URL represents a single sitemap entry: a location plus an optional
// last-modification timestamp.
type URL struct {
// URL is the entry's address; it is serialized as the <loc> child element.
URL string `xml:"loc"`
// LastMod is serialized as <lastmod>; a nil pointer omits the element.
LastMod *time.Time `xml:"lastmod,omitempty"`
}

// Sitemap represents a sitemap document. The same type backs both the
// "urlset" and the "sitemapindex" root elements; the constructors pick
// the root name.
type Sitemap struct {
// XMLName holds the name of the root element.
XMLName xml.Name
// Namespace is emitted as the xmlns attribute of the root element.
Namespace string `xml:"xmlns,attr"`

// URLs holds the entries, each serialized as a <url> element.
URLs []URL `xml:"url"`
}

// NewSitemap creates an empty sitemap whose root element is "urlset",
// declared in the sitemaps.org 0.9 namespace.
func NewSitemap() *Sitemap {
	var m Sitemap
	m.XMLName = xml.Name{Local: "urlset"}
	m.Namespace = "http://www.sitemaps.org/schemas/sitemap/0.9"
	return &m
}

// NewSitemapIndex creates an empty sitemap whose root element is
// "sitemapindex", declared in the sitemaps.org 0.9 namespace.
//
// NOTE(review): entries added to the result are still serialized as <url>
// elements (see the tag on Sitemap.URLs); the sitemaps.org protocol appears
// to expect <sitemap> children inside a sitemapindex — confirm.
func NewSitemapIndex() *Sitemap {
	var idx Sitemap
	idx.XMLName = xml.Name{Local: "sitemapindex"}
	idx.Namespace = "http://www.sitemaps.org/schemas/sitemap/0.9"
	return &idx
}

// Add appends u to the end of the sitemap's entry list.
func (s *Sitemap) Add(u URL) {
	entries := append(s.URLs, u)
	s.URLs = entries
}

// WriteTo serializes the sitemap as XML (header, document, trailing newline)
// and writes it to w, returning the number of bytes written.
//
// The document is rendered into memory first, so nothing is written to w when
// an error is returned for one of these reasons:
//   - the sitemap holds more than 50000 entries,
//   - XML encoding fails,
//   - the rendered document is larger than sitemapFileLimit bytes.
func (s *Sitemap) WriteTo(w io.Writer) (int64, error) {
	// Largest number of entries accepted in a single sitemap file.
	const maxEntries = 50000

	if n := len(s.URLs); n > maxEntries {
		return 0, fmt.Errorf("The sitemap contains too many URLs: %d", n)
	}

	var out bytes.Buffer
	out.WriteString(xml.Header)
	if err := xml.NewEncoder(&out).Encode(s); err != nil {
		return 0, err
	}
	// bytes.Buffer.WriteByte is documented to always return a nil error.
	out.WriteByte('\n')

	if size := out.Len(); size > sitemapFileLimit {
		return 0, fmt.Errorf("The sitemap is too big: %d", size)
	}
	return out.WriteTo(w)
}
77 changes: 77 additions & 0 deletions modules/sitemap/sitemap_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package sitemap

import (
"bytes"
"encoding/xml"
"fmt"
"strings"
"testing"
"time"

"github.com/stretchr/testify/assert"
)

// TestOk checks the exact XML produced for small sitemaps, for both the
// "urlset" and the "sitemapindex" root elements.
func TestOk(t *testing.T) {
	// testReal adds urls to s, serializes it and compares the output with
	// the XML header followed by a <name> root element wrapping expected.
	testReal := func(s *Sitemap, name string, urls []URL, expected string) {
		for _, u := range urls {
			s.Add(u)
		}
		buf := &bytes.Buffer{}
		_, err := s.WriteTo(buf)
		// err must be the second argument: anything after it is only used as
		// a failure message, so passing nil there would never fail the test.
		assert.NoError(t, err)
		assert.Equal(t, xml.Header+"<"+name+" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">"+expected+"</"+name+">\n", buf.String())
	}
	// test runs the same expectation against both constructors.
	test := func(urls []URL, expected string) {
		testReal(NewSitemap(), "urlset", urls, expected)
		testReal(NewSitemapIndex(), "sitemapindex", urls, expected)
	}

	ts := time.Unix(1651322008, 0).UTC()

	// No entries: just the empty root element.
	test(
		[]URL{},
		"",
	)
	// Entry with a last-modification time.
	test(
		[]URL{
			{URL: "https://gitea.io/test1", LastMod: &ts},
		},
		"<url><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></url>",
	)
	// Entry without a last-modification time: <lastmod> is omitted.
	test(
		[]URL{
			{URL: "https://gitea.io/test2", LastMod: nil},
		},
		"<url><loc>https://gitea.io/test2</loc></url>",
	)
	// Several entries keep their insertion order.
	test(
		[]URL{
			{URL: "https://gitea.io/test1", LastMod: &ts},
			{URL: "https://gitea.io/test2", LastMod: nil},
		},
		"<url><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></url>"+
			"<url><loc>https://gitea.io/test2</loc></url>",
	)
}

// TestTooManyURLs verifies that WriteTo rejects a sitemap holding 50001
// entries with the "too many URLs" error.
func TestTooManyURLs(t *testing.T) {
	const total = 50001

	m := NewSitemap()
	for n := 0; n < total; n++ {
		m.Add(URL{URL: fmt.Sprintf("https://gitea.io/test%d", n)})
	}

	var out bytes.Buffer
	_, err := m.WriteTo(&out)
	assert.EqualError(t, err, "The sitemap contains too many URLs: 50001")
}

func TestSitemapTooBig(t *testing.T) {
s := NewSitemap()
s.Add(URL{URL: strings.Repeat("b", sitemapFileLimit)})
buf := &bytes.Buffer{}
_, err := s.WriteTo(buf)
assert.EqualError(t, err, "The sitemap is too big: 52428931")
}
26 changes: 25 additions & 1 deletion routers/web/explore/repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ import (
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/sitemap"
)

const (
Expand All @@ -30,11 +32,21 @@ type RepoSearchOptions struct {

// RenderRepoSearch render repositories search page
func RenderRepoSearch(ctx *context.Context, opts *RepoSearchOptions) {
page := ctx.FormInt("page")
// Sitemap index for sitemap paths
page := int(ctx.ParamsInt64("idx"))
isSitemap := ctx.Params("idx") != ""
if page <= 1 {
page = ctx.FormInt("page")
}

if page <= 0 {
page = 1
}

if isSitemap {
opts.PageSize = setting.UI.SitemapPagingNum
}

var (
repos []*repo_model.Repository
count int64
Expand Down Expand Up @@ -100,6 +112,18 @@ func RenderRepoSearch(ctx *context.Context, opts *RepoSearchOptions) {
ctx.ServerError("SearchRepository", err)
return
}
if isSitemap {
m := sitemap.NewSitemap()
for _, item := range repos {
m.Add(sitemap.URL{URL: item.HTMLURL(), LastMod: item.UpdatedUnix.AsTimePtr()})
}
ctx.Resp.Header().Set("Content-Type", "text/xml")
if _, err := m.WriteTo(ctx.Resp); err != nil {
log.Error("Failed writing sitemap: %v", err)
}
return
}

ctx.Data["Keyword"] = keyword
ctx.Data["Total"] = count
ctx.Data["Repos"] = repos
Expand Down
25 changes: 24 additions & 1 deletion routers/web/explore/user.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ import (
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/sitemap"
"code.gitea.io/gitea/modules/structs"
"code.gitea.io/gitea/modules/util"
)
Expand All @@ -33,11 +35,20 @@ func isKeywordValid(keyword string) bool {

// RenderUserSearch render user search page
func RenderUserSearch(ctx *context.Context, opts *user_model.SearchUserOptions, tplName base.TplName) {
opts.Page = ctx.FormInt("page")
// Sitemap index for sitemap paths
opts.Page = int(ctx.ParamsInt64("idx"))
isSitemap := ctx.Params("idx") != ""
if opts.Page <= 1 {
opts.Page = ctx.FormInt("page")
}
if opts.Page <= 1 {
opts.Page = 1
}

if isSitemap {
opts.PageSize = setting.UI.SitemapPagingNum
}

var (
users []*user_model.User
count int64
Expand Down Expand Up @@ -73,6 +84,18 @@ func RenderUserSearch(ctx *context.Context, opts *user_model.SearchUserOptions,
return
}
}
if isSitemap {
m := sitemap.NewSitemap()
for _, item := range users {
m.Add(sitemap.URL{URL: item.HTMLURL(), LastMod: item.UpdatedUnix.AsTimePtr()})
}
ctx.Resp.Header().Set("Content-Type", "text/xml")
if _, err := m.WriteTo(ctx.Resp); err != nil {
log.Error("Failed writing sitemap: %v", err)
}
return
}

ctx.Data["Keyword"] = opts.Keyword
ctx.Data["Total"] = count
ctx.Data["Users"] = users
Expand Down
53 changes: 53 additions & 0 deletions routers/web/home.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,18 @@ package web

import (
"net/http"
"strconv"

"code.gitea.io/gitea/models/db"
repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/sitemap"
"code.gitea.io/gitea/modules/structs"
"code.gitea.io/gitea/modules/util"
"code.gitea.io/gitea/modules/web/middleware"
"code.gitea.io/gitea/routers/web/auth"
"code.gitea.io/gitea/routers/web/user"
Expand Down Expand Up @@ -59,6 +66,52 @@ func Home(ctx *context.Context) {
ctx.HTML(http.StatusOK, tplHome)
}

// HomeSitemap renders the main sitemap: an index whose entries point at the
// paginated explore sub-sitemaps for users and for repositories.
//
// User sub-sitemaps are listed only when the explore users page is enabled.
// Only the total counts are needed here, so both searches ask for a single
// item per page. A failed search is reported through ctx.ServerError; a
// failure while writing the response is only logged.
func HomeSitemap(ctx *context.Context) {
	// Fallback page size; matches the value the UI settings are initialized with.
	const defaultPageSize = 20

	pageSize := setting.UI.SitemapPagingNum
	if pageSize <= 0 {
		// A zero or negative page size cannot split a listing into pages;
		// fall back to a usable value instead of looping forever.
		pageSize = defaultPageSize
	}

	m := sitemap.NewSitemapIndex()

	// addPages appends one index entry per sub-sitemap page of the given
	// explore section ("users" or "repos"). Page numbers start at 1.
	addPages := func(section string, total int) {
		pages := (total + pageSize - 1) / pageSize // ceiling division
		for idx := 1; idx <= pages; idx++ {
			m.Add(sitemap.URL{URL: setting.AppURL + "explore/" + section + "/sitemap-" + strconv.Itoa(idx) + ".xml"})
		}
	}

	if !setting.Service.Explore.DisableUsersPage {
		_, cnt, err := user_model.SearchUsers(&user_model.SearchUserOptions{
			Type:        user_model.UserTypeIndividual,
			ListOptions: db.ListOptions{PageSize: 1},
			IsActive:    util.OptionalBoolTrue,
			Visible:     []structs.VisibleType{structs.VisibleTypePublic},
		})
		if err != nil {
			ctx.ServerError("SearchUsers", err)
			return
		}
		addPages("users", int(cnt))
	}

	_, cnt, err := repo_model.SearchRepository(&repo_model.SearchRepoOptions{
		ListOptions: db.ListOptions{
			PageSize: 1,
		},
		Actor:     ctx.Doer,
		AllPublic: true,
	})
	if err != nil {
		ctx.ServerError("SearchRepository", err)
		return
	}
	addPages("repos", int(cnt))

	ctx.Resp.Header().Set("Content-Type", "text/xml")
	if _, err := m.WriteTo(ctx.Resp); err != nil {
		log.Error("Failed writing sitemap: %v", err)
	}
}

// NotFound render 404 page
func NotFound(ctx *context.Context) {
ctx.Data["Title"] = "Page Not Found"
Expand Down
3 changes: 3 additions & 0 deletions routers/web/web.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,7 @@ func RegisterRoutes(m *web.Route) {
// Routers.
// for health check
m.Get("/", Home)
m.Get("/sitemap.xml", ignExploreSignIn, HomeSitemap)
m.Group("/.well-known", func() {
m.Get("/openid-configuration", auth.OIDCWellKnown)
m.Group("", func() {
Expand All @@ -310,7 +311,9 @@ func RegisterRoutes(m *web.Route) {
ctx.Redirect(setting.AppSubURL + "/explore/repos")
})
m.Get("/repos", explore.Repos)
m.Get("/repos/sitemap-{idx}.xml", explore.Repos)
m.Get("/users", explore.Users)
m.Get("/users/sitemap-{idx}.xml", explore.Users)
m.Get("/organizations", explore.Organizations)
m.Get("/code", explore.Code)
m.Get("/topics/search", explore.TopicSearch)
Expand Down