Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add doctor command for full GC of LFS #21978

Merged
merged 16 commits into from
Dec 15, 2022
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions models/git/lfs.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package git
import (
"context"
"fmt"
"time"

"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/models/perm"
Expand All @@ -14,6 +15,7 @@ import (
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/util"

Expand Down Expand Up @@ -180,6 +182,12 @@ func GetLFSMetaObjectByOid(repoID int64, oid string) (*LFSMetaObject, error) {
// RemoveLFSMetaObjectByOid removes a LFSMetaObject entry from database by its OID.
// It may return ErrLFSObjectNotExist or a database error.
func RemoveLFSMetaObjectByOid(repoID int64, oid string) (int64, error) {
return RemoveLFSMetaObjectByOidFn(repoID, oid, nil)
}

// RemoveLFSMetaObjectByOidFn removes a LFSMetaObject entry from database by its OID.
// It may return ErrLFSObjectNotExist or a database error. It will run Fn with the current count within the transaction
func RemoveLFSMetaObjectByOidFn(repoID int64, oid string, fn func(count int64) error) (int64, error) {
if len(oid) == 0 {
return 0, ErrLFSObjectNotExist
}
Expand All @@ -200,6 +208,12 @@ func RemoveLFSMetaObjectByOid(repoID int64, oid string) (int64, error) {
return count, err
}

if fn != nil {
if err := fn(count); err != nil {
return count, err
}
}

return count, committer.Commit()
}

Expand Down Expand Up @@ -319,3 +333,36 @@ func GetRepoLFSSize(ctx context.Context, repoID int64) (int64, error) {
}
return lfsSize, nil
}

// IterateLFSMetaObjectsForRepo provides a iterator for LFSMetaObjects per Repo
func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(context.Context, *LFSMetaObject, int64) error) error {
var start int
batchSize := setting.Database.IterateBufferSize
sess := db.GetEngine(ctx)
type CountLFSMetaObject struct {
Count int64
LFSMetaObject
}

for {
beans := make([]*CountLFSMetaObject, 0, batchSize)
// SELECT `lfs_meta_object`.*, COUNT(`l1`.id) as `count` FROM lfs_meta_object INNER JOIN lfs_meta_object AS l1 ON l1.oid = lfs_meta_object.oid WHERE lfs_meta_object.repository_id != 4 GROUP BY lfs_meta_object.id
sess.Select("`lfs_meta_object`.*, COUNT(`l1`.oid) AS `count`")
sess.Join("INNER", "`lfs_meta_object` AS l1", "`lfs_meta_object`.oid = `l1`.oid")
sess.Where("repository_id = ? AND created_unix < ?", repoID, time.Now().Add(-24*7*time.Hour))
sess.NoAutoTime().GroupBy("`lfs_meta_object`.id")
zeripath marked this conversation as resolved.
Show resolved Hide resolved
if err := sess.Limit(batchSize, start).Find(&beans); err != nil {
return err
}
if len(beans) == 0 {
return nil
}
start += len(beans)

for _, bean := range beans {
if err := f(ctx, &bean.LFSMetaObject, bean.Count); err != nil {
return err
}
}
}
}
31 changes: 31 additions & 0 deletions modules/doctor/lfs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package doctor

import (
"context"

"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/services/repository"
)

func init() {
Register(&Check{
Title: "Garbage collect LFS",
Name: "gc-lfs",
IsDefault: false,
lunny marked this conversation as resolved.
Show resolved Hide resolved
Run: garbageCollectLFSCheck,
AbortIfFailed: false,
SkipDatabaseInitialization: false,
Priority: 1,
})
}

func garbageCollectLFSCheck(ctx context.Context, logger log.Logger, autofix bool) error {
if err := repository.GarbageCollectLFSMetaObjects(ctx, logger, autofix); err != nil {
zeripath marked this conversation as resolved.
Show resolved Hide resolved
return err
}

return checkStorage(&checkStorageOptions{LFS: true})(ctx, logger, autofix)
}
2 changes: 1 addition & 1 deletion services/cron/tasks_basic.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ func registerRepoHealthCheck() {
for _, arg := range rhcConfig.Args {
args = append(args, git.CmdArg(arg))
}
return repo_service.GitFsck(ctx, rhcConfig.Timeout, args)
return repo_service.GitFsckRepos(ctx, rhcConfig.Timeout, args)
})
}

Expand Down
86 changes: 48 additions & 38 deletions services/repository/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ import (
"xorm.io/builder"
)

// GitFsck calls 'git fsck' to check repository health.
func GitFsck(ctx context.Context, timeout time.Duration, args []git.CmdArg) error {
// GitFsckRepos calls 'git fsck' to check repository health.
func GitFsckRepos(ctx context.Context, timeout time.Duration, args []git.CmdArg) error {
log.Trace("Doing: GitFsck")

if err := db.Iterate(
Expand All @@ -35,15 +35,7 @@ func GitFsck(ctx context.Context, timeout time.Duration, args []git.CmdArg) erro
return db.ErrCancelledf("before fsck of %s", repo.FullName())
default:
}
log.Trace("Running health check on repository %v", repo)
repoPath := repo.RepoPath()
if err := git.Fsck(ctx, repoPath, timeout, args...); err != nil {
log.Warn("Failed to health check repository (%v): %v", repo, err)
if err = system_model.CreateRepositoryNotice("Failed to health check repository (%s): %v", repo.FullName(), err); err != nil {
log.Error("CreateRepositoryNotice: %v", err)
}
}
return nil
return GitFsckRepo(ctx, repo, timeout, args)
},
); err != nil {
log.Trace("Error: GitFsck: %v", err)
Expand All @@ -54,6 +46,19 @@ func GitFsck(ctx context.Context, timeout time.Duration, args []git.CmdArg) erro
return nil
}

// GitFsckRepo calls 'git fsck' to check an individual repository's health.
func GitFsckRepo(ctx context.Context, repo *repo_model.Repository, timeout time.Duration, args []git.CmdArg) error {
log.Trace("Running health check on repository %-v", repo)
repoPath := repo.RepoPath()
if err := git.Fsck(ctx, repoPath, timeout, args...); err != nil {
log.Warn("Failed to health check repository (%-v): %v", repo, err)
if err = system_model.CreateRepositoryNotice("Failed to health check repository (%s): %v", repo.FullName(), err); err != nil {
log.Error("CreateRepositoryNotice: %v", err)
}
}
return nil
}

// GitGcRepos calls 'git gc' to remove unnecessary files and optimize the local repository
func GitGcRepos(ctx context.Context, timeout time.Duration, args ...git.CmdArg) error {
log.Trace("Doing: GitGcRepos")
Expand All @@ -68,33 +73,7 @@ func GitGcRepos(ctx context.Context, timeout time.Duration, args ...git.CmdArg)
return db.ErrCancelledf("before GC of %s", repo.FullName())
default:
}
log.Trace("Running git gc on %v", repo)
command := git.NewCommand(ctx, args...).
SetDescription(fmt.Sprintf("Repository Garbage Collection: %s", repo.FullName()))
var stdout string
var err error
stdout, _, err = command.RunStdString(&git.RunOpts{Timeout: timeout, Dir: repo.RepoPath()})

if err != nil {
log.Error("Repository garbage collection failed for %v. Stdout: %s\nError: %v", repo, stdout, err)
desc := fmt.Sprintf("Repository garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
if err = system_model.CreateRepositoryNotice(desc); err != nil {
log.Error("CreateRepositoryNotice: %v", err)
}
return fmt.Errorf("Repository garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
}

// Now update the size of the repository
if err := repo_module.UpdateRepoSize(ctx, repo); err != nil {
log.Error("Updating size as part of garbage collection failed for %v. Stdout: %s\nError: %v", repo, stdout, err)
desc := fmt.Sprintf("Updating size as part of garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
if err = system_model.CreateRepositoryNotice(desc); err != nil {
log.Error("CreateRepositoryNotice: %v", err)
}
return fmt.Errorf("Updating size as part of garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
}

return nil
return GitGcRepo(ctx, repo, timeout, args)
},
); err != nil {
return err
Expand All @@ -104,6 +83,37 @@ func GitGcRepos(ctx context.Context, timeout time.Duration, args ...git.CmdArg)
return nil
}

// GitGcRepo calls 'git gc' to remove unnecessary files and optimize the local repository
func GitGcRepo(ctx context.Context, repo *repo_model.Repository, timeout time.Duration, args []git.CmdArg) error {
log.Trace("Running git gc on %-v", repo)
command := git.NewCommand(ctx, args...).
SetDescription(fmt.Sprintf("Repository Garbage Collection: %s", repo.FullName()))
var stdout string
var err error
stdout, _, err = command.RunStdString(&git.RunOpts{Timeout: timeout, Dir: repo.RepoPath()})

if err != nil {
log.Error("Repository garbage collection failed for %v. Stdout: %s\nError: %v", repo, stdout, err)
desc := fmt.Sprintf("Repository garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
if err = system_model.CreateRepositoryNotice(desc); err != nil {
log.Error("CreateRepositoryNotice: %v", err)
}
return fmt.Errorf("Repository garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
}

// Now update the size of the repository
if err := repo_module.UpdateRepoSize(ctx, repo); err != nil {
log.Error("Updating size as part of garbage collection failed for %-v. Stdout: %s\nError: %v", repo, stdout, err)
desc := fmt.Sprintf("Updating size as part of garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err)
if err = system_model.CreateRepositoryNotice(desc); err != nil {
log.Error("CreateRepositoryNotice: %v", err)
}
return fmt.Errorf("Updating size as part of garbage collection failed in repo: %s: Error: %w", repo.FullName(), err)
}

return nil
}

func gatherMissingRepoRecords(ctx context.Context) ([]*repo_model.Repository, error) {
repos := make([]*repo_model.Repository, 0, 10)
if err := db.Iterate(
Expand Down
94 changes: 94 additions & 0 deletions services/repository/lfs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package repository

import (
"context"
"fmt"

"code.gitea.io/gitea/models/db"
git_model "code.gitea.io/gitea/models/git"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"

"xorm.io/builder"
)

func GarbageCollectLFSMetaObjects(ctx context.Context, logger log.Logger, autofix bool) error {
log.Trace("Doing: GarbageCollectLFSMetaObjects")

if err := db.Iterate(
ctx,
builder.And(builder.Gt{"id": 0}),
func(ctx context.Context, repo *repo_model.Repository) error {
return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, logger, autofix)
},
); err != nil {
return err
}

log.Trace("Finished: GarbageCollectLFSMetaObjects")
return nil
}

func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.Repository, logger log.Logger, autofix bool) error {
if logger != nil {
logger.Info("Checking %-v", repo)
}
total, orphaned, collected, deleted := 0, 0, 0, 0
if logger != nil {
defer func() {
if orphaned == 0 {
logger.Info("Found %d total LFSMetaObjects in %-v", total, repo)
} else if !autofix {
logger.Info("Found %d/%d orphaned LFSMetaObjects in %-v", orphaned, total, repo)
} else {
logger.Info("Collected %d/%d orphaned/%d total LFSMetaObjects in %-v. %d removed from storage.", collected, orphaned, total, repo, deleted)
}
}()
}

gitRepo, err := git.OpenRepository(ctx, repo.RepoPath())
if err != nil {
log.Error("Unable to open git repository %-v: %v", repo, err)
return err
}
defer gitRepo.Close()

store := lfs.NewContentStore()

return git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error {
total++
pointerSha := git.ComputeBlobHash([]byte(metaObject.Pointer.StringContent()))

if gitRepo.IsObjectExist(pointerSha.String()) {
return nil
}
orphaned++

if !autofix {
return nil
}
// Non-existent pointer file
_, err = git_model.RemoveLFSMetaObjectByOidFn(repo.ID, metaObject.Oid, func(count int64) error {
if count > 0 {
return nil
}

if err := store.Delete(metaObject.RelativePath()); err != nil {
log.Error("Unable to remove lfs metaobject %s from store: %v", metaObject.Oid, err)
}
deleted++
return nil
})
if err != nil {
return fmt.Errorf("unable to remove meta-object %s in %s: %w", metaObject.Oid, repo.FullName(), err)
}
collected++

return nil
})
}