From bed6a515a58ace879fa28124f90f8f8076fc3416 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Tue, 29 Nov 2022 23:07:27 +0000 Subject: [PATCH 1/8] Add doctor command for full GC of LFS The recent PR adding orphaned checks to the LFS storage is not sufficient to completely GC LFS, as it is possible for LFSMetaObjects to remain associated with repos but still need to be garbage collected. Imagine a situation where a branch is uploaded containing LFS files but that branch is later completely deleted. The LFSMetaObjects will remain associated with the Repository but the Repository will no longer contain any pointers to the object. This PR adds a second doctor command to perform a full GC. Signed-off-by: Andrew Thornton --- models/git/lfs.go | 47 ++++++++++++++++ modules/doctor/lfs.go | 31 ++++++++++ services/cron/tasks_basic.go | 2 +- services/repository/check.go | 86 +++++++++++++++------------- services/repository/lfs.go | 106 +++++++++++++++++++++++++++++++++++ 5 files changed, 233 insertions(+), 39 deletions(-) create mode 100644 modules/doctor/lfs.go create mode 100644 services/repository/lfs.go diff --git a/models/git/lfs.go b/models/git/lfs.go index a86e84c050c2b..468dcb98976ff 100644 --- a/models/git/lfs.go +++ b/models/git/lfs.go @@ -6,6 +6,7 @@ package git import ( "context" "fmt" + "time" "code.gitea.io/gitea/models/db" "code.gitea.io/gitea/models/perm" @@ -14,6 +15,7 @@ import ( user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/lfs" "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/util" @@ -180,6 +182,12 @@ func GetLFSMetaObjectByOid(repoID int64, oid string) (*LFSMetaObject, error) { // RemoveLFSMetaObjectByOid removes a LFSMetaObject entry from database by its OID. // It may return ErrLFSObjectNotExist or a database error. 
func RemoveLFSMetaObjectByOid(repoID int64, oid string) (int64, error) { + return RemoveLFSMetaObjectByOidFn(repoID, oid, nil) +} + +// RemoveLFSMetaObjectByOidFn removes a LFSMetaObject entry from database by its OID. +// It may return ErrLFSObjectNotExist or a database error. It will run Fn with the current count within the transaction +func RemoveLFSMetaObjectByOidFn(repoID int64, oid string, fn func(count int64) error) (int64, error) { if len(oid) == 0 { return 0, ErrLFSObjectNotExist } @@ -200,6 +208,12 @@ func RemoveLFSMetaObjectByOid(repoID int64, oid string) (int64, error) { return count, err } + if fn != nil { + if err := fn(count); err != nil { + return count, err + } + } + return count, committer.Commit() } @@ -319,3 +333,36 @@ func GetRepoLFSSize(ctx context.Context, repoID int64) (int64, error) { } return lfsSize, nil } + +// IterateLFSMetaObjectsForRepo provides a iterator for LFSMetaObjects per Repo +func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(context.Context, *LFSMetaObject, int64) error) error { + var start int + batchSize := setting.Database.IterateBufferSize + sess := db.GetEngine(ctx) + type CountLFSMetaObject struct { + Count int64 + LFSMetaObject + } + + for { + beans := make([]*CountLFSMetaObject, 0, batchSize) + // SELECT `lfs_meta_object`.*, COUNT(`l1`.id) as `count` FROM lfs_meta_object INNER JOIN lfs_meta_object AS l1 ON l1.oid = lfs_meta_object.oid WHERE lfs_meta_object.repository_id != 4 GROUP BY lfs_meta_object.id + sess.Select("`lfs_meta_object`.*, COUNT(`l1`.oid) AS `count`") + sess.Join("INNER", "`lfs_meta_object` AS l1", "`lfs_meta_object`.oid = `l1`.oid") + sess.Where("repository_id = ? 
AND created_unix < ?", repoID, time.Now().Add(-24*7*time.Hour)) + sess.NoAutoTime().GroupBy("`lfs_meta_object`.id") + if err := sess.Limit(batchSize, start).Find(&beans); err != nil { + return err + } + if len(beans) == 0 { + return nil + } + start += len(beans) + + for _, bean := range beans { + if err := f(ctx, &bean.LFSMetaObject, bean.Count); err != nil { + return err + } + } + } +} diff --git a/modules/doctor/lfs.go b/modules/doctor/lfs.go new file mode 100644 index 0000000000000..8f9da02412474 --- /dev/null +++ b/modules/doctor/lfs.go @@ -0,0 +1,31 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package doctor + +import ( + "context" + + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/services/repository" +) + +func init() { + Register(&Check{ + Title: "Garbage collect LFS", + Name: "gc-lfs", + IsDefault: false, + Run: garbageCollectLFSCheck, + AbortIfFailed: false, + SkipDatabaseInitialization: false, + Priority: 1, + }) +} + +func garbageCollectLFSCheck(ctx context.Context, logger log.Logger, autofix bool) error { + if err := repository.GarbageCollectLFSMetaObjects(ctx, logger, autofix); err != nil { + return err + } + + return checkStorage(&checkStorageOptions{LFS: true})(ctx, logger, autofix) +} diff --git a/services/cron/tasks_basic.go b/services/cron/tasks_basic.go index acf3896b7142f..05aef6623d4f7 100644 --- a/services/cron/tasks_basic.go +++ b/services/cron/tasks_basic.go @@ -63,7 +63,7 @@ func registerRepoHealthCheck() { for _, arg := range rhcConfig.Args { args = append(args, git.CmdArg(arg)) } - return repo_service.GitFsck(ctx, rhcConfig.Timeout, args) + return repo_service.GitFsckRepos(ctx, rhcConfig.Timeout, args) }) } diff --git a/services/repository/check.go b/services/repository/check.go index 6e29dc93d1e2b..293cb04d38829 100644 --- a/services/repository/check.go +++ b/services/repository/check.go @@ -22,8 +22,8 @@ import ( "xorm.io/builder" ) -// GitFsck calls 'git fsck' to check 
repository health. -func GitFsck(ctx context.Context, timeout time.Duration, args []git.CmdArg) error { +// GitFsckRepos calls 'git fsck' to check repository health. +func GitFsckRepos(ctx context.Context, timeout time.Duration, args []git.CmdArg) error { log.Trace("Doing: GitFsck") if err := db.Iterate( @@ -35,15 +35,7 @@ func GitFsck(ctx context.Context, timeout time.Duration, args []git.CmdArg) erro return db.ErrCancelledf("before fsck of %s", repo.FullName()) default: } - log.Trace("Running health check on repository %v", repo) - repoPath := repo.RepoPath() - if err := git.Fsck(ctx, repoPath, timeout, args...); err != nil { - log.Warn("Failed to health check repository (%v): %v", repo, err) - if err = system_model.CreateRepositoryNotice("Failed to health check repository (%s): %v", repo.FullName(), err); err != nil { - log.Error("CreateRepositoryNotice: %v", err) - } - } - return nil + return GitFsckRepo(ctx, repo, timeout, args) }, ); err != nil { log.Trace("Error: GitFsck: %v", err) @@ -54,6 +46,19 @@ func GitFsck(ctx context.Context, timeout time.Duration, args []git.CmdArg) erro return nil } +// GitFsckRepo calls 'git fsck' to check an individual repository's health. 
+func GitFsckRepo(ctx context.Context, repo *repo_model.Repository, timeout time.Duration, args []git.CmdArg) error { + log.Trace("Running health check on repository %-v", repo) + repoPath := repo.RepoPath() + if err := git.Fsck(ctx, repoPath, timeout, args...); err != nil { + log.Warn("Failed to health check repository (%-v): %v", repo, err) + if err = system_model.CreateRepositoryNotice("Failed to health check repository (%s): %v", repo.FullName(), err); err != nil { + log.Error("CreateRepositoryNotice: %v", err) + } + } + return nil +} + // GitGcRepos calls 'git gc' to remove unnecessary files and optimize the local repository func GitGcRepos(ctx context.Context, timeout time.Duration, args ...git.CmdArg) error { log.Trace("Doing: GitGcRepos") @@ -68,33 +73,7 @@ func GitGcRepos(ctx context.Context, timeout time.Duration, args ...git.CmdArg) return db.ErrCancelledf("before GC of %s", repo.FullName()) default: } - log.Trace("Running git gc on %v", repo) - command := git.NewCommand(ctx, args...). - SetDescription(fmt.Sprintf("Repository Garbage Collection: %s", repo.FullName())) - var stdout string - var err error - stdout, _, err = command.RunStdString(&git.RunOpts{Timeout: timeout, Dir: repo.RepoPath()}) - - if err != nil { - log.Error("Repository garbage collection failed for %v. Stdout: %s\nError: %v", repo, stdout, err) - desc := fmt.Sprintf("Repository garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err) - if err = system_model.CreateRepositoryNotice(desc); err != nil { - log.Error("CreateRepositoryNotice: %v", err) - } - return fmt.Errorf("Repository garbage collection failed in repo: %s: Error: %w", repo.FullName(), err) - } - - // Now update the size of the repository - if err := repo_module.UpdateRepoSize(ctx, repo); err != nil { - log.Error("Updating size as part of garbage collection failed for %v. 
Stdout: %s\nError: %v", repo, stdout, err) - desc := fmt.Sprintf("Updating size as part of garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err) - if err = system_model.CreateRepositoryNotice(desc); err != nil { - log.Error("CreateRepositoryNotice: %v", err) - } - return fmt.Errorf("Updating size as part of garbage collection failed in repo: %s: Error: %w", repo.FullName(), err) - } - - return nil + return GitGcRepo(ctx, repo, timeout, args) }, ); err != nil { return err @@ -104,6 +83,37 @@ func GitGcRepos(ctx context.Context, timeout time.Duration, args ...git.CmdArg) return nil } +// GitGcRepo calls 'git gc' to remove unnecessary files and optimize the local repository +func GitGcRepo(ctx context.Context, repo *repo_model.Repository, timeout time.Duration, args []git.CmdArg) error { + log.Trace("Running git gc on %-v", repo) + command := git.NewCommand(ctx, args...). + SetDescription(fmt.Sprintf("Repository Garbage Collection: %s", repo.FullName())) + var stdout string + var err error + stdout, _, err = command.RunStdString(&git.RunOpts{Timeout: timeout, Dir: repo.RepoPath()}) + + if err != nil { + log.Error("Repository garbage collection failed for %v. Stdout: %s\nError: %v", repo, stdout, err) + desc := fmt.Sprintf("Repository garbage collection failed for %s. Stdout: %s\nError: %v", repo.RepoPath(), stdout, err) + if err = system_model.CreateRepositoryNotice(desc); err != nil { + log.Error("CreateRepositoryNotice: %v", err) + } + return fmt.Errorf("Repository garbage collection failed in repo: %s: Error: %w", repo.FullName(), err) + } + + // Now update the size of the repository + if err := repo_module.UpdateRepoSize(ctx, repo); err != nil { + log.Error("Updating size as part of garbage collection failed for %-v. Stdout: %s\nError: %v", repo, stdout, err) + desc := fmt.Sprintf("Updating size as part of garbage collection failed for %s. 
Stdout: %s\nError: %v", repo.RepoPath(), stdout, err) + if err = system_model.CreateRepositoryNotice(desc); err != nil { + log.Error("CreateRepositoryNotice: %v", err) + } + return fmt.Errorf("Updating size as part of garbage collection failed in repo: %s: Error: %w", repo.FullName(), err) + } + + return nil +} + func gatherMissingRepoRecords(ctx context.Context) ([]*repo_model.Repository, error) { repos := make([]*repo_model.Repository, 0, 10) if err := db.Iterate( diff --git a/services/repository/lfs.go b/services/repository/lfs.go new file mode 100644 index 0000000000000..b9c63857fe2c2 --- /dev/null +++ b/services/repository/lfs.go @@ -0,0 +1,106 @@ +// Copyright 2020 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package repository + +import ( + "context" + "errors" + "fmt" + + "code.gitea.io/gitea/models/db" + git_model "code.gitea.io/gitea/models/git" + repo_model "code.gitea.io/gitea/models/repo" + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/lfs" + "code.gitea.io/gitea/modules/log" + "xorm.io/builder" +) + +func GarbageCollectLFSMetaObjects(ctx context.Context, logger log.Logger, autofix bool) error { + log.Trace("Doing: GarbageCollectLFSMetaObjects") + + if err := db.Iterate( + ctx, + builder.And(builder.Gt{"id": 0}), + func(ctx context.Context, repo *repo_model.Repository) error { + return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, logger, autofix) + }, + ); err != nil { + return err + } + + log.Trace("Finished: GarbageCollectLFSMetaObjects") + return nil +} + +func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.Repository, logger log.Logger, autofix bool) error { + if logger != nil { + logger.Info("Checking %-v", repo) + } + total, orphaned, collected, deleted := 0, 0, 0, 0 + if logger != nil { + defer func() { + if orphaned == 0 { + logger.Info("Found %d total LFSMetaObjects in %-v", total, repo) + } else if !autofix { + logger.Info("Found %d/%d orphaned LFSMetaObjects in 
%-v", orphaned, total, repo) + } else { + logger.Info("Collected %d/%d orphaned/%d total LFSMetaObjects in %-v. %d removed from storage.", collected, orphaned, total, repo, deleted) + } + }() + } + + gitRepo, err := git.OpenRepository(ctx, repo.RepoPath()) + if err != nil { + log.Error("Unable to open git repository %-v: %v", repo, err) + return err + } + defer gitRepo.Close() + checkWr, checkRd, cancel := gitRepo.CatFileBatchCheck(ctx) + defer cancel() + + store := lfs.NewContentStore() + + return git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error { + total++ + pointerSha := git.ComputeBlobHash([]byte(metaObject.Pointer.StringContent())) + _, err := checkWr.Write([]byte(pointerSha.String())) + if err != nil { + return fmt.Errorf("unable to write pointerSHA to cat-file in %s: %w", repo.FullName(), err) + } + + _, _, _, err = git.ReadBatchLine(checkRd) + if err == nil { + return nil + } + + if !errors.Is(err, git.ErrNotExist{}) { + return fmt.Errorf("unable to read pointerSHA from cat-file in %s: %w", repo.FullName(), err) + } + + orphaned++ + + if !autofix { + return nil + } + // Non-existent pointer file + _, err = git_model.RemoveLFSMetaObjectByOidFn(repo.ID, metaObject.Oid, func(count int64) error { + if count > 0 { + return nil + } + + if err := store.Delete(metaObject.RelativePath()); err != nil { + log.Error("Unable to remove lfs metaobject %s from store: %v", metaObject.Oid, err) + } + deleted++ + return nil + }) + if err != nil { + return fmt.Errorf("unable to remove meta-object %s in %s: %w", metaObject.Oid, repo.FullName(), err) + } + collected++ + + return nil + }) +} From cc31e60ef08c55c5a5bfc091ea5906f88ad5a76b Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Wed, 30 Nov 2022 20:31:09 +0000 Subject: [PATCH 2/8] fix fmt Signed-off-by: Andrew Thornton --- services/repository/lfs.go | 1 + 1 file changed, 1 insertion(+) diff --git a/services/repository/lfs.go 
b/services/repository/lfs.go index b9c63857fe2c2..f4844490be50d 100644 --- a/services/repository/lfs.go +++ b/services/repository/lfs.go @@ -14,6 +14,7 @@ import ( "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/lfs" "code.gitea.io/gitea/modules/log" + "xorm.io/builder" ) From 2d36bf718dfac2f5b2867d96663055767669849b Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sat, 3 Dec 2022 20:07:36 +0000 Subject: [PATCH 3/8] use IsObjectExist instead Signed-off-by: Andrew Thornton --- services/repository/lfs.go | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/services/repository/lfs.go b/services/repository/lfs.go index f4844490be50d..7e616e529b226 100644 --- a/services/repository/lfs.go +++ b/services/repository/lfs.go @@ -5,7 +5,6 @@ package repository import ( "context" - "errors" "fmt" "code.gitea.io/gitea/models/db" @@ -58,28 +57,16 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R return err } defer gitRepo.Close() - checkWr, checkRd, cancel := gitRepo.CatFileBatchCheck(ctx) - defer cancel() store := lfs.NewContentStore() return git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error { total++ pointerSha := git.ComputeBlobHash([]byte(metaObject.Pointer.StringContent())) - _, err := checkWr.Write([]byte(pointerSha.String())) - if err != nil { - return fmt.Errorf("unable to write pointerSHA to cat-file in %s: %w", repo.FullName(), err) - } - _, _, _, err = git.ReadBatchLine(checkRd) - if err == nil { + if gitRepo.IsObjectExist(pointerSha.String()) { return nil } - - if !errors.Is(err, git.ErrNotExist{}) { - return fmt.Errorf("unable to read pointerSHA from cat-file in %s: %w", repo.FullName(), err) - } - orphaned++ if !autofix { From 3b4deb6cad2a55cb7b618ee168988bea21a86c04 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sun, 4 Dec 2022 17:38:05 +0000 Subject: [PATCH 4/8] update date of header 
Signed-off-by: Andrew Thornton --- services/repository/lfs.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/repository/lfs.go b/services/repository/lfs.go index 7e616e529b226..175d3b8419d73 100644 --- a/services/repository/lfs.go +++ b/services/repository/lfs.go @@ -1,4 +1,4 @@ -// Copyright 2020 The Gitea Authors. All rights reserved. +// Copyright 2022 The Gitea Authors. All rights reserved. // SPDX-License-Identifier: MIT package repository From 8aea5a708421e55bb9e68f8829effa58f0b3647d Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Thu, 8 Dec 2022 21:15:30 +0000 Subject: [PATCH 5/8] slight adjust select query Signed-off-by: Andrew Thornton --- models/git/lfs.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/models/git/lfs.go b/models/git/lfs.go index 468dcb98976ff..7afac9f4860a8 100644 --- a/models/git/lfs.go +++ b/models/git/lfs.go @@ -346,11 +346,11 @@ func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(cont for { beans := make([]*CountLFSMetaObject, 0, batchSize) - // SELECT `lfs_meta_object`.*, COUNT(`l1`.id) as `count` FROM lfs_meta_object INNER JOIN lfs_meta_object AS l1 ON l1.oid = lfs_meta_object.oid WHERE lfs_meta_object.repository_id != 4 GROUP BY lfs_meta_object.id - sess.Select("`lfs_meta_object`.*, COUNT(`l1`.oid) AS `count`") - sess.Join("INNER", "`lfs_meta_object` AS l1", "`lfs_meta_object`.oid = `l1`.oid") - sess.Where("repository_id = ? AND created_unix < ?", repoID, time.Now().Add(-24*7*time.Hour)) - sess.NoAutoTime().GroupBy("`lfs_meta_object`.id") + // SELECT `lfs_meta_object`.*, COUNT(`l1`.id) as `count` FROM lfs_meta_object INNER JOIN lfs_meta_object AS l1 ON l1.oid = lfs_meta_object.oid WHERE lfs_meta_object.repository_id = ? GROUP BY lfs_meta_object.id + sess.Select("`lfs_meta_object`.*, COUNT(`l1`.oid) AS `count`"). + Join("INNER", "`lfs_meta_object` AS l1", "`lfs_meta_object`.oid = `l1`.oid"). + Where("`lfs_meta_object`.repository_id = ? 
AND `lfs_meta_object`.created_unix < ?", repoID, time.Now().Add(-24*7*time.Hour)). + GroupBy("`lfs_meta_object`.id") if err := sess.Limit(batchSize, start).Find(&beans); err != nil { return err } From 4b5fc1239f1c4e665b4c20cf3dbf33a8da4a70c1 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Thu, 8 Dec 2022 21:19:06 +0000 Subject: [PATCH 6/8] as per wolfogre Signed-off-by: Andrew Thornton --- modules/doctor/lfs.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/modules/doctor/lfs.go b/modules/doctor/lfs.go index 8f9da02412474..410ed5a9a5f89 100644 --- a/modules/doctor/lfs.go +++ b/modules/doctor/lfs.go @@ -5,8 +5,10 @@ package doctor import ( "context" + "fmt" "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/services/repository" ) @@ -23,6 +25,10 @@ func init() { } func garbageCollectLFSCheck(ctx context.Context, logger log.Logger, autofix bool) error { + if !setting.LFS.StartServer { + return fmt.Errorf("LFS support is disabled") + } + if err := repository.GarbageCollectLFSMetaObjects(ctx, logger, autofix); err != nil { return err } From 16c082ffd7a4da06585cfe3310e500b06eb3609f Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Wed, 14 Dec 2022 16:51:54 +0000 Subject: [PATCH 7/8] add olderthan option Signed-off-by: Andrew Thornton --- models/git/lfs.go | 17 ++++++++++++----- services/repository/lfs.go | 3 +++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/models/git/lfs.go b/models/git/lfs.go index 7afac9f4860a8..8d418b928d14b 100644 --- a/models/git/lfs.go +++ b/models/git/lfs.go @@ -334,11 +334,15 @@ func GetRepoLFSSize(ctx context.Context, repoID int64) (int64, error) { return lfsSize, nil } +type IterateLFSMetaObjectsForRepoOptions struct { + OlderThan time.Time +} + // IterateLFSMetaObjectsForRepo provides a iterator for LFSMetaObjects per Repo -func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(context.Context, *LFSMetaObject, int64) error) error { +func 
IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(context.Context, *LFSMetaObject, int64) error, opts *IterateLFSMetaObjectsForRepoOptions) error { var start int batchSize := setting.Database.IterateBufferSize - sess := db.GetEngine(ctx) + engine := db.GetEngine(ctx) type CountLFSMetaObject struct { Count int64 LFSMetaObject @@ -347,10 +351,13 @@ func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(cont for { beans := make([]*CountLFSMetaObject, 0, batchSize) // SELECT `lfs_meta_object`.*, COUNT(`l1`.id) as `count` FROM lfs_meta_object INNER JOIN lfs_meta_object AS l1 ON l1.oid = lfs_meta_object.oid WHERE lfs_meta_object.repository_id = ? GROUP BY lfs_meta_object.id - sess.Select("`lfs_meta_object`.*, COUNT(`l1`.oid) AS `count`"). + sess := engine.Select("`lfs_meta_object`.*, COUNT(`l1`.oid) AS `count`"). Join("INNER", "`lfs_meta_object` AS l1", "`lfs_meta_object`.oid = `l1`.oid"). - Where("`lfs_meta_object`.repository_id = ? AND `lfs_meta_object`.created_unix < ?", repoID, time.Now().Add(-24*7*time.Hour)). 
- GroupBy("`lfs_meta_object`.id") + Where("`lfs_meta_object`.repository_id = ?", repoID) + if !opts.OlderThan.IsZero() { + sess.And("`lfs_meta_object`.created_unix < ?", opts.OlderThan) + } + sess.GroupBy("`lfs_meta_object`.id") if err := sess.Limit(batchSize, start).Find(&beans); err != nil { return err } diff --git a/services/repository/lfs.go b/services/repository/lfs.go index 175d3b8419d73..c07af3750757c 100644 --- a/services/repository/lfs.go +++ b/services/repository/lfs.go @@ -6,6 +6,7 @@ package repository import ( "context" "fmt" + "time" "code.gitea.io/gitea/models/db" git_model "code.gitea.io/gitea/models/git" @@ -90,5 +91,7 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R collected++ return nil + }, &git_model.IterateLFSMetaObjectsForRepoOptions{ + OlderThan: time.Now().Add(-24 * 7 * time.Hour), }) } From 46a1a5e2d8c8a2396f57bb3ad90860c4f8b47644 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Wed, 14 Dec 2022 16:57:42 +0000 Subject: [PATCH 8/8] add comment Signed-off-by: Andrew Thornton --- services/repository/lfs.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/services/repository/lfs.go b/services/repository/lfs.go index c07af3750757c..0e88d359a8338 100644 --- a/services/repository/lfs.go +++ b/services/repository/lfs.go @@ -92,6 +92,14 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R return nil }, &git_model.IterateLFSMetaObjectsForRepoOptions{ + // Only attempt to garbage collect lfs meta objects older than a week as the order of git lfs upload + // and git object upload is not necessarily guaranteed. It's possible to imagine a situation whereby + // an LFS object is uploaded but the git branch is not uploaded immediately, or there are some rapid + // changes in new branches that might lead to lfs objects becoming temporarily unassociated with git + // objects. 
+ // + // A week is probably longer than strictly necessary, but it should definitely be enough that any + // unassociated LFS object is genuinely unassociated. OlderThan: time.Now().Add(-24 * 7 * time.Hour), }) }