forgejo/modules/lfs/pointer_scanner_nogogit.go
KN4CK3R c03e488e14
Add LFS Migration and Mirror (#14726)
* Implemented LFS client.

* Implemented scanning for pointer files.

* Implemented downloading of lfs files.

* Moved model-dependent code into services.

* Removed models dependency. Added TryReadPointerFromBuffer.

* Migrated code from service to module.

* Centralised storage creation.

* Removed dependency from models.

* Moved ContentStore into modules.

* Share structs between server and client.

* Moved method to services.

* Implemented lfs download on clone.

* Implemented LFS sync on clone and mirror update.

* Added form fields.

* Updated templates.

* Fixed condition.

* Use alternate endpoint.

* Added missing methods.

* Fixed typo and make linter happy.

* Detached pointer parser from gogit dependency.

* Fixed TestGetLFSRange test.

* Added context to support cancellation.

* Use ReadFull to probably read more data.

* Removed duplicated code from models.

* Moved scan implementation into pointer_scanner_nogogit.

* Changed method name.

* Added comments.

* Added more/specific log/error messages.

* Embedded lfs.Pointer into models.LFSMetaObject.

* Moved code from models to module.

* Moved code from models to module.

* Moved code from models to module.

* Reduced pointer usage.

* Embedded type.

* Use promoted fields.

* Fixed unexpected eof.

* Added unit tests.

* Implemented migration of local file paths.

* Show an error on invalid LFS endpoints.

* Hide settings if not used.

* Added LFS info to mirror struct.

* Fixed comment.

* Check LFS endpoint.

* Manage LFS settings from mirror page.

* Fixed selector.

* Adjusted selector.

* Added more tests.

* Added local filesystem migration test.

* Fixed typo.

* Reset settings.

* Added special windows path handling.

* Added unit test for HTTPClient.

* Added unit test for BasicTransferAdapter.

* Moved into util package.

* Test if LFS endpoint is allowed.

* Added support for git://

* Just use a static placeholder as the displayed url may be invalid.

* Reverted to original code.

* Added "Advanced Settings".

* Updated wording.

* Added discovery info link.

* Implemented suggestion.

* Fixed missing format parameter.

* Added Pointer.IsValid().

* Always remove model on error.

* Added suggestions.

* Use channel instead of array.

* Update routers/repo/migrate.go

* fmt

Signed-off-by: Andrew Thornton <art27@cantab.net>

Co-authored-by: zeripath <art27@cantab.net>
2021-04-08 18:25:57 -04:00

111 lines
3.1 KiB
Go

// Copyright 2021 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// +build !gogit
package lfs
import (
"bufio"
"context"
"io"
"strconv"
"sync"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/git/pipeline"
)
// SearchPointerBlobs scans the whole repository for LFS pointer files
func SearchPointerBlobs(ctx context.Context, repo *git.Repository, pointerChan chan<- PointerBlob, errChan chan<- error) {
basePath := repo.Path
catFileCheckReader, catFileCheckWriter := io.Pipe()
shasToBatchReader, shasToBatchWriter := io.Pipe()
catFileBatchReader, catFileBatchWriter := io.Pipe()
wg := sync.WaitGroup{}
wg.Add(4)
// Create the go-routines in reverse order.
// 4. Take the output of cat-file --batch and check if each file in turn
// to see if they're pointers to files in the LFS store
go createPointerResultsFromCatFileBatch(ctx, catFileBatchReader, &wg, pointerChan)
// 3. Take the shas of the blobs and batch read them
go pipeline.CatFileBatch(shasToBatchReader, catFileBatchWriter, &wg, basePath)
// 2. From the provided objects restrict to blobs <=1k
go pipeline.BlobsLessThan1024FromCatFileBatchCheck(catFileCheckReader, shasToBatchWriter, &wg)
// 1. Run batch-check on all objects in the repository
if git.CheckGitVersionAtLeast("2.6.0") != nil {
revListReader, revListWriter := io.Pipe()
shasToCheckReader, shasToCheckWriter := io.Pipe()
wg.Add(2)
go pipeline.CatFileBatchCheck(shasToCheckReader, catFileCheckWriter, &wg, basePath)
go pipeline.BlobsFromRevListObjects(revListReader, shasToCheckWriter, &wg)
go pipeline.RevListAllObjects(revListWriter, &wg, basePath, errChan)
} else {
go pipeline.CatFileBatchCheckAllObjects(catFileCheckWriter, &wg, basePath, errChan)
}
wg.Wait()
close(pointerChan)
close(errChan)
}
func createPointerResultsFromCatFileBatch(ctx context.Context, catFileBatchReader *io.PipeReader, wg *sync.WaitGroup, pointerChan chan<- PointerBlob) {
defer wg.Done()
defer catFileBatchReader.Close()
bufferedReader := bufio.NewReader(catFileBatchReader)
buf := make([]byte, 1025)
loop:
for {
select {
case <-ctx.Done():
break loop
default:
}
// File descriptor line: sha
sha, err := bufferedReader.ReadString(' ')
if err != nil {
_ = catFileBatchReader.CloseWithError(err)
break
}
// Throw away the blob
if _, err := bufferedReader.ReadString(' '); err != nil {
_ = catFileBatchReader.CloseWithError(err)
break
}
sizeStr, err := bufferedReader.ReadString('\n')
if err != nil {
_ = catFileBatchReader.CloseWithError(err)
break
}
size, err := strconv.Atoi(sizeStr[:len(sizeStr)-1])
if err != nil {
_ = catFileBatchReader.CloseWithError(err)
break
}
pointerBuf := buf[:size+1]
if _, err := io.ReadFull(bufferedReader, pointerBuf); err != nil {
_ = catFileBatchReader.CloseWithError(err)
break
}
pointerBuf = pointerBuf[:size]
// Now we need to check if the pointerBuf is an LFS pointer
pointer, _ := ReadPointerFromBuffer(pointerBuf)
if !pointer.IsValid() {
continue
}
pointerChan <- PointerBlob{Hash: sha, Pointer: pointer}
}
}