From 668709a33f3acd4aa63448ce512c98cb48d9792e Mon Sep 17 00:00:00 2001 From: Gusted Date: Thu, 11 Apr 2024 13:34:53 +0200 Subject: [PATCH] [BUG] Handle bigger files in `git grep` - The parser of `git grep`'s output uses `bufio.Scanner`, which is a good choice overall; however, it has a limit that usually goes unnoticed: it will not read a single line (token) longer than `64 * 1024` bytes, which can be hit in practical scenarios. - Use `bufio.Reader` instead, which doesn't have this limitation but is a bit harder to work with, as it's a lower-level primitive. - Add a unit test. - Resolves https://codeberg.org/forgejo/forgejo/issues/3149 --- modules/git/grep.go | 20 ++++++++++++++++---- modules/git/grep_test.go | 27 +++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/modules/git/grep.go b/modules/git/grep.go index ee6a858f7..1de739107 100644 --- a/modules/git/grep.go +++ b/modules/git/grep.go @@ -10,6 +10,7 @@ import ( "context" "errors" "fmt" + "io" "os" "strconv" "strings" @@ -80,10 +81,21 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO defer stdoutReader.Close() isInBlock := false - scanner := bufio.NewScanner(stdoutReader) + scanner := bufio.NewReader(stdoutReader) var res *GrepResult - for scanner.Scan() { - line := scanner.Text() + for { + line, err := scanner.ReadString('\n') + if err != nil { + if err == io.EOF { + return nil + } + return err + } + // Remove delimiter. 
+ if len(line) > 0 { + line = line[:len(line)-1] + } + if !isInBlock { if _ /* ref */, filename, ok := strings.Cut(line, ":"); ok { isInBlock = true @@ -109,7 +121,7 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO res.LineCodes = append(res.LineCodes, lineCode) } } - return scanner.Err() + return nil }, }) // git grep exits by cancel (killed), usually it is caused by the limit of results diff --git a/modules/git/grep_test.go b/modules/git/grep_test.go index b5fa437c5..a321b1145 100644 --- a/modules/git/grep_test.go +++ b/modules/git/grep_test.go @@ -4,7 +4,10 @@ package git import ( + "bytes" "context" + "os" + "path" "path/filepath" "testing" @@ -49,3 +52,27 @@ func TestGrepSearch(t *testing.T) { assert.Error(t, err) assert.Len(t, res, 0) } + +func TestGrepLongFiles(t *testing.T) { + tmpDir := t.TempDir() + + err := InitRepository(DefaultContext, tmpDir, false, Sha1ObjectFormat.Name()) + assert.NoError(t, err) + + gitRepo, err := openRepositoryWithDefaultContext(tmpDir) + assert.NoError(t, err) + defer gitRepo.Close() + + assert.NoError(t, os.WriteFile(path.Join(tmpDir, "README.md"), bytes.Repeat([]byte{'a'}, 65*1024), 0o666)) + + err = AddChanges(tmpDir, true) + assert.NoError(t, err) + + err = CommitChanges(tmpDir, CommitChangesOptions{Message: "Long file"}) + assert.NoError(t, err) + + res, err := GrepSearch(context.Background(), gitRepo, "a", GrepOptions{}) + assert.NoError(t, err) + assert.Len(t, res, 1) + assert.Len(t, res[0].LineCodes[0], 65*1024) +}