From 8bb87b9a637048366d33da637ddae7d1ce4e96ac Mon Sep 17 00:00:00 2001 From: Yann Hamon Date: Sun, 26 Sep 2021 17:25:23 +0200 Subject: [PATCH] Allow bufio.Scanner to resize buffer when reading large files --- pkg/resource/files.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pkg/resource/files.go b/pkg/resource/files.go index c232509..a2f3bfa 100644 --- a/pkg/resource/files.go +++ b/pkg/resource/files.go @@ -89,7 +89,9 @@ func findFilesInFolders(ctx context.Context, paths []string, ignoreFilePatterns func findResourcesInReader(p string, f io.Reader, resources chan<- Resource, errors chan<- error, buf []byte) { scanner := bufio.NewScanner(f) - scanner.Buffer(buf, len(buf)) + // We start with a buf that is 4MB, scanner will resize it up to 256MB if needed + // https://github.com/golang/go/blob/aeea5bacbf79fb945edbeac6cd7630dd70c4d9ce/src/bufio/scan.go#L191 + scanner.Buffer(buf, 256*1024*1024) scanner.Split(SplitYAMLDocument) nRes := 0 for scanner.Scan() { @@ -127,8 +129,8 @@ func FromFiles(ctx context.Context, paths []string, ignoreFilePatterns []string) files, errors := findFilesInFolders(ctx, paths, ignoreFilePatterns) go func() { - maxResourceSize := 4 * 1024 * 1024 // 4MB ought to be enough for everybody - buf := make([]byte, maxResourceSize) // We reuse this to avoid multiple large memory allocations + maxResourceSize := 4 * 1024 * 1024 // This is the initial size - scanner will resize if needed + buf := make([]byte, maxResourceSize) // We reuse the same buffer to avoid multiple large memory allocations for p := range files { findResourcesInFile(p, resources, errors, buf)