From 2db318328883fb12dcd7ac7068bcc0645cf0d4d4 Mon Sep 17 00:00:00 2001 From: William Perron Date: Thu, 12 Oct 2023 11:24:27 -0400 Subject: [PATCH] Avoid reading all csv data into memory --- cmd/md-fmt/main.go | 63 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/cmd/md-fmt/main.go b/cmd/md-fmt/main.go index 22f3dbc..53383ff 100644 --- a/cmd/md-fmt/main.go +++ b/cmd/md-fmt/main.go @@ -6,6 +6,7 @@ import ( "encoding/csv" "flag" "fmt" + "io" "log" "os" "strings" @@ -31,52 +32,76 @@ func main() { read.TrimLeadingSpace = true read.LazyQuotes = *lazyQuotes - records, err := read.ReadAll() + rec, err := read.Read() if err != nil { - log.Fatalf("failed to load records in memory: %s\n", err) + log.Fatalf("error reading from csv file: %s", err) } - widths := make([]int, len(records[0])) - for _, row := range records { - for i, col := range row { + widths := make([]int, len(rec)) + for i, col := range rec { + widths[i] = max(widths[i], len(col)) + } + for { + rec, err := read.Read() + if err == io.EOF { + break + } + if err != nil { + log.Fatalf("error reading from csv file: %s", err) + } + + for i, col := range rec { widths[i] = max(widths[i], len(col)) } } - c := make([]string, len(records[0])) - log.Println(records[0], len(c)) + c := make([]string, len(widths)) for i := 0; i < len(c); i++ { c[i] = " %-*s " } pattern := fmt.Sprintf("|%s|\n", strings.Join(c, "|")) - log.Println(pattern) - sb := strings.Builder{} + // Reset file descriptor cursor and take new CSV reader from it + fd.Seek(0, 0) + read = csv.NewReader(fd) + read.Comma = sep + read.TrimLeadingSpace = true + read.LazyQuotes = *lazyQuotes // Format header row + rec, err = read.Read() + if err != nil { + log.Fatalf("failed to read next csv record: %s", err) + } + curr := make([]any, 0, 2*len(widths)) for i := range widths { - curr = append(curr, widths[i], records[0][i]) + curr = append(curr, widths[i], rec[i]) } - sb.WriteString(fmt.Sprintf(pattern, curr...)) + fmt.Printf(pattern, curr...) // Format header separator row curr = curr[:0] // empty slice but preserve capacity for i := range widths { curr = append(curr, widths[i], strings.Repeat("-", widths[i])) } - sb.WriteString(fmt.Sprintf(pattern, curr...)) + fmt.Printf(pattern, curr...) + + for { + rec, err := read.Read() + if err == io.EOF { + break + } + if err != nil { + log.Fatalf("error reading from csv file: %s", err) + } - // Format rest of records - for i := 1; i < len(records); i++ { curr = curr[:0] - for j := range widths { - curr = append(curr, widths[j], records[i][j]) + for i := range widths { + curr = append(curr, widths[i], rec[i]) } - sb.WriteString(fmt.Sprintf(pattern, curr...)) + fmt.Printf(pattern, curr...) } - - fmt.Print(sb.String()) } func max(a, b int) int {