Avoid reading all CSV data into memory

otelq
William Perron 1 year ago
parent ed61be7775
commit 2db3183288
No known key found for this signature in database
GPG Key ID: D1815C43C9BA3DE1

@@ -6,6 +6,7 @@ import (
"encoding/csv" "encoding/csv"
"flag" "flag"
"fmt" "fmt"
"io"
"log" "log"
"os" "os"
"strings" "strings"
@@ -31,52 +32,76 @@ func main() {
read.TrimLeadingSpace = true read.TrimLeadingSpace = true
read.LazyQuotes = *lazyQuotes read.LazyQuotes = *lazyQuotes
records, err := read.ReadAll() rec, err := read.Read()
if err != nil { if err != nil {
log.Fatalf("failed to load records in memory: %s\n", err) log.Fatalf("error reading from csv file: %s", err)
} }
widths := make([]int, len(records[0])) widths := make([]int, len(rec))
for _, row := range records { for i, col := range rec {
for i, col := range row {
widths[i] = max(widths[i], len(col)) widths[i] = max(widths[i], len(col))
} }
for {
rec, err := read.Read()
if err == io.EOF {
break
}
if err != nil {
log.Fatalf("error reading from csv file: %s", err)
} }
c := make([]string, len(records[0])) for i, col := range rec {
log.Println(records[0], len(c)) widths[i] = max(widths[i], len(col))
}
}
c := make([]string, len(widths))
for i := 0; i < len(c); i++ { for i := 0; i < len(c); i++ {
c[i] = " %-*s " c[i] = " %-*s "
} }
pattern := fmt.Sprintf("|%s|\n", strings.Join(c, "|")) pattern := fmt.Sprintf("|%s|\n", strings.Join(c, "|"))
log.Println(pattern)
sb := strings.Builder{} // Reset file descriptor cursor and take new CSV reader from it
fd.Seek(0, 0)
read = csv.NewReader(fd)
read.Comma = sep
read.TrimLeadingSpace = true
read.LazyQuotes = *lazyQuotes
// Format header row // Format header row
rec, err = read.Read()
if err != nil {
log.Fatalf("failed to read next csv record: %s", err)
}
curr := make([]any, 0, 2*len(widths)) curr := make([]any, 0, 2*len(widths))
for i := range widths { for i := range widths {
curr = append(curr, widths[i], records[0][i]) curr = append(curr, widths[i], rec[i])
} }
sb.WriteString(fmt.Sprintf(pattern, curr...)) fmt.Printf(pattern, curr...)
// Format header separator row // Format header separator row
curr = curr[:0] // empty slice but preserve capacity curr = curr[:0] // empty slice but preserve capacity
for i := range widths { for i := range widths {
curr = append(curr, widths[i], strings.Repeat("-", widths[i])) curr = append(curr, widths[i], strings.Repeat("-", widths[i]))
} }
sb.WriteString(fmt.Sprintf(pattern, curr...)) fmt.Printf(pattern, curr...)
// Format rest of records for {
for i := 1; i < len(records); i++ { rec, err := read.Read()
curr = curr[:0] if err == io.EOF {
for j := range widths { break
curr = append(curr, widths[j], records[i][j])
} }
sb.WriteString(fmt.Sprintf(pattern, curr...)) if err != nil {
log.Fatalf("error reading from csv file: %s", err)
} }
fmt.Print(sb.String()) curr = curr[:0]
for i := range widths {
curr = append(curr, widths[i], rec[i])
}
fmt.Printf(pattern, curr...)
}
} }
func max(a, b int) int { func max(a, b int) int {

Loading…
Cancel
Save