From 6b280e7b33d7b82dab427a57d96767e6812ad4aa Mon Sep 17 00:00:00 2001 From: William Perron Date: Wed, 7 Feb 2024 15:31:20 -0500 Subject: [PATCH] finish instrumenting with opentelemetry --- absences.go | 2 +- audit_log.go | 39 ++++++++++++- cmd/themis-server/main.go | 2 +- conflicts.go | 20 +++++++ fmt.go | 12 ++++ notify.go | 1 + store.go | 120 +++++++++++++++++++++++++++++++++++++- 7 files changed, 189 insertions(+), 7 deletions(-) diff --git a/absences.go b/absences.go index 89b0143..51ba5b2 100644 --- a/absences.go +++ b/absences.go @@ -109,7 +109,7 @@ func (s *Store) GetAbsentees(ctx context.Context, session time.Time) ([]string, type Schedule map[string][]string func (s *Store) GetSchedule(ctx context.Context, from, to time.Time) (Schedule, error) { - ctx, span := tracer.Start(ctx, "add_absence", trace.WithAttributes( + ctx, span := tracer.Start(ctx, "get_schedule", trace.WithAttributes( semconv.DBSystemSqlite, semconv.DBSQLTable("absences"), semconv.DBOperation("select"), diff --git a/audit_log.go b/audit_log.go index 1c36572..7645258 100644 --- a/audit_log.go +++ b/audit_log.go @@ -3,11 +3,14 @@ package themis import ( "context" "database/sql" - "errors" "fmt" "time" "github.com/rs/zerolog/log" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + semconv "go.opentelemetry.io/otel/semconv/v1.24.0" + "go.opentelemetry.io/otel/trace" ) type EventType int @@ -45,7 +48,7 @@ func EventTypeFromString(ev string) (EventType, error) { case "ABSENT": return EventAbsence, nil default: - return EventType(9999), errors.New("no such event type") + return EventType(9999), fmt.Errorf("no such event type: %s", ev) } } @@ -59,22 +62,37 @@ type AuditableEvent struct { // Audit writes to the audit table, returns nothing because it is meant to be // used in a defered statement on functions that write to the database. func (s *Store) Audit(ctx context.Context, ev *AuditableEvent) { + ctx, span := tracer.Start(ctx, "audit", trace.WithAttributes( + semconv.DBSystemSqlite, + semconv.DBSQLTable("audit_log"), + semconv.DBOperation("insert"), + attribute.String("user_id", ev.userId), + attribute.Stringer("event_type", ev.eventType), + )) + defer span.End() + if ev.err == nil { log.Debug().Ctx(ctx).Str("event_type", ev.eventType.String()).Str("userid", ev.userId).Msg("recording audit log") ctx := context.Background() tx, err := s.db.Begin() if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, "failed to start transaction") log.Error().Ctx(ctx).Err(err).Msg("failed to start transaction") } defer tx.Commit() //nolint:errcheck stmt, err := s.db.PrepareContext(ctx, "INSERT INTO audit_log (userid, event_type, ts) VALUES (?, ?, ?)") if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, "failed to prepare audit log insert") log.Error().Ctx(ctx).Err(err).Msg("failed to prepare audit log insert") } if _, err := stmt.ExecContext(ctx, ev.userId, ev.eventType.String(), time.Now()); err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, "failed to insert audit log") log.Error().Ctx(ctx).Err(err).Msg("failed to insert audit log") } } @@ -88,10 +106,20 @@ type AuditEvent struct { } func (s *Store) LastOf(ctx context.Context, t EventType) (AuditEvent, error) { + ctx, span := tracer.Start(ctx, "find_last_audit_log", trace.WithAttributes( + semconv.DBSystemSqlite, + semconv.DBSQLTable("audit_log"), + semconv.DBOperation("select"), + attribute.Stringer("event_type", t), + )) + defer span.End() + log.Debug().Ctx(ctx).Str("event_type", t.String()).Msg("finding last audit log") stmt, err := s.db.PrepareContext(ctx, `SELECT id, userid, event_type, ts FROM audit_log WHERE event_type = ? ORDER BY ts DESC LIMIT 1`) if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, "failed to get last event") return AuditEvent{}, fmt.Errorf("failed to get last event of type %s: %w", t.String(), err) } @@ -101,15 +129,20 @@ func (s *Store) LastOf(ctx context.Context, t EventType) (AuditEvent, error) { var rawEventType string err = row.Scan(&ev.Id, &ev.UserId, &rawEventType, &ev.Timestamp) if err == sql.ErrNoRows { + span.RecordError(ErrNever) return AuditEvent{}, ErrNever } if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, "failed to scan row") return AuditEvent{}, fmt.Errorf("failed to scan row: %w", err) } ev.EventType, err = EventTypeFromString(rawEventType) if err != nil { - return AuditEvent{}, fmt.Errorf("failed to parse event type %s: %w", rawEventType, err) + span.RecordError(err) + span.SetStatus(codes.Error, "failed to parse event type") + return AuditEvent{}, fmt.Errorf("failed to parse event type: %w", err) } return ev, nil diff --git a/cmd/themis-server/main.go b/cmd/themis-server/main.go index 641fde8..0e04d9f 100644 --- a/cmd/themis-server/main.go +++ b/cmd/themis-server/main.go @@ -951,7 +951,7 @@ func serve(address string) error { func withLogging(name string, h Handler) Handler { return func(ctx context.Context, s *discordgo.Session, i *discordgo.InteractionCreate) error { - ctx, span := tracer.Start(ctx, "discord-command") + ctx, span := tracer.Start(ctx, fmt.Sprintf("discord_command %s", name)) defer span.End() start := time.Now() diff --git a/conflicts.go b/conflicts.go index a1cb5c7..b1e39d3 100644 --- a/conflicts.go +++ b/conflicts.go @@ -5,6 +5,10 @@ import ( "fmt" "github.com/rs/zerolog/log" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + semconv "go.opentelemetry.io/otel/semconv/v1.24.0" + "go.opentelemetry.io/otel/trace" ) type Conflict struct { @@ -34,15 +38,29 @@ INNER JOIN claiming WHERE claims.userid IS NOT ?;` func (s *Store) FindConflicts(ctx context.Context, userId, name string, claimType ClaimType) ([]Conflict, error) { + ctx, span := tracer.Start(ctx, "find_conflicts", trace.WithAttributes( + semconv.DBSystemSqlite, + semconv.DBSQLTable("claims"), + semconv.DBOperation("select"), + attribute.String("user_id", userId), + attribute.String("claim_name", name), + attribute.Stringer("claim_type", claimType), + )) + defer span.End() + log.Debug().Ctx(ctx).Stringer("claim_type", claimType).Str("userid", userId).Msg("searching for potential conflicts") stmt, err := s.db.PrepareContext(ctx, conflictQuery) if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, "failed to prepare query") return nil, fmt.Errorf("failed to prepare conflicts query: %w", err) } rows, err := stmt.QueryContext(ctx, claimType, name, userId) if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, "failed to execute query") return nil, fmt.Errorf("failed to get conflicting provinces: %w", err) } defer stmt.Close() @@ -58,6 +76,8 @@ func (s *Store) FindConflicts(ctx context.Context, userId, name string, claimTyp ) err = rows.Scan(&province, &player, &sClaimType, &claimName, &claimId) if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, "failed to scan row") return nil, fmt.Errorf("failed to scan row: %w", err) } diff --git a/fmt.go b/fmt.go index d50b2ed..914debc 100644 --- a/fmt.go +++ b/fmt.go @@ -7,13 +7,23 @@ import ( "strings" "github.com/rs/zerolog/log" + "go.opentelemetry.io/otel/codes" + semconv "go.opentelemetry.io/otel/semconv/v1.24.0" + "go.opentelemetry.io/otel/trace" ) func FormatRows(ctx context.Context, rows *sql.Rows) (string, error) { + ctx, span := tracer.Start(ctx, "format_rows", trace.WithAttributes( + semconv.DBSystemSqlite, + )) + defer span.End() + sb := strings.Builder{} cols, err := rows.Columns() if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, "failed to get rows columns") return "", fmt.Errorf("failed to get rows columns: %w", err) } @@ -37,6 +47,8 @@ func FormatRows(ctx context.Context, rows *sql.Rows) (string, error) { row[i] = new(sql.NullString) } if err := rows.Scan(row...); err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, "failed to scan row") return "", fmt.Errorf("failed to scan next row: %w", err) } diff --git a/notify.go b/notify.go index 737ce5d..b54ba0d 100644 --- a/notify.go +++ b/notify.go @@ -68,6 +68,7 @@ func (n *Notifier) Send() { n.c <- struct{}{} } +// TODO(wperron) is there a (nice) way to instrument this function? func (n *Notifier) NotifyFunc(ctx context.Context, f func()) { for { select { diff --git a/store.go b/store.go index 2018bbf..30ccb45 100644 --- a/store.go +++ b/store.go @@ -15,6 +15,9 @@ import ( _ "github.com/mattn/go-sqlite3" "github.com/rs/zerolog/log" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + semconv "go.opentelemetry.io/otel/semconv/v1.24.0" "go.opentelemetry.io/otel/trace" ) @@ -71,6 +74,16 @@ func (s *Store) Close() error { } func (s *Store) Claim(ctx context.Context, userId, player, province string, claimType ClaimType) (int, error) { + ctx, span := tracer.Start(ctx, "claim", trace.WithAttributes( + semconv.DBSystemSqlite, + semconv.DBSQLTable("claims"), + semconv.DBOperation("insert"), + attribute.String("user_id", userId), + attribute.String("claim_name", province), + attribute.Stringer("claim_type", claimType), + )) + defer span.End() + log.Debug(). Ctx(ctx). Str("userid", userId). @@ -87,6 +100,8 @@ func (s *Store) Claim(ctx context.Context, userId, player, province string, clai tx, err := s.db.Begin() if err != nil { audit.err = err + span.RecordError(err) + span.SetStatus(codes.Error, "failed to begin transaction") return 0, fmt.Errorf("failed to begin transaction: %w", err) } defer tx.Commit() //nolint:errcheck @@ -94,12 +109,14 @@ func (s *Store) Claim(ctx context.Context, userId, player, province string, clai conflicts, err := s.FindConflicts(ctx, userId, province, claimType) if err != nil { audit.err = err + span.RecordError(err) + span.SetStatus(codes.Error, "conflict check failed") return 0, fmt.Errorf("failed to run conflicts check: %w", err) } if len(conflicts) > 0 { log.Debug().Ctx(ctx).Int("len", len(conflicts)).Msg("found conflicts") - audit.err = err + audit.err = errors.New("found conflicts") return 0, ErrConflict{Conflicts: conflicts} } @@ -107,6 +124,8 @@ func (s *Store) Claim(ctx context.Context, userId, player, province string, clai stmt, err := s.db.PrepareContext(ctx, `SELECT COUNT(1) FROM claimables WHERE lower(name) = ? and typ = ?`) if err != nil { audit.err = err + span.RecordError(err) + span.SetStatus(codes.Error, "failed to prepare query") return 0, fmt.Errorf("failed to prepare count query: %w", err) } defer stmt.Close() @@ -116,17 +135,23 @@ func (s *Store) Claim(ctx context.Context, userId, player, province string, clai err = row.Scan(&count) if err != nil { audit.err = err + span.RecordError(err) + span.SetStatus(codes.Error, "failed to scan row") return 0, fmt.Errorf("failed to scan: %w", err) } if count == 0 { audit.err = err + span.RecordError(err) + span.SetStatus(codes.Error, "no matching provinces found") return 0, fmt.Errorf("found no provinces for %s named %s", claimType, province) } stmt, err = s.db.PrepareContext(ctx, "INSERT INTO claims (player, claim_type, val, userid) VALUES (?, ?, ?, ?)") if err != nil { audit.err = err + span.RecordError(err) + span.SetStatus(codes.Error, "failed to prepare query") return 0, fmt.Errorf("failed to prepare claim query: %w", err) } defer stmt.Close() @@ -134,12 +159,16 @@ func (s *Store) Claim(ctx context.Context, userId, player, province string, clai res, err := stmt.ExecContext(ctx, player, claimType, province, userId) if err != nil { audit.err = err + span.RecordError(err) + span.SetStatus(codes.Error, "failed to execute query") return 0, fmt.Errorf("failed to insert claim: %w", err) } id, err := res.LastInsertId() if err != nil { audit.err = err + span.RecordError(err) + span.SetStatus(codes.Error, "failed to last insert id") return 0, fmt.Errorf("failed to get last ID: %w", err) } @@ -147,6 +176,15 @@ func (s *Store) Claim(ctx context.Context, userId, player, province string, clai } func (s *Store) ListAvailability(ctx context.Context, claimType ClaimType, search ...string) ([]string, error) { + ctx, span := tracer.Start(ctx, "list_availability", trace.WithAttributes( + semconv.DBSystemSqlite, + semconv.DBSQLTable("claimables"), + semconv.DBOperation("select"), + attribute.StringSlice("search_terms", search), + attribute.Stringer("claim_type", claimType), + )) + defer span.End() + log.Debug().Ctx(ctx).Stringer("claim_type", claimType).Strs("search_terms", search).Msg("listing available entries") queryParams := []any{string(claimType)} @@ -164,12 +202,16 @@ func (s *Store) ListAvailability(ctx context.Context, claimType ClaimType, searc stmt, err := s.db.PrepareContext(ctx, queryPattern) if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, "failed to prepare query") return nil, fmt.Errorf("failed to prepare query: %w", err) } defer stmt.Close() rows, err := stmt.QueryContext(ctx, queryParams...) if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, "failed to execute query") return nil, fmt.Errorf("failed to execute query: %w", err) } @@ -177,6 +219,8 @@ func (s *Store) ListAvailability(ctx context.Context, claimType ClaimType, searc for rows.Next() { var s string if err := rows.Scan(&s); err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, "failed to scan row") return nil, fmt.Errorf("failed to scan rows: %w", err) } avail = append(avail, s) @@ -186,15 +230,26 @@ func (s *Store) ListAvailability(ctx context.Context, claimType ClaimType, searc } func (s *Store) ListClaims(ctx context.Context) ([]Claim, error) { + ctx, span := tracer.Start(ctx, "list_claims", trace.WithAttributes( + semconv.DBSystemSqlite, + semconv.DBSQLTable("claims"), + semconv.DBOperation("select"), + )) + defer span.End() + log.Debug().Ctx(ctx).Msg("listing all claims currently in database") stmt, err := s.db.PrepareContext(ctx, `SELECT id, player, claim_type, val FROM claims`) if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, "failed to prepare query") return nil, fmt.Errorf("failed to prepare query: %w", err) } defer stmt.Close() rows, err := stmt.QueryContext(ctx) if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, "failed to execute query") return nil, fmt.Errorf("failed to execute query: %w", err) } @@ -204,10 +259,14 @@ func (s *Store) ListClaims(ctx context.Context) ([]Claim, error) { var rawType string err = rows.Scan(&c.ID, &c.Player, &rawType, &c.Name) if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, "failed to scan row") return nil, fmt.Errorf("failed to scan row: %w", err) } cl, err := ClaimTypeFromString(rawType) if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, "failed to parse claim type") return nil, fmt.Errorf("unexpected error converting raw claim type: %w", err) } c.Type = cl @@ -233,10 +292,20 @@ func (cd ClaimDetail) String() string { } func (s *Store) DescribeClaim(ctx context.Context, ID int) (ClaimDetail, error) { + ctx, span := tracer.Start(ctx, "describe_claim", trace.WithAttributes( + semconv.DBSystemSqlite, + semconv.DBSQLTable("claims"), + semconv.DBOperation("select"), + attribute.Int("claim_id", ID), + )) + defer span.End() + log.Debug().Ctx(ctx).Int("id", ID).Msg("describing claim") stmt, err := s.db.PrepareContext(ctx, `SELECT id, player, claim_type, val FROM claims WHERE id = ?`) if err != nil { - return ClaimDetail{}, fmt.Errorf("failed to get claim: %w", err) + span.RecordError(err) + span.SetStatus(codes.Error, "failed to prepare query") + return ClaimDetail{}, fmt.Errorf("failed to prepare select claim query: %w", err) } defer stmt.Close() @@ -246,25 +315,34 @@ func (s *Store) DescribeClaim(ctx context.Context, ID int) (ClaimDetail, error) var rawType string err = row.Scan(&c.ID, &c.Player, &rawType, &c.Name) if err == sql.ErrNoRows { + span.RecordError(ErrNoSuchClaim) return ClaimDetail{}, ErrNoSuchClaim } if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, "failed to scan row") return ClaimDetail{}, fmt.Errorf("failed to scan row: %w", err) } cl, err := ClaimTypeFromString(rawType) if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, "failed to parse claim type") return ClaimDetail{}, fmt.Errorf("unexpected error converting raw claim type: %w", err) } c.Type = cl stmt, err = s.db.PrepareContext(ctx, `SELECT province FROM claimables WHERE name = ? AND typ = ?`) if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, "failed to prepare query") return ClaimDetail{}, fmt.Errorf("failed to prepare query: %w", err) } defer stmt.Close() rows, err := stmt.QueryContext(ctx, c.Name, cl) if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, "failed to execute query") return ClaimDetail{}, fmt.Errorf("failed to execute query: %w", err) } @@ -273,6 +351,8 @@ func (s *Store) DescribeClaim(ctx context.Context, ID int) (ClaimDetail, error) var p string err = rows.Scan(&p) if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, "failed to scan row") return ClaimDetail{}, fmt.Errorf("failed to scan result set: %w", err) } provinces = append(provinces, p) @@ -285,6 +365,15 @@ func (s *Store) DescribeClaim(ctx context.Context, ID int) (ClaimDetail, error) } func (s *Store) DeleteClaim(ctx context.Context, ID int, userId string) error { + ctx, span := tracer.Start(ctx, "delete_claim", trace.WithAttributes( + semconv.DBSystemSqlite, + semconv.DBSQLTable("claims"), + semconv.DBOperation("delete"), + attribute.Int("claim_id", ID), + attribute.String("user_id", userId), + )) + defer span.End() + log.Debug().Ctx(ctx).Str("userid", userId).Int("id", ID).Msg("deleting claim") audit := &AuditableEvent{ userId: userId, @@ -295,6 +384,8 @@ func (s *Store) DeleteClaim(ctx context.Context, ID int, userId string) error { stmt, err := s.db.PrepareContext(ctx, "DELETE FROM claims WHERE id = ? AND userid = ?") if err != nil { audit.err = err + span.RecordError(err) + span.SetStatus(codes.Error, "failed to prepare query") return fmt.Errorf("failed to prepare query: %w", err) } defer stmt.Close() @@ -302,25 +393,39 @@ func (s *Store) DeleteClaim(ctx context.Context, ID int, userId string) error { res, err := stmt.ExecContext(ctx, ID, userId) if err != nil { audit.err = err + span.RecordError(err) + span.SetStatus(codes.Error, "failed to execute query") return fmt.Errorf("failed to delete claim ID %d: %w", ID, err) } rows, err := res.RowsAffected() if err != nil { audit.err = err + span.RecordError(err) + span.SetStatus(codes.Error, "failed to get number of affected rows") return fmt.Errorf("failed to get affected rows: %w", err) } if rows == 0 { audit.err = ErrNoSuchClaim + span.RecordError(ErrNoSuchClaim) return ErrNoSuchClaim } return nil } func (s *Store) CountClaims(ctx context.Context) (total, uniquePlayers int, err error) { + ctx, span := tracer.Start(ctx, "count_claims", trace.WithAttributes( + semconv.DBSystemSqlite, + semconv.DBSQLTable("claims"), + semconv.DBOperation("select"), + )) + defer span.End() + log.Debug().Ctx(ctx).Msg("counting all claims and unique users") stmt, err := s.db.PrepareContext(ctx, "SELECT COUNT(1), COUNT(DISTINCT(userid)) FROM claims") if err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, "failed to prepare query") return 0, 0, fmt.Errorf("failed to prepare query: %w", err) } defer stmt.Close() @@ -328,6 +433,8 @@ func (s *Store) CountClaims(ctx context.Context) (total, uniquePlayers int, err res := stmt.QueryRowContext(ctx) if err := res.Scan(&total, &uniquePlayers); err != nil { + span.RecordError(err) + span.SetStatus(codes.Error, "failed to scan row") return 0, 0, fmt.Errorf("failed to scan result: %w", err) } @@ -335,6 +442,13 @@ func (s *Store) CountClaims(ctx context.Context) (total, uniquePlayers int, err } func (s *Store) Flush(ctx context.Context, userId string) error { + ctx, span := tracer.Start(ctx, "flush", trace.WithAttributes( + semconv.DBSystemSqlite, + semconv.DBSQLTable("claims"), + semconv.DBOperation("delete"), + )) + defer span.End() + log.Debug().Ctx(ctx).Str("initiated_by", userId).Msg("flushing all currently help claims") audit := &AuditableEvent{ userId: userId, @@ -345,6 +459,8 @@ func (s *Store) Flush(ctx context.Context, userId string) error { _, err := s.db.ExecContext(ctx, "DELETE FROM claims;") if err != nil { audit.err = err + span.RecordError(err) + span.SetStatus(codes.Error, "failed to execute query") return fmt.Errorf("failed to execute delete query: %w", err) } return nil