From 47b330dc29d6045652118983017ac77a5c59caaf Mon Sep 17 00:00:00 2001 From: Cory LaNou Date: Fri, 10 Oct 2025 12:24:14 -0500 Subject: [PATCH 1/8] feat: Add comprehensive AI agent documentation system This commit introduces a complete documentation system specifically designed for AI agents working with the Litestream codebase. Added: - AGENT.md: Main entry point with architecture overview and common pitfalls - docs/SQLITE_INTERNALS.md: SQLite fundamentals including WAL and lock page - docs/LTX_FORMAT.md: Complete LTX format specification - docs/ARCHITECTURE.md: Deep technical dive into components - docs/REPLICA_CLIENT_GUIDE.md: Storage backend implementation guide - docs/TESTING_GUIDE.md: Comprehensive testing strategies - docs/V050_CHANGES.md: v0.5.0 migration guide and breaking changes Key features: - Emphasizes critical concepts like 1GB lock page handling - Documents common pitfalls from recent PRs (#760, #748) - Aligns with v0.5.0 changes (single replica, new compaction levels) - Provides mermaid diagrams for visual understanding - Includes anti-patterns and correct approaches Removed: - docs/RELEASE.md: Outdated release documentation This documentation lives in the repo rather than on litestream.io because: 1. AI agents need immediate access to technical details during code analysis 2. Docs can be versioned alongside code changes 3. PR reviewers can verify AI understanding matches implementation 4. Reduces hallucination by providing authoritative in-repo reference --- AGENT.md | 545 ++++++++++++++++++ docs/ARCHITECTURE.md | 845 ++++++++++++++++++++++++++++ docs/LTX_FORMAT.md | 701 +++++++++++++++++++++++ docs/RELEASE.md | 329 ----------- docs/REPLICA_CLIENT_GUIDE.md | 750 +++++++++++++++++++++++++ docs/SQLITE_INTERNALS.md | 563 +++++++++++++++++++ docs/TESTING_GUIDE.md | 1014 ++++++++++++++++++++++++++++++++++ docs/V050_CHANGES.md | 191 +++++++ 8 files changed, 4609 insertions(+), 329 deletions(-) create mode 100644 AGENT.md create mode 100644 docs/ARCHITECTURE.md create mode 100644 docs/LTX_FORMAT.md delete mode 100644 docs/RELEASE.md create mode 100644 docs/REPLICA_CLIENT_GUIDE.md create mode 100644 docs/SQLITE_INTERNALS.md create mode 100644 docs/TESTING_GUIDE.md create mode 100644 docs/V050_CHANGES.md diff --git a/AGENT.md b/AGENT.md new file mode 100644 index 000000000..3ff053b31 --- /dev/null +++ b/AGENT.md @@ -0,0 +1,545 @@ +# AGENT.md - Litestream AI Agent Documentation + +This document provides comprehensive guidance for AI agents working with the Litestream codebase. Read this document carefully before making any modifications. + +## Table of Contents + +- [Overview](#overview) +- [Fundamental Concepts](#fundamental-concepts) +- [Core Architecture](#core-architecture) +- [Critical Concepts](#critical-concepts) +- [Common Pitfalls](#common-pitfalls) +- [Component Guide](#component-guide) +- [Performance Considerations](#performance-considerations) +- [Testing Requirements](#testing-requirements) + +## Overview + +Litestream is a **disaster recovery tool for SQLite** that runs as a background process and safely replicates changes incrementally to various storage backends. It monitors SQLite's Write-Ahead Log (WAL), converts changes to an immutable LTX format, and replicates these to configured destinations. 
+ +**Version 0.5.0 Major Changes:** +- **New LTX Format**: Replaced WAL segment replication with page-level LTX format +- **Multi-level Compaction**: Hierarchical compaction strategy for efficient storage +- **Single Replica Constraint**: Each database now limited to one replica destination +- **No CGO Required**: Switched to `modernc.org/sqlite` (pure Go implementation) +- **NATS JetStream Support**: Added as new replica type +- **Breaking Change**: Cannot restore from v0.3.x backups + +**Key Design Principles:** +- **Non-invasive**: Uses only SQLite API, never directly manipulates database files +- **Incremental**: Replicates only changes, not full databases +- **Single-destination** (v0.5.0+): One replica destination per database +- **Eventually Consistent**: Handles storage backends with eventual consistency +- **Safe**: Maintains long-running read transactions for consistency + +## Fundamental Concepts + +**CRITICAL**: Understanding SQLite internals and the LTX format is essential for working with Litestream. + +### Required Reading + +1. **[SQLite Internals](docs/SQLITE_INTERNALS.md)** - Understand WAL, pages, transactions, and the 1GB lock page +2. **[LTX Format](docs/LTX_FORMAT.md)** - Learn the custom replication format Litestream uses + +### Key SQLite Concepts + +- **WAL (Write-Ahead Log)**: Temporary file containing uncommitted changes +- **Pages**: Fixed-size blocks (typically 4KB) that make up the database +- **Lock Page at 1GB**: Special page at 0x40000000 that MUST be skipped +- **Checkpoints**: Process of merging WAL back into main database +- **Transaction Isolation**: Long-running read transaction for consistency + +### Key LTX Concepts + +- **Immutable Files**: Once written, LTX files are never modified +- **TXID Ranges**: Each file covers a range of transaction IDs +- **Page Index**: Binary search tree for efficient page lookup +- **Compaction Levels**: Time-based merging to reduce storage (30s → 5min → 1hr) +- **Checksums**: CRC-64 integrity verification at multiple levels +- **CLI Command**: Use `litestream ltx` (not `wal`) for LTX operations + +### The Replication Flow + +```mermaid +graph LR + App[Application] -->|SQL| SQLite + SQLite -->|Writes| WAL[WAL File] + WAL -->|Monitor| Litestream + Litestream -->|Convert| LTX[LTX Format] + LTX -->|Upload| Storage[Cloud Storage] + Storage -->|Restore| Database[New Database] +``` + +## Core Architecture + +```mermaid +graph TB + subgraph "SQLite Layer" + SQLite[SQLite Database] + WAL[WAL File] + SQLite -->|Writes| WAL + end + + subgraph "Litestream Core" + DB[DB Component
db.go]
    Replica[Replica Manager<br/>replica.go]
    Store[Store<br/>store.go]

    DB -->|Manages| Replica
    Store -->|Coordinates| DB
  end

  subgraph "Storage Layer"
    RC[ReplicaClient Interface
replica_client.go] + S3[S3 Client] + GCS[GCS Client] + File[File Client] + SFTP[SFTP Client] + + Replica -->|Uses| RC + RC -->|Implements| S3 + RC -->|Implements| GCS + RC -->|Implements| File + RC -->|Implements| SFTP + end + + WAL -->|Monitor Changes| DB + DB -->|Checkpoint| SQLite +``` + +### Data Flow Sequence + +```mermaid +sequenceDiagram + participant App + participant SQLite + participant WAL + participant DB + participant Replica + participant Storage + + App->>SQLite: Write Transaction + SQLite->>WAL: Append Changes + + loop Monitor (1s interval) + DB->>WAL: Check Size/Changes + WAL-->>DB: Current State + + alt WAL Has Changes + DB->>WAL: Read Pages + DB->>DB: Convert to LTX Format + DB->>Replica: Queue LTX File + + loop Sync (configurable interval) + Replica->>Storage: WriteLTXFile() + Storage-->>Replica: FileInfo + Replica->>Replica: Update Position + end + end + end + + alt Checkpoint Needed + DB->>SQLite: PRAGMA wal_checkpoint + SQLite->>WAL: Merge to Main DB + end +``` + +## Critical Concepts + +### 1. SQLite Lock Page at 1GB Boundary ⚠️ + +**CRITICAL**: SQLite reserves a special lock page at exactly 1GB (0x40000000 bytes). + +```go +// db.go:951-953 - Must skip lock page during replication +lockPgno := ltx.LockPgno(pageSize) // Page number varies by page size +if pgno == lockPgno { + continue // Skip this page - it's reserved by SQLite +} +``` + +**Lock Page Numbers by Page Size:** +- 4KB pages: 262145 (most common) +- 8KB pages: 131073 +- 16KB pages: 65537 +- 32KB pages: 32769 + +**Testing Requirement**: Any changes affecting page iteration MUST be tested with >1GB databases. + +### 2. LTX File Format + +LTX (Log Transaction) files are **immutable**, append-only files containing: +- Header with transaction IDs (MinTXID, MaxTXID) +- Page data with checksums +- Page index for efficient seeking +- Trailer with metadata + +**Important**: LTX files are NOT SQLite WAL files - they're a custom format for efficient replication. + +### 3. Compaction Process + +Compaction merges multiple LTX files to reduce storage overhead: + +```mermaid +flowchart LR + subgraph "Level 0 (Raw)" + L0A[0000000001-0000000100.ltx] + L0B[0000000101-0000000200.ltx] + L0C[0000000201-0000000300.ltx] + end + + subgraph "Level 1 (30 seconds)" + L1[0000000001-0000000300.ltx] + end + + subgraph "Level 2 (5 minutes)" + L2[0000000001-0000001000.ltx] + end + + subgraph "Level 3 (1 hour)" + L3[0000000001-0000002000.ltx] + end + + subgraph "Snapshot (24h)" + Snap[snapshot.ltx] + end + + L0A -->|Merge| L1 + L0B -->|Merge| L1 + L0C -->|Merge| L1 + L1 -->|30s window| L2 + L2 -->|5min window| L3 + L3 -->|Hourly| Snap +``` + +**Critical Compaction Rule**: When compacting with eventually consistent storage: +```go +// db.go:1280-1294 - ALWAYS read from local disk when available +f, err := os.Open(db.LTXPath(info.Level, info.MinTXID, info.MaxTXID)) +if err == nil { + // Use local file - it's complete and consistent + return f, nil +} +// Only fall back to remote if local doesn't exist +return replica.Client.OpenLTXFile(...) +``` + +### 4. Eventual Consistency Handling + +Many storage backends (S3, R2, etc.) are eventually consistent. This means: +- A file you just wrote might not be immediately readable +- A file might be listed but only partially available +- Reads might return stale or incomplete data + +**Solution**: Always prefer local files during compaction (see PR #760). 
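
When a remote read cannot be avoided (for example during restore, or when no local copy exists), it also helps to confirm that the bytes actually received match the size reported by the file listing, retrying with backoff otherwise. The sketch below is illustrative only: it assumes the `ReplicaClient` methods and `ltx.FileInfo` fields described elsewhere in this document, and the helper name, retry policy, and import paths are not part of Litestream's API.

```go
import (
	"context"
	"fmt"
	"io"
	"time"

	"github.com/benbjohnson/litestream" // assumed module path
	"github.com/superfly/ltx"
)

// readVerifiedLTX is a hypothetical helper: it re-reads a remote LTX file
// until the byte count matches the size reported by the listing, guarding
// against partially visible objects on eventually consistent backends.
func readVerifiedLTX(ctx context.Context, client litestream.ReplicaClient, info *ltx.FileInfo) ([]byte, error) {
	const maxAttempts = 5
	backoff := 100 * time.Millisecond

	for attempt := 0; attempt < maxAttempts; attempt++ {
		rc, err := client.OpenLTXFile(ctx, info.Level, info.MinTXID, info.MaxTXID, 0, 0)
		if err != nil {
			return nil, err
		}
		data, err := io.ReadAll(rc)
		rc.Close()
		if err == nil && int64(len(data)) == info.Size {
			return data, nil // complete file received
		}

		// Partial or failed read: back off and retry.
		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		case <-time.After(backoff):
			backoff *= 2
		}
	}
	return nil, fmt.Errorf("incomplete ltx file after %d attempts", maxAttempts)
}
```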
+ +## Common Pitfalls + +### ❌ DON'T: Read from remote during compaction + +```go +// WRONG - Can get partial/corrupt data +f, err := client.OpenLTXFile(ctx, level, minTXID, maxTXID, 0, 0) +``` + +### ✅ DO: Read from local when available + +```go +// CORRECT - Check local first +if f, err := os.Open(localPath); err == nil { + defer f.Close() + // Use local file +} else { + // Fall back to remote only if necessary +} +``` + +### ❌ DON'T: Use RLock for write operations + +```go +// WRONG - Race condition in replica.go:217 +r.mu.RLock() // Should be Lock() for writes +defer r.mu.RUnlock() +r.pos = pos // Writing with RLock! +``` + +### ✅ DO: Use proper lock types + +```go +// CORRECT +r.mu.Lock() +defer r.mu.Unlock() +r.pos = pos +``` + +### ❌ DON'T: Ignore CreatedAt preservation + +```go +// WRONG - Loses timestamp granularity +info := <x.FileInfo{ + CreatedAt: time.Now(), // Don't use current time +} +``` + +### ✅ DO: Preserve earliest timestamp + +```go +// CORRECT - Preserve temporal information +info := <x.FileInfo{ + CreatedAt: oldestSourceFile.CreatedAt, // Keep original +} +``` + +## Component Guide + +### DB Component (db.go) + +**Responsibilities:** +- Manages SQLite database connection (via `modernc.org/sqlite` - no CGO) +- Monitors WAL for changes +- Performs checkpoints +- Maintains long-running read transaction +- Converts WAL pages to LTX format + +**Key Fields:** +```go +type DB struct { + path string // Database file path + db *sql.DB // SQLite connection + rtx *sql.Tx // Long-running read transaction + pageSize int // Database page size (critical for lock page) + notify chan struct{} // Notifies on WAL changes +} +``` + +**Initialization Sequence:** +1. Open database connection +2. Read page size from database +3. Initialize long-running read transaction +4. Start monitor goroutine +5. Initialize replicas + +### Replica Component (replica.go) + +**Responsibilities:** +- Manages replication to a single destination (v0.5.0: one replica per DB only) +- Tracks replication position (ltx.Pos) +- Handles sync intervals +- Manages encryption (if configured) + +**Key Operations:** +- `Sync()`: Synchronizes pending changes +- `SetPos()`: Updates replication position (must use Lock, not RLock!) +- `Snapshot()`: Creates full database snapshot + +### ReplicaClient Interface (replica_client.go) + +**Required Methods:** +```go +type ReplicaClient interface { + Type() string // Client type identifier + + // File operations + LTXFiles(ctx, level, seek) (FileIterator, error) + OpenLTXFile(ctx, level, minTXID, maxTXID, offset, size) (io.ReadCloser, error) + WriteLTXFile(ctx, level, minTXID, maxTXID, r) (*FileInfo, error) + DeleteLTXFiles(ctx, files) error +} +``` + +**Implementation Requirements:** +- Handle partial reads gracefully +- Implement proper error types (os.ErrNotExist) +- Support seek/offset for efficient page fetching +- Preserve file timestamps (CreatedAt) + +### Store Component (store.go) + +**Responsibilities:** +- Coordinates multiple databases +- Manages compaction schedules +- Controls resource usage +- Handles retention policies + +**Compaction Levels (v0.5.0):** +```go +var defaultLevels = CompactionLevels{ + {Level: 0, Interval: 0}, // Raw LTX files (no compaction) + {Level: 1, Interval: 30*Second}, // 30-second windows + {Level: 2, Interval: 5*Minute}, // 5-minute windows + {Level: 3, Interval: 1*Hour}, // Hourly windows + // Snapshots created daily (24h retention) +} +``` + +## Performance Considerations + +### O(n) Operations to Watch + +1. 
**Page Iteration**: Linear scan through all pages + - Cache page index when possible + - Use binary search on sorted page lists + +2. **File Listing**: Directory scans can be expensive + - Cache file listings when unchanged + - Use seek parameter to skip old files + +3. **Compaction**: Reads all input files + - Limit concurrent compactions + - Use appropriate level intervals + +### Caching Strategy + +```go +// Page index caching example +const DefaultEstimatedPageIndexSize = 32 * 1024 // 32KB + +// Fetch end of file first for page index +offset := info.Size - DefaultEstimatedPageIndexSize +if offset < 0 { + offset = 0 +} +// Read page index once, cache for duration of operation +``` + +### Batch Operations + +- Group small writes into larger LTX files +- Batch delete operations for old files +- Use prepared statements for repeated queries + +## Testing Requirements + +### For Any DB Changes + +```bash +# Test with various page sizes +./bin/litestream-test populate -db test.db -page-size 4096 -target-size 2GB +./bin/litestream-test populate -db test.db -page-size 8192 -target-size 2GB + +# Test lock page handling +./bin/litestream-test validate -source-db test.db -replica-url file:///tmp/replica +``` + +### For Replica Client Changes + +```bash +# Test eventual consistency +go test -v ./replica_client_test.go -integration [s3|gcs|abs|sftp] + +# Test partial reads +go test -v -run TestReplicaClient_PartialRead ./... +``` + +### For Compaction Changes + +```bash +# Test with store compaction +go test -v -run TestStore_CompactDB ./... + +# Test with eventual consistency mock +go test -v -run TestStore_CompactDB_RemotePartialRead ./... +``` + +### Race Condition Testing + +```bash +# Always run with race detector +go test -race -v ./... + +# Specific race-prone areas +go test -race -v -run TestReplica_SetPos ./... +go test -race -v -run TestDB_Monitor ./... +``` + +## Quick Reference + +### File Paths + +- **Database**: `/path/to/database.db` +- **Metadata**: `/path/to/database.db-litestream/` +- **LTX Files**: `/path/to/database.db-litestream/ltx/LEVEL/MIN-MAX.ltx` +- **Snapshots**: `/path/to/database.db-litestream/snapshots/TIMESTAMP.ltx` + +### Key Configuration + +```yaml +dbs: + - path: /path/to/db.sqlite + replicas: + - type: s3 + bucket: my-bucket + path: db-backup + sync-interval: 10s # How often to sync + +# Compaction configuration (v0.5.0 defaults) +levels: + - level: 1 + interval: 30s # 30-second windows + - level: 2 + interval: 5m # 5-minute windows + - level: 3 + interval: 1h # 1-hour windows +``` + +### Important Constants + +```go +DefaultMonitorInterval = 1 * time.Second // WAL check frequency +DefaultCheckpointInterval = 1 * time.Minute // Checkpoint frequency +DefaultMinCheckpointPageN = 1000 // Min pages before passive checkpoint +DefaultMaxCheckpointPageN = 10000 // Max pages before forced checkpoint +DefaultTruncatePageN = 500000 // Pages before truncation +``` + +## Getting Help + +For complex architectural questions, consult: +1. **`docs/V050_CHANGES.md`** - v0.5.0 breaking changes and migration guide +2. **`docs/SQLITE_INTERNALS.md`** - SQLite fundamentals, WAL format, lock page details +3. **`docs/LTX_FORMAT.md`** - LTX file format specification and operations +4. `docs/ARCHITECTURE.md` - Deep technical details of Litestream components +5. `docs/REPLICA_CLIENT_GUIDE.md` - Storage backend implementation guide +6. `docs/TESTING_GUIDE.md` - Comprehensive testing strategies +7. 
Recent PRs, especially #760 (compaction fix) and #748 (testing harness) + +## Future Roadmap + +**Planned Features:** +- **Litestream VFS**: Virtual File System for read replicas + - Instantly spin up database copies + - Background hydration from S3 + - Enables scaling read operations without full database downloads +- **Enhanced read replica support**: Direct reads from remote storage + +## Important v0.5.0 Migration Notes + +1. **Breaking Changes:** + - Cannot restore from v0.3.x WAL segment files + - Single replica destination per database (removed multi-replica support) + - Command renamed: `litestream wal` → `litestream ltx` + - Removed "generations" concept for backup tracking + +2. **Build Changes:** + - CGO no longer required (uses `modernc.org/sqlite`) + - Pure Go implementation enables easier cross-compilation + +3. **New Features:** + - NATS JetStream replica type added + - Page-level compaction for better efficiency + - Point-in-time restoration with minimal files + +## Final Checklist Before Making Changes + +- [ ] Read this entire document +- [ ] Read `docs/SQLITE_INTERNALS.md` for SQLite fundamentals +- [ ] Read `docs/LTX_FORMAT.md` for replication format details +- [ ] Understand v0.5.0 changes and limitations +- [ ] Understand the component you're modifying +- [ ] Check for eventual consistency implications +- [ ] Consider >1GB database edge cases (lock page at 0x40000000) +- [ ] Plan appropriate tests +- [ ] Review recent similar PRs for patterns +- [ ] Use proper locking (Lock vs RLock) +- [ ] Preserve timestamps where applicable +- [ ] Test with race detector enabled diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md new file mode 100644 index 000000000..20ffbf46a --- /dev/null +++ b/docs/ARCHITECTURE.md @@ -0,0 +1,845 @@ +# Litestream Architecture - Technical Deep Dive + +## Table of Contents +- [System Layers](#system-layers) +- [Core Components](#core-components) +- [LTX File Format](#ltx-file-format) +- [WAL Monitoring Mechanism](#wal-monitoring-mechanism) +- [Compaction Process](#compaction-process) +- [Transaction Management](#transaction-management) +- [Concurrency Model](#concurrency-model) +- [State Management](#state-management) +- [Initialization Flow](#initialization-flow) +- [Error Handling](#error-handling) + +## System Layers + +Litestream follows a layered architecture with clear separation of concerns: + +```mermaid +graph TB + subgraph "Application Layer" + CLI[CLI Commands
cmd/litestream/]
    Config[Configuration<br/>config.go]
  end

  subgraph "Core Layer"
    Store[Store Manager<br/>store.go]
    DB[Database Manager<br/>db.go]
    Replica[Replica Manager<br/>replica.go]
  end

  subgraph "Storage Abstraction"
    RC[ReplicaClient Interface
replica_client.go] + end + + subgraph "Storage Implementations" + S3[s3/replica_client.go] + GCS[gs/replica_client.go] + ABS[abs/replica_client.go] + File[file/replica_client.go] + SFTP[sftp/replica_client.go] + NATS[nats/replica_client.go] + end + + subgraph "External" + SQLite[SQLite Database] + Cloud[Cloud Storage] + end + + CLI --> Store + Store --> DB + DB --> Replica + Replica --> RC + RC --> S3 + RC --> GCS + RC --> ABS + RC --> File + RC --> SFTP + RC --> NATS + DB <--> SQLite + S3 --> Cloud + GCS --> Cloud + ABS --> Cloud +``` + +### Layer Responsibilities + +#### 1. Application Layer +- **CLI Commands**: User interface for operations (replicate, restore, etc.) +- **Configuration**: YAML/environment variable parsing and validation + +#### 2. Core Layer +- **Store**: Multi-database coordination, compaction scheduling +- **DB**: Single database management, WAL monitoring, checkpointing +- **Replica**: Replication to single destination, position tracking + +#### 3. Storage Abstraction +- **ReplicaClient Interface**: Uniform API for all storage backends + +#### 4. Storage Implementations +- Backend-specific logic (authentication, retries, optimizations) + +## Core Components + +### DB Component (db.go) + +The DB component is the heart of Litestream, managing a single SQLite database: + +```go +type DB struct { + // Core fields + path string // Database file path + metaPath string // Metadata directory path + db *sql.DB // SQLite connection + f *os.File // Long-running file descriptor + rtx *sql.Tx // Long-running read transaction + pageSize int // Database page size + + // Synchronization + mu sync.RWMutex // Protects struct fields + chkMu sync.RWMutex // Checkpoint lock + notify chan struct{} // WAL change notifications + + // Lifecycle + ctx context.Context + cancel func() + wg sync.WaitGroup + + // Configuration + MinCheckpointPageN int // Min pages for passive checkpoint + MaxCheckpointPageN int // Max pages for forced checkpoint + TruncatePageN int // Pages before truncation + CheckpointInterval time.Duration + MonitorInterval time.Duration + + // Metrics + dbSizeGauge prometheus.Gauge + walSizeGauge prometheus.Gauge + txIDGauge prometheus.Gauge +} +``` + +#### Key Methods + +```go +// Lifecycle +func (db *DB) Open() error +func (db *DB) Close(ctx context.Context) error + +// Monitoring +func (db *DB) monitor() // Background WAL monitoring +func (db *DB) checkWAL() (bool, error) // Check for WAL changes + +// Checkpointing +func (db *DB) Checkpoint(mode string) error +func (db *DB) autoCheckpoint() error + +// Replication +func (db *DB) WALReader(pgno uint32) (io.ReadCloser, error) +func (db *DB) Sync(ctx context.Context) error + +// Compaction +func (db *DB) Compact(ctx context.Context, destLevel int) (*ltx.FileInfo, error) +``` + +### Replica Component (replica.go) + +Manages replication to a single destination: + +```go +type Replica struct { + db *DB // Parent database + Client ReplicaClient // Storage backend client + + mu sync.RWMutex + pos ltx.Pos // Current replication position + + // Configuration + SyncInterval time.Duration + MonitorEnabled bool + + // Encryption + AgeIdentities []age.Identity + AgeRecipients []age.Recipient + + // Lifecycle + cancel func() + wg sync.WaitGroup +} +``` + +#### Replication Position + +```go +type Pos struct { + TXID TXID // Transaction ID + PageNo uint32 // Page number within transaction + Checksum uint64 // Running checksum +} +``` + +### Store Component (store.go) + +Coordinates multiple databases and manages system-wide resources: + 
+```go +type Store struct { + mu sync.Mutex + dbs []*DB + levels CompactionLevels + + // Configuration + SnapshotInterval time.Duration + SnapshotRetention time.Duration + CompactionMonitorEnabled bool + + // Lifecycle + ctx context.Context + cancel func() + wg sync.WaitGroup +} +``` + +## LTX File Format + +LTX (Log Transaction) files are immutable files containing database changes: + +``` ++------------------+ +| Header | Fixed size header with metadata ++------------------+ +| | +| Page Frames | Variable number of page frames +| | ++------------------+ +| Page Index | Index for efficient page lookup ++------------------+ +| Trailer | Metadata and checksums ++------------------+ +``` + +### Header Structure + +```go +type Header struct { + Magic [4]byte // "LTX\x00" + Version uint32 // Format version + PageSize uint32 // Database page size + MinTXID TXID // Starting transaction ID + MaxTXID TXID // Ending transaction ID + Timestamp int64 // Creation timestamp + Checksum uint64 // Header checksum +} +``` + +### Page Frame Structure + +```go +type PageFrame struct { + Header PageHeader + Data []byte // Page data (pageSize bytes) +} + +type PageHeader struct { + PageNo uint32 // Page number in database + Size uint32 // Size of page data + Checksum uint64 // Page checksum +} +``` + +### Page Index + +Binary search tree for efficient page lookup: +```go +type PageIndexElem struct { + PageNo uint32 // Page number + Offset int64 // Offset in file + Size uint32 // Size of page frame +} +``` + +### Trailer + +```go +type Trailer struct { + PageIndexOffset int64 // Offset to page index + PageIndexSize int64 // Size of page index + PageCount uint32 // Total pages in file + Checksum uint64 // Full file checksum +} +``` + +## WAL Monitoring Mechanism + +### Monitor Loop (db.go:1499) + +```go +func (db *DB) monitor() { + ticker := time.NewTicker(db.MonitorInterval) + defer ticker.Stop() + + for { + select { + case <-ticker.C: + // Check WAL for changes + changed, err := db.checkWAL() + if err != nil { + slog.Error("wal check failed", "error", err) + continue + } + + if changed { + // Notify replicas of changes + db.notifyReplicas() + + // Check if checkpoint needed + if db.shouldCheckpoint() { + db.autoCheckpoint() + } + } + + case <-db.ctx.Done(): + return + } + } +} +``` + +### WAL Change Detection + +```go +func (db *DB) checkWAL() (bool, error) { + // Get current WAL state + walInfo, err := db.walInfo() + if err != nil { + return false, err + } + + // Compare with previous state + db.mu.Lock() + changed := walInfo.Size != db.prevWALSize || + walInfo.Checksum != db.prevWALChecksum + db.prevWALSize = walInfo.Size + db.prevWALChecksum = walInfo.Checksum + db.mu.Unlock() + + return changed, nil +} +``` + +## Compaction Process + +Compaction merges multiple LTX files to reduce storage overhead: + +### Compaction Algorithm (store.go:189) + +```go +func (s *Store) CompactDB(ctx context.Context, db *DB, lvl *CompactionLevel) (*ltx.FileInfo, error) { + // 1. Check if compaction is needed + if !s.shouldCompact(db, lvl) { + return nil, ErrCompactionTooEarly + } + + // 2. Get source files from previous level + srcLevel := lvl.Level - 1 + srcFiles, err := db.LTXFiles(ctx, srcLevel) + if err != nil { + return nil, err + } + + // 3. Create page map for deduplication + pageMap := make(map[uint32]PageData) + + // 4. 
Read all source files (preferring local) + for _, info := range srcFiles { + // CRITICAL: Try local first for consistency + f, err := os.Open(db.LTXPath(info)) + if err != nil { + // Fall back to remote only if local doesn't exist + f, err = replica.Client.OpenLTXFile(ctx, info) + if err != nil { + return nil, err + } + } + defer f.Close() + + // Read pages and add to map (newer overwrites older) + pages, err := ltx.ReadPages(f) + for _, page := range pages { + pageMap[page.PageNo] = page + } + } + + // 5. Write compacted file + var buf bytes.Buffer + writer := ltx.NewWriter(&buf) + + // Write pages in order + pageNos := make([]uint32, 0, len(pageMap)) + for pgno := range pageMap { + pageNos = append(pageNos, pgno) + } + sort.Slice(pageNos, func(i, j int) bool { + return pageNos[i] < pageNos[j] + }) + + for _, pgno := range pageNos { + // CRITICAL: Skip lock page at 1GB + if pgno == ltx.LockPgno(db.pageSize) { + continue + } + writer.WritePage(pageMap[pgno]) + } + + // 6. Upload compacted file + info, err := replica.Client.WriteLTXFile(ctx, lvl.Level, minTXID, maxTXID, &buf) + if err != nil { + return nil, err + } + + // CRITICAL: Preserve earliest timestamp + info.CreatedAt = s.earliestTimestamp(srcFiles) + + // 7. Delete source files + return info, replica.Client.DeleteLTXFiles(ctx, srcFiles) +} +``` + +### Compaction Levels + +```go +type CompactionLevel struct { + Level int // Level number (0 = raw, 1+ = compacted) + Interval time.Duration // How often to compact from previous level +} + +// Default configuration +var DefaultCompactionLevels = CompactionLevels{ + {Level: 0, Interval: 0}, // Raw LTX files + {Level: 1, Interval: 1 * Hour}, // Hourly compaction + {Level: 2, Interval: 24 * Hour}, // Daily compaction +} +``` + +## Transaction Management + +### Long-Running Read Transaction + +Litestream maintains a long-running read transaction to ensure consistency: + +```go +func (db *DB) initReadTx() error { + // Start read transaction + tx, err := db.db.BeginTx(context.Background(), &sql.TxOptions{ + ReadOnly: true, + }) + if err != nil { + return err + } + + // Execute dummy query to start transaction + var dummy string + err = tx.QueryRow("SELECT ''").Scan(&dummy) + if err != nil { + tx.Rollback() + return err + } + + db.rtx = tx + return nil +} +``` + +**Purpose:** +- Prevents database from being modified during replication +- Ensures consistent view of database +- Allows reading historical pages from WAL + +### Checkpoint Coordination + +```go +func (db *DB) Checkpoint(mode string) error { + // Acquire checkpoint lock + db.chkMu.Lock() + defer db.chkMu.Unlock() + + // Close read transaction temporarily + if db.rtx != nil { + db.rtx.Rollback() + db.rtx = nil + } + + // Perform checkpoint + _, _, err := db.db.Exec(fmt.Sprintf("PRAGMA wal_checkpoint(%s)", mode)) + if err != nil { + return err + } + + // Restart read transaction + return db.initReadTx() +} +``` + +## Concurrency Model + +### Mutex Usage Patterns + +```go +// DB struct mutexes +type DB struct { + mu sync.RWMutex // Protects struct fields + chkMu sync.RWMutex // Checkpoint coordination +} + +// Replica struct mutexes +type Replica struct { + mu sync.RWMutex // Protects position + muf sync.Mutex // File descriptor lock +} + +// Store struct mutex +type Store struct { + mu sync.Mutex // Protects database list +} +``` + +### Lock Ordering (Prevent Deadlocks) + +Always acquire locks in this order: +1. Store.mu +2. DB.mu +3. DB.chkMu +4. 
Replica.mu + +### Goroutine Management + +```go +// Start background task +func (db *DB) Start() { + db.wg.Add(1) + go func() { + defer db.wg.Done() + db.monitor() + }() +} + +// Stop with timeout +func (db *DB) Close(ctx context.Context) error { + // Signal shutdown + db.cancel() + + // Wait for goroutines with timeout + done := make(chan struct{}) + go func() { + db.wg.Wait() + close(done) + }() + + select { + case <-done: + return nil + case <-ctx.Done(): + return ctx.Err() + } +} +``` + +## State Management + +### Database States + +```mermaid +stateDiagram-v2 + [*] --> Closed + Closed --> Opening: Open() + Opening --> Open: Success + Opening --> Closed: Error + Open --> Monitoring: Start() + Monitoring --> Syncing: Changes Detected + Syncing --> Monitoring: Sync Complete + Monitoring --> Checkpointing: Threshold Reached + Checkpointing --> Monitoring: Checkpoint Complete + Monitoring --> Closing: Close() + Closing --> Closed: Cleanup Complete +``` + +### Replica States + +```mermaid +stateDiagram-v2 + [*] --> Idle + Idle --> Starting: Start() + Starting --> Monitoring: Success + Starting --> Idle: Error + Monitoring --> Syncing: Timer/Changes + Syncing --> Uploading: Have Changes + Uploading --> Monitoring: Success + Uploading --> Error: Failed + Error --> Monitoring: Retry + Monitoring --> Stopping: Stop() + Stopping --> Idle: Cleanup +``` + +### Position Tracking + +```go +type Pos struct { + TXID TXID // Current transaction ID + PageNo uint32 // Current page number + Checksum uint64 // Running checksum for validation +} + +// Update position atomically +func (r *Replica) SetPos(pos ltx.Pos) { + r.mu.Lock() // MUST use Lock, not RLock! + defer r.mu.Unlock() + r.pos = pos +} + +// Read position safely +func (r *Replica) Pos() ltx.Pos { + r.mu.RLock() + defer r.mu.RUnlock() + return r.pos +} +``` + +## Initialization Flow + +### System Startup Sequence + +```mermaid +sequenceDiagram + participant Main + participant Store + participant DB + participant Replica + participant Monitor + + Main->>Store: NewStore(config) + Store->>Store: Validate config + + Main->>Store: Open() + loop For each database + Store->>DB: NewDB(path) + Store->>DB: Open() + DB->>DB: Open SQLite connection + DB->>DB: Read page size + DB->>DB: Init metadata + DB->>DB: Start read transaction + + loop For each replica + DB->>Replica: NewReplica() + DB->>Replica: Start() + Replica->>Monitor: Start monitoring + end + end + + Store->>Store: Start compaction monitors + Store-->>Main: Ready +``` + +### Critical Initialization Steps + +1. **Database Opening** + ```go + // Must happen in order: + 1. Open SQLite connection + 2. Read page size (PRAGMA page_size) + 3. Create metadata directory + 4. Start long-running read transaction + 5. Initialize replicas + 6. Start monitor goroutine + ``` + +2. **Replica Initialization** + ```go + // Must happen in order: + 1. Create replica with client + 2. Load previous position from metadata + 3. Validate position against database + 4. Start sync goroutine (if monitoring enabled) + ``` + +## Error Handling + +### Error Categories + +1. **Recoverable Errors** + - Network timeouts + - Temporary storage unavailability + - Lock contention + +2. **Fatal Errors** + - Database corruption + - Invalid configuration + - Disk full + +3. 
**Operational Errors** + - Checkpoint failures + - Compaction conflicts + - Sync delays + +### Error Propagation + +```go +// Bottom-up error propagation +ReplicaClient.WriteLTXFile() error + ↓ +Replica.Sync() error + ↓ +DB.Sync() error + ↓ +Store.monitorDB() // Logs error, continues +``` + +### Retry Logic + +```go +func (r *Replica) syncWithRetry(ctx context.Context) error { + backoff := time.Second + maxBackoff := time.Minute + + for attempt := 0; ; attempt++ { + err := r.Sync(ctx) + if err == nil { + return nil + } + + // Check if error is retryable + if !isRetryable(err) { + return err + } + + // Check context + if ctx.Err() != nil { + return ctx.Err() + } + + // Exponential backoff + time.Sleep(backoff) + backoff *= 2 + if backoff > maxBackoff { + backoff = maxBackoff + } + } +} +``` + +## Performance Characteristics + +### Time Complexity + +| Operation | Complexity | Notes | +|-----------|------------|-------| +| WAL Monitor | O(1) | Fixed interval check | +| Page Write | O(1) | Append to LTX file | +| Compaction | O(n) | n = total pages | +| Restoration | O(n*log(m)) | n = pages, m = files | +| File List | O(k) | k = files in level | + +### Space Complexity + +| Component | Memory Usage | Disk Usage | +|-----------|-------------|------------| +| DB | O(1) + metrics | Original DB + WAL | +| Replica | O(1) | LTX files + metadata | +| Compaction | O(n) pages | Temporary during merge | +| Page Index | O(p) | p = pages in file | + +### Optimization Points + +1. **Page Index Caching** + - Cache frequently accessed indices + - Use estimated size for initial fetch + +2. **Batch Operations** + - Group small changes into larger LTX files + - Batch delete operations + +3. **Concurrent Operations** + - Multiple replicas can sync in parallel + - Compaction runs independently per level + +## Security Considerations + +### Encryption (Age) + +```go +// Encryption during write +func (r *Replica) encryptData(data []byte) ([]byte, error) { + if len(r.AgeRecipients) == 0 { + return data, nil // No encryption + } + + var buf bytes.Buffer + w, err := age.Encrypt(&buf, r.AgeRecipients...) + if err != nil { + return nil, err + } + + _, err = w.Write(data) + w.Close() + return buf.Bytes(), err +} + +// Decryption during read +func (r *Replica) decryptData(data []byte) ([]byte, error) { + if len(r.AgeIdentities) == 0 { + return data, nil // No decryption needed + } + + rd, err := age.Decrypt(bytes.NewReader(data), r.AgeIdentities...) 
+ if err != nil { + return nil, err + } + + return io.ReadAll(rd) +} +``` + +### Access Control + +- File permissions: 0600 for database files +- Directory permissions: 0700 for metadata +- No built-in authentication (rely on storage backend) + +## Monitoring & Metrics + +### Prometheus Metrics + +```go +// Database metrics +db_size_bytes // Current database size +wal_size_bytes // Current WAL size +total_wal_bytes // Total bytes written to WAL +checkpoint_count // Number of checkpoints +sync_count // Number of syncs +sync_error_count // Number of sync errors + +// Replica metrics +replica_lag_seconds // Replication lag +replica_position // Current replication position +``` + +### Health Checks + +```go +func (db *DB) HealthCheck() error { + // Check database connection + if err := db.db.Ping(); err != nil { + return fmt.Errorf("database ping failed: %w", err) + } + + // Check replication lag + for _, r := range db.replicas { + lag := time.Since(r.LastSync()) + if lag > MaxAcceptableLag { + return fmt.Errorf("replica %s lag too high: %v", r.Name(), lag) + } + } + + return nil +} +``` diff --git a/docs/LTX_FORMAT.md b/docs/LTX_FORMAT.md new file mode 100644 index 000000000..8db0bc15c --- /dev/null +++ b/docs/LTX_FORMAT.md @@ -0,0 +1,701 @@ +# LTX Format Specification + +LTX (Log Transaction) is Litestream's custom format for storing database changes in an immutable, append-only manner. + +## Table of Contents +- [Overview](#overview) +- [File Structure](#file-structure) +- [Header Format](#header-format) +- [Page Frames](#page-frames) +- [Page Index](#page-index) +- [Trailer Format](#trailer-format) +- [File Naming Convention](#file-naming-convention) +- [Checksum Calculation](#checksum-calculation) +- [Compaction and Levels](#compaction-and-levels) +- [Reading LTX Files](#reading-ltx-files) +- [Writing LTX Files](#writing-ltx-files) +- [Relationship to SQLite WAL](#relationship-to-sqlite-wal) + +## Overview + +LTX files are immutable snapshots of database changes: +- **Immutable**: Once written, never modified +- **Append-only**: New changes create new files +- **Self-contained**: Each file is independent +- **Indexed**: Contains page index for efficient seeks +- **Checksummed**: Integrity verification built-in + +```mermaid +graph LR + WAL[SQLite WAL] -->|Convert| LTX[LTX File] + LTX -->|Upload| Storage[Cloud Storage] + Storage -->|Download| Restore[Restored DB] +``` + +## File Structure + +``` +┌─────────────────────┐ +│ Header │ Fixed size (varies by version) +├─────────────────────┤ +│ │ +│ Page Frames │ Variable number of pages +│ │ +├─────────────────────┤ +│ Page Index │ Binary search tree +├─────────────────────┤ +│ Trailer │ Fixed size metadata +└─────────────────────┘ +``` + +### Size Calculation + +```go +FileSize = HeaderSize + + (PageCount * (PageHeaderSize + PageSize)) + + PageIndexSize + + TrailerSize +``` + +## Header Format + +The LTX header contains metadata about the file: + +```go +// From github.com/superfly/ltx +type Header struct { + // Magic bytes: "LTX\x00" (0x4C545800) + Magic [4]byte + + // Format version (current: 0) + Version uint32 + + // Flags for special behaviors + Flags uint32 + + // Database page size (typically 4096) + PageSize uint32 + + // Database page count at snapshot + PageCount uint32 + + // Transaction ID range + MinTXID TXID // uint64 + MaxTXID TXID // uint64 + + // Checksum of header + Checksum uint64 +} + +// Header flags +const ( + HeaderFlagNoChecksum = 1 << 0 // Disable checksums +) +``` + +### Binary Layout (Header) + +``` +Offset Size 
Field +0 4 Magic ("LTX\x00") +4 4 Version +8 4 Flags +12 4 PageSize +16 4 PageCount +20 8 MinTXID +28 8 MaxTXID +36 8 Checksum +Total: 44 bytes +``` + +## Page Frames + +Each page frame contains a database page with metadata: + +```go +type PageFrame struct { + Header PageHeader + Data []byte // Size = PageSize from LTX header +} + +type PageHeader struct { + PageNo uint32 // Page number in database (1-based) + Checksum uint64 // CRC-64 checksum of page data +} +``` + +### Binary Layout (Page Frame) + +``` +Offset Size Field +0 4 Page Number +4 8 Checksum +12 PageSize Page Data +``` + +### Page Frame Constraints + +1. **Sequential Writing**: Pages written in order during creation +2. **Random Access**: Can seek to any page using index +3. **Lock Page Skipping**: Page at 1GB boundary never included +4. **Deduplication**: In compacted files, only latest version of each page + +## Page Index + +The page index enables efficient random access to pages: + +```go +type PageIndexElem struct { + PageNo uint32 // Database page number + Offset int64 // Byte offset in LTX file +} + +// Index is sorted by PageNo for binary search +type PageIndex []PageIndexElem +``` + +### Binary Layout (Page Index) + +``` +Each entry (16 bytes): +Offset Size Field +0 4 Page Number +4 4 Reserved (padding) +8 8 File Offset + +Total index size = EntryCount * 16 +``` + +### Index Usage + +```go +// Finding a page using the index +func findPage(index []PageIndexElem, targetPageNo uint32) (offset int64, found bool) { + // Binary search + idx := sort.Search(len(index), func(i int) bool { + return index[i].PageNo >= targetPageNo + }) + + if idx < len(index) && index[idx].PageNo == targetPageNo { + return index[idx].Offset, true + } + return 0, false +} +``` + +## Trailer Format + +The trailer contains metadata and pointers: + +```go +type Trailer struct { + // Offset to start of page index + PageIndexOffset int64 + + // Size of page index in bytes + PageIndexSize int64 + + // Total checksum of all pages + Checksum uint64 +} +``` + +### Binary Layout (Trailer) + +``` +Offset Size Field +0 8 Page Index Offset +8 8 Page Index Size +16 8 Checksum +Total: 24 bytes +``` + +### Reading Trailer + +The trailer is always at the end of the file: + +```go +func readTrailer(f *os.File) (*Trailer, error) { + // Seek to trailer position + _, err := f.Seek(-TrailerSize, io.SeekEnd) + if err != nil { + return nil, err + } + + var trailer Trailer + err = binary.Read(f, binary.BigEndian, &trailer) + return &trailer, err +} +``` + +## File Naming Convention + +LTX files follow a strict naming pattern: + +``` +Format: MMMMMMMMMMMMMMMM-NNNNNNNNNNNNNNNN.ltx +Where: + M = MinTXID (16 hex digits, zero-padded) + N = MaxTXID (16 hex digits, zero-padded) + +Examples: + 0000000000000001-0000000000000064.ltx (TXID 1-100) + 0000000000000065-00000000000000c8.ltx (TXID 101-200) +``` + +### Parsing Filenames + +```go +// From github.com/superfly/ltx +func ParseFilename(name string) (minTXID, maxTXID TXID, err error) { + // Remove extension + name = strings.TrimSuffix(name, ".ltx") + + // Split on hyphen + parts := strings.Split(name, "-") + if len(parts) != 2 { + return 0, 0, errors.New("invalid format") + } + + // Parse hex values + min, err := strconv.ParseUint(parts[0], 16, 64) + max, err := strconv.ParseUint(parts[1], 16, 64) + + return TXID(min), TXID(max), nil +} + +func FormatFilename(minTXID, maxTXID TXID) string { + return fmt.Sprintf("%016x-%016x.ltx", minTXID, maxTXID) +} +``` + +## Checksum Calculation + +LTX uses CRC-64 ECMA checksums: + +```go 
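// Note: crc64.ECMA below is the ECMA-182 polynomial constant from Go's
// standard hash/crc64 package; the same table must be used when computing
// and when verifying page checksums, or verification will fail.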
+import "hash/crc64" + +var crcTable = crc64.MakeTable(crc64.ECMA) + +func calculateChecksum(data []byte) uint64 { + return crc64.Checksum(data, crcTable) +} + +// Cumulative checksum for multiple pages +func cumulativeChecksum(pages [][]byte) uint64 { + h := crc64.New(crcTable) + for _, page := range pages { + h.Write(page) + } + return h.Sum64() +} +``` + +### Verification During Read + +```go +func verifyPage(header PageHeader, data []byte) error { + if header.Checksum == 0 { + return nil // Checksums disabled + } + + calculated := calculateChecksum(data) + if calculated != header.Checksum { + return fmt.Errorf("checksum mismatch: expected %x, got %x", + header.Checksum, calculated) + } + return nil +} +``` + +## Compaction and Levels + +LTX files are organized in levels for efficient compaction: + +``` +Level 0: Raw files (no compaction) + /ltx/0000/0000000000000001-0000000000000064.ltx + /ltx/0000/0000000000000065-00000000000000c8.ltx + +Level 1: Hourly compaction + /ltx/0001/0000000000000001-0000000000000fff.ltx + +Level 2: Daily compaction + /ltx/0002/0000000000000001-000000000000ffff.ltx + +Snapshots: Full database state + /snapshots/20240101120000.ltx +``` + +### Compaction Process + +```go +func compactLTXFiles(files []*LTXFile) (*LTXFile, error) { + // Create page map (newer overwrites older) + pageMap := make(map[uint32]Page) + + for _, file := range files { + for _, page := range file.Pages { + pageMap[page.Number] = page + } + } + + // Create new LTX with merged pages + merged := <XFile{ + MinTXID: files[0].MinTXID, + MaxTXID: files[len(files)-1].MaxTXID, + } + + // Add pages in order (skip lock page) + for pgno := uint32(1); pgno <= maxPgno; pgno++ { + if pgno == LockPageNumber(pageSize) { + continue // Skip 1GB lock page + } + if page, ok := pageMap[pgno]; ok { + merged.Pages = append(merged.Pages, page) + } + } + + return merged, nil +} +``` + +## Reading LTX Files + +### Complete File Read + +```go +func ReadLTXFile(path string) (*LTXFile, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + + dec := ltx.NewDecoder(f) + + // Read and verify header + header, err := dec.Header() + if err != nil { + return nil, err + } + + // Read all pages + var pages []Page + for { + var pageHeader ltx.PageHeader + pageData := make([]byte, header.PageSize) + + err := dec.DecodePage(&pageHeader, pageData) + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + + pages = append(pages, Page{ + Number: pageHeader.PageNo, + Data: pageData, + }) + } + + return <XFile{ + Header: header, + Pages: pages, + }, nil +} +``` + +### Partial Read Using Index + +```go +func ReadPage(path string, pageNo uint32) ([]byte, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + + // Read trailer to find index + trailer, err := readTrailer(f) + if err != nil { + return nil, err + } + + // Read page index + f.Seek(trailer.PageIndexOffset, io.SeekStart) + indexData := make([]byte, trailer.PageIndexSize) + f.Read(indexData) + + index := parsePageIndex(indexData) + + // Find page in index + offset, found := findPage(index, pageNo) + if !found { + return nil, errors.New("page not found") + } + + // Read page at offset + f.Seek(offset, io.SeekStart) + + var pageHeader PageHeader + binary.Read(f, binary.BigEndian, &pageHeader) + + pageData := make([]byte, pageSize) + f.Read(pageData) + + return pageData, nil +} +``` + +## Writing LTX Files + +### Creating New LTX File + +```go +func WriteLTXFile(path string, pages 
[]Page) error { + f, err := os.Create(path) + if err != nil { + return err + } + defer f.Close() + + enc := ltx.NewEncoder(f) + + // Write header + header := ltx.Header{ + Version: ltx.Version, + Flags: 0, + PageSize: 4096, + PageCount: uint32(len(pages)), + MinTXID: minTXID, + MaxTXID: maxTXID, + } + + if err := enc.EncodeHeader(header); err != nil { + return err + } + + // Write pages and build index + var index []PageIndexElem + for _, page := range pages { + offset := enc.Offset() + + // Skip lock page + if page.Number == LockPageNumber(header.PageSize) { + continue + } + + pageHeader := ltx.PageHeader{ + PageNo: page.Number, + Checksum: calculateChecksum(page.Data), + } + + if err := enc.EncodePage(pageHeader, page.Data); err != nil { + return err + } + + index = append(index, PageIndexElem{ + PageNo: page.Number, + Offset: offset, + }) + } + + // Write page index + if err := enc.EncodePageIndex(index); err != nil { + return err + } + + // Write trailer + if err := enc.EncodeTrailer(); err != nil { + return err + } + + return enc.Close() +} +``` + +## Relationship to SQLite WAL + +### WAL to LTX Conversion + +```mermaid +sequenceDiagram + participant SQLite + participant WAL + participant Litestream + participant LTX + + SQLite->>WAL: Write transaction + WAL->>WAL: Append frames + + Litestream->>WAL: Monitor changes + WAL-->>Litestream: Read frames + + Litestream->>Litestream: Convert frames + Note over Litestream: - Skip lock page
- Add checksums
- Build index + + Litestream->>LTX: Write LTX file + LTX->>Storage: Upload +``` + +### Key Differences + +| Aspect | SQLite WAL | LTX Format | +|--------|------------|------------| +| Purpose | Temporary changes | Permanent archive | +| Mutability | Mutable (checkpoint) | Immutable | +| Structure | Sequential frames | Indexed pages | +| Checksum | Per-frame | Per-page + cumulative | +| Lock Page | Contains lock bytes | Always skipped | +| Naming | Fixed (-wal suffix) | TXID range | +| Lifetime | Until checkpoint | Forever | +| Size | Grows until checkpoint | Fixed at creation | + +### Transaction ID (TXID) + +```go +type TXID uint64 + +// TXID represents a logical transaction boundary +// Not directly from SQLite, but derived from: +// 1. WAL checkpoint sequence +// 2. Frame count +// 3. Logical grouping of changes + +func (db *DB) nextTXID() TXID { + // Increment from last known TXID + return db.lastTXID + 1 +} +``` + +## Best Practices + +### 1. Always Skip Lock Page + +```go +const PENDING_BYTE = 0x40000000 + +func shouldSkipPage(pageNo uint32, pageSize int) bool { + lockPage := uint32(PENDING_BYTE/pageSize) + 1 + return pageNo == lockPage +} +``` + +### 2. Preserve Timestamps During Compaction + +```go +// Keep earliest CreatedAt from source files +func compactWithTimestamp(files []*FileInfo) *FileInfo { + earliest := files[0].CreatedAt + for _, f := range files[1:] { + if f.CreatedAt.Before(earliest) { + earliest = f.CreatedAt + } + } + + return &FileInfo{ + CreatedAt: earliest, // Preserve for point-in-time recovery + } +} +``` + +### 3. Verify Checksums on Read + +```go +func safeReadLTX(path string) (*LTXFile, error) { + file, err := ReadLTXFile(path) + if err != nil { + return nil, err + } + + // Verify all checksums + for _, page := range file.Pages { + if err := verifyPage(page); err != nil { + return nil, fmt.Errorf("corrupted page %d: %w", + page.Number, err) + } + } + + return file, nil +} +``` + +### 4. Handle Partial Files + +```go +// For eventually consistent storage +func readWithRetry(client ReplicaClient, info *FileInfo) ([]byte, error) { + for attempts := 0; attempts < 5; attempts++ { + data, err := client.OpenLTXFile(...) + if err == nil { + // Verify we got complete file + if int64(len(data)) == info.Size { + return data, nil + } + } + + time.Sleep(time.Second * time.Duration(attempts+1)) + } + + return nil, errors.New("incomplete file after retries") +} +``` + +## Debugging LTX Files + +### Inspect LTX File + +```bash +# Using litestream CLI +litestream ltx info file.ltx + +# Output: +# Version: 0 +# Page Size: 4096 +# Page Count: 1234 +# Min TXID: 1 +# Max TXID: 100 +# File Size: 5.2MB +``` + +### Dump Pages + +```bash +# List all pages in file +litestream ltx pages file.ltx + +# Dump specific page +litestream ltx page file.ltx 42 +``` + +### Verify Integrity + +```bash +# Check all checksums +litestream ltx verify file.ltx + +# Output: +# Header checksum: OK +# Page checksums: OK (1234/1234) +# Trailer checksum: OK +# File integrity: VALID +``` + +## Summary + +LTX format provides: +1. **Immutable history** - Every change preserved +2. **Efficient storage** - Indexed, compressed via compaction +3. **Data integrity** - Checksums at multiple levels +4. **Point-in-time recovery** - Via TXID ranges +5. 
**Cloud-optimized** - Designed for object storage + +Understanding LTX is essential for: +- Implementing replica clients +- Debugging replication issues +- Optimizing compaction +- Ensuring data integrity +- Building recovery tools diff --git a/docs/RELEASE.md b/docs/RELEASE.md deleted file mode 100644 index 0b0bdaf19..000000000 --- a/docs/RELEASE.md +++ /dev/null @@ -1,329 +0,0 @@ -# Litestream Release Process - -This document describes the release process for Litestream using GoReleaser. - -## Quick Start for Maintainers - -To create a release after certificates are configured: - -```bash -# Tag and push -git tag -a v0.3.14 -m "Release v0.3.14" -git push origin v0.3.14 -``` - -The GitHub Actions workflow will handle everything else automatically. - -## Overview - -Litestream uses [GoReleaser](https://goreleaser.com/) to automate the release process, providing: - -- Cross-platform binary builds (Linux, macOS, Windows) -- Automatic changelog generation -- Homebrew formula updates -- Debian/RPM package generation -- Binary signing (when certificates are configured) -- SBOM (Software Bill of Materials) generation - -## Platform Support - -### Officially Supported Platforms - -- Linux (amd64, arm64, armv6, armv7) -- macOS (amd64, arm64) - -### Unsupported Platforms - -- **Windows (amd64, arm64)**: Binaries are provided for convenience but Windows is NOT an officially supported platform. Use at your own risk. Community contributions for Windows improvements are welcome. - -## Prerequisites - -### Required Tools - -- [GoReleaser](https://goreleaser.com/install/) v2.0+ -- [GitHub CLI](https://cli.github.com/) (for automated releases) -- Go 1.24+ - -### Optional Tools (for signing) - -- [gon](https://github.com/mitchellh/gon) (macOS signing and notarization) -- signtool (Windows signing) - -## Release Process - -### 1. Prepare the Release - -1. Ensure all changes are merged to main -2. Update CHANGELOG.md if needed (GoReleaser will auto-generate from commits) -3. Ensure all tests pass: - - ```bash - go test -v ./... - go vet ./... - staticcheck ./... - ``` - -### 2. Create a Release Tag - -```bash -# Create and push a tag -git tag -a v0.3.14 -m "Release v0.3.14" -git push origin v0.3.14 -``` - -The tag push will automatically trigger the GitHub Actions release workflow. - -### 3. Manual Release (if needed) - -If you need to run a release manually: - -```bash -# Export GitHub token -export GITHUB_TOKEN="your-token-here" - -# Run GoReleaser -goreleaser release --clean -``` - -### 4. Testing Releases - -To test the release process without publishing: - -```bash -# Create a snapshot release (doesn't publish) -goreleaser release --snapshot --clean - -# Test a single platform build -goreleaser build --snapshot --clean --single-target -``` - -## GitHub Actions Workflow - -The release workflow (`.github/workflows/release.yml`) is triggered automatically when: - -- A tag matching `v*` is pushed -- Manually via workflow dispatch - -The workflow: - -1. Sets up the build environment -2. Runs GoReleaser to build all binaries -3. Creates GitHub release with artifacts -4. Updates Homebrew tap (if configured) -5. 
Signs binaries (if certificates are configured) - -## Configuration Files - -### `.goreleaser.yml` - -Main GoReleaser configuration defining: - -- Build targets and flags -- Archive formats -- Package formats (deb, rpm) -- Homebrew formula -- Release notes template - -### `etc/gon-sign.hcl` - -macOS signing configuration (requires Apple Developer certificates) - -### `.github/workflows/release.yml` - -GitHub Actions workflow for automated releases - -## Setting Up Binary Signing - -### macOS Signing - Detailed Instructions - -#### Step 1: Get Apple Developer Account ($99/year) - -1. Go to -2. Click "Enroll" and follow the process -3. Use your existing Apple ID or create a new one -4. Complete identity verification (may take 24-48 hours) -5. Pay the $99 annual fee - -#### Step 2: Create Developer ID Certificate - -1. Once enrolled, go to -2. Navigate to "Certificates, IDs & Profiles" -3. Click the "+" button to create a new certificate -4. Select "Developer ID Application" under "Software" -5. Follow the Certificate Signing Request (CSR) process: - - Open Keychain Access on your Mac - - Menu: Keychain Access → Certificate Assistant → Request a Certificate - - Enter your email and name - - Select "Saved to disk" - - Save the CSR file -6. Upload the CSR file in the Apple Developer portal -7. Download the generated certificate -8. Double-click to install in Keychain Access - -#### Step 3: Export Certificate for CI - -1. Open Keychain Access -2. Find your "Developer ID Application: [Your Name]" certificate -3. Right-click and select "Export" -4. Save as .p12 format with a strong password -5. Convert to base64 for GitHub secrets: - ```bash - base64 -i certificate.p12 -o certificate_base64.txt - ``` - -#### Step 4: Create App Store Connect API Key - -1. Go to -2. Click the "+" button to generate a new API key -3. Name: "GoReleaser CI" -4. Access: "Developer" role -5. Download the .p8 file (IMPORTANT: Can only download once!) -6. Note these values: - - Issuer ID (shown at the top of the API Keys page) - - Key ID (shown in the key list) -7. Convert .p8 to base64: - ```bash - base64 -i AuthKey_XXXXX.p8 -o api_key_base64.txt - ``` - -#### Step 5: Create App-Specific Password - -1. Go to -2. Sign in and go to "Security" -3. Under "App-Specific Passwords", click "Generate Password" -4. Label it "Litestream GoReleaser" -5. Save the generated password securely - -#### Step 6: Configure GitHub Secrets - -Go to GitHub repository Settings → Secrets and variables → Actions: - -| Secret Name | How to Get It | -|------------|---------------| -| `MACOS_CERTIFICATE_P12` | Contents of certificate_base64.txt from Step 3 | -| `MACOS_CERTIFICATE_PASSWORD` | Password used when exporting .p12 in Step 3 | -| `APPLE_API_KEY_ID` | Key ID from Step 4 | -| `APPLE_API_ISSUER_ID` | Issuer ID from Step 4 | -| `APPLE_API_KEY_P8` | Contents of api_key_base64.txt from Step 4 | -| `AC_PASSWORD` | App-specific password from Step 5 | -| `APPLE_ID_USERNAME` | Your Apple ID email | -| `APPLE_TEAM_ID` | Find in Apple Developer account under Membership | -| `APPLE_DEVELOPER_ID` | Full certificate name (e.g., "Developer ID Application: Your Name (TEAMID)") | - -#### Step 7: Enable in Workflow - -Edit `.github/workflows/release.yml`: -- Find the `macos-sign` job -- Remove or change `if: ${{ false }}` to `if: true` - -### Windows Signing (Optional - Unsupported Platform) - -Since Windows is not officially supported, signing is optional. -If you choose to sign: - -1. 
**Obtain Code Signing Certificate** - - Purchase from DigiCert, Sectigo, or GlobalSign (~$200-500/year) - - Or use Microsoft Trusted Signing (Azure-based) - -2. **Configure GitHub Secrets** - - ```text - WINDOWS_CERTIFICATE_PFX: Base64-encoded .pfx file - WINDOWS_CERTIFICATE_PASSWORD: Certificate password - ``` - -3. **Enable in workflow** - - Remove `if: ${{ false }}` from windows-sign job in release.yml - -## Homebrew Tap Setup (Required for macOS Distribution) - -### Step 1: Create the Tap Repository - -Run the provided script or manually create the repository: - -```bash -./scripts/setup-homebrew-tap.sh -``` - -Or manually: -1. Create a new repository named `homebrew-litestream` under the `benbjohnson` account -2. Make it public -3. Add a README and Formula directory - -### Step 2: Create GitHub Personal Access Token - -1. Go to -2. Name: "Litestream Homebrew Tap" -3. Expiration: No expiration (or 1 year if you prefer) -4. Select scopes: - - `repo` (Full control of private repositories) - - This allows GoReleaser to push formula updates -5. Click "Generate token" -6. Copy the token immediately (won't be shown again) - -### Step 3: Add Token to Repository Secrets - -1. Go to Litestream repository settings -2. Navigate to Settings → Secrets and variables → Actions -3. Click "New repository secret" -4. Name: `HOMEBREW_TAP_GITHUB_TOKEN` -5. Value: Paste the token from Step 2 - -### Step 4: Test Installation - -After the first release: - -```bash -brew tap benbjohnson/litestream -brew install litestream -``` - -## Troubleshooting - -### Common Issues - -#### Build fails with "version: 0" error - -- Ensure `.goreleaser.yml` starts with `version: 2` - -#### Homebrew formula not updated - -- Check HOMEBREW_TAP_GITHUB_TOKEN secret is set -- Verify tap repository exists and is accessible - -#### macOS binary rejected by Gatekeeper - -- Ensure signing certificates are valid -- Check notarization completed successfully -- Verify AC_PASSWORD is an app-specific password - -#### Windows SmartScreen warning - -- This is expected for unsigned binaries -- Consider signing if distributing widely (though platform is unsupported) - -### Testing Local Builds - -```bash -# Test specific platform -GOOS=linux GOARCH=arm64 goreleaser build --snapshot --clean --single-target - -# Check configuration -goreleaser check - -# Dry run (no upload) -goreleaser release --skip=publish --clean -``` - -## Migration from Manual Process - -The old manual release process using Makefile targets and individual workflows has been replaced by GoReleaser. - -## Support and Issues - -For release process issues: - -- Check GoReleaser documentation: -- File issues at: - -Remember: Windows binaries are provided as-is without official support. diff --git a/docs/REPLICA_CLIENT_GUIDE.md b/docs/REPLICA_CLIENT_GUIDE.md new file mode 100644 index 000000000..b40a968f8 --- /dev/null +++ b/docs/REPLICA_CLIENT_GUIDE.md @@ -0,0 +1,750 @@ +# ReplicaClient Implementation Guide + +This guide provides comprehensive instructions for implementing new storage backends for Litestream replication. 
+ +## Table of Contents +- [Interface Contract](#interface-contract) +- [Implementation Checklist](#implementation-checklist) +- [Eventual Consistency Handling](#eventual-consistency-handling) +- [Error Handling](#error-handling) +- [Testing Requirements](#testing-requirements) +- [Common Implementation Mistakes](#common-implementation-mistakes) +- [Reference Implementations](#reference-implementations) + +## Interface Contract + +All replica clients MUST implement the `ReplicaClient` interface defined in `replica_client.go`: + +```go +type ReplicaClient interface { + // Returns the type identifier (e.g., "s3", "gcs", "file") + Type() string + + // Returns iterator of LTX files at given level + // seek: Start from this TXID (0 = beginning) + LTXFiles(ctx context.Context, level int, seek ltx.TXID) (ltx.FileIterator, error) + + // Opens an LTX file for reading + // Returns os.ErrNotExist if file doesn't exist + OpenLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, offset, size int64) (io.ReadCloser, error) + + // Writes an LTX file to storage + // MUST preserve createdAt timestamp if provided + WriteLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, r io.Reader) (*ltx.FileInfo, error) + + // Deletes one or more LTX files + DeleteLTXFiles(ctx context.Context, files []*ltx.FileInfo) error + + // Deletes all files for this database + DeleteAll(ctx context.Context) error +} +``` + +## Implementation Checklist + +### Required Features +- [ ] Implement all interface methods +- [ ] Support partial reads (offset/size in OpenLTXFile) +- [ ] Return proper error types (especially os.ErrNotExist) +- [ ] Handle context cancellation +- [ ] Preserve file timestamps (CreatedAt) +- [ ] Support concurrent operations +- [ ] Implement proper cleanup in DeleteAll + +### Optional Features +- [ ] Connection pooling +- [ ] Retry logic with exponential backoff +- [ ] Request batching +- [ ] Compression +- [ ] Encryption at rest +- [ ] Bandwidth throttling + +## Eventual Consistency Handling + +Many cloud storage services exhibit eventual consistency, where: +- A file you just wrote might not be immediately visible +- A file might be listed but only partially readable +- Deletes might not take effect immediately + +### Best Practices + +#### 1. Write-After-Write Consistency + +```go +func (c *ReplicaClient) WriteLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, r io.Reader) (*ltx.FileInfo, error) { + // Buffer the entire content first + data, err := io.ReadAll(r) + if err != nil { + return nil, fmt.Errorf("buffer ltx data: %w", err) + } + + // Calculate checksum before upload + checksum := crc64.Checksum(data, crc64.MakeTable(crc64.ECMA)) + + // Upload with checksum verification + err = c.uploadWithVerification(ctx, path, data, checksum) + if err != nil { + return nil, err + } + + // Verify the file is readable before returning + return c.verifyUpload(ctx, path, checksum) +} + +func (c *ReplicaClient) verifyUpload(ctx context.Context, path string, expectedChecksum uint64) (*ltx.FileInfo, error) { + // Implement retry loop with backoff + backoff := 100 * time.Millisecond + for i := 0; i < 10; i++ { + info, err := c.statFile(ctx, path) + if err == nil { + // Verify checksum if possible + if info.Checksum == expectedChecksum { + return info, nil + } + } + + select { + case <-ctx.Done(): + return nil, ctx.Err() + case <-time.After(backoff): + backoff *= 2 + } + } + return nil, errors.New("upload verification failed") +} +``` + +#### 2. 
List-After-Write Consistency + +```go +func (c *ReplicaClient) LTXFiles(ctx context.Context, level int, seek ltx.TXID) (ltx.FileIterator, error) { + // List files from storage + files, err := c.listFiles(ctx, level) + if err != nil { + return nil, err + } + + // Sort by TXID for consistent ordering + sort.Slice(files, func(i, j int) bool { + if files[i].MinTXID != files[j].MinTXID { + return files[i].MinTXID < files[j].MinTXID + } + return files[i].MaxTXID < files[j].MaxTXID + }) + + // Filter by seek position + var filtered []*ltx.FileInfo + for _, f := range files { + if f.MinTXID >= seek { + filtered = append(filtered, f) + } + } + + return ltx.NewFileInfoSliceIterator(filtered), nil +} +``` + +#### 3. Read-After-Write Consistency + +```go +func (c *ReplicaClient) OpenLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, offset, size int64) (io.ReadCloser, error) { + path := c.ltxPath(level, minTXID, maxTXID) + + // For eventually consistent backends, implement retry + var lastErr error + backoff := 100 * time.Millisecond + + for i := 0; i < 5; i++ { + reader, err := c.openFile(ctx, path, offset, size) + if err == nil { + return reader, nil + } + + // Don't retry on definitive errors + if errors.Is(err, os.ErrNotExist) { + return nil, err + } + + lastErr = err + select { + case <-ctx.Done(): + return nil, ctx.Err() + case <-time.After(backoff): + backoff *= 2 + } + } + + return nil, fmt.Errorf("open file after retries: %w", lastErr) +} +``` + +## Error Handling + +### Standard Error Types + +Always return appropriate standard errors: + +```go +// File not found +return nil, os.ErrNotExist + +// Permission denied +return nil, os.ErrPermission + +// Context cancelled +return nil, ctx.Err() + +// Custom errors should wrap standard ones +return nil, fmt.Errorf("s3 download failed: %w", err) +``` + +### Error Classification + +```go +// Retryable errors +func isRetryable(err error) bool { + // Network errors + var netErr net.Error + if errors.As(err, &netErr) && netErr.Temporary() { + return true + } + + // Specific HTTP status codes + if httpErr, ok := err.(HTTPError); ok { + switch httpErr.StatusCode { + case 429, 500, 502, 503, 504: + return true + } + } + + // Timeout errors + if errors.Is(err, context.DeadlineExceeded) { + return true + } + + return false +} +``` + +### Logging Best Practices + +```go +func (c *ReplicaClient) WriteLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, r io.Reader) (*ltx.FileInfo, error) { + logger := slog.Default().With( + "replica", c.Type(), + "level", level, + "minTXID", minTXID, + "maxTXID", maxTXID, + ) + + logger.Debug("starting ltx upload") + + info, err := c.upload(ctx, level, minTXID, maxTXID, r) + if err != nil { + logger.Error("ltx upload failed", "error", err) + return nil, err + } + + logger.Info("ltx upload complete", "size", info.Size) + return info, nil +} +``` + +## Testing Requirements + +### Unit Tests + +Every replica client MUST have comprehensive unit tests: + +```go +// replica_client_test.go +func TestReplicaClient_WriteLTXFile(t *testing.T) { + client := NewReplicaClient(testConfig) + ctx := context.Background() + + // Test data + data := []byte("test ltx content") + reader := bytes.NewReader(data) + + // Write file + info, err := client.WriteLTXFile(ctx, 0, 1, 100, reader) + assert.NoError(t, err) + assert.Equal(t, int64(len(data)), info.Size) + + // Verify file exists + rc, err := client.OpenLTXFile(ctx, 0, 1, 100, 0, 0) + assert.NoError(t, err) + defer rc.Close() + + // Read and verify content + 
content, err := io.ReadAll(rc) + assert.NoError(t, err) + assert.Equal(t, data, content) +} + +func TestReplicaClient_PartialRead(t *testing.T) { + client := NewReplicaClient(testConfig) + ctx := context.Background() + + // Write test file + data := bytes.Repeat([]byte("x"), 1000) + _, err := client.WriteLTXFile(ctx, 0, 1, 100, bytes.NewReader(data)) + require.NoError(t, err) + + // Test partial read + rc, err := client.OpenLTXFile(ctx, 0, 1, 100, 100, 50) + require.NoError(t, err) + defer rc.Close() + + partial, err := io.ReadAll(rc) + assert.NoError(t, err) + assert.Equal(t, 50, len(partial)) + assert.Equal(t, data[100:150], partial) +} + +func TestReplicaClient_NotFound(t *testing.T) { + client := NewReplicaClient(testConfig) + ctx := context.Background() + + // Try to open non-existent file + _, err := client.OpenLTXFile(ctx, 0, 999, 999, 0, 0) + assert.True(t, errors.Is(err, os.ErrNotExist)) +} +``` + +### Integration Tests + +Integration tests run against real backends: + +```go +// +build integration + +func TestReplicaClient_Integration(t *testing.T) { + // Skip if not in integration mode + if testing.Short() { + t.Skip("skipping integration test") + } + + // Get credentials from environment + config := ConfigFromEnv(t) + client := NewReplicaClient(config) + ctx := context.Background() + + t.Run("Concurrent Writes", func(t *testing.T) { + var wg sync.WaitGroup + errors := make(chan error, 10) + + for i := 0; i < 10; i++ { + wg.Add(1) + go func(n int) { + defer wg.Done() + + data := []byte(fmt.Sprintf("concurrent %d", n)) + minTXID := ltx.TXID(n * 100) + maxTXID := ltx.TXID((n + 1) * 100) + + _, err := client.WriteLTXFile(ctx, 0, minTXID, maxTXID, + bytes.NewReader(data)) + if err != nil { + errors <- err + } + }(i) + } + + wg.Wait() + close(errors) + + for err := range errors { + t.Error(err) + } + }) + + t.Run("Large File", func(t *testing.T) { + // Test with 100MB file + data := bytes.Repeat([]byte("x"), 100*1024*1024) + + info, err := client.WriteLTXFile(ctx, 0, 1000, 2000, + bytes.NewReader(data)) + require.NoError(t, err) + assert.Equal(t, int64(len(data)), info.Size) + }) + + t.Run("Cleanup", func(t *testing.T) { + err := client.DeleteAll(ctx) + assert.NoError(t, err) + + // Verify cleanup + iter, err := client.LTXFiles(ctx, 0, 0) + require.NoError(t, err) + defer iter.Close() + + assert.False(t, iter.Next(), "files should be deleted") + }) +} +``` + +### Mock Client for Testing + +Provide a mock implementation for testing: + +```go +// mock/replica_client.go +type ReplicaClient struct { + mu sync.Mutex + files map[string]*ltx.FileInfo + data map[string][]byte + errors map[string]error // Inject errors for testing +} + +func (c *ReplicaClient) WriteLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, r io.Reader) (*ltx.FileInfo, error) { + c.mu.Lock() + defer c.mu.Unlock() + + // Check for injected error + key := fmt.Sprintf("write-%d-%d-%d", level, minTXID, maxTXID) + if err, ok := c.errors[key]; ok { + return nil, err + } + + // Store data + data, err := io.ReadAll(r) + if err != nil { + return nil, err + } + + path := ltxPath(level, minTXID, maxTXID) + c.data[path] = data + + info := <x.FileInfo{ + Level: level, + MinTXID: minTXID, + MaxTXID: maxTXID, + Size: int64(len(data)), + CreatedAt: time.Now(), + } + c.files[path] = info + + return info, nil +} +``` + +## Common Implementation Mistakes + +### ❌ Mistake 1: Not Handling Partial Reads + +```go +// WRONG - Always reads entire file +func (c *Client) OpenLTXFile(ctx context.Context, level int, minTXID, 
maxTXID ltx.TXID, offset, size int64) (io.ReadCloser, error) { + return c.storage.Download(path) // Ignores offset/size! +} +``` + +```go +// CORRECT - Respects offset and size +func (c *Client) OpenLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, offset, size int64) (io.ReadCloser, error) { + if offset == 0 && size == 0 { + // Full file + return c.storage.Download(path) + } + + // Partial read using Range header or equivalent + end := offset + size - 1 + if size == 0 { + end = 0 // Read to end + } + return c.storage.DownloadRange(path, offset, end) +} +``` + +### ❌ Mistake 2: Not Preserving CreatedAt + +```go +// WRONG - Uses current time +func (c *Client) WriteLTXFile(...) (*ltx.FileInfo, error) { + // Upload file... + + return <x.FileInfo{ + CreatedAt: time.Now(), // Wrong! Loses temporal info + }, nil +} +``` + +```go +// CORRECT - Preserves original timestamp +func (c *Client) WriteLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, r io.Reader, createdAt *time.Time) (*ltx.FileInfo, error) { + // Upload file... + + info := <x.FileInfo{ + Level: level, + MinTXID: minTXID, + MaxTXID: maxTXID, + Size: uploadedSize, + } + + // Preserve timestamp if provided + if createdAt != nil { + info.CreatedAt = *createdAt + } else { + info.CreatedAt = time.Now() + } + + return info, nil +} +``` + +### ❌ Mistake 3: Wrong Error Types + +```go +// WRONG - Generic error +func (c *Client) OpenLTXFile(...) (io.ReadCloser, error) { + resp, err := c.get(path) + if err != nil { + return nil, fmt.Errorf("not found") // Wrong type! + } +} +``` + +```go +// CORRECT - Proper error type +func (c *Client) OpenLTXFile(...) (io.ReadCloser, error) { + resp, err := c.get(path) + if err != nil { + if resp.StatusCode == 404 { + return nil, os.ErrNotExist // Correct type + } + return nil, fmt.Errorf("download failed: %w", err) + } +} +``` + +### ❌ Mistake 4: Not Handling Context + +```go +// WRONG - Ignores context +func (c *Client) WriteLTXFile(ctx context.Context, ...) (*ltx.FileInfo, error) { + // Long operation without checking context + for i := 0; i < 1000000; i++ { + doWork() // Could run forever! + } +} +``` + +```go +// CORRECT - Respects context +func (c *Client) WriteLTXFile(ctx context.Context, ...) (*ltx.FileInfo, error) { + // Check context periodically + for i := 0; i < 1000000; i++ { + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + // Continue work + } + + if err := doWork(ctx); err != nil { + return nil, err + } + } +} +``` + +### ❌ Mistake 5: Blocking in Iterator + +```go +// WRONG - Loads all files at once +func (c *Client) LTXFiles(ctx context.Context, level int, seek ltx.TXID) (ltx.FileIterator, error) { + allFiles, err := c.loadAllFiles(level) // Could be millions! 
+ if err != nil { + return nil, err + } + + return NewIterator(allFiles), nil +} +``` + +```go +// CORRECT - Lazy loading with pagination +func (c *Client) LTXFiles(ctx context.Context, level int, seek ltx.TXID) (ltx.FileIterator, error) { + return &lazyIterator{ + client: c, + level: level, + seek: seek, + pageSize: 1000, + }, nil +} + +type lazyIterator struct { + client *Client + level int + seek ltx.TXID + pageSize int + current []*ltx.FileInfo + index int + done bool +} + +func (i *lazyIterator) Next() bool { + if i.index >= len(i.current) && !i.done { + // Load next page + i.loadNextPage() + } + return i.index < len(i.current) +} +``` + +## Reference Implementations + +### File System Client (Simplest) + +See `file/replica_client.go` for the simplest implementation: +- Direct file I/O operations +- No network complexity +- Good starting reference + +### S3 Client (Most Complex) + +See `s3/replica_client.go` for advanced features: +- Multipart uploads for large files +- Retry logic with exponential backoff +- Request signing +- Eventual consistency handling + +### Key Patterns from S3 Implementation + +```go +// Path construction +func (c *ReplicaClient) ltxDir(level int) string { + if level == SnapshotLevel { + return path.Join(c.Path, "snapshots") + } + return path.Join(c.Path, "ltx", fmt.Sprintf("%04d", level)) +} + +// Metadata handling +func (c *ReplicaClient) WriteLTXFile(...) (*ltx.FileInfo, error) { + // Add metadata to object + metadata := map[string]string{ + "min-txid": fmt.Sprintf("%d", minTXID), + "max-txid": fmt.Sprintf("%d", maxTXID), + "level": fmt.Sprintf("%d", level), + } + + // Upload with metadata + _, err := c.s3.PutObjectWithContext(ctx, &s3.PutObjectInput{ + Bucket: &c.Bucket, + Key: &key, + Body: r, + Metadata: metadata, + }) +} + +// Error mapping +func mapS3Error(err error) error { + if aerr, ok := err.(awserr.Error); ok { + switch aerr.Code() { + case s3.ErrCodeNoSuchKey: + return os.ErrNotExist + case s3.ErrCodeAccessDenied: + return os.ErrPermission + } + } + return err +} +``` + +## Performance Optimization + +### Connection Pooling + +```go +type ReplicaClient struct { + pool *ConnectionPool +} + +func NewReplicaClient(config Config) *ReplicaClient { + pool := &ConnectionPool{ + MaxConnections: config.MaxConnections, + IdleTimeout: config.IdleTimeout, + } + + return &ReplicaClient{ + pool: pool, + } +} +``` + +### Request Batching + +```go +func (c *ReplicaClient) DeleteLTXFiles(ctx context.Context, files []*ltx.FileInfo) error { + // Batch deletes for efficiency + const batchSize = 100 + + for i := 0; i < len(files); i += batchSize { + end := i + batchSize + if end > len(files) { + end = len(files) + } + + batch := files[i:end] + if err := c.deleteBatch(ctx, batch); err != nil { + return fmt.Errorf("delete batch %d: %w", i/batchSize, err) + } + } + + return nil +} +``` + +### Caching + +```go +type ReplicaClient struct { + cache *FileInfoCache +} + +func (c *ReplicaClient) LTXFiles(ctx context.Context, level int, seek ltx.TXID) (ltx.FileIterator, error) { + // Check cache first + cacheKey := fmt.Sprintf("%d-%d", level, seek) + if cached, ok := c.cache.Get(cacheKey); ok { + return ltx.NewFileInfoSliceIterator(cached), nil + } + + // Load from storage + files, err := c.loadFiles(ctx, level, seek) + if err != nil { + return nil, err + } + + // Cache for future requests + c.cache.Set(cacheKey, files, 5*time.Minute) + + return ltx.NewFileInfoSliceIterator(files), nil +} +``` + +## Checklist for New Implementations + +Before submitting a new replica client: + 
+- [ ] All interface methods implemented +- [ ] Unit tests with >80% coverage +- [ ] Integration tests (with build tag) +- [ ] Mock client for testing +- [ ] Handles partial reads correctly +- [ ] Returns proper error types +- [ ] Preserves timestamps +- [ ] Handles context cancellation +- [ ] Documents eventual consistency behavior +- [ ] Includes retry logic for transient errors +- [ ] Logs appropriately (debug/info/error) +- [ ] README with configuration examples +- [ ] Added to main configuration parser + +## Getting Help + +1. Study existing implementations (start with `file/`, then `s3/`) +2. Check test files for expected behavior +3. Run integration tests against your backend +4. Use the mock client for rapid development +5. Ask in GitHub discussions for design feedback diff --git a/docs/SQLITE_INTERNALS.md b/docs/SQLITE_INTERNALS.md new file mode 100644 index 000000000..e334e13ce --- /dev/null +++ b/docs/SQLITE_INTERNALS.md @@ -0,0 +1,563 @@ +# SQLite Internals for Litestream + +This document explains SQLite internals critical for understanding Litestream's operation. + +## Table of Contents +- [SQLite File Structure](#sqlite-file-structure) +- [Write-Ahead Log (WAL)](#write-ahead-log-wal) +- [Page Structure](#page-structure) +- [The 1GB Lock Page](#the-1gb-lock-page) +- [Transaction Management](#transaction-management) +- [Checkpoint Modes](#checkpoint-modes) +- [Important SQLite Pragmas](#important-sqlite-pragmas) +- [SQLite API Usage](#sqlite-api-usage) + +## SQLite File Structure + +SQLite databases consist of: +1. **Main database file** - Contains actual data in pages +2. **WAL file** (-wal suffix) - Contains uncommitted changes +3. **SHM file** (-shm suffix) - Shared memory for coordination + +``` +database.db # Main database file (pages) +database.db-wal # Write-ahead log +database.db-shm # Shared memory file +``` + +## Write-Ahead Log (WAL) + +### WAL Basics + +WAL is SQLite's method for implementing atomic commits and rollback: +- Changes are first written to WAL +- Original database file unchanged until checkpoint +- Readers see consistent view through WAL + +```mermaid +graph LR + Write[Write Transaction] -->|Append| WAL[WAL File] + WAL -->|Checkpoint| DB[Main Database] + Read[Read Transaction] -->|Merge View| View[Consistent View] + DB --> View + WAL --> View +``` + +### WAL File Structure + +``` ++------------------+ +| WAL Header | 32 bytes ++------------------+ +| Frame 1 Header | 24 bytes +| Frame 1 Data | Page size bytes ++------------------+ +| Frame 2 Header | 24 bytes +| Frame 2 Data | Page size bytes ++------------------+ +| ... 
| ++------------------+ +``` + +#### WAL Header (32 bytes) +```go +type WALHeader struct { + Magic [4]byte // 0x377f0682 or 0x377f0683 + FileFormat uint32 // File format version (3007000) + PageSize uint32 // Database page size + Checkpoint uint32 // Checkpoint sequence number + Salt1 uint32 // Random salt for checksum + Salt2 uint32 // Random salt for checksum + Checksum1 uint32 // Header checksum + Checksum2 uint32 // Header checksum +} +``` + +#### WAL Frame Header (24 bytes) +```go +type WALFrameHeader struct { + PageNumber uint32 // Page number in database + DbSize uint32 // Size of database in pages + Salt1 uint32 // Must match header salt + Salt2 uint32 // Must match header salt + Checksum1 uint32 // Cumulative checksum + Checksum2 uint32 // Cumulative checksum +} +``` + +### Reading WAL in Litestream + +```go +// db.go - Reading WAL for replication +func (db *DB) readWAL() ([]Page, error) { + walPath := db.path + "-wal" + f, err := os.Open(walPath) + if err != nil { + return nil, err + } + defer f.Close() + + // Read WAL header + var header WALHeader + binary.Read(f, binary.BigEndian, &header) + + // Validate magic number + magic := binary.BigEndian.Uint32(header.Magic[:]) + if magic != 0x377f0682 && magic != 0x377f0683 { + return nil, errors.New("invalid WAL magic") + } + + // Read frames + var pages []Page + for { + var frameHeader WALFrameHeader + err := binary.Read(f, binary.BigEndian, &frameHeader) + if err == io.EOF { + break + } + + // Read page data + pageData := make([]byte, header.PageSize) + f.Read(pageData) + + pages = append(pages, Page{ + Number: frameHeader.PageNumber, + Data: pageData, + }) + } + + return pages, nil +} +``` + +## Page Structure + +### Database Pages + +SQLite divides the database into fixed-size pages: + +``` +Page Size: Typically 4096 bytes (4KB) +Page Number: 1-based indexing +Page Types: + - B-tree interior pages + - B-tree leaf pages + - Overflow pages + - Freelist pages + - Lock byte page (at 1GB) +``` + +### Page Layout + +``` ++------------------+ +| Page Header | Variable (8-12 bytes) ++------------------+ +| Cell Pointers | 2 bytes each ++------------------+ +| Unallocated | +| Space | ++------------------+ +| Cell Content | Variable size +| Area | (grows upward) ++------------------+ +``` + +### Page Header Structure + +```go +type PageHeader struct { + PageType byte // 0x02, 0x05, 0x0a, 0x0d + FreeBlockStart uint16 // Start of free block list + CellCount uint16 // Number of cells + CellStart uint16 // Offset to first cell + FragmentBytes byte // Fragmented free bytes + // Additional fields for interior pages + RightChild uint32 // Only for interior pages +} +``` + +## The 1GB Lock Page + +### Critical Concept + +SQLite reserves a special page at exactly 1,073,741,824 bytes (0x40000000) for locking: + +```go +const PENDING_BYTE = 0x40000000 // 1GB mark + +// Page number varies by page size +func LockPageNumber(pageSize int) uint32 { + return uint32(PENDING_BYTE/pageSize) + 1 +} + +// Examples: +// 4KB pages: 262145 (0x40001) +// 8KB pages: 131073 (0x20001) +// 16KB pages: 65537 (0x10001) +// 32KB pages: 32769 (0x08001) +// 64KB pages: 16385 (0x04001) +``` + +### Why This Matters + +1. **Cannot contain data** - SQLite will never write user data here +2. **Must be skipped** - During replication/compaction +3. **Affects large databases** - Only databases >1GB +4. 
**Page number changes** - Different for each page size + +### Implementation in Litestream + +```go +// From superfly/ltx package +func LockPgno(pageSize int) uint32 { + return uint32(PENDING_BYTE/pageSize) + 1 +} + +// db.go - Skipping lock page during iteration +for pgno := uint32(1); pgno <= maxPgno; pgno++ { + if pgno == ltx.LockPgno(db.pageSize) { + continue // Skip lock page + } + + // Process normal page + processPage(pgno) +} +``` + +### Testing Lock Page + +```sql +-- Create database that spans lock page +CREATE TABLE test (id INTEGER PRIMARY KEY, data BLOB); + +-- Insert data until database > 1GB +WITH RECURSIVE generate_series(value) AS ( + SELECT 1 + UNION ALL + SELECT value+1 FROM generate_series + LIMIT 300000 +) +INSERT INTO test SELECT value, randomblob(4000) FROM generate_series; + +-- Check database size +PRAGMA page_count; -- Should be > 262145 for 4KB pages +PRAGMA page_size; -- Typically 4096 + +-- Calculate if lock page is in range +-- For 4KB pages: if page_count > 262145, lock page is included +``` + +## Transaction Management + +### SQLite Transaction Types + +1. **Deferred Transaction** (default) + ```sql + BEGIN DEFERRED; -- Lock acquired on first use + ``` + +2. **Immediate Transaction** + ```sql + BEGIN IMMEDIATE; -- RESERVED lock immediately + ``` + +3. **Exclusive Transaction** + ```sql + BEGIN EXCLUSIVE; -- EXCLUSIVE lock immediately + ``` + +### Lock Types in SQLite + +```mermaid +graph TD + UNLOCKED -->|BEGIN| SHARED + SHARED -->|Write| RESERVED + RESERVED -->|Prepare| PENDING + PENDING -->|Commit| EXCLUSIVE + EXCLUSIVE -->|Done| UNLOCKED +``` + +1. **SHARED** - Multiple readers allowed +2. **RESERVED** - Signals intent to write +3. **PENDING** - Blocking new SHARED locks +4. **EXCLUSIVE** - Single writer, no readers + +### Litestream's Long-Running Read Transaction + +```go +// db.go - Maintaining read transaction for consistency +func (db *DB) initReadTx() error { + // Start read-only transaction + tx, err := db.db.BeginTx(context.Background(), &sql.TxOptions{ + ReadOnly: true, + }) + if err != nil { + return err + } + + // Execute query to acquire SHARED lock + var dummy string + err = tx.QueryRow("SELECT ''").Scan(&dummy) + if err != nil { + tx.Rollback() + return err + } + + // Keep transaction open + db.rtx = tx + return nil +} +``` + +**Purpose:** +- Prevents database from being checkpointed past our read point +- Ensures consistent view of database +- Allows reading pages from WAL + +## Checkpoint Modes + +### PASSIVE Checkpoint (default) +```sql +PRAGMA wal_checkpoint(PASSIVE); +``` +- Attempts checkpoint +- Fails if readers present +- Non-blocking + +### FULL Checkpoint +```sql +PRAGMA wal_checkpoint(FULL); +``` +- Waits for readers to finish +- Blocks new readers +- Ensures checkpoint completes + +### RESTART Checkpoint +```sql +PRAGMA wal_checkpoint(RESTART); +``` +- Like FULL, but also: +- Ensures next writer starts at beginning of WAL +- Resets WAL file + +### TRUNCATE Checkpoint +```sql +PRAGMA wal_checkpoint(TRUNCATE); +``` +- Like RESTART, but also: +- Truncates WAL file to zero length +- Releases disk space + +### Litestream Checkpoint Strategy + +```go +// db.go - Checkpoint decision logic +func (db *DB) autoCheckpoint() error { + walSize := db.WALSize() + pageCount := walSize / db.pageSize + + if pageCount > db.TruncatePageN { + // Force truncation for very large WAL + return db.Checkpoint("TRUNCATE") + } else if pageCount > db.MaxCheckpointPageN { + // Force checkpoint for large WAL + return db.Checkpoint("RESTART") + } else if 
pageCount > db.MinCheckpointPageN { + // Try passive checkpoint + return db.Checkpoint("PASSIVE") + } + + return nil +} +``` + +## Important SQLite Pragmas + +### Essential Pragmas for Litestream + +```sql +-- Enable WAL mode (required) +PRAGMA journal_mode = WAL; + +-- Get database info +PRAGMA page_size; -- Page size in bytes +PRAGMA page_count; -- Total pages in database +PRAGMA freelist_count; -- Free pages + +-- WAL information +PRAGMA wal_checkpoint; -- Perform checkpoint +PRAGMA wal_autocheckpoint; -- Auto-checkpoint threshold +PRAGMA wal_checkpoint(PASSIVE); -- Non-blocking checkpoint + +-- Database state +PRAGMA integrity_check; -- Verify database integrity +PRAGMA quick_check; -- Fast integrity check + +-- Lock information +PRAGMA lock_status; -- Current locks (debug builds) + +-- Performance tuning +PRAGMA synchronous = NORMAL; -- Sync mode +PRAGMA busy_timeout = 5000; -- Wait 5s for locks +PRAGMA cache_size = -64000; -- 64MB cache +``` + +### Reading Pragmas in Go + +```go +func getDatabaseInfo(db *sql.DB) (*DBInfo, error) { + info := &DBInfo{} + + // Page size + err := db.QueryRow("PRAGMA page_size").Scan(&info.PageSize) + + // Page count + err = db.QueryRow("PRAGMA page_count").Scan(&info.PageCount) + + // Journal mode + err = db.QueryRow("PRAGMA journal_mode").Scan(&info.JournalMode) + + // Calculate size + info.Size = info.PageSize * info.PageCount + + return info, nil +} +``` + +## SQLite API Usage + +### Direct SQLite Access + +Litestream uses both database/sql and direct SQLite APIs: + +```go +// Using database/sql for queries +db, err := sql.Open("sqlite3", "database.db") + +// Using modernc.org/sqlite for low-level access +conn, err := sqlite.Open("database.db") + +// Direct page access (requires special builds) +page := readPage(conn, pageNumber) +``` + +### Connection Modes + +```go +// Read-only connection +db, err := sql.Open("sqlite3", "file:database.db?mode=ro") + +// WAL mode connection +db, err := sql.Open("sqlite3", "database.db?_journal=WAL") + +// With busy timeout +db, err := sql.Open("sqlite3", "database.db?_busy_timeout=5000") + +// Multiple options +db, err := sql.Open("sqlite3", "database.db?_journal=WAL&_busy_timeout=5000&_synchronous=NORMAL") +``` + +### WAL File Access Pattern + +```go +// Litestream's approach to reading WAL +func (db *DB) monitorWAL() { + walPath := db.path + "-wal" + + for { + // Check WAL file size + stat, err := os.Stat(walPath) + if err != nil { + continue // WAL might not exist yet + } + + // Compare with last known size + if stat.Size() > db.lastWALSize { + // New data in WAL + db.processWALChanges() + db.lastWALSize = stat.Size() + } + + time.Sleep(db.MonitorInterval) + } +} +``` + +## Critical SQLite Behaviors + +### 1. Automatic Checkpoint +SQLite automatically checkpoints when WAL reaches 1000 pages (default): +```go +// Can interfere with Litestream's control +// Solution: Set high threshold +db.Exec("PRAGMA wal_autocheckpoint = 10000") +``` + +### 2. Busy Timeout +Default timeout is 0 (immediate failure): +```go +// Set reasonable timeout +db.Exec("PRAGMA busy_timeout = 5000") // 5 seconds +``` + +### 3. Synchronous Mode +Controls when SQLite waits for disk writes: +```go +// NORMAL is safe with WAL +db.Exec("PRAGMA synchronous = NORMAL") +``` + +### 4. 
Page Cache +SQLite maintains an in-memory page cache: +```go +// Set cache size (negative = KB, positive = pages) +db.Exec("PRAGMA cache_size = -64000") // 64MB +``` + +## WAL to LTX Conversion + +Litestream converts WAL frames to LTX format: + +```go +func walToLTX(walFrames []WALFrame) *LTXFile { + ltx := <XFile{ + Header: LTXHeader{ + PageSize: walFrames[0].PageSize, + MinTXID: walFrames[0].TransactionID, + }, + } + + for _, frame := range walFrames { + // Skip lock page + if frame.PageNumber == LockPageNumber(ltx.Header.PageSize) { + continue + } + + ltx.Pages = append(ltx.Pages, Page{ + Number: frame.PageNumber, + Data: frame.Data, + }) + + ltx.Header.MaxTXID = frame.TransactionID + } + + return ltx +} +``` + +## Key Takeaways + +1. **WAL is temporary** - Gets merged back via checkpoint +2. **Lock page is sacred** - Never write data at 1GB mark +3. **Page size matters** - Affects lock page number and performance +4. **Transactions provide consistency** - Long-running read prevents changes +5. **Checkpoints are critical** - Balance between WAL size and performance +6. **SQLite locks coordinate access** - Understanding prevents deadlocks +7. **Pragmas control behavior** - Must be set correctly for Litestream + +This understanding is essential for: +- Debugging replication issues +- Implementing new features +- Optimizing performance +- Handling edge cases correctly diff --git a/docs/TESTING_GUIDE.md b/docs/TESTING_GUIDE.md new file mode 100644 index 000000000..183d843f0 --- /dev/null +++ b/docs/TESTING_GUIDE.md @@ -0,0 +1,1014 @@ +# Litestream Testing Guide + +Comprehensive guide for testing Litestream components and handling edge cases. + +## Table of Contents +- [Testing Philosophy](#testing-philosophy) +- [1GB Database Testing](#1gb-database-testing) +- [Race Condition Testing](#race-condition-testing) +- [Integration Testing](#integration-testing) +- [Performance Testing](#performance-testing) +- [Mock Usage Patterns](#mock-usage-patterns) +- [Test Utilities](#test-utilities) +- [Common Test Failures](#common-test-failures) + +## Testing Philosophy + +Litestream testing follows these principles: + +1. **Test at Multiple Levels**: Unit, integration, and end-to-end +2. **Focus on Edge Cases**: Especially >1GB databases and eventual consistency +3. **Use Real SQLite**: Avoid mocking SQLite behavior +4. **Race Detection**: Always run with `-race` flag +5. **Deterministic Tests**: Use fixed seeds and timestamps where possible + +## 1GB Database Testing + +### The Lock Page Problem + +SQLite reserves a special lock page at exactly 1GB (0x40000000 bytes). This page cannot contain data and must be skipped during replication. 
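+
+A quick way to confirm that a test database really spans the lock page is to compare its page count against the lock page number for its configured page size. Below is a minimal sketch, assuming the same imports as the other snippets in this guide (`database/sql`, testify's `require`, and the `ltx` package); the helper name `requireSpansLockPage` is illustrative, not part of the Litestream API:
+
+```go
+// requireSpansLockPage fails the test unless the database extends past the
+// 1GB lock page for its configured page size.
+func requireSpansLockPage(t *testing.T, dbPath string) {
+    t.Helper()
+
+    db, err := sql.Open("sqlite3", dbPath)
+    require.NoError(t, err)
+    defer db.Close()
+
+    // Ask SQLite for its page size and total page count.
+    var pageSize, pageCount int
+    require.NoError(t, db.QueryRow("PRAGMA page_size").Scan(&pageSize))
+    require.NoError(t, db.QueryRow("PRAGMA page_count").Scan(&pageCount))
+
+    // Lock page number depends on page size: PENDING_BYTE/pageSize + 1.
+    lockPgno := ltx.LockPgno(pageSize)
+    require.Greater(t, uint32(pageCount), lockPgno,
+        "database must extend past the lock page (page %d)", lockPgno)
+}
+```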
+ +### Test Requirements + +#### Creating Test Databases + +```bash +# Use litestream-test tool for large databases +./bin/litestream-test populate \ + -db test.db \ + -target-size 1.5GB \ + -page-size 4096 +``` + +#### Manual Test Database Creation + +```go +func createLargeTestDB(t *testing.T, path string, targetSize int64) { + db, err := sql.Open("sqlite3", path+"?_journal=WAL") + require.NoError(t, err) + defer db.Close() + + // Set page size + _, err = db.Exec("PRAGMA page_size = 4096") + require.NoError(t, err) + + // Create test table + _, err = db.Exec(` + CREATE TABLE test_data ( + id INTEGER PRIMARY KEY, + data BLOB NOT NULL + ) + `) + require.NoError(t, err) + + // Calculate rows needed + rowSize := 4000 // bytes per row + rowsNeeded := targetSize / int64(rowSize) + + // Batch insert for performance + tx, err := db.Begin() + require.NoError(t, err) + + stmt, err := tx.Prepare("INSERT INTO test_data (data) VALUES (?)") + require.NoError(t, err) + + for i := int64(0); i < rowsNeeded; i++ { + data := make([]byte, rowSize) + rand.Read(data) + _, err = stmt.Exec(data) + require.NoError(t, err) + + // Commit periodically + if i%1000 == 0 { + err = tx.Commit() + require.NoError(t, err) + tx, err = db.Begin() + require.NoError(t, err) + stmt, err = tx.Prepare("INSERT INTO test_data (data) VALUES (?)") + require.NoError(t, err) + } + } + + err = tx.Commit() + require.NoError(t, err) + + // Verify size + var pageCount, pageSize int + db.QueryRow("PRAGMA page_count").Scan(&pageCount) + db.QueryRow("PRAGMA page_size").Scan(&pageSize) + + actualSize := int64(pageCount * pageSize) + t.Logf("Created database: %d bytes (%d pages of %d bytes)", + actualSize, pageCount, pageSize) + + // Verify lock page is in range + lockPgno := ltx.LockPgno(pageSize) + if pageCount > lockPgno { + t.Logf("Database spans lock page at page %d", lockPgno) + } +} +``` + +#### Lock Page Test Cases + +```go +func TestDB_LockPageHandling(t *testing.T) { + testCases := []struct { + name string + pageSize int + lockPgno uint32 + }{ + {"4KB pages", 4096, 262145}, + {"8KB pages", 8192, 131073}, + {"16KB pages", 16384, 65537}, + {"32KB pages", 32768, 32769}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Create database larger than 1GB + dbPath := filepath.Join(t.TempDir(), "test.db") + createLargeTestDB(t, dbPath, 1100*1024*1024) // 1.1GB + + // Open with Litestream + db := NewDB(dbPath, "") + err := db.Open() + require.NoError(t, err) + defer db.Close(context.Background()) + + // Start replication + replica := NewReplica(db, newMockClient()) + err = replica.Start(context.Background()) + require.NoError(t, err) + + // Perform writes that span the lock page + conn, err := sql.Open("sqlite3", dbPath) + require.NoError(t, err) + + tx, err := conn.Begin() + require.NoError(t, err) + + // Write data around lock page boundary + for i := tc.lockPgno - 10; i < tc.lockPgno+10; i++ { + if i == tc.lockPgno { + continue // Skip lock page + } + + _, err = tx.Exec(fmt.Sprintf( + "INSERT INTO test_data (id, data) VALUES (%d, randomblob(4000))", + i)) + require.NoError(t, err) + } + + err = tx.Commit() + require.NoError(t, err) + + // Wait for sync + err = db.Sync(context.Background()) + require.NoError(t, err) + + // Verify replication skipped lock page + verifyLockPageSkipped(t, replica, tc.lockPgno) + }) + } +} + +func verifyLockPageSkipped(t *testing.T, replica *Replica, lockPgno uint32) { + // Get LTX files + files, err := replica.Client.LTXFiles(context.Background(), 0, 0) + require.NoError(t, err) 
+ + // Check each file + for files.Next() { + info := files.Item() + + // Read page index + pageIndex, err := FetchPageIndex(context.Background(), + replica.Client, info) + require.NoError(t, err) + + // Verify lock page not present + _, hasLockPage := pageIndex[lockPgno] + assert.False(t, hasLockPage, + "Lock page %d should not be in LTX file", lockPgno) + } +} +``` + +### Restoration Testing + +```go +func TestDB_RestoreLargeDatabase(t *testing.T) { + // Create and replicate large database + srcPath := filepath.Join(t.TempDir(), "source.db") + createLargeTestDB(t, srcPath, 1500*1024*1024) // 1.5GB + + // Setup replication + db := NewDB(srcPath, "") + err := db.Open() + require.NoError(t, err) + + client := file.NewReplicaClient(filepath.Join(t.TempDir(), "replica")) + replica := NewReplicaWithClient(db, client) + + err = replica.Start(context.Background()) + require.NoError(t, err) + + // Let it replicate + err = db.Sync(context.Background()) + require.NoError(t, err) + + db.Close(context.Background()) + + // Restore to new location + dstPath := filepath.Join(t.TempDir(), "restored.db") + err = Restore(context.Background(), client, dstPath, nil) + require.NoError(t, err) + + // Verify restoration + verifyDatabasesMatch(t, srcPath, dstPath) +} + +func verifyDatabasesMatch(t *testing.T, path1, path2 string) { + // Compare checksums + checksum1 := calculateDBChecksum(t, path1) + checksum2 := calculateDBChecksum(t, path2) + assert.Equal(t, checksum1, checksum2, "Database checksums should match") + + // Compare page counts + pageCount1 := getPageCount(t, path1) + pageCount2 := getPageCount(t, path2) + assert.Equal(t, pageCount1, pageCount2, "Page counts should match") + + // Run integrity check + db, err := sql.Open("sqlite3", path2) + require.NoError(t, err) + defer db.Close() + + var result string + err = db.QueryRow("PRAGMA integrity_check").Scan(&result) + require.NoError(t, err) + assert.Equal(t, "ok", result, "Integrity check should pass") +} +``` + +## Race Condition Testing + +### Running with Race Detector + +```bash +# Always run tests with race detector +go test -race -v ./... + +# Run specific race-prone tests +go test -race -v -run TestReplica_SetPos ./... +go test -race -v -run TestDB_ConcurrentSync ./... +go test -race -v -run TestStore_Integration ./... +``` + +### Common Race Conditions + +#### 1. Position Updates + +```go +func TestReplica_ConcurrentPositionUpdate(t *testing.T) { + replica := NewReplica(nil) + ctx := context.Background() + + var wg sync.WaitGroup + errors := make(chan error, 100) + + // Concurrent writers + for i := 0; i < 10; i++ { + wg.Add(1) + go func(n int) { + defer wg.Done() + + pos := ltx.Pos{ + TXID: ltx.TXID(n), + PageNo: uint32(n * 100), + } + + // This should use proper locking + replica.SetPos(pos) + + // Verify position + readPos := replica.Pos() + if readPos.TXID < ltx.TXID(n) { + errors <- fmt.Errorf("position went backwards") + } + }(i) + } + + // Concurrent readers + for i := 0; i < 10; i++ { + wg.Add(1) + go func() { + defer wg.Done() + + for j := 0; j < 100; j++ { + _ = replica.Pos() + time.Sleep(time.Microsecond) + } + }() + } + + wg.Wait() + close(errors) + + for err := range errors { + t.Error(err) + } +} +``` + +#### 2. 
WAL Monitoring + +```go +func TestDB_ConcurrentWALAccess(t *testing.T) { + db := setupTestDB(t) + defer db.Close(context.Background()) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + var wg sync.WaitGroup + + // Writer goroutine + wg.Add(1) + go func() { + defer wg.Done() + + conn, err := sql.Open("sqlite3", db.Path()) + if err != nil { + return + } + defer conn.Close() + + for i := 0; i < 100; i++ { + _, _ = conn.Exec("INSERT INTO test VALUES (?)", i) + time.Sleep(10 * time.Millisecond) + } + }() + + // Monitor goroutine + wg.Add(1) + go func() { + defer wg.Done() + + for { + select { + case <-ctx.Done(): + return + case <-db.notify: + // Process WAL changes + _ = db.Sync(context.Background()) + } + } + }() + + // Checkpoint goroutine + wg.Add(1) + go func() { + defer wg.Done() + + ticker := time.NewTicker(100 * time.Millisecond) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + _ = db.Checkpoint("PASSIVE") + } + } + }() + + wg.Wait() +} +``` + +### Test Cleanup + +```go +func TestStore_Integration(t *testing.T) { + // Setup + tmpDir := t.TempDir() + db := setupTestDB(t, tmpDir) + + // Use defer with error channel for cleanup + insertErr := make(chan error, 1) + + // Cleanup function + cleanup := func() { + select { + case err := <-insertErr: + if err != nil { + t.Errorf("insert error during test: %v", err) + } + default: + } + } + defer cleanup() + + // Test with timeout + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + // Run test... +} +``` + +## Integration Testing + +### Test Structure + +```go +// +build integration + +package litestream_test + +import ( + "context" + "os" + "testing" +) + +func TestIntegration_S3(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + // Check for credentials + if os.Getenv("AWS_ACCESS_KEY_ID") == "" { + t.Skip("AWS_ACCESS_KEY_ID not set") + } + + // Run test against real S3 + runIntegrationTest(t, setupS3Client()) +} + +func runIntegrationTest(t *testing.T, client ReplicaClient) { + ctx := context.Background() + + t.Run("BasicReplication", func(t *testing.T) { + // Test basic write/read cycle + }) + + t.Run("Compaction", func(t *testing.T) { + // Test compaction with remote storage + }) + + t.Run("EventualConsistency", func(t *testing.T) { + // Test handling of eventual consistency + }) + + t.Run("LargeFiles", func(t *testing.T) { + // Test with files > 100MB + }) + + t.Run("Cleanup", func(t *testing.T) { + err := client.DeleteAll(ctx) + require.NoError(t, err) + }) +} +``` + +### Environment-Based Configuration + +```go +func setupS3Client() *s3.ReplicaClient { + return &s3.ReplicaClient{ + AccessKeyID: os.Getenv("AWS_ACCESS_KEY_ID"), + SecretAccessKey: os.Getenv("AWS_SECRET_ACCESS_KEY"), + Region: getEnvOrDefault("AWS_REGION", "us-east-1"), + Bucket: getEnvOrDefault("TEST_S3_BUCKET", "litestream-test"), + Path: fmt.Sprintf("test-%d", time.Now().Unix()), + } +} + +func getEnvOrDefault(key, defaultValue string) string { + if value := os.Getenv(key); value != "" { + return value + } + return defaultValue +} +``` + +## Performance Testing + +### Benchmarks + +```go +func BenchmarkDB_Sync(b *testing.B) { + db := setupBenchDB(b) + defer db.Close(context.Background()) + + // Prepare test data + conn, _ := sql.Open("sqlite3", db.Path()) + defer conn.Close() + + for i := 0; i < 1000; i++ { + conn.Exec("INSERT INTO test VALUES (?)", i) + } + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + 
err := db.Sync(context.Background()) + if err != nil { + b.Fatal(err) + } + } + + b.ReportMetric(float64(b.N)/b.Elapsed().Seconds(), "syncs/sec") +} + +func BenchmarkCompaction(b *testing.B) { + benchmarks := []struct { + name string + fileCount int + fileSize int + }{ + {"Small-Many", 1000, 1024}, // Many small files + {"Medium", 100, 10 * 1024}, // Medium files + {"Large-Few", 10, 100 * 1024}, // Few large files + } + + for _, bm := range benchmarks { + b.Run(bm.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + b.StopTimer() + files := generateTestFiles(bm.fileCount, bm.fileSize) + b.StartTimer() + + _, err := compact(files) + if err != nil { + b.Fatal(err) + } + } + + totalSize := int64(bm.fileCount * bm.fileSize) + b.ReportMetric(float64(totalSize)/float64(b.Elapsed().Nanoseconds()), "bytes/ns") + }) + } +} +``` + +### Load Testing + +```go +func TestDB_LoadTest(t *testing.T) { + if testing.Short() { + t.Skip("skipping load test") + } + + db := setupTestDB(t) + defer db.Close(context.Background()) + + // Configure load + config := LoadConfig{ + Duration: 5 * time.Minute, + WriteRate: 100, // writes/sec + ReadRate: 500, // reads/sec + Workers: 10, + DataSize: 4096, + BurstPattern: true, + } + + results := runLoadTest(t, db, config) + + // Verify results + assert.Greater(t, results.TotalWrites, int64(20000)) + assert.Less(t, results.P99Latency, 100*time.Millisecond) + assert.Zero(t, results.Errors) + + t.Logf("Load test results: %+v", results) +} + +type LoadResults struct { + TotalWrites int64 + TotalReads int64 + Errors int64 + P50Latency time.Duration + P99Latency time.Duration + BytesReplicated int64 +} + +func runLoadTest(t *testing.T, db *DB, config LoadConfig) LoadResults { + ctx, cancel := context.WithTimeout(context.Background(), config.Duration) + defer cancel() + + var results LoadResults + var mu sync.Mutex + latencies := make([]time.Duration, 0, 100000) + + // Start workers + var wg sync.WaitGroup + for i := 0; i < config.Workers; i++ { + wg.Add(1) + go func(workerID int) { + defer wg.Done() + + conn, err := sql.Open("sqlite3", db.Path()) + if err != nil { + return + } + defer conn.Close() + + ticker := time.NewTicker(time.Second / time.Duration(config.WriteRate)) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + start := time.Now() + data := make([]byte, config.DataSize) + rand.Read(data) + + _, err := conn.Exec("INSERT INTO test (data) VALUES (?)", data) + latency := time.Since(start) + + mu.Lock() + if err != nil { + results.Errors++ + } else { + results.TotalWrites++ + latencies = append(latencies, latency) + } + mu.Unlock() + } + } + }(i) + } + + wg.Wait() + + // Calculate percentiles + sort.Slice(latencies, func(i, j int) bool { + return latencies[i] < latencies[j] + }) + + if len(latencies) > 0 { + results.P50Latency = latencies[len(latencies)*50/100] + results.P99Latency = latencies[len(latencies)*99/100] + } + + return results +} +``` + +## Mock Usage Patterns + +### Mock ReplicaClient + +```go +type MockReplicaClient struct { + mu sync.Mutex + files map[string]*ltx.FileInfo + data map[string][]byte + + // Control behavior + FailureRate float64 + Latency time.Duration + EventualDelay time.Duration +} + +func (m *MockReplicaClient) WriteLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, r io.Reader) (*ltx.FileInfo, error) { + // Simulate latency + if m.Latency > 0 { + time.Sleep(m.Latency) + } + + // Simulate failures + if m.FailureRate > 0 && rand.Float64() < m.FailureRate { + return nil, 
errors.New("simulated failure") + } + + // Simulate eventual consistency + if m.EventualDelay > 0 { + time.AfterFunc(m.EventualDelay, func() { + m.mu.Lock() + defer m.mu.Unlock() + // Make file available after delay + }) + } + + // Store file + data, err := io.ReadAll(r) + if err != nil { + return nil, err + } + + m.mu.Lock() + defer m.mu.Unlock() + + key := fmt.Sprintf("%d-%016x-%016x", level, minTXID, maxTXID) + info := <x.FileInfo{ + Level: level, + MinTXID: minTXID, + MaxTXID: maxTXID, + Size: int64(len(data)), + CreatedAt: time.Now(), + } + + m.files[key] = info + m.data[key] = data + + return info, nil +} +``` + +### Mock Database + +```go +type MockDB struct { + mu sync.Mutex + path string + replicas []*Replica + closed bool + + // Control behavior + CheckpointFailures int + SyncDelay time.Duration +} + +func (m *MockDB) Sync(ctx context.Context) error { + if m.SyncDelay > 0 { + select { + case <-time.After(m.SyncDelay): + case <-ctx.Done(): + return ctx.Err() + } + } + + m.mu.Lock() + defer m.mu.Unlock() + + if m.closed { + return errors.New("database closed") + } + + for _, r := range m.replicas { + if err := r.Sync(ctx); err != nil { + return err + } + } + + return nil +} +``` + +## Test Utilities + +### Helper Functions + +```go +// testutil/db.go +package testutil + +import ( + "database/sql" + "testing" + "path/filepath" +) + +func NewTestDB(t testing.TB) *litestream.DB { + t.Helper() + + path := filepath.Join(t.TempDir(), "test.db") + + // Create SQLite database + conn, err := sql.Open("sqlite3", path+"?_journal=WAL") + require.NoError(t, err) + + _, err = conn.Exec(` + CREATE TABLE test ( + id INTEGER PRIMARY KEY, + data BLOB + ) + `) + require.NoError(t, err) + conn.Close() + + // Open with Litestream + db := litestream.NewDB(path, "") + db.MonitorInterval = 10 * time.Millisecond // Speed up for tests + db.MinCheckpointPageN = 100 // Lower threshold for tests + + err = db.Open() + require.NoError(t, err) + + t.Cleanup(func() { + db.Close(context.Background()) + }) + + return db +} + +func WriteTestData(t testing.TB, db *litestream.DB, count int) { + t.Helper() + + conn, err := sql.Open("sqlite3", db.Path()) + require.NoError(t, err) + defer conn.Close() + + tx, err := conn.Begin() + require.NoError(t, err) + + for i := 0; i < count; i++ { + data := make([]byte, 100) + rand.Read(data) + _, err = tx.Exec("INSERT INTO test (data) VALUES (?)", data) + require.NoError(t, err) + } + + err = tx.Commit() + require.NoError(t, err) +} +``` + +### Test Fixtures + +```go +// testdata/fixtures.go +package testdata + +import _ "embed" + +//go:embed small.db +var SmallDB []byte + +//go:embed large.db +var LargeDB []byte + +//go:embed corrupted.db +var CorruptedDB []byte + +func ExtractFixture(name string, path string) error { + var data []byte + + switch name { + case "small": + data = SmallDB + case "large": + data = LargeDB + case "corrupted": + data = CorruptedDB + default: + return fmt.Errorf("unknown fixture: %s", name) + } + + return os.WriteFile(path, data, 0600) +} +``` + +## Common Test Failures + +### 1. Database Locked Errors + +```go +// Problem: Multiple connections without proper WAL mode +func TestBroken(t *testing.T) { + db1, _ := sql.Open("sqlite3", "test.db") // Wrong! + db2, _ := sql.Open("sqlite3", "test.db") // Will fail +} + +// Solution: Use WAL mode +func TestFixed(t *testing.T) { + db1, _ := sql.Open("sqlite3", "test.db?_journal=WAL") + db2, _ := sql.Open("sqlite3", "test.db?_journal=WAL") +} +``` + +### 2. 
Timing Issues + +```go +// Problem: Race between write and sync +func TestBroken(t *testing.T) { + WriteData(db) + result := ReadReplica() // May not see data yet! +} + +// Solution: Explicit sync +func TestFixed(t *testing.T) { + WriteData(db) + err := db.Sync(context.Background()) + require.NoError(t, err) + result := ReadReplica() // Now guaranteed to see data +} +``` + +### 3. Cleanup Issues + +```go +// Problem: Goroutine outlives test +func TestBroken(t *testing.T) { + go func() { + time.Sleep(10 * time.Second) + doWork() // Test already finished! + }() +} + +// Solution: Use context and wait +func TestFixed(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + select { + case <-ctx.Done(): + return + case <-time.After(10 * time.Second): + doWork() + } + }() + + // Test work... + + cancel() // Signal shutdown + wg.Wait() // Wait for goroutine +} +``` + +### 4. File Handle Leaks + +```go +// Problem: Not closing files +func TestBroken(t *testing.T) { + f, _ := os.Open("test.db") + // Missing f.Close()! +} + +// Solution: Always use defer +func TestFixed(t *testing.T) { + f, err := os.Open("test.db") + require.NoError(t, err) + defer f.Close() +} +``` + +## Test Coverage + +### Running Coverage + +```bash +# Generate coverage report +go test -coverprofile=coverage.out ./... + +# View coverage in browser +go tool cover -html=coverage.out + +# Check coverage percentage +go tool cover -func=coverage.out | grep total + +# Coverage by package +go test -cover ./... +``` + +### Coverage Requirements + +- Core packages (`db.go`, `replica.go`, `store.go`): >80% +- Replica clients: >70% +- Utilities: >60% +- Mock implementations: Not required + +### Improving Coverage + +```go +// Use test tables for comprehensive coverage +func TestDB_Checkpoint(t *testing.T) { + tests := []struct { + name string + mode string + walSize int + wantErr bool + }{ + {"Passive", "PASSIVE", 100, false}, + {"Full", "FULL", 1000, false}, + {"Restart", "RESTART", 5000, false}, + {"Truncate", "TRUNCATE", 10000, false}, + {"Invalid", "INVALID", 100, true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + db := setupTestDB(t) + generateWAL(t, db, tt.walSize) + + err := db.Checkpoint(tt.mode) + if tt.wantErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + }) + } +} +``` diff --git a/docs/V050_CHANGES.md b/docs/V050_CHANGES.md new file mode 100644 index 000000000..0e04ef7e9 --- /dev/null +++ b/docs/V050_CHANGES.md @@ -0,0 +1,191 @@ +# Litestream v0.5.0 Changes and Migration Guide + +This document summarizes the major changes in Litestream v0.5.0, based on Ben Johnson's announcement and the current implementation. + +## Executive Summary + +Litestream v0.5.0 is a **major rewrite** that introduces the LTX format, improves compaction, and removes CGO dependencies. It includes breaking changes that prevent restoration from v0.3.x backups. + +## Breaking Changes + +### 1. Single Replica Constraint +- **Before**: Multiple replica destinations per database +- **After**: Only ONE replica destination per database +- **Impact**: Simplified configuration but reduced redundancy options + +### 2. Cannot Restore from v0.3.x +- **Before**: WAL segment-based backups +- **After**: LTX page-based backups +- **Impact**: Must maintain v0.3.x installation to restore old backups + +### 3. 
Command Changes +- **Before**: `litestream wal` commands +- **After**: `litestream ltx` commands +- **Example**: `litestream ltx info file.ltx` + +### 4. Generations Removed +- **Before**: Used "generations" to track database backups +- **After**: Simplified tracking with LTX files and TXID ranges +- **Impact**: Cleaner mental model, simpler implementation + +## New Features + +### 1. LTX Format +- **Purpose**: Efficient page-level replication format +- **Benefits**: + - Immutable files with TXID ranges + - Page-level deduplication during compaction + - Indexed pages for fast random access + - Point-in-time restoration + +### 2. Multi-Level Compaction +- **Level 0**: Raw LTX files (no compaction) +- **Level 1**: 30-second windows +- **Level 2**: 5-minute windows +- **Level 3**: 1-hour windows +- **Snapshots**: Daily full database snapshots + +### 3. NATS JetStream Support +- **New replica type**: `nats://` +- **Features**: Distributed messaging with persistence +- **Use case**: Event-driven architectures + +### 4. Pure Go Implementation +- **Change**: Switched from CGO to `modernc.org/sqlite` +- **Benefits**: + - Easier cross-compilation + - No C dependencies + - Simplified builds + - Better portability + +## Technical Improvements + +### Performance +- **Compaction**: Limited only by I/O throughput +- **Page-level operations**: More efficient than WAL segments +- **Indexed access**: Fast page lookups in LTX files + +### Architecture +- **Cleaner separation**: Storage backends more modular +- **Better abstractions**: LTX format decouples from SQLite WAL +- **Simplified state**: No generations to track + +## Migration Path + +### From v0.3.x to v0.5.0 + +1. **Before upgrading**: + ```bash + # Create final backup with v0.3.x + litestream snapshot -replica [destination] + ``` + +2. **Install v0.5.0**: + ```bash + # Download and install new version + curl -L https://github.com/benbjohnson/litestream/releases/download/v0.5.0/litestream-v0.5.0-linux-amd64.tar.gz | tar xz + ``` + +3. **Update configuration**: + ```yaml + # Old (v0.3.x) - Multiple replicas + dbs: + - path: /data/db.sqlite + replicas: + - url: s3://bucket1/db + - url: s3://bucket2/backup + + # New (v0.5.0) - Single replica only + dbs: + - path: /data/db.sqlite + replicas: + - url: s3://bucket1/db + ``` + +4. **Start fresh replication**: + ```bash + # Remove old WAL segments + rm -rf /data/db.sqlite-litestream + + # Start v0.5.0 + litestream replicate + ``` + +### Rollback Procedure + +If you need to restore from v0.3.x backups: + +1. **Keep v0.3.x binary**: Don't delete old version +2. **Use old binary for restoration**: + ```bash + litestream-v0.3.x restore -o restored.db s3://bucket/db + ``` +3. **Then upgrade**: Once restored, can use v0.5.0 going forward + +## Future Roadmap + +### Litestream VFS (In Development) +- **Purpose**: Enable read replicas without full downloads +- **How it works**: + - Virtual File System layer + - On-demand page fetching from S3 + - Background hydration + - Local caching +- **Benefits**: + - Instant database "copies" + - Scales read operations + - Reduces bandwidth costs + +## Best Practices for v0.5.0 + +### 1. Compaction Configuration +```yaml +# Use default intervals for most workloads +levels: + - level: 1 + interval: 30s + - level: 2 + interval: 5m + - level: 3 + interval: 1h +``` + +### 2. Single Replica Strategy +Since only one replica is allowed: +- Choose most reliable storage +- Consider using RAID/redundancy at storage level +- Implement external backup rotation if needed + +### 3. 
Monitoring +- Watch compaction metrics +- Monitor LTX file counts at each level +- Track restoration time improvements + +### 4. Testing +- Test restoration regularly +- Verify point-in-time recovery works +- Benchmark compaction performance + +## Common Issues and Solutions + +### Issue: "Cannot restore from old backup" +**Solution**: Use v0.3.x binary to restore, then replicate with v0.5.0 + +### Issue: "Multiple replicas not supported" +**Solution**: Use single most reliable destination, implement redundancy at storage layer + +### Issue: "`wal` command not found" +**Solution**: Use `ltx` command instead + +### Issue: "CGO_ENABLED required error" +**Solution**: Not needed in v0.5.0, ensure using latest binary + +## Summary + +Litestream v0.5.0 represents a significant evolution: +- **Simpler**: Single replica, no generations, pure Go +- **More efficient**: Page-level operations, better compaction +- **More flexible**: LTX format enables future features +- **Breaking changes**: Cannot restore old backups directly + +The tradeoffs favor simplicity and efficiency over backward compatibility, positioning Litestream for future enhancements like the VFS read replica system. From 2d4edcbe92c2c8ea225a711a7630c7b1d174bb17 Mon Sep 17 00:00:00 2001 From: Cory LaNou Date: Fri, 10 Oct 2025 14:54:27 -0500 Subject: [PATCH 2/8] feat(docs): incorporate PR #783 architectural patterns and anti-patterns Based on Ben Johnson's feedback in PR #783, added comprehensive documentation about: - Architectural boundaries between DB and Replica layers - Proper placement of database restoration logic (DB.init() not Replica.Start()) - Atomic file operations pattern (temp file + rename) - Proper error handling (return errors, don't just log and continue) - Leveraging existing mechanisms (e.g., verify() for snapshots) These patterns help AI agents understand proper Litestream architecture and avoid common mistakes when contributing fixes. --- AGENT.md | 300 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 299 insertions(+), 1 deletion(-) diff --git a/AGENT.md b/AGENT.md index 3ff053b31..a52c1a1b1 100644 --- a/AGENT.md +++ b/AGENT.md @@ -8,6 +8,7 @@ This document provides comprehensive guidance for AI agents working with the Lit - [Fundamental Concepts](#fundamental-concepts) - [Core Architecture](#core-architecture) - [Critical Concepts](#critical-concepts) +- [Architectural Boundaries and Patterns](#architectural-boundaries-and-patterns) - [Common Pitfalls](#common-pitfalls) - [Component Guide](#component-guide) - [Performance Considerations](#performance-considerations) @@ -233,8 +234,232 @@ Many storage backends (S3, R2, etc.) are eventually consistent. This means: **Solution**: Always prefer local files during compaction (see PR #760). +## Architectural Boundaries and Patterns + +**CRITICAL**: Based on PR #783 feedback, understanding proper architectural boundaries is essential for successful contributions. 
+ +### Layer Responsibilities + +```mermaid +graph TB + subgraph "DB Layer (db.go)" + DBInit[DB.init()] + DBPos[DB.pos tracking] + DBRestore[Database state validation] + DBSnapshot[Snapshot triggering via verify()] + end + + subgraph "Replica Layer (replica.go)" + ReplicaStart[Replica.Start()] + ReplicaSync[Sync operations] + ReplicaPos[Replica.pos tracking] + ReplicaClient[Storage interaction] + end + + subgraph "Storage Layer" + S3[S3/GCS/Azure] + LTXFiles[LTX Files] + end + + DBInit -->|Initialize| ReplicaStart + DBInit -->|Check positions| DBPos + DBInit -->|Validate state| DBRestore + ReplicaStart -->|Focus on replication only| ReplicaSync + ReplicaSync -->|Upload/Download| ReplicaClient + ReplicaClient -->|Read/Write| S3 + S3 -->|Store| LTXFiles +``` + +### ✅ DO: Handle database state in DB.init() + +```go +// CORRECT - Database restoration logic belongs in DB layer +func (db *DB) init() error { + // Check if database is behind replica + if db.pos < replica.pos { + // Clear local L0 files + if err := db.clearL0Files(); err != nil { + return fmt.Errorf("clear L0 files: %w", err) + } + + // Fetch latest L0 LTX file from replica + ltxFile, err := replica.Client.OpenLTXFile(ctx, 0, replica.pos.MinTXID, replica.pos.MaxTXID, 0, 0) + if err != nil { + return fmt.Errorf("fetch latest L0 LTX: %w", err) + } + defer ltxFile.Close() + + // Write to local L0 directory + if err := db.writeL0File(ltxFile); err != nil { + return fmt.Errorf("write L0 file: %w", err) + } + } + + // Now start replica with clean state + return replica.Start() +} +``` + +### ❌ DON'T: Put database state logic in Replica layer + +```go +// WRONG - Replica should only handle replication concerns +func (r *Replica) Start() error { + // DON'T check database state here + if needsRestore() { // ❌ Wrong layer! + restoreDatabase() // ❌ Wrong layer! + } + // Replica should focus only on replication mechanics +} +``` + +### Atomic File Operations Pattern + +**CRITICAL**: Always use atomic writes to prevent partial/corrupted files. + +### ✅ DO: Write to temp file, then rename + +```go +// CORRECT - Atomic file write pattern +func writeFileAtomic(path string, data []byte) error { + // Create temp file in same directory (for atomic rename) + dir := filepath.Dir(path) + tmpFile, err := os.CreateTemp(dir, ".tmp-*") + if err != nil { + return fmt.Errorf("create temp file: %w", err) + } + tmpPath := tmpFile.Name() + + // Clean up temp file on error + defer func() { + if tmpFile != nil { + tmpFile.Close() + os.Remove(tmpPath) + } + }() + + // Write data to temp file + if _, err := tmpFile.Write(data); err != nil { + return fmt.Errorf("write temp file: %w", err) + } + + // Sync to ensure data is on disk + if err := tmpFile.Sync(); err != nil { + return fmt.Errorf("sync temp file: %w", err) + } + + // Close before rename + if err := tmpFile.Close(); err != nil { + return fmt.Errorf("close temp file: %w", err) + } + tmpFile = nil // Prevent defer cleanup + + // Atomic rename (on same filesystem) + if err := os.Rename(tmpPath, path); err != nil { + os.Remove(tmpPath) + return fmt.Errorf("rename to final path: %w", err) + } + + return nil +} +``` + +### ❌ DON'T: Write directly to final location + +```go +// WRONG - Can leave partial files on failure +func writeFileDirect(path string, data []byte) error { + return os.WriteFile(path, data, 0644) // ❌ Not atomic! 
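+	// Note: if the process crashes or the disk fills part-way through this
+	// write, a truncated file can be left at `path` and later readers may
+	// treat it as a complete file.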
+} +``` + +### Error Handling Patterns + +### ✅ DO: Return errors immediately + +```go +// CORRECT - Return error for caller to handle +func (db *DB) validatePosition() error { + if db.pos < replica.pos { + return fmt.Errorf("database position (%v) behind replica (%v)", db.pos, replica.pos) + } + return nil +} +``` + +### ❌ DON'T: Continue on critical errors + +```go +// WRONG - Silently continuing can cause data corruption +func (db *DB) validatePosition() { + if db.pos < replica.pos { + log.Printf("warning: position mismatch") // ❌ Don't just log! + // Continuing here is dangerous + } +} +``` + +### Leveraging Existing Mechanisms + +### ✅ DO: Use verify() for snapshot triggering + +```go +// CORRECT - Leverage existing snapshot mechanism +func (db *DB) ensureSnapshot() error { + // Use existing verify() which already handles snapshot logic + if err := db.verify(); err != nil { + return fmt.Errorf("verify for snapshot: %w", err) + } + // verify() will trigger snapshot if needed + return nil +} +``` + +### ❌ DON'T: Reimplement existing functionality + +```go +// WRONG - Don't recreate what already exists +func (db *DB) customSnapshot() error { + // ❌ Don't write custom snapshot logic + // when verify() already does this correctly +} +``` + ## Common Pitfalls +### ❌ DON'T: Mix architectural concerns (PR #783) + +```go +// WRONG - Database state logic in Replica layer +func (r *Replica) Start() error { + if db.needsRestore() { // ❌ Wrong layer for DB state! + r.restoreDatabase() // ❌ Replica shouldn't manage DB state! + } + return r.sync() +} +``` + +### ✅ DO: Keep concerns in proper layers + +```go +// CORRECT - Each layer handles its own concerns +func (db *DB) init() error { + // DB layer handles database state + if db.needsRestore() { + if err := db.restore(); err != nil { + return err + } + } + // Then start replica for replication only + return db.replica.Start() +} + +func (r *Replica) Start() error { + // Replica focuses only on replication + return r.startSync() +} +``` + ### ❌ DON'T: Read from remote during compaction ```go @@ -290,6 +515,75 @@ info := <x.FileInfo{ } ``` +### ❌ DON'T: Write files without atomic operations (PR #783) + +```go +// WRONG - Can leave partial files on failure +func saveLTXFile(path string, data []byte) error { + return os.WriteFile(path, data, 0644) // ❌ Not atomic! +} +``` + +### ✅ DO: Use atomic write pattern + +```go +// CORRECT - Write to temp, then rename +func saveLTXFileAtomic(path string, data []byte) error { + tmpPath := path + ".tmp" + if err := os.WriteFile(tmpPath, data, 0644); err != nil { + return err + } + return os.Rename(tmpPath, path) // Atomic on same filesystem +} +``` + +### ❌ DON'T: Ignore errors and continue + +```go +// WRONG - Continuing after error can corrupt state +func (db *DB) processFiles() { + for _, file := range files { + if err := processFile(file); err != nil { + log.Printf("error: %v", err) // ❌ Just logging! + // Continuing to next file is dangerous + } + } +} +``` + +### ✅ DO: Return errors for proper handling + +```go +// CORRECT - Let caller decide how to handle errors +func (db *DB) processFiles() error { + for _, file := range files { + if err := processFile(file); err != nil { + return fmt.Errorf("process file %s: %w", file, err) + } + } + return nil +} +``` + +### ❌ DON'T: Recreate existing functionality (PR #783) + +```go +// WRONG - Don't reimplement what already exists +func customSnapshotTrigger() { + // Complex custom logic to trigger snapshots + // when db.verify() already does this! 
+} +``` + +### ✅ DO: Leverage existing mechanisms + +```go +// CORRECT - Use what's already there +func triggerSnapshot() error { + return db.verify() // Already handles snapshot logic correctly +} +``` + ## Component Guide ### DB Component (db.go) @@ -501,7 +795,7 @@ For complex architectural questions, consult: 4. `docs/ARCHITECTURE.md` - Deep technical details of Litestream components 5. `docs/REPLICA_CLIENT_GUIDE.md` - Storage backend implementation guide 6. `docs/TESTING_GUIDE.md` - Comprehensive testing strategies -7. Recent PRs, especially #760 (compaction fix) and #748 (testing harness) +7. Recent PRs, especially #760 (compaction fix), #748 (testing harness), and #783 (architectural boundaries) ## Future Roadmap @@ -536,8 +830,12 @@ For complex architectural questions, consult: - [ ] Read `docs/LTX_FORMAT.md` for replication format details - [ ] Understand v0.5.0 changes and limitations - [ ] Understand the component you're modifying +- [ ] Understand architectural boundaries (DB vs Replica responsibilities) - [ ] Check for eventual consistency implications - [ ] Consider >1GB database edge cases (lock page at 0x40000000) +- [ ] Use atomic file operations (temp file + rename) +- [ ] Return errors properly (don't just log and continue) +- [ ] Leverage existing mechanisms (e.g., verify() for snapshots) - [ ] Plan appropriate tests - [ ] Review recent similar PRs for patterns - [ ] Use proper locking (Lock vs RLock) From a3e6e78c7a43a36d53806714ca8077a668357f29 Mon Sep 17 00:00:00 2001 From: Cory LaNou Date: Fri, 10 Oct 2025 15:13:17 -0500 Subject: [PATCH 3/8] docs: remove PR references from AGENT.md for timeless documentation Remove specific PR references to make the documentation stand on its own as architectural guidance rather than historical context. The documentation should focus on patterns and anti-patterns, not where we learned about them. --- AGENT.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/AGENT.md b/AGENT.md index a52c1a1b1..28e2c2c26 100644 --- a/AGENT.md +++ b/AGENT.md @@ -232,11 +232,11 @@ Many storage backends (S3, R2, etc.) are eventually consistent. This means: - A file might be listed but only partially available - Reads might return stale or incomplete data -**Solution**: Always prefer local files during compaction (see PR #760). +**Solution**: Always prefer local files during compaction. ## Architectural Boundaries and Patterns -**CRITICAL**: Based on PR #783 feedback, understanding proper architectural boundaries is essential for successful contributions. +**CRITICAL**: Understanding proper architectural boundaries is essential for successful contributions. ### Layer Responsibilities @@ -427,7 +427,7 @@ func (db *DB) customSnapshot() error { ## Common Pitfalls -### ❌ DON'T: Mix architectural concerns (PR #783) +### ❌ DON'T: Mix architectural concerns ```go // WRONG - Database state logic in Replica layer @@ -515,7 +515,7 @@ info := <x.FileInfo{ } ``` -### ❌ DON'T: Write files without atomic operations (PR #783) +### ❌ DON'T: Write files without atomic operations ```go // WRONG - Can leave partial files on failure @@ -565,7 +565,7 @@ func (db *DB) processFiles() error { } ``` -### ❌ DON'T: Recreate existing functionality (PR #783) +### ❌ DON'T: Recreate existing functionality ```go // WRONG - Don't reimplement what already exists @@ -795,7 +795,7 @@ For complex architectural questions, consult: 4. `docs/ARCHITECTURE.md` - Deep technical details of Litestream components 5. 
`docs/REPLICA_CLIENT_GUIDE.md` - Storage backend implementation guide 6. `docs/TESTING_GUIDE.md` - Comprehensive testing strategies -7. Recent PRs, especially #760 (compaction fix), #748 (testing harness), and #783 (architectural boundaries) +7. Review recent PRs for current patterns and best practices ## Future Roadmap From 65392b346d29ede26ec1ea2c8c25efe623cfbfa5 Mon Sep 17 00:00:00 2001 From: Cory LaNou Date: Fri, 10 Oct 2025 16:11:08 -0500 Subject: [PATCH 4/8] feat(docs): implement multi-agent documentation support Adopt AGENTS.md standard for universal AI agent support across Claude, GitHub Copilot, Cursor, Gemini, and other AI coding assistants. Changes: - Renamed AGENT.md to AGENTS.md (emerging standard) - Added agent-specific sections for each major AI assistant - Created llms.txt index for universal documentation discovery - Added symlinks for tool compatibility (.cursorrules, copilot-instructions.md) - Created GEMINI.md for Gemini-specific configuration - Added .aiexclude for Gemini file filtering (like .gitignore) This unified approach ensures consistent AI assistance across all major coding assistants while minimizing documentation maintenance overhead. --- .aiexclude | 53 ++++++++++++++++++ .cursorrules | 1 + .github/copilot-instructions.md | 1 + AGENT.md => AGENTS.md | 98 +++++++++++++++++++++++++++++++++ GEMINI.md | 81 +++++++++++++++++++++++++++ llms.txt | 84 ++++++++++++++++++++++++++++ 6 files changed, 318 insertions(+) create mode 100644 .aiexclude create mode 120000 .cursorrules create mode 120000 .github/copilot-instructions.md rename AGENT.md => AGENTS.md (87%) create mode 100644 GEMINI.md create mode 100644 llms.txt diff --git a/.aiexclude b/.aiexclude new file mode 100644 index 000000000..2bbe036a2 --- /dev/null +++ b/.aiexclude @@ -0,0 +1,53 @@ +# .aiexclude - Files to exclude from Gemini Code Assist +# This file works like .gitignore for AI context + +# Sensitive files +*.key +*.pem +*.secret +.env +.env.* + +# Build artifacts +bin/ +dist/ +*.exe +*.dll +*.so +*.dylib + +# Test databases +*.db +*.db-wal +*.db-shm +*.sqlite +*.sqlite-wal +*.sqlite-shm + +# Large test files +testdata/large/ +*.ltx + +# Vendor directories +vendor/ + +# Generated files +*.pb.go +*_generated.go + +# Documentation that's redundant with AGENTS.md +docs/RELEASE.md + +# CI/CD configs that aren't relevant for code understanding +.github/workflows/release.yml +.goreleaser.yml + +# Temporary and backup files +*.tmp +*.bak +*.swp +*~ + +# OS-specific files +.DS_Store +Thumbs.db diff --git a/.cursorrules b/.cursorrules new file mode 120000 index 000000000..47dc3e3d8 --- /dev/null +++ b/.cursorrules @@ -0,0 +1 @@ +AGENTS.md \ No newline at end of file diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 120000 index 000000000..be77ac83a --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1 @@ +../AGENTS.md \ No newline at end of file diff --git a/AGENT.md b/AGENTS.md similarity index 87% rename from AGENT.md rename to AGENTS.md index 28e2c2c26..ce4d51f20 100644 --- a/AGENT.md +++ b/AGENTS.md @@ -841,3 +841,101 @@ For complex architectural questions, consult: - [ ] Use proper locking (Lock vs RLock) - [ ] Preserve timestamps where applicable - [ ] Test with race detector enabled + +## Agent-Specific Instructions + +This document serves as the universal source of truth for all AI coding assistants. 
Different agents may access it through various paths: +- **Claude**: Reads `AGENTS.md` directly (also loads `CLAUDE.md` if present) +- **GitHub Copilot**: Via `.github/copilot-instructions.md` symlink +- **Cursor**: Via `.cursorrules` symlink +- **Gemini**: Reads `AGENTS.md` and respects `.aiexclude` patterns +- **Other agents**: Check for `AGENTS.md` or `llms.txt` in repository root + +### GitHub Copilot / OpenAI Codex + +**Context Window**: 64k tokens (upgrading to 1M with GPT-4.1) + +**Best Practices**: +- Use `/explain` command for SQLite internals +- Reference patterns in Common Pitfalls section +- Switch to GPT-5-Codex model for complex refactoring +- Focus on architectural boundaries and anti-patterns +- Leverage workspace indexing for multi-file operations + +**Model Selection**: +- Use GPT-4o for quick completions +- Switch to GPT-5 or Claude Opus 4.1 for complex tasks + +### Cursor + +**Context Window**: Configurable based on model selection + +**Best Practices**: +- Enable "codebase indexing" for full repository context +- Use Claude 3.5 Sonnet for architectural questions +- Use GPT-4o for quick inline completions +- Split complex rules into `.cursor/rules/*.mdc` files if needed +- Leverage workspace search before asking questions + +**Model Recommendations**: +- **Architecture changes**: Claude 3.5 Sonnet +- **Quick fixes**: GPT-4o or cursor-small +- **Test generation**: Any model with codebase context + +### Claude / Claude Code + +**Context Window**: 200k tokens standard (1M in beta) + +**Best Practices**: +- Full documentation can be loaded (5k lines fits easily) +- Reference `docs/` subdirectory for deep technical details +- Use structured note-taking for complex multi-step tasks +- Leverage MCP tools when available +- Check `CLAUDE.md` for project-specific configuration + +**Strengths**: +- Deep architectural reasoning +- Complex system analysis +- Large context window utilization + +### Google Gemini / Gemini Code Assist + +**Context Window**: Varies by tier + +**Best Practices**: +- Check `.aiexclude` for files to ignore +- Enable local codebase awareness +- Excellent for test generation and documentation +- Use for code review and security scanning +- Leverage code customization features + +**Configuration**: +- Respects `.aiexclude` patterns (like `.gitignore`) +- Can use custom AI rules files + +### General Multi-Agent Guidelines + +1. **Always start with this document** (AGENTS.md) for project understanding +2. **Check `llms.txt`** for quick navigation to other documentation +3. **Respect architectural boundaries** (DB layer vs Replica layer) +4. **Follow the patterns** in Common Pitfalls section +5. **Test with race detector** for any concurrent code changes +6. **Preserve backward compatibility** with v0.5.0 constraints + +### Documentation Hierarchy + +``` +Tier 1 (Always read): +- AGENTS.md (this file) +- llms.txt (if you need navigation) + +Tier 2 (Read when relevant): +- docs/SQLITE_INTERNALS.md (for WAL/page work) +- docs/LTX_FORMAT.md (for replication work) +- docs/ARCHITECTURE.md (for major changes) + +Tier 3 (Reference only): +- docs/TESTING_GUIDE.md (for test scenarios) +- docs/REPLICA_CLIENT_GUIDE.md (for new backends) +- docs/V050_CHANGES.md (for migration context) +``` diff --git a/GEMINI.md b/GEMINI.md new file mode 100644 index 000000000..56f1a9f10 --- /dev/null +++ b/GEMINI.md @@ -0,0 +1,81 @@ +# GEMINI.md - Gemini Code Assist Configuration for Litestream + +This file provides Gemini-specific configuration and notes. 
For comprehensive project documentation, see AGENTS.md. + +## Primary Documentation + +**See AGENTS.md** for complete architectural guidance, patterns, and anti-patterns for working with Litestream. + +## Gemini-Specific Configuration + +### File Exclusions +Check `.aiexclude` file for patterns of files that should not be shared with Gemini (similar to `.gitignore`). + +### Strengths for This Project + +1. **Test Generation**: Excellent at creating comprehensive test suites +2. **Documentation**: Strong at generating and updating documentation +3. **Code Review**: Good at identifying potential issues and security concerns +4. **Local Codebase Awareness**: Enable for full repository understanding + +## Key Project Concepts + +### SQLite Lock Page +- Must skip page at 1GB boundary (0x40000000) +- Page number varies by page size (262145 for 4KB pages) +- See docs/SQLITE_INTERNALS.md for details + +### LTX Format +- Immutable replication files +- Named by transaction ID ranges +- See docs/LTX_FORMAT.md for specification + +### Architectural Boundaries +- DB layer (db.go): Database state and restoration +- Replica layer (replica.go): Replication only +- Storage layer: ReplicaClient implementations + +## Testing Focus + +When generating tests: +- Include >1GB database tests for lock page verification +- Add race condition tests with -race flag +- Test various page sizes (4KB, 8KB, 16KB, 32KB) +- Include eventual consistency scenarios + +## Common Tasks + +### Adding Storage Backend +1. Implement ReplicaClient interface +2. Follow existing patterns (s3/, gs/, abs/) +3. Handle eventual consistency +4. Generate comprehensive tests + +### Refactoring +1. Respect layer boundaries (DB vs Replica) +2. Maintain backward compatibility with v0.5.0 +3. Use atomic file operations +4. Return errors properly (don't just log) + +## Build and Test Commands + +```bash +# Build without CGO +go build -o bin/litestream ./cmd/litestream + +# Test with race detection +go test -race -v ./... + +# Test specific backend +go test -v ./replica_client_test.go -integration s3 +``` + +## Configuration Reference + +See `etc/litestream.yml` for configuration examples. Note v0.5.0 constraint: single replica per database only. + +## Additional Resources + +- llms.txt: Quick navigation index +- docs/: Deep technical documentation +- .claude/commands/: Task-specific commands (if using with Claude Code) diff --git a/llms.txt b/llms.txt new file mode 100644 index 000000000..7815049c1 --- /dev/null +++ b/llms.txt @@ -0,0 +1,84 @@ +# Litestream + +Disaster recovery tool for SQLite that runs as a background process and safely replicates changes incrementally to S3, GCS, Azure Blob Storage, SFTP, or another file system. 
+ +## Core Documentation + +- [AGENTS.md](AGENTS.md): AI agent instructions, architectural patterns, and anti-patterns +- [docs/SQLITE_INTERNALS.md](docs/SQLITE_INTERNALS.md): Critical SQLite knowledge including WAL format and 1GB lock page +- [docs/LTX_FORMAT.md](docs/LTX_FORMAT.md): LTX (Log Transaction) format specification for replication +- [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md): Deep technical details of Litestream components +- [docs/V050_CHANGES.md](docs/V050_CHANGES.md): Version 0.5.0 breaking changes and migration guide + +## Implementation Guides + +- [docs/REPLICA_CLIENT_GUIDE.md](docs/REPLICA_CLIENT_GUIDE.md): Guide for implementing storage backends +- [docs/TESTING_GUIDE.md](docs/TESTING_GUIDE.md): Comprehensive testing strategies including >1GB database tests + +## Core Components + +- [db.go](db.go): Database monitoring, WAL reading, checkpoint management +- [replica.go](replica.go): Replication management, position tracking, synchronization +- [store.go](store.go): Multi-database coordination, compaction scheduling +- [replica_client.go](replica_client.go): Interface definition for storage backends + +## Storage Backends + +- [s3/replica_client.go](s3/replica_client.go): AWS S3 and compatible storage implementation +- [gs/replica_client.go](gs/replica_client.go): Google Cloud Storage implementation +- [abs/replica_client.go](abs/replica_client.go): Azure Blob Storage implementation +- [sftp/replica_client.go](sftp/replica_client.go): SFTP implementation +- [file/replica_client.go](file/replica_client.go): Local file system implementation +- [nats/replica_client.go](nats/replica_client.go): NATS JetStream implementation (v0.5.0+) + +## Critical Concepts + +### SQLite Lock Page +The lock page at exactly 1GB (0x40000000) must always be skipped during replication. Page number varies by page size: 262145 for 4KB pages, 131073 for 8KB pages. + +### LTX Format +Immutable, append-only files containing database changes. Files are named by transaction ID ranges (e.g., 0000000001-0000000064.ltx). + +### Compaction Levels (v0.5.0) +- Level 0: Raw LTX files (no compaction) +- Level 1: 30-second windows +- Level 2: 5-minute windows +- Level 3: 1-hour windows +- Snapshots: Daily full database state + +### Architectural Boundaries +- **DB Layer (db.go)**: Handles database state, restoration logic, monitoring +- **Replica Layer (replica.go)**: Focuses solely on replication concerns +- **Storage Layer**: Implements ReplicaClient interface for various backends + +## Key Patterns + +### Atomic File Operations +Always write to temporary file then rename for atomicity. + +### Error Handling +Return errors immediately, don't log and continue. + +### Eventual Consistency +Always prefer local files during compaction to handle eventually consistent storage. + +### Locking +Use Lock() for writes, RLock() for reads. Never use RLock() when modifying state. + +## Testing Requirements + +- Test with databases >1GB to verify lock page handling +- Run with race detector enabled (-race flag) +- Test with various page sizes (4KB, 8KB, 16KB, 32KB) +- Verify eventual consistency handling with storage backends + +## Configuration + +Primary configuration via YAML file (etc/litestream.yml) or environment variables. Single replica per database in v0.5.0+ (breaking change from v0.3.x). 
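+
+A minimal single-replica configuration sketch (database path and bucket name are placeholders):
+
+```yaml
+dbs:
+  - path: /data/app.db
+    replicas:
+      - url: s3://example-bucket/app-db
+```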
+ +## Build Requirements + +- Go 1.24+ +- No CGO required for main binary (uses modernc.org/sqlite) +- CGO required only for VFS functionality (build with -tags vfs) +- Always build binaries into bin/ directory (gitignored) From 224758280f13bb159fa37f549069aebd7b2228f5 Mon Sep 17 00:00:00 2001 From: Cory LaNou Date: Fri, 10 Oct 2025 16:27:37 -0500 Subject: [PATCH 5/8] feat(claude): add .claude directory with specialized agents and commands Implement comprehensive Claude Code support infrastructure: Agents (.claude/agents/): - sqlite-expert: SQLite WAL and page management expertise - replica-client-developer: Storage backend implementation guide - ltx-compaction-specialist: LTX format and compaction expert - test-engineer: Comprehensive testing strategies - performance-optimizer: Performance and resource optimization Commands (.claude/commands/): - analyze-ltx: Analyze LTX file structure - debug-wal: Debug WAL replication issues - test-compaction: Test compaction scenarios - trace-replication: Trace replication flow - validate-replica: Validate replica implementations - add-storage-backend: Create new storage backends - fix-common-issues: Diagnose and fix common problems - run-comprehensive-tests: Execute full test suite Configuration: - Force include .claude/ in git despite global gitignore - Exclude logs, hooks, and local settings from commits - Update CLAUDE.md to reference .claude resources This provides Claude Code with specialized knowledge and tools for effective Litestream development and debugging. --- .claude/agents/ltx-compaction-specialist.md | 154 ++++++++++++ .claude/agents/performance-optimizer.md | 259 ++++++++++++++++++++ .claude/agents/replica-client-developer.md | 102 ++++++++ .claude/agents/sqlite-expert.md | 90 +++++++ .claude/agents/test-engineer.md | 194 +++++++++++++++ .claude/commands/add-storage-backend.md | 89 +++++++ .claude/commands/analyze-ltx.md | 35 +++ .claude/commands/debug-wal.md | 109 ++++++++ .claude/commands/fix-common-issues.md | 179 ++++++++++++++ .claude/commands/run-comprehensive-tests.md | 217 ++++++++++++++++ .claude/commands/test-compaction.md | 61 +++++ .claude/commands/trace-replication.md | 120 +++++++++ .claude/commands/validate-replica.md | 70 ++++++ .claude/settings.json | 86 +++++++ .gitignore | 10 +- CLAUDE.md | 219 +++++++++++++++++ 16 files changed, 1992 insertions(+), 2 deletions(-) create mode 100644 .claude/agents/ltx-compaction-specialist.md create mode 100644 .claude/agents/performance-optimizer.md create mode 100644 .claude/agents/replica-client-developer.md create mode 100644 .claude/agents/sqlite-expert.md create mode 100644 .claude/agents/test-engineer.md create mode 100644 .claude/commands/add-storage-backend.md create mode 100644 .claude/commands/analyze-ltx.md create mode 100644 .claude/commands/debug-wal.md create mode 100644 .claude/commands/fix-common-issues.md create mode 100644 .claude/commands/run-comprehensive-tests.md create mode 100644 .claude/commands/test-compaction.md create mode 100644 .claude/commands/trace-replication.md create mode 100644 .claude/commands/validate-replica.md create mode 100644 .claude/settings.json create mode 100644 CLAUDE.md diff --git a/.claude/agents/ltx-compaction-specialist.md b/.claude/agents/ltx-compaction-specialist.md new file mode 100644 index 000000000..9d945d9de --- /dev/null +++ b/.claude/agents/ltx-compaction-specialist.md @@ -0,0 +1,154 @@ +--- +role: LTX Format and Compaction Specialist +tools: + - read + - write + - edit + - grep + - bash +priority: high +--- + +# LTX 
Compaction Specialist Agent + +You are an expert in the LTX (Log Transaction) format and multi-level compaction strategies for Litestream. + +## Core Knowledge + +### LTX File Format +``` +┌─────────────────────┐ +│ Header │ 44 bytes +├─────────────────────┤ +│ Page Frames │ Variable +├─────────────────────┤ +│ Page Index │ Binary search tree +├─────────────────────┤ +│ Trailer │ 24 bytes +└─────────────────────┘ +``` + +### File Naming Convention +``` +MMMMMMMMMMMMMMMM-NNNNNNNNNNNNNNNN.ltx +Where: + M = MinTXID (16 hex digits) + N = MaxTXID (16 hex digits) +Example: 0000000000000001-0000000000000064.ltx +``` + +## Compaction Levels (v0.5.0) + +### Level Structure +``` +Level 0: Raw (no compaction) +Level 1: 30-second windows +Level 2: 5-minute windows +Level 3: 1-hour windows +Snapshots: Daily full database +``` + +### Critical Compaction Rules + +1. **ALWAYS Read Local First**: + ```go + // CORRECT - Handles eventual consistency + f, err := os.Open(db.LTXPath(info.Level, info.MinTXID, info.MaxTXID)) + if err == nil { + return f, nil // Use local file + } + // Only fall back to remote if local doesn't exist + return replica.Client.OpenLTXFile(...) + ``` + +2. **Preserve Timestamps**: + ```go + // Keep earliest CreatedAt + info := <x.FileInfo{ + CreatedAt: oldestSourceFile.CreatedAt, + } + ``` + +3. **Skip Lock Page**: + ```go + if pgno == ltx.LockPgno(pageSize) { + continue + } + ``` + +## Compaction Algorithm + +```go +func compactLTXFiles(files []*LTXFile) (*LTXFile, error) { + // 1. Create page map (newer overwrites older) + pageMap := make(map[uint32]Page) + for _, file := range files { + for _, page := range file.Pages { + pageMap[page.Number] = page + } + } + + // 2. Create new LTX with merged pages + merged := <XFile{ + MinTXID: files[0].MinTXID, + MaxTXID: files[len(files)-1].MaxTXID, + } + + // 3. Add pages in order (skip lock page!) + for pgno := uint32(1); pgno <= maxPgno; pgno++ { + if pgno == LockPageNumber(pageSize) { + continue + } + if page, ok := pageMap[pgno]; ok { + merged.Pages = append(merged.Pages, page) + } + } + + return merged, nil +} +``` + +## Key Properties + +### Immutability +- LTX files are NEVER modified after creation +- New changes create new files +- Compaction creates new merged files + +### Checksums +- CRC-64 ECMA for integrity +- Per-page checksums +- Cumulative file checksum + +### Page Index +- Binary search tree for O(log n) lookups +- 16-byte entries (page number + offset) +- Located via trailer + +## Common Issues + +1. **Partial Reads**: Remote storage may return incomplete files +2. **Race Conditions**: Multiple compactions running +3. **Timestamp Loss**: Not preserving original CreatedAt +4. **Lock Page**: Including 1GB lock page in compacted files +5. **Memory Usage**: Loading entire files for compaction + +## Testing + +```bash +# Test compaction +go test -v -run TestStore_CompactDB ./... + +# Test with eventual consistency +go test -v -run TestStore_CompactDB_RemotePartialRead ./... 
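+
+# Catch data races in concurrent compaction (slower, but recommended)
+go test -race -v -run TestStore_CompactDB ./...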
+ +# Manual test +litestream ltx info file.ltx +litestream ltx verify file.ltx +``` + +## References +- docs/LTX_FORMAT.md - Complete format specification +- store.go - Compaction scheduling +- db.go - Compaction implementation +- github.com/superfly/ltx - LTX library diff --git a/.claude/agents/performance-optimizer.md b/.claude/agents/performance-optimizer.md new file mode 100644 index 000000000..f5645b7c4 --- /dev/null +++ b/.claude/agents/performance-optimizer.md @@ -0,0 +1,259 @@ +--- +role: Performance Optimizer +tools: + - read + - write + - edit + - bash + - grep +priority: medium +--- + +# Performance Optimizer Agent + +You specialize in optimizing Litestream for speed, memory usage, and resource efficiency. + +## Key Performance Areas + +### O(n) Operations to Optimize + +1. **Page Iteration** + ```go + // Cache page index + const DefaultEstimatedPageIndexSize = 32 * 1024 // 32KB + + // Fetch end of file first for page index + offset := info.Size - DefaultEstimatedPageIndexSize + if offset < 0 { + offset = 0 + } + ``` + +2. **File Listing** + ```go + // Cache file listings + type FileCache struct { + files []FileInfo + timestamp time.Time + ttl time.Duration + } + ``` + +3. **Compaction** + ```go + // Limit concurrent compactions + sem := make(chan struct{}, maxConcurrentCompactions) + ``` + +## Memory Optimization + +### Page Buffer Pooling +```go +var pagePool = sync.Pool{ + New: func() interface{} { + b := make([]byte, 4096) // Default page size + return &b + }, +} + +func getPageBuffer() []byte { + return *pagePool.Get().(*[]byte) +} + +func putPageBuffer(b []byte) { + pagePool.Put(&b) +} +``` + +### Streaming Instead of Loading +```go +// BAD - Loads entire file +data, err := os.ReadFile(path) + +// GOOD - Streams data +f, err := os.Open(path) +defer f.Close() +io.Copy(dst, f) +``` + +## Concurrency Patterns + +### Proper Locking +```go +// Read-heavy optimization +type Store struct { + mu sync.RWMutex // Use RWMutex for read-heavy +} + +func (s *Store) Read() { + s.mu.RLock() + defer s.mu.RUnlock() + // Read operation +} + +func (s *Store) Write() { + s.mu.Lock() + defer s.mu.Unlock() + // Write operation +} +``` + +### Channel Patterns +```go +// Batch processing +batch := make([]Item, 0, batchSize) +ticker := time.NewTicker(batchInterval) + +for { + select { + case item := <-input: + batch = append(batch, item) + if len(batch) >= batchSize { + processBatch(batch) + batch = batch[:0] + } + case <-ticker.C: + if len(batch) > 0 { + processBatch(batch) + batch = batch[:0] + } + } +} +``` + +## I/O Optimization + +### Buffered I/O +```go +// Use buffered writers +bw := bufio.NewWriterSize(w, 64*1024) // 64KB buffer +defer bw.Flush() + +// Use buffered readers +br := bufio.NewReaderSize(r, 64*1024) +``` + +### Parallel Downloads +```go +func downloadParallel(files []string) { + var wg sync.WaitGroup + sem := make(chan struct{}, 5) // Limit to 5 concurrent + + for _, file := range files { + wg.Add(1) + go func(f string) { + defer wg.Done() + sem <- struct{}{} + defer func() { <-sem }() + + download(f) + }(file) + } + wg.Wait() +} +``` + +## Caching Strategy + +### LRU Cache Implementation +```go +type LRUCache struct { + capacity int + items map[string]*list.Element + list *list.List + mu sync.RWMutex +} + +func (c *LRUCache) Get(key string) (interface{}, bool) { + c.mu.RLock() + elem, ok := c.items[key] + c.mu.RUnlock() + + if !ok { + return nil, false + } + + c.mu.Lock() + c.list.MoveToFront(elem) + c.mu.Unlock() + + return elem.Value, true +} +``` + +## Profiling Tools + +### 
CPU Profiling +```bash +# Generate CPU profile +go test -cpuprofile=cpu.prof -bench=. + +# Analyze +go tool pprof cpu.prof +(pprof) top10 +(pprof) list functionName +``` + +### Memory Profiling +```bash +# Generate memory profile +go test -memprofile=mem.prof -bench=. + +# Analyze allocations +go tool pprof -alloc_space mem.prof +``` + +### Trace Analysis +```bash +# Generate trace +go test -trace=trace.out + +# View trace +go tool trace trace.out +``` + +## Configuration Tuning + +### SQLite Pragmas +```sql +PRAGMA cache_size = -64000; -- 64MB cache +PRAGMA synchronous = NORMAL; -- Balance safety/speed +PRAGMA wal_autocheckpoint = 10000; -- Larger WAL before checkpoint +PRAGMA busy_timeout = 5000; -- 5 second timeout +``` + +### Litestream Settings +```yaml +# Optimal intervals +min-checkpoint-page-n: 1000 +max-checkpoint-page-n: 10000 +truncate-page-n: 500000 +monitor-interval: 1s +``` + +## Benchmarks to Run + +```bash +# Core operations +go test -bench=BenchmarkWALRead +go test -bench=BenchmarkLTXWrite +go test -bench=BenchmarkCompaction +go test -bench=BenchmarkPageIteration + +# With memory stats +go test -bench=. -benchmem +``` + +## Common Performance Issues + +1. **Not pooling buffers** - Creates garbage +2. **Loading entire files** - Use streaming +3. **Excessive locking** - Use RWMutex +4. **No caching** - Repeated expensive operations +5. **Serial processing** - Could parallelize +6. **Small buffers** - Increase buffer sizes + +## References +- Go performance tips: https://go.dev/doc/perf +- SQLite optimization: https://sqlite.org/optoverview.html +- Profiling guide: https://go.dev/blog/pprof diff --git a/.claude/agents/replica-client-developer.md b/.claude/agents/replica-client-developer.md new file mode 100644 index 000000000..7fe590000 --- /dev/null +++ b/.claude/agents/replica-client-developer.md @@ -0,0 +1,102 @@ +--- +role: Replica Client Developer +tools: + - read + - write + - edit + - grep + - bash +priority: high +--- + +# Replica Client Developer Agent + +You specialize in implementing and maintaining storage backend clients for Litestream replication. + +## Core Knowledge + +### ReplicaClient Interface +Every storage backend MUST implement: +```go +type ReplicaClient interface { + Type() string + LTXFiles(ctx, level, seek) (FileIterator, error) + OpenLTXFile(ctx, level, minTXID, maxTXID, offset, size) (io.ReadCloser, error) + WriteLTXFile(ctx, level, minTXID, maxTXID, r, createdAt) (*FileInfo, error) + DeleteLTXFiles(ctx, files) error +} +``` + +### Critical Patterns + +1. **Eventual Consistency Handling**: + - Storage may not immediately reflect writes + - Files may be partially available + - ALWAYS prefer local files during compaction + +2. **Atomic Operations**: + ```go + // Write to temp, then rename + tmpPath := path + ".tmp" + // Write to tmpPath + os.Rename(tmpPath, path) + ``` + +3. **Error Types**: + - Return `os.ErrNotExist` for missing files + - Wrap errors with context: `fmt.Errorf("operation: %w", err)` + +## Implementation Checklist + +### New Backend Requirements +- [ ] Implement ReplicaClient interface +- [ ] Handle partial reads (offset/size) +- [ ] Support seek parameter for pagination +- [ ] Preserve CreatedAt timestamps +- [ ] Handle eventual consistency +- [ ] Implement proper error types +- [ ] Add integration tests +- [ ] Document configuration + +### Testing Requirements +```bash +# Integration test +go test -v ./replica_client_test.go -integration [backend] + +# Race conditions +go test -race -v ./[backend]/... 
+ +# Large files (>1GB) +./bin/litestream-test populate -target-size 2GB +``` + +## Existing Backends Reference + +### Study These Implementations +- `s3/replica_client.go` - AWS S3 (most complete) +- `gs/replica_client.go` - Google Cloud Storage +- `abs/replica_client.go` - Azure Blob Storage +- `file/replica_client.go` - Local filesystem (simplest) +- `sftp/replica_client.go` - SSH File Transfer +- `nats/replica_client.go` - NATS JetStream (newest) + +## Common Pitfalls +1. Not handling eventual consistency +2. Missing atomic write operations +3. Incorrect error types +4. Not preserving timestamps +5. Forgetting partial read support +6. No retry logic for transient failures + +## Configuration Pattern +```yaml +replicas: + - type: [backend] + option1: value1 + option2: value2 +``` + +## References +- docs/REPLICA_CLIENT_GUIDE.md - Complete implementation guide +- replica_client.go - Interface definition +- replica_client_test.go - Test suite diff --git a/.claude/agents/sqlite-expert.md b/.claude/agents/sqlite-expert.md new file mode 100644 index 000000000..c5db3c2b8 --- /dev/null +++ b/.claude/agents/sqlite-expert.md @@ -0,0 +1,90 @@ +--- +role: SQLite WAL and Page Expert +tools: + - read + - write + - edit + - grep + - bash +priority: high +--- + +# SQLite Expert Agent + +You are a SQLite internals expert specializing in WAL (Write-Ahead Log) operations and page management for the Litestream project. + +## Core Knowledge + +### Critical SQLite Concepts +1. **1GB Lock Page** (MUST KNOW): + - Located at exactly 0x40000000 (1,073,741,824 bytes) + - Page number varies by page size: + - 4KB pages: 262145 + - 8KB pages: 131073 + - 16KB pages: 65537 + - 32KB pages: 32769 + - MUST be skipped in all iterations + - Cannot contain data + +2. **WAL Structure**: + - 32-byte header with magic number + - Frames with 24-byte headers + - Cumulative checksums + - Salt values for verification + +3. **Page Types**: + - B-tree interior/leaf pages + - Overflow pages + - Freelist pages + - Lock byte page (at 1GB) + +## Primary Responsibilities + +### WAL Monitoring +- Monitor WAL file changes in `db.go` +- Ensure proper checksum verification +- Handle WAL frame reading correctly +- Convert WAL frames to LTX format + +### Page Management +- Always skip lock page during iteration +- Handle various page sizes correctly +- Verify page integrity +- Manage page caching efficiently + +### Testing Requirements +- Create test databases >1GB +- Test all page sizes (4KB, 8KB, 16KB, 32KB) +- Verify lock page skipping +- Test WAL checkpoint modes + +## Code Patterns + +### Correct Lock Page Handling +```go +lockPgno := ltx.LockPgno(pageSize) +if pgno == lockPgno { + continue // Skip lock page +} +``` + +### WAL Reading +```go +// Always verify magic number +magic := binary.BigEndian.Uint32(header.Magic[:]) +if magic != 0x377f0682 && magic != 0x377f0683 { + return errors.New("invalid WAL magic") +} +``` + +## Common Mistakes to Avoid +1. Not skipping lock page at 1GB +2. Incorrect checksum calculations +3. Wrong byte order (use BigEndian) +4. Not handling all page sizes +5. 
Direct file manipulation (use SQLite API) + +## References +- docs/SQLITE_INTERNALS.md - Complete SQLite internals guide +- docs/LTX_FORMAT.md - LTX conversion details +- db.go - WAL monitoring implementation diff --git a/.claude/agents/test-engineer.md b/.claude/agents/test-engineer.md new file mode 100644 index 000000000..6ab41af44 --- /dev/null +++ b/.claude/agents/test-engineer.md @@ -0,0 +1,194 @@ +--- +role: Test Engineer +tools: + - read + - write + - edit + - bash + - grep +priority: medium +--- + +# Test Engineer Agent + +You specialize in creating and maintaining comprehensive test suites for Litestream, with focus on edge cases and race conditions. + +## Critical Test Scenarios + +### 1GB Lock Page Testing + +**MUST TEST**: Databases crossing the 1GB boundary + +```bash +# Create >1GB test database +sqlite3 large.db < c.failAfter { + return errors.New("simulated failure") + } + return c.ReplicaClient.WriteLTXFile(...) +} +``` + +## Coverage Requirements + +### Minimum Coverage +- Core packages: >80% +- Storage backends: >70% +- Critical paths: 100% + +### Generate Coverage Report +```bash +go test -coverprofile=coverage.out ./... +go tool cover -html=coverage.out +``` + +## Common Test Mistakes + +1. Not testing with databases >1GB +2. Forgetting race detector +3. Not testing all page sizes +4. Missing eventual consistency tests +5. No error injection tests +6. Ignoring benchmark regressions + +## CI/CD Integration + +```yaml +# .github/workflows/test.yml +- name: Run tests with race detector + run: go test -race -v ./... + +- name: Test large databases + run: ./scripts/test-large-db.sh + +- name: Integration tests + run: ./scripts/test-integration.sh +``` + +## References +- docs/TESTING_GUIDE.md - Complete testing guide +- replica_client_test.go - Integration test patterns +- db_test.go - Unit test examples diff --git a/.claude/commands/add-storage-backend.md b/.claude/commands/add-storage-backend.md new file mode 100644 index 000000000..e968d8af1 --- /dev/null +++ b/.claude/commands/add-storage-backend.md @@ -0,0 +1,89 @@ +--- +description: Create a new storage backend implementation +--- + +# Add Storage Backend Command + +Create a new storage backend implementation for Litestream with all required components. + +## Steps + +1. **Create Package Directory** + ```bash + mkdir -p {{backend_name}} + ``` + +2. **Implement ReplicaClient Interface** + Create `{{backend_name}}/replica_client.go`: + ```go + package {{backend_name}} + + type ReplicaClient struct { + // Configuration fields + } + + func (c *ReplicaClient) Type() string { + return "{{backend_name}}" + } + + func (c *ReplicaClient) LTXFiles(ctx context.Context, level int, seek string) (ltx.FileIterator, error) { + // List files at level + } + + func (c *ReplicaClient) OpenLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, offset, size int64) (io.ReadCloser, error) { + // Open file for reading + } + + func (c *ReplicaClient) WriteLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, r io.Reader, createdAt *time.Time) (*ltx.FileInfo, error) { + // Write file atomically + } + + func (c *ReplicaClient) DeleteLTXFiles(ctx context.Context, files []*ltx.FileInfo) error { + // Delete files + } + ``` + +3. **Add Configuration Parsing** + Update `cmd/litestream/config.go`: + ```go + case "{{backend_name}}": + client = &{{backend_name}}.ReplicaClient{ + // Parse config + } + ``` + +4. 
**Create Integration Tests** + Create `{{backend_name}}/replica_client_test.go`: + ```go + func TestReplicaClient_{{backend_name}}(t *testing.T) { + if !*integration || *backend != "{{backend_name}}" { + t.Skip("{{backend_name}} integration test skipped") + } + // Test implementation + } + ``` + +5. **Add Documentation** + Update README.md with configuration example: + ```yaml + replicas: + - type: {{backend_name}} + option1: value1 + option2: value2 + ``` + +## Key Requirements +- Handle eventual consistency +- Implement atomic writes (temp file + rename) +- Support partial reads (offset/size) +- Preserve CreatedAt timestamps +- Return proper error types (os.ErrNotExist) + +## Testing +```bash +# Run integration tests +go test -v ./replica_client_test.go -integration {{backend_name}} + +# Test with race detector +go test -race -v ./{{backend_name}}/... +``` diff --git a/.claude/commands/analyze-ltx.md b/.claude/commands/analyze-ltx.md new file mode 100644 index 000000000..421a1d551 --- /dev/null +++ b/.claude/commands/analyze-ltx.md @@ -0,0 +1,35 @@ +Analyze LTX file issues in Litestream. This command helps diagnose problems with LTX files, including corruption, missing files, and consistency issues. + +First, understand the context: +- What error messages are being reported? +- Which storage backend is being used? +- Are there any eventual consistency issues? + +Then perform the analysis: + +1. **Check LTX file structure**: Look for corrupted headers, invalid page indices, or checksum mismatches in the LTX files. + +2. **Verify file continuity**: Ensure there are no gaps in the TXID sequence that could prevent restoration. + +3. **Check compaction issues**: Look for problems during compaction that might corrupt files, especially with eventually consistent storage. + +4. **Analyze page sequences**: Verify that page numbers are sequential and the lock page at 1GB is properly skipped. + +5. **Review storage backend behavior**: Check if the storage backend has eventual consistency that might cause partial reads during compaction. + +Key files to examine: +- `db.go`: WAL monitoring and LTX generation +- `replica_client.go`: Storage interface +- `store.go`: Compaction logic +- Backend-specific client in `s3/`, `gs/`, etc. + +Common issues to look for: +- "nonsequential page numbers" errors (corrupted compaction) +- "EOF" errors (partial file reads) +- Missing TXID ranges (failed uploads) +- Lock page at 0x40000000 not being skipped + +Use the testing harness to reproduce: +```bash +./bin/litestream-test validate -source-db test.db -replica-url [URL] +``` diff --git a/.claude/commands/debug-wal.md b/.claude/commands/debug-wal.md new file mode 100644 index 000000000..5b258bef1 --- /dev/null +++ b/.claude/commands/debug-wal.md @@ -0,0 +1,109 @@ +Debug WAL monitoring issues in Litestream. This command helps diagnose problems with WAL change detection, checkpointing, and replication triggers. + +First, understand the symptoms: +- Is replication not triggering on changes? +- Are checkpoints failing or not happening? +- Is the WAL growing unbounded? + +Then debug the monitoring system: + +1. **Check monitor goroutine** (db.go:1499): +```go +// Verify monitor is running +func (db *DB) monitor() { + ticker := time.NewTicker(db.MonitorInterval) // Default: 1s + // Check if ticker is firing + // Verify checkWAL() is being called +} +``` + +2. 
**Verify WAL change detection**: +```go +// Check if WAL changes are detected +func (db *DB) checkWAL() (bool, error) { + // Get WAL size and checksum + // Compare with previous values + // Should return true if changed +} +``` + +3. **Debug checkpoint triggers**: +```go +// Check checkpoint thresholds +MinCheckpointPageN int // Default: 1000 pages +MaxCheckpointPageN int // Default: 10000 pages +TruncatePageN int // Default: 500000 pages + +// Verify WAL page count +walPageCount := db.WALPageCount() +if walPageCount > db.MinCheckpointPageN { + // Should trigger passive checkpoint +} +``` + +4. **Check long-running read transaction**: +```go +// Ensure rtx is maintained +if db.rtx == nil { + // Read transaction lost - replication may fail +} +``` + +5. **Monitor notification channel**: +```go +// Check if replicas are notified +select { +case <-db.notify: + // WAL change detected +default: + // No changes +} +``` + +Common issues to check: +- MonitorInterval too long (default 1s) +- Checkpoint failing due to active transactions +- Read transaction preventing checkpoint +- Notify channel not triggering replicas +- WAL file permissions issues + +Debug commands: +```sql +-- Check WAL status +PRAGMA wal_checkpoint; +PRAGMA journal_mode; +PRAGMA page_count; +PRAGMA wal_autocheckpoint; + +-- Check for locks +SELECT * FROM pragma_lock_status(); +``` + +Testing WAL monitoring: +```go +func TestDB_WALMonitoring(t *testing.T) { + db := setupTestDB(t) + + // Set fast monitoring for test + db.MonitorInterval = 10 * time.Millisecond + + // Write data + writeTestData(t, db, 100) + + // Wait for notification + select { + case <-db.notify: + // Success + case <-time.After(1 * time.Second): + t.Error("WAL change not detected") + } +} +``` + +Monitor with logging: +```go +slog.Debug("wal check", + "size", walInfo.Size, + "checksum", walInfo.Checksum, + "pages", walInfo.PageCount) +``` diff --git a/.claude/commands/fix-common-issues.md b/.claude/commands/fix-common-issues.md new file mode 100644 index 000000000..6263b2253 --- /dev/null +++ b/.claude/commands/fix-common-issues.md @@ -0,0 +1,179 @@ +--- +description: Fix common Litestream issues +--- + +# Fix Common Issues Command + +Diagnose and fix common issues in Litestream deployments. + +## Issue 1: Lock Page Not Being Skipped + +**Symptom**: Errors or corruption with databases >1GB + +**Check**: +```bash +# Find lock page references +grep -r "LockPgno" --include="*.go" +``` + +**Fix**: +```go +// Ensure all page iterations skip lock page +lockPgno := ltx.LockPgno(pageSize) +if pgno == lockPgno { + continue +} +``` + +## Issue 2: Race Condition in Replica Position + +**Symptom**: Data races detected, inconsistent position tracking + +**Check**: +```bash +go test -race -v -run TestReplica_SetPos ./... +``` + +**Fix**: +```go +// Change from RLock to Lock for writes +func (r *Replica) SetPos(pos ltx.Pos) { + r.mu.Lock() // NOT RLock! + defer r.mu.Unlock() + r.pos = pos +} +``` + +## Issue 3: Eventual Consistency Issues + +**Symptom**: Compaction failures, partial file reads + +**Check**: +```bash +# Look for remote reads during compaction +grep -r "OpenLTXFile" db.go | grep -v "os.Open" +``` + +**Fix**: +```go +// Always try local first +f, err := os.Open(db.LTXPath(info.Level, info.MinTXID, info.MaxTXID)) +if err == nil { + return f, nil +} +// Only fall back to remote if local doesn't exist +return replica.Client.OpenLTXFile(...) 
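+// Remote storage may be eventually consistent, so the remote copy can be
+// missing or truncated right after upload; the local file is authoritative.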
+``` + +## Issue 4: CreatedAt Timestamp Loss + +**Symptom**: Cannot do point-in-time recovery + +**Check**: +```bash +# Find WriteLTXFile calls without CreatedAt +grep -r "WriteLTXFile" --include="*.go" | grep "nil" +``` + +**Fix**: +```go +// Preserve earliest timestamp +info, err := client.WriteLTXFile(ctx, level, minTXID, maxTXID, r, &oldestFile.CreatedAt) +``` + +## Issue 5: Non-Atomic File Writes + +**Symptom**: Partial files, corruption on crash + +**Check**: +```bash +# Find direct writes without temp files +grep -r "os.Create\|os.WriteFile" --include="*.go" +``` + +**Fix**: +```go +// Write to temp, then rename +tmpPath := path + ".tmp" +if err := os.WriteFile(tmpPath, data, 0644); err != nil { + return err +} +return os.Rename(tmpPath, path) +``` + +## Issue 6: WAL Checkpoint Blocking + +**Symptom**: WAL grows indefinitely, database locks + +**Check**: +```sql +-- Check WAL size +PRAGMA wal_checkpoint(PASSIVE); +SELECT page_count * page_size FROM pragma_page_count(), pragma_page_size(); +``` + +**Fix**: +```go +// Release read transaction periodically +db.rtx.Rollback() +db.rtx = nil +// Checkpoint +db.db.Exec("PRAGMA wal_checkpoint(RESTART)") +// Restart read transaction +db.initReadTx() +``` + +## Issue 7: Memory Leaks + +**Symptom**: Growing memory usage over time + +**Check**: +```bash +# Generate heap profile +go test -memprofile=mem.prof -run=XXX -bench=. +go tool pprof -top mem.prof +``` + +**Fix**: +```go +// Use sync.Pool for buffers +var pagePool = sync.Pool{ + New: func() interface{} { + b := make([]byte, pageSize) + return &b + }, +} + +// Close resources properly +defer func() { + if f != nil { + f.Close() + } +}() +``` + +## Diagnostic Commands + +```bash +# Check database integrity +sqlite3 database.db "PRAGMA integrity_check;" + +# Verify LTX files +litestream ltx verify *.ltx + +# Check replication status +litestream databases + +# Test restoration +litestream restore -o test.db [replica-url] +``` + +## Prevention Checklist + +- [ ] Always test with databases >1GB +- [ ] Run with race detector in CI +- [ ] Test all page sizes (4KB, 8KB, 16KB, 32KB) +- [ ] Verify eventual consistency handling +- [ ] Check for proper locking (Lock vs RLock) +- [ ] Ensure atomic file operations +- [ ] Preserve timestamps in compaction diff --git a/.claude/commands/run-comprehensive-tests.md b/.claude/commands/run-comprehensive-tests.md new file mode 100644 index 000000000..317217fba --- /dev/null +++ b/.claude/commands/run-comprehensive-tests.md @@ -0,0 +1,217 @@ +--- +description: Run comprehensive test suite for Litestream +--- + +# Run Comprehensive Tests Command + +Execute a full test suite including unit tests, integration tests, race detection, and large database tests. + +## Quick Test Suite + +```bash +# Basic tests with race detection +go test -race -v ./... + +# With coverage +go test -race -cover -v ./... +``` + +## Full Test Suite + +### 1. Unit Tests +```bash +echo "=== Running Unit Tests ===" +go test -v ./... -short +``` + +### 2. Race Condition Tests +```bash +echo "=== Testing for Race Conditions ===" +go test -race -v -run TestReplica_SetPos ./... +go test -race -v -run TestDB_Monitor ./... +go test -race -v -run TestStore_CompactDB ./... +go test -race -v ./... +``` + +### 3. 
Integration Tests +```bash +echo "=== Running Integration Tests ===" + +# S3 (requires AWS credentials) +AWS_ACCESS_KEY_ID=xxx AWS_SECRET_ACCESS_KEY=yyy \ + go test -v ./replica_client_test.go -integration s3 + +# Google Cloud Storage (requires credentials) +GOOGLE_APPLICATION_CREDENTIALS=/path/to/creds.json \ + go test -v ./replica_client_test.go -integration gcs + +# Azure Blob Storage +AZURE_STORAGE_ACCOUNT=xxx AZURE_STORAGE_KEY=yyy \ + go test -v ./replica_client_test.go -integration abs + +# SFTP (requires SSH server) +go test -v ./replica_client_test.go -integration sftp + +# File system (always available) +go test -v ./replica_client_test.go -integration file +``` + +### 4. Large Database Tests (>1GB) +```bash +echo "=== Testing Large Databases ===" + +# Create test database for each page size +for pagesize in 4096 8192 16384 32768; do + echo "Testing page size: $pagesize" + + # Create >1GB database + sqlite3 test-${pagesize}.db <70% for core packages +✅ Lock page correctly skipped for all page sizes +✅ Restoration works for databases >1GB +✅ No memory leaks in benchmarks diff --git a/.claude/commands/test-compaction.md b/.claude/commands/test-compaction.md new file mode 100644 index 000000000..b8d295539 --- /dev/null +++ b/.claude/commands/test-compaction.md @@ -0,0 +1,61 @@ +Test Litestream compaction logic. This command helps test and debug compaction issues, especially with eventually consistent storage backends. + +First, understand the test scenario: +- What storage backend needs testing? +- What size database is involved? +- Are there eventual consistency concerns? + +Then create comprehensive tests: + +1. **Test basic compaction**: +```go +func TestCompaction_Basic(t *testing.T) { + // Create multiple LTX files at level 0 + // Run compaction to level 1 + // Verify merged file is correct +} +``` + +2. **Test with eventual consistency**: +```go +func TestStore_CompactDB_RemotePartialRead(t *testing.T) { + // Use mock client that returns partial data initially + // Verify compaction prefers local files + // Ensure no corruption occurs +} +``` + +3. **Test lock page handling during compaction**: +```go +func TestCompaction_LockPage(t *testing.T) { + // Create database > 1GB + // Compact with data around lock page + // Verify lock page is skipped (page at 0x40000000) +} +``` + +4. **Test timestamp preservation**: +```go +func TestCompaction_PreserveTimestamps(t *testing.T) { + // Compact files with different CreatedAt times + // Verify earliest timestamp is preserved +} +``` + +Key areas to test: +- Reading from local files first (db.go:1280-1294) +- Skipping lock page at 1GB boundary +- Preserving CreatedAt timestamps +- Handling partial/incomplete remote files +- Concurrent compaction safety + +Run with race detector: +```bash +go test -race -v -run TestStore_CompactDB ./... +``` + +Use the test harness for large databases: +```bash +./bin/litestream-test populate -db test.db -target-size 1.5GB +./bin/litestream-test validate -source-db test.db -replica-url file:///tmp/replica +``` diff --git a/.claude/commands/trace-replication.md b/.claude/commands/trace-replication.md new file mode 100644 index 000000000..6e7a26106 --- /dev/null +++ b/.claude/commands/trace-replication.md @@ -0,0 +1,120 @@ +Trace the complete replication flow in Litestream. This command helps understand how changes flow from SQLite through to storage backends. + +Follow the replication path step by step: + +1. 
**Application writes to SQLite**: +```sql +-- Application performs write +INSERT INTO table VALUES (...); +-- SQLite appends to WAL file +``` + +2. **DB.monitor() detects changes** (db.go:1499): +```go +ticker := time.NewTicker(db.MonitorInterval) // Every 1s +changed, err := db.checkWAL() +if changed { + db.notifyReplicas() // Signal replicas +} +``` + +3. **Replica.monitor() responds** (replica.go): +```go +select { +case <-db.notify: + // WAL changed, time to sync +case <-ticker.C: + // Regular sync interval +} +``` + +4. **Replica.Sync() processes changes**: +```go +// Read WAL pages since last position +reader := db.WALReader(r.pos.PageNo) + +// Convert to LTX format +ltxData := convertWALToLTX(reader) + +// Write to storage backend +info, err := r.Client.WriteLTXFile(ctx, level, minTXID, maxTXID, ltxData) + +// Update position +r.SetPos(newPos) +``` + +5. **ReplicaClient uploads to storage**: +```go +// Backend-specific upload +func (c *S3Client) WriteLTXFile(...) (*ltx.FileInfo, error) { + // Upload to S3 + // Return file metadata +} +``` + +6. **Checkpoint when threshold reached**: +```go +if walPageCount > db.MinCheckpointPageN { + db.Checkpoint("PASSIVE") // Try checkpoint +} +if walPageCount > db.MaxCheckpointPageN { + db.Checkpoint("RESTART") // Force checkpoint +} +``` + +Key synchronization points: +- WAL monitoring (1s intervals) +- Replica sync (configurable, default 1s) +- Checkpoint triggers (page thresholds) +- Compaction (hourly/daily) + +Trace with logging: +```go +// Enable debug logging +slog.SetLogLevel(slog.LevelDebug) + +// Key log points: +slog.Debug("wal changed", "size", walSize) +slog.Debug("syncing replica", "pos", r.pos) +slog.Debug("ltx uploaded", "txid", maxTXID) +slog.Debug("checkpoint complete", "mode", mode) +``` + +Performance metrics to monitor: +- WAL growth rate +- Sync latency +- Upload throughput +- Checkpoint frequency +- Compaction duration + +Common bottlenecks: +1. Slow storage uploads +2. Large transactions causing big LTX files +3. Long-running read transactions blocking checkpoints +4. Eventual consistency delays +5. Network latency to storage + +Test replication flow: +```bash +# Start replication with verbose logging +litestream replicate -v + +# In another terminal, write to database +sqlite3 test.db "INSERT INTO test VALUES (1, 'data');" + +# Watch logs for flow: +# - WAL change detected +# - Replica sync triggered +# - LTX file uploaded +# - Position updated +``` + +Verify replication: +```bash +# List replicated files +aws s3 ls s3://bucket/path/ltx/0000/ + +# Restore and verify +litestream restore -o restored.db s3://bucket/path +sqlite3 restored.db "SELECT * FROM test;" +``` diff --git a/.claude/commands/validate-replica.md b/.claude/commands/validate-replica.md new file mode 100644 index 000000000..c860de50a --- /dev/null +++ b/.claude/commands/validate-replica.md @@ -0,0 +1,70 @@ +Validate a ReplicaClient implementation in Litestream. This command helps ensure a replica client correctly implements the interface and handles edge cases. + +First, identify what needs validation: +- Which replica client implementation? +- What storage backend specifics? +- Any known issues or concerns? + +Then validate the implementation: + +1. **Interface compliance check**: +```go +// Ensure all methods are implemented +var _ litestream.ReplicaClient = (*YourClient)(nil) +``` + +2. 
**Verify error types**: +```go +// OpenLTXFile must return os.ErrNotExist for missing files +_, err := client.OpenLTXFile(ctx, 0, 999, 999, 0, 0) +if !errors.Is(err, os.ErrNotExist) { + t.Errorf("Expected os.ErrNotExist, got %v", err) +} +``` + +3. **Test partial reads**: +```go +// Must support offset and size parameters +rc, err := client.OpenLTXFile(ctx, 0, 1, 100, 50, 25) +data, _ := io.ReadAll(rc) +if len(data) != 25 { + t.Errorf("Expected 25 bytes, got %d", len(data)) +} +``` + +4. **Verify timestamp preservation**: +```go +// WriteLTXFile must preserve CreatedAt if provided +createdAt := time.Now().Add(-24 * time.Hour) +info, _ := client.WriteLTXFile(ctx, 0, 1, 100, reader, &createdAt) +if !info.CreatedAt.Equal(createdAt) { + t.Error("CreatedAt not preserved") +} +``` + +5. **Test eventual consistency handling**: +- Implement retry logic for transient failures +- Handle partial file availability +- Verify write-after-write consistency + +6. **Validate cleanup**: +```go +// DeleteAll must remove everything +err := client.DeleteAll(ctx) +files, _ := client.LTXFiles(ctx, 0, 0) +if files.Next() { + t.Error("Files remain after DeleteAll") +} +``` + +Key validation points: +- Proper error types (os.ErrNotExist, os.ErrPermission) +- Context cancellation handling +- Concurrent operation safety +- Iterator doesn't load all files at once +- Proper path construction for storage backend + +Run integration tests: +```bash +go test -v ./[backend]/replica_client_test.go -integration +``` diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 000000000..db93e6ec9 --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,86 @@ +{ + "project_name": "Planning Center Sync", + "description": "Planning Center Sync project for Parable", + + "hooks": { + "PostToolUse": [ + { + "matcher": "Edit|MultiEdit|Write", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/lint-files.sh", + "timeout": 30, + "run_in_background": false + } + ] + }, + { + "matcher": "NotebookEdit", + "hooks": [ + { + "type": "command", + "command": "echo 'Notebook edited - consider running tests'", + "timeout": 5 + } + ] + } + ], + + "PreToolUse": [ + { + "matcher": "Edit|MultiEdit", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/pre-edit-check.sh", + "timeout": 10 + } + ] + } + ], + + "UserPromptSubmit": [ + { + "hooks": [ + { + "type": "command", + "command": "echo '📝 Processing request for Planning Center Sync project for Parable...'", + "timeout": 2 + } + ] + } + ], + + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/session-summary.sh", + "timeout": 10, + "run_in_background": true + } + ] + } + ] + }, + + "auto_formatting": { + "enabled": true, + "markdown": { + "enabled": true, + "tool": "markdownlint" + }, + "json": { + "enabled": true, + "tool": "jq" + } + }, + + "file_permissions": { + "read_only_patterns": [ + "*.pdf" + ] + } +} diff --git a/.gitignore b/.gitignore index a27f47535..187449059 100644 --- a/.gitignore +++ b/.gitignore @@ -3,8 +3,14 @@ /dist .vscode -# Claude-related files -.claude/ +# Claude-related files (force include despite global gitignore) +!.claude/ +!.claude/** +# But ignore logs, hooks, and local settings +.claude/logs/ +.claude/hooks/ +.claude/settings.local.json +# Keep CLAUDE.md ignored as it's auto-loaded by Claude Code CLAUDE.md # Binary diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 000000000..97a094247 --- /dev/null +++ 
b/CLAUDE.md @@ -0,0 +1,219 @@ +# CLAUDE.md - Claude Code Optimizations for Litestream + +This file is automatically loaded by Claude Code and provides Claude-specific optimizations. For comprehensive project documentation, see AGENTS.md. + +## Claude-Specific Optimizations + +**Primary Documentation**: See AGENTS.md for comprehensive architectural guidance, patterns, and anti-patterns. + +### Context Window Advantages +With Claude's 200k token context window, you can load the entire documentation suite: +- Full AGENTS.md for patterns and anti-patterns +- All docs/ subdirectory files for deep technical understanding +- Multiple source files simultaneously for cross-referencing + +### Key Focus Areas for Claude +1. **Architectural Reasoning**: Leverage deep understanding of DB vs Replica layer boundaries +2. **Complex Analysis**: Use full context for multi-file refactoring +3. **SQLite Internals**: Reference docs/SQLITE_INTERNALS.md for WAL format details +4. **LTX Format**: Reference docs/LTX_FORMAT.md for replication specifics + +### Claude-Specific Resources + +#### Specialized Agents (.claude/agents/) +- **sqlite-expert.md**: SQLite WAL and page management expertise +- **replica-client-developer.md**: Storage backend implementation +- **ltx-compaction-specialist.md**: LTX format and compaction +- **test-engineer.md**: Comprehensive testing strategies +- **performance-optimizer.md**: Performance and resource optimization + +#### Commands (.claude/commands/) +- `/analyze-ltx`: Analyze LTX file structure and contents +- `/debug-wal`: Debug WAL replication issues +- `/test-compaction`: Test compaction scenarios +- `/trace-replication`: Trace replication flow +- `/validate-replica`: Validate replica client implementation +- `/add-storage-backend`: Create new storage backend +- `/fix-common-issues`: Diagnose and fix common problems +- `/run-comprehensive-tests`: Execute full test suite + +Use these commands with: ` [arguments]` in Claude Code. + +## Overview + +Litestream is a standalone disaster recovery tool for SQLite that runs as a background process and safely replicates changes incrementally to another file or S3. It works through the SQLite API to prevent database corruption. + +## Build and Development Commands + +### Building + +```bash +# Build the main binary +go build ./cmd/litestream + +# Install the binary +go install ./cmd/litestream + +# Build for specific platforms (using Makefile) +make docker # Build Docker image +make dist-linux # Build Linux AMD64 distribution +make dist-linux-arm # Build Linux ARM distribution +make dist-linux-arm64 # Build Linux ARM64 distribution +make dist-macos # Build macOS distribution (requires LITESTREAM_VERSION env var) +``` + +### Testing + +```bash +# Run all tests +go test -v ./... + +# Run tests with coverage +go test -v -cover ./... + +# Test VFS functionality (requires CGO and explicit vfs build tag) +go test -tags vfs ./cmd/litestream-vfs -v + +# Test builds before committing (always use -o bin/ to avoid committing binaries) +go build -o bin/litestream ./cmd/litestream # Test main build (no CGO required) +CGO_ENABLED=1 go build -tags vfs -o bin/litestream-vfs ./cmd/litestream-vfs # Test VFS with CGO + +# Run specific integration tests (requires environment setup) +go test -v ./replica_client_test.go -integration s3 +go test -v ./replica_client_test.go -integration gcs +go test -v ./replica_client_test.go -integration abs +go test -v ./replica_client_test.go -integration sftp +``` + +### Code Quality + +```bash +# Format code +go fmt ./... 
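+# Organize imports (goimports can be installed with: go install golang.org/x/tools/cmd/goimports@latest)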
+goimports -local github.com/benbjohnson/litestream -w . + +# Run linters +go vet ./... +staticcheck ./... + +# Run pre-commit hooks (includes trailing whitespace, goimports, go-vet, staticcheck) +pre-commit run --all-files +``` + +## Architecture + +### Core Components + +**DB (`db.go`)**: Manages a SQLite database instance with WAL monitoring, checkpoint management, and metrics. Handles replication coordination and maintains long-running read transactions for consistency. + +**Replica (`replica.go`)**: Connects a database to replication destinations via ReplicaClient interface. Manages periodic synchronization and maintains replication position. + +**ReplicaClient Interface** (`replica_client.go`): Abstraction for different storage backends (S3, GCS, Azure Blob Storage, SFTP, file system, NATS). Each implementation handles snapshot/WAL segment upload and restoration. The `WriteLTXFile` method accepts an optional `createdAt` timestamp parameter to preserve original file timestamps during compaction operations. + +**WAL Processing**: The system monitors SQLite WAL files for changes, segments them into LTX format files, and replicates these segments to configured destinations. Uses SQLite checksums for integrity verification. + +### Storage Backends + +- **S3** (`s3/replica_client.go`): AWS S3 and compatible storage +- **GCS** (`gs/replica_client.go`): Google Cloud Storage +- **ABS** (`abs/replica_client.go`): Azure Blob Storage +- **SFTP** (`sftp/replica_client.go`): SSH File Transfer Protocol +- **File** (`file/replica_client.go`): Local file system replication +- **NATS** (`nats/replica_client.go`): NATS JetStream object storage + +### Command Structure + +Main entry point (`cmd/litestream/main.go`) provides subcommands: + +- `replicate`: Primary replication daemon mode +- `restore`: Restore database from replica +- `databases`: List configured databases +- `ltx`: WAL/LTX file utilities (renamed from 'wal') +- `version`: Display version information +- `mcp`: Model Context Protocol support + +## Key Design Patterns + +1. **Non-invasive monitoring**: Uses SQLite API exclusively, no direct file manipulation +2. **Incremental replication**: Segments WAL into small chunks for efficient transfer +3. **Multi-destination support**: Single database can replicate to multiple destinations +4. **Age encryption**: Optional end-to-end encryption using age identities/recipients +5. **Prometheus metrics**: Built-in observability for monitoring replication health +6. **Timestamp preservation**: Compaction preserves earliest CreatedAt timestamp from source files to maintain temporal granularity for point-in-time restoration + +## Configuration + +Primary configuration via YAML file (`etc/litestream.yml`) or environment variables. 
Supports: + +- Database paths and replica destinations +- Sync intervals and checkpoint settings +- Authentication credentials for cloud storage +- Encryption keys for age encryption + +## Important Notes + +- External contributions accepted for bug fixes only (not features) +- Uses pre-commit hooks for code quality enforcement +- Requires Go 1.24+ for build +- Main binary does NOT require CGO +- VFS functionality requires explicit `-tags vfs` build flag AND CGO_ENABLED=1 +- **ALWAYS build binaries into `bin/` directory** which is gitignored (e.g., `go build -o bin/litestream`) +- Always test builds with different configurations before committing + +## Workflows and Best Practices + +- Any time you create/edit markdown files, lint and fix them with markdownlint + +## Testing Considerations + +### SQLite Lock Page at 1GB Boundary + +Litestream handles a critical SQLite edge case: the lock page at exactly 1GB +(offset 0x40000000). This page is reserved by SQLite for file locking and +cannot contain data. The code skips this page during replication (see +db.go:951-953). + +**Key Implementation Details:** + +- Lock page calculation: `LockPgno = (0x40000000 / pageSize) + 1` +- Located in LTX library: `ltx.LockPgno(pageSize)` +- Must be skipped when iterating through database pages +- Affects databases larger than 1GB regardless of page size + +**Testing Requirements:** + +1. **Create databases >1GB** to ensure lock page handling works +2. **Test with various page sizes** as lock page number changes: + - 4KB: page 262145 (default, most common) + - 8KB: page 131073 + - 16KB: page 65537 + - 32KB: page 32769 +3. **Verify replication** correctly skips the lock page +4. **Test restoration** to ensure databases restore properly across 1GB boundary + +**Quick Test Script:** + +```bash +# Create a >1GB test database +sqlite3 large.db </dev/null || stat -c%s large.db)" +echo "Page count: $(sqlite3 large.db 'PRAGMA page_count')" +echo "Lock page should be at: $((0x40000000 / 4096 + 1))" + +# Test replication +./bin/litestream replicate large.db file:///tmp/replica + +# Test restoration +./bin/litestream restore -o restored.db file:///tmp/replica +sqlite3 restored.db "PRAGMA integrity_check;" +``` From 807d84ec1b40cfc103b2793ce5432da5891052c2 Mon Sep 17 00:00:00 2001 From: Cory LaNou Date: Sun, 12 Oct 2025 08:52:55 -0500 Subject: [PATCH 6/8] docs: update AI documentation to reflect current Litestream architecture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User updates to align documentation with actual implementation: - Updated .claude/agents with current interface signatures and patterns - Updated .claude/commands with correct command patterns and workflows - Aligned AGENTS.md with current constraints and architectural boundaries - Updated technical documentation (LTX_FORMAT, ARCHITECTURE, REPLICA_CLIENT_GUIDE, TESTING_GUIDE) - Removed outdated V050_CHANGES.md - Updated llms.txt index with correct file references Net change: -212 lines (significant cleanup and consolidation) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .claude/agents/ltx-compaction-specialist.md | 30 +-- .claude/agents/replica-client-developer.md | 11 +- .claude/agents/test-engineer.md | 16 +- .claude/commands/add-storage-backend.md | 10 +- .claude/commands/fix-common-issues.md | 24 ++- .claude/commands/run-comprehensive-tests.md | 4 +- .claude/commands/trace-replication.md | 82 ++++++--- .claude/commands/validate-replica.md | 10 +- AGENTS.md | 141 
++++++++------- CLAUDE.md | 2 +- GEMINI.md | 4 +- docs/ARCHITECTURE.md | 85 ++------- docs/LTX_FORMAT.md | 170 ++++++++--------- docs/REPLICA_CLIENT_GUIDE.md | 48 ++--- docs/TESTING_GUIDE.md | 61 +++---- docs/V050_CHANGES.md | 191 -------------------- llms.txt | 7 +- 17 files changed, 342 insertions(+), 554 deletions(-) delete mode 100644 docs/V050_CHANGES.md diff --git a/.claude/agents/ltx-compaction-specialist.md b/.claude/agents/ltx-compaction-specialist.md index 9d945d9de..58acf3785 100644 --- a/.claude/agents/ltx-compaction-specialist.md +++ b/.claude/agents/ltx-compaction-specialist.md @@ -18,13 +18,13 @@ You are an expert in the LTX (Log Transaction) format and multi-level compaction ### LTX File Format ``` ┌─────────────────────┐ -│ Header │ 44 bytes +│ Header │ 84 bytes ├─────────────────────┤ │ Page Frames │ Variable ├─────────────────────┤ -│ Page Index │ Binary search tree +│ Page Index │ Binary search structure ├─────────────────────┤ -│ Trailer │ 24 bytes +│ Trailer │ 16 bytes └─────────────────────┘ ``` @@ -37,7 +37,7 @@ Where: Example: 0000000000000001-0000000000000064.ltx ``` -## Compaction Levels (v0.5.0) +## Default Compaction Levels ### Level Structure ``` @@ -64,9 +64,11 @@ Snapshots: Daily full database 2. **Preserve Timestamps**: ```go // Keep earliest CreatedAt - info := <x.FileInfo{ - CreatedAt: oldestSourceFile.CreatedAt, + info, err := replica.Client.WriteLTXFile(ctx, level, minTXID, maxTXID, reader) + if err != nil { + return nil, fmt.Errorf("write ltx file: %w", err) } + info.CreatedAt = oldestSourceFile.CreatedAt ``` 3. **Skip Lock Page**: @@ -117,13 +119,13 @@ func compactLTXFiles(files []*LTXFile) (*LTXFile, error) { ### Checksums - CRC-64 ECMA for integrity -- Per-page checksums -- Cumulative file checksum +- `PreApplyChecksum`/`PostApplyChecksum` on the header/trailer bracketing file state +- `FileChecksum` covering the entire file contents ### Page Index -- Binary search tree for O(log n) lookups -- 16-byte entries (page number + offset) -- Located via trailer +- Exposed via `ltx.DecodePageIndex` +- Tracks page number plus offset/size of the encoded payload +- Located by seeking from the end of the file using trailer metadata ## Common Issues @@ -142,9 +144,9 @@ go test -v -run TestStore_CompactDB ./... # Test with eventual consistency go test -v -run TestStore_CompactDB_RemotePartialRead ./... 
-# Manual test -litestream ltx info file.ltx -litestream ltx verify file.ltx +# Manual inspection +litestream ltx /path/to/db.sqlite +# For deeper inspection use the Go API (ltx.NewDecoder) ``` ## References diff --git a/.claude/agents/replica-client-developer.md b/.claude/agents/replica-client-developer.md index 7fe590000..7ab7236ca 100644 --- a/.claude/agents/replica-client-developer.md +++ b/.claude/agents/replica-client-developer.md @@ -20,10 +20,11 @@ Every storage backend MUST implement: ```go type ReplicaClient interface { Type() string - LTXFiles(ctx, level, seek) (FileIterator, error) - OpenLTXFile(ctx, level, minTXID, maxTXID, offset, size) (io.ReadCloser, error) - WriteLTXFile(ctx, level, minTXID, maxTXID, r, createdAt) (*FileInfo, error) - DeleteLTXFiles(ctx, files) error + LTXFiles(ctx context.Context, level int, seek ltx.TXID) (ltx.FileIterator, error) + OpenLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, offset, size int64) (io.ReadCloser, error) + WriteLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, r io.Reader) (*ltx.FileInfo, error) + DeleteLTXFiles(ctx context.Context, files []*ltx.FileInfo) error + DeleteAll(ctx context.Context) error } ``` @@ -52,7 +53,7 @@ type ReplicaClient interface { - [ ] Implement ReplicaClient interface - [ ] Handle partial reads (offset/size) - [ ] Support seek parameter for pagination -- [ ] Preserve CreatedAt timestamps +- [ ] Preserve CreatedAt timestamps when metadata is available - [ ] Handle eventual consistency - [ ] Implement proper error types - [ ] Add integration tests diff --git a/.claude/agents/test-engineer.md b/.claude/agents/test-engineer.md index 6ab41af44..a4ac73aa5 100644 --- a/.claude/agents/test-engineer.md +++ b/.claude/agents/test-engineer.md @@ -43,8 +43,8 @@ sqlite3 restored.db "PRAGMA integrity_check;" go test -race -v ./... # Specific areas prone to races -go test -race -v -run TestReplica_SetPos ./... -go test -race -v -run TestDB_Monitor ./... +go test -race -v -run TestReplica_Sync ./... +go test -race -v -run TestDB_Sync ./... go test -race -v -run TestStore_CompactDB ./... ``` @@ -105,11 +105,11 @@ done ### Compaction Scenarios ```bash -# Generate files for compaction -./bin/litestream-test generate-ltx \ - --count 100 \ - --size 10MB \ - --level 0 +# Exercise store-level compaction logic +go test -v -run TestStore_CompactDB ./... + +# Include remote partial-read coverage +go test -v -run TestStore_CompactDB_RemotePartialRead ./... 
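+
+# For an end-to-end check, the repo's test harness can populate and validate a replica
+# (same flags used in the test-compaction command docs):
+./bin/litestream-test populate -db test.db -target-size 1.5GB
+./bin/litestream-test validate -source-db test.db -replica-url file:///tmp/replica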
``` ## Performance Testing @@ -138,7 +138,7 @@ go tool pprof mem.prof ### Simulate Failures ```go type FailingReplicaClient struct { - ReplicaClient + litestream.ReplicaClient failAfter int count int } diff --git a/.claude/commands/add-storage-backend.md b/.claude/commands/add-storage-backend.md index e968d8af1..aab97f2ba 100644 --- a/.claude/commands/add-storage-backend.md +++ b/.claude/commands/add-storage-backend.md @@ -26,7 +26,7 @@ Create a new storage backend implementation for Litestream with all required com return "{{backend_name}}" } - func (c *ReplicaClient) LTXFiles(ctx context.Context, level int, seek string) (ltx.FileIterator, error) { + func (c *ReplicaClient) LTXFiles(ctx context.Context, level int, seek ltx.TXID) (ltx.FileIterator, error) { // List files at level } @@ -34,13 +34,17 @@ Create a new storage backend implementation for Litestream with all required com // Open file for reading } - func (c *ReplicaClient) WriteLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, r io.Reader, createdAt *time.Time) (*ltx.FileInfo, error) { + func (c *ReplicaClient) WriteLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, r io.Reader) (*ltx.FileInfo, error) { // Write file atomically } func (c *ReplicaClient) DeleteLTXFiles(ctx context.Context, files []*ltx.FileInfo) error { // Delete files } + + func (c *ReplicaClient) DeleteAll(ctx context.Context) error { + // Remove all files for replica + } ``` 3. **Add Configuration Parsing** @@ -76,7 +80,7 @@ Create a new storage backend implementation for Litestream with all required com - Handle eventual consistency - Implement atomic writes (temp file + rename) - Support partial reads (offset/size) -- Preserve CreatedAt timestamps +- Preserve CreatedAt timestamps in returned FileInfo - Return proper error types (os.ErrNotExist) ## Testing diff --git a/.claude/commands/fix-common-issues.md b/.claude/commands/fix-common-issues.md index 6263b2253..3ce2aff0b 100644 --- a/.claude/commands/fix-common-issues.md +++ b/.claude/commands/fix-common-issues.md @@ -31,7 +31,7 @@ if pgno == lockPgno { **Check**: ```bash -go test -race -v -run TestReplica_SetPos ./... +go test -race -v -run TestReplica_Sync ./... ``` **Fix**: @@ -67,18 +67,24 @@ return replica.Client.OpenLTXFile(...) 
## Issue 4: CreatedAt Timestamp Loss -**Symptom**: Cannot do point-in-time recovery +**Symptom**: Point-in-time recovery lacks accurate timestamps **Check**: -```bash -# Find WriteLTXFile calls without CreatedAt -grep -r "WriteLTXFile" --include="*.go" | grep "nil" +```go +info, err := client.WriteLTXFile(ctx, level, minTXID, maxTXID, r) +if err != nil { + t.Fatal(err) +} +if info.CreatedAt.IsZero() { + t.Fatal("CreatedAt not set") +} ``` **Fix**: ```go -// Preserve earliest timestamp -info, err := client.WriteLTXFile(ctx, level, minTXID, maxTXID, r, &oldestFile.CreatedAt) +// Ensure storage metadata is copied into the returned FileInfo +modTime := resp.LastModified +info.CreatedAt = modTime ``` ## Issue 5: Non-Atomic File Writes @@ -158,8 +164,8 @@ defer func() { # Check database integrity sqlite3 database.db "PRAGMA integrity_check;" -# Verify LTX files -litestream ltx verify *.ltx +# List replicated LTX files +litestream ltx /path/to/db.sqlite # Check replication status litestream databases diff --git a/.claude/commands/run-comprehensive-tests.md b/.claude/commands/run-comprehensive-tests.md index 317217fba..158a58e27 100644 --- a/.claude/commands/run-comprehensive-tests.md +++ b/.claude/commands/run-comprehensive-tests.md @@ -27,8 +27,8 @@ go test -v ./... -short ### 2. Race Condition Tests ```bash echo "=== Testing for Race Conditions ===" -go test -race -v -run TestReplica_SetPos ./... -go test -race -v -run TestDB_Monitor ./... +go test -race -v -run TestReplica_Sync ./... +go test -race -v -run TestDB_Sync ./... go test -race -v -run TestStore_CompactDB ./... go test -race -v ./... ``` diff --git a/.claude/commands/trace-replication.md b/.claude/commands/trace-replication.md index 6e7a26106..64aa96c20 100644 --- a/.claude/commands/trace-replication.md +++ b/.claude/commands/trace-replication.md @@ -9,56 +9,80 @@ INSERT INTO table VALUES (...); -- SQLite appends to WAL file ``` -2. **DB.monitor() detects changes** (db.go:1499): +2. **DB.monitor() syncs the shadow WAL** (db.go:1499): ```go -ticker := time.NewTicker(db.MonitorInterval) // Every 1s -changed, err := db.checkWAL() -if changed { - db.notifyReplicas() // Signal replicas +ticker := time.NewTicker(db.MonitorInterval) // default 1s +for { + select { + case <-db.ctx.Done(): + return + case <-ticker.C: + } + + if err := db.Sync(db.ctx); err != nil && !errors.Is(err, context.Canceled) { + db.Logger.Error("sync error", "error", err) + } } ``` 3. **Replica.monitor() responds** (replica.go): ```go -select { -case <-db.notify: - // WAL changed, time to sync -case <-ticker.C: - // Regular sync interval +ticker := time.NewTicker(r.SyncInterval) +defer ticker.Stop() + +notify := r.db.Notify() +for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + // Enforce minimum sync interval + case <-notify: + // WAL changed, time to sync + } + + notify = r.db.Notify() + + if err := r.Sync(ctx); err != nil && !errors.Is(err, context.Canceled) { + r.Logger().Error("monitor error", "error", err) + } } ``` -4. **Replica.Sync() processes changes**: +4. 
**Replica.Sync() uploads new L0 files** (replica.go): ```go -// Read WAL pages since last position -reader := db.WALReader(r.pos.PageNo) - -// Convert to LTX format -ltxData := convertWALToLTX(reader) - -// Write to storage backend -info, err := r.Client.WriteLTXFile(ctx, level, minTXID, maxTXID, ltxData) +// Determine local database position +dpos, err := r.db.Pos() +if err != nil { + return err +} +if dpos.IsZero() { + return fmt.Errorf("no position, waiting for data") +} -// Update position -r.SetPos(newPos) +// Upload each unreplicated L0 file +for txID := r.Pos().TXID + 1; txID <= dpos.TXID; txID = r.Pos().TXID + 1 { + if err := r.uploadLTXFile(ctx, 0, txID, txID); err != nil { + return err + } + r.SetPos(ltx.Pos{TXID: txID}) +} ``` 5. **ReplicaClient uploads to storage**: ```go -// Backend-specific upload -func (c *S3Client) WriteLTXFile(...) (*ltx.FileInfo, error) { - // Upload to S3 - // Return file metadata +func (c *S3Client) WriteLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, r io.Reader) (*ltx.FileInfo, error) { + // Stream LTX data to storage and return metadata (size, CreatedAt, checksums) } ``` -6. **Checkpoint when threshold reached**: +6. **Checkpoint when thresholds are hit**: ```go if walPageCount > db.MinCheckpointPageN { - db.Checkpoint("PASSIVE") // Try checkpoint + db.Checkpoint(ctx, litestream.CheckpointModePassive) } if walPageCount > db.MaxCheckpointPageN { - db.Checkpoint("RESTART") // Force checkpoint + db.Checkpoint(ctx, litestream.CheckpointModeRestart) } ``` @@ -75,7 +99,7 @@ slog.SetLogLevel(slog.LevelDebug) // Key log points: slog.Debug("wal changed", "size", walSize) -slog.Debug("syncing replica", "pos", r.pos) +slog.Debug("syncing replica", "pos", r.Pos()) slog.Debug("ltx uploaded", "txid", maxTXID) slog.Debug("checkpoint complete", "mode", mode) ``` diff --git a/.claude/commands/validate-replica.md b/.claude/commands/validate-replica.md index c860de50a..a9b2abfa4 100644 --- a/.claude/commands/validate-replica.md +++ b/.claude/commands/validate-replica.md @@ -34,11 +34,11 @@ if len(data) != 25 { 4. **Verify timestamp preservation**: ```go -// WriteLTXFile must preserve CreatedAt if provided -createdAt := time.Now().Add(-24 * time.Hour) -info, _ := client.WriteLTXFile(ctx, 0, 1, 100, reader, &createdAt) -if !info.CreatedAt.Equal(createdAt) { - t.Error("CreatedAt not preserved") +// CreatedAt should reflect remote object metadata (or upload time) +start := time.Now() +info, _ := client.WriteLTXFile(ctx, 0, 1, 100, reader) +if info.CreatedAt.IsZero() || info.CreatedAt.Before(start.Add(-time.Second)) { + t.Error("unexpected CreatedAt timestamp") } ``` diff --git a/AGENTS.md b/AGENTS.md index ce4d51f20..8e1c3018f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -18,18 +18,18 @@ This document provides comprehensive guidance for AI agents working with the Lit Litestream is a **disaster recovery tool for SQLite** that runs as a background process and safely replicates changes incrementally to various storage backends. It monitors SQLite's Write-Ahead Log (WAL), converts changes to an immutable LTX format, and replicates these to configured destinations. 
-**Version 0.5.0 Major Changes:** -- **New LTX Format**: Replaced WAL segment replication with page-level LTX format -- **Multi-level Compaction**: Hierarchical compaction strategy for efficient storage -- **Single Replica Constraint**: Each database now limited to one replica destination -- **No CGO Required**: Switched to `modernc.org/sqlite` (pure Go implementation) -- **NATS JetStream Support**: Added as new replica type -- **Breaking Change**: Cannot restore from v0.3.x backups +**Current Architecture Highlights:** +- **LTX Format**: Page-level replication format replaces direct WAL mirroring +- **Multi-level Compaction**: Hierarchical compaction keeps storage efficient (30s → 5m → 1h → snapshots) +- **Single Replica Constraint**: Each database is replicated to exactly one remote destination +- **Pure Go Build**: Uses `modernc.org/sqlite`, so no CGO dependency for the main binary +- **Optional NATS JetStream Support**: Additional replica backend alongside S3/GCS/ABS/File/SFTP +- **Snapshot Compatibility**: Only LTX-based backups are supported—keep legacy v0.3.x binaries to restore old WAL snapshots **Key Design Principles:** - **Non-invasive**: Uses only SQLite API, never directly manipulates database files - **Incremental**: Replicates only changes, not full databases -- **Single-destination** (v0.5.0+): One replica destination per database +- **Single-destination**: Exactly one replica destination per database - **Eventually Consistent**: Handles storage backends with eventual consistency - **Safe**: Maintains long-running read transactions for consistency @@ -274,29 +274,43 @@ graph TB ```go // CORRECT - Database restoration logic belongs in DB layer -func (db *DB) init() error { - // Check if database is behind replica - if db.pos < replica.pos { - // Clear local L0 files +func (db *DB) init(ctx context.Context, replica *litestream.Replica) error { + dpos, err := db.Pos() + if err != nil { + return err + } + rpos := replica.Pos() + + if dpos.TXID < rpos.TXID { if err := db.clearL0Files(); err != nil { return fmt.Errorf("clear L0 files: %w", err) } - // Fetch latest L0 LTX file from replica - ltxFile, err := replica.Client.OpenLTXFile(ctx, 0, replica.pos.MinTXID, replica.pos.MaxTXID, 0, 0) + itr, err := replica.Client.LTXFiles(ctx, 0, rpos.TXID) if err != nil { - return fmt.Errorf("fetch latest L0 LTX: %w", err) + return fmt.Errorf("enumerate ltx files: %w", err) + } + defer itr.Close() + + if itr.Next() { + info := itr.Item() + rd, err := replica.Client.OpenLTXFile(ctx, info.Level, info.MinTXID, info.MaxTXID, 0, 0) + if err != nil { + return fmt.Errorf("fetch latest L0 LTX: %w", err) + } + defer rd.Close() + + if err := db.writeL0File(rd); err != nil { + return fmt.Errorf("write L0 file: %w", err) + } } - defer ltxFile.Close() - // Write to local L0 directory - if err := db.writeL0File(ltxFile); err != nil { - return fmt.Errorf("write L0 file: %w", err) + if err := itr.Close(); err != nil { + return err } } - // Now start replica with clean state - return replica.Start() + return replica.Start(ctx) } ``` @@ -380,8 +394,13 @@ func writeFileDirect(path string, data []byte) error { ```go // CORRECT - Return error for caller to handle func (db *DB) validatePosition() error { - if db.pos < replica.pos { - return fmt.Errorf("database position (%v) behind replica (%v)", db.pos, replica.pos) + dpos, err := db.Pos() + if err != nil { + return err + } + rpos := replica.Pos() + if dpos.TXID < rpos.TXID { + return fmt.Errorf("database position (%v) behind replica (%v)", dpos, rpos) } return 
nil } @@ -392,7 +411,7 @@ func (db *DB) validatePosition() error { ```go // WRONG - Silently continuing can cause data corruption func (db *DB) validatePosition() { - if db.pos < replica.pos { + if dpos, _ := db.Pos(); dpos.TXID < replica.Pos().TXID { log.Printf("warning: position mismatch") // ❌ Don't just log! // Continuing here is dangerous } @@ -510,9 +529,11 @@ info := <x.FileInfo{ ```go // CORRECT - Preserve temporal information -info := <x.FileInfo{ - CreatedAt: oldestSourceFile.CreatedAt, // Keep original +info, err := replica.Client.WriteLTXFile(ctx, level, minTXID, maxTXID, r) +if err != nil { + return fmt.Errorf("write ltx: %w", err) } +info.CreatedAt = oldestSourceFile.CreatedAt ``` ### ❌ DON'T: Write files without atomic operations @@ -616,7 +637,7 @@ type DB struct { ### Replica Component (replica.go) **Responsibilities:** -- Manages replication to a single destination (v0.5.0: one replica per DB only) +- Manages replication to a single destination (one replica per DB) - Tracks replication position (ltx.Pos) - Handles sync intervals - Manages encryption (if configured) @@ -634,10 +655,11 @@ type ReplicaClient interface { Type() string // Client type identifier // File operations - LTXFiles(ctx, level, seek) (FileIterator, error) - OpenLTXFile(ctx, level, minTXID, maxTXID, offset, size) (io.ReadCloser, error) - WriteLTXFile(ctx, level, minTXID, maxTXID, r) (*FileInfo, error) - DeleteLTXFiles(ctx, files) error + LTXFiles(ctx context.Context, level int, seek ltx.TXID) (ltx.FileIterator, error) + OpenLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, offset, size int64) (io.ReadCloser, error) + WriteLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, r io.Reader) (*ltx.FileInfo, error) + DeleteLTXFiles(ctx context.Context, files []*ltx.FileInfo) error + DeleteAll(ctx context.Context) error } ``` @@ -655,13 +677,13 @@ type ReplicaClient interface { - Controls resource usage - Handles retention policies -**Compaction Levels (v0.5.0):** +**Default Compaction Levels:** ```go var defaultLevels = CompactionLevels{ - {Level: 0, Interval: 0}, // Raw LTX files (no compaction) - {Level: 1, Interval: 30*Second}, // 30-second windows - {Level: 2, Interval: 5*Minute}, // 5-minute windows - {Level: 3, Interval: 1*Hour}, // Hourly windows + {Level: 0, Interval: 0}, // Raw LTX files (no compaction) + {Level: 1, Interval: 30*Second}, + {Level: 2, Interval: 5*Minute}, + {Level: 3, Interval: 1*Hour}, // Snapshots created daily (24h retention) } ``` @@ -722,6 +744,7 @@ if offset < 0 { go test -v ./replica_client_test.go -integration [s3|gcs|abs|sftp] # Test partial reads +# (Example) add targeted partial-read tests in your backend package go test -v -run TestReplicaClient_PartialRead ./... ``` @@ -742,8 +765,9 @@ go test -v -run TestStore_CompactDB_RemotePartialRead ./... go test -race -v ./... # Specific race-prone areas -go test -race -v -run TestReplica_SetPos ./... -go test -race -v -run TestDB_Monitor ./... +go test -race -v -run TestReplica_Sync ./... +go test -race -v -run TestDB_Sync ./... +go test -race -v -run TestStore_CompactDB ./... ``` ## Quick Reference @@ -766,7 +790,7 @@ dbs: path: db-backup sync-interval: 10s # How often to sync -# Compaction configuration (v0.5.0 defaults) +# Compaction configuration (default) levels: - level: 1 interval: 30s # 30-second windows @@ -789,13 +813,12 @@ DefaultTruncatePageN = 500000 // Pages before truncation ## Getting Help For complex architectural questions, consult: -1. 
**`docs/V050_CHANGES.md`** - v0.5.0 breaking changes and migration guide -2. **`docs/SQLITE_INTERNALS.md`** - SQLite fundamentals, WAL format, lock page details -3. **`docs/LTX_FORMAT.md`** - LTX file format specification and operations -4. `docs/ARCHITECTURE.md` - Deep technical details of Litestream components -5. `docs/REPLICA_CLIENT_GUIDE.md` - Storage backend implementation guide -6. `docs/TESTING_GUIDE.md` - Comprehensive testing strategies -7. Review recent PRs for current patterns and best practices +1. **`docs/SQLITE_INTERNALS.md`** - SQLite fundamentals, WAL format, lock page details +2. **`docs/LTX_FORMAT.md`** - LTX file format specification and operations +3. `docs/ARCHITECTURE.md` - Deep technical details of Litestream components +4. `docs/REPLICA_CLIENT_GUIDE.md` - Storage backend implementation guide +5. `docs/TESTING_GUIDE.md` - Comprehensive testing strategies +6. Review recent PRs for current patterns and best practices ## Future Roadmap @@ -806,29 +829,20 @@ For complex architectural questions, consult: - Enables scaling read operations without full database downloads - **Enhanced read replica support**: Direct reads from remote storage -## Important v0.5.0 Migration Notes - -1. **Breaking Changes:** - - Cannot restore from v0.3.x WAL segment files - - Single replica destination per database (removed multi-replica support) - - Command renamed: `litestream wal` → `litestream ltx` - - Removed "generations" concept for backup tracking - -2. **Build Changes:** - - CGO no longer required (uses `modernc.org/sqlite`) - - Pure Go implementation enables easier cross-compilation +## Important Constraints -3. **New Features:** - - NATS JetStream replica type added - - Page-level compaction for better efficiency - - Point-in-time restoration with minimal files +1. **Single Replica Authority**: Each database is replicated to exactly one remote target—configure redundancy at the storage layer if needed. +2. **Legacy Backups**: Pre-LTX (v0.3.x) WAL snapshots cannot be restored with current binaries; keep an old binary around to hydrate those backups before re-replicating. +3. **CLI Changes**: Use `litestream ltx` for LTX inspection; `litestream wal` is deprecated. +4. **Pure Go Build**: The default build is CGO-free via `modernc.org/sqlite`; enable CGO only for optional VFS tooling. +5. **Page-Level Compaction**: Expect compaction to merge files across 30s/5m/1h windows plus daily snapshots. ## Final Checklist Before Making Changes - [ ] Read this entire document - [ ] Read `docs/SQLITE_INTERNALS.md` for SQLite fundamentals - [ ] Read `docs/LTX_FORMAT.md` for replication format details -- [ ] Understand v0.5.0 changes and limitations +- [ ] Understand current constraints (single replica authority, LTX-only restores) - [ ] Understand the component you're modifying - [ ] Understand architectural boundaries (DB vs Replica responsibilities) - [ ] Check for eventual consistency implications @@ -920,7 +934,7 @@ This document serves as the universal source of truth for all AI coding assistan 3. **Respect architectural boundaries** (DB layer vs Replica layer) 4. **Follow the patterns** in Common Pitfalls section 5. **Test with race detector** for any concurrent code changes -6. **Preserve backward compatibility** with v0.5.0 constraints +6. 
**Preserve backward compatibility** with current constraints ### Documentation Hierarchy @@ -937,5 +951,4 @@ Tier 2 (Read when relevant): Tier 3 (Reference only): - docs/TESTING_GUIDE.md (for test scenarios) - docs/REPLICA_CLIENT_GUIDE.md (for new backends) -- docs/V050_CHANGES.md (for migration context) ``` diff --git a/CLAUDE.md b/CLAUDE.md index 97a094247..e22c517f1 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -136,7 +136,7 @@ Main entry point (`cmd/litestream/main.go`) provides subcommands: 1. **Non-invasive monitoring**: Uses SQLite API exclusively, no direct file manipulation 2. **Incremental replication**: Segments WAL into small chunks for efficient transfer -3. **Multi-destination support**: Single database can replicate to multiple destinations +3. **Single remote authority**: Each database replicates to exactly one destination 4. **Age encryption**: Optional end-to-end encryption using age identities/recipients 5. **Prometheus metrics**: Built-in observability for monitoring replication health 6. **Timestamp preservation**: Compaction preserves earliest CreatedAt timestamp from source files to maintain temporal granularity for point-in-time restoration diff --git a/GEMINI.md b/GEMINI.md index 56f1a9f10..0cac68038 100644 --- a/GEMINI.md +++ b/GEMINI.md @@ -53,7 +53,7 @@ When generating tests: ### Refactoring 1. Respect layer boundaries (DB vs Replica) -2. Maintain backward compatibility with v0.5.0 +2. Maintain current constraints (single replica authority, LTX-only restores) 3. Use atomic file operations 4. Return errors properly (don't just log) @@ -72,7 +72,7 @@ go test -v ./replica_client_test.go -integration s3 ## Configuration Reference -See `etc/litestream.yml` for configuration examples. Note v0.5.0 constraint: single replica per database only. +See `etc/litestream.yml` for configuration examples. Remember: each database replicates to exactly one remote destination. ## Additional Resources diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 20ffbf46a..b270046b4 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -334,77 +334,20 @@ Compaction merges multiple LTX files to reduce storage overhead: ### Compaction Algorithm (store.go:189) -```go -func (s *Store) CompactDB(ctx context.Context, db *DB, lvl *CompactionLevel) (*ltx.FileInfo, error) { - // 1. Check if compaction is needed - if !s.shouldCompact(db, lvl) { - return nil, ErrCompactionTooEarly - } - - // 2. Get source files from previous level - srcLevel := lvl.Level - 1 - srcFiles, err := db.LTXFiles(ctx, srcLevel) - if err != nil { - return nil, err - } - - // 3. Create page map for deduplication - pageMap := make(map[uint32]PageData) - - // 4. Read all source files (preferring local) - for _, info := range srcFiles { - // CRITICAL: Try local first for consistency - f, err := os.Open(db.LTXPath(info)) - if err != nil { - // Fall back to remote only if local doesn't exist - f, err = replica.Client.OpenLTXFile(ctx, info) - if err != nil { - return nil, err - } - } - defer f.Close() - - // Read pages and add to map (newer overwrites older) - pages, err := ltx.ReadPages(f) - for _, page := range pages { - pageMap[page.PageNo] = page - } - } - - // 5. 
Write compacted file - var buf bytes.Buffer - writer := ltx.NewWriter(&buf) - - // Write pages in order - pageNos := make([]uint32, 0, len(pageMap)) - for pgno := range pageMap { - pageNos = append(pageNos, pgno) - } - sort.Slice(pageNos, func(i, j int) bool { - return pageNos[i] < pageNos[j] - }) - - for _, pgno := range pageNos { - // CRITICAL: Skip lock page at 1GB - if pgno == ltx.LockPgno(db.pageSize) { - continue - } - writer.WritePage(pageMap[pgno]) - } - - // 6. Upload compacted file - info, err := replica.Client.WriteLTXFile(ctx, lvl.Level, minTXID, maxTXID, &buf) - if err != nil { - return nil, err - } - - // CRITICAL: Preserve earliest timestamp - info.CreatedAt = s.earliestTimestamp(srcFiles) - - // 7. Delete source files - return info, replica.Client.DeleteLTXFiles(ctx, srcFiles) -} -``` +High-level compaction flow: + +1. Determine whether the level is due for compaction (`Store.shouldCompact`). +2. Enumerate level-`L-1` files using `ReplicaClient.LTXFiles`, preferring local + copies via `os.Open(db.LTXPath(...))` and falling back to + `ReplicaClient.OpenLTXFile` only when necessary. +3. Stream the source readers through `ltx.NewCompactor`, which performs + page-level deduplication and enforces lock-page skipping automatically. +4. Pipe the compactor output into `ReplicaClient.WriteLTXFile` to create the + merged LTX file for level `L`. +5. Adjust the returned `ltx.FileInfo.CreatedAt` to the earliest timestamp from + the source files so point-in-time recovery remains accurate. +6. Update the cached max file info for the level and delete old L0 files when + promoting to level 1. ### Compaction Levels diff --git a/docs/LTX_FORMAT.md b/docs/LTX_FORMAT.md index 8db0bc15c..e5b0e3bf6 100644 --- a/docs/LTX_FORMAT.md +++ b/docs/LTX_FORMAT.md @@ -64,48 +64,46 @@ The LTX header contains metadata about the file: ```go // From github.com/superfly/ltx type Header struct { - // Magic bytes: "LTX\x00" (0x4C545800) - Magic [4]byte - - // Format version (current: 0) - Version uint32 - - // Flags for special behaviors - Flags uint32 - - // Database page size (typically 4096) - PageSize uint32 - - // Database page count at snapshot - PageCount uint32 - - // Transaction ID range - MinTXID TXID // uint64 - MaxTXID TXID // uint64 - - // Checksum of header - Checksum uint64 + Version int // Derived from the magic string ("LTX1") + Flags uint32 // Reserved flag bits + PageSize uint32 // Database page size + Commit uint32 // Page count after applying file + MinTXID TXID + MaxTXID TXID + Timestamp int64 // Milliseconds since Unix epoch + PreApplyChecksum Checksum // Database checksum before apply + WALOffset int64 // Offset within source WAL (0 for snapshots) + WALSize int64 // WAL byte length (0 for snapshots) + WALSalt1 uint32 + WALSalt2 uint32 + NodeID uint64 } -// Header flags -const ( - HeaderFlagNoChecksum = 1 << 0 // Disable checksums -) +const HeaderFlagNoChecksum = uint32(1 << 1) ``` +> Note: the version is implied by the magic string. Present files use +> `Magic == "LTX1"`, which corresponds to `ltx.Version == 2`. 
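+
+A quick way to confirm these header fields on a real file is the decoder API shown later in this document; a minimal sketch (error handling trimmed; assumes imports of `os`, `fmt`, `log`, `time`, and `github.com/superfly/ltx`):
+
+```go
+f, err := os.Open("0000000000000001-0000000000000064.ltx")
+if err != nil {
+	log.Fatal(err)
+}
+defer f.Close()
+
+dec := ltx.NewDecoder(f)
+if err := dec.DecodeHeader(); err != nil {
+	log.Fatal(err)
+}
+hdr := dec.Header()
+// MinTXID/MaxTXID identify the transaction range; Timestamp is milliseconds since the Unix epoch.
+fmt.Println(hdr.MinTXID, hdr.MaxTXID, hdr.PageSize, hdr.Commit, time.UnixMilli(hdr.Timestamp))
+```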
+ ### Binary Layout (Header) ``` Offset Size Field -0 4 Magic ("LTX\x00") -4 4 Version -8 4 Flags -12 4 PageSize -16 4 PageCount -20 8 MinTXID -28 8 MaxTXID -36 8 Checksum -Total: 44 bytes +0 4 Magic ("LTX1") +4 4 Flags +8 4 PageSize +12 4 Commit +16 8 MinTXID +24 8 MaxTXID +32 8 Timestamp +40 8 PreApplyChecksum +48 8 WALOffset +56 8 WALSize +64 4 WALSalt1 +68 4 WALSalt2 +72 8 NodeID +80 20 Reserved (zeros) +Total: 100 bytes ``` ## Page Frames @@ -119,8 +117,7 @@ type PageFrame struct { } type PageHeader struct { - PageNo uint32 // Page number in database (1-based) - Checksum uint64 // CRC-64 checksum of page data + Pgno uint32 // Database page number (1-based) } ``` @@ -128,9 +125,8 @@ type PageHeader struct { ``` Offset Size Field -0 4 Page Number -4 8 Checksum -12 PageSize Page Data +0 4 Page Number (Pgno) +4 PageSize Page Data ``` ### Page Frame Constraints @@ -146,24 +142,19 @@ The page index enables efficient random access to pages: ```go type PageIndexElem struct { - PageNo uint32 // Database page number - Offset int64 // Byte offset in LTX file + Level int + MinTXID TXID + MaxTXID TXID + Offset int64 // Byte offset of encoded payload + Size int64 // Bytes occupied by encoded payload } - -// Index is sorted by PageNo for binary search -type PageIndex []PageIndexElem ``` ### Binary Layout (Page Index) ``` -Each entry (16 bytes): -Offset Size Field -0 4 Page Number -4 4 Reserved (padding) -8 8 File Offset - -Total index size = EntryCount * 16 +Rather than parsing raw bytes, call `ltx.DecodePageIndex` which returns a +map of page number to `ltx.PageIndexElem` for you. ``` ### Index Usage @@ -189,14 +180,8 @@ The trailer contains metadata and pointers: ```go type Trailer struct { - // Offset to start of page index - PageIndexOffset int64 - - // Size of page index in bytes - PageIndexSize int64 - - // Total checksum of all pages - Checksum uint64 + PostApplyChecksum Checksum // Database checksum after apply + FileChecksum Checksum // CRC-64 checksum of entire file } ``` @@ -204,10 +189,9 @@ type Trailer struct { ``` Offset Size Field -0 8 Page Index Offset -8 8 Page Index Size -16 8 Checksum -Total: 24 bytes +0 8 PostApplyChecksum +8 8 FileChecksum +Total: 16 bytes ``` ### Reading Trailer @@ -646,42 +630,42 @@ func readWithRetry(client ReplicaClient, info *FileInfo) ([]byte, error) { ## Debugging LTX Files -### Inspect LTX File +### Inspect LTX Files -```bash -# Using litestream CLI -litestream ltx info file.ltx - -# Output: -# Version: 0 -# Page Size: 4096 -# Page Count: 1234 -# Min TXID: 1 -# Max TXID: 100 -# File Size: 5.2MB -``` - -### Dump Pages +The Litestream CLI currently exposes a single helper for listing LTX files: ```bash -# List all pages in file -litestream ltx pages file.ltx - -# Dump specific page -litestream ltx page file.ltx 42 +litestream ltx /path/to/db.sqlite +litestream ltx s3://bucket/db ``` -### Verify Integrity +For low-level inspection (page payloads, checksums, etc.), use the Go API: -```bash -# Check all checksums -litestream ltx verify file.ltx - -# Output: -# Header checksum: OK -# Page checksums: OK (1234/1234) -# Trailer checksum: OK -# File integrity: VALID +```go +f, err := os.Open("0000000000000001-0000000000000064.ltx") +if err != nil { + log.Fatal(err) +} +defer f.Close() + +dec := ltx.NewDecoder(f) +if err := dec.DecodeHeader(); err != nil { + log.Fatal(err) +} +for { + var hdr ltx.PageHeader + data := make([]byte, dec.Header().PageSize) + if err := dec.DecodePage(&hdr, data); err == io.EOF { + break + } else if err != nil { + log.Fatal(err) + } + // 
Inspect hdr.Pgno or data here. +} +if err := dec.Close(); err != nil { + log.Fatal(err) +} +fmt.Println("post-apply checksum:", dec.Trailer().PostApplyChecksum) ``` ## Summary diff --git a/docs/REPLICA_CLIENT_GUIDE.md b/docs/REPLICA_CLIENT_GUIDE.md index b40a968f8..9bfd85267 100644 --- a/docs/REPLICA_CLIENT_GUIDE.md +++ b/docs/REPLICA_CLIENT_GUIDE.md @@ -29,7 +29,7 @@ type ReplicaClient interface { OpenLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, offset, size int64) (io.ReadCloser, error) // Writes an LTX file to storage - // MUST preserve createdAt timestamp if provided + // SHOULD set CreatedAt based on backend metadata or upload time WriteLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, r io.Reader) (*ltx.FileInfo, error) // Deletes one or more LTX files @@ -88,18 +88,28 @@ func (c *ReplicaClient) WriteLTXFile(ctx context.Context, level int, minTXID, ma } // Verify the file is readable before returning - return c.verifyUpload(ctx, path, checksum) + return c.verifyUpload(ctx, path, int64(len(data)), checksum) } -func (c *ReplicaClient) verifyUpload(ctx context.Context, path string, expectedChecksum uint64) (*ltx.FileInfo, error) { +func (c *ReplicaClient) verifyUpload(ctx context.Context, path string, expectedSize int64, expectedChecksum uint64) (*ltx.FileInfo, error) { // Implement retry loop with backoff backoff := 100 * time.Millisecond for i := 0; i < 10; i++ { info, err := c.statFile(ctx, path) if err == nil { - // Verify checksum if possible - if info.Checksum == expectedChecksum { - return info, nil + if info.Size == expectedSize { + rc, err := c.openFile(ctx, path, 0, 0) + if err != nil { + return nil, fmt.Errorf("open uploaded file: %w", err) + } + data, err := io.ReadAll(rc) + rc.Close() + if err != nil { + return nil, fmt.Errorf("read uploaded file: %w", err) + } + if crc64.Checksum(data, crc64.MakeTable(crc64.ECMA)) == expectedChecksum { + return info, nil + } } } @@ -471,24 +481,20 @@ func (c *Client) WriteLTXFile(...) (*ltx.FileInfo, error) { ```go // CORRECT - Preserves original timestamp -func (c *Client) WriteLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, r io.Reader, createdAt *time.Time) (*ltx.FileInfo, error) { +func (c *Client) WriteLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, r io.Reader) (*ltx.FileInfo, error) { // Upload file... - - info := <x.FileInfo{ - Level: level, - MinTXID: minTXID, - MaxTXID: maxTXID, - Size: uploadedSize, - } - - // Preserve timestamp if provided - if createdAt != nil { - info.CreatedAt = *createdAt - } else { - info.CreatedAt = time.Now() + uploadedSize, modTime, err := c.storage.Upload(path, r) + if err != nil { + return nil, err } - return info, nil + return <x.FileInfo{ + Level: level, + MinTXID: minTXID, + MaxTXID: maxTXID, + Size: uploadedSize, + CreatedAt: modTime, + }, nil } ``` diff --git a/docs/TESTING_GUIDE.md b/docs/TESTING_GUIDE.md index 183d843f0..84cfcb10c 100644 --- a/docs/TESTING_GUIDE.md +++ b/docs/TESTING_GUIDE.md @@ -44,7 +44,7 @@ SQLite reserves a special lock page at exactly 1GB (0x40000000 bytes). 
This page ```go func createLargeTestDB(t *testing.T, path string, targetSize int64) { - db, err := sql.Open("sqlite3", path+"?_journal=WAL") + db, err := sql.Open("sqlite", path+"?_journal=WAL") require.NoError(t, err) defer db.Close() @@ -131,18 +131,18 @@ func TestDB_LockPageHandling(t *testing.T) { createLargeTestDB(t, dbPath, 1100*1024*1024) // 1.1GB // Open with Litestream - db := NewDB(dbPath, "") + db := litestream.NewDB(dbPath) err := db.Open() require.NoError(t, err) defer db.Close(context.Background()) // Start replication - replica := NewReplica(db, newMockClient()) + replica := litestream.NewReplicaWithClient(db, newMockClient()) err = replica.Start(context.Background()) require.NoError(t, err) // Perform writes that span the lock page - conn, err := sql.Open("sqlite3", dbPath) + conn, err := sql.Open("sqlite", dbPath) require.NoError(t, err) tx, err := conn.Begin() @@ -173,7 +173,7 @@ func TestDB_LockPageHandling(t *testing.T) { } } -func verifyLockPageSkipped(t *testing.T, replica *Replica, lockPgno uint32) { +func verifyLockPageSkipped(t *testing.T, replica *litestream.Replica, lockPgno uint32) { // Get LTX files files, err := replica.Client.LTXFiles(context.Background(), 0, 0) require.NoError(t, err) @@ -183,7 +183,7 @@ func verifyLockPageSkipped(t *testing.T, replica *Replica, lockPgno uint32) { info := files.Item() // Read page index - pageIndex, err := FetchPageIndex(context.Background(), + pageIndex, err := litestream.FetchPageIndex(context.Background(), replica.Client, info) require.NoError(t, err) @@ -204,12 +204,12 @@ func TestDB_RestoreLargeDatabase(t *testing.T) { createLargeTestDB(t, srcPath, 1500*1024*1024) // 1.5GB // Setup replication - db := NewDB(srcPath, "") + db := litestream.NewDB(srcPath) err := db.Open() require.NoError(t, err) client := file.NewReplicaClient(filepath.Join(t.TempDir(), "replica")) - replica := NewReplicaWithClient(db, client) + replica := litestream.NewReplicaWithClient(db, client) err = replica.Start(context.Background()) require.NoError(t, err) @@ -222,7 +222,7 @@ func TestDB_RestoreLargeDatabase(t *testing.T) { // Restore to new location dstPath := filepath.Join(t.TempDir(), "restored.db") - err = Restore(context.Background(), client, dstPath, nil) + err = litestream.Restore(context.Background(), client, dstPath, nil) require.NoError(t, err) // Verify restoration @@ -241,7 +241,7 @@ func verifyDatabasesMatch(t *testing.T, path1, path2 string) { assert.Equal(t, pageCount1, pageCount2, "Page counts should match") // Run integrity check - db, err := sql.Open("sqlite3", path2) + db, err := sql.Open("sqlite", path2) require.NoError(t, err) defer db.Close() @@ -261,9 +261,9 @@ func verifyDatabasesMatch(t *testing.T, path1, path2 string) { go test -race -v ./... # Run specific race-prone tests -go test -race -v -run TestReplica_SetPos ./... -go test -race -v -run TestDB_ConcurrentSync ./... -go test -race -v -run TestStore_Integration ./... +go test -race -v -run TestReplica_Sync ./... +go test -race -v -run TestDB_Sync ./... +go test -race -v -run TestStore_CompactDB ./... ``` ### Common Race Conditions @@ -272,9 +272,7 @@ go test -race -v -run TestStore_Integration ./... 
```go func TestReplica_ConcurrentPositionUpdate(t *testing.T) { - replica := NewReplica(nil) - ctx := context.Background() - + replica := litestream.NewReplica(nil) var wg sync.WaitGroup errors := make(chan error, 100) @@ -284,10 +282,7 @@ func TestReplica_ConcurrentPositionUpdate(t *testing.T) { go func(n int) { defer wg.Done() - pos := ltx.Pos{ - TXID: ltx.TXID(n), - PageNo: uint32(n * 100), - } + pos := ltx.NewPos(ltx.TXID(n), ltx.Checksum(uint64(n))) // This should use proper locking replica.SetPos(pos) @@ -339,7 +334,7 @@ func TestDB_ConcurrentWALAccess(t *testing.T) { go func() { defer wg.Done() - conn, err := sql.Open("sqlite3", db.Path()) + conn, err := sql.Open("sqlite", db.Path()) if err != nil { return } @@ -356,11 +351,13 @@ func TestDB_ConcurrentWALAccess(t *testing.T) { go func() { defer wg.Done() + notifyCh := db.Notify() + for { select { case <-ctx.Done(): return - case <-db.notify: + case <-notifyCh: // Process WAL changes _ = db.Sync(context.Background()) } @@ -380,7 +377,7 @@ func TestDB_ConcurrentWALAccess(t *testing.T) { case <-ctx.Done(): return case <-ticker.C: - _ = db.Checkpoint("PASSIVE") + _ = db.Checkpoint(context.Background(), litestream.CheckpointModePassive) } } }() @@ -506,7 +503,7 @@ func BenchmarkDB_Sync(b *testing.B) { defer db.Close(context.Background()) // Prepare test data - conn, _ := sql.Open("sqlite3", db.Path()) + conn, _ := sql.Open("sqlite", db.Path()) defer conn.Close() for i := 0; i < 1000; i++ { @@ -611,7 +608,7 @@ func runLoadTest(t *testing.T, db *DB, config LoadConfig) LoadResults { go func(workerID int) { defer wg.Done() - conn, err := sql.Open("sqlite3", db.Path()) + conn, err := sql.Open("sqlite", db.Path()) if err != nil { return } @@ -782,7 +779,7 @@ func NewTestDB(t testing.TB) *litestream.DB { path := filepath.Join(t.TempDir(), "test.db") // Create SQLite database - conn, err := sql.Open("sqlite3", path+"?_journal=WAL") + conn, err := sql.Open("sqlite", path+"?_journal=WAL") require.NoError(t, err) _, err = conn.Exec(` @@ -795,7 +792,7 @@ func NewTestDB(t testing.TB) *litestream.DB { conn.Close() // Open with Litestream - db := litestream.NewDB(path, "") + db := litestream.NewDB(path) db.MonitorInterval = 10 * time.Millisecond // Speed up for tests db.MinCheckpointPageN = 100 // Lower threshold for tests @@ -812,7 +809,7 @@ func NewTestDB(t testing.TB) *litestream.DB { func WriteTestData(t testing.TB, db *litestream.DB, count int) { t.Helper() - conn, err := sql.Open("sqlite3", db.Path()) + conn, err := sql.Open("sqlite", db.Path()) require.NoError(t, err) defer conn.Close() @@ -873,14 +870,14 @@ func ExtractFixture(name string, path string) error { ```go // Problem: Multiple connections without proper WAL mode func TestBroken(t *testing.T) { - db1, _ := sql.Open("sqlite3", "test.db") // Wrong! - db2, _ := sql.Open("sqlite3", "test.db") // Will fail + db1, _ := sql.Open("sqlite", "test.db") // Wrong! 
WAL disabled + db2, _ := sql.Open("sqlite", "test.db") // Will fail } // Solution: Use WAL mode func TestFixed(t *testing.T) { - db1, _ := sql.Open("sqlite3", "test.db?_journal=WAL") - db2, _ := sql.Open("sqlite3", "test.db?_journal=WAL") + db1, _ := sql.Open("sqlite", "test.db?_journal=WAL") + db2, _ := sql.Open("sqlite", "test.db?_journal=WAL") } ``` diff --git a/docs/V050_CHANGES.md b/docs/V050_CHANGES.md deleted file mode 100644 index 0e04ef7e9..000000000 --- a/docs/V050_CHANGES.md +++ /dev/null @@ -1,191 +0,0 @@ -# Litestream v0.5.0 Changes and Migration Guide - -This document summarizes the major changes in Litestream v0.5.0, based on Ben Johnson's announcement and the current implementation. - -## Executive Summary - -Litestream v0.5.0 is a **major rewrite** that introduces the LTX format, improves compaction, and removes CGO dependencies. It includes breaking changes that prevent restoration from v0.3.x backups. - -## Breaking Changes - -### 1. Single Replica Constraint -- **Before**: Multiple replica destinations per database -- **After**: Only ONE replica destination per database -- **Impact**: Simplified configuration but reduced redundancy options - -### 2. Cannot Restore from v0.3.x -- **Before**: WAL segment-based backups -- **After**: LTX page-based backups -- **Impact**: Must maintain v0.3.x installation to restore old backups - -### 3. Command Changes -- **Before**: `litestream wal` commands -- **After**: `litestream ltx` commands -- **Example**: `litestream ltx info file.ltx` - -### 4. Generations Removed -- **Before**: Used "generations" to track database backups -- **After**: Simplified tracking with LTX files and TXID ranges -- **Impact**: Cleaner mental model, simpler implementation - -## New Features - -### 1. LTX Format -- **Purpose**: Efficient page-level replication format -- **Benefits**: - - Immutable files with TXID ranges - - Page-level deduplication during compaction - - Indexed pages for fast random access - - Point-in-time restoration - -### 2. Multi-Level Compaction -- **Level 0**: Raw LTX files (no compaction) -- **Level 1**: 30-second windows -- **Level 2**: 5-minute windows -- **Level 3**: 1-hour windows -- **Snapshots**: Daily full database snapshots - -### 3. NATS JetStream Support -- **New replica type**: `nats://` -- **Features**: Distributed messaging with persistence -- **Use case**: Event-driven architectures - -### 4. Pure Go Implementation -- **Change**: Switched from CGO to `modernc.org/sqlite` -- **Benefits**: - - Easier cross-compilation - - No C dependencies - - Simplified builds - - Better portability - -## Technical Improvements - -### Performance -- **Compaction**: Limited only by I/O throughput -- **Page-level operations**: More efficient than WAL segments -- **Indexed access**: Fast page lookups in LTX files - -### Architecture -- **Cleaner separation**: Storage backends more modular -- **Better abstractions**: LTX format decouples from SQLite WAL -- **Simplified state**: No generations to track - -## Migration Path - -### From v0.3.x to v0.5.0 - -1. **Before upgrading**: - ```bash - # Create final backup with v0.3.x - litestream snapshot -replica [destination] - ``` - -2. **Install v0.5.0**: - ```bash - # Download and install new version - curl -L https://github.com/benbjohnson/litestream/releases/download/v0.5.0/litestream-v0.5.0-linux-amd64.tar.gz | tar xz - ``` - -3. 
**Update configuration**: - ```yaml - # Old (v0.3.x) - Multiple replicas - dbs: - - path: /data/db.sqlite - replicas: - - url: s3://bucket1/db - - url: s3://bucket2/backup - - # New (v0.5.0) - Single replica only - dbs: - - path: /data/db.sqlite - replicas: - - url: s3://bucket1/db - ``` - -4. **Start fresh replication**: - ```bash - # Remove old WAL segments - rm -rf /data/db.sqlite-litestream - - # Start v0.5.0 - litestream replicate - ``` - -### Rollback Procedure - -If you need to restore from v0.3.x backups: - -1. **Keep v0.3.x binary**: Don't delete old version -2. **Use old binary for restoration**: - ```bash - litestream-v0.3.x restore -o restored.db s3://bucket/db - ``` -3. **Then upgrade**: Once restored, can use v0.5.0 going forward - -## Future Roadmap - -### Litestream VFS (In Development) -- **Purpose**: Enable read replicas without full downloads -- **How it works**: - - Virtual File System layer - - On-demand page fetching from S3 - - Background hydration - - Local caching -- **Benefits**: - - Instant database "copies" - - Scales read operations - - Reduces bandwidth costs - -## Best Practices for v0.5.0 - -### 1. Compaction Configuration -```yaml -# Use default intervals for most workloads -levels: - - level: 1 - interval: 30s - - level: 2 - interval: 5m - - level: 3 - interval: 1h -``` - -### 2. Single Replica Strategy -Since only one replica is allowed: -- Choose most reliable storage -- Consider using RAID/redundancy at storage level -- Implement external backup rotation if needed - -### 3. Monitoring -- Watch compaction metrics -- Monitor LTX file counts at each level -- Track restoration time improvements - -### 4. Testing -- Test restoration regularly -- Verify point-in-time recovery works -- Benchmark compaction performance - -## Common Issues and Solutions - -### Issue: "Cannot restore from old backup" -**Solution**: Use v0.3.x binary to restore, then replicate with v0.5.0 - -### Issue: "Multiple replicas not supported" -**Solution**: Use single most reliable destination, implement redundancy at storage layer - -### Issue: "`wal` command not found" -**Solution**: Use `ltx` command instead - -### Issue: "CGO_ENABLED required error" -**Solution**: Not needed in v0.5.0, ensure using latest binary - -## Summary - -Litestream v0.5.0 represents a significant evolution: -- **Simpler**: Single replica, no generations, pure Go -- **More efficient**: Page-level operations, better compaction -- **More flexible**: LTX format enables future features -- **Breaking changes**: Cannot restore old backups directly - -The tradeoffs favor simplicity and efficiency over backward compatibility, positioning Litestream for future enhancements like the VFS read replica system. 
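The migration guide above repeatedly stresses testing restoration rather than assuming it works. As a rough illustration, a minimal programmatic check can be sketched in Go, mirroring the calls already shown in the testing guide (`file.NewReplicaClient`, `litestream.Restore`); the module path and exact signatures are assumptions here and should be verified against the current source before relying on them.

```go
package main

import (
	"context"
	"log"

	// Assumed import paths for illustration; adjust to the repository's actual module path.
	"github.com/benbjohnson/litestream"
	"github.com/benbjohnson/litestream/file"
)

// restoreForVerification is a hypothetical helper: it restores the latest
// replicated state into dstPath so the copy can be integrity-checked out of band.
func restoreForVerification(ctx context.Context, replicaDir, dstPath string) error {
	client := file.NewReplicaClient(replicaDir)
	return litestream.Restore(ctx, client, dstPath, nil)
}

func main() {
	if err := restoreForVerification(context.Background(), "/var/backups/db-replica", "/tmp/restored.db"); err != nil {
		log.Fatalf("restore failed: %v", err)
	}
	log.Println("restore completed; run PRAGMA integrity_check against the restored copy")
}
```

After a restore like this, running `PRAGMA integrity_check` against the copy (as the testing guide does) closes the verification loop.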
diff --git a/llms.txt b/llms.txt index 7815049c1..2b03a02a4 100644 --- a/llms.txt +++ b/llms.txt @@ -8,7 +8,6 @@ Disaster recovery tool for SQLite that runs as a background process and safely r - [docs/SQLITE_INTERNALS.md](docs/SQLITE_INTERNALS.md): Critical SQLite knowledge including WAL format and 1GB lock page - [docs/LTX_FORMAT.md](docs/LTX_FORMAT.md): LTX (Log Transaction) format specification for replication - [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md): Deep technical details of Litestream components -- [docs/V050_CHANGES.md](docs/V050_CHANGES.md): Version 0.5.0 breaking changes and migration guide ## Implementation Guides @@ -29,7 +28,7 @@ Disaster recovery tool for SQLite that runs as a background process and safely r - [abs/replica_client.go](abs/replica_client.go): Azure Blob Storage implementation - [sftp/replica_client.go](sftp/replica_client.go): SFTP implementation - [file/replica_client.go](file/replica_client.go): Local file system implementation -- [nats/replica_client.go](nats/replica_client.go): NATS JetStream implementation (v0.5.0+) +- [nats/replica_client.go](nats/replica_client.go): NATS JetStream implementation ## Critical Concepts @@ -39,7 +38,7 @@ The lock page at exactly 1GB (0x40000000) must always be skipped during replicat ### LTX Format Immutable, append-only files containing database changes. Files are named by transaction ID ranges (e.g., 0000000001-0000000064.ltx). -### Compaction Levels (v0.5.0) +### Compaction Levels - Level 0: Raw LTX files (no compaction) - Level 1: 30-second windows - Level 2: 5-minute windows @@ -74,7 +73,7 @@ Use Lock() for writes, RLock() for reads. Never use RLock() when modifying state ## Configuration -Primary configuration via YAML file (etc/litestream.yml) or environment variables. Single replica per database in v0.5.0+ (breaking change from v0.3.x). +Primary configuration via YAML file (etc/litestream.yml) or environment variables. Each database replicates to exactly one remote destination. ## Build Requirements From 4a74de583a4fb9661c94afc338e10a90cde92745 Mon Sep 17 00:00:00 2001 From: Cory LaNou Date: Sun, 12 Oct 2025 09:40:54 -0500 Subject: [PATCH 7/8] fix(docs): correct Mermaid diagram syntax in AGENTS.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixed parse error in Layer Responsibilities diagram: - Removed periods from node labels (DB.pos → DB position) - HTML-escaped parentheses in method names for safer parsing Resolves: "Parse error on line 3: got 'PS'" in Mermaid renderer 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- AGENTS.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 8e1c3018f..5002819ff 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -243,16 +243,16 @@ Many storage backends (S3, R2, etc.) are eventually consistent. 
This means: ```mermaid graph TB subgraph "DB Layer (db.go)" - DBInit[DB.init()] - DBPos[DB.pos tracking] + DBInit[DB.init()] + DBPos[DB position tracking] DBRestore[Database state validation] - DBSnapshot[Snapshot triggering via verify()] + DBSnapshot[Snapshot triggering via verify()] end subgraph "Replica Layer (replica.go)" - ReplicaStart[Replica.Start()] + ReplicaStart[Replica.Start()] ReplicaSync[Sync operations] - ReplicaPos[Replica.pos tracking] + ReplicaPos[Replica position tracking] ReplicaClient[Storage interaction] end From c13c17ddc2059ac7b04b226d325acdbad14a66b1 Mon Sep 17 00:00:00 2001 From: Cory LaNou Date: Mon, 3 Nov 2025 09:53:30 -0600 Subject: [PATCH 8/8] refactor(docs): shift from code examples to principle-based documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This refactor addresses outdated code examples that cause compilation errors and implements industry best practices for maintainable documentation. Changes: - Replace broken code examples with principle-based patterns - Fix all LTXFiles signatures to include useMetadata parameter (10 locations) - Convert implementation examples to architectural pattern descriptions - Add comprehensive doc maintenance guide Key fixes: - AGENTS.md: Replace non-existent clearL0Files()/writeL0File() calls with pattern description and reference to actual implementation - Fix missing useMetadata bool parameter across all ReplicaClient interface definitions and example calls - Update CLAUDE.md to correctly document LTXFiles behavior vs incorrect WriteLTXFile claims Philosophy (based on INNOQ/Google best practices): - Document abstractions, not volatile implementation details - Focus on "why" and architectural principles over "what" and "how" - Reference actual source code instead of duplicating it - Describe stable patterns rather than specific function calls New resource: - docs/DOC_MAINTENANCE.md: Comprehensive guide for keeping documentation synchronized with code changes, including checklists and examples All documentation now passes markdownlint validation and references actual source code that compiles. 
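To make the `useMetadata` change concrete, the following hedged sketch shows how a caller might choose the flag when enumerating level-0 files. The interface and iterator usage follow the definitions documented below; the helper itself is illustrative and not taken from the codebase.

```go
package example

import (
	"context"
	"fmt"
	"time"

	// Assumed import path for illustration only.
	"github.com/benbjohnson/litestream"
)

// listL0 enumerates level-0 LTX files. forPITRestore selects accurate
// backend-metadata timestamps (slower) over fast listing timestamps.
func listL0(ctx context.Context, client litestream.ReplicaClient, forPITRestore bool) error {
	itr, err := client.LTXFiles(ctx, 0, 0, forPITRestore)
	if err != nil {
		return err
	}
	defer itr.Close()

	for itr.Next() {
		info := itr.Item()
		fmt.Printf("level=%d txid=%v-%v created=%s\n",
			info.Level, info.MinTXID, info.MaxTXID, info.CreatedAt.Format(time.RFC3339))
	}
	return nil
}
```

Monitoring paths would pass `false` here for fast listings, while point-in-time restore paths would pass `true` to get trustworthy `CreatedAt` values.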
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .claude/agents/replica-client-developer.md | 13 +- .claude/commands/add-storage-backend.md | 5 +- .claude/commands/validate-replica.md | 91 ++++---- AGENTS.md | 69 +++--- CLAUDE.md | 6 +- docs/DOC_MAINTENANCE.md | 247 +++++++++++++++++++++ docs/REPLICA_CLIENT_GUIDE.md | 37 +-- docs/TESTING_GUIDE.md | 3 +- 8 files changed, 368 insertions(+), 103 deletions(-) create mode 100644 docs/DOC_MAINTENANCE.md diff --git a/.claude/agents/replica-client-developer.md b/.claude/agents/replica-client-developer.md index 7ab7236ca..849db1bcb 100644 --- a/.claude/agents/replica-client-developer.md +++ b/.claude/agents/replica-client-developer.md @@ -16,11 +16,12 @@ You specialize in implementing and maintaining storage backend clients for Lites ## Core Knowledge ### ReplicaClient Interface + Every storage backend MUST implement: ```go type ReplicaClient interface { Type() string - LTXFiles(ctx context.Context, level int, seek ltx.TXID) (ltx.FileIterator, error) + LTXFiles(ctx context.Context, level int, seek ltx.TXID, useMetadata bool) (ltx.FileIterator, error) OpenLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, offset, size int64) (io.ReadCloser, error) WriteLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, r io.Reader) (*ltx.FileInfo, error) DeleteLTXFiles(ctx context.Context, files []*ltx.FileInfo) error @@ -28,6 +29,10 @@ type ReplicaClient interface { } ``` +**LTXFiles useMetadata parameter**: +- When `useMetadata=true`: Fetch accurate timestamps from backend metadata (slower, required for point-in-time restore) +- When `useMetadata=false`: Use fast timestamps from file listing (faster, suitable for replication monitoring) + ### Critical Patterns 1. **Eventual Consistency Handling**: @@ -50,6 +55,7 @@ type ReplicaClient interface { ## Implementation Checklist ### New Backend Requirements + - [ ] Implement ReplicaClient interface - [ ] Handle partial reads (offset/size) - [ ] Support seek parameter for pagination @@ -60,6 +66,7 @@ type ReplicaClient interface { - [ ] Document configuration ### Testing Requirements + ```bash # Integration test go test -v ./replica_client_test.go -integration [backend] @@ -74,6 +81,7 @@ go test -race -v ./[backend]/... ## Existing Backends Reference ### Study These Implementations + - `s3/replica_client.go` - AWS S3 (most complete) - `gs/replica_client.go` - Google Cloud Storage - `abs/replica_client.go` - Azure Blob Storage @@ -82,6 +90,7 @@ go test -race -v ./[backend]/... - `nats/replica_client.go` - NATS JetStream (newest) ## Common Pitfalls + 1. Not handling eventual consistency 2. Missing atomic write operations 3. Incorrect error types @@ -90,6 +99,7 @@ go test -race -v ./[backend]/... 6. 
No retry logic for transient failures ## Configuration Pattern + ```yaml replicas: - type: [backend] @@ -98,6 +108,7 @@ replicas: ``` ## References + - docs/REPLICA_CLIENT_GUIDE.md - Complete implementation guide - replica_client.go - Interface definition - replica_client_test.go - Test suite diff --git a/.claude/commands/add-storage-backend.md b/.claude/commands/add-storage-backend.md index aab97f2ba..a17b54afc 100644 --- a/.claude/commands/add-storage-backend.md +++ b/.claude/commands/add-storage-backend.md @@ -26,8 +26,9 @@ Create a new storage backend implementation for Litestream with all required com return "{{backend_name}}" } - func (c *ReplicaClient) LTXFiles(ctx context.Context, level int, seek ltx.TXID) (ltx.FileIterator, error) { + func (c *ReplicaClient) LTXFiles(ctx context.Context, level int, seek ltx.TXID, useMetadata bool) (ltx.FileIterator, error) { // List files at level + // When useMetadata=true, fetch accurate timestamps from backend metadata } func (c *ReplicaClient) OpenLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, offset, size int64) (io.ReadCloser, error) { @@ -77,6 +78,7 @@ Create a new storage backend implementation for Litestream with all required com ``` ## Key Requirements + - Handle eventual consistency - Implement atomic writes (temp file + rename) - Support partial reads (offset/size) @@ -84,6 +86,7 @@ Create a new storage backend implementation for Litestream with all required com - Return proper error types (os.ErrNotExist) ## Testing + ```bash # Run integration tests go test -v ./replica_client_test.go -integration {{backend_name}} diff --git a/.claude/commands/validate-replica.md b/.claude/commands/validate-replica.md index a9b2abfa4..4fbf9b34b 100644 --- a/.claude/commands/validate-replica.md +++ b/.claude/commands/validate-replica.md @@ -8,54 +8,59 @@ First, identify what needs validation: Then validate the implementation: 1. **Interface compliance check**: -```go -// Ensure all methods are implemented -var _ litestream.ReplicaClient = (*YourClient)(nil) -``` -2. **Verify error types**: -```go -// OpenLTXFile must return os.ErrNotExist for missing files -_, err := client.OpenLTXFile(ctx, 0, 999, 999, 0, 0) -if !errors.Is(err, os.ErrNotExist) { - t.Errorf("Expected os.ErrNotExist, got %v", err) -} -``` + ```go + // Ensure all methods are implemented + var _ litestream.ReplicaClient = (*YourClient)(nil) + ``` -3. **Test partial reads**: -```go -// Must support offset and size parameters -rc, err := client.OpenLTXFile(ctx, 0, 1, 100, 50, 25) -data, _ := io.ReadAll(rc) -if len(data) != 25 { - t.Errorf("Expected 25 bytes, got %d", len(data)) -} -``` +1. **Verify error types**: -4. **Verify timestamp preservation**: -```go -// CreatedAt should reflect remote object metadata (or upload time) -start := time.Now() -info, _ := client.WriteLTXFile(ctx, 0, 1, 100, reader) -if info.CreatedAt.IsZero() || info.CreatedAt.Before(start.Add(-time.Second)) { - t.Error("unexpected CreatedAt timestamp") -} -``` + ```go + // OpenLTXFile must return os.ErrNotExist for missing files + _, err := client.OpenLTXFile(ctx, 0, 999, 999, 0, 0) + if !errors.Is(err, os.ErrNotExist) { + t.Errorf("Expected os.ErrNotExist, got %v", err) + } + ``` -5. **Test eventual consistency handling**: -- Implement retry logic for transient failures -- Handle partial file availability -- Verify write-after-write consistency +1. **Test partial reads**: -6. 
**Validate cleanup**: -```go -// DeleteAll must remove everything -err := client.DeleteAll(ctx) -files, _ := client.LTXFiles(ctx, 0, 0) -if files.Next() { - t.Error("Files remain after DeleteAll") -} -``` + ```go + // Must support offset and size parameters + rc, err := client.OpenLTXFile(ctx, 0, 1, 100, 50, 25) + data, _ := io.ReadAll(rc) + if len(data) != 25 { + t.Errorf("Expected 25 bytes, got %d", len(data)) + } + ``` + +1. **Verify timestamp preservation**: + + ```go + // CreatedAt should reflect remote object metadata (or upload time) + start := time.Now() + info, _ := client.WriteLTXFile(ctx, 0, 1, 100, reader) + if info.CreatedAt.IsZero() || info.CreatedAt.Before(start.Add(-time.Second)) { + t.Error("unexpected CreatedAt timestamp") + } + ``` + +1. **Test eventual consistency handling**: + - Implement retry logic for transient failures + - Handle partial file availability + - Verify write-after-write consistency + +1. **Validate cleanup**: + + ```go + // DeleteAll must remove everything + err := client.DeleteAll(ctx) + files, _ := client.LTXFiles(ctx, 0, 0, false) + if files.Next() { + t.Error("Files remain after DeleteAll") + } + ``` Key validation points: - Proper error types (os.ErrNotExist, os.ErrPermission) diff --git a/AGENTS.md b/AGENTS.md index 5002819ff..7e66ecdc6 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -270,49 +270,28 @@ graph TB S3 -->|Store| LTXFiles ``` -### ✅ DO: Handle database state in DB.init() +### ✅ DO: Handle database state in DB layer -```go -// CORRECT - Database restoration logic belongs in DB layer -func (db *DB) init(ctx context.Context, replica *litestream.Replica) error { - dpos, err := db.Pos() - if err != nil { - return err - } - rpos := replica.Pos() +**Principle**: Database restoration logic belongs in the DB layer, not the Replica layer. - if dpos.TXID < rpos.TXID { - if err := db.clearL0Files(); err != nil { - return fmt.Errorf("clear L0 files: %w", err) - } +**Pattern**: When the database is behind the replica (local TXID < remote TXID): - itr, err := replica.Client.LTXFiles(ctx, 0, rpos.TXID) - if err != nil { - return fmt.Errorf("enumerate ltx files: %w", err) - } - defer itr.Close() - - if itr.Next() { - info := itr.Item() - rd, err := replica.Client.OpenLTXFile(ctx, info.Level, info.MinTXID, info.MaxTXID, 0, 0) - if err != nil { - return fmt.Errorf("fetch latest L0 LTX: %w", err) - } - defer rd.Close() - - if err := db.writeL0File(rd); err != nil { - return fmt.Errorf("write L0 file: %w", err) - } - } +1. **Clear local L0 cache**: Remove the entire L0 directory and recreate it + - Use `os.RemoveAll()` on the L0 directory path + - Recreate with proper permissions using `internal.MkdirAll()` - if err := itr.Close(); err != nil { - return err - } - } +2. **Fetch latest L0 file from replica**: Download the most recent L0 LTX file + - Call `replica.Client.OpenLTXFile()` with the remote min/max TXID + - Stream the file contents (don't load into memory) - return replica.Start(ctx) -} -``` +3. **Write using atomic file operations**: Prevent partial/corrupted files + - Write to temporary file with `.tmp` suffix + - Call `Sync()` to ensure data is on disk + - Atomically rename temp file to final path + +**Why this matters**: If the database state is not synchronized before replication starts, the system will attempt to apply WAL segments that are ahead of the database's current position, leading to restore failures. 
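The atomic-write step in this pattern deserves a concrete illustration. The sketch below uses only the standard library; `writeFileAtomic` is a hypothetical helper name, and the real logic lives in the reference implementation cited next.

```go
package example

import (
	"io"
	"os"
)

// writeFileAtomic streams r into dstPath using the temp-file + fsync + rename
// pattern described above. Hypothetical helper for illustration only.
func writeFileAtomic(dstPath string, r io.Reader, mode os.FileMode) error {
	tmpPath := dstPath + ".tmp"

	f, err := os.OpenFile(tmpPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, mode)
	if err != nil {
		return err
	}

	// Stream the contents instead of buffering the whole LTX file in memory.
	if _, err := io.Copy(f, r); err != nil {
		f.Close()
		os.Remove(tmpPath)
		return err
	}

	// Flush to stable storage before the rename so a crash can never leave a
	// partially written file at the final path.
	if err := f.Sync(); err != nil {
		f.Close()
		os.Remove(tmpPath)
		return err
	}
	if err := f.Close(); err != nil {
		os.Remove(tmpPath)
		return err
	}

	// Rename is atomic on POSIX filesystems: readers see either the old file
	// or the complete new one, never a partial write.
	return os.Rename(tmpPath, dstPath)
}
```

In the scenario above, `r` would be the stream returned by `OpenLTXFile`, so the downloaded L0 file never needs to be held fully in memory.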
+ +**Reference Implementation**: See `DB.checkDatabaseBehindReplica()` in db.go:670-737 ### ❌ DON'T: Put database state logic in Replica layer @@ -655,7 +634,7 @@ type ReplicaClient interface { Type() string // Client type identifier // File operations - LTXFiles(ctx context.Context, level int, seek ltx.TXID) (ltx.FileIterator, error) + LTXFiles(ctx context.Context, level int, seek ltx.TXID, useMetadata bool) (ltx.FileIterator, error) OpenLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, offset, size int64) (io.ReadCloser, error) WriteLTXFile(ctx context.Context, level int, minTXID, maxTXID ltx.TXID, r io.Reader) (*ltx.FileInfo, error) DeleteLTXFiles(ctx context.Context, files []*ltx.FileInfo) error @@ -663,11 +642,19 @@ type ReplicaClient interface { } ``` +**LTXFiles useMetadata Parameter:** +- **`useMetadata=true`**: Fetch accurate timestamps from backend metadata (required for point-in-time restores) + - Slower but provides correct CreatedAt timestamps + - Use when restoring to specific timestamp +- **`useMetadata=false`**: Use fast timestamps (LastModified/ModTime) for normal operations + - Faster enumeration, suitable for synchronization + - Use during replication monitoring + **Implementation Requirements:** - Handle partial reads gracefully - Implement proper error types (os.ErrNotExist) - Support seek/offset for efficient page fetching -- Preserve file timestamps (CreatedAt) +- Preserve file timestamps when `useMetadata=true` ### Store Component (store.go) @@ -938,7 +925,7 @@ This document serves as the universal source of truth for all AI coding assistan ### Documentation Hierarchy -``` +```text Tier 1 (Always read): - AGENTS.md (this file) - llms.txt (if you need navigation) diff --git a/CLAUDE.md b/CLAUDE.md index e22c517f1..23fb48d6b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -7,12 +7,14 @@ This file is automatically loaded by Claude Code and provides Claude-specific op **Primary Documentation**: See AGENTS.md for comprehensive architectural guidance, patterns, and anti-patterns. ### Context Window Advantages + With Claude's 200k token context window, you can load the entire documentation suite: - Full AGENTS.md for patterns and anti-patterns - All docs/ subdirectory files for deep technical understanding - Multiple source files simultaneously for cross-referencing ### Key Focus Areas for Claude + 1. **Architectural Reasoning**: Leverage deep understanding of DB vs Replica layer boundaries 2. **Complex Analysis**: Use full context for multi-file refactoring 3. **SQLite Internals**: Reference docs/SQLITE_INTERNALS.md for WAL format details @@ -21,6 +23,7 @@ With Claude's 200k token context window, you can load the entire documentation s ### Claude-Specific Resources #### Specialized Agents (.claude/agents/) + - **sqlite-expert.md**: SQLite WAL and page management expertise - **replica-client-developer.md**: Storage backend implementation - **ltx-compaction-specialist.md**: LTX format and compaction @@ -28,6 +31,7 @@ With Claude's 200k token context window, you can load the entire documentation s - **performance-optimizer.md**: Performance and resource optimization #### Commands (.claude/commands/) + - `/analyze-ltx`: Analyze LTX file structure and contents - `/debug-wal`: Debug WAL replication issues - `/test-compaction`: Test compaction scenarios @@ -108,7 +112,7 @@ pre-commit run --all-files **Replica (`replica.go`)**: Connects a database to replication destinations via ReplicaClient interface. 
Manages periodic synchronization and maintains replication position. -**ReplicaClient Interface** (`replica_client.go`): Abstraction for different storage backends (S3, GCS, Azure Blob Storage, SFTP, file system, NATS). Each implementation handles snapshot/WAL segment upload and restoration. The `WriteLTXFile` method accepts an optional `createdAt` timestamp parameter to preserve original file timestamps during compaction operations. +**ReplicaClient Interface** (`replica_client.go`): Abstraction for different storage backends (S3, GCS, Azure Blob Storage, SFTP, file system, NATS). Each implementation handles snapshot/WAL segment upload and restoration. The `LTXFiles` method includes a `useMetadata` parameter: when true, it fetches accurate timestamps from backend metadata (required for point-in-time restores); when false, it uses fast timestamps for normal operations. During compaction, the system preserves the earliest CreatedAt timestamp from source files to maintain temporal granularity for restoration. **WAL Processing**: The system monitors SQLite WAL files for changes, segments them into LTX format files, and replicates these segments to configured destinations. Uses SQLite checksums for integrity verification. diff --git a/docs/DOC_MAINTENANCE.md b/docs/DOC_MAINTENANCE.md new file mode 100644 index 000000000..ae7b7746d --- /dev/null +++ b/docs/DOC_MAINTENANCE.md @@ -0,0 +1,247 @@ +# Documentation Maintenance Guide + +This guide ensures documentation stays synchronized with code changes and follows the principle-based approach established in PR #787. + +## Philosophy: Principles Over Examples + +**Key Insight**: Code examples become outdated quickly. Documentation should focus on **stable concepts** rather than **volatile implementations**. + +### What to Document + +✅ **DO Document**: + +- **Architectural principles** (e.g., "DB layer handles database state") +- **Interface contracts** (what methods must do, not how they do it) +- **Design patterns** (atomic file operations, eventual consistency handling) +- **Critical edge cases** (1GB lock page, timestamp preservation) +- **"Why" not "what"** (rationale behind decisions) + +❌ **DON'T Document**: + +- Specific function implementations that change frequently +- Exact function names without referencing actual source +- Step-by-step code that duplicates the implementation +- Version-specific details that will quickly become stale + +### Documentation Principles + +1. **Abstractions over Details**: Document the concept, not the specific implementation +2. **Reference over Duplication**: Point to actual source files instead of copying code +3. **Patterns over Examples**: Describe the approach, let developers read the source +4. **Contracts over Implementations**: Define what must happen, not how + +## When Code Changes, Update Docs + +### Interface Changes + +**Trigger**: Modifying `ReplicaClient` interface or any public interface + +**Required Updates**: + +1. Search for interface definitions in docs: + + ```bash + rg "type ReplicaClient interface" docs/ CLAUDE.md AGENTS.md .claude/ + ``` + +2. Update interface signatures (don't forget parameters!) +3. Document new parameters with clear explanations of when/why to use them +4. 
Update all example calls to include new parameters + +**Files to Check**: + +- `AGENTS.md` - Interface definitions +- `docs/REPLICA_CLIENT_GUIDE.md` - Implementation guide +- `docs/TESTING_GUIDE.md` - Test examples +- `.claude/agents/replica-client-developer.md` - Agent knowledge +- `.claude/commands/add-storage-backend.md` - Backend templates +- `.claude/commands/validate-replica.md` - Validation commands + +### New Features + +**Trigger**: Adding new functionality, methods, or components + +**Approach**: + +1. **Don't rush to document** - Wait until the feature stabilizes +2. **Document the pattern**, not the implementation: + - What problem does it solve? + - What's the high-level approach? + - What are the critical constraints? +3. **Reference the source**: + - `See implementation in file.go:lines` + - `Reference tests in file_test.go` + +### Refactoring + +**Trigger**: Moving or renaming functions, restructuring code + +**Required Actions**: + +1. **Search for references**: + + ```bash + # Find function name references + rg "functionName" docs/ CLAUDE.md AGENTS.md .claude/ + ``` + +2. **Update or remove**: + - If it's a reference pointer (e.g., "See `DB.init()` in db.go:123"), update it + - If it's a code example showing implementation, consider replacing with a pattern description + +3. **Verify links**: Ensure all file:line references are still valid + +## Documentation Update Checklist + +Use this checklist when making code changes: + +- [ ] **Search docs for affected code**: + + ```bash + # Search for function names, types, or concepts + rg "YourFunctionName" docs/ CLAUDE.md AGENTS.md .claude/ + ``` + +- [ ] **Update interface definitions** if signatures changed +- [ ] **Update examples** if they won't compile anymore +- [ ] **Convert brittle examples to patterns** if refactoring made them stale +- [ ] **Update file:line references** if code moved +- [ ] **Verify contracts still hold** (update if behavior changed) +- [ ] **Run markdownlint**: + + ```bash + markdownlint --fix docs/ CLAUDE.md AGENTS.md .claude/ + ``` + +## Preventing Documentation Drift + +### Pre-Commit Practices + +1. **Search before committing**: + + ```bash + git diff --name-only | xargs -I {} rg "basename {}" docs/ + ``` + +2. **Review doc references** in your PR description +3. **Test examples compile** (if they're meant to) + +### Regular Audits + +**Monthly**: Spot-check one documentation file against current codebase + +**Questions to ask**: + +- Do interface definitions match `replica_client.go`? +- Do code examples compile? +- Are file:line references accurate? +- Have we removed outdated examples? + +### When in Doubt + +**Rule**: Delete outdated documentation rather than let it mislead + +- Stale examples cause compilation errors +- Outdated patterns cause architectural mistakes +- Incorrect references waste developer time + +**Better**: A brief pattern description + reference to source than an outdated example + +## Example: Good vs Bad Documentation Updates + +### ❌ Bad: Copying Implementation + +```markdown +### How to initialize DB + +```go +func (db *DB) init() { + db.mu.Lock() + defer db.mu.Unlock() + // ... 50 lines of code copied from db.go +} +\``` +``` + +**Problem**: This will be outdated as soon as the implementation changes. + +### ✅ Good: Documenting Pattern + Reference + +```markdown +### DB Initialization Pattern + +**Principle**: Database initialization must complete before replication starts. + +**Pattern**: + +1. Acquire exclusive lock (`mu.Lock()`) +2. 
Verify database state consistency +3. Initialize monitoring subsystems +4. Set up replication coordination + +**Critical**: Use `Lock()` not `RLock()` as initialization modifies state. + +**Reference Implementation**: See `DB.init()` in db.go:150-230 +``` + +**Benefits**: Stays accurate even if implementation details change, focuses on the "why" and "what" rather than the "how". + +## Tools and Commands + +### Find Documentation References + +```bash +# Find all code examples in documentation +rg "^```(go|golang)" docs/ CLAUDE.md AGENTS.md .claude/ + +# Find file:line references +rg "\.go:\d+" docs/ CLAUDE.md AGENTS.md .claude/ + +# Find interface definitions +rg "type .* interface" docs/ CLAUDE.md AGENTS.md .claude/ +``` + +### Validate Markdown + +```bash +# Lint all docs +markdownlint docs/ CLAUDE.md AGENTS.md .claude/ + +# Auto-fix issues +markdownlint --fix docs/ CLAUDE.md AGENTS.md .claude/ +``` + +### Check for Broken References + +```bash +# List all go files mentioned in docs +rg -o "[a-z_]+\.go:\d+" docs/ CLAUDE.md AGENTS.md | sort -u + +# Verify they exist and line numbers are reasonable +``` + +## Resources + +- **PR #787**: Original principle-based documentation refactor +- **Issue #805**: Context for why accurate documentation matters +- **INNOQ Best Practices**: +- **Google Style Guide**: + +## Questions? + +When updating documentation, ask: + +1. **Is this a stable concept or a volatile implementation?** + - Stable → Document the principle + - Volatile → Reference the source + +2. **Will this stay accurate for 6+ months?** + - Yes → Keep it + - No → Replace with pattern description + +3. **Does this explain WHY or just WHAT?** + - WHY → Valuable documentation + - WHAT → Code already shows this, just reference it + +4. **Would a link to source code be better?** + - Often, yes! diff --git a/docs/REPLICA_CLIENT_GUIDE.md b/docs/REPLICA_CLIENT_GUIDE.md index 9bfd85267..bc14bb91d 100644 --- a/docs/REPLICA_CLIENT_GUIDE.md +++ b/docs/REPLICA_CLIENT_GUIDE.md @@ -3,6 +3,7 @@ This guide provides comprehensive instructions for implementing new storage backends for Litestream replication. ## Table of Contents + - [Interface Contract](#interface-contract) - [Implementation Checklist](#implementation-checklist) - [Eventual Consistency Handling](#eventual-consistency-handling) @@ -22,7 +23,8 @@ type ReplicaClient interface { // Returns iterator of LTX files at given level // seek: Start from this TXID (0 = beginning) - LTXFiles(ctx context.Context, level int, seek ltx.TXID) (ltx.FileIterator, error) + // useMetadata: When true, fetch accurate timestamps from backend metadata (required for PIT restore) + LTXFiles(ctx context.Context, level int, seek ltx.TXID, useMetadata bool) (ltx.FileIterator, error) // Opens an LTX file for reading // Returns os.ErrNotExist if file doesn't exist @@ -43,6 +45,7 @@ type ReplicaClient interface { ## Implementation Checklist ### Required Features + - [ ] Implement all interface methods - [ ] Support partial reads (offset/size in OpenLTXFile) - [ ] Return proper error types (especially os.ErrNotExist) @@ -52,6 +55,7 @@ type ReplicaClient interface { - [ ] Implement proper cleanup in DeleteAll ### Optional Features + - [ ] Connection pooling - [ ] Retry logic with exponential backoff - [ ] Request batching @@ -127,9 +131,9 @@ func (c *ReplicaClient) verifyUpload(ctx context.Context, path string, expectedS #### 2. 
List-After-Write Consistency ```go -func (c *ReplicaClient) LTXFiles(ctx context.Context, level int, seek ltx.TXID) (ltx.FileIterator, error) { +func (c *ReplicaClient) LTXFiles(ctx context.Context, level int, seek ltx.TXID, useMetadata bool) (ltx.FileIterator, error) { // List files from storage - files, err := c.listFiles(ctx, level) + files, err := c.listFiles(ctx, level, useMetadata) if err != nil { return nil, err } @@ -384,7 +388,7 @@ func TestReplicaClient_Integration(t *testing.T) { assert.NoError(t, err) // Verify cleanup - iter, err := client.LTXFiles(ctx, 0, 0) + iter, err := client.LTXFiles(ctx, 0, 0, false) require.NoError(t, err) defer iter.Close() @@ -558,7 +562,7 @@ func (c *Client) WriteLTXFile(ctx context.Context, ...) (*ltx.FileInfo, error) { ```go // WRONG - Loads all files at once -func (c *Client) LTXFiles(ctx context.Context, level int, seek ltx.TXID) (ltx.FileIterator, error) { +func (c *Client) LTXFiles(ctx context.Context, level int, seek ltx.TXID, useMetadata bool) (ltx.FileIterator, error) { allFiles, err := c.loadAllFiles(level) // Could be millions! if err != nil { return nil, err @@ -570,12 +574,13 @@ func (c *Client) LTXFiles(ctx context.Context, level int, seek ltx.TXID) (ltx.Fi ```go // CORRECT - Lazy loading with pagination -func (c *Client) LTXFiles(ctx context.Context, level int, seek ltx.TXID) (ltx.FileIterator, error) { +func (c *Client) LTXFiles(ctx context.Context, level int, seek ltx.TXID, useMetadata bool) (ltx.FileIterator, error) { return &lazyIterator{ - client: c, - level: level, - seek: seek, - pageSize: 1000, + client: c, + level: level, + seek: seek, + useMetadata: useMetadata, + pageSize: 1000, }, nil } @@ -709,15 +714,17 @@ type ReplicaClient struct { cache *FileInfoCache } -func (c *ReplicaClient) LTXFiles(ctx context.Context, level int, seek ltx.TXID) (ltx.FileIterator, error) { - // Check cache first +func (c *ReplicaClient) LTXFiles(ctx context.Context, level int, seek ltx.TXID, useMetadata bool) (ltx.FileIterator, error) { + // Check cache first (only cache when useMetadata=false for fast queries) cacheKey := fmt.Sprintf("%d-%d", level, seek) - if cached, ok := c.cache.Get(cacheKey); ok { - return ltx.NewFileInfoSliceIterator(cached), nil + if !useMetadata { + if cached, ok := c.cache.Get(cacheKey); ok { + return ltx.NewFileInfoSliceIterator(cached), nil + } } // Load from storage - files, err := c.loadFiles(ctx, level, seek) + files, err := c.loadFiles(ctx, level, seek, useMetadata) if err != nil { return nil, err } diff --git a/docs/TESTING_GUIDE.md b/docs/TESTING_GUIDE.md index 84cfcb10c..ac02b16fa 100644 --- a/docs/TESTING_GUIDE.md +++ b/docs/TESTING_GUIDE.md @@ -3,6 +3,7 @@ Comprehensive guide for testing Litestream components and handling edge cases. ## Table of Contents + - [Testing Philosophy](#testing-philosophy) - [1GB Database Testing](#1gb-database-testing) - [Race Condition Testing](#race-condition-testing) @@ -175,7 +176,7 @@ func TestDB_LockPageHandling(t *testing.T) { func verifyLockPageSkipped(t *testing.T, replica *litestream.Replica, lockPgno uint32) { // Get LTX files - files, err := replica.Client.LTXFiles(context.Background(), 0, 0) + files, err := replica.Client.LTXFiles(context.Background(), 0, 0, false) require.NoError(t, err) // Check each file