Performance Tuning Guide¶
This guide provides comprehensive performance optimization techniques for Readur deployments, covering database optimization, OCR processing, resource tuning, and monitoring strategies.
Database Optimization¶
PostgreSQL Configuration¶
Optimize PostgreSQL for document management workloads:
# postgresql.conf optimizations
# Memory settings
shared_buffers = 4GB                  # ~25% of RAM (example values for a 16GB system)
effective_cache_size = 12GB           # ~75% of RAM
work_mem = 32MB # Per-operation memory
maintenance_work_mem = 512MB # For VACUUM, indexes
# Connection pooling
max_connections = 200
max_prepared_transactions = 100
# Write performance
checkpoint_completion_target = 0.9
wal_buffers = 16MB

# Query planning and optimization
default_statistics_target = 100
random_page_cost = 1.1 # Lower cost for SSD storage
enable_partitionwise_join = on
enable_partitionwise_aggregate = on
jit = on # Just-in-time compilation for complex queries
Index Optimization¶
Critical indexes for performance:
-- Document search performance
CREATE INDEX CONCURRENTLY idx_documents_content_gin
ON documents USING gin(to_tsvector('english', content));
CREATE INDEX CONCURRENTLY idx_documents_user_created
ON documents(user_id, created_at DESC);
CREATE INDEX CONCURRENTLY idx_documents_status_updated
ON documents(status, updated_at DESC)
WHERE status IN ('pending', 'processing');
-- OCR queue performance
CREATE INDEX CONCURRENTLY idx_ocr_queue_priority
ON ocr_queue(priority DESC, created_at ASC)
WHERE status = 'pending';
CREATE INDEX CONCURRENTLY idx_ocr_queue_retry
ON ocr_queue(retry_count, next_retry_at)
WHERE status = 'failed' AND retry_count < max_retries;
-- Search optimization
CREATE INDEX CONCURRENTLY idx_documents_metadata
ON documents USING gin(metadata jsonb_path_ops);
-- File hash for duplicate detection
CREATE INDEX CONCURRENTLY idx_documents_file_hash
ON documents(file_hash)
WHERE file_hash IS NOT NULL;
Query Optimization¶
Optimize common queries:
-- Efficient document search with pagination
CREATE OR REPLACE FUNCTION search_documents_optimized(
search_query TEXT,
user_id_param UUID,
limit_param INT DEFAULT 20,
offset_param INT DEFAULT 0
) RETURNS TABLE (
id UUID,
title TEXT,
content TEXT,
rank REAL
) AS $$
BEGIN
RETURN QUERY
WITH ranked_docs AS (
SELECT
d.id,
d.title,
d.content,
ts_rank_cd(
to_tsvector('english', d.content),
plainto_tsquery('english', search_query)
) AS rank
FROM documents d
WHERE
d.user_id = user_id_param
AND to_tsvector('english', d.content) @@
plainto_tsquery('english', search_query)
)
SELECT * FROM ranked_docs
ORDER BY rank DESC
LIMIT limit_param
OFFSET offset_param;
END;
$$ LANGUAGE plpgsql;
-- Efficient OCR queue fetch
CREATE OR REPLACE FUNCTION get_next_ocr_job()
RETURNS ocr_queue AS $$
DECLARE
job ocr_queue%ROWTYPE;
BEGIN
SELECT * INTO job
FROM ocr_queue
WHERE status = 'pending'
ORDER BY priority DESC, created_at ASC
FOR UPDATE SKIP LOCKED
LIMIT 1;
IF FOUND THEN
UPDATE ocr_queue
SET status = 'processing',
    started_at = NOW()
WHERE id = job.id
RETURNING * INTO job;  -- return the claimed row with its updated status
END IF;
RETURN job;
END;
$$ LANGUAGE plpgsql;
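From the application, these functions are called like any other query. The following is a minimal sketch, assuming sqlx with the uuid feature and a hypothetical SearchHit struct that mirrors the search function's return columns:

use sqlx::PgPool;
use uuid::Uuid;

// Hypothetical row type matching search_documents_optimized's return columns
#[derive(sqlx::FromRow)]
struct SearchHit {
    id: Uuid,
    title: String,
    content: String,
    rank: f32,
}

async fn first_page(pool: &PgPool, user_id: Uuid, query: &str) -> sqlx::Result<Vec<SearchHit>> {
    // Fetch the first 20 matches; OFFSET 0 keeps the pagination explicit
    sqlx::query_as::<_, SearchHit>("SELECT * FROM search_documents_optimized($1, $2, $3, $4)")
        .bind(query)
        .bind(user_id)
        .bind(20_i32)
        .bind(0_i32)
        .fetch_all(pool)
        .await
}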
Database Maintenance¶
Regular maintenance schedule:
#!/bin/bash
# maintenance.sh - Run as a daily cron job
# Vacuum and analyze tables
psql -U readur -d readur_db <<EOF
VACUUM ANALYZE documents;
VACUUM ANALYZE ocr_queue;
VACUUM ANALYZE users;
REINDEX INDEX CONCURRENTLY idx_documents_content_gin;
EOF
# Update table statistics
psql -U readur -d readur_db <<EOF
ANALYZE documents;
ANALYZE ocr_queue;
EOF
# Clean up old data
psql -U readur -d readur_db <<EOF
DELETE FROM ocr_queue
WHERE status = 'completed'
AND completed_at < NOW() - INTERVAL '30 days';
DELETE FROM notifications
WHERE read = true
AND created_at < NOW() - INTERVAL '7 days';
EOF
OCR Processing Optimization¶
Tesseract Configuration¶
Optimize Tesseract settings for speed vs accuracy:
# Fast processing (lower accuracy)
OCR_ENGINE_MODE: 1 # LSTM engine only (the "fast" models below are LSTM-only)
OCR_PSM: 3 # Fully automatic page segmentation
OCR_TESSDATA_PREFIX: "/usr/share/tesseract-ocr/4.00/tessdata/fast"
# Balanced (recommended)
OCR_ENGINE_MODE: 1 # LSTM engine only
OCR_PSM: 3
OCR_DPI: 300
OCR_TESSDATA_PREFIX: "/usr/share/tesseract-ocr/4.00/tessdata"
# High accuracy (slower)
OCR_ENGINE_MODE: 1
OCR_PSM: 11 # Sparse text
OCR_DPI: 600
OCR_TESSDATA_PREFIX: "/usr/share/tesseract-ocr/4.00/tessdata/best"
Image Preprocessing¶
Optimize images before OCR:
use image::DynamicImage;

fn preprocess_for_ocr(img: DynamicImage) -> DynamicImage {
    let mut processed = img
        .grayscale()            // Convert to grayscale
        .adjust_contrast(20.0)  // Increase contrast
        .brighten(10);          // Adjust brightness

    // Resize if too large (resize() preserves the aspect ratio within the given bounds)
    if processed.width() > 3000 {
        processed = processed.resize(
            3000,
            3000 * processed.height() / processed.width(),
            image::imageops::FilterType::Lanczos3,
        );
    }

    // Denoise and deskew; denoise(), detect_skew(), and rotate() are
    // application-level helpers, not functions provided by the `image` crate
    processed = denoise(processed, 2);
    if let Some(angle) = detect_skew(&processed) {
        if angle.abs() > 0.5 {
            processed = rotate(&processed, -angle);
        }
    }

    processed
}
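A typical call site, sketched with the image crate's open and save helpers (the file paths are placeholders):

use image::ImageError;

fn prepare_page(input: &str, output: &str) -> Result<(), ImageError> {
    // Load the scanned page, clean it up, and write the result for the OCR stage
    let img = image::open(input)?;
    preprocess_for_ocr(img).save(output)?;
    Ok(())
}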
Parallel Processing¶
Configure concurrent OCR workers:
# OCR worker configuration
OCR_WORKER_COUNT: 4 # Number of parallel workers
OCR_QUEUE_SIZE: 100 # Maximum queue size
OCR_BATCH_SIZE: 10 # Documents per batch
OCR_TIMEOUT_SECONDS: 300 # Per-document timeout
Implement parallel processing:
use std::sync::Arc;
use std::time::Duration;
use sqlx::PgPool;
use tokio::sync::Semaphore;
async fn process_ocr_queue(pool: &PgPool, workers: usize) {
let semaphore = Arc::new(Semaphore::new(workers));
let mut tasks = Vec::new();
loop {
let permit = semaphore.clone().acquire_owned().await.unwrap();
let pool_clone = pool.clone();
let task = tokio::spawn(async move {
if let Some(job) = fetch_next_ocr_job(&pool_clone).await {
let _result = process_ocr_job(job, &pool_clone).await;
}
drop(permit);
});
tasks.push(task);
// Clean up completed tasks
tasks.retain(|task| !task.is_finished());
tokio::time::sleep(Duration::from_millis(100)).await;
}
}
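Wiring this into application startup might look like the sketch below, which reads the OCR_WORKER_COUNT setting shown earlier; anyhow for error handling and the DATABASE_URL variable are assumptions made for the example:

use sqlx::postgres::PgPoolOptions;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Worker count from the OCR_WORKER_COUNT setting above, defaulting to 4
    let workers: usize = std::env::var("OCR_WORKER_COUNT")
        .ok()
        .and_then(|v| v.parse().ok())
        .unwrap_or(4);

    let pool = PgPoolOptions::new()
        .max_connections(workers as u32 + 4) // Headroom for API queries alongside OCR
        .connect(&std::env::var("DATABASE_URL")?)
        .await?;

    process_ocr_queue(&pool, workers).await;
    Ok(())
}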
Memory and CPU Tuning¶
Memory Configuration¶
Optimize memory allocation:
# Application memory settings
RUST_MIN_STACK: 8388608 # 8MB stack size
RUST_BACKTRACE: 0 # Disable in production for performance
# Docker memory limits
docker run -d \
--memory="4g" \
--memory-swap="6g" \
--memory-reservation="2g" \
--cpus="2.0" \
readur:latest
CPU Optimization¶
Configure CPU affinity and priorities:
# Set CPU affinity for OCR workers
taskset -c 0-3 ./ocr_worker # Use cores 0-3
# Adjust process priority
nice -n -5 ./readur_server # Higher priority
# Configure thread pool
export TOKIO_WORKER_THREADS=8
export RAYON_NUM_THREADS=4
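If the thread pools should be fixed in code rather than through environment variables, the Tokio runtime can be built explicitly. A minimal sketch:

fn build_runtime() -> std::io::Result<tokio::runtime::Runtime> {
    tokio::runtime::Builder::new_multi_thread()
        .worker_threads(8)            // Async worker threads (matches the example above)
        .max_blocking_threads(32)     // Cap the blocking pool used for CPU-heavy work
        .thread_name("readur-worker")
        .enable_all()                 // Enable both the I/O and time drivers
        .build()
}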
Memory Pool Configuration¶
// Implement object pooling for frequent allocations
use lazy_static::lazy_static;
use object_pool::Pool;

lazy_static! {
    static ref BUFFER_POOL: Pool<Vec<u8>> = Pool::new(32, || Vec::with_capacity(1024 * 1024));
}
async fn process_document(data: &[u8]) -> Result<()> {
let mut buffer = BUFFER_POOL.pull();
buffer.clear();
buffer.extend_from_slice(data);
// Process using pooled buffer
let result = process(&buffer).await?;
// Buffer automatically returned to pool when dropped
Ok(result)
}
Connection Pooling¶
Database Connection Pool¶
Configure optimal pool settings:
use sqlx::postgres::PgPoolOptions;
use std::time::Duration;
let pool = PgPoolOptions::new()
.max_connections(32) // Maximum connections
.min_connections(5) // Minimum idle connections
.acquire_timeout(Duration::from_secs(10))  // Max wait for a connection from the pool
.idle_timeout(Duration::from_secs(600))
.max_lifetime(Duration::from_secs(1800))
.connect(&database_url)
.await?;
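Pool sizing is easier to tune when utilization is visible. A small monitoring task, sketched here with the tracing crate (the interval and log fields are illustrative):

use std::time::Duration;

async fn monitor_pool(pool: sqlx::PgPool) {
    let mut interval = tokio::time::interval(Duration::from_secs(60));
    loop {
        interval.tick().await;
        // size() counts open connections; num_idle() counts those not currently checked out
        tracing::info!(
            open = pool.size(),
            idle = pool.num_idle(),
            "database pool utilization"
        );
    }
}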
Redis Connection Pool¶
If using Redis for caching:
use deadpool_redis::{Config, Runtime};
use std::time::Duration;
let cfg = Config {
url: Some("redis://localhost:6379".to_string()),
pool: Some(deadpool::managed::PoolConfig {
max_size: 16,
timeouts: deadpool::managed::Timeouts {
wait: Some(Duration::from_secs(5)),
create: Some(Duration::from_secs(5)),
recycle: Some(Duration::from_secs(5)),
},
..Default::default()
}),
..Default::default()
};
let pool = cfg.create_pool(Some(Runtime::Tokio1))?;
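Checking a connection out of the pool and issuing commands follows the usual deadpool-redis pattern sketched below; the key, value, and TTL are placeholders, and anyhow is assumed for error handling:

use deadpool_redis::{redis, Pool};

async fn cache_set(pool: &Pool, key: &str, value: &str) -> anyhow::Result<()> {
    // Checked out from the pool configured above (bounded by max_size)
    let mut conn = pool.get().await?;
    // SETEX stores the value with a one-hour TTL
    let _: () = redis::cmd("SETEX")
        .arg(key)
        .arg(3600)
        .arg(value)
        .query_async(&mut conn)
        .await?;
    Ok(())
}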
Caching Strategies¶
Application-Level Caching¶
Implement multi-level caching:
use lazy_static::lazy_static;
use moka::future::Cache;
use std::time::Duration;
// L1 Cache: In-memory for hot data
lazy_static! {
static ref L1_CACHE: Cache<String, Document> = Cache::builder()
.max_capacity(1000)
.time_to_live(Duration::from_secs(300))
.build();
}
// L2 Cache: Redis for distributed caching
// (redis_get, redis_set, and fetch_from_db below are application-specific helpers)
async fn get_document_cached(id: &str) -> Result<Document> {
// Check L1 cache
if let Some(doc) = L1_CACHE.get(id).await {
return Ok(doc);
}
// Check L2 cache (Redis)
if let Some(doc) = redis_get(id).await? {
L1_CACHE.insert(id.to_string(), doc.clone()).await;
return Ok(doc);
}
// Fetch from database
let doc = fetch_from_db(id).await?;
// Update caches
L1_CACHE.insert(id.to_string(), doc.clone()).await;
redis_set(id, &doc, 3600).await?;
Ok(doc)
}
Query Result Caching¶
Cache expensive query results:
-- Materialized view for search statistics
CREATE MATERIALIZED VIEW search_stats AS
SELECT
user_id,
COUNT(*) as total_documents,
SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) as completed,
SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed,
AVG(processing_time_ms) as avg_processing_time
FROM documents
GROUP BY user_id;
-- Unique index required for REFRESH ... CONCURRENTLY
CREATE UNIQUE INDEX ON search_stats(user_id);
-- Refresh periodically (e.g., from a cron job or background task)
REFRESH MATERIALIZED VIEW CONCURRENTLY search_stats;
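The materialized view only pays off if it is actually refreshed. One option is a small background task that issues the refresh on a timer; a sketch assuming sqlx and tracing (the 15-minute interval is an example):

use std::time::Duration;

async fn refresh_search_stats(pool: sqlx::PgPool) {
    let mut interval = tokio::time::interval(Duration::from_secs(15 * 60));
    loop {
        interval.tick().await;
        // CONCURRENTLY keeps the view readable during the refresh (requires the unique index)
        if let Err(e) = sqlx::query("REFRESH MATERIALIZED VIEW CONCURRENTLY search_stats")
            .execute(&pool)
            .await
        {
            tracing::warn!(error = %e, "failed to refresh search_stats");
        }
    }
}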
Static Asset Caching¶
Configure CDN and browser caching:
location /static/ {
expires 1y;
add_header Cache-Control "public, immutable";
add_header Vary "Accept-Encoding";
# Enable gzip
gzip on;
gzip_types text/css application/javascript image/svg+xml;
gzip_vary on;
}
location /api/ {
add_header Cache-Control "no-cache, no-store, must-revalidate";
add_header Pragma "no-cache";
add_header Expires "0";
}
Performance Monitoring¶
Key Metrics¶
Monitor these critical metrics:
# Prometheus metrics configuration
metrics:
- name: http_request_duration_seconds
type: histogram
buckets: [0.01, 0.05, 0.1, 0.5, 1, 5]
- name: ocr_processing_duration_seconds
type: histogram
buckets: [1, 5, 10, 30, 60, 120]
- name: database_query_duration_seconds
type: histogram
buckets: [0.001, 0.005, 0.01, 0.05, 0.1]
- name: active_connections
type: gauge
- name: memory_usage_bytes
type: gauge
- name: cpu_usage_percent
type: gauge
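On the application side, each metric must be registered with the same buckets. A sketch using the prometheus crate; OcrJob and process_ocr are hypothetical names standing in for Readur's own types:

use lazy_static::lazy_static;
use prometheus::{register_histogram, Histogram};

lazy_static! {
    // Buckets mirror the ocr_processing_duration_seconds definition above
    static ref OCR_DURATION: Histogram = register_histogram!(
        "ocr_processing_duration_seconds",
        "Time spent running OCR on a single document",
        vec![1.0, 5.0, 10.0, 30.0, 60.0, 120.0]
    )
    .unwrap();
}

async fn run_ocr_instrumented(job: OcrJob) -> anyhow::Result<()> {
    // The timer records the elapsed time into the histogram when observed
    let timer = OCR_DURATION.start_timer();
    let result = process_ocr(job).await;
    timer.observe_duration();
    result
}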
Performance Dashboards¶
Grafana dashboard queries:
# Request latency P95
histogram_quantile(0.95,
  sum by (le) (rate(http_request_duration_seconds_bucket[5m])))
# OCR throughput
rate(ocr_documents_processed_total[5m])
# Database connection pool usage
database_connections_active / database_connections_max * 100
# Memory usage trend (memory_usage_bytes is a gauge, so average it rather than rate() it)
avg_over_time(memory_usage_bytes[5m])
Load Testing¶
Load Test Configuration¶
Use k6 for load testing:
// load-test.js
import http from 'k6/http';
import { check, sleep } from 'k6';

// Binary files must be opened in the init context, not inside the default function
const testFile = open('./test.pdf', 'b');
export let options = {
stages: [
{ duration: '2m', target: 100 }, // Ramp up
{ duration: '5m', target: 100 }, // Stay at 100 users
{ duration: '2m', target: 200 }, // Ramp up
{ duration: '5m', target: 200 }, // Stay at 200 users
{ duration: '2m', target: 0 }, // Ramp down
],
thresholds: {
http_req_duration: ['p(95)<500'], // 95% of requests under 500ms
http_req_failed: ['rate<0.1'], // Error rate under 10%
},
};
export default function() {
// Search test
let searchRes = http.get('http://localhost:8080/api/search?q=test');
check(searchRes, {
'search status is 200': (r) => r.status === 200,
'search response time < 500ms': (r) => r.timings.duration < 500,
});
sleep(1);
// Upload test (multipart upload of the file opened in the init context)
let uploadRes = http.post('http://localhost:8080/api/upload', {
  file: http.file(testFile, 'test.pdf'),
});
check(uploadRes, {
'upload status is 201': (r) => r.status === 201,
});
sleep(2);
}
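Run the scenario with k6 run load-test.js, adjusting the target URLs and stage durations to match the environment under test.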
Benchmarking OCR Performance¶
#!/bin/bash
# benchmark-ocr.sh
echo "OCR Performance Benchmark"
echo "========================="
# Test different configurations
for config in "fast" "balanced" "accurate"; do
echo "Testing $config configuration..."
export OCR_CONFIG=$config
time ./ocr_benchmark --input ./test_docs/ --output ./results_$config/
echo "Results for $config:"
echo " Documents processed: $(ls ./results_$config/ | wc -l)"
echo " Average accuracy: $(cat ./results_$config/accuracy.txt)"
echo ""
done
Optimization Checklist¶
Database Optimization¶
- Indexes are properly configured
- Query plans are optimized
- Connection pooling is tuned
- Vacuum and analyze run regularly
- Slow query log is monitored
- Table partitioning is used for large tables
Application Optimization¶
- Memory pools are configured
- Thread pools are sized correctly
- Caching is implemented
- Batch processing is used where applicable
- Async I/O is utilized
- Resource leaks are monitored
OCR Optimization¶
- Image preprocessing is enabled
- Parallel processing is configured
- Accuracy settings are appropriate for the workload
- Queue management is optimized
- Retry logic is efficient
- Resource limits are set
Infrastructure Optimization¶
- CPU cores are allocated properly
- Memory is sufficient
- Storage is fast (SSD/NVMe)
- Network latency is minimized
- Load balancing is configured
- Auto-scaling is enabled
Troubleshooting Performance Issues¶
High Memory Usage¶
# Check memory usage by process
ps aux --sort=-%mem | head -10
# Analyze memory allocations
valgrind --leak-check=full --show-leak-kinds=all ./readur
# Profile memory usage
heaptrack ./readur
heaptrack_gui heaptrack.readur.*.gz
Slow Queries¶
-- Enable slow query logging
ALTER SYSTEM SET log_min_duration_statement = 1000; -- Log queries over 1 second
SELECT pg_reload_conf();
-- Find slow queries
SELECT
query,
calls,
mean_exec_time,
total_exec_time
FROM pg_stat_statements
ORDER BY mean_exec_time DESC
LIMIT 10;
-- Analyze query plan
EXPLAIN (ANALYZE, BUFFERS, VERBOSE)
SELECT * FROM documents WHERE content ILIKE '%search%';
CPU Bottlenecks¶
# Profile CPU usage
perf record -g ./readur
perf report
# Generate flame graph
cargo install flamegraph
cargo flamegraph --bin readur
# Check CPU-bound processes
top -H -p $(pgrep readur)
Best Practices Summary¶
- Monitor First: Always measure before optimizing
- Cache Aggressively: Cache at multiple levels
- Batch Operations: Process in batches when possible
- Async Everything: Use async I/O for all operations
- Index Strategically: Create indexes based on query patterns
- Pool Resources: Use connection and object pools
- Profile Regularly: Profile in production-like environments
- Test Under Load: Regular load testing reveals bottlenecks
- Document Changes: Track all performance optimizations
- Incremental Improvements: Optimize iteratively, not all at once