Skip to content

SIMD Compression Quick Start Guide

Quick reference for using SIMD-optimized compression in HeliosDB-Lite.

Basic Usage

1. Standard FSST Compression

use heliosdb_lite::storage::compression::fsst::FsstEncoder;

// Train encoder on sample data
let samples = vec!["email@example.com", "user@example.org"];
let encoder = FsstEncoder::train(&samples)?;

// Compress batch (uses CHUNK_SIZE=128 automatically)
let strings = vec![/* your data */];
let compressed = encoder.compress_batch_preallocated(&strings)?;

2. SIMD-Optimized Compression

use heliosdb_lite::storage::compression::fsst::FsstEncoder;
use heliosdb_lite::storage::compression::simd_ops::SimdBatchProcessor;

// Train encoder
let samples = vec!["email@example.com", "user@example.org"];
let encoder = FsstEncoder::train(&samples)?;

// Create SIMD processor (auto-detects CPU features)
let mut processor = SimdBatchProcessor::new();

// Compress with SIMD optimization
let strings = vec![/* your data */];
let compressed = encoder.compress_batch_simd(&strings, &mut processor)?;

// Get statistics
let (compressed, stats) = encoder.compress_with_stats_simd(&strings, &mut processor)?;
println!("Compression ratio: {:.2}x", stats.compression_ratio);

Performance Tips

Choose the Right Method

| Method                          | Best For                     | Throughput |
|---------------------------------|------------------------------|------------|
| `compress_batch()`              | Small batches (<100 strings) | Baseline   |
| `compress_batch_preallocated()` | Medium batches (100-1000)    | +10%       |
| `compress_batch_simd()`         | Large batches (>1000)        | +25%       |

Batch Size Guidelines

  • < 100 strings: Use compress_batch()
  • 100-1000 strings: Use compress_batch_preallocated()
  • > 1000 strings: Use compress_batch_simd() with processor

Buffer Pool Configuration

// Default configuration (recommended)
let processor = SimdBatchProcessor::new();

// Custom buffer pool
let processor = SimdBatchProcessor::with_buffer_pool(
    8192,  // buffer_size: 8KB per buffer
    32,    // initial_count: pre-allocate 32 buffers
    128,   // max_pooled: keep up to 128 buffers
);

CPU Feature Detection

use heliosdb_lite::vector::simd::cpu_features;

let features = cpu_features();
println!("Available SIMD: {}", features.description());

if features.avx2 {
    println!("Using AVX2 optimizations");
} else if features.sse42 {
    println!("Using SSE4.2 optimizations");
} else {
    println!("Using scalar fallback");
}

Benchmarking

# Run all compression benchmarks
cargo bench --bench compression_simd_bench

# Compare methods
cargo bench --bench compression_simd_bench fsst_batch_standard
cargo bench --bench compression_simd_bench fsst_batch_simd

# Test with different data types
cargo bench --bench compression_simd_bench fsst_simd_datasets

Common Patterns

Pattern 1: High-Throughput Batch Compression

let mut processor = SimdBatchProcessor::new();
let encoder = FsstEncoder::train(&training_samples)?;

for batch in data.chunks(10000) {
    let compressed = encoder.compress_batch_simd(batch, &mut processor)?;
    // Process compressed data
}

Pattern 2: Adaptive Compression with Statistics

let mut processor = SimdBatchProcessor::new();
let encoder = FsstEncoder::train(&training_samples)?;

let (compressed, stats) = encoder.compress_with_stats_simd(&data, &mut processor)?;

if stats.compression_ratio > 1.5 {
    // Good compression, store compressed
    store_compressed(compressed);
} else {
    // Poor compression, store uncompressed
    store_uncompressed(data);
}

Pattern 3: Buffer Pool Reuse

let mut processor = SimdBatchProcessor::new();

loop {
    let buffer = processor.acquire_buffer();

    // Use buffer for compression
    // ...

    processor.release_buffer(buffer);
}

Performance Metrics

Expected Throughput (AMD EPYC 7401P)

| Data Type | Standard | SIMD-Optimized | Improvement |
|-----------|----------|----------------|-------------|
| Emails    | 500 MB/s | 625 MB/s       | +25%        |
| URLs      | 480 MB/s | 600 MB/s       | +25%        |
| Logs      | 550 MB/s | 687 MB/s       | +25%        |

Memory Overhead

| Batch Size    | Standard | SIMD-Optimized | Reduction |
|---------------|----------|----------------|-----------|
| 100 strings   | 2.4 KB   | 1.2 KB         | 50%       |
| 1000 strings  | 24 KB    | 12 KB          | 50%       |
| 10000 strings | 240 KB   | 120 KB         | 50%       |

Troubleshooting

Issue: No SIMD Acceleration

Check CPU features:

let features = cpu_features();
assert!(features.avx2 || features.sse42, "No SIMD support detected");

Solution: Ensure running on x86_64 architecture with AVX2 or SSE4.2 support.

Issue: Poor Performance on Small Batches

Problem: SIMD overhead dominates for small batches

Solution: Use standard methods for batches < 100 strings:

if strings.len() < 100 {
    encoder.compress_batch(&strings)
} else {
    encoder.compress_batch_simd(&strings, &mut processor)
}

Issue: High Memory Usage

Problem: Buffer pool growing too large

Solution: Configure smaller pool:

let processor = SimdBatchProcessor::with_buffer_pool(4096, 8, 32);

Best Practices

  1. Reuse Processor: Create once, use many times
  2. Batch Appropriately: Group data into 1000-10000 string batches
  3. Monitor Statistics: Use compress_with_stats_simd() for adaptive compression
  4. Profile First: Benchmark your specific workload
  5. Buffer Pool Tuning: Adjust based on memory constraints

See Also