PHP code example of yetidevworks / yetisearch

1. Go to the download page and download the yetidevworks/yetisearch library, choosing the `require` download type.

2. Extract the ZIP file and open the index.php.

3. Add this code to the index.php.
    
        
<?php
// Load Composer's autoloader from the vendor/ directory next to this file.
// Anchoring the path with __DIR__ keeps include_path and the current working
// directory from resolving a different vendor/autoload.php.
require_once __DIR__ . '/vendor/autoload.php';

/* Start to develop here. Best regards https://php-download.com/ */

    

yetidevworks / yetisearch example snippets



use YetiSearch\YetiSearch;

// Initialize YetiSearch with configuration
$config = [
    'storage' => [
        'path' => '/path/to/your/search.db'
    ]
];
$search = new YetiSearch($config);

// Create an index
$indexer = $search->createIndex('pages');

// Index a document
$indexer->insert([
    'id' => 'doc1',
    'content' => [
        'title' => 'Introduction to YetiSearch',
        'body' => 'YetiSearch is a powerful search engine library for PHP applications...',
        'url' => 'https://example.com/intro',
        'tags' => 'search php library'
    ]
]);

// Search for documents
$results = $search->search('pages', 'powerful search');

// Search with fuzzy matching enabled (uses trigram algorithm by default)
$fuzzyResults = $search->search('pages', 'powerfull serch', ['fuzzy' => true]);

// Display results
foreach ($results['results'] as $result) {
    echo $result['title'] . ' (Score: ' . $result['score'] . ")\n";
    echo $result['excerpt'] . "\n\n";
}

use YetiSearch\YetiSearch;

$search = new YetiSearch([
    'storage' => ['path' => './search.db']
]);

$indexer = $search->createIndex('articles');

// Index a single document
$document = [
    'id' => 'article-1',
    'content' => [
        'title' => 'Getting Started with PHP',
        'body' => 'PHP is a popular general-purpose scripting language...',
        'author' => 'John Doe',
        'category' => 'Programming',
        'tags' => 'php programming tutorial'
    ],
    'metadata' => [
        'date' => time()
    ]
];

$indexer->insert($document);

// Index multiple documents
$documents = [
    [
        'id' => 'article-2',
        'content' => [
            'title' => 'Advanced PHP Techniques',
            'body' => 'Let\'s explore advanced PHP programming techniques...',
            'author' => 'Jane Smith',
            'category' => 'Programming',
            'tags' => 'php advanced tips'
        ]
    ],
    [
        'id' => 'article-3',
        'content' => [
            'title' => 'PHP Performance Optimization',
            'body' => 'Optimizing PHP applications for better performance...',
            'author' => 'Bob Johnson',
            'category' => 'Performance',
            'tags' => 'php performance optimization'
        ]
    ]
];

$indexer->insert($documents);

// Flush to ensure all documents are written
$indexer->flush();

// Configure indexer with custom settings
$indexer = $search->createIndex('products', [
    'fields' => [
        'name' => ['boost' => 3.0, 'store' => true],
        'description' => ['boost' => 1.0, 'store' => true],
        'brand' => ['boost' => 2.0, 'store' => true],
        'sku' => ['boost' => 1.0, 'store' => true, 'index' => false],
        'price' => ['boost' => 1.0, 'store' => true, 'index' => false]
    ],
    'chunk_size' => 500,        // Smaller chunks for product descriptions
    'chunk_overlap' => 50,      // Overlap between chunks
    'batch_size' => 100         // Process 100 documents at a time
]);

// Index products with metadata
$product = [
    'id' => 'prod-123',
    'content' => [
        'name' => 'Professional PHP Development Book',
        'description' => 'A comprehensive guide to professional PHP development...',
        'brand' => 'TechBooks Publishing',
        'sku' => 'TB-PHP-001',
        'price' => 49.99
    ],
    'metadata' => [
        'in_stock' => true,
        'rating' => 4.5,
        'reviews' => 127
    ]
];

$indexer->insert($product);

// Basic search
$results = $search->search('articles', 'PHP programming');

// Advanced search with options
$results = $search->search('articles', 'advanced techniques', [
    'limit' => 20,
    'offset' => 0,
    'fields' => ['title', 'content', 'tags'],  // Search only in specific fields
    'highlight' => true,                       // Enable highlighting
    'fuzzy' => true,                          // Enable fuzzy matching
    'unique_by_route' => true,                // Deduplicate results (default)
    'filters' => [
        [
            'field' => 'category',
            'value' => 'Programming',
            'operator' => '='
        ],
        [
            'field' => 'date',
            'value' => strtotime('-30 days'),
            'operator' => '>='
        ]
    ],
    'boost' => [
        'title' => 3.0,
        'tags' => 2.0,
        'content' => 1.0
    ]
]);

// Process results
echo "Found {$results['total']} results in {$results['search_time']} seconds\n\n";

foreach ($results['results'] as $result) {
    echo "Title: " . $result['title'] . "\n";
    echo "Score: " . $result['score'] . "\n";
    echo "URL: " . $result['url'] . "\n";
    echo "Excerpt: " . $result['excerpt'] . "\n";
    echo "---\n";
}

// Available filter operators
$results = $search->search('products', 'laptop', [
    'filters' => [
        ['field' => 'category', 'value' => 'Electronics', 'operator' => '='],      // Exact match
        ['field' => 'price', 'value' => 500, 'operator' => '<'],                   // Less than
        ['field' => 'price', 'value' => 100, 'operator' => '>'],                   // Greater than
        ['field' => 'rating', 'value' => 4, 'operator' => '>='],                   // Greater or equal
        ['field' => 'stock', 'value' => 10, 'operator' => '<='],                   // Less or equal
        ['field' => 'brand', 'value' => 'Apple', 'operator' => '!='],              // Not equal
        ['field' => 'tags', 'value' => ['laptop', 'gaming'], 'operator' => 'in'],  // In array
        ['field' => 'title', 'value' => 'Pro', 'operator' => 'contains'],          // Contains text
        ['field' => 'metadata.warranty', 'operator' => 'exists'],                  // Field exists
    ]
]);

// Get all chunks (no deduplication)
$allChunks = $search->search('articles', 'PHP programming', [
    'unique_by_route' => false  // Show all matching chunks
]);

// Search with pagination
$page = 2;
$perPage = 10;
$results = $search->search('articles', 'PHP', [
    'limit' => $perPage,
    'offset' => ($page - 1) * $perPage
]);

// Faceted search
$results = $search->search('products', 'book', [
    'facets' => [
        'category' => ['limit' => 10],
        'brand' => ['limit' => 5],
        'price_range' => [
            'type' => 'range',
            'ranges' => [
                ['to' => 20],
                ['from' => 20, 'to' => 50],
                ['from' => 50]
            ]
        ]
    ]
]);

// Access facets
foreach ($results['facets']['category'] as $facet) {
    echo "{$facet['value']}: {$facet['count']} items\n";
}

// Search specific indexes
$results = $search->searchMultiple(['products', 'articles'], 'PHP book', [
    'limit' => 20
]);

// Search all indexes matching a pattern
$results = $search->searchMultiple(['content_*'], 'search term', [
    'limit' => 20
]);

// Results 

// Update a document
$indexer->update([
    'id' => 'article-1',
    'content' => [
        'title' => 'Getting Started with PHP 8',  // Updated title
        'body' => 'PHP 8 introduces many new features...',
        'author' => 'John Doe',
        'category' => 'Programming',
        'tags' => 'php php8 programming tutorial'
    ]
]);

// Delete a document
$indexer->delete('article-1');

// Clear entire index
$indexer->clear();

// Get index statistics
$stats = $indexer->getStats();
echo "Total documents: " . $stats['total_documents'] . "\n";
echo "Total size: " . $stats['total_size'] . " bytes\n";
echo "Average document size: " . $stats['avg_document_size'] . " bytes\n";

// Optimize index for better performance
$indexer->optimize();

$config = [
    'storage' => [
        'path' => '/path/to/search.db',
        'timeout' => 5000,              // Connection timeout in ms
        'busy_timeout' => 10000,        // Busy timeout in ms
        'journal_mode' => 'WAL',        // Write-Ahead Logging for better concurrency
        'synchronous' => 'NORMAL',      // Sync mode
        'cache_size' => -2000,          // Cache size in KB (negative = KB)
        'temp_store' => 'MEMORY'        // Use memory for temp tables
    ],
    'analyzer' => [
        'min_word_length' => 2,         // Minimum word length to index
        'max_word_length' => 50,        // Maximum word length to index
        'remove_numbers' => false,      // Keep numbers in index
        'lowercase' => true,            // Convert to lowercase
        'strip_html' => true,           // Remove HTML tags
        'strip_punctuation' => true,    // Remove punctuation
        'expand_contractions' => true,  // Expand contractions (e.g., don't -> do not)
        'custom_stop_words' => ['example', 'custom'], // Additional stop words to exclude
        'disable_stop_words' => false   // Set to true to disable all stop word filtering
    ],
    'indexer' => [
        'batch_size' => 100,            // Documents per batch
        'auto_flush' => true,           // Auto-flush after batch_size
        'chunk_size' => 1000,           // Characters per chunk
        'chunk_overlap' => 100,         // Overlap between chunks
        'fields' => [                   // Field configuration
            'title' => ['boost' => 3.0, 'store' => true],
            'content' => ['boost' => 1.0, 'store' => true],
            'excerpt' => ['boost' => 2.0, 'store' => true],
            'tags' => ['boost' => 2.5, 'store' => true],
            'category' => ['boost' => 2.0, 'store' => true],
            'author' => ['boost' => 1.5, 'store' => true],
            'url' => ['boost' => 1.0, 'store' => true, 'index' => false],
            'route' => ['boost' => 1.0, 'store' => true, 'index' => false]
        ]
    ],
    'search' => [
        'min_score' => 0.0,             // Minimum score threshold
        'highlight_tag' => '<mark>',    // Opening highlight tag
        'highlight_tag_close' => '</mark>', // Closing highlight tag
        'snippet_length' => 150,        // Length of snippets
        'max_results' => 1000,          // Maximum results to return
        'enable_fuzzy' => true,         // Enable fuzzy search
        'fuzzy_algorithm' => 'trigram', // 'trigram', 'jaro_winkler', or 'levenshtein'
        'levenshtein_threshold' => 2,   // Max edit distance for Levenshtein
        
        // NEW: Query result caching (v2.2.0+)
        'cache' => [
            'enabled' => false,         // Enable query result caching (default: false)
            'ttl' => 300,              // Cache time-to-live in seconds (5 minutes)
            'max_size' => 1000         // Maximum cached queries per index
        ],                              // Comma required: more 'search' keys follow
        
        // NEW: Multi-column FTS configuration (v2.1.0+)
        'multi_column_fts' => true,     // Use separate FTS columns for native BM25 weighting (default: true)
        
        // NEW: Exact match boosting (v2.1.0+)
        'exact_match_boost' => 2.0,     // Multiplier for exact phrase matches
        'exact_terms_boost' => 1.5,     // Multiplier for all exact terms present
        'fuzzy_score_penalty' => 0.5,   // Penalty factor for fuzzy-only matches
        
        // NEW: Two-pass search configuration (v2.1.0+)
        'two_pass_search' => false,     // Enable two-pass search for better primary field results
        'primary_fields' => ['title', 'h1', 'name', 'label'], // Fields to search in first pass
        'primary_field_limit' => 100,   // Max results from first pass
        
        'min_term_frequency' => 2,      // Min term frequency for fuzzy matching
        'max_indexed_terms' => 10000,   // Max indexed terms to check
        'max_fuzzy_variations' => 8,    // Max fuzzy variations per term
        'indexed_terms_cache_ttl' => 300, // Cache TTL for indexed terms
        'enable_suggestions' => true,   // Enable search suggestions
        'cache_ttl' => 300,             // Cache TTL in seconds
        'result_fields' => [            // Fields to 

$indexer = $search->createIndex('books', [
    'chunk_size' => 1000,      // 1000 characters per chunk
    'chunk_overlap' => 100     // 100 character overlap
]);

// Index a large document - it will be automatically chunked.
// Searchable fields are nested under the 'content' key, matching the document
// structure used by the other insert examples ('id' + 'content' array).
$indexer->insert([
    'id' => 'book-1',
    'content' => [
        'title' => 'War and Peace',
        'content' => $veryLongBookContent,  // Will be split into chunks
        'author' => 'Leo Tolstoy',
    ],
]);

// Search returns the best matching chunk by default
$results = $search->search('books', 'Napoleon');

// Get all matching chunks
$allChunks = $search->search('books', 'Napoleon', [
    'unique_by_route' => false
]);

// Simple string chunks
$indexer->insert([
    'id' => 'doc-1',
    'content' => ['title' => 'My Document'],
    'chunks' => [
        'Chapter 1: Introduction paragraph...',
        'Chapter 2: Main content paragraph...',
        'Chapter 3: Conclusion paragraph...'
    ]
]);

// Structured chunks with metadata
// Chunk text uses double-quoted strings so that "\n" is a real line break;
// inside single quotes PHP keeps it as a literal backslash followed by 'n'.
$indexer->insert([
    'id' => 'doc-2',
    'content' => ['title' => 'Technical Guide'],
    'chunks' => [
        [
            'content' => "## Getting Started\nFirst steps...",
            'metadata' => ['section' => 'intro', 'heading_level' => 2]
        ],
        [
            'content' => "### Installation\nHow to install...",
            'metadata' => ['section' => 'setup', 'heading_level' => 3]
        ]
    ]
]);

$config = [
    'indexer' => [
        'fields' => [
            'title' => ['boost' => 3.0],      // High-priority field
            'name' => ['boost' => 3.0],       // Another high-priority field
            'description' => ['boost' => 1.0], // Standard content field
            'tags' => ['boost' => 2.0],       // Medium priority
        ]
    ]
];

// With this configuration:
$indexer = $search->createIndex('movies', [
    'fields' => [
        'title' => ['boost' => 3.0],    // High-priority field
        'overview' => ['boost' => 1.0]  // Standard field
    ]
]);

// Searching for "star wars" will rank results as:
// 1. "Star Wars" (exact title match - huge bonus)
// 2. "Star Wars: Episode IV" (contains phrase but longer)
// 3. Movies with "star wars" in overview (lower boost field)

// Index documents in different languages.
// Searchable fields are nested under 'content' (as in the other insert
// examples); 'language' stays at the top level of the document.
// NOTE(review): the document-structure reference shows a language code ('en')
// while this example uses full names ('french', 'german') - confirm which form
// the analyzer expects.
$indexer->insert([
    'id' => 'doc-fr-1',
    'content' => [
        'title' => 'Introduction à PHP',
        'content' => 'PHP est un langage de programmation...',
    ],
    'language' => 'french'
]);

$indexer->insert([
    'id' => 'doc-de-1',
    'content' => [
        'title' => 'Einführung in PHP',
        'content' => 'PHP ist eine Programmiersprache...',
    ],
    'language' => 'german'
]);

// Search with language-specific stemming
$results = $search->search('pages', 'programmation', [
    'language' => 'french'
]);

// Configure custom stop words during initialization
$search = new YetiSearch([
    'analyzer' => [
        'custom_stop_words' => ['lorem', 'ipsum', 'dolor']
    ]
]);

// Or add them dynamically
$analyzer = $search->getAnalyzerInstance();
$analyzer->addCustomStopWord('example');
$analyzer->addCustomStopWord('test');

// Remove a custom stop word
$analyzer->removeCustomStopWord('test');

// Get all custom stop words
$customWords = $analyzer->getCustomStopWords();

// Disable all stop word filtering (not recommended)
$analyzer->setStopWordsDisabled(true);

use YetiSearch\Geo\GeoBounds;
use YetiSearch\Geo\GeoPoint;
use YetiSearch\Models\SearchQuery;

// Index documents with location data
$indexer->insert([
    'id' => 'coffee-shop-1',
    'content' => [
        'title' => 'Blue Bottle Coffee',
        'body' => 'Specialty coffee roaster and cafe'
    ],
    'geo' => [
        'lat' => 37.7825,
        'lng' => -122.4099
    ]
]);

// Search within radius of a point
// NOTE(review): $searchEngine is not defined anywhere in this snippet -
// presumably it comes from $search->getSearchEngine('<index name>'); confirm
// and define it before running this example.
$searchQuery = new SearchQuery('coffee');
$searchQuery->near(new GeoPoint(37.7749, -122.4194), 5000); // 5km radius
$results = $searchEngine->search($searchQuery);

// Search within bounding box
$searchQuery = new SearchQuery('restaurant');
$searchQuery->withinBounds(37.8, 37.7, -122.3, -122.5);
// Or with a GeoBounds object:
$bounds = new GeoBounds(37.8, 37.7, -122.3, -122.5);
$searchQuery->within($bounds);

// Sort results by distance
$searchQuery = new SearchQuery('food');
$searchQuery->sortByDistance(new GeoPoint(37.7749, -122.4194), 'asc');

// Combine text search with geo filters.
// The instantiation is wrapped in parentheses so the fluent chain parses on
// PHP versions before 8.4, where `new Foo(...)->bar()` is a syntax error.
$searchQuery = (new SearchQuery('italian restaurant'))
    ->near(new GeoPoint(37.7749, -122.4194), 3000)
    ->filter('price_range', '$$')
    ->limit(10);

// Results 

use YetiSearch\Geo\GeoUtils;

// Distance calculations
$distance = GeoUtils::distance($point1, $point2); // meters
$distance = GeoUtils::distanceBetween($lat1, $lng1, $lat2, $lng2);

// Unit conversions
$meters = GeoUtils::kmToMeters(5);
$meters = GeoUtils::milesToMeters(3.1);

// Format distance for display
echo GeoUtils::formatDistance(1500); // "1.5 km"
echo GeoUtils::formatDistance(1500, 'imperial'); // "0.9 mi"

// Parse various coordinate formats
$point = GeoUtils::parsePoint(['lat' => 37.7749, 'lng' => -122.4194]);
$point = GeoUtils::parsePoint([37.7749, -122.4194]);
$point = GeoUtils::parsePoint('37.7749,-122.4194');

// Index areas/regions with bounding boxes
$indexer->insert([
    'id' => 'downtown-sf',
    'content' => [
        'title' => 'Downtown San Francisco',
        'body' => 'Financial district and shopping area'
    ],
    'geo_bounds' => [
        'north' => 37.8,
        'south' => 37.77,
        'east' => -122.39,
        'west' => -122.42
    ]
]);

// Default behavior - returns unique documents (best chunk per document)
$uniqueResults = $search->search('pages', 'PHP framework');
echo "Found {$uniqueResults['total']} unique documents\n";

// Get all chunks including duplicates
$allChunks = $search->search('pages', 'PHP framework', [
    'unique_by_route' => false
]);
echo "Found {$allChunks['total']} total matching chunks\n";

$results = $search->search('pages', 'PHP programming', [
    'highlight' => true,
    'highlight_length' => 200  // Snippet length
]);

foreach ($results['results'] as $result) {
    // Excerpt will contain <mark>PHP</mark> and <mark>programming</mark>
    echo $result['excerpt'] . "\n";
}

// Custom highlight tags
$search = new YetiSearch([
    'search' => [
        'highlight_tag' => '<span class="highlight">',
        'highlight_tag_close' => '</span>'
    ]
]);

// Find results even with typos
$results = $search->search('pages', 'porgramming', [  // Note the typo
    'fuzzy' => true,
    'fuzziness' => 0.8  // 0.0 to 1.0 (higher = stricter)
]);

// Will still find documents about "programming"

// Configure fuzzy search algorithms
$config = [
    'search' => [
        'enable_fuzzy' => true,
        'fuzzy_algorithm' => 'trigram',         // Options: 'trigram', 'jaro_winkler', 'levenshtein'
        'levenshtein_threshold' => 2,           // Max edit distance for Levenshtein (default: 2)
        'min_term_frequency' => 2,              // Min occurrences for a term to be indexed
        'max_indexed_terms' => 10000,           // Max terms to check for fuzzy matches
        'max_fuzzy_variations' => 8,            // Max variations per search term
        'fuzzy_score_penalty' => 0.4,           // Score reduction for fuzzy matches (0.0-1.0)
        'indexed_terms_cache_ttl' => 300        // Cache indexed terms for 5 minutes
    ]
];

$search = new YetiSearch($config);

// Search with advanced fuzzy matching
$results = $search->search('movies', 'Amakin Dkywalker', ['fuzzy' => true]);
// Will find "Anakin Skywalker" despite multiple typos

// Enable caching during initialization
$search = new YetiSearch([
    'search' => [
        'cache' => [
            'enabled' => true,      // Enable query caching
            'ttl' => 300,          // Cache for 5 minutes
            'max_size' => 1000     // Store up to 1000 queries per index
        ]
    ]
]);

// Searches are automatically cached
$results = $search->search('articles', 'PHP programming');  // First search: ~5ms
$results = $search->search('articles', 'PHP programming');  // Cached: <0.5ms

// Get cache statistics
$stats = $search->getCacheStats('articles');
echo "Cache hit rate: " . $stats['hit_rate'] . "%\n";
echo "Total cached queries: " . $stats['total_entries'] . "\n";

// Clear cache manually
$search->clearCache('articles');

// Warm up cache with common queries
$search->warmUpCache('articles', ['PHP', 'Laravel', 'Symfony']);

// Multi-column FTS is enabled by default for optimal performance
$config = [
    'search' => [
        'multi_column_fts' => true,      // Default: true - Use separate FTS columns
        'exact_match_boost' => 2.0,      // Boost for exact phrase matches
        'exact_terms_boost' => 1.5,      // Boost when all exact terms are present
        'field_weights' => [
            'title' => 10.0,              // Title matches score 10x higher
            'h1' => 8.0,                  // H1 headings score 8x higher
            'tags' => 5.0,                // Tags score 5x higher
            'content' => 1.0              // Base content weight
        ]
    ]
];

$search = new YetiSearch($config);

// Create an index with custom fields
$search->createIndex('articles', [
    'fields' => [
        'title' => ['boost' => 10.0],
        'h1' => ['boost' => 8.0],
        'tags' => ['boost' => 5.0],
        'content' => ['boost' => 1.0]
    ]
]);

$config = [
    'search' => [
        'two_pass_search' => true,        // Default: false (for performance)
        'primary_fields' => ['title', 'h1', 'name', 'label'],
        'primary_field_limit' => 100      // Documents to retrieve in first pass
    ]
];

// Two-pass search prioritizes primary fields
$results = $search->search('articles', 'scheduler');
// First pass: Searches title/h1 with doubled weights
// Second pass: Searches all fields and merges results
// Result: Page with title="Scheduler" ranks at the top

// Method 1: Recreate the index (recommended)
$search->dropIndex('articles');
$search->createIndex('articles', [
    'fields' => ['title', 'content', 'tags']  // Specify your fields
]);
// Re-index your documents

// Method 2: Use migration script
// From command line:
// php scripts/migrate_fts.php --index=articles --multi-column

// Old configuration (still works)
$config = [
    'search' => [
        'field_weights' => [
            'title' => 3.0,
            'content' => 1.0
        ]
    ]
];

// New optimized configuration
$config = [
    'search' => [
        'multi_column_fts' => true,      // Enable multi-column FTS (default)
        'exact_match_boost' => 2.0,      // Boost exact matches
        'exact_terms_boost' => 1.5,      // Boost when all terms match
        'field_weights' => [
            'title' => 10.0,              // Increase weights for better differentiation
            'content' => 1.0
        ]
    ]
];

// Index a documentation site with proper field weighting
$search = new YetiSearch([
    'search' => [
        'multi_column_fts' => true,      // Default - enables native field weighting
        'exact_match_boost' => 2.0,      // Exact "scheduler" gets 2x boost
        'field_weights' => [
            'title' => 10.0,              // Title matches are most important
            'h1' => 8.0,
            'h2' => 5.0,
            'content' => 1.0
        ]
    ]
]);

// Index documents with structured fields.
// Uses the facade's insert(indexName, documentData) method, the name listed
// under "Index operations" in the API summary.
$search->insert('docs', [
    'id' => 'scheduler-page',
    'content' => [
        'title' => 'Scheduler',           // Exact match in high-weight field
        'h1' => 'Task Scheduler Guide',
        'content' => 'The scheduler allows you to run tasks...'
    ]
]);

$search->insert('docs', [
    'id' => 'generic-page',
    'content' => [
        'title' => 'Configuration Guide',
        'content' => 'You can configure the scheduler here...' // Only mentions scheduler
    ]
]);

// Search for "scheduler"
$results = $search->search('docs', 'scheduler');

// Results ranking (v2.1.0):
// 1. scheduler-page (Score: ~150) - Exact title match + h1 match
// 2. generic-page (Score: ~20) - Only content mention

// Previous version results:
// 1. generic-page (Score: ~25) - Multiple mentions
// 2. scheduler-page (Score: ~22) - Title boost not effective enough

// For best performance (3-5ms searches)
$config = [
    'search' => [
        'fuzzy_algorithm' => 'trigram',      // Fast algorithm
        'min_term_frequency' => 5,           // Skip rare terms
        'max_indexed_terms' => 5000,         // Check fewer terms
        'indexed_terms_cache_ttl' => 600    // Cache for 10 minutes
    ]
];

// For best accuracy (handles more typos)
$config = [
    'search' => [
        'fuzzy_algorithm' => 'levenshtein',
        'levenshtein_threshold' => 2,        // Allow 2 edits
        'min_term_frequency' => 1,           // Include all terms
        'max_indexed_terms' => 20000,        // Check more terms
        'fuzzy_score_penalty' => 0.3        // Lower penalty for fuzzy matches
    ]
];

// Run benchmarks to compare algorithm performance
use YetiSearch\Tools\FuzzyBenchmark;

$benchmark = new FuzzyBenchmark($search);
$results = $benchmark->runAllBenchmarks();

// Results show accuracy and performance metrics for each algorithm
foreach ($results as $algorithm => $metrics) {
    echo "$algorithm: {$metrics['accuracy']}% accuracy, {$metrics['avg_time']}ms avg search time\n";
}

$results = $search->search('products', 'laptop', [
    'facets' => [
        'brand' => ['limit' => 10],
        'category' => ['limit' => 5],
        'price' => [
            'type' => 'range',
            'ranges' => [
                ['to' => 500, 'key' => 'budget'],
                ['from' => 500, 'to' => 1000, 'key' => 'mid-range'],
                ['from' => 1000, 'key' => 'premium']
            ]
        ]
    ],
    'aggregations' => [
        'avg_price' => ['type' => 'avg', 'field' => 'price'],
        'max_price' => ['type' => 'max', 'field' => 'price'],
        'min_price' => ['type' => 'min', 'field' => 'price']
    ]
]);

// Display facets
foreach ($results['facets']['brand'] as $brand) {
    echo "{$brand['value']}: {$brand['count']} products\n";
}

// Display aggregations
echo "Average price: $" . $results['aggregations']['avg_price'] . "\n";

use YetiSearch\Geo\GeoPoint;
use YetiSearch\Models\SearchQuery;

// Geo search through the engine API: query 'coffee' around a center point,
// sort by distance, then print each hit's title and rounded distance.
// NOTE(review): the radius is passed as 5 and described as kilometres here,
// while another example in this document passes 5000 for a 5 km radius. The
// unit presumably depends on the 'geo_units' search setting - confirm before
// relying on either value.
$engine = $search->getSearchEngine('places');
$center = new GeoPoint(40.7128, -74.0060); // NYC
$q = (new SearchQuery('coffee'))
  ->near($center, 5)                  // radius 5 km
  // or pass via facade options: ['geoFilters' => ['near' => ['point'=>..., 'radius'=>5, 'units'=>'km']]]
  ->sortByDistance($center, 'asc');   // nearest first

$results = $engine->search($q);
foreach ($results->getResults() as $r) {
  echo $r->get('title') . ' - ' . round($r->getDistance()) . " m\n";
}

$search = new YetiSearch([
  'search' => [
    'geo_units' => 'km',   // default units for near() radius if units not specified per query
  ]
]);

$search = new YetiSearch([
  'search' => [
    // Mix distance into final score (0.0..1.0)
    'distance_weight' => 0.5,
    // Decay factor per km (higher = faster decay)
    'distance_decay_k' => 0.01,
  ]
]);

// With distance_weight > 0, results incorporate both BM25 text score and proximity

// Request distance facets (ranges in chosen units)
$faceted = $search->search('places', 'coffee', [
  'facets' => [
    'distance' => [
      'from' => ['lat' => 40.7128, 'lng' => -74.0060],
      'ranges' => [1, 5, 10],   // thresholds
      'units' => 'km'           // optional (default 'm')
    ]
  ],
  'geoFilters' => [
    'distance_sort' => ['from' => ['lat'=>40.7128,'lng'=>-74.0060], 'direction' => 'asc']
  ]
]);

// Read facet buckets
foreach (($faceted['facets']['distance'] ?? []) as $bucket) {
  echo $bucket['value'] . ': ' . $bucket['count'] . "\n"; // e.g., "<= 1 km: 12"
}

$knn = $search->search('places', '', [
  'geoFilters' => [
    'nearest' => 5, // or ['k' => 5]
    'distance_sort' => ['from' => ['lat'=>40.7128,'lng'=>-74.0060], 'direction' => 'asc'],
    'max_distance' => 10, // optional clamp
    'units' => 'km'       // interpret nearest/max_distance in km
  ],
  'limit' => 5
]);

// API summary. The lines below are method signatures written in call position
// (parameter types and default values are shown for reference); they are not
// executable PHP as written - replace each typed parameter with a real argument.

// Create instance
$search = new YetiSearch(array $config = []);

// Index management
$indexer = $search->createIndex(string $name, array $options = []);
$indexer = $search->getIndexer(string $name);

// Search operations
$results = $search->search(string $indexName, string $query, array $options = []);
$count = $search->count(string $indexName, string $query, array $options = []);
$suggestions = $search->suggest(string $indexName, string $term, array $options = []);

// Index operations
$search->insert(string $indexName, array $documentData);
$search->insertBatch(string $indexName, array $documents);
$search->update(string $indexName, array $documentData);
$search->delete(string $indexName, string $documentId);
$search->clear(string $indexName);
$search->optimize(string $indexName);
$search->getStats(string $indexName);

$document = [
    'id' => 'unique-id',          // Required: unique identifier
    'content' => [                // Required: content fields to index
        'title' => 'Document Title',
        'body' => 'Main content...',
        'author' => 'John Doe',
        // ... any other fields
    ],
    'metadata' => [               // Optional: non-indexed metadata
        'created_at' => time(),
        'status' => 'published',
        // ... any other metadata
    ],
    'language' => 'en',           // Optional: language code
    'type' => 'article',          // Optional: document type
    'timestamp' => time(),        // Optional: defaults to current time
    'geo' => [                    // Optional: geographic point
        'lat' => 37.7749,
        'lng' => -122.4194
    ],
    'geo_bounds' => [             // Optional: geographic bounds
        'north' => 37.8,
        'south' => 37.7,
        'east' => -122.3,
        'west' => -122.5
    ]
];

$document = [
    'id' => 'product-123',
    'content' => [
        'name' => 'Wireless Headphones',
        'description' => 'High-quality Bluetooth headphones with noise cancellation',
        'brand' => 'TechAudio',
        'features' => 'bluetooth wireless noise-cancelling comfortable'
    ],
    'metadata' => [
        'price' => 149.99,           // Don't want searches for "149.99" to match
        'sku' => 'TA-WH-2024-BK',   // Internal reference code
        'stock_count' => 42,         // Numeric data not meant for text search
        'warehouse_id' => 'WH-03',   // Internal data
        'cost' => 89.50,            // Sensitive data
        'last_restock' => time()     // System tracking
    ]
];

// Create query
// $queryString and $options are placeholders for the caller's search text
// and option array.
$query = new SearchQuery($queryString, $options);

// Query building
// The calls are chained, so each method is invoked on the value returned by
// the previous one. $limit and $offset are placeholders for the caller's
// paging values.
$query->limit($limit)
      ->offset($offset)
      ->inFields(['title', 'content'])
      ->filter('category', 'tech')
      ->sortBy('date', 'desc')
      ->fuzzy(true)
      ->boost('title', 2.0)
      ->highlight(true);

// Shape of the array returned by a search call. Illustrative only: the
// `[...]` entries are elided placeholders, so this is not runnable PHP.
[
    'results' => [
        [
            'id' => 'doc-123',
            'score' => 85.5,              // Relevance score (0-100)
            'title' => 'Document Title',   // From content fields
            'content' => '...',           // Other content fields
            'excerpt' => '...<mark>highlighted</mark>...', // With highlights if enabled
            'metadata' => [...],          // Metadata fields
            'distance' => 1234.5,         // Distance in meters (if geo search)
            // ... other fields
        ],
        // ... more results
    ],
    'total' => 42,                // Total matching documents
    'count' => 20,                // Results in this page
    'search_time' => 0.023,       // Search time in seconds
    'facets' => [...],            // If facets requested
]

// Catch clauses are tried in order, so the library-specific exception types
// are listed first and the generic \Exception handler comes last as the
// fallback for anything they did not match.
try {
    $results = $search->search('index-name', 'query');
} catch (\YetiSearch\Exceptions\StorageException $e) {
    // Handle storage/database errors
    error_log('Storage error: ' . $e->getMessage());
} catch (\YetiSearch\Exceptions\IndexException $e) {
    // Handle indexing errors
    error_log('Index error: ' . $e->getMessage());
} catch (\Exception $e) {
    // Handle other errors
    error_log('Search error: ' . $e->getMessage());
}

// Configuration variant: larger indexing batches and chunks, manual
// flushing, and fuzzy indexing turned off.
// NOTE(review): with auto_flush disabled the caller presumably has to flush
// explicitly - confirm the exact call against the Indexer API.
$config = [
    'indexer' => [
        'batch_size' => 250,          // Larger batches
        'auto_flush' => false,        // Manual flushing
        'chunk_size' => 2000,         // Larger chunks
    ],
    'search' => [
        'enable_fuzzy' => false,      // Disable fuzzy indexing
    ]
];

// Configuration variant: bigger storage cache, in-memory temp tables, the
// fastest fuzzy algorithm, and a one-hour result cache.
// NOTE(review): the negative cache_size presumably maps to SQLite's
// PRAGMA cache_size, where a negative value is a size in KiB (so -64000 is
// roughly 64MB) - confirm against the storage backend.
$config = [
    'storage' => [
        'cache_size' => -64000,       // 64MB cache
        'temp_store' => 'MEMORY',     // Memory temp tables
    ],
    'search' => [
        'fuzzy_algorithm' => 'basic', // Fastest fuzzy algorithm
        'cache_ttl' => 3600,          // 1-hour result cache
    ]
];

// Configuration variant: Levenshtein fuzzy matching with a low score cutoff
// so that more results are returned.
// NOTE(review): result scores are documented in this file as 0-100; confirm
// which scale min_score is compared against.
$config = [
    'search' => [
        'fuzzy_algorithm' => 'levenshtein',
        'levenshtein_threshold' => 2,
        'min_score' => 0.1,           // Include more results
    ]
];

// Type-ahead friendly search
// $query is a placeholder for the partially typed user input.
$results = $search->search('movies', $query, [
    'limit' => 8,
    'fields' => ['title','overview','url'],
    'fuzzy' => true,
    'fuzzy_last_token_only' => true,   // fuzz just the last term
    'prefix_last_token' => true,       // prefix-match the last term (needs the FTS prefix index)
]);

// Multi-column FTS setup: per-field boosts, an optional FTS5 prefix index,
// and last-token prefix matching at search time. The prefix search option
// depends on the 'prefix' index option in the same config.
$config = [
  'indexer' => [
    'fields' => [                      // boosts become BM25 weights
      'title' => ['boost' => 3.0, 'store' => true],
      'overview' => ['boost' => 1.0, 'store' => true],
      'tags' => ['boost' => 2.0, 'store' => true],
    ],
    'fts' => [
      'multi_column' => true,          // create FTS with per-field columns
      'prefix' => [2,3],               // enable FTS5 prefix index (optional)
    ],
  ],
  'search' => [
    'prefix_last_token' => true,       // use last-token prefix (needs prefix option above)
  ],
];
$search = new YetiSearch($config);
$indexer = $search->createIndex('movies');
// Reindex to apply schema changes; or use scripts/migrate_fts.php to migrate existing data

// Request suggestions for the (partial) input in $query from the 'movies'
// index; the options bound the result count and weight title matches.
$suggestions = $search->suggest('movies', $query, [
  'limit' => 8,         // max suggestions to return
  'per_variant' => 5,   // results checked per fuzzy variant
  'title_boost' => 100.0, // extra weight if title contains the variant
  'prefix_boost' => 25.0, // extra weight if title starts with the variant
]);

// Print the 'text' entry of every suggestion, one per line.
foreach ($suggestions as $suggestion) {
  echo "{$suggestion['text']}\n";
}

// Enable synonym handling under the 'search' config key and supply the
// synonym map inline.
// NOTE(review): synonyms_max_expansions presumably caps how many synonyms are
// added per term - confirm against the search engine implementation.
$search = new YetiSearch([
  'search' => [
    'enable_synonyms' => true,
    // Flat map or per-language: ['en' => ['nyc' => ['new york','new york city']]]
    'synonyms' => [
      'nyc' => ['new york', 'new york city'],
      'la'  => ['los angeles']
    ],
    'synonyms_case_sensitive' => false,
    'synonyms_max_expansions' => 3,
  ]
]);

use YetiSearch\DSL\QueryBuilder;

// $yetiSearch is assumed to be an already-configured YetiSearch instance.
$builder = new QueryBuilder($yetiSearch);

// Natural language DSL
// The first argument is the index name, the second the DSL string.
// NOTE(review): the leading '-' in "SORT -created_at" presumably means
// descending order - confirm against the DSL grammar.
$results = $builder->searchWithDSL('articles',
    'author = "John" AND status IN [published] SORT -created_at LIMIT 10'
);

// Complex query with multiple conditions
// The DSL string is split across two PHP literals; the trailing space before
// the concatenation keeps "500" and "FIELDS" separated.
$results = $builder->searchWithDSL('products',
    'category = "electronics" AND price > 100 AND price < 500 ' .
    'FIELDS name,price,brand SORT -rating PAGE 1,20'
);

// Parse URL query parameters
// NOTE(review): QUERY_STRING is client-controlled input; confirm how the
// builder validates field names and operators before exposing this publicly.
$results = $builder->searchWithURL('articles', $_SERVER['QUERY_STRING']);

// Or use array format
// Same call with the parameters already decoded into an array: 'q' holds the
// search text, 'filter' maps field => [operator => value].
$results = $builder->searchWithURL('articles', [
    'q' => 'search term',
    'filter' => [
        'author' => ['eq' => 'John'],
        'status' => ['in' => 'published,featured']
    ],
    'sort' => '-created_at',
    'page' => ['limit' => 10, 'offset' => 20]
]);

// Fluent builder: query() starts the chain with the search text and the
// final call (get(), first() or count()) produces the value that is assigned.
$results = $builder->query('search term')
    ->in('articles')
    ->where('status', 'published')
    ->whereIn('category', ['tech', 'programming'])
    ->whereBetween('price', 10, 100)
    ->orderBy('created_at', 'desc')
    ->fuzzy(true, 0.8) // NOTE(review): second argument presumably a fuzziness threshold - confirm
    ->limit(20)
    ->get();

// Get just the first result
$first = $builder->query('specific term')
    ->in('articles')
    ->where('id', 123)
    ->first();

// Get count only
$count = $builder->query('golang')
    ->in('articles')
    ->where('status', 'published')
    ->count();

// Index documents with proper structure
// Text to search goes under 'content'; values used for filtering and sorting
// go under 'metadata'.
$yetiSearch->index('products', [
    'id' => 'prod-123',
    'content' => [
        // Full-text searchable fields
        'title' => 'Wireless Headphones',
        'description' => 'Premium audio quality'
    ],
    'metadata' => [
        // Filterable/sortable fields
        'price' => 299.99,
        'brand' => 'AudioTech',
        'rating' => 4.5,
        'in_stock' => true
    ]
]);

// Configure custom metadata fields for your application
// The list names the same keys that the document above stores under 'metadata'.
$builder = new QueryBuilder($yetiSearch, [
    'metadata_fields' => ['price', 'brand', 'rating', 'in_stock']
]);

// Use metadata fields naturally in queries
// 'price' and 'rating' are referenced by bare name in the DSL string.
$results = $builder->searchWithDSL('products', 
    'headphones AND price < 300 AND rating >= 4 SORT -rating'
);
bash
php examples/apartment-search-simple.php
bash
# Using CLI
bin/yetisearch migrate-external --db=benchmarks/benchmark.db --index=movies

# Or standalone script
php scripts/migrate_external_content.php --db=benchmarks/benchmark.db --index=movies

YetiSearch/
├── Analyzers/          # Text analysis and tokenization
│   └── StandardAnalyzer.php
├── Contracts/          # Interfaces for extensibility
│   ├── AnalyzerInterface.php
│   ├── IndexerInterface.php
│   ├── SearchEngineInterface.php
│   └── StorageInterface.php
├── Index/              # Indexing logic
│   └── Indexer.php
├── Models/             # Data models
│   ├── Document.php
│   ├── SearchQuery.php
│   └── SearchResult.php
├── Search/             # Search implementation
│   └── SearchEngine.php
└── Storage/            # Storage backends
    └── SqliteStorage.php
bash
php examples/type-ahead.php --interactive
bash
php examples/type-ahead.php "anaki skywa"
bash
php scripts/migrate_fts.php --db=benchmarks/benchmark.db --index=movies --prefix=2,3