PHP code example of yetidevworks / yetisearch

1. Go to the download page for the yetidevworks/yetisearch library and choose the download type you require.

2. Extract the ZIP file and open the index.php.

3. Add this code to the index.php.
    
        
<?php
// Load Composer's autoloader relative to this file instead of the current
// working directory, so the script works regardless of where it is launched from.
require_once __DIR__ . '/vendor/autoload.php';

/* Start to develop here. Best regards https://php-download.com/ */

    

yetidevworks / yetisearch example snippets



use YetiSearch\YetiSearch;

// Initialize YetiSearch with configuration
$config = [
    'storage' => [
        'path' => '/path/to/your/search.db'
    ]
];
$search = new YetiSearch($config);

// Create an index
$indexer = $search->createIndex('pages');

// Index a document
$indexer->insert([
    'id' => 'doc1',
    'content' => [
        'title' => 'Introduction to YetiSearch',
        'body' => 'YetiSearch is a powerful search engine library for PHP applications...',
        'url' => 'https://example.com/intro',
        'tags' => 'search php library'
    ]
]);

// Search for documents
$results = $search->search('pages', 'powerful search');

// Search with fuzzy matching enabled (uses trigram algorithm by default)
$fuzzyResults = $search->search('pages', 'powerfull serch', ['fuzzy' => true]);

// Display results
foreach ($results['results'] as $result) {
    echo $result['title'] . ' (Score: ' . $result['score'] . ")\n";
    echo $result['excerpt'] . "\n\n";
}

use YetiSearch\YetiSearch;

$search = new YetiSearch([
    'storage' => ['path' => './search.db']
]);

$indexer = $search->createIndex('articles');

// Index a single document
$document = [
    'id' => 'article-1',
    'content' => [
        'title' => 'Getting Started with PHP',
        'body' => 'PHP is a popular general-purpose scripting language...',
        'author' => 'John Doe',
        'category' => 'Programming',
        'tags' => 'php programming tutorial'
    ],
    'metadata' => [
        'date' => time()
    ]
];

$indexer->insert($document);

// Index multiple documents
$documents = [
    [
        'id' => 'article-2',
        'content' => [
            'title' => 'Advanced PHP Techniques',
            'body' => 'Let\'s explore advanced PHP programming techniques...',
            'author' => 'Jane Smith',
            'category' => 'Programming',
            'tags' => 'php advanced tips'
        ]
    ],
    [
        'id' => 'article-3',
        'content' => [
            'title' => 'PHP Performance Optimization',
            'body' => 'Optimizing PHP applications for better performance...',
            'author' => 'Bob Johnson',
            'category' => 'Performance',
            'tags' => 'php performance optimization'
        ]
    ]
];

$indexer->insert($documents);

// Flush to ensure all documents are written
$indexer->flush();

// Configure indexer with custom settings
$indexer = $search->createIndex('products', [
    'fields' => [
        'name' => ['boost' => 3.0, 'store' => true],
        'description' => ['boost' => 1.0, 'store' => true],
        'brand' => ['boost' => 2.0, 'store' => true],
        'sku' => ['boost' => 1.0, 'store' => true, 'index' => false],
        'price' => ['boost' => 1.0, 'store' => true, 'index' => false]
    ],
    'chunk_size' => 500,        // Smaller chunks for product descriptions
    'chunk_overlap' => 50,      // Overlap between chunks
    'batch_size' => 100         // Process 100 documents at a time
]);

// Index products with metadata
$product = [
    'id' => 'prod-123',
    'content' => [
        'name' => 'Professional PHP Development Book',
        'description' => 'A comprehensive guide to professional PHP development...',
        'brand' => 'TechBooks Publishing',
        'sku' => 'TB-PHP-001',
        'price' => 49.99
    ],
    'metadata' => [
        'in_stock' => true,
        'rating' => 4.5,
        'reviews' => 127
    ]
];

$indexer->insert($product);

// Basic search
$results = $search->search('articles', 'PHP programming');

// Advanced search with options
$results = $search->search('articles', 'advanced techniques', [
    'limit' => 20,
    'offset' => 0,
    'fields' => ['title', 'content', 'tags'],  // Search only in specific fields
    'highlight' => true,                       // Enable highlighting
    'fuzzy' => true,                          // Enable fuzzy matching
    'unique_by_route' => true,                // Deduplicate results (default)
    'filters' => [
        [
            'field' => 'category',
            'value' => 'Programming',
            'operator' => '='
        ],
        [
            'field' => 'date',
            'value' => strtotime('-30 days'),
            'operator' => '>='
        ]
    ],
    'boost' => [
        'title' => 3.0,
        'tags' => 2.0,
        'content' => 1.0
    ]
]);

// Process results
echo "Found {$results['total']} results in {$results['search_time']} seconds\n\n";

foreach ($results['results'] as $result) {
    echo "Title: " . $result['title'] . "\n";
    echo "Score: " . $result['score'] . "\n";
    echo "URL: " . $result['url'] . "\n";
    echo "Excerpt: " . $result['excerpt'] . "\n";
    echo "---\n";
}

// Available filter operators
$results = $search->search('products', 'laptop', [
    'filters' => [
        ['field' => 'category', 'value' => 'Electronics', 'operator' => '='],      // Exact match
        ['field' => 'price', 'value' => 500, 'operator' => '<'],                   // Less than
        ['field' => 'price', 'value' => 100, 'operator' => '>'],                   // Greater than
        ['field' => 'rating', 'value' => 4, 'operator' => '>='],                   // Greater or equal
        ['field' => 'stock', 'value' => 10, 'operator' => '<='],                   // Less or equal
        ['field' => 'brand', 'value' => 'Apple', 'operator' => '!='],              // Not equal
        ['field' => 'tags', 'value' => ['laptop', 'gaming'], 'operator' => 'in'],  // In array
        ['field' => 'title', 'value' => 'Pro', 'operator' => 'contains'],          // Contains text
        ['field' => 'metadata.warranty', 'operator' => 'exists'],                  // Field exists
    ]
]);

// Get all chunks (no deduplication)
$allChunks = $search->search('articles', 'PHP programming', [
    'unique_by_route' => false  // Show all matching chunks
]);

// Search with pagination
$page = 2;
$perPage = 10;
$results = $search->search('articles', 'PHP', [
    'limit' => $perPage,
    'offset' => ($page - 1) * $perPage
]);

// Faceted search
$results = $search->search('products', 'book', [
    'facets' => [
        'category' => ['limit' => 10],
        'brand' => ['limit' => 5],
        'price_range' => [
            'type' => 'range',
            'ranges' => [
                ['to' => 20],
                ['from' => 20, 'to' => 50],
                ['from' => 50]
            ]
        ]
    ]
]);

// Access facets
foreach ($results['facets']['category'] as $facet) {
    echo "{$facet['value']}: {$facet['count']} items\n";
}

// Search specific indexes
$results = $search->searchMultiple(['products', 'articles'], 'PHP book', [
    'limit' => 20
]);

// Search all indexes matching a pattern
$results = $search->searchMultiple(['content_*'], 'search term', [
    'limit' => 20
]);

// Results 

// Update a document
$indexer->update([
    'id' => 'article-1',
    'content' => [
        'title' => 'Getting Started with PHP 8',  // Updated title
        'body' => 'PHP 8 introduces many new features...',
        'author' => 'John Doe',
        'category' => 'Programming',
        'tags' => 'php php8 programming tutorial'
    ]
]);

// Delete a document
$indexer->delete('article-1');

// Clear entire index
$indexer->clear();

// Get index statistics
$stats = $indexer->getStats();
echo "Total documents: " . $stats['total_documents'] . "\n";
echo "Total size: " . $stats['total_size'] . " bytes\n";
echo "Average document size: " . $stats['avg_document_size'] . " bytes\n";

// Optimize index for better performance
$indexer->optimize();

$config = [
    'storage' => [
        'path' => '/path/to/search.db',
        'timeout' => 5000,              // Connection timeout in ms
        'busy_timeout' => 10000,        // Busy timeout in ms
        'journal_mode' => 'WAL',        // Write-Ahead Logging for better concurrency
        'synchronous' => 'NORMAL',      // Sync mode
        'cache_size' => -2000,          // Cache size in KB (negative = KB)
        'temp_store' => 'MEMORY'        // Use memory for temp tables
    ],
    'analyzer' => [
        'min_word_length' => 2,         // Minimum word length to index
        'max_word_length' => 50,        // Maximum word length to index
        'remove_numbers' => false,      // Keep numbers in index
        'lowercase' => true,            // Convert to lowercase
        'strip_html' => true,           // Remove HTML tags
        'strip_punctuation' => true,    // Remove punctuation
        'expand_contractions' => true,  // Expand contractions (e.g., don't -> do not)
        'custom_stop_words' => ['example', 'custom'], // Additional stop words to exclude
        'disable_stop_words' => false   // Set to true to disable all stop word filtering
    ],
    'indexer' => [
        'batch_size' => 100,            // Documents per batch
        'auto_flush' => true,           // Auto-flush after batch_size
        'chunk_size' => 1000,           // Characters per chunk
        'chunk_overlap' => 100,         // Overlap between chunks
        'fields' => [                   // Field configuration
            'title' => ['boost' => 3.0, 'store' => true],
            'content' => ['boost' => 1.0, 'store' => true],
            'excerpt' => ['boost' => 2.0, 'store' => true],
            'tags' => ['boost' => 2.5, 'store' => true],
            'category' => ['boost' => 2.0, 'store' => true],
            'author' => ['boost' => 1.5, 'store' => true],
            'url' => ['boost' => 1.0, 'store' => true, 'index' => false],
            'route' => ['boost' => 1.0, 'store' => true, 'index' => false]
        ]
    ],
    'search' => [
        'min_score' => 0.0,             // Minimum score threshold
        'highlight_tag' => '<mark>',    // Opening highlight tag
        'highlight_tag_close' => '</mark>', // Closing highlight tag
        'snippet_length' => 150,        // Length of snippets
        'max_results' => 1000,          // Maximum results to return
        'enable_fuzzy' => true,         // Enable fuzzy search
        'fuzzy_algorithm' => 'trigram', // 'trigram', 'jaro_winkler', or 'levenshtein'
        'levenshtein_threshold' => 2,   // Max edit distance for Levenshtein
        'min_term_frequency' => 2,      // Min term frequency for fuzzy matching
        'max_indexed_terms' => 10000,   // Max indexed terms to check
        'max_fuzzy_variations' => 8,    // Max fuzzy variations per term
        'fuzzy_score_penalty' => 0.4,   // Score penalty for fuzzy matches
        'indexed_terms_cache_ttl' => 300, // Cache TTL for indexed terms
        'enable_suggestions' => true,   // Enable search suggestions
        'cache_ttl' => 300,             // Cache TTL in seconds
        'result_fields' => [            // Fields to 

$indexer = $search->createIndex('books', [
    'chunk_size' => 1000,      // 1000 characters per chunk
    'chunk_overlap' => 100     // 100 character overlap
]);

// Index a large document - it will be automatically chunked.
// Indexed fields are nested under 'content', matching the document structure
// used by the other indexing examples ('id' plus a 'content' array of fields).
// $veryLongBookContent is a placeholder for the full text, loaded elsewhere.
$indexer->insert([
    'id' => 'book-1',
    'content' => [
        'title' => 'War and Peace',
        'content' => $veryLongBookContent,  // Will be split into chunks
        'author' => 'Leo Tolstoy'
    ]
]);

// Search returns the best matching chunk by default
$results = $search->search('books', 'Napoleon');

// Get all matching chunks
$allChunks = $search->search('books', 'Napoleon', [
    'unique_by_route' => false
]);

$config = [
    'indexer' => [
        'fields' => [
            'title' => ['boost' => 3.0],      // High-priority field
            'name' => ['boost' => 3.0],       // Another high-priority field
            'description' => ['boost' => 1.0], // Standard content field
            'tags' => ['boost' => 2.0],       // Medium priority
        ]
    ]
];

// With this configuration:
$indexer = $search->createIndex('movies', [
    'fields' => [
        'title' => ['boost' => 3.0],    // High-priority field
        'overview' => ['boost' => 1.0]  // Standard field
    ]
]);

// Searching for "star wars" will rank results as:
// 1. "Star Wars" (exact title match - huge bonus)
// 2. "Star Wars: Episode IV" (contains phrase but longer)
// 3. Movies with "star wars" in overview (lower boost field)

// Index documents in different languages.
// Searchable text is nested under 'content'; 'language' stays at the top
// level of the document, next to 'id'.
$indexer->insert([
    'id' => 'doc-fr-1',
    'content' => [
        'title' => 'Introduction à PHP',
        'content' => 'PHP est un langage de programmation...'
    ],
    'language' => 'french'
]);

$indexer->insert([
    'id' => 'doc-de-1',
    'content' => [
        'title' => 'Einführung in PHP',
        'content' => 'PHP ist eine Programmiersprache...'
    ],
    'language' => 'german'
]);

// Search with language-specific stemming
$results = $search->search('pages', 'programmation', [
    'language' => 'french'
]);

// Configure custom stop words during initialization
$search = new YetiSearch([
    'analyzer' => [
        'custom_stop_words' => ['lorem', 'ipsum', 'dolor']
    ]
]);

// Or add them dynamically
$analyzer = $search->getAnalyzerInstance();
$analyzer->addCustomStopWord('example');
$analyzer->addCustomStopWord('test');

// Remove a custom stop word
$analyzer->removeCustomStopWord('test');

// Get all custom stop words
$customWords = $analyzer->getCustomStopWords();

// Disable all stop word filtering (not recommended)
$analyzer->setStopWordsDisabled(true);

use YetiSearch\Geo\GeoBounds;
use YetiSearch\Geo\GeoPoint;
// SearchQuery is used below; the package layout places it under Models/.
use YetiSearch\Models\SearchQuery;

// Index documents with location data
$indexer->insert([
    'id' => 'coffee-shop-1',
    'content' => [
        'title' => 'Blue Bottle Coffee',
        'body' => 'Specialty coffee roaster and cafe'
    ],
    'geo' => [
        'lat' => 37.7825,
        'lng' => -122.4099
    ]
]);

// Search within radius of a point
$searchQuery = new SearchQuery('coffee');
$searchQuery->near(new GeoPoint(37.7749, -122.4194), 5000); // 5km radius
// NOTE(review): $searchEngine is not defined in this snippet; it is presumably
// the search engine object for the target index - confirm how to obtain it.
$results = $searchEngine->search($searchQuery);

// Search within bounding box
$searchQuery = new SearchQuery('restaurant');
$searchQuery->withinBounds(37.8, 37.7, -122.3, -122.5);
// Or with a GeoBounds object:
$bounds = new GeoBounds(37.8, 37.7, -122.3, -122.5);
$searchQuery->within($bounds);

// Sort results by distance
$searchQuery = new SearchQuery('food');
$searchQuery->sortByDistance(new GeoPoint(37.7749, -122.4194), 'asc');

// Combine text search with geo filters.
// The `new` expression is parenthesised so the method chain parses on every
// PHP version; calling a method directly on an unparenthesised `new` is only
// accepted from PHP 8.4 onwards.
$searchQuery = (new SearchQuery('italian restaurant'))
    ->near(new GeoPoint(37.7749, -122.4194), 3000)
    ->filter('price_range', '$$')
    ->limit(10);

// Results of a geo search carry a 'distance' value in meters

use YetiSearch\Geo\GeoUtils;

// Distance calculations
$distance = GeoUtils::distance($point1, $point2); // meters
$distance = GeoUtils::distanceBetween($lat1, $lng1, $lat2, $lng2);

// Unit conversions
$meters = GeoUtils::kmToMeters(5);
$meters = GeoUtils::milesToMeters(3.1);

// Format distance for display
echo GeoUtils::formatDistance(1500); // "1.5 km"
echo GeoUtils::formatDistance(1500, 'imperial'); // "0.9 mi"

// Parse various coordinate formats
$point = GeoUtils::parsePoint(['lat' => 37.7749, 'lng' => -122.4194]);
$point = GeoUtils::parsePoint([37.7749, -122.4194]);
$point = GeoUtils::parsePoint('37.7749,-122.4194');

// Index areas/regions with bounding boxes
$indexer->insert([
    'id' => 'downtown-sf',
    'content' => [
        'title' => 'Downtown San Francisco',
        'body' => 'Financial district and shopping area'
    ],
    'geo_bounds' => [
        'north' => 37.8,
        'south' => 37.77,
        'east' => -122.39,
        'west' => -122.42
    ]
]);

// Default behavior - returns unique documents (best chunk per document)
$uniqueResults = $search->search('pages', 'PHP framework');
echo "Found {$uniqueResults['total']} unique documents\n";

// Get all chunks including duplicates
$allChunks = $search->search('pages', 'PHP framework', [
    'unique_by_route' => false
]);
echo "Found {$allChunks['total']} total matching chunks\n";

$results = $search->search('pages', 'PHP programming', [
    'highlight' => true,
    'highlight_length' => 200  // Snippet length
]);

foreach ($results['results'] as $result) {
    // Excerpt will contain <mark>PHP</mark> and <mark>programming</mark>
    echo $result['excerpt'] . "\n";
}

// Custom highlight tags
$search = new YetiSearch([
    'search' => [
        'highlight_tag' => '<span class="highlight">',
        'highlight_tag_close' => '</span>'
    ]
]);

// Find results even with typos
$results = $search->search('pages', 'porgramming', [  // Note the typo
    'fuzzy' => true,
    'fuzziness' => 0.8  // 0.0 to 1.0 (higher = stricter)
]);

// Will still find documents about "programming"

// Configure fuzzy search algorithms
$config = [
    'search' => [
        'enable_fuzzy' => true,
        'fuzzy_algorithm' => 'trigram',         // Options: 'trigram', 'jaro_winkler', 'levenshtein'
        'levenshtein_threshold' => 2,           // Max edit distance for Levenshtein (default: 2)
        'min_term_frequency' => 2,              // Min occurrences for a term to be indexed
        'max_indexed_terms' => 10000,           // Max terms to check for fuzzy matches
        'max_fuzzy_variations' => 8,            // Max variations per search term
        'fuzzy_score_penalty' => 0.4,           // Score reduction for fuzzy matches (0.0-1.0)
        'indexed_terms_cache_ttl' => 300        // Cache indexed terms for 5 minutes
    ]
];

$search = new YetiSearch($config);

// Search with advanced fuzzy matching
$results = $search->search('movies', 'Amakin Dkywalker', ['fuzzy' => true]);
// Will find "Anakin Skywalker" despite multiple typos

// For best performance (3-5ms searches)
$config = [
    'search' => [
        'fuzzy_algorithm' => 'trigram',      // Fast algorithm
        'min_term_frequency' => 5,           // Skip rare terms
        'max_indexed_terms' => 5000,         // Check fewer terms
        'indexed_terms_cache_ttl' => 600    // Cache for 10 minutes
    ]
];

// For best accuracy (handles more typos)
$config = [
    'search' => [
        'fuzzy_algorithm' => 'levenshtein',
        'levenshtein_threshold' => 2,        // Allow 2 edits
        'min_term_frequency' => 1,           // Include all terms
        'max_indexed_terms' => 20000,        // Check more terms
        'fuzzy_score_penalty' => 0.3        // Lower penalty for fuzzy matches
    ]
];

// Run benchmarks to compare algorithm performance
use YetiSearch\Tools\FuzzyBenchmark;

$benchmark = new FuzzyBenchmark($search);
$results = $benchmark->runAllBenchmarks();

// Results show accuracy and performance metrics for each algorithm
foreach ($results as $algorithm => $metrics) {
    echo "$algorithm: {$metrics['accuracy']}% accuracy, {$metrics['avg_time']}ms avg search time\n";
}

$results = $search->search('products', 'laptop', [
    'facets' => [
        'brand' => ['limit' => 10],
        'category' => ['limit' => 5],
        'price' => [
            'type' => 'range',
            'ranges' => [
                ['to' => 500, 'key' => 'budget'],
                ['from' => 500, 'to' => 1000, 'key' => 'mid-range'],
                ['from' => 1000, 'key' => 'premium']
            ]
        ]
    ],
    'aggregations' => [
        'avg_price' => ['type' => 'avg', 'field' => 'price'],
        'max_price' => ['type' => 'max', 'field' => 'price'],
        'min_price' => ['type' => 'min', 'field' => 'price']
    ]
]);

// Display facets
foreach ($results['facets']['brand'] as $brand) {
    echo "{$brand['value']}: {$brand['count']} products\n";
}

// Display aggregations
echo "Average price: $" . $results['aggregations']['avg_price'] . "\n";

// Create instance
$search = new YetiSearch(array $config = []);

// Index management
$indexer = $search->createIndex(string $name, array $options = []);
$indexer = $search->getIndexer(string $name);

// Search operations
$results = $search->search(string $indexName, string $query, array $options = []);
$count = $search->count(string $indexName, string $query, array $options = []);
$suggestions = $search->suggest(string $indexName, string $term, array $options = []);

// Index operations
$search->insert(string $indexName, array $documentData);
$search->insertBatch(string $indexName, array $documents);
$search->update(string $indexName, array $documentData);
$search->delete(string $indexName, string $documentId);
$search->clear(string $indexName);
$search->optimize(string $indexName);
$search->getStats(string $indexName);

$document = [
    'id' => 'unique-id',          // Required: unique identifier
    'content' => [                // Required: content fields to index
        'title' => 'Document Title',
        'body' => 'Main content...',
        'author' => 'John Doe',
        // ... any other fields
    ],
    'metadata' => [               // Optional: non-indexed metadata
        'created_at' => time(),
        'status' => 'published',
        // ... any other metadata
    ],
    'language' => 'en',           // Optional: language code
    'type' => 'article',          // Optional: document type
    'timestamp' => time(),        // Optional: defaults to current time
    'geo' => [                    // Optional: geographic point
        'lat' => 37.7749,
        'lng' => -122.4194
    ],
    'geo_bounds' => [             // Optional: geographic bounds
        'north' => 37.8,
        'south' => 37.7,
        'east' => -122.3,
        'west' => -122.5
    ]
];

$document = [
    'id' => 'product-123',
    'content' => [
        'name' => 'Wireless Headphones',
        'description' => 'High-quality Bluetooth headphones with noise cancellation',
        'brand' => 'TechAudio',
        'features' => 'bluetooth wireless noise-cancelling comfortable'
    ],
    'metadata' => [
        'price' => 149.99,           // Don't want searches for "149.99" to match
        'sku' => 'TA-WH-2024-BK',   // Internal reference code
        'stock_count' => 42,         // Numeric data not meant for text search
        'warehouse_id' => 'WH-03',   // Internal data
        'cost' => 89.50,            // Sensitive data
        'last_restock' => time()     // System tracking
    ]
];

// Create query
$query = new SearchQuery($queryString, $options);

// Query building
$query->limit($limit)
      ->offset($offset)
      ->inFields(['title', 'content'])
      ->filter('category', 'tech')
      ->sortBy('date', 'desc')
      ->fuzzy(true)
      ->boost('title', 2.0)
      ->highlight(true);

[
    'results' => [
        [
            'id' => 'doc-123',
            'score' => 85.5,              // Relevance score (0-100)
            'title' => 'Document Title',   // From content fields
            'content' => '...',           // Other content fields
            'excerpt' => '...<mark>highlighted</mark>...', // With highlights if enabled
            'metadata' => [...],          // Metadata fields
            'distance' => 1234.5,         // Distance in meters (if geo search)
            // ... other fields
        ],
        // ... more results
    ],
    'total' => 42,                // Total matching documents
    'count' => 20,                // Results in this page
    'search_time' => 0.023,       // Search time in seconds
    'facets' => [...],            // If facets requested
]

try {
    $results = $search->search('index-name', 'query');
} catch (\YetiSearch\Exceptions\StorageException $e) {
    // Handle storage/database errors
    error_log('Storage error: ' . $e->getMessage());
} catch (\YetiSearch\Exceptions\IndexException $e) {
    // Handle indexing errors
    error_log('Index error: ' . $e->getMessage());
} catch (\Exception $e) {
    // Handle other errors
    error_log('Search error: ' . $e->getMessage());
}

$config = [
    'indexer' => [
        'batch_size' => 250,          // Larger batches
        'auto_flush' => false,        // Manual flushing
        'chunk_size' => 2000,         // Larger chunks
    ],
    'search' => [
        'enable_fuzzy' => false,      // Disable fuzzy indexing
    ]
];

$config = [
    'storage' => [
        'cache_size' => -64000,       // 64MB cache
        'temp_store' => 'MEMORY',     // Memory temp tables
    ],
    'search' => [
        'fuzzy_algorithm' => 'basic', // Fastest fuzzy algorithm
        'cache_ttl' => 3600,          // 1-hour result cache
    ]
];

$config = [
    'search' => [
        'fuzzy_algorithm' => 'levenshtein',
        'levenshtein_threshold' => 2,
        'min_score' => 0.1,           // Include more results
    ]
];

YetiSearch/
├── Analyzers/          # Text analysis and tokenization
│   └── StandardAnalyzer.php
├── Contracts/          # Interfaces for extensibility
│   ├── AnalyzerInterface.php
│   ├── IndexerInterface.php
│   ├── SearchEngineInterface.php
│   └── StorageInterface.php
├── Index/              # Indexing logic
│   └── Indexer.php
├── Models/             # Data models
│   ├── Document.php
│   ├── SearchQuery.php
│   └── SearchResult.php
├── Search/             # Search implementation
│   └── SearchEngine.php
└── Storage/            # Storage backends
    └── SqliteStorage.php