1. Go to this page and download the yetidevworks/yetisearch library, choosing the download type "require".
2. Extract the ZIP file and open the index.php.
3. Add this code to the index.php.
<?php
require_once('vendor/autoload.php');
/* Start to develop here. Best regards https://php-download.com/ */
// yetidevworks / yetisearch example snippets
use YetiSearch\YetiSearch;
// Quick start: configure YetiSearch, index one document, then search it.

// Only the storage path is configured here
$config = ['storage' => ['path' => '/path/to/your/search.db']];
$search = new YetiSearch($config);

// Create the "pages" index and keep the indexer it returns
$indexer = $search->createIndex('pages');

// Insert a single document; its searchable fields are grouped under "content"
$introDocument = [
    'id' => 'doc1',
    'content' => [
        'title' => 'Introduction to YetiSearch',
        'body' => 'YetiSearch is a powerful search engine library for PHP applications...',
        'url' => 'https://example.com/intro',
        'tags' => 'search php library',
    ],
];
$indexer->insert($introDocument);

// Plain search against the "pages" index
$results = $search->search('pages', 'powerful search');

// Misspelled query with fuzzy matching enabled (uses trigram algorithm by default)
$fuzzyOptions = ['fuzzy' => true];
$fuzzyResults = $search->search('pages', 'powerfull serch', $fuzzyOptions);

// Print title, score and excerpt for every hit of the plain search
foreach ($results['results'] as $hit) {
    echo "{$hit['title']} (Score: {$hit['score']})\n";
    echo "{$hit['excerpt']}\n\n";
}
use YetiSearch\YetiSearch;
// Index management walkthrough: insert, batch insert, flush, update, delete,
// clear, statistics and optimize on the "articles" index.
$search = new YetiSearch(['storage' => ['path' => './search.db']]);
$indexer = $search->createIndex('articles');

// Index a single document (content fields plus a metadata timestamp)
$firstArticle = [
    'id' => 'article-1',
    'content' => [
        'title' => 'Getting Started with PHP',
        'body' => 'PHP is a popular general-purpose scripting language...',
        'author' => 'John Doe',
        'category' => 'Programming',
        'tags' => 'php programming tutorial',
    ],
    'metadata' => [
        'date' => time(),
    ],
];
$indexer->insert($firstArticle);

// Index multiple documents with one insert() call
$moreArticles = [
    [
        'id' => 'article-2',
        'content' => [
            'title' => 'Advanced PHP Techniques',
            'body' => "Let's explore advanced PHP programming techniques...",
            'author' => 'Jane Smith',
            'category' => 'Programming',
            'tags' => 'php advanced tips',
        ],
    ],
    [
        'id' => 'article-3',
        'content' => [
            'title' => 'PHP Performance Optimization',
            'body' => 'Optimizing PHP applications for better performance...',
            'author' => 'Bob Johnson',
            'category' => 'Performance',
            'tags' => 'php performance optimization',
        ],
    ],
];
$indexer->insert($moreArticles);

// Flush to ensure all documents are written
$indexer->flush();

// Update a document: same id, new title, body and tags
$revisedArticle = [
    'id' => 'article-1',
    'content' => [
        'title' => 'Getting Started with PHP 8',
        'body' => 'PHP 8 introduces many new features...',
        'author' => 'John Doe',
        'category' => 'Programming',
        'tags' => 'php php8 programming tutorial',
    ],
];
$indexer->update($revisedArticle);

// Delete a document by id
$indexer->delete('article-1');

// Clear entire index
$indexer->clear();

// Get index statistics and print the three reported figures
$stats = $indexer->getStats();
echo "Total documents: {$stats['total_documents']}\n";
echo "Total size: {$stats['total_size']} bytes\n";
echo "Average document size: {$stats['avg_document_size']} bytes\n";

// Optimize index for better performance
$indexer->optimize();
// Full configuration example with four sections: 'storage', 'analyzer',
// 'indexer' and 'search'.
// NOTE(review): this snippet is cut off in this copy. The 'result_fields' list,
// the 'search' section and the outer $config array are never closed, so it
// cannot be run as-is; the missing tail is not visible here.
$config = [
// Storage settings: database path, timeouts and journaling/cache options
'storage' => [
'path' => '/path/to/search.db',
'timeout' => 5000, // Connection timeout in ms
'busy_timeout' => 10000, // Busy timeout in ms
'journal_mode' => 'WAL', // Write-Ahead Logging for better concurrency
'synchronous' => 'NORMAL', // Sync mode
'cache_size' => -2000, // Cache size in KB (negative = KB)
'temp_store' => 'MEMORY' // Use memory for temp tables
],
// Analyzer settings: word-length limits, text normalization and stop words
'analyzer' => [
'min_word_length' => 2, // Minimum word length to index
'max_word_length' => 50, // Maximum word length to index
'remove_numbers' => false, // Keep numbers in index
'lowercase' => true, // Convert to lowercase
'strip_html' => true, // Remove HTML tags
'strip_punctuation' => true, // Remove punctuation
'expand_contractions' => true, // Expand contractions (e.g., don't -> do not)
'custom_stop_words' => ['example', 'custom'], // Additional stop words to exclude
'disable_stop_words' => false // Set to true to disable all stop word filtering
],
// Indexer settings: batching, chunking and per-field boost/store/index flags
'indexer' => [
'batch_size' => 100, // Documents per batch
'auto_flush' => true, // Auto-flush after batch_size
'chunk_size' => 1000, // Characters per chunk
'chunk_overlap' => 100, // Overlap between chunks
'fields' => [ // Field configuration
'title' => ['boost' => 3.0, 'store' => true],
'content' => ['boost' => 1.0, 'store' => true],
'excerpt' => ['boost' => 2.0, 'store' => true],
'tags' => ['boost' => 2.5, 'store' => true],
'category' => ['boost' => 2.0, 'store' => true],
'author' => ['boost' => 1.5, 'store' => true],
// 'url' and 'route' are stored but flagged 'index' => false
'url' => ['boost' => 1.0, 'store' => true, 'index' => false],
'route' => ['boost' => 1.0, 'store' => true, 'index' => false]
]
],
// Search settings: scoring, highlighting, result limits, fuzzy matching and caching
'search' => [
'min_score' => 0.0, // Minimum score threshold
'highlight_tag' => '<mark>', // Opening highlight tag
'highlight_tag_close' => '</mark>', // Closing highlight tag
'snippet_length' => 150, // Length of snippets
'max_results' => 1000, // Maximum results to return
'enable_fuzzy' => true, // Enable fuzzy search
'fuzzy_algorithm' => 'trigram', // 'trigram', 'jaro_winkler', or 'levenshtein'
'levenshtein_threshold' => 2, // Max edit distance for Levenshtein
'min_term_frequency' => 2, // Min term frequency for fuzzy matching
'max_indexed_terms' => 10000, // Max indexed terms to check
'max_fuzzy_variations' => 8, // Max fuzzy variations per term
'fuzzy_score_penalty' => 0.4, // Score penalty for fuzzy matches
'indexed_terms_cache_ttl' => 300, // Cache TTL for indexed terms
'enable_suggestions' => true, // Enable search suggestions
'cache_ttl' => 300, // Cache TTL in seconds
'result_fields' => [ // Fields to
// NOTE(review): snippet ends here, mid-array, in this copy.
// Chunked indexing example on the "books" index.

// Chunk length and overlap, both in characters
$chunkingOptions = [
    'chunk_size' => 1000,
    'chunk_overlap' => 100,
];
$indexer = $search->createIndex('books', $chunkingOptions);

// Index a large document - it will be automatically chunked.
// $veryLongBookContent is not assigned in this snippet; the caller must supply it.
$book = [
    'id' => 'book-1',
    'title' => 'War and Peace',
    'content' => $veryLongBookContent,
    'author' => 'Leo Tolstoy',
];
$indexer->insert($book);

// Search returns the best matching chunk by default
$results = $search->search('books', 'Napoleon');

// Get all matching chunks by switching off 'unique_by_route'
$allChunksOptions = ['unique_by_route' => false];
$allChunks = $search->search('books', 'Napoleon', $allChunksOptions);
// Field boosting example: a global config and a per-index field override.

// Boost weight per field; the larger the number, the higher the priority
$fieldBoosts = [
    'title' => ['boost' => 3.0],       // High-priority field
    'name' => ['boost' => 3.0],        // Another high-priority field
    'description' => ['boost' => 1.0], // Standard content field
    'tags' => ['boost' => 2.0],        // Medium priority
];
$config = ['indexer' => ['fields' => $fieldBoosts]];

// With this configuration:
$movieFieldBoosts = [
    'title' => ['boost' => 3.0],    // High-priority field
    'overview' => ['boost' => 1.0], // Standard field
];
$indexer = $search->createIndex('movies', ['fields' => $movieFieldBoosts]);

// Searching for "star wars" will rank results as:
// 1. "Star Wars" (exact title match - huge bonus)
// 2. "Star Wars: Episode IV" (contains phrase but longer)
// 3. Movies with "star wars" in overview (lower boost field)
// Multi-language example: each document carries a 'language' field.
$localizedDocuments = [
    [
        'id' => 'doc-fr-1',
        'title' => 'Introduction à PHP',
        'content' => 'PHP est un langage de programmation...',
        'language' => 'french',
    ],
    [
        'id' => 'doc-de-1',
        'title' => 'Einführung in PHP',
        'content' => 'PHP ist eine Programmiersprache...',
        'language' => 'german',
    ],
];

// Insert the documents one at a time, French first, then German
foreach ($localizedDocuments as $localizedDocument) {
    $indexer->insert($localizedDocument);
}

// Search with language-specific stemming
$frenchOptions = ['language' => 'french'];
$results = $search->search('pages', 'programmation', $frenchOptions);
// Custom stop words: set them at construction time or manage them on the analyzer.

// Configure custom stop words during initialization
$stopWordConfig = [
    'analyzer' => [
        'custom_stop_words' => ['lorem', 'ipsum', 'dolor'],
    ],
];
$search = new YetiSearch($stopWordConfig);

// Or add them dynamically, one call per word
$analyzer = $search->getAnalyzerInstance();
foreach (['example', 'test'] as $stopWord) {
    $analyzer->addCustomStopWord($stopWord);
}

// Remove a custom stop word
$analyzer->removeCustomStopWord('test');

// Get all custom stop words
$customWords = $analyzer->getCustomStopWords();

// Disable all stop word filtering (not recommended)
$analyzer->setStopWordsDisabled(true);
use YetiSearch\Geo\GeoPoint;
use YetiSearch\Geo\GeoBounds;

// Geo search examples: index a document with coordinates, then query by
// radius, bounding box and distance ordering.
// NOTE(review): SearchQuery is used below but is not imported in this snippet,
// and $searchEngine is never assigned here. Both must come from surrounding
// code - TODO confirm the right `use` statement and how $searchEngine is obtained.

// Index documents with location data
$indexer->insert([
    'id' => 'coffee-shop-1',
    'content' => [
        'title' => 'Blue Bottle Coffee',
        'body' => 'Specialty coffee roaster and cafe',
    ],
    'geo' => [
        'lat' => 37.7825,
        'lng' => -122.4099,
    ],
]);

// Search within radius of a point
// (GeoPoint arguments are presumably latitude, longitude - verify against the class)
$searchQuery = new SearchQuery('coffee');
$searchQuery->near(new GeoPoint(37.7749, -122.4194), 5000); // 5km radius
$results = $searchEngine->search($searchQuery);

// Search within bounding box
// (argument order is presumably north, south, east, west - TODO confirm)
$searchQuery = new SearchQuery('restaurant');
$searchQuery->withinBounds(37.8, 37.7, -122.3, -122.5);

// Or with a GeoBounds object:
$bounds = new GeoBounds(37.8, 37.7, -122.3, -122.5);
$searchQuery->within($bounds);

// Sort results by distance
$searchQuery = new SearchQuery('food');
$searchQuery->sortByDistance(new GeoPoint(37.7749, -122.4194), 'asc');

// Combine text search with geo filters.
// The `new` expression is parenthesized so the method chain parses on every
// PHP version; calling a method directly on an unparenthesized `new` is a
// syntax error before PHP 8.4.
$searchQuery = (new SearchQuery('italian restaurant'))
    ->near(new GeoPoint(37.7749, -122.4194), 3000)
    ->filter('price_range', '$$')
    ->limit(10);
// Results
// Fuzzy search examples: per-query options, then engine-wide fuzzy settings.

// Find results even with typos ("porgramming" is deliberately misspelled)
$typoOptions = [
    'fuzzy' => true,
    'fuzziness' => 0.8, // 0.0 to 1.0 (higher = stricter)
];
$results = $search->search('pages', 'porgramming', $typoOptions);
// Will still find documents about "programming"

// Configure fuzzy search algorithms
$fuzzySettings = [
    'enable_fuzzy' => true,
    'fuzzy_algorithm' => 'trigram',    // Options: 'trigram', 'jaro_winkler', 'levenshtein'
    'levenshtein_threshold' => 2,      // Max edit distance for Levenshtein (default: 2)
    'min_term_frequency' => 2,         // Min occurrences for a term to be indexed
    'max_indexed_terms' => 10000,      // Max terms to check for fuzzy matches
    'max_fuzzy_variations' => 8,       // Max variations per search term
    'fuzzy_score_penalty' => 0.4,      // Score reduction for fuzzy matches (0.0-1.0)
    'indexed_terms_cache_ttl' => 300,  // Cache indexed terms for 5 minutes
];
$config = ['search' => $fuzzySettings];
$search = new YetiSearch($config);

// Search with advanced fuzzy matching
$fuzzyOn = ['fuzzy' => true];
$results = $search->search('movies', 'Amakin Dkywalker', $fuzzyOn);
// Will find "Anakin Skywalker" despite multiple typos
// Two alternative fuzzy-search presets. Each one assigns $config, so when both
// run in sequence the second (accuracy) preset is the one left in $config.

// For best performance (3-5ms searches)
$fastFuzzySettings = [
    'fuzzy_algorithm' => 'trigram',   // Fast algorithm
    'min_term_frequency' => 5,        // Skip rare terms
    'max_indexed_terms' => 5000,      // Check fewer terms
    'indexed_terms_cache_ttl' => 600, // Cache for 10 minutes
];
$config = ['search' => $fastFuzzySettings];

// For best accuracy (handles more typos)
$accurateFuzzySettings = [
    'fuzzy_algorithm' => 'levenshtein',
    'levenshtein_threshold' => 2,  // Allow 2 edits
    'min_term_frequency' => 1,     // Include all terms
    'max_indexed_terms' => 20000,  // Check more terms
    'fuzzy_score_penalty' => 0.3,  // Lower penalty for fuzzy matches
];
$config = ['search' => $accurateFuzzySettings];
// Run benchmarks to compare algorithm performance
use YetiSearch\Tools\FuzzyBenchmark;

$benchmark = new FuzzyBenchmark($search);
$results = $benchmark->runAllBenchmarks();

// Results show accuracy and performance metrics for each algorithm;
// print one summary line per algorithm name
foreach ($results as $algorithm => $metrics) {
    echo $algorithm . ': ' . $metrics['accuracy'] . '% accuracy, '
        . $metrics['avg_time'] . "ms avg search time\n";
}