PHP code example of ezimuel / phpvector

1. Go to this page and download the library: Download ezimuel/phpvector library. Choose the download type you require.

2. Extract the ZIP file and open the index.php.

3. Add this code to the index.php.
    
        
<?php
// Load Composer's autoloader. The path is anchored to this file's directory
// (__DIR__) so the include still works when the script is started from a
// different working directory; a bare relative path would be resolved through
// include_path / the current working directory instead.
require_once __DIR__ . '/vendor/autoload.php';

/* Start to develop here. Best regards https://php-download.com/ */

    

ezimuel / phpvector example snippets


use PHPVector\Document;
use PHPVector\VectorDatabase;

$db = new VectorDatabase();

// Sample corpus: three documents with explicit integer ids and metadata,
// followed by one that supplies only a vector and text.
$documents = [
    new Document(
        id: 1,
        vector: [0.12, 0.85, 0.44, 0.67],
        text: 'PHP vector database with HNSW index',
        metadata: ['url' => 'https://example.com/1', 'lang' => 'en'],
    ),
    new Document(
        id: 2,
        vector: [0.91, 0.23, 0.78, 0.05],
        text: 'Approximate nearest neighbour search in PHP',
        metadata: ['url' => 'https://example.com/2', 'lang' => 'en'],
    ),
    new Document(
        id: 3,
        vector: [0.33, 0.61, 0.19, 0.88],
        text: 'BM25 full-text ranking algorithm explained',
        metadata: ['url' => 'https://example.com/3', 'lang' => 'en'],
    ),
    // No id — a UUID v4 is assigned automatically.
    new Document(
        vector: [0.55, 0.42, 0.71, 0.30],
        text: 'Hybrid search with Reciprocal Rank Fusion',
    ),
];

// Insert the whole batch in a single call.
$db->addDocuments($documents);

$queryVector = [0.10, 0.80, 0.50, 0.60];

// Ask for the two best matches for the query vector.
$results = $db->vectorSearch(vector: $queryVector, k: 2);

// Print one line per hit: rank, score and the document's 'url' metadata entry.
foreach ($results as $hit) {
    printf("[%d] score=%.4f  %s\n", $hit->rank, $hit->score, $hit->document->metadata['url']);
}
// [1] score=0.9987  https://example.com/1
// [2] score=0.8341  https://example.com/3

// Full-text query, limited to the two best matches.
$results = $db->textSearch(query: 'nearest neighbour PHP', k: 2);

// Print one line per hit: rank, score and the document's 'url' metadata entry.
foreach ($results as $hit) {
    printf("[%d] score=%.4f  %s\n", $hit->rank, $hit->score, $hit->document->metadata['url']);
}
// [1] score=1.2430  https://example.com/2
// [2] score=0.8761  https://example.com/1

use PHPVector\HybridMode;

// Hybrid search: one call that takes both a query vector and a text query,
// combined using the RRF mode.
$results = $db->hybridSearch(
    vector: $queryVector,
    text:   'vector database PHP',
    k:      3,
    mode:   HybridMode::RRF,
);

foreach ($results as $result) {
    // A document can be stored without a 'url' entry (or without any metadata),
    // so fall back to a placeholder instead of triggering an
    // "Undefined array key" warning when such a document is returned.
    $url = $result->document->metadata['url'] ?? '(no url)';

    echo sprintf(
        "[%d] score=%.4f  %s\n",
        $result->rank,
        $result->score,
        $url,
    );
}

// Same hybrid query, this time using the Weighted mode with explicit weights
// for the vector side and the text side.
$results = $db->hybridSearch(
    vector:       $queryVector,
    text:         'vector database PHP',
    k:            3,
    mode:         HybridMode::Weighted,
    vectorWeight: 0.7,
    textWeight:   0.3,
);

use PHPVector\BM25\Config as BM25Config;
use PHPVector\BM25\SimpleTokenizer;
use PHPVector\Distance;
use PHPVector\HNSW\Config as HNSWConfig;
use PHPVector\VectorDatabase;

// Vector index (HNSW) settings.
$hnswConfig = new HNSWConfig(
    M:               16,    // Max connections per node per layer. Higher → better recall, more memory.
    efConstruction:  200,   // Beam width during index build. Higher → better graph quality, slower inserts.
    efSearch:        50,    // Beam width during query. Higher → better recall, slower queries.
    distance:        Distance::Cosine, // Cosine | Euclidean | DotProduct | Manhattan
    useHeuristic:    true,  // Diverse neighbour selection (recommended).
);

// Text ranking (BM25) settings.
$bm25Config = new BM25Config(
    k1: 1.5,   // TF saturation. Range 1.2–2.0.
    b:  0.75,  // Length normalisation. 0 = none, 1 = full.
);

// Tokenizer used for the text index.
$tokenizer = new SimpleTokenizer(
    stopWords:      SimpleTokenizer::DEFAULT_STOP_WORDS,
    minTokenLength: 2,
);

// Assemble the database from the three pieces above.
$db = new VectorDatabase(
    hnswConfig: $hnswConfig,
    bm25Config: $bm25Config,
    tokenizer:  $tokenizer,
);

use PHPVector\Document;
use PHPVector\VectorDatabase;

// Create a database bound to an on-disk location.
$db = new VectorDatabase(path: '/var/data/mydb');

// Add the documents in one batch (only the first two are shown).
$db->addDocuments([
    new Document(id: 1, vector: [0.12, 0.85, 0.44], text: 'PHP vector search', metadata: ['source' => 'blog']),
    new Document(id: 2, vector: [0.91, 0.23, 0.78], text: 'Approximate nearest neighbour'),
    // ... thousands more
]);

// Flush HNSW graph + BM25 index to disk (document files already written).
$db->save();

use PHPVector\VectorDatabase;

// Re-open the database stored at the given path.
$db = VectorDatabase::open('/var/data/mydb');

// All three search modes work immediately.
// NOTE: $queryVector is not defined in this snippet; it must be supplied by the caller.
$results = $db->vectorSearch(vector: $queryVector, k: 5);
$results = $db->textSearch(query: 'nearest neighbour', k: 5);
$results = $db->hybridSearch(vector: $queryVector, text: 'nearest neighbour', k: 5);

use PHPVector\BM25\Config as BM25Config;
use PHPVector\Distance;
use PHPVector\HNSW\Config as HNSWConfig;
use PHPVector\VectorDatabase;

// Re-open a stored database while passing explicit index, ranking and
// tokenizer configuration.
// NOTE: MyCustomTokenizer is a placeholder; it is not defined or imported here.
$db = VectorDatabase::open(
    path:       '/var/data/mydb',
    hnswConfig: new HNSWConfig(
        M:        16,
        efSearch: 100,
        distance: Distance::Euclidean,  // must match the value used on save()
    ),
    bm25Config: new BM25Config(k1: 1.2, b: 0.8),
    tokenizer:  new MyCustomTokenizer(),
);

// Re-open an existing database, append one document (no explicit id), and persist.
$db = VectorDatabase::open('/var/data/mydb');
$db->addDocument(new Document(vector: [0.55, 0.42, 0.71], text: 'New document'));
$db->save(); // writes docs/N.bin + updated hnsw.bin, bm25.bin, meta.json

// build.php — run once (or nightly)
// Builds the index from the application's own data source and saves it.
$db = new VectorDatabase(
    hnswConfig: new HNSWConfig(M: 32, efConstruction: 400),
    path: '/var/data/mydb',
);
// fetchDocumentsFromDatabase() is an application-side placeholder (not defined
// here); each item it yields is passed straight to addDocument().
foreach (fetchDocumentsFromDatabase() as $doc) {
    $db->addDocument($doc);
}
$db->save();

// serve.php — loaded on every request or worker boot
// Opens the index written by build.php, passing the same M value (32) used there.
// NOTE: $queryVector is not defined in this snippet; it must be supplied by the caller.
$db = VectorDatabase::open('/var/data/mydb', new HNSWConfig(M: 32));
$results = $db->vectorSearch($queryVector, k: 10);

use PHPVector\BM25\SimpleTokenizer;
use PHPVector\BM25\StopWords\EnglishStopWords;
use PHPVector\BM25\StopWords\ItalianStopWords;
use PHPVector\BM25\StopWords\FileStopWords;
use PHPVector\VectorDatabase;

// Three ways to choose the stop-word list used by the tokenizer.

// English (default)
$db = new VectorDatabase();

// Italian
$db = new VectorDatabase(
    tokenizer: new SimpleTokenizer(new ItalianStopWords()),
);

// Load from file (one word per line, # for comments)
$db = new VectorDatabase(
    tokenizer: new SimpleTokenizer(new FileStopWords('/path/to/stopwords.txt')),
);

### Stop words file format (`FileStopWords`)

Use a plain UTF-8 text file with one stop word per line.

Rules:
- Empty lines are ignored
- Lines starting with `#` are treated as comments
- Words are normalized to lowercase when loaded

Example (`stopwords-it.txt`):

    # Italian stop words (illustrative sample)
    il
    la

    di
    che

// Removing and replacing documents by id.

// Delete a document by ID
$deleted = $db->deleteDocument(1);  // returns true if found, false otherwise

// Update a document (delete + insert with same ID)
$updated = $db->updateDocument(new Document(
    id: 1,
    vector: [0.5, 0.5, 0.3, 0.2],
    text: 'Updated content here',
    metadata: ['version' => 2],
));

// After modifications, call save() to persist
$db->save();

use PHPVector\Metadata\MetadataFilter;

// Catalogue of the MetadataFilter factory methods. $filter is simply reassigned
// on every line so that each factory can be shown in turn.

// Equality / inequality
$filter = MetadataFilter::eq('status', 'published');
$filter = MetadataFilter::neq('type', 'draft');

// Comparison operators
$filter = MetadataFilter::lt('price', 100);
$filter = MetadataFilter::lte('price', 100);
$filter = MetadataFilter::gt('rating', 4.0);
$filter = MetadataFilter::gte('rating', 4.0);

// Set membership
$filter = MetadataFilter::in('category', ['tech', 'science', 'engineering']);
$filter = MetadataFilter::notIn('status', ['deleted', 'archived']);

// Array containment — checks if metadata array contains the value
$filter = MetadataFilter::contains('tags', 'php');  // matches ['tags' => ['php', 'vector']]

// Existence checks — does a metadata key exist (regardless of value)?
$filter = MetadataFilter::exists('thumbnail');
$filter = MetadataFilter::notExists('deleted_at');

use PHPVector\Metadata\MetadataFilter;

// The same `filters:` argument is accepted by all three search methods.
// NOTE: $db and $queryVector are not defined in this snippet.

// Vector search with filters
$results = $db->vectorSearch(
    vector: $queryVector,
    k: 10,
    filters: [
        MetadataFilter::eq('lang', 'en'),
        MetadataFilter::gt('year', 2020),
    ],
);

// Text search with filters
$results = $db->textSearch(
    query: 'machine learning',
    k: 10,
    filters: [
        MetadataFilter::in('category', ['tech', 'science']),
    ],
);

// Hybrid search with filters
$results = $db->hybridSearch(
    vector: $queryVector,
    text: 'machine learning',
    k: 10,
    filters: [
        MetadataFilter::eq('status', 'published'),
    ],
);

// Combining filters: a nested array inside `filters` forms an OR group, and the
// top-level entries are ANDed together.
// (category = 'tech' OR category = 'science') AND status = 'published'
$results = $db->vectorSearch(
    vector: $queryVector,
    k: 10,
    filters: [
        [
            MetadataFilter::eq('category', 'tech'),
            MetadataFilter::eq('category', 'science'),
        ],  // OR group
        MetadataFilter::eq('status', 'published'),  // ANDed with the OR group
    ],
);

// Over-fetching can be set per query (`overFetch`) or once as a database-wide
// default (`overFetchMultiplier`).

// Fetch 10× candidates before filtering (useful when filters are very selective)
$results = $db->vectorSearch(
    vector: $queryVector,
    k: 10,
    filters: [MetadataFilter::eq('rare_tag', 'value')],
    overFetch: 10,
);

// Or set the default multiplier at construction time
$db = new VectorDatabase(
    overFetchMultiplier: 10,
);

// Changing a document's metadata in place with patchMetadata().

// Add or update metadata keys
$db->patchMetadata(id: 1, patch: [
    'status' => 'archived',
    'updated_at' => '2026-03-24',
]);

// Remove metadata keys by setting to null
$db->patchMetadata(id: 1, patch: [
    'deprecated_field' => null,  // key will be removed
]);

// patchMetadata returns false if document not found
if (!$db->patchMetadata(id: 999, patch: ['key' => 'value'])) {
    echo "Document not found\n";
}

use PHPVector\Metadata\SortDirection;

// Searching by metadata only (no vector or text query).
// NOTE: this snippet also uses MetadataFilter, which needs its own import
// (`use PHPVector\Metadata\MetadataFilter;`) when the snippet is used alone.

// Find all documents matching filters
$results = $db->metadataSearch(
    filters: [MetadataFilter::eq('status', 'published')],
);

// With limit
$results = $db->metadataSearch(
    filters: [MetadataFilter::gt('year', 2020)],
    limit: 100,
);

// With sorting by metadata key
$results = $db->metadataSearch(
    filters: [MetadataFilter::eq('status', 'published')],
    sortBy: 'created_at',
    sortDirection: SortDirection::Desc,
);

// Empty filters returns all documents
$allDocs = $db->metadataSearch(filters: [], limit: 50);

// Filter values are compared type-strictly against the stored metadata value.
// The calls below are illustrations only: their results are discarded.

// Document with metadata: ['year' => 2024] (integer)
MetadataFilter::eq('year', 2024);    // ✓ matches
MetadataFilter::eq('year', '2024');  // ✗ does not match (string vs int)

// Document with metadata: ['rating' => 4.5] (float)
MetadataFilter::gt('rating', 4);     // ✓ matches (4.5 > 4)
MetadataFilter::eq('rating', 4.5);   // ✓ matches
MetadataFilter::eq('rating', '4.5'); // ✗ does not match (string vs float)

use PHPVector\BM25\TokenizerInterface;

/**
 * Example custom tokenizer: lower-cases the text, splits it on whitespace and
 * passes every token through a stemmer.
 *
 * `porter_stem()` is a placeholder that is not defined here — plug in your own
 * stemmer function.
 */
final class PorterStemTokenizer implements TokenizerInterface
{
    /**
     * Split text into lower-cased, stemmed tokens.
     *
     * @param string $text Raw text to tokenize.
     *
     * @return array Stemmed tokens in input order; empty when the text contains
     *               no tokens or could not be split.
     */
    public function tokenize(string $text): array
    {
        $normalized = mb_strtolower(trim($text));

        // The `u` modifier makes the pattern operate on UTF-8 characters rather
        // than raw bytes, so multibyte text is never split inside a character
        // and Unicode whitespace also separates tokens.
        $tokens = preg_split('/\s+/u', $normalized, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() returns false on failure; passing that to array_map()
        // would raise a TypeError, so treat it as "no tokens".
        if ($tokens === false) {
            return [];
        }

        return array_map(static fn (string $token) => porter_stem($token), $tokens); // your stemmer here
    }
}

// Hand the custom tokenizer to the database through the `tokenizer` argument.
$db = new VectorDatabase(tokenizer: new PorterStemTokenizer());
Benchmark commands (bash):
# Quick run (1 K and 10 K vectors, 128 dimensions)
php benchmark/benchmark.php

# Full run — save report to a file
php benchmark/benchmark.php --scenarios=xs,small,medium,large,highdim --output=report.md

# Large dataset, skip recall (brute-force would be slow)
php benchmark/benchmark.php --scenarios=large --no-recall --queries=500

# Tune HNSW parameters (--ef-search and --m)
php benchmark/benchmark.php --scenarios=small --ef-search=100 --m=32

# All options
php benchmark/benchmark.php --help