1. Go to this page and download the library: Download ezimuel/phpvector library. Choose the download type "require".
2. Extract the ZIP file and open index.php.
3. Add this code to index.php.
<?php
// Load Composer's autoloader relative to this file, so it resolves the same way
// regardless of the include_path or the current working directory.
require_once __DIR__ . '/vendor/autoload.php';
/* Start developing here. Best regards https://php-download.com/ */
ezimuel/phpvector example snippets
use PHPVector\Document;
use PHPVector\VectorDatabase;
// Create a database using the default settings (no arguments passed).
$db = new VectorDatabase();

// Index several documents in a single call.
$db->addDocuments([
    new Document(
        id: 1,
        vector: [0.12, 0.85, 0.44, 0.67],
        text: 'PHP vector database with HNSW index',
        metadata: ['url' => 'https://example.com/1', 'lang' => 'en'],
    ),
    new Document(
        id: 2,
        vector: [0.91, 0.23, 0.78, 0.05],
        text: 'Approximate nearest neighbour search in PHP',
        metadata: ['url' => 'https://example.com/2', 'lang' => 'en'],
    ),
    new Document(
        id: 3,
        vector: [0.33, 0.61, 0.19, 0.88],
        text: 'BM25 full-text ranking algorithm explained',
        metadata: ['url' => 'https://example.com/3', 'lang' => 'en'],
    ),
    // The id is omitted here — a UUID v4 is assigned automatically.
    new Document(
        vector: [0.55, 0.42, 0.71, 0.30],
        text: 'Hybrid search with Reciprocal Rank Fusion',
    ),
]);
use PHPVector\BM25\Config as BM25Config;
use PHPVector\BM25\SimpleTokenizer;
use PHPVector\Distance;
use PHPVector\HNSW\Config as HNSWConfig;
use PHPVector\VectorDatabase;
// Build a database with every component configured explicitly.
$db = new VectorDatabase(
    hnswConfig: new HNSWConfig(
        // Max connections per node per layer. Higher → better recall, more memory.
        M: 16,
        // Beam width during index build. Higher → better graph quality, slower inserts.
        efConstruction: 200,
        // Beam width during query. Higher → better recall, slower queries.
        efSearch: 50,
        // One of: Cosine | Euclidean | DotProduct | Manhattan.
        distance: Distance::Cosine,
        // Diverse neighbour selection (recommended).
        useHeuristic: true,
    ),
    bm25Config: new BM25Config(
        // TF saturation. Range 1.2–2.0.
        k1: 1.5,
        // Length normalisation. 0 = none, 1 = full.
        b: 0.75,
    ),
    tokenizer: new SimpleTokenizer(
        stopWords: SimpleTokenizer::DEFAULT_STOP_WORDS,
        minTokenLength: 2,
    ),
);
use PHPVector\Document;
use PHPVector\VectorDatabase;
// Bind the database to an on-disk location through the `path` argument.
$db = new VectorDatabase(path: '/var/data/mydb');

$db->addDocuments([
    new Document(
        id: 1,
        vector: [0.12, 0.85, 0.44],
        text: 'PHP vector search',
        metadata: ['source' => 'blog'],
    ),
    new Document(
        id: 2,
        vector: [0.91, 0.23, 0.78],
        text: 'Approximate nearest neighbour',
    ),
    // ... thousands more
]);

// Write the HNSW graph and the BM25 index to disk (document files are already written).
$db->save();
use PHPVector\VectorDatabase;
// Open the database stored at the given path.
$db = VectorDatabase::open('/var/data/mydb');

// All three search modes work immediately.
// NOTE: $queryVector is not defined in this snippet — supply your own query vector.
$results = $db->vectorSearch(vector: $queryVector, k: 5);
$results = $db->textSearch(query: 'nearest neighbour', k: 5);
$results = $db->hybridSearch(vector: $queryVector, text: 'nearest neighbour', k: 5);
use PHPVector\BM25\Config as BM25Config;
use PHPVector\Distance;
use PHPVector\HNSW\Config as HNSWConfig;
use PHPVector\VectorDatabase;
// Open a stored database while passing explicit configuration objects.
$db = VectorDatabase::open(
    path: '/var/data/mydb',
    hnswConfig: new HNSWConfig(
        M: 16,
        efSearch: 100,
        // Must match the value used on save().
        distance: Distance::Euclidean,
    ),
    bm25Config: new BM25Config(k1: 1.2, b: 0.8),
    tokenizer: new MyCustomTokenizer(),
);
// build.php — run once (or nightly)
$db = new VectorDatabase(
    hnswConfig: new HNSWConfig(M: 32, efConstruction: 400),
    path: '/var/data/mydb',
);

// Add every document produced by the application's own data source.
foreach (fetchDocumentsFromDatabase() as $doc) {
    $db->addDocument($doc);
}

// Save the result so that serve.php can open it later.
$db->save();

// serve.php — loaded on every request or worker boot
$db = VectorDatabase::open('/var/data/mydb', new HNSWConfig(M: 32));
$results = $db->vectorSearch($queryVector, k: 10);
use PHPVector\BM25\SimpleTokenizer;
use PHPVector\BM25\StopWords\EnglishStopWords;
use PHPVector\BM25\StopWords\ItalianStopWords;
use PHPVector\BM25\StopWords\FileStopWords;
use PHPVector\VectorDatabase;
// English stop words (the default)
$db = new VectorDatabase();

// Italian stop words
$db = new VectorDatabase(
    tokenizer: new SimpleTokenizer(new ItalianStopWords()),
);

// Stop words loaded from a file (one word per line, # for comments)
$db = new VectorDatabase(
    tokenizer: new SimpleTokenizer(new FileStopWords('/path/to/stopwords.txt')),
);
### Stop words file format (`FileStopWords`)
Use a plain UTF-8 text file with one stop word per line.
Rules:
- Empty lines are ignored
- Lines starting with `#` are treated as comments
- Words are normalized to lowercase when loaded
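Example (`stopwords-it.txt`): a file containing the lines `# Italian stop words`, `il`, `la` and `di` yields the stop words `il`, `la` and `di`; the comment line is skipped.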
// Delete a document by ID: returns true if found, false otherwise.
$deleted = $db->deleteDocument(1);

// Update a document (delete + insert with the same ID).
$updated = $db->updateDocument(
    new Document(
        id: 1,
        vector: [0.5, 0.5, 0.3, 0.2],
        text: 'Updated content here',
        metadata: ['version' => 2],
    ),
);

// After modifications, call save() to persist them.
$db->save();
use PHPVector\Metadata\MetadataFilter;
// Equality / inequality
$filter = MetadataFilter::eq('status', 'published');
$filter = MetadataFilter::neq('type', 'draft');

// Comparison operators
$filter = MetadataFilter::lt('price', 100);
$filter = MetadataFilter::lte('price', 100);
$filter = MetadataFilter::gt('rating', 4.0);
$filter = MetadataFilter::gte('rating', 4.0);

// Set membership
$filter = MetadataFilter::in('category', ['tech', 'science', 'engineering']);
$filter = MetadataFilter::notIn('status', ['deleted', 'archived']);

// Array containment — checks if the metadata array contains the value.
// Matches e.g. ['tags' => ['php', 'vector']].
$filter = MetadataFilter::contains('tags', 'php');

// Existence checks — does a metadata key exist (regardless of its value)?
$filter = MetadataFilter::exists('thumbnail');
$filter = MetadataFilter::notExists('deleted_at');
// (category = 'tech' OR category = 'science') AND status = 'published'
$results = $db->vectorSearch(
    vector: $queryVector,
    k: 10,
    filters: [
        // A nested array forms an OR group.
        [
            MetadataFilter::eq('category', 'tech'),
            MetadataFilter::eq('category', 'science'),
        ],
        // Top-level entries are ANDed with the OR group.
        MetadataFilter::eq('status', 'published'),
    ],
);

// Fetch 10× candidates before filtering (useful when filters are very selective).
$results = $db->vectorSearch(
    vector: $queryVector,
    k: 10,
    filters: [MetadataFilter::eq('rare_tag', 'value')],
    overFetch: 10,
);

// Alternatively, set the default multiplier at construction time.
$db = new VectorDatabase(
    overFetchMultiplier: 10,
);
// Add or update metadata keys.
$db->patchMetadata(
    id: 1,
    patch: [
        'status' => 'archived',
        'updated_at' => '2026-03-24',
    ],
);

// Remove metadata keys by setting them to null.
$db->patchMetadata(
    id: 1,
    patch: [
        // This key will be removed.
        'deprecated_field' => null,
    ],
);

// patchMetadata() returns false if the document is not found.
if (!$db->patchMetadata(id: 999, patch: ['key' => 'value'])) {
    echo "Document not found\n";
}
use PHPVector\Metadata\SortDirection;
// Find all documents matching the filters.
$results = $db->metadataSearch(
    filters: [MetadataFilter::eq('status', 'published')],
);

// Cap the number of results with a limit.
$results = $db->metadataSearch(
    filters: [MetadataFilter::gt('year', 2020)],
    limit: 100,
);

// Sort the results by a metadata key.
$results = $db->metadataSearch(
    filters: [MetadataFilter::eq('status', 'published')],
    sortBy: 'created_at',
    sortDirection: SortDirection::Desc,
);

// An empty filter list returns all documents.
$allDocs = $db->metadataSearch(filters: [], limit: 50);
// Given a document with metadata ['year' => 2024] (integer):
MetadataFilter::eq('year', 2024);    // ✓ matches
MetadataFilter::eq('year', '2024');  // ✗ does not match (string vs int)

// Given a document with metadata ['rating' => 4.5] (float):
MetadataFilter::gt('rating', 4);     // ✓ matches (4.5 > 4)
MetadataFilter::eq('rating', 4.5);   // ✓ matches
MetadataFilter::eq('rating', '4.5'); // ✗ does not match (string vs float)
use PHPVector\BM25\TokenizerInterface;
/**
 * Example tokenizer: lowercases the text, splits it on whitespace and stems each token.
 *
 * `porter_stem()` is a placeholder — substitute your own stemmer.
 */
final class PorterStemTokenizer implements TokenizerInterface
{
    /**
     * Split $text into lowercase, stemmed tokens.
     *
     * @param string $text Raw text to tokenize.
     *
     * @return list<string> Stemmed tokens in order of appearance (assuming the stemmer
     *                      returns strings); empty when $text has no non-whitespace content.
     *
     * @throws \RuntimeException If the text cannot be split (PCRE failure).
     */
    public function tokenize(string $text): array
    {
        // The `u` modifier makes PCRE treat the subject as UTF-8, so a multi-byte
        // character is never split mid-sequence and Unicode whitespace also
        // separates tokens. This matches the multibyte-aware mb_strtolower() call.
        $tokens = preg_split('/\s+/u', mb_strtolower(trim($text)), -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() returns false on failure; fail loudly instead of handing
        // a non-array to array_map().
        if ($tokens === false) {
            throw new \RuntimeException('Failed to tokenize text: ' . preg_last_error_msg());
        }

        return array_map(static fn (string $token) => porter_stem($token), $tokens); // your stemmer here
    }
}
$db = new VectorDatabase(tokenizer: new PorterStemTokenizer());
bash
# Quick run with no options (1 K and 10 K vectors, 128 dimensions)
php benchmark/benchmark.php
# Full run over the listed scenarios — save the report to report.md
php benchmark/benchmark.php --scenarios=xs,small,medium,large,highdim --output=report.md
# Large dataset, skip recall (brute-force would be slow), using 500 queries
php benchmark/benchmark.php --scenarios=large --no-recall --queries=500
# Tune HNSW parameters (ef-search = 100, m = 32)
php benchmark/benchmark.php --scenarios=small --ef-search=100 --m=32
# Show all available options
php benchmark/benchmark.php --help
Loading, please wait...
Before you can download the PHP files, the dependencies must be resolved. This can take a few minutes. Please be patient.