PHP code example of mauricioperera / php-vector-store

1. Go to this page and download the library: Download mauricioperera/php-vector-store library. Choose the download type "require".

2. Extract the ZIP file and open the index.php.

3. Add this code to the index.php.
    
        
<?php
// Load the Composer autoloader relative to this file. A bare relative path is
// resolved against the include path and the current working directory, so it
// breaks when the script is started from another directory.
require_once __DIR__ . '/vendor/autoload.php';

/* Start to develop here. Best regards https://php-download.com/ */

    

mauricioperera / php-vector-store example snippets


use PHPVectorStore\VectorStore;
use PHPVectorStore\QuantizedStore;
use PHPVectorStore\IVFIndex;
use PHPVectorStore\HybridSearch;
use PHPVectorStore\HybridMode;
use PHPVectorStore\Distance;
use PHPVectorStore\BM25\Index as BM25Index;

// Quick-start tour of the four search styles the library offers.
// $embedding, $query and $query_vector are placeholders: nothing in this
// snippet defines them, so supply your own embedding vectors before running it.

// 1. Vector search
// Second constructor argument is the vector dimension count (384 here).
$store = new QuantizedStore( __DIR__ . '/vectors', 384 );
// Arguments: collection, id, vector, metadata.
$store->set( 'articles', 'art-1', $embedding, ['title' => 'My Article'] );
// presumably persists the pending write to disk — TODO confirm against the store implementation
$store->flush();

// Limit of 5 results, searched through the stages [128, 256, 384].
$results = $store->matryoshkaSearch( 'articles', $query, 5, [128, 256, 384] );

// 2. Full-text search (BM25)
// BM25Index is the alias declared for PHPVectorStore\BM25\Index in the `use` lines above.
$bm25 = new BM25Index();
$bm25->addDocument( 'articles', 'art-1', 'My article about machine learning...' );

// $results is overwritten by every step of this tour; only the last assignment survives.
$results = $bm25->search( 'articles', 'machine learning', 10 );

// 3. Hybrid search (vector + text combined)
$hybrid = new HybridSearch( $store, $bm25, HybridMode::RRF );
$results = $hybrid->search( 'articles', $query_vector, 'machine learning', 5 );

// 4. Multiple distance metrics
// Arguments after the query: limit 5, dimSlice 0, then the distance metric.
$results = $store->search( 'articles', $query, 5, 0, Distance::Euclidean );

// The three storage back-ends, compared by bytes stored per vector.

// Full precision: dim x 4 bytes per vector
$store = new VectorStore( '/path', 768 );  // 768 x 4 = 3072 B/vec

// Int8 quantized: dim + 8 bytes per vector (4x smaller)
$q8 = new QuantizedStore( '/path', 384 );  // 384 + 8 = 392 B/vec

// Binary quantized: ceil(dim/8) bytes per vector (32x smaller)
// NOTE(review): no `use` line visible in this file imports BinaryQuantizedStore —
// confirm its namespace and add the import before running this snippet.
$b1 = new BinaryQuantizedStore( '/path', 768 );  // 96 B/vec

use PHPVectorStore\BM25\Index;
use PHPVectorStore\BM25\Config;
use PHPVectorStore\BM25\SimpleTokenizer;

// Build a BM25 index with explicit configuration and tokenizer.
// k1 and b are presumably the standard BM25 term-saturation and
// length-normalisation parameters — TODO confirm against BM25\Config.
$bm25 = new Index(
    config: new Config( k1: 1.5, b: 0.75 ),
    tokenizer: new SimpleTokenizer(),
);

// Index documents
// Arguments: collection, document id, document text.
$bm25->addDocument( 'articles', 'doc-1', 'The quick brown fox...' );
$bm25->addDocument( 'articles', 'doc-2', 'Database systems and SQL...' );

// Search
// Third argument is the result limit.
$results = $bm25->search( 'articles', 'quick fox', 10 );
// [['id' => 'doc-1', 'score' => 1.234, 'rank' => 1], ...]

// Get raw scores (for hybrid fusion)
$scores = $bm25->scoreAll( 'articles', 'quick fox' );
// ['doc-1' => 1.234, 'doc-2' => 0.0]

// Persist to disk
$bm25->save( '/path/vectors', 'articles' );  // writes articles.bm25.bin
$bm25->load( '/path/vectors', 'articles' );  // restores state

// Custom stop words for Spanish
$tokenizer = new SimpleTokenizer(
    stopWords: ['el', 'la', 'los', 'las', 'de', 'en', 'y', 'que', 'es', 'un', 'una'],
    minTokenLength: 2,
);
// Replaces the $bm25 instance built above with a fresh index that uses the custom tokenizer.
$bm25 = new Index( tokenizer: $tokenizer );

use PHPVectorStore\HybridSearch;
use PHPVectorStore\HybridMode;

// Hybrid search combines a vector store and a BM25 index.
// $store and $bm25 come from the earlier snippets; $vector is a placeholder
// query embedding that this snippet does not define.

// RRF fusion (recommended — robust, no tuning needed)
$hybrid = new HybridSearch( $store, $bm25, HybridMode::RRF );
// Arguments: collection, query vector, query text, result limit.
$results = $hybrid->search( 'articles', $vector, 'search text', 5 );

// Weighted fusion (tunable weights)
$hybrid = new HybridSearch( $store, $bm25, HybridMode::Weighted );
// The fifth argument is the options array carrying the per-signal weights.
$results = $hybrid->search( 'articles', $vector, 'search text', 5, [
    'vectorWeight' => 0.7,
    'textWeight'   => 0.3,
]);

// Multi-collection hybrid
// Uses the Weighted-mode $hybrid assigned just above; no options array is passed here.
$results = $hybrid->searchAcross(
    ['articles', 'comments'],
    $vector, 'search text', 10,
);

use PHPVectorStore\Distance;

// Selecting a distance metric: the fifth argument of search() is a Distance case.
// The preceding arguments are collection, query vector, limit (5) and dimSlice (0).
// $query is a placeholder embedding; the return values are discarded in these examples.

// Cosine similarity (default) — best for normalized embeddings
$store->search( 'col', $query, 5, 0, Distance::Cosine );

// Euclidean distance — converted to similarity: 1/(1+dist)
$store->search( 'col', $query, 5, 0, Distance::Euclidean );

// Dot product — for pre-normalized vectors
$store->search( 'col', $query, 5, 0, Distance::DotProduct );

// Manhattan distance — robust to outliers: 1/(1+dist)
$store->search( 'col', $query, 5, 0, Distance::Manhattan );

// IVF index over an existing store: 100 clusters, 20 probes
// (the constructor signature in the API listing defaults numProbes to 10).
$ivf = new IVFIndex( $store, numClusters: 100, numProbes: 20 );
// Build the index for the collection before searching through it.
$ivf->build( 'articles' );
// Both search styles are available through the index; $query is a placeholder embedding.
$results = $ivf->search( 'articles', $query, 5 );
$results = $ivf->matryoshkaSearch( 'articles', $query, 5, [128, 256, 384] );

// Matryoshka search directly on the store, with stages [128, 384, 768]; the result is discarded here.
$store->matryoshkaSearch( 'col', $query, 5, [128, 384, 768] );

use PHPVectorStore\StoreInterface;

/**
 * Build an IVF index for the 'articles' collection of the given store.
 *
 * The index is created with the IVFIndex constructor defaults (only the
 * store is passed) and is discarded once build() returns.
 *
 * @param StoreInterface $store Store whose 'articles' collection is indexed.
 */
function buildIndex( StoreInterface $store ): void {
    ( new IVFIndex( $store ) )->build( 'articles' );
}

// Works with any store
// Each call passes a different store class to the same StoreInterface-typed parameter.
buildIndex( new VectorStore( '/path', 384 ) );
buildIndex( new QuantizedStore( '/path', 384 ) );
// NOTE(review): BinaryQuantizedStore is not imported by any `use` line visible here — confirm its namespace.
buildIndex( new BinaryQuantizedStore( '/path', 768 ) );

use PHPVectorStore\Document;
use PHPVectorStore\SearchResult;

// Value objects: a Document carries id, vector, text and metadata.
$doc = new Document(
    id: 'doc-1',
    // NOTE: the `...` below is an elision placeholder, not valid PHP — replace it with the full vector.
    vector: [0.1, 0.2, ...],
    text: 'The quick brown fox...',
    metadata: ['title' => 'My Doc'],
);

// A SearchResult carries id, score, rank, metadata and the collection it belongs to.
$result = new SearchResult(
    id: 'doc-1',
    score: 0.95,
    rank: 1,
    metadata: ['title' => 'My Doc'],
    collection: 'articles',
);

use PHPVectorStore\Exception\VectorStoreException;
use PHPVectorStore\Exception\DimensionMismatchException;
use PHPVectorStore\Exception\CollectionNotFoundException;

// Write
->set( $collection, $id, $vector, $metadata = [] )
->remove( $collection, $id ): bool
->drop( $collection )
->flush()

// Read
->get( $collection, $id ): ?array     // {id, vector, metadata}
->has( $collection, $id ): bool
->count( $collection ): int
->ids( $collection ): string[]
->collections(): string[]
->stats(): array
->dimensions(): int
->directory(): string

// Search
->search( $collection, $query, $limit = 5, $dimSlice = 0, $distance = null )
->matryoshkaSearch( $collection, $query, $limit = 5, $stages = [...], $multiplier = 3, $distance = null )
->searchAcross( $collections, $query, $limit = 5, $dimSlice = 0, $distance = null )

// Import/Export
->import( $collection, $records ): int
->export( $collection ): array

->addDocument( $collection, $id, $text )
->removeDocument( $collection, $id )
->search( $collection, $query, $limit = 10 ): array
->scoreAll( $collection, $query ): array    // id => score
->count( $collection ): int
->vocabularySize( $collection ): int
->save( $directory, $collection )
->load( $directory, $collection )
->exportState( $collection ): array
->importState( $collection, $state )

->search( $collection, $vector, $text, $limit = 5, $options = [] )
->searchAcross( $collections, $vector, $text, $limit = 5, $options = [] )

new IVFIndex( StoreInterface $store, int $numClusters = 100, int $numProbes = 10 )

->build( $collection, $sampleDims = 128 ): array
->search( $collection, $query, $limit = 5, $dimSlice = 0 )
->matryoshkaSearch( $collection, $query, $limit, $stages, $multiplier = 3 )
->hasIndex( $collection ): bool
->indexStats( $collection ): ?array
->dropIndex( $collection )

VectorStore::normalize( $vector ): array
VectorStore::cosineSim( $a, $b, $dims ): float
VectorStore::euclideanDist( $a, $b, $dims ): float
VectorStore::dotProduct( $a, $b, $dims ): float
VectorStore::manhattanDist( $a, $b, $dims ): float
VectorStore::computeScore( $a, $b, $dims, Distance $distance ): float

// WordPress integration: keep a vector store and a BM25 index in sync with published posts.
$store = new QuantizedStore( WP_CONTENT_DIR . '/vectors', 384 );
// Fully qualified on purpose: a qualified name such as `BM25\Index` only resolves to the
// library class when `PHPVectorStore\BM25` itself is imported, which no `use` line here does.
$bm25  = new \PHPVectorStore\BM25\Index();

// NOTE(review): $bm25 starts empty on every request and is never load()-ed here, so the
// save() below may persist only the documents added during this request — confirm whether
// a load() call is needed before addDocument().
add_action( 'wp_after_insert_post', function( $id, $post ) use ( $store, $bm25 ) {
    // Only published posts are indexed.
    if ( 'publish' !== $post->post_status ) {
        return;
    }

    // Index title and tag-stripped body together as one text.
    $text   = $post->post_title . ' ' . wp_strip_all_tags( $post->post_content );
    // your_embedding_api() is a placeholder for your embedding call; keep only the first
    // 384 components so the vector matches the store dimension.
    $vector = array_slice( your_embedding_api( $text ), 0, 384 );

    // Store ids are strings, so the numeric post id is cast.
    $store->set( 'posts', (string) $id, $vector, ['title' => $post->post_title] );
    $bm25->addDocument( 'posts', (string) $id, $text );

    // Persist both indexes next to each other.
    $store->flush();
    $bm25->save( WP_CONTENT_DIR . '/vectors', 'posts' );
}, 10, 2 ); // priority 10, callback receives 2 arguments ($id, $post)

// Hybrid search
// $query_vector and $search_text are placeholders for the embedded query and its raw text.
$hybrid = new HybridSearch( $store, $bm25, HybridMode::RRF );
$results = $hybrid->search( 'posts', $query_vector, $search_text, 5 );

// Service Provider
// Registers StoreInterface as a singleton binding whose factory builds a
// 384-dimension QuantizedStore under storage_path( 'vectors' ).
$this->app->singleton( StoreInterface::class, fn() =>
    new QuantizedStore( storage_path( 'vectors' ), 384 )
);

// Controller
/**
 * Search articles by embedding similarity and return them in relevance order.
 *
 * The query text in $request->q is embedded, truncated to 384 components and
 * run through a matryoshka search limited to 10 hits. The matching Article
 * models are then re-sorted to follow the order of the search results,
 * because a plain whereIn() query does not preserve the order of the id list.
 *
 * @param Request $request Request carrying the query text in `q`.
 * @return mixed Collection of Article models, best match first.
 */
public function search( Request $request ) {
    $store   = app( StoreInterface::class );
    $query   = array_slice( $this->embed( $request->q ), 0, 384 );
    $results = $store->matryoshkaSearch( 'articles', $query, 10, [128, 256, 384] );

    $ids  = array_column( $results, 'id' );
    // Map each id to its position in the result list (id => rank).
    $rank = array_flip( $ids );

    return Article::whereIn( 'id', $ids )
        ->get()
        // Any model missing from the rank map sorts last.
        ->sortBy( static fn ( $article ) => $rank[ $article->id ] ?? PHP_INT_MAX )
        ->values();
}

use PHPVectorStore\Integration\NeuronVectorStore;

/**
 * RAG subclass that supplies a NeuronVectorStore as its vector store.
 */
class MyRAG extends RAG {
    /**
     * Create the vector store used by this RAG.
     *
     * The store lives in the `vectors` directory next to this file and is
     * configured for 384 dimensions with quantization and matryoshka enabled.
     */
    protected function vectorStore(): VectorStoreInterface {
        $directory = __DIR__ . '/vectors';

        $store = new NeuronVectorStore(
            directory:  $directory,
            dimensions: 384,
            quantized:  true,
            matryoshka: true,
        );

        return $store;
    }
}