PHP code example of edgaras / strsim

1. Go to this page and download the library: "Download edgaras/strsim library". Choose the download type "require".

2. Extract the ZIP file and open index.php.

3. Add this code to index.php:
    
        
<?php
// Load Composer's autoloader relative to this file rather than the current
// working directory, so the script works no matter where it is invoked from.
require_once __DIR__ . '/vendor/autoload.php';

/* Start developing here. Best regards, https://php-download.com/ */

    

edgaras / strsim example snippets




use Edgaras\StrSim\Levenshtein;
use Edgaras\StrSim\DamerauLevenshtein;
use Edgaras\StrSim\Hamming;
use Edgaras\StrSim\Jaro;
use Edgaras\StrSim\JaroWinkler;
use Edgaras\StrSim\LCS;
use Edgaras\StrSim\SmithWaterman;
use Edgaras\StrSim\NeedlemanWunsch;
use Edgaras\StrSim\Cosine;
use Edgaras\StrSim\Jaccard;
use Edgaras\StrSim\MongeElkan;

// Spelling-error distance for user input. Documented result: 3
Levenshtein::distance('kitten', 'sitting');

// Typo distance where a transposition is corrected. Documented result: 1
DamerauLevenshtein::distance('abcd', 'acbd');

// Bit-level error detection; inputs must have equal length. Documented result: 2
Hamming::distance('1011101', '1001001');

// Short-string comparison with transposition support.
// Documented results: similarity 0.767, distance 0.233 (distance = 1 - similarity)
Jaro::similarity('dixon', 'dicksonx');
Jaro::distance('dixon', 'dicksonx');

// Name matching that favours common prefixes.
// Documented results: similarity 0.961, distance 0.039 (distance = 1 - similarity)
JaroWinkler::similarity('martha', 'marhta');
JaroWinkler::distance('martha', 'marhta');

// Common subsequence length of two DNA fragments. Documented result: 13
// NOTE(review): "GTCGTCGGAAGCCGGCCGAA" (20 characters) is a subsequence of both
// inputs, so 13 looks too low for a longest-common-subsequence length; verify
// against the library's actual output.
LCS::length('ACCGGTCGAGTGCGCGGAAGCCGGCCGAA', 'GTCGTTCGGAATGCCGTTGCTCTGTAAA');

// Local alignment score (substring match). Documented result: 11
SmithWaterman::score('ACACACTA', 'AGCACACA');

// Global alignment score (complete sequence match). Documented result: 0
NeedlemanWunsch::score('GATTACA', 'GCATGCU');

// Frequency-based comparison of two short texts. Documented result: 0.6
Cosine::similarity('night', 'nacht');

// Comparison of embedding vectors, e.g. from an NLP model. Documented result: 0.925
// NOTE(review): the textbook cosine similarity of these two vectors is about
// 0.996; confirm the documented value against the library.
Cosine::similarityFromVectors([0.1, 0.2, 0.3], [0.1, 0.3, 0.4]);

// Token overlap between two short strings. Documented result: 0.5
Jaccard::index('abc', 'bcd');

// Fuzzy match between two multi-word names. Documented result: 0.822
MongeElkan::similarity('john smith', 'jon smythe');

// Unicode input is supported by all algorithms.
// Documented results: 2 for the accented Latin pair, 1 for the Japanese pair.
Levenshtein::distance('café', 'caffe');
Levenshtein::distance('こんにちは', 'こんにちわ');

// Emoji and other complex characters.
// Documented results: 1 for the differing second emoji, 1 for the differing skin-tone modifier.
Levenshtein::distance('🚀🌟', '🚀⭐');
Hamming::distance('👍🏽', '👍🏾');

// Other scripts and languages.
// Identical Cyrillic strings: documented similarity 1.0, distance 0.0.
Jaro::similarity('привет', 'привет');
Jaro::distance('привет', 'привет');
// Arabic strings differing in the final letter: documented similarity 0.9, distance 0.1.
JaroWinkler::similarity('عربي', 'عربى');
JaroWinkler::distance('عربي', 'عربى');

// ZWJ sequences and combining marks.
// Family emoji joined with zero-width joiners vs. the same emoji unjoined. Documented result: 3
Levenshtein::distance('👨‍👩‍👧‍👦', '👨👩👧👦');
// Precomposed "é" vs. "e" followed by the combining acute accent U+0301. Documented result: 2
Levenshtein::distance('é', "e\u{0301}");

// Smith-Waterman with caller-supplied scoring. Documented result: 20
SmithWaterman::score(
    'ACGT',
    'ACGT',
    match: 5,
    mismatch: -2,
    gap: -1,
);

// Needleman-Wunsch with caller-supplied scoring. Documented result: 12
NeedlemanWunsch::score(
    'ACGT',
    'ACGT',
    match: 3,
    mismatch: -1,
    gap: -2,
);

// Jaro-Winkler with a custom prefix scale factor (third argument), for both
// the similarity and the distance variants.
JaroWinkler::similarity('prefix_test', 'prefix_demo', 0.2);
JaroWinkler::distance('prefix_test', 'prefix_demo', 0.2);

// Each call below is documented to be rejected with an InvalidArgumentException.
// The class name is fully qualified so the catch still matches if this snippet
// is pasted under a namespace declaration, and every message is terminated
// with PHP_EOL so consecutive messages do not run together on one line.

try {
    // Unequal-length inputs are not accepted by the Hamming distance.
    Hamming::distance('abc', 'abcd');
} catch (\InvalidArgumentException $e) {
    echo $e->getMessage(), PHP_EOL; // "Strings must be of equal length."
}

try {
    // The second argument is not valid UTF-8.
    Levenshtein::distance('valid', "\xFF\xFF");
} catch (\InvalidArgumentException $e) {
    echo $e->getMessage(), PHP_EOL; // "Input strings must be valid UTF-8."
}

try {
    // The two vectors have different lengths.
    Cosine::similarityFromVectors([1, 2], [1, 2, 3]);
} catch (\InvalidArgumentException $e) {
    echo $e->getMessage(), PHP_EOL; // "Vectors must be the same length."
}