PHP code example of farzai / thai-word

1. Go to this page and download the library: Download farzai/thai-word library. Choose the "require" download type.

2. Extract the ZIP file and open index.php.

3. Add this code to the index.php.
    
        
<?php
// Load Composer's autoloader so the library classes used below can be resolved.
// The path is anchored to this file's directory (__DIR__) so the include does
// not depend on the current working directory or on include_path.
require_once __DIR__ . '/vendor/autoload.php';

/* Start to develop here. Best regards https://php-download.com/ */

    

farzai / thai-word example snippets


use Farzai\ThaiWord\Composer;

// Split an unspaced Thai sentence into a list of words.
$words = Composer::segment('สวัสดีครับผมชื่อสมชาย');
// Result: ['สวัสดี', 'ครับ', 'ผม', 'ชื่อ', 'สมชาย']

// Segment and join the words into a single string; the second argument is
// the delimiter placed between words (a space here).
$text = Composer::segmentToString('สวัสดีครับผมชื่อสมชาย', ' ');
// Result: 'สวัสดี ครับ ผม ชื่อ สมชาย'

// Batch processing: pass a list of texts and get one word list back per text.
$results = Composer::segmentBatch(['สวัสดีครับ', 'ขอบคุณค่ะ']);
// Result: [['สวัสดี', 'ครับ'], ['ขอบคุณ', 'ค่ะ']]

// Enable word suggestions via the facade.
// Recommended thresholds: 0.4-0.5 for single characters,
// 0.6-0.7 for multi-character words.
Composer::enableSuggestions(['threshold' => 0.5]);

// Ask for dictionary words similar to a misspelled word. Each entry pairs a
// candidate word with its similarity score.
$suggestions = Composer::suggest('สวัสด');
// Result: [
//     ['word' => 'สวัสดี', 'score' => 0.833],
//     ['word' => 'สวัสดิ์', 'score' => 0.714],
//     ['word' => 'สวัสติ', 'score' => 0.667]
// ]

// Segment and, for single unrecognized characters, attach suggestions.
// Recognized words come back with only a 'word' key; the unrecognized 'อ'
// additionally carries a 'suggestions' list.
$result = Composer::segmentWithSuggestions('โอเคอไร');
// Result: [
//     ['word' => 'โอเค'],
//     ['word' => 'อ', 'suggestions' => [
//         ['word' => 'กอ', 'score' => 0.5],
//         ['word' => 'ขอ', 'score' => 0.5],
//         ['word' => 'คอ', 'score' => 0.5]
//     ]],
//     ['word' => 'ไร']
// ]

// Fetch performance statistics collected by the facade.
$stats = Composer::getStats();

use Farzai\ThaiWord\Segmenter\ThaiSegmenter;

// Use the segmenter class directly instead of the static Composer facade.
$segmenter = new ThaiSegmenter();
$words = $segmenter->segment('สวัสดีครับผมชื่อสมชาย');

// Result: ['สวัสดี', 'ครับ', 'ผม', 'ชื่อ', 'สมชาย']

use Farzai\ThaiWord\Segmenter\ThaiSegmenter;

// Work with a dedicated segmenter instance.
$segmenter = new ThaiSegmenter();

// Enable word suggestions on this instance.
$segmenter->enableSuggestions([
    'threshold' => 0.5,        // Minimum similarity score (0.0-1.0)
    'max_suggestions' => 5     // Maximum suggestions per word
]);

// Get suggestions for a misspelled word. Each entry pairs a candidate word
// with its similarity score.
$suggestions = $segmenter->suggest('สวัสด'); // Missing last character
// Result: [
//     ['word' => 'สวัสดี', 'score' => 0.833],
//     ['word' => 'สวัสดิ์', 'score' => 0.714],
//     ['word' => 'สวัสติ', 'score' => 0.667]
// ]

// Segment text; single unrecognized characters get a 'suggestions' list,
// while recognized words come back with only a 'word' key.
$result = $segmenter->segmentWithSuggestions('ชื่ออไรนะ'); // 'อ' is unrecognized single character
// Result: [
//     ['word' => 'ชื่อ'],
//     ['word' => 'อ', 'suggestions' => [
//         ['word' => 'กอ', 'score' => 0.5],
//         ['word' => 'ขอ', 'score' => 0.5],
//         ['word' => 'คอ', 'score' => 0.5]
//     ]],
//     ['word' => 'ไร'],
//     ['word' => 'นะ']
// ]

// Mixed Thai/Latin input: per the result below, the Latin word 'Computer'
// is returned as a single token and the surrounding spaces are not emitted.
$segmenter = new ThaiSegmenter();
$result = $segmenter->segment('ผมใช้ Computer ทำงาน');
// Result: ['ผม', 'ใช้', 'Computer', 'ทำงาน']

use Farzai\ThaiWord\Composer;

// Split a sentence into words through the static facade.
$words = Composer::segment('สวัสดีครับผมชื่อสมชาย');
// Result: ['สวัสดี', 'ครับ', 'ผม', 'ชื่อ', 'สมชาย']

// Read the collected statistics and print the average processing time.
$stats = Composer::getStats();
echo "Processing time: ", $stats['avg_processing_time'], "ms";

// Register an additional word with the facade's dictionary.
Composer::getDictionary()->add('คำใหม่');

// Segment several texts in one call.
$results = Composer::segmentBatch([
    'ข้อความ1',
    'ข้อความ2',
]);

// Adjust the facade's configuration.
Composer::updateConfig([
    'enable_caching' => true,
    'memory_limit_mb' => 200,
]);

use Farzai\ThaiWord\Segmenter\ThaiSegmenter;

// Create a segmenter with custom configuration. The first two constructor
// arguments are left as null here; the third is the configuration array.
$segmenter = new ThaiSegmenter(null, null, [
    'enable_caching' => true,
    'batch_size' => 500
]);

// Or build a custom instance through the facade, using the same argument
// layout as the constructor call above.
$customSegmenter = Composer::create(null, null, ['memory_limit_mb' => 150]);

// Make the facade's static calls use the custom instance.
Composer::setSegmenter($customSegmenter);

use Farzai\ThaiWord\Segmenter\ThaiSegmenter;
use Farzai\ThaiWord\Suggestions\Strategies\LevenshteinSuggestionStrategy;

// Create a custom suggestion strategy. The setters are chained, so
// setThreshold() is expected to return the strategy itself.
$suggestionStrategy = new LevenshteinSuggestionStrategy;
$suggestionStrategy->setThreshold(0.8)              // Higher accuracy
                   ->setMaxWordLengthDiff(2);       // Stricter length filtering

// Initialize the segmenter with the custom strategy.
// NOTE(review): another example in this file passes a configuration array as
// the third constructor argument; confirm the constructor's parameter order
// against the installed version before relying on this call.
$segmenter = new ThaiSegmenter(null, null, $suggestionStrategy);

// Or attach the strategy after construction.
$segmenter->setSuggestionStrategy($suggestionStrategy);

// Start from a fresh segmenter with suggestions switched on (default options).
$segmenter = new ThaiSegmenter();
$segmenter->enableSuggestions();

// Run one segmentation so there is activity to report on.
$result = $segmenter->segmentWithSuggestions('สวัสดีครบผมชื่อโจน');

// Report the segmenter's cache hit ratio as a percentage.
$stats = $segmenter->getStats();
echo "Cache hit ratio: ", $stats['cache_hit_ratio'] * 100, "%\n";

// Cache statistics are only read when the active strategy is the
// Levenshtein implementation, which provides getCacheStats().
$suggestionStrategy = $segmenter->getSuggestionStrategy();
if ($suggestionStrategy instanceof LevenshteinSuggestionStrategy) {
    $cacheStats = $suggestionStrategy->getCacheStats();
    echo "Suggestion cache size: ", $cacheStats['cache_size'], "\n";
    echo "Memory usage: ", $cacheStats['memory_usage_mb'], "MB\n";
}

// Sample inputs; the inline notes mark where a misspelling is expected.
$texts = [
    'สวัสดีครบ',      // Contains typo
    'ขอบคนครับ',      // Contains typo
    'ผมชื่อโจน',       // Might need suggestions
];

$segmenter = new ThaiSegmenter();
$segmenter->enableSuggestions(['threshold' => 0.7]);

foreach ($texts as $text) {
    $result = $segmenter->segmentWithSuggestions($text);

    foreach ($result as $item) {
        // An item may have no 'suggestions' key, or the list may be empty;
        // only print when a top-ranked suggestion actually exists, so that
        // index 0 is never read from an empty list.
        $topSuggestion = $item['suggestions'][0]['word'] ?? null;
        if ($topSuggestion === null) {
            continue;
        }

        echo "'{$item['word']}' → Suggested: '{$topSuggestion}'\n";
    }
}

// Example output:
// 'ครบ' → Suggested: 'ครับ'
// 'คน' → Suggested: 'คุณ'
// 'โจน' → Suggested: 'โจ้'
//
// NOTE(review): other examples in this file state that
// segmentWithSuggestions() only attaches suggestions to single unrecognized
// characters and that 'ครบ' receives none; confirm this sample output
// against the installed library version.

// Demonstrates which inputs receive automatic suggestions during segmentation.
$segmenter = new ThaiSegmenter();
$segmenter->enableSuggestions(['threshold' => 0.5]);

// ✅ Will get suggestions - 'อ' is a single character not in the dictionary
$result = $segmenter->segmentWithSuggestions('โอเคอไร');
// The 'อ' item carries suggestions for the words 'กอ', 'ขอ', 'คอ', ...
// (each suggestion entry is a ['word' => ..., 'score' => ...] array)

// ❌ Won't get suggestions - 'ครบ' is multi-character and in the dictionary
$result = $segmenter->segmentWithSuggestions('สวัสดีครบ');
// 'ครบ' gets NO suggestions (even though 'ครับ' might be intended)

// ✅ For multi-character suggestions, call suggest() directly
$suggestions = $segmenter->suggest('ครบ');
// Suggested words: 'ครับ', 'ครอบ', 'คราบ', ...
// (returned as ['word' => ..., 'score' => ...] entries, as in the other
// suggest() examples in this file)

// Configure, reconfigure and finally switch off suggestions on one instance.
$segmenter = new ThaiSegmenter();

// Enable suggestions with a threshold suited to single characters
$segmenter->enableSuggestions([
    'threshold' => 0.5,         // Optimal for single characters
    'max_suggestions' => 3      // Maximum suggestions per word
]);

// Update the segmenter configuration.
// NOTE(review): 'max_suggestions' here (5) differs from the value passed to
// enableSuggestions() above (3); which one takes effect is not shown in this
// example - confirm against the library.
$segmenter->updateConfig([
    'enable_caching' => true,
    'memory_limit_mb' => 150,
    'suggestion_threshold' => 0.5,  // Adjusted for single characters
    'max_suggestions' => 5
]);

// Disable suggestions when they are not needed
$segmenter->disableSuggestions();