PHP code example of gioni06 / gpt3-tokenizer

1. Go to this page and download the library: Download gioni06/gpt3-tokenizer library. Choose the download type "require".

2. Extract the ZIP file and open the index.php.

3. Add this code to the index.php.
    
        
<?php
require_once('vendor/autoload.php');

/* Start to develop here. Best regards https://php-download.com/ */

    

gioni06 / gpt3-tokenizer example snippets


use Gioni06\Gpt3Tokenizer\Gpt3TokenizerConfig;

// A config created without further calls uses the defaults:
// default vocab path, default merges path, caching enabled.
$defaultConfig = new Gpt3TokenizerConfig();

// Override individual settings through the fluent setters.
$customConfig = new Gpt3TokenizerConfig();
$customConfig
    ->vocabPath('custom_vocab.json') // path to a custom vocabulary file
    ->mergesPath('custom_merges.txt') // path to a custom merges file
    ->useCache(false); // turn off the caching that is enabled by default

use Gioni06\Gpt3Tokenizer\Gpt3TokenizerConfig;
use Gioni06\Gpt3Tokenizer\Gpt3Tokenizer;

// Encode a string into a list of token ids.
$text = 'This is some text';

// The tokenizer is built from a config object; here the defaults are used.
$config = new Gpt3TokenizerConfig();
$tokenizer = new Gpt3Tokenizer($config);

$tokens = $tokenizer->encode($text);
// $tokens: [1212, 318, 617, 2420]

use Gioni06\Gpt3Tokenizer\Gpt3TokenizerConfig;
use Gioni06\Gpt3Tokenizer\Gpt3Tokenizer;

// Decode a list of token ids back into the text they represent.
$config = new Gpt3TokenizerConfig();
$tokenizer = new Gpt3Tokenizer($config);
$tokens = [1212, 318, 617, 2420];
$text = $tokenizer->decode($tokens);
// $text: "This is some text"

use Gioni06\Gpt3Tokenizer\Gpt3TokenizerConfig;
use Gioni06\Gpt3Tokenizer\Gpt3Tokenizer;

// Count how many tokens a string encodes to.
$text = 'This is some text';

// The tokenizer is built from a config object; here the defaults are used.
$config = new Gpt3TokenizerConfig();
$tokenizer = new Gpt3Tokenizer($config);

$numberOfTokens = $tokenizer->count($text);
// $numberOfTokens: 4

use Gioni06\Gpt3Tokenizer\Gpt3TokenizerConfig;
use Gioni06\Gpt3Tokenizer\Gpt3Tokenizer;

// Encode a string and split the resulting token ids into chunks of at most
// 5 tokens each.
$config = new Gpt3TokenizerConfig();
$tokenizer = new Gpt3Tokenizer($config);
// The comma is the fullwidth comma U+FF0C (UTF-8 bytes EF BC 8C), not an ASCII
// comma. It encodes to the three byte-level tokens 171, 120, 234 shown below.
$text = "1 2 hello，world 3 4";
$tokenChunks = $tokenizer->encodeInChunks($text, 5);
// $tokenChunks: [[16, 362, 23748], [171, 120, 234, 6894, 513], [604]]
// As the output shows, the first chunk stops at three tokens, so the three
// tokens of the multi-byte comma land together in the second chunk.

use Gioni06\Gpt3Tokenizer\Gpt3TokenizerConfig;
use Gioni06\Gpt3Tokenizer\Gpt3Tokenizer;

// Split a string into text chunks, each encoding to at most 5 tokens.
$config = new Gpt3TokenizerConfig();
$tokenizer = new Gpt3Tokenizer($config);
// The comma is the fullwidth comma U+FF0C, a multi-byte character that encodes
// to three tokens. That is why the first chunk ends after "hello".
$text = "1 2 hello，world 3 4";
$textChunks = $tokenizer->chunk($text, 5);
// $textChunks: ['1 2 hello', '，world 3', ' 4']