PHP code example of ecourty / text-chunker

1. Go to the download page and download the ecourty/text-chunker library. Choose the download type "require".

2. Extract the ZIP file and open index.php.

3. Add this code to index.php:
    
        
<?php
// Load Composer's autoloader relative to this file, so the script works no
// matter which working directory it is started from.
require_once __DIR__ . '/vendor/autoload.php';

/* Start to develop here. Best regards https://php-download.com/ */

    

ecourty / text-chunker example snippets


use Ecourty\TextChunker\TextChunker;
use Ecourty\TextChunker\Strategy\ParagraphChunkingStrategy;

// Point the chunker at a file and chunk it with the paragraph strategy.
$chunker = new TextChunker();
$paragraphs = $chunker
    ->setFile('document.txt')
    ->chunk(new ParagraphChunkingStrategy());

foreach ($paragraphs as $chunk) {
    // Chunk content.
    echo $chunk->getText();

    // Index in the sequence.
    echo $chunk->getPosition();

    // Strategy, length, etc.
    print_r($chunk->getMetadata());
}

// Chunk an in-memory string instead of a file. $myText must already hold the
// text to split.
$chunker = new TextChunker();

// The strategy is referenced by its fully qualified name because this snippet
// has no `use` import for it; an unqualified name would resolve to a
// non-existent global class.
// NOTE(review): namespace assumed to match ParagraphChunkingStrategy — confirm.
$sentenceStrategy = new \Ecourty\TextChunker\Strategy\SentenceChunkingStrategy();

foreach ($chunker->setText($myText)->chunk($sentenceStrategy) as $chunk) {
    // ...
}

use League\Flysystem\Filesystem;
use Ecourty\TextChunker\Contract\ReaderInterface;
use Ecourty\TextChunker\TextChunker;
use Ecourty\TextChunker\Strategy\ParagraphChunkingStrategy;

/**
 * Reader that streams a file out of a Flysystem filesystem in buffer-sized pieces.
 */
class FlysystemReader implements ReaderInterface
{
    public function __construct(private Filesystem $filesystem) {}

    /**
     * Lazily reads the file at $path and yields it one buffer at a time.
     *
     * @param string $path       Location of the file inside the filesystem.
     * @param int    $bufferSize Maximum number of bytes requested per read.
     *
     * @return \Generator<int, string> Raw data pieces, in read order.
     */
    public function readChunks(string $path, int $bufferSize): \Generator
    {
        $handle = $this->filesystem->readStream($path);

        try {
            // Stop at end of stream, or as soon as a read reports failure.
            while (!feof($handle) && ($piece = fread($handle, $bufferSize)) !== false) {
                yield $piece;
            }
        } finally {
            // Release the stream even if the consumer stops iterating early.
            fclose($handle);
        }
    }
}

// S3 example
$adapter = new \League\Flysystem\AwsS3V3\AwsS3V3Adapter($s3Client, 'my-bucket');
$filesystem = new Filesystem($adapter);

// Plug the Flysystem-backed reader into the chunker, then chunk the file
// identified by the S3 key.
$s3Chunker = (new TextChunker())->withReader(new FlysystemReader($filesystem));
$s3Chunks = $s3Chunker
    ->setFile('documents/report.txt')  // S3 key
    ->chunk(new ParagraphChunkingStrategy());

foreach ($s3Chunks as $chunk) {
    echo $chunk->getText();
}