PHP code example of lecodeurdudimanche / document-data-extractor

1. Go to this page and download the lecodeurdudimanche/document-data-extractor library, choosing the download type "require".

2. Extract the ZIP file and open index.php.

3. Add this code to index.php:
    
        
<?php
require_once('vendor/autoload.php');

/* Start to develop here. Best regards https://php-download.com/ */

    

lecodeurdudimanche / document-data-extractor example snippets


    // Usage example: configure an Extractor with regions of interest and
    // Tesseract options, load a document, then run the extraction.
    $extractor = new Extractor();

    // Each ROI is created with a label (and, for the second one, a data type)
    // and is positioned with setRect(). The instantiation is wrapped in
    // parentheses so the chained call parses on PHP versions before 8.4.
    $regionsOfInterest = [
        // The name of the company is in the rectangle with the top left corner (700, 180) and a size of (1080, 160)
        (new ROI('Name of the company'))->setRect(700, 180, 1080, 160),
        (new ROI('Total', 'integer'))->setRect(1980, 1572, 58, 52),
    ];

    $tesseractConfiguration = [
        'psm' => 8, // Page segmentation method is set to 8 (single word)
        'tessdataDir' => '/usr/share/tessdata', // Other tesseract options ...
    ];

    // compact() builds ['regionsOfInterest' => ..., 'tesseractConfiguration' => ...]
    // from the two variables defined above.
    $config = Configuration::fromArray(compact('regionsOfInterest', 'tesseractConfiguration'));
    $extractor->setConfig($config);

    // Load the input from ONE source. The alternatives are left commented out
    // so the snippet does not reference the undefined $imageData variable.
    $extractor->loadImage('/path/to/image.png');
    // or: $extractor->loadPDF('/path/to/document.pdf');
    // or: $extractor->setImage($imageData); // could be an Imagick or GD image or raw image data

    $data = $extractor->run();
    /*
     * Example result:
     * $data = [
     *     ['label' => 'Name of the company', 'type' => 'text', 'data' => 'Company Limited'],
     *     ['label' => 'Total', 'type' => 'integer', 'data' => '55'],
     * ];
     */