<?php
require_once('vendor/autoload.php');
/* Start to develop here. Best regards https://php-download.com/ */
// lecodeurdudimanche / document-data-extractor example snippets
//
// NOTE(review): Extractor, ROI and Configuration are referenced unqualified here;
// this snippet presumably needs `use` imports for the package's namespace — confirm
// against the package documentation.
$extractor = new Extractor();

// Regions of the document to read. Per the comment below, setRect() takes the
// top-left corner followed by the size of the rectangle.
$regionsOfInterest = [
    // The name of the company is in the rectangle with the top left corner (700, 180) and a size of (1080, 160)
    // The instantiation is wrapped in parentheses so the chained call parses on PHP versions before 8.4.
    (new ROI('Name of the company'))->setRect(700, 180, 1080, 160),
    (new ROI('Total', 'integer'))->setRect(1980, 1572, 58, 52),
];

// Options forwarded to Tesseract.
$tesseractConfiguration = [
    'psm' => 8, // Page segmentation method is set to 8 (single word)
    'tessdataDir' => '/usr/share/tessdata', // Other tesseract options ...
];

// compact() builds ['regionsOfInterest' => ..., 'tesseractConfiguration' => ...]
// from the two variables defined above.
$config = Configuration::fromArray(compact('regionsOfInterest', 'tesseractConfiguration'));
$extractor->setConfig($config);

// The three calls below are alternative ways of supplying the document; in real
// code keep only the one that matches your input.
// NOTE(review): $imageData is not defined in this snippet and must be provided by the caller.
$extractor->loadImage('/path/to/image.png'); // or
$extractor->loadPDF('/path/to/document.pdf'); // or
$extractor->setImage($imageData); // could be an Imagick or GD image or raw image data