1. Go to the download page and download the library (Download workable-cv/extract library), choosing the download type "require".
2. Extract the ZIP file and open index.php.
3. Add the following code to index.php.
<?php
require_once('vendor/autoload.php');
/* Start to develop here. Best regards https://php-download.com/ */
// Complete set of conversion options, shown with example values.
// NOTE(review): nothing in the visible part of this script passes
// $full_settings to the library; the later example reads its options from
// config('extract.options_extract') instead - confirm which one is intended.
$full_settings = [
    'pdftohtml_path' => '/usr/bin/pdftohtml', // path to pdftohtml
    'pdfinfo_path' => '/usr/bin/pdfinfo', // path to pdfinfo
    'generate' => [ // settings for generating html
        'singlePage' => false, // we want separate pages
        'imageJpeg' => false, // we want png image
        'ignoreImages' => false, // we need images
        'zoom' => 1.5, // scale pdf
        'noFrames' => false, // we want separate pages
    ],
    'clearAfter' => true, // auto clear output dir (if removeOutputDir==false then output dir will remain)
    'removeOutputDir' => true, // remove output dir
    'outputDir' => '/tmp/' . uniqid(), // output dir, unique per run
    'html' => [ // settings for processing html
        'inlineCss' => true, // replaces css classes to inline css rules
        'inlineImages' => true, // looks for images in html and replaces the src attribute to base64 hash
        'onlyContent' => true, // takes from html body content only
    ],
]; // the terminating semicolon was missing, which made the next statement a parse error
use WorkableCV\Extract\core\DocToPdf;

// Convert a Word document to PDF.
// 'test.doc' is the document to convert; 'test.pdf' is the path where the
// Windows converter saves the resulting PDF.
$doc = new DocToPdf();

// The library has a separate entry point per platform, so choose one by OS
// instead of invoking both. PHP_OS starts with "WIN" on Windows builds
// (a prefix check is required because "Darwin" also contains "win").
if (strncasecmp(PHP_OS, 'WIN', 3) === 0) {
    // Windows
    $doc->generatePDF('test.doc', 'test.pdf');
} else {
    // Linux, macOS
    $doc->generatePDFLinux('test.doc');
}
use WorkableCV\Extract\core\PdfOCR;

// Run OCR over an existing PDF.
$sourcePdf = 'test.pdf';     // path of the PDF file to process
$ocrPdf = 'test.ocr.pdf';    // path of the PDF file to write as output

$pdfOCR = new PdfOCR();
$pdfOCR->pdfOCR($sourcePdf, $ocrPdf);
// PdfOCR is already imported earlier in this file. Importing the same name a
// second time is a compile-time fatal error ("name is already in use"), so
// only the two new classes are imported here.
use WorkableCV\Extract\core\PdfProtected;
use WorkableCV\Extract\core\PdfToHtml;

// Produce a protected copy of one CV, running OCR first when needed.
// storage_path() and config() are helpers supplied by the host application.
$name_file = 'cv105';
$file = storage_path('cv1/' . $name_file . '.pdf');
$options_check = config('extract.options_extract');
$pdf = new PdfToHtml($file, $options_check);
$output_dir = config('extract.options_extract.outputDir');

// OCR is needed when checkPdf() reports a truthy result (per the original
// comment: the PDF is a scan/image rather than DOM text - confirm against the
// library), or otherwise when no e-mail/phone could be found in the file.
// The || short-circuits, so checkExitsEmailPhone() only runs when checkPdf()
// is falsy, exactly as in the original nested branches.
$checkPdf = $pdf->checkPdf($output_dir, $name_file);
$needsOcr = $checkPdf || !$pdf->checkExitsEmailPhone($output_dir, $name_file);

$source_file = $file;
$path_file_ocr = null;

if ($needsOcr) {
    $pdfOCR = new PdfOCR();
    $result_pdfOCR = $pdfOCR->pdfOCR($file);
    if (!$result_pdfOCR) {
        exit('File not convert. Try again');
    }
    // Index 1 of the OCR result is used as the path of the OCR'd file.
    $path_file_ocr = $result_pdfOCR[1];
    $source_file = $path_file_ocr;
}

$pdfProtected = new PdfProtected($source_file, $options_check);
$path_cv_protected = $pdfProtected->pdfProtected(
    $name_file,
    $output_dir,
    false,
    false,
    'pdf',
    1,
    config('extract.output_cv_protected'),
    false
);

// The intermediate OCR file is only a temporary input; remove it once the
// protected copy has been produced.
if ($path_file_ocr !== null) {
    unlink($path_file_ocr);
}

return $path_cv_protected;