PHP code example of workable-cv / extract

1. Go to this page and download the library: Download workable-cv/extract library. Choose the download type "require".

2. Extract the ZIP file and open the index.php.

3. Add this code to the index.php.
    
        
<?php
// Load Composer's autoloader relative to this file, so the path resolves
// no matter which working directory the script is started from.
require_once __DIR__ . '/vendor/autoload.php';

/* Start developing here. Best regards https://php-download.com/ */

    

workable-cv / extract example snippets


    // Class-name constant of the package's service provider
    // (presumably an entry for a providers list — confirm against the framework docs).
    WorkableCV\Extract\ExtractServiceProvider::class

    // Alternative: register the service provider directly on the application instance.
    $app->register(\WorkableCV\Extract\ExtractServiceProvider::class);

    use WorkableCV\Extract\core\PdfToHtml;

    // Base name of the PDF (without extension); also passed to generateHTML().
    $name_file = 'cv24';
    $file = $name_file . '.pdf';

    // Unique id used to give this run its own output directory.
    $id = uniqid();

    // Converter options: paths to the poppler binaries and the output location.
    $options = [
        'pdftohtml_path' => '/usr/bin/pdftohtml',
        'pdfinfo_path' => '/usr/bin/pdfinfo',
        'clearAfter' => false,
        'outputDir' => storage_path('files/' . $id),
    ];

    // Example for Windows:
    //$options = [
    //            'pdftohtml_path' => '/path/to/poppler/bin/pdftohtml.exe',
    //            'pdfinfo_path' => '/path/to/poppler/bin/pdfinfo.exe',
    //            'clearAfter' => false,
    //            'outputDir' => storage_path('files/'.$id),
    //        ];

    $pdf = new PdfToHtml($file, $options, 'pdf');
    $pdf->generateHTML($name_file, $id);

    // Complete list of the options shown for the PDF-to-HTML converter.
    $full_settings = [
        'pdftohtml_path' => '/usr/bin/pdftohtml', // path to pdftohtml
        'pdfinfo_path' => '/usr/bin/pdfinfo', // path to pdfinfo

        'generate' => [ // settings for generating html
            'singlePage' => false, // we want separate pages
            'imageJpeg' => false, // we want png images
            'ignoreImages' => false, // we need images
            'zoom' => 1.5, // scale pdf
            'noFrames' => false, // we want separate pages
        ],

        'clearAfter' => true, // auto clear output dir (if removeOutputDir==false then output dir will remain)
        'removeOutputDir' => true, // remove output dir
        'outputDir' => '/tmp/' . uniqid(), // output dir

        'html' => [ // settings for processing html
            'inlineCss' => true, // replaces css classes with inline css rules
            'inlineImages' => true, // looks for images in html and replaces the src attribute with base64 data
            'onlyContent' => true, // takes the body content only from the html
        ],
    ];

    use WorkableCV\Extract\core\HtmlToPdf;

    $html = new HtmlToPdf();

    // Path of the HTML file to convert. The variable name now matches the one
    // handed to generatePDF() below.
    $path_file = 'test.html';

    // Options passed to generatePDF(); only the DPI is set here.
    $option = [
        'dpi' => 120,
    ];

    $pdf_generate = $html->generatePDF($path_file, $option);
    return $pdf_generate;

    use WorkableCV\Extract\core\DocToPdf;
    
    // test.doc is the document to convert to PDF.
    // test.pdf is the path where the converted PDF is saved.
    $converter = new DocToPdf();

    // Windows
    $converter->generatePDF('test.doc', 'test.pdf');

    // Linux, macOS
    $converter->generatePDFLinux('test.doc');

    use WorkableCV\Extract\core\PdfOCR;
        
    // test.pdf is the path of the PDF file to run OCR on.
    // test.ocr.pdf is the path of the output PDF file.
    $ocr = new PdfOCR();
    $ocr->pdfOCR('test.pdf', 'test.ocr.pdf');

    use WorkableCV\Extract\core\PdfOCR;
    use WorkableCV\Extract\core\PdfProtected;
    use WorkableCV\Extract\core\PdfToHtml;
        
    // Source PDF: <storage>/cv1/<name>.pdf
    $name_file = 'cv105';
    $file = storage_path('cv1/' . $name_file . '.pdf');

    // Extraction options and output directory both come from the "extract" config.
    $options_check = config('extract.options_extract');
    $pdf = new PdfToHtml($file, $options_check);
    $output_dir = config('extract.options_extract.outputDir');

    // OCR is required when checkPdf() reports true (presumably an image-only /
    // scanned PDF — confirm against the library), or when no email/phone is
    // found in the PDF. The `||` short-circuit means the email/phone check
    // only runs when checkPdf() is falsy.
    $needs_ocr = $pdf->checkPdf($output_dir, $name_file)
        || !$pdf->checkExitsEmailPhone($output_dir, $name_file);

    // By default the original file is protected; after OCR the OCR output is used instead.
    $source_path = $file;

    if ($needs_ocr) {
        $result_pdfOCR = (new PdfOCR())->pdfOCR($file);

        if (!$result_pdfOCR) {
            exit('File not convert. Try again');
        }

        // Index 1 of the OCR result is used as the path of the OCR'd file.
        $path_file_ocr = $result_pdfOCR[1];
        $source_path = $path_file_ocr;
    }

    $pdfProtected = new PdfProtected($source_path, $options_check);
    $path_cv_protected = $pdfProtected->pdfProtected(
        $name_file,
        $output_dir,
        false,
        false,
        'pdf',
        1,
        config('extract.output_cv_protected'),
        false
    );

    // The intermediate OCR file is no longer needed once the protected copy exists.
    if ($needs_ocr) {
        unlink($path_file_ocr);
    }

    return $path_cv_protected;

    php artisan vendor:publish --provider="WorkableCV\Extract\ExtractServiceProvider"
app/Console/Kernel.php

    php artisan vendor:publish --provider="WorkableCV\Extract\ExtractServiceProvider"
    

    rootDir: "{app_directory}/vendor/dompdf/dompdf"
    tempDir: "/tmp" (available in config/dompdf.php)
    fontDir: "{app_directory}/storage/fonts/" (available in config/dompdf.php)
    fontCache: "{app_directory}/storage/fonts/" (available in config/dompdf.php)
    chroot: "{app_directory}" (available in config/dompdf.php)
    logOutputFile: "/tmp/log.htm"
    defaultMediaType: "screen" (available in config/dompdf.php)
    defaultPaperSize: "a4" (available in config/dompdf.php)
    defaultFont: "serif" (available in config/dompdf.php)
    dpi: 96 (available in config/dompdf.php)
    fontHeightRatio: 1.1 (available in config/dompdf.php)
    isPhpEnabled: false (available in config/dompdf.php)
    isRemoteEnabled: true (available in config/dompdf.php)
    isJavascriptEnabled: true (available in config/dompdf.php)
    isHtml5ParserEnabled: false (available in config/dompdf.php)
    isFontSubsettingEnabled: false (available in config/dompdf.php)
    debugPng: false
    debugKeepTemp: false
    debugCss: false
    debugLayout: false
    debugLayoutLines: true
    debugLayoutBlocks: true
    debugLayoutInline: true
    debugLayoutPaddingBox: true
    pdfBackend: "CPDF" (available in config/dompdf.php)
    pdflibLicense: ""
    adminUsername: "user"
    adminPassword: "password"
$id .'_image.html ' ($id generated in pdf to html)

       [PHP_COM_DOTNET]
       extension=php_com_dotnet.dll