PHP code example of shibashish / pdf-reader

1. Go to this page and download the library: Download shibashish/pdf-reader library. Choose the download type "require".

2. Extract the ZIP file and open the index.php.

3. Add this code to the index.php.
    
        
<?php
require_once('vendor/autoload.php');

/* Start to develop here. Best regards https://php-download.com/ */

    

shibashish / pdf-reader example snippets




return [
    // Path to pdftotext binary
    'pdftotext_binary' => env('PDFTOTEXT_BINARY', 'pdftotext'),
    
    // Path to pdftohtml binary
    'pdftohtml_binary' => env('PDFTOHTML_BINARY', 'pdftohtml'),
    
    // Path to pdfinfo binary
    'pdfinfo_binary' => env('PDFINFO_BINARY', 'pdfinfo'),
    
    // Path to pdfimages binary
    'pdfimages_binary' => env('PDFIMAGES_BINARY', 'pdfimages'),
];

use Shibashish\PdfReader\Facades\PdfReader;

$text = PdfReader::extractText('/path/to/document.pdf');
echo $text; // Plain text content

$text = PdfReader::extractText('/path/to/document.pdf', pages: '1-5');

$text = PdfReader::extractText('/path/to/document.pdf', pages: '3');

$text = PdfReader::extractText(
    '/path/to/document.pdf',
    keepFile: true
);
// File saved to: storage/app/public/pdf-reader/texts/pdf-text-{timestamp}.txt

public function extractText(
    string $pdfPath,      // Path to PDF file
    bool $keepFile = false, // Keep temporary file?
    ?string $pages = null   // Page range (e.g., "1-5")
): ?string

$html = PdfReader::extractHtml('/path/to/document.pdf');

$html = PdfReader::extractHtml('/path/to/document.pdf', pages: '1-3');

$html = PdfReader::extractHtml(
    '/path/to/document.pdf',
    keepFile: true
);
// File saved to: storage/app/public/pdf-reader/htmls/pdf-html-{timestamp}.html

public function extractHtml(
    string $pdfPath,
    bool $keepFile = false,
    ?string $pages = null
): ?string

$images = PdfReader::extractImages('/path/to/document.pdf');

// Returns array:
// [
//     [
//         'name' => 'pdf-img-123456789-000.jpg',
//         'path' => '/full/path/to/temp/file.jpg',
//         'data' => <binary image data>
//     ],
//     [
//         'name' => 'pdf-img-123456789-001.png',
//         'path' => '/full/path/to/temp/file.png',
//         'data' => <binary image data>
//     ]
// ]

$images = PdfReader::extractImages('/path/to/document.pdf', keepFiles: true);

// Returns array:
// [
//     [
//         'name' => 'pdf-img-123456789-000.jpg',
//         'path' => '/full/path/to/storage/app/public/pdf-reader/images/pdf-img-123456789-000.jpg'
//     ]
// ]

$images = PdfReader::extractImages('/path/to/document.pdf', pages: '1-5');

$images = PdfReader::extractImages('/path/to/document.pdf', keepFiles: true);

foreach ($images as $image) {
    // Copy to custom location
    copy($image['path'], public_path('images/' . $image['name']));
}

public function extractImages(
    string $pdfPath,
    bool $keepFiles = false,
    ?string $pages = null
): array

$info = PdfReader::getInfo('/path/to/document.pdf');

print_r($info);
// Array
// (
//     [Title] => Sample Document
//     [Author] => John Doe
//     [Creator] => Microsoft Word
//     [Producer] => Adobe PDF Library
//     [CreationDate] => Mon Dec  9 10:30:45 2024 IST
//     [ModDate] => Mon Dec  9 11:00:00 2024 IST
//     [Tagged] => no
//     [UserProperties] => no
//     [Suspects] => no
//     [Form] => none
//     [JavaScript] => no
//     [Pages] => 25
//     [Encrypted] => no
//     [Page size] => 612 x 792 pts (letter)
//     [Page rot] => 0
//     [File size] => 1234567 bytes
//     [Optimized] => no
//     [PDF version] => 1.7
// )

$info = PdfReader::getInfo('/path/to/document.pdf');

$pageCount = $info['Pages'] ?? 0;
$author = $info['Author'] ?? 'Unknown';
$title = $info['Title'] ?? 'Untitled';

public function getInfo(string $pdfPath): array

use Shibashish\PdfReader\Exceptions\InvalidPdfException;

try {
    $text = PdfReader::extractText('/path/to/file.pdf');
} catch (InvalidPdfException $e) {
    echo $e->getMessage();
    // "The file '/path/to/file.pdf' does not exist."
    // "The file '/path/to/file.pdf' is not readable."
    // "The file '/path/to/file.pdf' is not a valid PDF."
}

use Shibashish\PdfReader\Exceptions\BinaryNotFoundException;

try {
    $text = PdfReader::extractText('/path/to/file.pdf');
} catch (BinaryNotFoundException $e) {
    echo $e->getMessage();
    // Message describing which required binary (e.g. pdftotext) could not be found.
}

use Shibashish\PdfReader\Exceptions\PdfReaderException;

try {
    $text = PdfReader::extractText('/path/to/file.pdf');
} catch (PdfReaderException $e) {
    echo $e->getMessage();
    // "Failed to extract text: [error details]"
}

use Shibashish\PdfReader\Facades\PdfReader;
use Shibashish\PdfReader\Exceptions\{
    InvalidPdfException,
    BinaryNotFoundException,
    PdfReaderException
};

try {
    $text = PdfReader::extractText($pdfPath);
    
} catch (InvalidPdfException $e) {
    // Handle invalid file
    Log::error('Invalid PDF file', ['path' => $pdfPath, 'error' => $e->getMessage()]);
    return response()->json(['error' => 'Invalid PDF file'], 400);
    
} catch (BinaryNotFoundException $e) {
    // Handle missing binary
    Log::critical('PDF binary not found', ['error' => $e->getMessage()]);
    return response()->json(['error' => 'Server configuration error'], 500);
    
} catch (PdfReaderException $e) {
    // Handle extraction error
    Log::error('PDF extraction failed', ['path' => $pdfPath, 'error' => $e->getMessage()]);
    return response()->json(['error' => 'Failed to process PDF'], 500);
}

$this->app->singleton('pdf-reader', function () {
    return new PdfReaderService;
});

PdfReader::extractText($path);
// Resolves to: app('pdf-reader')->extractText($path);

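// Text file
// NOTE: the saved name follows the pattern pdf-text-{timestamp}.txt; a later time() call may not
// reproduce that timestamp, so treat the paths below as illustrative — confirm the actual file name.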
$text = PdfReader::extractText($path, keepFile: true);
$filePath = storage_path('app/public/pdf-reader/texts/pdf-text-' . time() . '.txt');

// Make publicly accessible
$url = asset('storage/pdf-reader/texts/pdf-text-' . time() . '.txt');

try {
    $result = PdfReader::extractText($path);
} catch (PdfReaderException $e) {
    // Log and handle appropriately
}

if (!file_exists($path)) {
    throw new \InvalidArgumentException('File not found');
}

$text = PdfReader::extractText($path);

// Default behavior - auto-cleanup
$text = PdfReader::extractText($path); // Temp file deleted

// Or manually manage
$text = PdfReader::extractText($path, keepFile: true);
// Process the file...
// Then delete manually if needed

// Extract first 10 pages only
$text = PdfReader::extractText($largePdf, pages: '1-10');

Check that the required binaries are available on the PATH (bash):
which pdftotext
which pdftohtml
which pdfinfo
which pdfimages