PHP code example of shibashish / pdf-reader
1. Go to this page and download the library: Download shibashish/pdf-reader library. Choose the download type "require".
2. Extract the ZIP file and open the index.php.
3. Add this code to the index.php.
<?php
require_once('vendor/autoload.php');
/* Start to develop here. Best regards https://php-download.com/ */
shibashish / pdf-reader example snippets
return [
// Path to pdftotext binary
'pdftotext_binary' => env('PDFTOTEXT_BINARY', 'pdftotext'),
// Path to pdftohtml binary
'pdftohtml_binary' => env('PDFTOHTML_BINARY', 'pdftohtml'),
// Path to pdfinfo binary
'pdfinfo_binary' => env('PDFINFO_BINARY', 'pdfinfo'),
// Path to pdfimages binary
'pdfimages_binary' => env('PDFIMAGES_BINARY', 'pdfimages'),
];
use Shibashish\PdfReader\Facades\PdfReader;
$text = PdfReader::extractText('/path/to/document.pdf');
echo $text; // Plain text content
$text = PdfReader::extractText('/path/to/document.pdf', pages: '1-5');
$text = PdfReader::extractText('/path/to/document.pdf', pages: '3');
$text = PdfReader::extractText(
'/path/to/document.pdf',
keepFile: true
);
// File saved to: storage/app/public/pdf-reader/texts/pdf-text-{timestamp}.txt
public function extractText(
string $pdfPath, // Path to PDF file
bool $keepFile = false, // Keep temporary file?
?string $pages = null // Page range (e.g., "1-5")
): ?string
$html = PdfReader::extractHtml('/path/to/document.pdf');
$html = PdfReader::extractHtml('/path/to/document.pdf', pages: '1-3');
$html = PdfReader::extractHtml(
'/path/to/document.pdf',
keepFile: true
);
// File saved to: storage/app/public/pdf-reader/htmls/pdf-html-{timestamp}.html
public function extractHtml(
string $pdfPath,
bool $keepFile = false,
?string $pages = null
): ?string
$images = PdfReader::extractImages('/path/to/document.pdf');
// Returns array:
// [
// [
// 'name' => 'pdf-img-123456789-000.jpg',
// 'path' => '/full/path/to/temp/file.jpg',
// 'data' => <binary image data>
// ],
// [
// 'name' => 'pdf-img-123456789-001.png',
// 'path' => '/full/path/to/temp/file.png',
// 'data' => <binary image data>
// ]
// ]
$images = PdfReader::extractImages('/path/to/document.pdf', keepFiles: true);
// Returns array:
// [
// [
// 'name' => 'pdf-img-123456789-000.jpg',
// 'path' => '/full/path/to/storage/app/public/pdf-reader/images/pdf-img-123456789-000.jpg'
// ]
// ]
$images = PdfReader::extractImages('/path/to/document.pdf', pages: '1-5');
$images = PdfReader::extractImages('/path/to/document.pdf', keepFiles: true);
foreach ($images as $image) {
// Copy to custom location
copy($image['path'], public_path('images/' . $image['name']));
}
public function extractImages(
string $pdfPath,
bool $keepFiles = false,
?string $pages = null
): array
$info = PdfReader::getInfo('/path/to/document.pdf');
print_r($info);
// Array
// (
// [Title] => Sample Document
// [Author] => John Doe
// [Creator] => Microsoft Word
// [Producer] => Adobe PDF Library
// [CreationDate] => Mon Dec 9 10:30:45 2024 IST
// [ModDate] => Mon Dec 9 11:00:00 2024 IST
// [Tagged] => no
// [UserProperties] => no
// [Suspects] => no
// [Form] => none
// [JavaScript] => no
// [Pages] => 25
// [Encrypted] => no
// [Page size] => 612 x 792 pts (letter)
// [Page rot] => 0
// [File size] => 1234567 bytes
// [Optimized] => no
// [PDF version] => 1.7
// )
$info = PdfReader::getInfo('/path/to/document.pdf');
$pageCount = $info['Pages'] ?? 0;
$author = $info['Author'] ?? 'Unknown';
$title = $info['Title'] ?? 'Untitled';
public function getInfo(string $pdfPath): array
use Shibashish\PdfReader\Exceptions\InvalidPdfException;
try {
$text = PdfReader::extractText('/path/to/file.pdf');
} catch (InvalidPdfException $e) {
echo $e->getMessage();
// "The file '/path/to/file.pdf' does not exist."
// "The file '/path/to/file.pdf' is not readable."
// "The file '/path/to/file.pdf' is not a valid PDF."
}
use Shibashish\PdfReader\Exceptions\BinaryNotFoundException;
try {
$text = PdfReader::extractText('/path/to/file.pdf');
} catch (BinaryNotFoundException $e) {
echo $e->getMessage();
// "The ..." (the rest of this message is cut off in this listing)
}
use Shibashish\PdfReader\Exceptions\PdfReaderException;
try {
$text = PdfReader::extractText('/path/to/file.pdf');
} catch (PdfReaderException $e) {
echo $e->getMessage();
// "Failed to extract text: [error details]"
}
use Shibashish\PdfReader\Facades\PdfReader;
use Shibashish\PdfReader\Exceptions\{
InvalidPdfException,
BinaryNotFoundException,
PdfReaderException
};
try {
$text = PdfReader::extractText($pdfPath);
} catch (InvalidPdfException $e) {
// Handle invalid file
Log::error('Invalid PDF file', ['path' => $pdfPath, 'error' => $e->getMessage()]);
return response()->json(['error' => 'Invalid PDF file'], 400);
} catch (BinaryNotFoundException $e) {
// Handle missing binary
Log::critical('PDF binary not found', ['error' => $e->getMessage()]);
return response()->json(['error' => 'Server configuration error'], 500);
} catch (PdfReaderException $e) {
// Handle extraction error
Log::error('PDF extraction failed', ['path' => $pdfPath, 'error' => $e->getMessage()]);
return response()->json(['error' => 'Failed to process PDF'], 500);
}
$this->app->singleton('pdf-reader', function () {
return new PdfReaderService;
});
PdfReader::extractText($path);
// Resolves to: app('pdf-reader')->extractText($path);
// Text file
$text = PdfReader::extractText($path, keepFile: true);
$filePath = storage_path('app/public/pdf-reader/texts/pdf-text-' . time() . '.txt');
// Make publicly accessible
$url = asset('storage/pdf-reader/texts/pdf-text-' . time() . '.txt');
try {
$result = PdfReader::extractText($path);
} catch (PdfReaderException $e) {
// Log and handle appropriately
}
if (!file_exists($path)) {
throw new \InvalidArgumentException('File not found');
}
$text = PdfReader::extractText($path);
// Default behavior - auto-cleanup
$text = PdfReader::extractText($path); // Temp file deleted
// Or manually manage
$text = PdfReader::extractText($path, keepFile: true);
// Process the file...
// Then delete manually if needed
// Extract first 10 pages only
$text = PdfReader::extractText($largePdf, pages: '1-10');
Locate the configured binaries on your system (bash):
which pdftotext
which pdftohtml
which pdfinfo
which pdfimages