1. Go to the download page for the coderden/page-parser library and download it. Choose the download type "require".
2. Extract the ZIP file and open index.php.
3. Add the following code to index.php.
<?php
// Load the autoloader shipped in the extracted vendor/ directory.
// The path is anchored to __DIR__ so the include works regardless of the
// current working directory or the configured include_path.
require_once __DIR__ . '/vendor/autoload.php';
/* Start to develop here. Best regards https://php-download.com/ */
// coderden/page-parser example snippets
use CoderDen\PageParser\PageParser;
// Basic usage: build a parser with no options, load a page, then query it.
$parser = new PageParser();

// Load and parse the target page.
$parser->loadPage('https://example.com');

// Print the page title.
echo $parser->getTitle();

// Collect all links found on the page.
$links = $parser->getAllLinks();

// Extract specific elements: the first argument selects the elements,
// the map names the relative XPath expression to read for each field.
$productFields = [
    'name' => './/h3/text()',
    'price' => './/span[@class="price"]/text()',
    'url' => './/a/@href',
];
$products = $parser->extractByXPath('//div[@class="product"]', $productFields);
// Build a parser with explicit 'timeout' and 'headers' options.
$parserOptions = [
    'timeout' => 30,
    'headers' => [
        'User-Agent' => 'MyBot/1.0',
    ],
];
$parser = new PageParser($parserOptions);

// Load the page.
$parser->loadPage('https://example.com');

// Extract by XPath: one field map applied to every <article> match.
$articleFields = [
    'title' => './/h2/text()',
    'content' => './/p/text()',
];
$data = $parser->extractByXPath('//article', $articleFields);

// Extract by CSS selector, requesting the 'href' and '_text' fields.
$linkFields = ['href', '_text'];
$links = $parser->extractByCss('a.article-link', $linkFields);

// Check whether an element matching the selector exists.
$hasPagination = $parser->exists('.pagination');
if ($hasPagination) {
    echo 'Pagination found!';
}

// Count the elements matching the selector.
$imageCount = $parser->count('img');
use CoderDen\PageParser\ParserHelper;
// Static helpers: each call is given the URL directly, no parser instance needed.
$siteUrl = 'https://example.com';

// Quick link extraction.
$links = ParserHelper::extractLinks($siteUrl);

// Fetch the page title.
$title = ParserHelper::getTitle($siteUrl);

// Extract specific data: URL, element XPath, then the per-field XPath map.
$productFields = [
    'name' => './/h3/text()',
    'price' => './/span[@class="price"]/text()',
];
$products = ParserHelper::extract(
    'https://example.com/products',
    '//div[@class="product-item"]',
    $productFields
);

// Check URL availability.
$isAccessible = ParserHelper::checkUrl($siteUrl);
if ($isAccessible) {
    echo 'URL is accessible';
}
// Page metadata: load a page, then read its meta information.
$parser = new PageParser();
$parser->loadPage('https://example.com');

// Meta tags.
$metaTags = $parser->getMetaTags();

// Canonical URL.
$canonical = $parser->getCanonicalUrl();

// Page charset.
$charset = $parser->getCharset();

// Open Graph data: read the 'content' attribute of the matching <meta> tags.
$contentAttribute = 'content';
$ogTitle = $parser->getAttribute('meta[property="og:title"]', $contentAttribute);
$ogImage = $parser->getAttribute('meta[property="og:image"]', $contentAttribute);
// Link and image extraction from a page below the site root.
$parser = new PageParser();
$parser->loadPage('https://example.com/blog');

// Links are automatically resolved to absolute URLs.
$linkQuery = '//a[@href]';
$links = $parser->extractLinksByXPath($linkQuery);

// Images with relative paths become absolute as well.
$imageQuery = '//img[@src]';
$images = $parser->extractImagesByXPath($imageQuery);
// Regex search: load a page, then scan it with PCRE patterns.
$parser = new PageParser();
$parser->loadPage('https://example.com');

// Search for email addresses.
$emails = $parser->searchByRegex('/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/');

// Search for phone numbers.
// The match must begin (after an optional "+" and "(") and end with a digit.
// Without that anchor the character class alone also matches any run of 7+
// spaces, dashes or parentheses, e.g. ordinary indentation in the markup.
// NOTE(review): assumes searchByRegex applies the pattern to the loaded page content - confirm.
$phones = $parser->searchByRegex('/\+?\(?\d[\d\s\-\(\)]{5,}\d/');