1. Go to this page and download the library: Download ixnode/php-web-crawler library. Choose the download type "require".
2. Extract the ZIP file and open index.php.
3. Add the following code to index.php:
<?php
// Load Composer's autoloader relative to this file rather than the current
// working directory, so the script also works when started from elsewhere.
require_once __DIR__ . '/vendor/autoload.php';
/* Start to develop here. Best regards https://php-download.com/ */
ixnode / php-web-crawler example snippets
use Ixnode\PhpWebCrawler\Output\Field;
use Ixnode\PhpWebCrawler\Source\Raw;
use Ixnode\PhpWebCrawler\Value\Text;
use Ixnode\PhpWebCrawler\Value\XpathTextNode;
// Example 1: flat output made of a constant value and two XPath lookups.

// Raw HTML document used as the input for this example.
$rawHtml = <<<HTML
<html>
<head>
<title>Test Page</title>
</head>
<body>
<h1>Test Title</h1>
<p>Test Paragraph</p>
</body>
</html>
HTML;

// Three output fields: "version" is given the literal text "1.0.0", while
// "title" and "paragraph" are given the XPath expressions //h1 and //p.
// NOTE(review): Field/Text/XpathTextNode semantics are inferred from their
// names and usage here; confirm against the library documentation.
$html = new Raw(
    $rawHtml,
    new Field('version', new Text('1.0.0')),
    new Field('title', new XpathTextNode('//h1')),
    new Field('paragraph', new XpathTextNode('//p')),
);

// Print the result. The original statement discarded the return value, so
// running the example produced no visible output.
// NOTE(review): assumes getJsonStringFormatted() returns a string; confirm.
echo $html->parse()->getJsonStringFormatted(), PHP_EOL;
use Ixnode\PhpWebCrawler\Output\Field;
use Ixnode\PhpWebCrawler\Output\Group;
use Ixnode\PhpWebCrawler\Source\Raw;
use Ixnode\PhpWebCrawler\Value\XpathTextNode;
// Example 2: nested output structure built with Group objects.

// Raw HTML document used as the input: one heading and two classed paragraphs.
$rawHtml = <<<HTML
<html>
<head>
<title>Test Page</title>
</head>
<body>
<h1>Test Title</h1>
<p class="paragraph-1">Test Paragraph 1</p>
<p class="paragraph-2">Test Paragraph 2</p>
</body>
</html>
HTML;

// A top-level "title" field next to a "content" group. The group nests a
// "header" group (field "h1") and a "text" group (fields "p1" and "p2").
// Every XPath expression here is absolute, i.e. starts at the document root.
// NOTE(review): Group/Field semantics are inferred from names and usage;
// confirm against the library documentation.
$html = new Raw(
    $rawHtml,
    new Field('title', new XpathTextNode('/html/head/title')),
    new Group(
        'content',
        new Group(
            'header',
            new Field('h1', new XpathTextNode('/html/body//h1')),
        ),
        new Group(
            'text',
            new Field('p1', new XpathTextNode('/html/body//p[@class="paragraph-1"]')),
            new Field('p2', new XpathTextNode('/html/body//p[@class="paragraph-2"]')),
        ),
    ),
);

// Print the result. The original statement discarded the return value, so
// running the example produced no visible output.
// NOTE(review): assumes getJsonStringFormatted() returns a string; confirm.
echo $html->parse()->getJsonStringFormatted(), PHP_EOL;
use Ixnode\PhpWebCrawler\Output\Field;
use Ixnode\PhpWebCrawler\Output\Group;
use Ixnode\PhpWebCrawler\Source\Raw;
use Ixnode\PhpWebCrawler\Source\XpathSection;
use Ixnode\PhpWebCrawler\Value\XpathTextNode;
// Example 3: an XpathSection wrapping fields that use relative XPath.

// Raw HTML document used as the input: the heading and paragraphs live inside
// a <div class="content"> wrapper.
$rawHtml = <<<HTML
<html>
<head>
<title>Test Page</title>
</head>
<body>
<div class="content">
<h1>Test Title</h1>
<p class="paragraph-1">Test Paragraph 1</p>
<p class="paragraph-2">Test Paragraph 2</p>
</div>
</body>
</html>
HTML;

// The "content" group wraps an XpathSection selecting the content <div>.
// The nested fields use relative expressions ("./h1", "./p[...]").
// NOTE(review): presumably the relative expressions are evaluated against
// the node matched by the section; confirm against the library docs.
$html = new Raw(
    $rawHtml,
    new Field('title', new XpathTextNode('/html/head/title')),
    new Group(
        'content',
        new XpathSection(
            '/html/body//div[@class="content"]',
            new Group(
                'header',
                new Field('h1', new XpathTextNode('./h1')),
            ),
            new Group(
                'text',
                new Field('p1', new XpathTextNode('./p[@class="paragraph-1"]')),
                new Field('p2', new XpathTextNode('./p[@class="paragraph-2"]')),
            ),
        ),
    ),
);

// Print the result. The original statement discarded the return value, so
// running the example produced no visible output.
// NOTE(review): assumes getJsonStringFormatted() returns a string; confirm.
echo $html->parse()->getJsonStringFormatted(), PHP_EOL;
use Ixnode\PhpWebCrawler\Output\Field;
use Ixnode\PhpWebCrawler\Output\Group;
use Ixnode\PhpWebCrawler\Source\Raw;
use Ixnode\PhpWebCrawler\Source\XpathSections;
use Ixnode\PhpWebCrawler\Value\XpathTextNode;
// Example 4: XpathSections applied to an unordered list.

// Raw HTML document used as the input: the content <div> additionally holds
// a <ul> with two list items.
$rawHtml = <<<HTML
<html>
<head>
<title>Test Page</title>
</head>
<body>
<div class="content">
<h1>Test Title</h1>
<p class="paragraph-1">Test Paragraph 1</p>
<p class="paragraph-2">Test Paragraph 2</p>
<ul>
<li>Test Item 1</li>
<li>Test Item 2</li>
</ul>
</div>
</body>
</html>
HTML;

// The "hits" group wraps an XpathSections selecting the <ul>. Its only child
// is a bare XpathTextNode (no Field name) using "./li/text()".
// NOTE(review): presumably this collects one entry per matched node; the
// exact output shape is not visible here, confirm against the library docs.
$html = new Raw(
    $rawHtml,
    new Field('title', new XpathTextNode('/html/head/title')),
    new Group(
        'hits',
        new XpathSections(
            '/html/body//div[@class="content"]/ul',
            new XpathTextNode('./li/text()'),
        ),
    ),
);

// Print the result. The original statement discarded the return value, so
// running the example produced no visible output.
// NOTE(review): assumes getJsonStringFormatted() returns a string; confirm.
echo $html->parse()->getJsonStringFormatted(), PHP_EOL;
use Ixnode\PhpWebCrawler\Output\Field;
use Ixnode\PhpWebCrawler\Output\Group;
use Ixnode\PhpWebCrawler\Source\Raw;
use Ixnode\PhpWebCrawler\Source\XpathSections;
use Ixnode\PhpWebCrawler\Value\XpathTextNode;
// Example 5: XpathSections applied to table rows, with named fields per row.

// Raw HTML document used as the input: the content <div> additionally holds
// a two-row table, each row having a <th> caption and a <td> cell.
$rawHtml = <<<HTML
<html>
<head>
<title>Test Page</title>
</head>
<body>
<div class="content">
<h1>Test Title</h1>
<p class="paragraph-1">Test Paragraph 1</p>
<p class="paragraph-2">Test Paragraph 2</p>
<table>
<tbody>
<tr>
<th>Caption 1</th>
<td>Cell 1</td>
</tr>
<tr>
<th>Caption 2</th>
<td>Cell 2</td>
</tr>
</tbody>
</table>
</div>
</body>
</html>
HTML;

// The "hits" group wraps an XpathSections selecting every <tr> of the table.
// Each section carries two fields: "caption" ("./th/text()") and "content"
// ("./td/text()").
// NOTE(review): presumably the fields are evaluated once per matched row;
// confirm against the library documentation.
$html = new Raw(
    $rawHtml,
    new Field('title', new XpathTextNode('/html/head/title')),
    new Group(
        'hits',
        new XpathSections(
            '/html/body//div[@class="content"]/table/tbody/tr',
            new Field('caption', new XpathTextNode('./th/text()')),
            new Field('content', new XpathTextNode('./td/text()')),
        ),
    ),
);

// Print the result. The original statement discarded the return value, so
// running the example produced no visible output.
// NOTE(review): assumes getJsonStringFormatted() returns a string; confirm.
echo $html->parse()->getJsonStringFormatted(), PHP_EOL;
shell
composer require ixnode/php-web-crawler
shell
vendor/bin/php-web-crawler -V
shell
php-web-crawler 0.1.0 (02-24-2024 14:46:26) - Björn Hempel <[email protected]>
bash
git clone git@github.com:ixnode/php-web-crawler.git && cd php-web-crawler
Loading please wait ...
Before you can download the PHP files, the dependencies should be resolved. This can take some minutes. Please be patient.