1. Go to this page and download the library: Download coooold/crawler library. Choose the download type you require.
2. Extract the ZIP file and open the index.php.
3. Add this code to the index.php.
<?php
require_once('vendor/autoload.php');
/* Start to develop here. Best regards https://php-download.com/ */
coooold / crawler example snippets
use PHPCrawler\PHPCrawler;
use PHPCrawler\Response;
use Symfony\Component\DomCrawler\Crawler;
// Logger shared by the crawler examples below. A stream handler on STDOUT is
// attached with a minimum level of INFO.
$logger = new Monolog\Logger("fox");
try {
    $logger->pushHandler(new \Monolog\Handler\StreamHandler(STDOUT, \Monolog\Logger::INFO));
} catch (\Exception $e) {
    // Best effort: continue without the stdout handler, but report the reason
    // instead of silently discarding the failure.
    fwrite(STDERR, "Could not attach stdout log handler: {$e->getMessage()}\n");
}
// Example 1: crawl a single page, print its <title> and the text of every
// element matching ".related-item a".
$crawler = new PHPCrawler([
    'maxConnections' => 2,
    'domParser' => true,
    'timeout' => 3000,
    'retries' => 3,
    'logger' => $logger,
]);
// The handler needs nothing from the enclosing scope, so it captures no
// variables (capturing an undefined variable raises a warning).
$crawler->on('response', function (Response $res) {
    // Skip responses the crawler marked as unsuccessful.
    if (!$res->success) {
        return;
    }
    // NOTE(review): assumes $res->dom is a Symfony DomCrawler instance (the
    // Crawler type-hint below suggests so); html() on an empty node list
    // throws there, so only read the title when one was found.
    $titleNodes = $res->dom->filter("title");
    if ($titleNodes->count() > 0) {
        $title = $titleNodes->html();
        echo ">>> title: {$title}\n";
    }
    $res->dom
        ->filter('.related-item a')
        ->each(function (Crawler $link) {
            echo ">>> links: ", $link->text(), "\n";
        });
});
$crawler->queue('https://www.foxnews.com/');
$crawler->run();
// Example 2: queue 100 paginated list pages on a rate-limited crawler.
$options = [
    'maxConnections' => 10,
    'rateLimit' => 2, // requests per second
    'domParser' => true,
    'timeout' => 30000,
    'retries' => 3,
    'logger' => $logger,
];
$crawler = new PHPCrawler($options);
// Pages are numbered 1 through 100; each task is tagged with type "list".
foreach (range(1, 100) as $page) {
    $task = [
        'uri' => "http://www.qbaobei.com/jiaoyu/gshb/List_{$page}.html",
        'type' => 'list',
    ];
    $crawler->queue($task);
}
// With a rate limit of 2, the average gap between two tasks is 1000 / 2 (ms).
$crawler->run();