1. Go to this page and download the library: Download johnroyer/crawler-php library. Choose the download type "require".
2. Extract the ZIP file and open the index.php.
3. Add this code to the index.php.
<?php
require_once('vendor/autoload.php');
/* Start to develop here. Best regards https://php-download.com/ */
johnroyer / crawler-php example snippets
/**
 * Example handler: accepts every request except those for static assets.
 */
class MyHandler extends \Zeroplex\Crawler\Handler\AbstractHandler
{
    /**
     * Returns the fixed domain string 'test.com'.
     */
    public function getDomain(): string
    {
        return 'test.com';
    }

    /**
     * Tells whether the given request should be fetched.
     *
     * @return bool false when the URL path ends in a .css, .js, .jpg, .png
     *              or .gif extension (case-insensitive); true otherwise
     */
    public function shouldFetch(\Psr\Http\Message\RequestInterface $request): bool
    {
        // Inspect only the path: a query string or fragment ("style.css?v=2")
        // must not hide the extension from the check.
        $path = $request->getUri()->getPath();

        // The literal dot keeps ordinary pages such as "/learn-js" from being
        // mistaken for assets; "i" also covers upper-case names like "PHOTO.JPG".
        return 1 !== preg_match('/\.(css|js|jpg|png|gif)$/i', $path);
    }

    /**
     * Receives the response of a fetched request; intentionally empty in this example.
     */
    public function handle(\Psr\Http\Message\ResponseInterface $response): void
    {
        // get content using $response->getBody()->getContents()
    }
}
// Build a crawler with a delay of 0, a timeout of 3, redirect following
// enabled and a Firefox-style User-Agent string.
// NOTE(review): the units of the delay/timeout values are not visible in this
// example — confirm against the library before relying on them.
$crawler = new \Zeroplex\Crawler\Crawler();
$crawler->setDelay(0)
    ->setTimeout(3)
    ->setFollowRedirect(true)
    ->setUserAgent('Mozilla/5.0 (platform; rv:geckoversion) Gecko/geckotrail Firefox/100.1');

// MyHandler is the handler class declared in this example.
$crawler->addHandler(new MyHandler());

// URL to start
$crawler->run('https://test.com');
/**
 * Sketch of a URL queue kept in a Redis list.
 *
 * NOTE(review): this example is incomplete — the constructor body is elided,
 * so $redis is never assigned in the code shown here, and further methods are
 * left out ("and so on"). A Redis client must be wired in before use.
 */
class RedisQueue implements Zeroplex\Crawler\UrlQueue\UrlQueueInterface
{
    /** Key of the Redis list that stores the URLs. */
    private const QUEUE_KEY = 'crawler:urls';

    /** Redis client; NOTE(review): expected to be created from $host/$port — not shown. */
    private $redis;

    public function __construct(string $host, int $port)
    {
    }

    /**
     * Adds a URL to the head of the list.
     */
    public function push(string $url): void
    {
        // LPUSH takes the list key first, then the value to insert.
        $this->redis->lpush(self::QUEUE_KEY, $url);
    }

    /**
     * Removes and returns the URL at the head of the list.
     *
     * NOTE(review): with LPUSH + LPOP the most recently pushed URL comes out
     * first; what the client returns for an empty list depends on the client
     * library — confirm it satisfies the string return type.
     */
    public function pop(): string
    {
        return $this->redis->lpop(self::QUEUE_KEY);
    }

    // and so on
}
Loading please wait ...
Before you can download the PHP files, the dependencies must be resolved. This can take a few minutes. Please be patient.