1. Go to this page and download the library: Download ddliu/spider library. Choose the download type "require".
2. Extract the ZIP file and open the index.php.
3. Add this code to the index.php.
<?php
require_once('vendor/autoload.php');
/* Start to develop here. Best regards https://php-download.com/ */
// ddliu/spider example snippets
use ddliu\spider\Spider;
use ddliu\spider\Pipe\NormalizeUrlPipe;
use ddliu\spider\Pipe\RequestPipe;
use ddliu\spider\Pipe\DomCrawlerPipe;
// Chain NormalizeUrlPipe, RequestPipe and DomCrawlerPipe, followed by a closure
// pipe that forks a new task for the href of every <a> element in $task['$dom'].
(new Spider())
    ->pipe(new NormalizeUrlPipe())
    ->pipe(new RequestPipe())
    ->pipe(new DomCrawlerPipe())
    ->pipe(function($spider, $task) {
        $task['$dom']->filter('a')->each(function($a) use ($task) {
            $href = $a->attr('href');
            $task->fork($href);
        }); // statement terminator was missing here, which made the closure body a parse error
    })
    // the entry task
    ->addTask('http://example.com')
    ->run()
    ->report();
// Shape of a closure pipe: it takes two parameters, $spider and $task,
// matching the closure handed to ->pipe() in the pipeline example.
function($spider, $task) {}
use ddliu\spider\Pipe\BasePipe;
/**
 * Example of a custom pipe implemented as a class derived from BasePipe.
 */
class MyPipe extends BasePipe
{
    /**
     * Handle a single task; the body is a placeholder for user logic.
     *
     * @param mixed $spider first argument passed to the pipe
     * @param mixed $task   second argument passed to the pipe
     */
    public function run($spider, $task)
    {
        // process the task...
    }
}
// NormalizeUrlPipe is constructed without arguments.
new NormalizeUrlPipe();
// RequestPipe is constructed with an options array; 'useragent' and 'timeout' are set here.
new RequestPipe([
    'useragent' => 'myspider',
    'timeout' => 10,
]);
// Wrap a RequestPipe in a FileCachePipe configured with 'input', 'output' and 'root' options.
// NOTE(review): FileCachePipe has no `use` import in the visible snippets —
// presumably ddliu\spider\Pipe\FileCachePipe; confirm before running.
$requestPipe = new RequestPipe();
$cacheForReqPipe = new FileCachePipe(
    $requestPipe,
    [
        'input' => 'url',
        'output' => 'content',
        'root' => '/path/to/cache/root',
    ]
);
// Wrap a RequestPipe in a RetryPipe configured with a 'count' option of 10.
// NOTE(review): RetryPipe has no `use` import in the visible snippets —
// presumably ddliu\spider\Pipe\RetryPipe; confirm before running.
$requestPipe = new RequestPipe();
$retryForReqPipe = new RetryPipe(
    $requestPipe,
    [
        'count' => 10,
    ]
);
// ReportPipe is constructed with an options array; only 'seconds' is set here.
// NOTE(review): ReportPipe has no `use` import in the visible snippets —
// presumably ddliu\spider\Pipe\ReportPipe; confirm before running.
new ReportPipe([
    'seconds' => 600,
]);
Loading, please wait...
Before you can download the PHP files, the dependencies must be resolved. This can take a few minutes. Please be patient.