1. Go to this page and download the library: "Download nadar/crawler library". Choose the download type "require".
2. Extract the ZIP file and open index.php.
3. Add the following code to index.php:
<?php
// Load Composer's autoloader relative to this file rather than relative to the
// include_path / current working directory, so the script works no matter
// where it is invoked from.
require_once __DIR__ . '/vendor/autoload.php';
/* Start to develop here. Best regards https://php-download.com/ */
// nadar/crawler example snippets
/**
 * Example crawl handler: prints every result and exposes hooks around the crawl.
 *
 * All library types are fully qualified because this file has no `use` imports;
 * a bare `Crawler` type hint would resolve to the global `\Crawler` class and
 * would not match the library's crawler class.
 */
class MyCrawlHandler implements \Nadar\Crawler\Interfaces\HandlerInterface
{
    /**
     * Outputs the title, content and normalized URL of a result.
     *
     * @param \Nadar\Crawler\Result $result The result passed to this handler.
     */
    public function afterRun(\Nadar\Crawler\Result $result)
    {
        echo $result->title . " with content " . $result->content . " for url " . $result->url->getNormalized();
    }

    /**
     * Hook intended for work that must happen before the crawler runs.
     *
     * @param \Nadar\Crawler\Crawler $crawler The crawler this handler is attached to.
     */
    public function onSetup(\Nadar\Crawler\Crawler $crawler)
    {
        // do some stuff before the crawler runs, maybe truncate your temporary table where the results should be stored.
    }

    /**
     * Hook intended for work that must happen once the crawler is finished.
     *
     * @param \Nadar\Crawler\Crawler $crawler The crawler this handler is attached to.
     */
    public function onEnd(\Nadar\Crawler\Crawler $crawler)
    {
        // runs when the crawler is finished, maybe synchronize your temporary index table with the "real" site index.
    }
}
// Create the crawler for the start URL with an in-memory storage and a loop runner.
// Class names are fully qualified because this file has no `use` imports; the bare
// names Crawler / ArrayStorage / LoopRunner would be looked up in the global
// namespace and fail with "Class not found".
// NOTE(review): the Storage and Runners sub-namespaces follow the library's layout — confirm against the installed version.
$crawler = new \Nadar\Crawler\Crawler(
    'https://luya.io',
    new \Nadar\Crawler\Storage\ArrayStorage(),
    new \Nadar\Crawler\Runners\LoopRunner()
);
// what kind of document types would you like to parse?
$crawler->addParser(new \Nadar\Crawler\Parsers\Html());
// adding the PDF parser will increase memory consumption
// $crawler->addParser(new \Nadar\Crawler\Parsers\Pdf());
// register your handler in order to interact with the results, maybe store them in a database?
$crawler->addHandler(new MyCrawlHandler());
// setup and start the crawl process
$crawler->setup();
$crawler->run();
Loading, please wait ...
Before you can download the PHP files, the dependencies must be resolved. This can take a few minutes. Please be patient.