1. Go to this page and download the library: Download ssola/crawly library. Choose the download type "require".
2. Extract the ZIP file and open index.php.
3. Add the following code to index.php:
<?php
require_once('vendor/autoload.php');
/* Start to develop here. Best regards https://php-download.com/ */
ssola / crawly example snippets
// Build a crawler through the library's generic factory.
$crawler = Crawly\Factory::generic();

// Discovers let you extract the links the crawler should follow;
// this one is configured with a CSS selector string.
$paginationDiscover = new Crawly\Discovers\CssSelector('nav.pagination > ul > li > a');
$crawler->attachDiscover($paginationDiscover);

// Once a page has been scraped and its links discovered, your own closures
// can be attached to handle the data.
$handleResponse = function ($response) {
    // here we have the response, work with it!
};
$crawler->attachExtractor($handleResponse);

// Seed page for the crawl.
$crawler->setSeed("http://www.webpage.com/test/");

// Start the crawler.
$crawler->run();
// Example A: ask the factory for a crawler built with its generic setup.
$crawler = Crawly\Factory::generic();
// Example B: build a crawler from your own components instead.
// Presumably the arguments are, in order, an HTTP client, a URL queue and a
// collection of visited URLs (inferred from the placeholder class names — verify
// against the library's Factory::create signature).
// NOTE(review): A and B look like alternative snippets; if both run, B simply
// overwrites the $crawler assigned by A.
$crawler = Crawly\Factory::create(new MyHttpClass(), new MyUrlQueue(), new MyVisitedCollection());
class MyOwnDiscover implements Discoverable
{
private $configuration;
public function __construct($configuration)
{
$this->configuration = $configuration;
}
public function find(Crawly &$crawler, $response)
{
// $response has the crawled url content
// do some magic on the response and get a collection of links
foreach($links as $node) {
$uri = new Uri($node->getAttribute('href'), $crawler->getHost());
// if url was not visited we should
Loading, please wait...
Before you can download the PHP files, the dependencies should be resolved. This can take some minutes. Please be patient.