1. Go to the download page for the codeguy/arachnid library and choose the download type "require".
2. Extract the ZIP file and open the index.php.
3. Add this code to the index.php.
<?php
require_once('vendor/autoload.php');
/* Start to develop here. Best regards https://php-download.com/ */
codeguy / arachnid example snippets
// Basic usage: crawl starting from $url, passing $linkDepth as the link depth.
$url = 'http://www.example.com';
$linkDepth = 3;
// Initiate crawl; by default it will use the HTTP client (GoutteClient).
$crawler = new \Arachnid\Crawler($url, $linkDepth);
$crawler->traverse();
// Get link data as arrays; to get links as objects use the getLinks() method instead.
$links = $crawler->getLinksArray();
print_r($links);
// Crawl again, this time enabling headless browser mode before traversing.
$crawler = new \Arachnid\Crawler($url, $linkDepth);
$headlessCrawler = $crawler->enableHeadlessBrowserMode();
$traversedCrawler = $headlessCrawler->traverse();
// The returned links array is not stored here; assign it to a variable to use it.
$traversedCrawler->getLinksArray();
use \Arachnid\Adapters\CrawlingFactory;
// The third constructor argument carries the options used to configure the HTTP client.
$clientOptions = [
    'auth_basic' => ['username', 'password'],
];
$crawler = new \Arachnid\Crawler('http://github.com', 2, $clientOptions);

// Alternatively, create a scrap client yourself and set it on the crawler.
$options = [
    'verify_host' => false,
    'verify_peer' => false,
    'timeout' => 30,
];
$scrapperClient = CrawlingFactory::create(CrawlingFactory::TYPE_HTTP_CLIENT, $options);
$crawler->setScrapClient($scrapperClient);
// Initialize the crawler whose activity should be logged.
$crawler = new \Arachnid\Crawler($url, $linkDepth);

// Attach a PSR-3 compatible logger that writes to crawler.log in the system temp directory.
$logger = new \Monolog\Logger('crawler logger');
$logFile = sys_get_temp_dir().'/crawler.log';
$logHandler = new \Monolog\Handler\StreamHandler($logFile);
$logger->pushHandler($logHandler);
$crawler->setLogger($logger);
// Filter links through a closure before traversing.
$blogOnly = function ($link) {
    // True when the link matches the pattern below, i.e. it contains "/blog".
    return preg_match('/.*\/blog.*$/u', $link) === 1;
};
$links = $crawler->filterLinks($blogOnly)
    ->traverse()
    ->getLinks();
// Traverse and fetch the links as objects.
$links = $crawler->traverse()
    ->getLinks();
// Fully qualified because no `use` statement in this file imports LinksCollection;
// an unqualified name would resolve to a global class.
// NOTE(review): \Arachnid namespace assumed from the library layout; confirm.
$collection = new \Arachnid\LinksCollection($links);
// Getting broken links.
$brokenLinks = $collection->getBrokenLinks();
// Getting links for a specific depth.
$depth2Links = $collection->getByDepth(2);
// Getting external links inside the site.
$externalLinks = $collection->getExternalLinks();
Loading, please wait ...
Before you can download the PHP files, the dependencies must be resolved. This can take a few minutes. Please be patient.