PHP code examples for zrashwani / arachnid

1. Go to this page and download the library: Download zrashwani/arachnid library. Choose the download type `require`; this bundles the package together with its Composer dependencies and autoloader (equivalent to running composer require zrashwani/arachnid yourself).

2. Extract the ZIP file and open index.php.

3. Add this code to index.php:

    <?php
    require_once('vendor/autoload.php');

    /* Start to develop here. Best regards https://php-download.com/ */

zrashwani / arachnid example snippets


    
    $url = 'http://www.example.com';
    $linkDepth = 3;
    // initiate the crawl; by default it uses the HTTP client (GoutteClient)
    $crawler = new \Arachnid\Crawler($url, $linkDepth);
    $crawler->traverse();

    // get link data as an array; use the getLinks() method to get links as objects
    $links = $crawler->getLinksArray();
    print_r($links);

    // alternatively, enable headless browser mode to crawl pages that rely on JavaScript rendering
    $crawler = new \Arachnid\Crawler($url, $linkDepth);
    $links = $crawler->enableHeadlessBrowserMode()
                     ->traverse()
                     ->getLinksArray();
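Putting steps 1-3 together, a complete index.php could look like the minimal sketch below; the URL and depth are placeholder values, and it assumes the vendor/ directory from the ZIP sits next to index.php.

    <?php
    require_once('vendor/autoload.php');

    $url = 'http://www.example.com'; // placeholder target site
    $linkDepth = 3;                  // how many levels of links to follow

    $crawler = new \Arachnid\Crawler($url, $linkDepth);
    $crawler->traverse();

    // dump every discovered link together with its collected info
    print_r($crawler->getLinksArray());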

    
    use \Arachnid\Adapters\CrawlingFactory;

    // the third constructor parameter holds the options used to configure the HTTP client
    $clientOptions = ['auth_basic' => ['username', 'password']];
    $crawler = new \Arachnid\Crawler('http://github.com', 2, $clientOptions);

    // or create the scraping client yourself and set it on the crawler
    $options = [
        'verify_host' => false,
        'verify_peer' => false,
        'timeout'     => 30,
    ];

    $scrapperClient = CrawlingFactory::create(CrawlingFactory::TYPE_HTTP_CLIENT, $options);
    $crawler->setScrapClient($scrapperClient);
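Once a client is set, the crawler is used exactly as before; a short sketch, assuming traverse() and getLinksArray() behave the same whichever client is configured:

    // crawl http://github.com two levels deep through the configured client
    $links = $crawler->traverse()
                     ->getLinksArray();
    print_r($links);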

        
    $crawler = new \Arachnid\Crawler($url, $linkDepth); // ... initialize crawler

    // set a logger for crawler activity (any PSR-3 compatible logger)
    $logger = new \Monolog\Logger('crawler logger');
    $logger->pushHandler(new \Monolog\Handler\StreamHandler(sys_get_temp_dir().'/crawler.log'));
    $crawler->setLogger($logger);
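With the logger attached, each traversal appends its activity to the log file; a small sketch for inspecting it afterwards, assuming the StreamHandler path used above:

    $crawler->traverse();

    // print whatever the crawler logged while traversing
    echo file_get_contents(sys_get_temp_dir().'/crawler.log');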
    

    
    // filter links using a custom callback (closure);
    // here, only links containing "/blog" are crawled
    $links = $crawler->filterLinks(function($link) {
                        return (bool)preg_match('/.*\/blog.*$/u', $link);
                    })
                    ->traverse()
                    ->getLinks();
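The same pattern works for any predicate on the link string; for example, a hypothetical variation that only follows links containing "/docs/":

    // hypothetical variation: crawl only links containing /docs/
    $docsLinks = $crawler->filterLinks(function($link) {
                        return (bool)preg_match('/\/docs\//u', $link);
                    })
                    ->traverse()
                    ->getLinks();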


    
    $links = $crawler->traverse()
                     ->getLinks();
    $collection = new \Arachnid\LinksCollection($links);

    // getting broken links
    $brokenLinks = $collection->getBrokenLinks();

    // getting links at a specific depth
    $depth2Links = $collection->getByDepth(2);

    // getting external links found inside the site
    $externalLinks = $collection->getExternalLinks();
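A quick way to inspect the results is to dump each sub-collection; a minimal sketch, assuming the returned collections can be printed like arrays:

    // inspect each filtered set of links
    print_r($brokenLinks);
    print_r($depth2Links);
    print_r($externalLinks);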