PHP code example of heimrichhannot / crawler

1. Go to the download page and download the heimrichhannot/crawler library. Choose the download type "require".

2. Extract the ZIP file and open the index.php.

3. Add this code to the index.php.
    
        
<?php
require_once('vendor/autoload.php');

/* Start to develop here. Best regards https://php-download.com/ */

    

heimrichhannot / crawler example snippets


Crawler::create()
    ->setCrawlObserver(<implementation of \Spatie\Crawler\CrawlObserver>)
    ->startCrawling($url);

/**
 * Called when the crawler is about to crawl the given URL.
 *
 * @param \Spatie\Crawler\Url $url The URL that will be crawled.
 */
public function willCrawl(Url $url);

/**
 * Called when the crawler has crawled the given URL.
 *
 * @param \Spatie\Crawler\Url $url The URL that has been crawled.
 * @param \Psr\Http\Message\ResponseInterface $response Not type-hinted in the signature;
 *                                                      documented as a PSR-7 response.
 * @param \Spatie\Crawler\Url|null $foundOn Optional; defaults to null. Presumably the URL of the
 *                                          page on which $url was found (confirm against the library).
 */
public function hasBeenCrawled(Url $url, $response, Url $foundOn = null);

/**
 * Called when the crawl has ended. Receives no arguments.
 */
public function finishedCrawling();

Crawler::create()
    ->executeJavaScript()
    ...

Crawler::create()
    ->executeJavaScript($pathToChrome)
    ...

/**
 * Determine if the given URL should be crawled.
 *
 * @param \Spatie\Crawler\Url $url The URL under consideration.
 *
 * @return bool True when the URL should be crawled, false otherwise.
 */
public function shouldCrawl(Url $url): bool;

Crawler::create()
    ->setConcurrency(1) // now all URLs will be crawled one by one

// stop crawling after 5 urls

Crawler::create()
    ->setMaximumCrawlCount(5) 

Crawler::create()
    ->setMaximumDepth(2) 

Crawler::create()
    ->setCrawlQueue(<implementation of \Spatie\Crawler\CrawlQueue\CrawlQueue>)