PHP code example of crispy-computing-machine / supersimplecrawler

1. Go to this page and download the library: "Download crispy-computing-machine/supersimplecrawler library". Choose the download type "require".

2. Extract the ZIP file and open the index.php.

3. Add this code to the index.php.
    
        
<?php
// Load Composer's autoloader. The path is anchored to this file's directory so the
// script keeps working regardless of the current working directory or include_path.
require_once __DIR__ . '/vendor/autoload.php';

/* Start to develop here. Best regards https://php-download.com/ */

    

crispy-computing-machine / supersimplecrawler example snippets


// Composer: load the autoloader so the SuperSimple classes can be resolved.
require_once __DIR__ . '/vendor/autoload.php';

use SuperSimple\Crawler;
use SuperSimple\CrawlerCompleteException;
// NOTE(review): in a file without a namespace this import has no effect (PHP emits a
// warning for it); it is kept because the original snippet lists it.
use DOMDocument;

// Configure the crawler. The constructor argument enables verbose output.
$crawler = new Crawler(true);
try {
    $crawler->setUrl("https://www.php.net/"); // Set the URL.
    $crawler->setPort(80); // Set the port (80 is the default HTTP port).
    $crawler->setFollowRedirects(true); // Follow redirects.
    $crawler->setFollowMode(2); // Follow only links within the same host.
    $crawler->setRequestLimit(10); // Limit the number of requests.
    $crawler->setContentSizeLimit(2000000); // Limit the content size (2 MB in this case).
    $crawler->setTrafficLimit(10000000); // Limit the traffic (10 MB in this case).
    $crawler->setUserAgentString("Mozilla/5.0 (compatible; MyCrawler/1.0)"); // Set a custom user agent.
    $crawler->setWorkingDirectory(sys_get_temp_dir() . DIRECTORY_SEPARATOR . 'crawler'); // Set a directory for storing data.
    $crawler->setProxy(); // Set a proxy (if required).

    // NOTE(review): the published example was truncated at this point. Any remaining
    // configuration and the call that actually starts the crawl are missing and must
    // be restored from the library's own documentation before this script does work.

} catch (CrawlerCompleteException $e) {
    // Complete report: the exception thrown when the crawl stops carries the totals.
    Crawler::log("Total pages downloaded: " . $e->getTotalPages(), "info");
    Crawler::log("Total size downloaded: " . $e->getTotalSize() . " bytes", "info");
    Crawler::log("Total links followed: " . $e->getTotalLinks(), "info");
    Crawler::log("Abort Reason: " . $e->getMessage(), "info");

} finally {
    // Additional clean up or summary actions; runs whether or not the exception was thrown.
    Crawler::log("Crawler has ended gracefully.", "success");
}