PHP code example of rajpurohithitesh / advance-phpscraper

1. Go to this page and download the rajpurohithitesh/advance-phpscraper library, choosing the download type "require".

2. Extract the ZIP file and open the index.php.

3. Add this code to the index.php.
    
        
<?php
// Load Composer's autoloader. The path is anchored to this file's directory
// so the script also works when PHP is started from another working directory.
require_once __DIR__ . '/vendor/autoload.php';

/* Start to develop here. Best regards https://php-download.com/ */

    

rajpurohithitesh / advance-phpscraper example snippets



use AdvancePHPSraper\Core\Scraper;

// Smoke test: if the class can be instantiated, the library is installed
// and the autoloader can find it.
$scraper = new Scraper();
echo "Hooray! Advance PHP Scraper is ready to roll!\n";


use AdvancePHPSraper\Core\Scraper;

// Create a new scraper instance
$scraper = new Scraper();

// Load the page to inspect
$scraper->go('https://example.com');

// Read the page title and print it
$title = $scraper->title();
echo "The page title is: $title\n";


use AdvancePHPSraper\Core\Scraper;

$scraper = new Scraper();
$scraper->go('https://example.com');

// Collect the links of the loaded page. Each entry is read below through
// its 'href', 'text' and 'is_nofollow' keys.
$links = $scraper->links();

// Print a count header followed by one three-line record per link
echo "Found " . count($links) . " links:\n";
foreach ($links as $link) {
    echo "- URL: {$link['href']}\n";
    echo "  Text: {$link['text']}\n";
    echo "  Nofollow: " . ($link['is_nofollow'] ? 'Yes' : 'No') . "\n";
}


use AdvancePHPSraper\Core\Scraper;

$scraper = new Scraper();
$scraper->go('https://example.com');

// Collect the images of the loaded page. Each entry is read below through
// its 'src', 'alt', 'width' and 'height' keys.
$images = $scraper->images();

// Print a count header followed by one three-line record per image
echo "Found " . count($images) . " images:\n";
foreach ($images as $image) {
    echo "- Source: {$image['src']}\n";
    echo "  Alt Text: {$image['alt']}\n";
    echo "  Dimensions: {$image['width']}x{$image['height']}\n";
}

  // Standalone fragment: navigates an already-created $scraper to Wikipedia
  // (presumably an alternative URL to try with the image example — confirm).
  $scraper->go('https://www.wikipedia.org');
  


use AdvancePHPSraper\Core\Scraper;

$scraper = new Scraper();
$scraper->go('https://example.com');

// Fetch the meta tags. The result is iterated as a two-level map:
// type => (name => content).
$meta = $scraper->meta();

// Print each type as a heading with its name/content pairs indented below
echo "Meta Tags:\n";
foreach ($meta as $type => $tags) {
    echo "$type:\n";
    foreach ($tags as $name => $content) {
        echo "  - $name: $content\n";
    }
}


use AdvancePHPSraper\Core\Scraper;

$scraper = new Scraper();
$scraper->go('https://example.com');

// Fetch the sitemap entries. Each entry is read below through its 'loc'
// and 'lastmod' keys.
$sitemap = $scraper->sitemap();

// Print one line per sitemap URL with its last-modified value
echo "Sitemap URLs:\n";
foreach ($sitemap as $url) {
    echo "- {$url['loc']} (Last Modified: {$url['lastmod']})\n";
}

  // Standalone fragment: navigates an already-created $scraper to Wikipedia
  // (presumably an alternative URL to try with the sitemap example — confirm).
  $scraper->go('https://www.wikipedia.org');
  


use AdvancePHPSraper\Core\Scraper;

$scraper = new Scraper();
$scraper->go('https://example.com');

// Fetch the RSS feeds. Each feed is read below through its 'title', 'url'
// and 'items' keys; each item through 'title' and 'pubDate'.
$feeds = $scraper->rssFeed();

// Print every feed as a heading with its items indented underneath
echo "RSS Feeds:\n";
foreach ($feeds as $feed) {
    echo "- Feed: {$feed['title']} ({$feed['url']})\n";
    foreach ($feed['items'] as $item) {
        echo "  - {$item['title']} ({$item['pubDate']})\n";
    }
}

  // Standalone fragment: navigates an already-created $scraper to the BBC site
  // (presumably the URL to use for the RSS example — confirm).
  $scraper->go('https://www.bbc.com');
  


use AdvancePHPSraper\Core\Scraper;

$scraper = new Scraper();
$scraper->go('https://example.com');

// Download a CSV file (assuming this link exists) and parse its content.
// NOTE(review): the second argument `true` presumably marks the first row
// as a header row, since rows are read by column name below — confirm.
$content = $scraper->fetchAsset('https://example.com/data.csv');
$data = $scraper->parseCsv($content, true);

// Print the 'name' and 'value' columns of every row
echo "CSV Data:\n";
foreach ($data as $row) {
    echo "- {$row['name']}: {$row['value']}\n";
}


use AdvancePHPSraper\Core\Scraper;

$scraper = new Scraper();
$scraper->go('https://example.com');

// Report success only for an exact HTTP 200; print any other code as an error
$status = $scraper->getStatusCode();
if ($status === 200) {
    echo "Page loaded successfully!\n";
} else {
    echo "Error: HTTP $status\n";
}

// Independent check using the scraper's own error-page detection
if ($scraper->isErrorPage()) {
    echo "This is an error page (e.g., 404 or 500).\n";
}


use AdvancePHPSraper\Core\Scraper;

$scraper = new Scraper();
$scraper->setRateLimit(3, 1); // 3 requests per second

// Pages to visit one after another under the rate limit set above
$urls = [
    'https://example.com',
    'https://example.org',
    'https://iana.org',
    'https://wikipedia.org',
];

foreach ($urls as $url) {
    $scraper->go($url);
    echo "Scraped: $url\n";
}


use AdvancePHPSraper\Core\Scraper;

$scraper = new Scraper();
$urls = [
    'https://example.com',
    'https://example.org',
    'https://iana.org',
];

// Callback for each queued page: return the <title> text, or a placeholder
// when the page has no <title> element. The selector is evaluated once and
// the result reused.
$callback = function ($crawler) {
    $titleNode = $crawler->filter('title');

    return $titleNode->count() ? $titleNode->text() : 'No title';
};

// Queue the URLs together with the callback
$scraper->queueUrls($urls, $callback);

// Process the queue; the results are iterated below as url => callback result
$results = $scraper->processQueue();

// Print results
echo "Scraping Results:\n";
foreach ($results as $url => $title) {
    echo "- $url: $title\n";
}


use AdvancePHPSraper\Core\Scraper;

$scraper = new Scraper();
$scraper->go('https://example.com');

// Scrape the title
$title = $scraper->title();

// Send a POST request with one parameter to a JSON API
$apiData = $scraper->apiRequest('https://jsonplaceholder.typicode.com/posts/1', [
    'query' => 'example',
], 'POST');

// Print the scraped title followed by the API response as pretty-printed JSON
echo "Page Title: $title\n";
echo "API Data:\n";
echo json_encode($apiData, JSON_PRETTY_PRINT) . "\n";


use AdvancePHPSraper\Core\Scraper;

$scraper = new Scraper();
$scraper->go('https://example.com');

// Select the content div once, then use its text when at least one node
// matched; otherwise fall back to a placeholder message.
$contentNode = $scraper->filter('div.content');
$content = $contentNode->count()
    ? $contentNode->text()
    : 'No content found';

echo "Content: $content\n";


use AdvancePHPSraper\Core\Scraper;

$scraper = new Scraper();

// Enable the cache plugin through the plugin manager, then switch caching on
$scraper->getPluginManager()->enablePlugin('CachePlugin');
$scraper->enableCache();

$scraper->go('https://example.com'); // Cached after first request

// The three calls below are independent configuration fragments; each one
// expects an existing $scraper instance.

// Set the user-agent string used by the scraper
$scraper->setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/91.0.4472.124');

// Set the request timeout
$scraper->setTimeout(30); // 30 seconds

// Control redirect handling
$scraper->setFollowRedirects(true); // Follow redirects

// Configure the scraper through a constructor options array
$options = [
    'user_agent' => 'MyBot/1.0',
    'timeout' => 30,
    'follow_redirects' => true,
];
$scraper = new Scraper($options);


namespace AdvancePHPSraper\Tests;
use AdvancePHPSraper\Core\Scraper;
use PHPUnit\Framework\TestCase;

/**
 * Example of a custom PHPUnit test for the scraper.
 *
 * NOTE(review): go() presumably performs a live HTTP request, so this test
 * likely needs network access — confirm before running it in CI.
 */
class CustomTest extends TestCase
{
    /**
     * After navigating to https://example.com the scraper must report a
     * non-empty page title.
     */
    public function testCustomMethod(): void
    {
        $scraper = new Scraper();
        $scraper->go('https://example.com');

        self::assertNotEmpty($scraper->title());
    }
}

  // Standalone fragment: sets the user agent on an already-created $scraper.
  // The string literal ends in "..." — presumably an abbreviated placeholder
  // for a full browser user-agent string; confirm before copying it verbatim.
  $scraper->setUserAgent('Mozilla/5.0...');
  
bash
  php -v
  
bash
  # Download the Composer installer script to ./composer-setup.php
  php -r "copy('https://getcomposer.org/installer', 'composer-setup.php');"
  # Run the installer (presumably writes composer.phar into the current directory, which the mv below relies on)
  php composer-setup.php
  # Delete the installer script once it has run
  php -r "unlink('composer-setup.php');"
  # Make Composer available as `composer`; NOTE(review): /usr/local/bin is usually root-owned, so this may need sudo
  mv composer.phar /usr/local/bin/composer
  
bash
php test.php

Hooray! Advance PHP Scraper is ready to roll!
bash
php scrape_title.php
bash
php scrape_links.php
bash
php scrape_images.php
bash
php scrape_meta.php
bash
php bin/scraper scrape https://example.com --extract=links,meta,content
bash
php scrape_sitemap.php
bash
php scrape_rss.php

  RSS Feeds:
  - Feed: BBC News (https://feeds.bbci.co.uk/news/rss.xml)
    - Breaking News (2025-05-19 10:00:00)
    - World Update (2025-05-19 09:00:00)
  
bash
php rate_limit.php
bash
php queue_scrape.php
bash
php api_scrape.php