PHP code example of rajpurohithitesh / advance-phpscraper
1. Go to this page and download the library: Download rajpurohithitesh/advance-phpscraper library. Choose the download type "require".
2. Extract the ZIP file and open index.php.
3. Add this code to index.php.
<?php
// Load the autoloader shipped in vendor/. Anchoring the path to __DIR__ makes
// the lookup independent of the current working directory and include_path.
require_once __DIR__ . '/vendor/autoload.php';
/* Start to develop here. Best regards https://php-download.com/ */
rajpurohithitesh / advance-phpscraper example snippets
use AdvancePHPSraper\Core\Scraper;

// Installation check: instantiating Scraper only succeeds if the class can be
// autoloaded, so reaching the echo means the library is wired up.
$scraper = new Scraper();
echo "Hooray! Advance PHP Scraper is ready to roll!\n";
use AdvancePHPSraper\Core\Scraper;

// Create a new scraper instance
$scraper = new Scraper();

// Go to the website
$scraper->go('https://example.com');

// Get the page title
$title = $scraper->title();

// Print the title
echo "The page title is: $title\n";
use AdvancePHPSraper\Core\Scraper;

$scraper = new Scraper();
$scraper->go('https://example.com');

// Get all links
$links = $scraper->links();

// Loop through links and print them. Each entry is read as an array with
// 'href', 'text' and 'is_nofollow' keys.
echo "Found " . count($links) . " links:\n";
foreach ($links as $link) {
    echo "- URL: {$link['href']}\n";
    echo " Text: {$link['text']}\n";
    echo " Nofollow: " . ($link['is_nofollow'] ? 'Yes' : 'No') . "\n";
}
use AdvancePHPSraper\Core\Scraper;

$scraper = new Scraper();
$scraper->go('https://example.com');

// Get all images
$images = $scraper->images();

// Print images. Each entry is read as an array with 'src', 'alt', 'width'
// and 'height' keys.
echo "Found " . count($images) . " images:\n";
foreach ($images as $image) {
    echo "- Source: {$image['src']}\n";
    echo " Alt Text: {$image['alt']}\n";
    echo " Dimensions: {$image['width']}x{$image['height']}\n";
}
// Standalone one-line variant: load a different site with go().
// NOTE(review): presumably reuses a $scraper created by an earlier snippet — confirm.
$scraper->go('https://www.wikipedia.org');
use AdvancePHPSraper\Core\Scraper;

$scraper = new Scraper();
$scraper->go('https://example.com');

// Get meta tags
$meta = $scraper->meta();

// Print meta tags. The result is walked as a two-level map:
// group/type => (tag name => content).
echo "Meta Tags:\n";
foreach ($meta as $type => $tags) {
    echo "$type:\n";
    foreach ($tags as $name => $content) {
        echo " - $name: $content\n";
    }
}
use AdvancePHPSraper\Core\Scraper;

$scraper = new Scraper();
$scraper->go('https://example.com');

// Get sitemap URLs
$sitemap = $scraper->sitemap();

// Each entry is read as an array with 'loc' and 'lastmod' keys.
echo "Sitemap URLs:\n";
foreach ($sitemap as $url) {
    echo "- {$url['loc']} (Last Modified: {$url['lastmod']})\n";
}
// Standalone one-line variant: load a different site with go().
// NOTE(review): presumably reuses a $scraper created by an earlier snippet — confirm.
$scraper->go('https://www.wikipedia.org');
use AdvancePHPSraper\Core\Scraper;

$scraper = new Scraper();
$scraper->go('https://example.com');

// Get RSS feeds
$feeds = $scraper->rssFeed();

// Each feed is read as an array with 'title', 'url' and 'items'; each item
// is read as an array with 'title' and 'pubDate'.
echo "RSS Feeds:\n";
foreach ($feeds as $feed) {
    echo "- Feed: {$feed['title']} ({$feed['url']})\n";
    foreach ($feed['items'] as $item) {
        echo " - {$item['title']} ({$item['pubDate']})\n";
    }
}
// Standalone one-line variant: load a different site with go().
// NOTE(review): presumably reuses a $scraper created by an earlier snippet — confirm.
$scraper->go('https://www.bbc.com');
use AdvancePHPSraper\Core\Scraper;

$scraper = new Scraper();
$scraper->go('https://example.com');

// Parse a CSV file (assuming a link exists)
$content = $scraper->fetchAsset('https://example.com/data.csv');

// NOTE(review): the second argument presumably means "first row is a header",
// since rows are accessed by the 'name' and 'value' keys below — confirm.
$data = $scraper->parseCsv($content, true);

echo "CSV Data:\n";
foreach ($data as $row) {
    echo "- {$row['name']}: {$row['value']}\n";
}
use AdvancePHPSraper\Core\Scraper;

$scraper = new Scraper();
$scraper->go('https://example.com');

// Report the HTTP status of the page that was just loaded.
$status = $scraper->getStatusCode();
if ($status === 200) {
    echo "Page loaded successfully!\n";
} else {
    echo "Error: HTTP $status\n";
}

// Separate check exposed by the scraper for error pages.
if ($scraper->isErrorPage()) {
    echo "This is an error page (e.g., 404 or 500).\n";
}
use AdvancePHPSraper\Core\Scraper;

$scraper = new Scraper();
$scraper->setRateLimit(3, 1); // 3 requests per second

$urls = [
    'https://example.com',
    'https://example.org',
    'https://iana.org',
    'https://wikipedia.org',
];

// Visit every URL in order with the rate-limited scraper configured above.
foreach ($urls as $url) {
    $scraper->go($url);
    echo "Scraped: $url\n";
}
use AdvancePHPSraper\Core\Scraper;

$scraper = new Scraper();

$urls = [
    'https://example.com',
    'https://example.org',
    'https://iana.org',
];

// Define a callback to extract titles. The <title> lookup is done once and
// reused for both the existence check and the text extraction.
$callback = function ($crawler) {
    $titleNodes = $crawler->filter('title');

    return $titleNodes->count() ? $titleNodes->text() : 'No title';
};

// Queue URLs
$scraper->queueUrls($urls, $callback);

// Process the queue
$results = $scraper->processQueue();

// Print results; the result set is iterated as url => callback return value.
echo "Scraping Results:\n";
foreach ($results as $url => $title) {
    echo "- $url: $title\n";
}
use AdvancePHPSraper\Core\Scraper;

$scraper = new Scraper();
$scraper->go('https://example.com');

// Scrape the title
$title = $scraper->title();

// Fetch related data from an API (POST with a single 'query' field)
$apiData = $scraper->apiRequest('https://jsonplaceholder.typicode.com/posts/1', [
    'query' => 'example',
], 'POST');

echo "Page Title: $title\n";
echo "API Data:\n";
echo json_encode($apiData, JSON_PRETTY_PRINT) . "\n";
use AdvancePHPSraper\Core\Scraper;

$scraper = new Scraper();
$scraper->go('https://example.com');

// Extract content from a specific div. The selector is evaluated once and the
// result reused for both the existence check and the text extraction.
$contentNodes = $scraper->filter('div.content');
$content = $contentNodes->count()
    ? $contentNodes->text()
    : 'No content found';

echo "Content: $content\n";
use AdvancePHPSraper\Core\Scraper;

$scraper = new Scraper();

// Turn on the cache plugin through the plugin manager, then enable caching on
// the scraper itself before making the request.
$scraper->getPluginManager()->enablePlugin('CachePlugin');
$scraper->enableCache();

$scraper->go('https://example.com'); // Cached after first request
// Standalone one-line example: set the User-Agent string on the scraper.
// NOTE(review): presumably reuses a $scraper created by an earlier snippet — confirm.
$scraper->setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/91.0.4472.124');
namespace AdvancePHPSraper\Tests;
use AdvancePHPSraper\Core\Scraper;
use PHPUnit\Framework\TestCase;
/**
 * Example PHPUnit test case showing how to exercise the scraper from a test.
 */
class CustomTest extends TestCase
{
    /**
     * Loads https://example.com and asserts that the scraper reports a
     * non-empty page title.
     */
    public function testCustomMethod()
    {
        $client = new Scraper();
        $client->go('https://example.com');

        $pageTitle = $client->title();

        self::assertNotEmpty($pageTitle);
    }
}