1. Go to this page and download the webcrawlerapi/sdk library. Choose the download type "require".
2. Extract the ZIP file and open index.php.
3. Add the following code to index.php.
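(If you prefer to manage dependencies with Composer yourself rather than through the ZIP download, the equivalent install is typically composer require webcrawlerapi/sdk, which generates the same vendor/autoload.php that the snippet below loads.)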
<?php
require_once('vendor/autoload.php');
// webcrawlerapi/sdk example snippets
use WebCrawlerAPI\WebCrawlerAPI;
// Initialize the client
$crawler = new WebCrawlerAPI('your_api_key');
// Synchronous crawling (blocks until completion)
$job = $crawler->crawl(
    url: 'https://example.com',
    scrapeType: 'markdown',
    itemsLimit: 10,
    webhookUrl: 'https://yourserver.com/webhook',
    allowSubdomains: false,
    maxPolls: 100 // Optional: maximum number of status checks
);
echo "Job completed with status: {$job->status}\n";
// Access job items and their content
foreach ($job->jobItems as $item) {
    echo "Page title: {$item->title}\n";
    echo "Original URL: {$item->originalUrl}\n";
    echo "Item status: {$item->status}\n";

    // Get the content based on job's scrape_type
    // Returns null if item is not in "done" status
    $content = $item->getContent();
    if ($content) {
        echo "Content length: " . strlen($content) . "\n";
        echo "Content preview: " . substr($content, 0, 200) . "...\n";
    } else {
        echo "Content not available or item not done\n";
    }
}
// Access job items and their parent job
foreach ($job->jobItems as $item) {
    echo "Item URL: {$item->originalUrl}\n";
    echo "Parent job status: {$item->job->status}\n";
    echo "Parent job URL: {$item->job->url}\n";
}
// Or use asynchronous crawling
$response = $crawler->crawlAsync(
    url: 'https://example.com',
    scrapeType: 'markdown',
    itemsLimit: 10,
    webhookUrl: 'https://yourserver.com/webhook',
    allowSubdomains: false
);
// Get the job ID from the response
$jobId = $response->id;
echo "Crawling job started with ID: {$jobId}\n";
// Check job status and get results
$job = $crawler->getJob($jobId);
echo "Job status: {$job->status}\n";
// Access job details
echo "Crawled URL: {$job->url}\n";
echo "Created at: {$job->createdAt->format('Y-m-d H:i:s')}\n";
echo "Number of items: " . count($job->jobItems) . "\n";
// Cancel a running job if needed
$cancelResponse = $crawler->cancelJob($jobId);
echo "Cancellation response: " . json_encode($cancelResponse) . "\n";