PHP code example of nggiahao / crawler
2. Go to this page and download the library: Download nggiahao/crawler library. Choose the download type "require".
3. Extract the ZIP file and open index.php.
4. Add this code to index.php.
<?php
require_once('vendor/autoload.php');
/* Start to develop here. Best regards https://php-download.com/ */
nggiahao / crawler example snippets
// Sites to crawl. 'W123job' matches the name of the site config class
// defined for https://123job.vn (presumably resolved by name; confirm).
$sites = ['W123job'];

// Third argument to run(): whether to reset the queue before crawling.
$reset = false;

// Options passed straight through to the crawler.
// NOTE(review): presumably 'concurrency' is the number of parallel requests
// and 'browser' selects the HTTP backend -- confirm against the package docs.
$config = [
    'concurrency' => 10,
    'proxy'       => null,
    'browser'     => 'guzzle',
];

// Resolve the crawler from the container and start the run.
app(\Nggiahao\Crawler\Crawler::class)
    ->run($sites, $config, $reset);
bash
# Publish the files the package's service provider registers under the "config" tag.
php artisan vendor:publish --provider="Nggiahao\Crawler\CrawlerServiceProvider" --tag="config"
# Publish the files registered under the "migrations" tag.
php artisan vendor:publish --provider="Nggiahao\Crawler\CrawlerServiceProvider" --tag="migrations"
# Run pending database migrations (including the ones just published).
php artisan migrate
php
use Nggiahao\Crawler\SitesConfig\SiteAbstract;
/**
 * Site configuration for crawling https://123job.vn.
 *
 * Supplies the root URL, the start URLs, and the URL filters that decide
 * which links are followed and which pages have data extracted.
 */
class W123job extends SiteAbstract
{
    /**
     * Root URL of the site.
     */
    public function rootUrl(): string
    {
        return 'https://123job.vn';
    }

    /**
     * URLs the crawl starts from.
     *
     * @return array list of absolute URLs
     */
    public function startUrls(): array
    {
        return ["https://123job.vn"];
    }

    /**
     * Decide whether a URL should be crawled.
     *
     * Only URLs that begin with https://123job.vn/viec-lam/ or
     * https://123job.vn/company/ are accepted.
     *
     * @param mixed $url URL to test
     * @return bool true when the URL matches one of the accepted prefixes
     */
    public function shouldCrawl($url)
    {
        $acceptedPrefixes = [
            "/^https:\/\/123job\.vn\/viec-lam\//",
            "/^https:\/\/123job\.vn\/company\//",
        ];

        // Patterns are tried in order; the first hit settles the answer.
        foreach ($acceptedPrefixes as $pattern) {
            if (preg_match($pattern, $url)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Decide whether data should be extracted from a URL.
     *
     * Matches any URL containing "/company/".
     *
     * @param mixed $url URL to test
     * @return int|false raw preg_match() result: 1 on match, 0 on no match,
     *                   false on a regex error
     */
    public function shouldGetData($url)
    {
        $matched = preg_match("/\/company\//", $url);

        return $matched;
    }

    /**
     * Extract information from a parsed page.
     *
     * Delegates entirely to the parent implementation.
     *
     * NOTE(review): `Crawler` is not imported in this snippet, so the type
     * hint resolves relative to the current namespace -- confirm which
     * Crawler class is intended and that it matches the parent signature.
     */
    public function getInfoFromCrawler(Crawler $dom_crawler)
    {
        $info = parent::getInfoFromCrawler($dom_crawler);

        return $info;
    }
}