PHP code example of nggiahao / crawler

1. Go to this page and download the library: Download nggiahao/crawler library. Choose the download type "require".

2. Extract the ZIP file and open the index.php.

3. Add this code to the index.php.

    
        
<?php
// Load Composer's autoloader. The path is anchored to this file's directory
// (__DIR__) so the lookup does not depend on include_path or on the current
// working directory of the process that runs the script.
require_once __DIR__ . '/vendor/autoload.php';

/* Start to develop here. Best regards https://php-download.com/ */

nggiahao / crawler example snippets


    // Sites to crawl, passed as the first argument of run().
    // NOTE(review): presumably each entry names a site config class such as W123job — confirm.
    $sites = ['W123job'];

    // Options passed as the second argument of run().
    $config = [
        'concurrency' => 10,
        'proxy'       => null,
        'browser'     => 'guzzle',
    ];

    // Queue-reset flag, passed as the third argument of run(); disabled here.
    $reset = false;

    // Resolve the crawler through the app() helper, then start the run.
    $crawler = app(\Nggiahao\Crawler\Crawler::class);
    $crawler->run($sites, $config, $reset);

Run these commands in a shell (bash):
# Publish the assets that CrawlerServiceProvider registers under the "config" tag.
php artisan vendor:publish --provider="Nggiahao\Crawler\CrawlerServiceProvider" --tag="config"
# Publish the assets that CrawlerServiceProvider registers under the "migrations" tag.
php artisan vendor:publish --provider="Nggiahao\Crawler\CrawlerServiceProvider" --tag="migrations"
# Run the pending database migrations.
php artisan migrate

Example site configuration class (PHP):
use Nggiahao\Crawler\SitesConfig\SiteAbstract;

/**
 * Site definition for https://123job.vn built on top of SiteAbstract.
 */
class W123job extends SiteAbstract
{
    /**
     * Root URL of the site.
     */
    public function rootUrl(): string
    {
        $root = 'https://123job.vn';

        return $root;
    }

    /**
     * URLs returned as the starting points for this site.
     */
    public function startUrls(): array
    {
        $seeds = [];
        $seeds[] = "https://123job.vn";

        return $seeds;
    }

    /**
     * Tell whether a URL belongs to the crawlable part of the site.
     *
     * @param mixed $url URL to test.
     *
     * @return bool True when the URL starts with https://123job.vn/viec-lam/
     *              or https://123job.vn/company/.
     */
    public function shouldCrawl($url)
    {
        $viecLamPattern = "/^https:\/\/123job\.vn\/viec-lam\//";
        $companyPattern = "/^https:\/\/123job\.vn\/company\//";

        // Guard clause: the first prefix already decides the answer.
        if (preg_match($viecLamPattern, $url)) {
            return true;
        }

        return (bool) preg_match($companyPattern, $url);
    }

    /**
     * Tell whether a URL contains a "/company/" segment.
     *
     * @param mixed $url URL to test.
     *
     * @return int|false Raw preg_match() result: 1 on match, 0 on no match,
     *                   false on a regex error.
     */
    public function shouldGetData($url)
    {
        $companySegment = "/\/company\//";
        $matched = preg_match($companySegment, $url);

        return $matched;
    }

    /**
     * Delegate to the parent implementation without any site-specific changes.
     *
     * NOTE(review): `Crawler` is not imported in this snippet; it presumably
     * refers to the DOM crawler type the parent method expects — confirm and
     * add the matching `use` statement.
     */
    public function getInfoFromCrawler(Crawler $dom_crawler)
    {
        $info = parent::getInfoFromCrawler($dom_crawler);

        return $info;
    }
}