1. Go to the download page for the sobak/scrawler library and choose the download type "require".
2. Extract the ZIP file and open index.php.
3. Add this code to index.php:
<?php
require_once('vendor/autoload.php');
/* Start to develop here. Best regards https://php-download.com/ */
sobak / scrawler example snippets
use App\PostEntity;
use Sobak\Scrawler\Block\Matcher\CssSelectorHtmlMatcher;
use Sobak\Scrawler\Block\Matcher\CssSelectorListMatcher;
use Sobak\Scrawler\Block\ResultWriter\FilenameProvider\EntityPropertyFilenameProvider;
use Sobak\Scrawler\Block\ResultWriter\JsonFileResultWriter;
use Sobak\Scrawler\Block\UrlListProvider\ArgumentAdvancerUrlListProvider;
use Sobak\Scrawler\Configuration\Configuration;
use Sobak\Scrawler\Configuration\ObjectConfiguration;
', new CssSelectorHtmlMatcher('div.entry-content'))
->addFieldDefinition('title', new CssSelectorHtmlMatcher('h1.entry-title a'))
->addEntityMapping(PostEntity::class)
->addResultWriter(PostEntity::class, new JsonFileResultWriter([
'directory' => 'posts/',
'filename' => new EntityPropertyFilenameProvider([
'property' => 'slug',
]),
]))
;
})
;
return $scrawler;
bash
php vendor/bin/scrawler crawl config.php
Loading, please wait...
Before you can download the PHP files, the dependencies must be resolved. This can take a few minutes, so please be patient.