1. Go to the download page for the laurentvw/scrapher library and download it, choosing the download type "require".
2. Extract the ZIP file and open index.php.
3. Add the following code to index.php:
<?php
require_once('vendor/autoload.php');
/* Start to develop here. Best regards https://php-download.com/ */
laurentvw/scrapher example snippets
use \Laurentvw\Scrapher\Scrapher;
use \Laurentvw\Scrapher\Selectors\RegexSelector;
// Page to scrape; the Scrapher instance is created with it as its only argument.
$url = 'https://www.google.com/';
$scrapher = new Scrapher($url);
// Match all links on a page
// NOTE(review): the pattern literal below is cut off in this copy -- it has no closing
// delimiter, closing quote or semicolon -- so restore the full regex before running this.
$regex = '/<a.*?href=(?:"(.*?)").*
// Alternative ways of constructing a Scrapher. Each line overwrites $scrapher, so these are
// interchangeable examples rather than consecutive steps. $url2, $content and $content2 are
// not defined anywhere in these snippets and must be supplied by the caller.
// A single URL:
$scrapher = new Scrapher($url);
// An array of URLs:
$scrapher = new Scrapher(array($url, $url2));
// A single $content value (presumably already-fetched page markup -- TODO confirm):
$scrapher = new Scrapher($content);
// An array of such content values:
$scrapher = new Scrapher(array($content, $content2));
// NOTE(review): $matches is never assigned in this copy of the snippets; presumably it is the
// object returned by $scrapher->with(...) -- confirm before running any of the lines below.
// Return all matches
$results = $matches->get();
// Return all matches with a subset of the data (either use multiple arguments or an array for more than one column)
$results = $matches->get('title');
// Return the first match
$result = $matches->first();
// Return the last match
$result = $matches->last();
// Count the number of matches
$numberOfMatches = $matches->count();
// Take the first N matches (here N = 5)
$results = $matches->take(5)->get();
// Skip the first N matches (here N = 1)
$results = $matches->skip(1)->get();
// Take 5 matches starting from the second one.
$results = $matches->skip(1)->take(5)->get();
// Order by title
$results = $matches->orderBy('title')->get();
// Order by title, then by URL (the second argument 'desc' requests descending order)
$results = $matches->orderBy('title')->orderBy('url', 'desc')->get();
// Custom sorting, for values that do not lend themselves to plain sorting, e.g. dates.
// The third argument names a callable ('date_create' here); presumably it is applied to
// each value before comparison -- TODO confirm against the library documentation.
$results = $matches->orderBy('date', 'desc', 'date_create')->get();
// Simply reverse the order of the results
$results = $matches->reverse()->get();
// Keep only the matches whose link title contains 'Google' (case-insensitive).
$matches->filter(function ($candidate) {
    $linkTitle = $candidate['title'];

    // stripos() yields an offset (possibly 0) on a hit and false on a miss,
    // so compare against false explicitly to get a strict boolean.
    return stripos($linkTitle, 'Google') !== false;
});
// Match configuration demonstrating 'apply' callbacks, which post-process a captured value.
// Each entry carries a 'name' and an 'id' next to its callback.
$matchConfig = array(
    array(
        'name' => 'url',
        'id' => 1,
        // Turn relative links into absolute ones by prefixing the source URL.
        // $sourceUrl is the second argument handed to the callback (presumably the URL of
        // the page being scraped -- TODO confirm against the library documentation).
        'apply' => function ($match, $sourceUrl) {
            // Only a leading http:// or https:// marks an absolute URL. Searching for
            // "http" anywhere would wrongly skip links such as "/go?to=http://other.test".
            if (preg_match('#^https?://#i', $match)) {
                return $match;
            }

            // Join with exactly one slash, whether or not $sourceUrl ends with one, and
            // leave the link's own trailing slash alone because it is part of the path.
            return rtrim($sourceUrl, '/') . '/' . ltrim($match, '/');
        },
    ),
    array(
        'name' => 'title',
        'id' => 2,
        // Remove all HTML tags inside the link title
        'apply' => function ($match) {
            return strip_tags($match);
        },
    ),
    // ... add further match definitions here
);
// Match configuration demonstrating 'validate' callbacks. Each callback receives the
// captured value and returns a strict boolean saying whether the value is acceptable.
$matchConfig = array(
    array(
        'name' => 'url',
        'id' => 1,
        // Make sure it is a valid URL
        'validate' => function ($match) {
            // filter_var() returns the filtered string on success and false on failure;
            // compare against false so the callback yields a real boolean.
            return filter_var($match, FILTER_VALIDATE_URL) !== false;
        },
    ),
    array(
        'name' => 'title',
        'id' => 2,
        // We only want titles that are between 1 and 50 characters long.
        'validate' => function ($match) {
            // Count characters rather than bytes so multibyte titles are not rejected
            // early; fall back to strlen() when the mbstring extension is unavailable.
            $length = function_exists('mb_strlen') ? mb_strlen($match) : strlen($match);

            return $length >= 1 && $length <= 50;
        },
    ),
    // ... add further match definitions here
);
// Fetch the logs from the match set (presumably diagnostics about processed or rejected
// matches -- TODO confirm against the library documentation).
$logs = $matches->getLogs();
// Create the Scrapher without arguments, add the URL afterwards, and chain every step into
// one expression. `filter(...)` is a placeholder for a filter callback elided from this
// example, and $regexSelector is not defined in this snippet. The value returned by get()
// is not assigned here.
$scrapher = new Scrapher();
$scrapher->addUrl($url)->with($regexSelector)->filter(...)->orderBy('title')->skip(1)->take(5)->get();
// Reuse one Scrapher instance with two different selectors. $h2RegexSelector and
// $linksRegexSelector are not defined in this snippet and must be created beforehand.
$scrapher = new Scrapher($url);
$h2Titles = $scrapher->with($h2RegexSelector)->get();
$links = $scrapher->with($linksRegexSelector)->get();
Loading, please wait...
Before you can download the PHP files, the dependencies must be resolved. This can take a few minutes. Please be patient.