PHP code example of laurentvw / scrapher

1. Go to this page and download the library: laurentvw/scrapher. Choose the download type "require".

2. Extract the ZIP file and open the index.php.

3. Add this code to the index.php.

    
        
<?php
require_once('vendor/autoload.php');

/* Start to develop here. Best regards https://php-download.com/ */

laurentvw / scrapher example snippets


use \Laurentvw\Scrapher\Scrapher;
use \Laurentvw\Scrapher\Selectors\RegexSelector;

// Page to scrape.
$url = 'https://www.google.com/';
$scrapher = new Scrapher($url);

// Match all links on a page.
// Capture group 1 is the value of the href attribute (the link URL) and
// capture group 2 is everything between the opening and closing <a> tags
// (the link title, possibly containing nested HTML).
// The "s" modifier lets "." match newlines so links spanning lines still match.
// NOTE(review): the published snippet was cut off in the middle of this
// pattern; the tail was restored so that groups 1 and 2 line up with the
// 'url' (id 1) and 'title' (id 2) match definitions used elsewhere in these
// examples. Confirm against the upstream README.
$regex = '/<a.*?href=(?:"(.*?)").*?>(.*?)<\/a>/ms';


// Construct a scraper from one URL, or from a list of URLs.
$scrapher = new Scrapher($url);
$scrapher = new Scrapher([$url, $url2]);


// Construct a scraper from one content value, or from a list of them.
// NOTE(review): $content / $content2 are not defined in this snippet;
// presumably they hold already-fetched page markup. Confirm.
$scrapher = new Scrapher($content);
$scrapher = new Scrapher([$content, $content2]);


// Sources can also be added after construction, one at a time or in bulk.
$scrapher->addUrl($url);
$scrapher->addUrls([$url, $url2]);
$scrapher->addContent($content);
$scrapher->addContents([$content, $content2]);


// Regular expression that matches every link on a page.
// Capture group 1 is the value of the href attribute (the link URL) and
// capture group 2 is everything between the opening and closing <a> tags.
// The "s" modifier lets "." match newlines so links spanning lines still match.
// NOTE(review): the published snippet was cut off in the middle of this
// pattern; the tail was restored so that groups 1 and 2 line up with the
// 'url' (id 1) and 'title' (id 2) match definitions. Confirm against the
// upstream README.
$regExpression = '/<a.*?href=(?:"(.*?)").*?>(.*?)<\/a>/ms';


// Retrieving results.
// NOTE(review): $matches is not defined in this snippet. Presumably it is the
// object returned by $scrapher->with($selector), as used in the later
// examples. Confirm against the library documentation.

// Return all matches
$results = $matches->get();

// Return all matches with a subset of the data (either use multiple arguments or an array for more than one column)
// Here only the 'title' column is requested.
$results = $matches->get('title');

// Return the first match
$result = $matches->first();

// Return the last match
$result = $matches->last();

// Count the number of matches
$numberOfMatches = $matches->count();


// Limiting results (offset / limit). take() and skip() are chained before
// get(), which returns the results.

// Take the first N matches
$results = $matches->take(5)->get();

// Skip the first N matches
$results = $matches->skip(1)->get();

// Take 5 matches starting from the second one.
$results = $matches->skip(1)->take(5)->get();


// Sorting results. orderBy() calls can be chained; each call names one column.

// Order by title
$results = $matches->orderBy('title')->get();

// Order by title, then by URL (the second argument selects descending order for 'url')
$results = $matches->orderBy('title')->orderBy('url', 'desc')->get();

// Custom sorting: for values that do not lend themselves well to sorting, e.g. dates.
// NOTE(review): the third argument names PHP's built-in date_create() function;
// presumably it is applied to each value before comparison. Confirm against
// the library documentation.
$results = $matches->orderBy('date', 'desc', 'date_create')->get();

// Simply reverse the order of the results
$results = $matches->reverse()->get();


// Keep only the matches whose link title mentions 'Google' (case-insensitive).
$matches->filter(function ($link) {
    $position = stripos($link['title'], 'Google');

    return $position !== false;
});


// Match definitions with post-processing. Each entry gives a match a name
// and an 'id', and may supply an 'apply' callback that transforms the captured
// value before it is returned.
// NOTE(review): 'id' is presumably the index of the regex capture group. Confirm.
$matchConfig = [
    [
        'name' => 'url',
        'id' => 1,
        // Turn relative URLs into absolute ones by prefixing the source URL.
        'apply' => function ($match, $sourceUrl) {
            // Absolute (http/https) and protocol-relative URLs are returned
            // untouched. The check is anchored to the start of the string so
            // that a relative path which merely contains "http"
            // (e.g. "docs/http-basics") is still treated as relative.
            if (preg_match('#^(?:https?:)?//#i', $match) === 1) {
                return $match;
            }

            // Join with exactly one slash, whether or not $sourceUrl ends in
            // one, and keep any trailing slash of the relative path.
            return rtrim($sourceUrl, '/') . '/' . ltrim($match, '/');
        },
    ],
    [
        'name' => 'title',
        'id' => 2,
        // Remove all HTML tags inside the link title.
        'apply' => function ($match) {
            return strip_tags($match);
        },
    ],
    // ...additional match definitions go here.
];


// Match definitions with validation. Each entry may supply a 'validate'
// callback that receives the captured value and returns whether it is
// acceptable.
$matchConfig = [
    [
        'name' => 'url',
        'id' => 1,
        // Make sure it is a valid URL. filter_var() returns the filtered
        // string on success and false on failure, so compare against false
        // to hand back a strict boolean.
        'validate' => function ($match) {
            return filter_var($match, FILTER_VALIDATE_URL) !== false;
        },
    ],
    [
        'name' => 'title',
        'id' => 2,
        // We only want titles that are between 1 and 50 bytes long.
        // strlen() counts bytes, not characters, so multi-byte (e.g. UTF-8)
        // titles reach the limit sooner than their visible length suggests.
        'validate' => function ($match) {
            $length = strlen($match);

            return $length >= 1 && $length <= 50;
        },
    ],
    // ...additional match definitions go here.
];


// Fetch the logs recorded for this set of matches.
// NOTE(review): the structure of the returned log entries is not shown in this
// snippet. Consult the library documentation before relying on it.
$logs = $matches->getLogs();


// Every step can be chained into a single expression: add a source, pick a
// selector, filter, sort, paginate, and finally fetch the results.
// NOTE(review): $url and $regexSelector are not defined in this snippet.
$scrapher = new Scrapher();
$results = $scrapher
    ->addUrl($url)
    ->with($regexSelector)
    // Pass a real callback here: a literal "(...)" argument list is PHP's
    // first-class callable syntax on PHP 8.1+ (it would yield a Closure, not
    // the match set) and is a syntax error on older versions.
    ->filter(function ($match) {
        // Keep only matches whose link title contains 'Google'.
        return stripos($match['title'], 'Google') !== false;
    })
    ->orderBy('title')
    ->skip(1)
    ->take(5)
    ->get();


// One Scrapher instance can be reused with several selectors: each with()
// call below is given a different selector and its results are fetched
// separately.
// NOTE(review): $h2RegexSelector and $linksRegexSelector are not defined in
// this snippet; presumably they are RegexSelector instances. Confirm.
$scrapher = new Scrapher($url);
$h2Titles = $scrapher->with($h2RegexSelector)->get();
$links = $scrapher->with($linksRegexSelector)->get();