1. Go to the download page for the zrashwani/news-scrapper library and choose the download type "require".
2. Extract the ZIP file and open index.php.
3. Add the following code to index.php.
<?php
require_once('vendor/autoload.php');
/* Start to develop here. Best regards https://php-download.com/ */
zrashwani / news-scrapper example snippets
// Address of the news article to scrape.
$url = "http://example.com/your-news-uri";
// Build a client that uses the Microdata standard for scraping,
// fetch the data for the article, then dump it.
$microdataClient = new \Zrashwani\NewsScrapper\Client('Microdata');
$microdataResult = $microdataClient->getLinkData($url);
print_r($microdataResult);
// Build a client with the 'Custom' adapter so the selectors can be set by hand.
$customClient = new \Zrashwani\NewsScrapper\Client('Custom');

/** @var \Zrashwani\NewsScrapper\Adapters\CustomAdapter $customAdapter */
$customAdapter = $customClient->getAdapter();

// Each selector can be either a CSS selector or an XPath expression.
$customAdapter
    ->setTitleSelector('.single-post h1')
    ->setImageSelector(".sidebar img")
    ->setAuthorSelector('//a[@rel="author"]')
    ->setPublishDateSelector('//span[@class="published_data"]')
    ->setBodySelector('//div[@class="contents"]');

// Scrape the article through the client whose adapter was configured above.
$newsData = $customClient->getLinkData("http://example.com/your-news-uri");
print_r($newsData);
// URL of the page that contains the news listing.
$listingPageUrl = 'https://www.readability.com/topreads/';
// CSS or XPath selector for the news links inside the listing page.
$linksSelector = '.entry-title a';
// Number of links to scrape; use null to get all links matching the selector.
$numberOfArticles = 3;

$listingClient = new \Zrashwani\NewsScrapper\Client();
$newsGroupData = $listingClient->scrapLinkGroup(
    $listingPageUrl,
    $linksSelector,
    $numberOfArticles
);

// Dump each scraped article in turn.
foreach ($newsGroupData as $singleNews) {
    print_r($singleNews);
}
Initiate the scrapper:
// No adapter name is passed to the constructor here.
// NOTE(review): presumably the library then picks an adapter itself — confirm against the library docs.
$defaultClient = new \Zrashwani\NewsScrapper\Client();
// $url is the article address assigned earlier in this script.
$defaultResult = $defaultClient->getLinkData($url);
print_r($defaultResult);
Loading please wait ...
Before you can download the PHP files, the dependencies must be resolved. This can take a few minutes. Please be patient.