PHP code example of panakour / pkscraper

1. Go to this page and download the library: Download panakour/pkscraper library. Choose the download type "require".

2. Extract the ZIP file and open index.php.

3. Add this code to index.php:
    
        
<?php
require_once('vendor/autoload.php');

/* Start to develop here. Best regards https://php-download.com/ */

    

panakour / pkscraper example snippets


// Settings handed to the HTTP client below: a SOCKS5 proxy endpoint and a
// browser-like User-Agent header.
$socksProxy = 'socks5://172.17.0.1:9050';
$requestHeaders = [
    'User-Agent' => 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
];

$httpClient = new \Pkscraper\Http\GuzzleClient();

// The same endpoint is passed for both setProxy() arguments.
// NOTE(review): presumably one slot is for HTTP and the other for HTTPS — confirm against GuzzleClient::setProxy().
$httpClient->setProxy($socksProxy, $socksProxy);
$httpClient->setHeaders($requestHeaders);

// Called last, after proxy and headers are configured.
// NOTE(review): presumably this (re)builds the underlying client with the settings above — confirm.
$httpClient->newClient();


// Fetch a single page with the configured client.
$resp = $httpClient->doGetRequest("https://example.com/");

// Hand the raw response body to a DOM crawler so it can be queried with XPath.
$pageHtml = $resp->getBody()->getContents();
$pageCrawler = new SymfonyDomCrawler($pageHtml);

// Item named "img" that targets the content attribute of the og:image meta tag.
$con = new Text("img", $pageCrawler, "//meta[@property='og:image']/@content");
$con->build();

// Print whatever the item extracted.
$ogImage = $con->getExtractedValue();
\Pkscraper\ToolBox::debugResult($ogImage);

// Extract the list of URLs from the feed using the "//item/link" XPath.
$urls = UrlExtractor::extract($httpClient, 'https://www.example.com/feed', "//item/link");

// Request all extracted URLs concurrently; each pool entry is keyed by the
// index of its URL in $urls.
$pool = $httpClient->concurrentRequests($urls);

// Initialise the result list up front so the debug call below never reads an
// undefined variable when the pool yields no responses.
$bags = [];

foreach ($pool as $index => $response) {
    // A failed request shows up in the pool as an exception object instead of a response.
    if ($response instanceof \GuzzleHttp\Exception\RequestException) {
        // NOTE(review): dd() is not defined in this snippet — presumably the dump-and-die
        // helper from symfony/var-dumper or Laravel; confirm it is available.
        dd('something went wrong');
    }

    $domCrawler = new SymfonyDomCrawler($response->getBody()->getContents());

    // One bag per URL, stored under the same index as the URL it came from.
    $bag = new Bag($urls[$index]);
    $bags[$index] = $bag;

    // Items to extract from the page, each identified by a name and an XPath.
    $titleItem = new Text('title', $domCrawler, "//article/div[@class='box']/h2/a");
    $featuredImage = new Text('featuredImage', $domCrawler, '//meta[@property="og:image"]/@content');
    $htmlContentItem = new SafeHtml('mainContent', $domCrawler, "//article/div[@class='box']");
    $tagsItem = new TextArray('tags', $domCrawler, "//div[@class='box']/div[@class='cp-admin-row']//a[@rel='tag']");
    // Tags are marked as not required.
    $tagsItem->setRequired(false);

    $bag->setItems($featuredImage, $titleItem, $htmlContentItem, $tagsItem);
    $bag->build();
}

// Fully qualified, matching the single-request example above.
\Pkscraper\ToolBox::debugResult($bags);


    $pool = $httpClient->concurrentRequests($urls);
    $bags = [];
    foreach ($pool as $index => $response) {
        try {
            if ($response instanceof \GuzzleHttp\Exception\RequestException) {
                continue;
            }
            $domCrawler = new SymfonyDomCrawler($response->getBody()->getContents());
            $bags[$index] = new Bag($urls[$index]);

            $titleItem = new Text('title', $domCrawler, "//div[@class='grayTopCnt topInfo ']/div[@class='row'][2]/div[@class='col col12']/div[@class='title']/h1");
            $featuredImage = new Text('featuredImage', $domCrawler, "//div[@class='imgWrp']/div[@class='topImg mainVideo']/div[@class='item']/picture/img[@class='lazyload']/@data-src");
            $safeHtmlContent = new \Pkscraper\Items\SafeHtml('contentTest', new SymfonyDomCrawler($resp->getBody()->getContents()), "//div[@id='main-post']/div[@class='post']/div[@class='blog-standard']/div[@class='cntTxt']");
            $safeHtmlContent->addTransformer(new \Pkscraper\Transform\ImageRelativeSourceToAbsoluteTransformer($httpClient->getCurrentUrlWithoutPath()));
            $safeHtmlContent->addRemover(new \Pkscraper\Remove\ElementByTagByIndexRemover('img', 0));
            $safeHtmlContent->addRemover(new \Pkscraper\Remove\ElementByTagByIndexRemover('a', 0));
            $safeHtmlContent->addCleaner(new \Pkscraper\Clean\TextCleaner('<p>                Loading...                						</p>', ''));
            $safeHtmlContent->addRemover(new \Pkscraper\Remove\ElementsByTagRemover('footer'));
            $safeHtmlContent->addTransformer(new \Pkscraper\Transform\ImageRelativeSourceToAbsoluteTransformer($httpClient->getCurrentUrlWithoutPath()));
            $safeHtmlContent->addCleaner(new \Pkscraper\Clean\RegExCleaner('/<\\/?a(\\s+.*