PHP code example of restyler / scrapeninja-api-php-client

1. Go to the download page and download the restyler/scrapeninja-api-php-client library. Choose the download type "require".

2. Extract the ZIP file and open the index.php.

3. Add this code to the index.php.
    
        
<?php
// Pull in the Composer autoloader so that library classes can be resolved on demand.
require_once 'vendor/autoload.php';

/* Start to develop here. Best regards https://php-download.com/ */

    

restyler / scrapeninja-api-php-client example snippets


use ScrapeNinja\Client;

// Create the API client. The RapidAPI key is read from the SCRAPENINJA_RAPIDAPI_KEY
// environment variable. The instance is stored in $client because that is the
// variable name the scrape()/scrapeJs() calls in these examples use.
$client = new Client([
    "rapidapi_key" => getenv('SCRAPENINJA_RAPIDAPI_KEY'),
]);

// Basic scrape request followed by a dump of the response metadata and body.
$response = $client->scrape([
    // target website URL
    "url" => "https://news.ycombinator.com/",

    // Proxy geo. eu, br, de, fr, 4g-eu, us proxy locations are available. Default: "us"
    "geo" => "us",

    // Custom headers to pass to target website. Space after ':' is mandatory according to HTTP spec.
    // NOTE(review): the original snippet was truncated at this point (the comment went on
    // "User-agent header is not ..."). The header below is a placeholder value — confirm the
    // remaining options against the library documentation.
    "headers" => ["Some-custom-header: header1-val"],
]);

// $response['info'] holds the response metadata (statusCode, headers); $response['body'] holds the markup.
echo 'HTTP Response status: ' . $response['info']['statusCode'] . "\n";
echo 'HTTP Response headers: ' . print_r($response['info']['headers'], true) . "\n";
// mb_substr keeps multi-byte characters intact when truncating the body for display.
echo 'HTTP Response body (truncated): ' . mb_substr($response['body'], 0, 300) . '...' . "\n";


/*
    Array
(
    [info] => Array
        (
            [version] => 1.1 (string)
            [statusCode] => 200 (integer)
            [statusMessage] => OK (string)
            [headers] => Array
                (
                    [server] => nginx
                    [date] => Mon, 02 May 2022 04:38:12 GMT
                    [content-type] => text/html; charset=utf-8
                    [content-encoding] => gzip
                )

        )

    [body] => <html lang="en" op="news"><head><meta name="referrer" content="origin"><meta name="viewport" content="width=device-width, initial-scale=1.0"><link rel="stylesheet" type="text/css" href="news.css?5eYyZbFhPFukXyt5EaSy">...
)
    */

// Fetch the same target page through scrapeJs() instead of scrape().
$response = $client->scrapeJs(['url' => 'https://news.ycombinator.com/']);


// JavaScript extractor function, executed on ScrapeNinja servers.
// Kept in a nowdoc so that PHP never tries to interpolate the `$` used as the cheerio handle.
// The closing marker sits at column 0 on purpose: no indentation is stripped from the JS source.
$extractor = <<<'JS'
// define function which accepts body and cheerio as args
    function extract(input, cheerio) {
        // return object with extracted values              
        let $ = cheerio.load(input);
      
        let items = [];
        $('.titleline').map(function() {
                  let infoTr = $(this).closest('tr').next();
                  let commentsLink = infoTr.find('a:contains(comments)');
                items.push([
                    $(this).text(),
                      $('a', this).attr('href'),
                      infoTr.find('.hnuser').text(),
                      parseInt(infoTr.find('.score').text()),
                      infoTr.find('.age').attr('title'),
                      parseInt(commentsLink.text()),
                      'https://news.ycombinator.com/' + commentsLink.attr('href'),
                      new Date()
                ]);
            });
      
      return { items };
    }
JS;

// The extractor behaves the same in both ScrapeNinja rendering modes, scrape() and scrapeJs();
// here it is sent along with a scrapeJs() request against a static sample page.
$response = $client->scrapeJs([
    "url" => "https://scrapeninja.net/samples/hackernews.html",
    "extractor" => $extractor,
]);

// Print whatever the extractor returned, under an HTML heading.
echo '<h2>Extractor function test:</h2><pre>';
echo print_r($response['extractor'], true);

// Send a POST request with a JSON payload to the target website.
$response = $client->scrape([
    "url" => "https://news.ycombinator.com/",
    "headers" => ["Content-Type: application/json"],
    "method" => "POST", // the comma here was missing, which made the array a parse error
    "data" => "{\"fefe\":\"few\"}",
]);

// Send a POST request with a URL-encoded form payload to the target website.
$response = $client->scrape([
    "url" => "https://news.ycombinator.com/",
    "headers" => ["Content-Type: application/x-www-form-urlencoded"],
    "method" => "POST", // the comma here was missing, which made the array a parse error
    "data" => "key1=val1&key2=val2",
]);

// Scrape request with retry-related options.
// PHP array entries use `=>`; the JSON-style `:` separators used previously do not parse.
// NOTE(review): presumably a response containing one of the listed texts or status codes
// counts as failed and is retried — confirm against the ScrapeNinja API documentation.
$response = $client->scrape([
    "url" => "https://news.ycombinator.com/",
    "retryNum" => 1, // 0 to disable retries
    "textNotExpected" => [
        "random-captcha-text-which-might-appear",
    ],
    "statusNotExpected" => [
        403,
        502,
    ],
]);

// Error handling around a scrape request.
// $requestOpts stands for the options array passed to scrape() (url, headers, ...);
// it has to be defined by the caller before this snippet runs.
try {
    // Uses $client, the same client variable as the other scrape()/scrapeJs() calls.
    $response = $client->scrape($requestOpts);

    // you might want to add your custom errors here
    // statusCode is an integer in the response info, so a strict comparison is safe.
    if ($response['info']['statusCode'] !== 200) {
        throw new \Exception('your custom exception because this you didn\'t expect this from target website');
    }
} catch (\GuzzleHttp\Exception\ClientException $e) {
    // Guzzle client error: the HTTP response object is available on the exception.
    $response = $e->getResponse();

    echo 'Status code: ' . $response->getStatusCode() . "\n";
    echo 'Err message: ' . $e->getMessage() . "\n";
} catch (\Exception $e) {
    // your custom error handling logic, this is a non-Guzzle error
    // (the custom exception thrown above also lands here)
}

composer require restyler/scrapeninja-api-php-client
bash
export SCRAPENINJA_RAPIDAPI_KEY=YOUR-KEY
php ./examples/extractor.php