1. Go to this page and download the library: Download hindmost/rolling-curl-mini library. Choose the download type you require.
2. Extract the ZIP file and open the index.php.
3. Add this code to the index.php.
<?php
require_once('vendor/autoload.php');
/* Start to develop here. Best regards https://php-download.com/ */
hindmost / rolling-curl-mini example snippets
php
...
// Create the multi-curl object. It is stored in $o_mc so that the add() and
// execute() calls below operate on the same variable.
// NOTE(review): the argument 10 is presumably the maximum number of parallel
// requests — confirm against the library documentation.
$o_mc = new RollingCurlMini(10);
...
// Register a request. $userdata is the user-defined data that is later handed
// to the callback.
$o_mc->add($url, $postdata, $callback, $userdata, $options, $headers);
...
// NOTE(review): presumably performs the requests registered with add() — confirm.
$o_mc->execute();
...
php
/**
 * Example signature of a request callback; the body is empty in this example.
 *
 * @param string $content  Content of the response to the request
 * @param string $url      URL of the requested resource
 * @param array  $info     cURL handle info
 * @param mixed  $userdata User-defined data that was passed to the add() method
 */
function request_callback($content, $url, $info, $userdata) {
}
php
/**
 * Example scraper class built on RollingScraperAbstract.
 *
 * Lines consisting only of "..." are placeholders for code omitted from this example.
 */
class MyScraper extends RollingScraperAbstract
{
...
/**
 * Override configuration values through modConfig(), then call the parent constructor.
 */
public function __construct() {
...
$this->modConfig(array(
'state_time_storage' => '...', // file path of the temporal section of the state storage
'state_data_storage' => '...', // file path of the data section of the state storage
'scrape_life' => 0, // expiration time of scraped data, in seconds
'run_timeout' => 30, // maximum time the scraper script may execute, in seconds
'run_pages_loops' => 20, // maximum number of loops through pages
'run_pages_buffer' => 500, // size of the page requests buffer
'curl_threads' => 10, // number of multi-curl threads
'curl_options' => array(...), // cURL options used in multi-curl requests
));
parent::__construct();
}
/**
 * Initialize the starting list of page requests.
 *
 * Each starting page is registered by calling addPage() with its URL.
 */
protected function _initPages() {
...
// Add a page request; $url is the page URL.
$this->addPage($url);
...
}
/**
 * Process the response of a page request.
 *
 * @param string $cont   Page content
 * @param string $url    URL of the request
 * @param array  $aInfo  cURL info data
 * @param int    $index  Number (#) of the page request
 * @param array  $aData  Custom request data (part of the request data)
 * @return bool NOTE(review): meaning of the returned flag is not shown here — confirm against the base class
 */
protected function _handlePage($cont, $url, $aInfo, $index, $aData) {
...
}
...
}
// Run the scraper once and report its state: completed, in progress, or cancelled.
$scraper = new MyScraper();
$started = $scraper->run();

// Unpack the progress tuple; the empty slots skip entries this script does not use.
list($time_start, $time_end, , $time_run_start, , $n_pages_total, $n_pages_passed) = $scraper->getStateProgress();

if ($time_end) {
    // An end time is set: report when the run completed.
    printf('Completed at %s', date('Y.m.d, H:i:s', $time_end));
} elseif ($started) {
    // run() returned a truthy value and no end time is set yet.
    printf('In progress: %d/%d pages', $n_pages_passed, $n_pages_total);
} else {
    // run() returned a falsy value.
    echo 'Cancelled since another script instance is still running';
}
Loading, please wait ...
Before you can download the PHP files, the dependencies must be resolved. This can take a few minutes. Please be patient.