1. Go to the download page and download the offdev/gpp library, choosing the "require" download type.
2. Extract the ZIP file and open index.php.
3. Add the following code to index.php.
<?php
require_once('vendor/autoload.php');
/* Start to develop here. Best regards https://php-download.com/ */
// offdev/gpp example snippets
use GuzzleHttp\Client as GuzzleClient;
use GuzzleHttp\Psr7\Request;
use GuzzleHttp\Psr7\Response;
use Offdev\Gpp\Client;
use Offdev\Gpp\Http\MiddlewareInterface;
use Offdev\Gpp\Http\ResponseHandlerInterface;
use Psr\Http\Message\RequestInterface;
use Psr\Http\Message\ResponseInterface;
class DirectoryLister implements MiddlewareInterface
{
    /**
     * Middleware: if the response body contains links of the form
     * href="/articles/<id>/<name>", short-circuit the chain and return a
     * JSON-encoded list of the captured <name> parts; otherwise delegate
     * the unmodified response to the next handler.
     *
     * @param RequestInterface         $originalRequest request that produced $response (unused here)
     * @param ResponseInterface        $response        upstream response to inspect
     * @param ResponseHandlerInterface $responseHandler next handler in the middleware chain
     */
    public function process(
        RequestInterface $originalRequest,
        ResponseInterface $response,
        ResponseHandlerInterface $responseHandler
    ): ResponseInterface {
        $content = (string)$response->getBody();
        if (preg_match_all('/href="\/articles\/\d+\/([^"]+)"/m', $content, $matches, PREG_SET_ORDER)) {
            // Collect capture group 1 of every match; array_column replaces
            // the manual foreach/append loop.
            $names = array_column($matches, 1);
            // JSON_THROW_ON_ERROR: fail loudly instead of silently passing
            // `false` (json_encode's failure value) as the response body.
            return new Response(200, [], json_encode($names, JSON_PRETTY_PRINT | JSON_THROW_ON_ERROR));
        }
        return $responseHandler->handle($response);
    }
}
// Wire the DirectoryLister middleware into the gpp client, fetch one
// article-listing page, and dump the (possibly rewritten) response body.
$gppClient = new Client(new GuzzleClient(), [DirectoryLister::class]);
$listingRequest = new Request('GET', 'https://www.worldhunger.org/articles/12/');
$listingResponse = $gppClient->send($listingRequest);
var_dump((string)$listingResponse->getBody());
use GuzzleHttp\Psr7\Request;
use Offdev\Gpp\Utils\IntegerEnumerator;
// Demonstrate the IntegerEnumerator: derive the follow-up request from a
// seed request (presumably by advancing the integer path segment — verify
// against the library docs) and print its URI.
$seedRequest = new Request('GET', 'https://www.worldhunger.org/articles/12/');
$followUp = (new IntegerEnumerator())->getNextRequest($seedRequest);
var_dump((string)$followUp->getUri());
use GuzzleHttp\Client as GuzzleClient;
use GuzzleHttp\Psr7\Request;
use Offdev\Gpp\Client;
use Offdev\Gpp\Crawler;
use Offdev\Gpp\Utils\IntegerEnumerator;
use Psr\Http\Message\RequestInterface;
use Psr\Http\Message\ResponseInterface;
// NOTE(review): Guzzle 6+ renamed the Guzzle 5 'exceptions' option to
// 'http_errors'. With the obsolete key, 4xx/5xx responses still throw,
// so the status-code check in the callback below would never run.
$client = new Client(new GuzzleClient(['http_errors' => false]));
$crawler = new Crawler($client, new IntegerEnumerator());
$crawler->crawl(
    new Request('GET', 'https://www.worldhunger.org/articles/15/'),
    5, // time between each request, in seconds
    function ( // callback function, to control the crawler workflow
        RequestInterface $originalRequest,
        ResponseInterface $response
    ) {
        echo $response->getStatusCode().' : '.(string)$originalRequest->getUri().PHP_EOL;
        if ($response->getStatusCode() !== 200) {
            return true; // cancel crawling on the first non-200 response
        }
        // go ahead, wait for the interval, and crawl the next result
        return false;
    }
);