PHP code example of vipnytt / sitemapparser

1. Go to the download page and download the vipnytt/sitemapparser library. Choose the download type "require".

2. Extract the ZIP file and open the index.php.

3. Add the following code to index.php.
    
        
<?php
// Load Composer's autoloader. The path is anchored to this file's directory
// so the include does not depend on include_path or on the working directory
// the script happens to be started from.
require_once __DIR__ . '/vendor/autoload.php';

/* Start to develop here. Best regards https://php-download.com/ */

    

vipnytt / sitemapparser example snippets


use vipnytt\SitemapParser;
use vipnytt\SitemapParser\Exceptions\SitemapParserException;

// Basic example: parse a single sitemap and print every URL found in it,
// one per line.
try {
    $parser = new SitemapParser();
    $parser->parse('http://php.net/sitemap.xml');
    foreach ($parser->getURLs() as $url => $tags) {
        // The URL comes from a remote document, so escape it before it is
        // written into the HTML output.
        echo htmlspecialchars((string) $url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . '<br>';
    }
} catch (SitemapParserException $e) {
    // The message is escaped too, since it is printed into the same HTML page.
    echo htmlspecialchars($e->getMessage(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

use vipnytt\SitemapParser;
use vipnytt\SitemapParser\Exceptions\SitemapParserException;

// Advanced example: parse one sitemap with a custom User-Agent and print the
// tags recorded for every nested sitemap and for every URL entry.
try {
    $parser = new SitemapParser('MyCustomUserAgent');
    $parser->parse('http://php.net/sitemap.xml');

    // Everything printed below originates from a remote document, so it is
    // HTML-escaped first. The string cast makes a null tag value print as an
    // empty string, exactly as plain concatenation would.
    $escape = function ($value) {
        return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    };

    foreach ($parser->getSitemaps() as $url => $tags) {
        echo 'Sitemap<br>';
        echo 'URL: ' . $escape($url) . '<br>';
        echo 'LastMod: ' . $escape($tags['lastmod']) . '<br>';
        echo '<hr>';
    }
    foreach ($parser->getURLs() as $url => $tags) {
        echo 'URL: ' . $escape($url) . '<br>';
        echo 'LastMod: ' . $escape($tags['lastmod']) . '<br>';
        echo 'ChangeFreq: ' . $escape($tags['changefreq']) . '<br>';
        echo 'Priority: ' . $escape($tags['priority']) . '<br>';
        echo '<hr>';
    }
} catch (SitemapParserException $e) {
    // The message is escaped too, since it is printed into the same HTML page.
    echo htmlspecialchars($e->getMessage(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

use vipnytt\SitemapParser;
use vipnytt\SitemapParser\Exceptions\SitemapParserException;

// Recursive example: start from a robots.txt URL, parse recursively, then
// print the collected sitemaps followed by the collected URLs.
// NOTE(review): parseRecursive() presumably follows every sitemap it
// discovers — confirm against the library documentation.
try {
    $parser = new SitemapParser('MyCustomUserAgent');
    $parser->parseRecursive('http://www.google.com/robots.txt');

    // Everything printed below originates from remote documents, so it is
    // HTML-escaped first. The string cast makes a null tag value print as an
    // empty string, exactly as plain concatenation would.
    $escape = function ($value) {
        return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    };

    echo '<h2>Sitemaps</h2>';
    foreach ($parser->getSitemaps() as $url => $tags) {
        echo 'URL: ' . $escape($url) . '<br>';
        echo 'LastMod: ' . $escape($tags['lastmod']) . '<br>';
        echo '<hr>';
    }
    echo '<h2>URLs</h2>';
    foreach ($parser->getURLs() as $url => $tags) {
        echo 'URL: ' . $escape($url) . '<br>';
        echo 'LastMod: ' . $escape($tags['lastmod']) . '<br>';
        echo 'ChangeFreq: ' . $escape($tags['changefreq']) . '<br>';
        echo 'Priority: ' . $escape($tags['priority']) . '<br>';
        echo '<hr>';
    }
} catch (SitemapParserException $e) {
    // The message is escaped too, since it is printed into the same HTML page.
    echo htmlspecialchars($e->getMessage(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

use vipnytt\SitemapParser;
use vipnytt\SitemapParser\Exceptions\SitemapParserException;

// Plain-text example: 'strict' => false is passed so that a line-separated
// text file (here a urllist.txt) can be parsed like a sitemap.
try {
    $parser = new SitemapParser('MyCustomUserAgent', ['strict' => false]);
    $parser->parse('https://www.xml-sitemaps.com/urllist.txt');

    // The URLs come from a remote document, so they are HTML-escaped before
    // being written into the page.
    $escape = function ($value) {
        return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    };

    foreach ($parser->getSitemaps() as $url => $tags) {
        echo $escape($url) . '<br>';
    }
    foreach ($parser->getURLs() as $url => $tags) {
        echo $escape($url) . '<br>';
    }
} catch (SitemapParserException $e) {
    // The message is escaped too, since it is printed into the same HTML page.
    echo htmlspecialchars($e->getMessage(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

// Two request-limit entries, both registered for the https://www.google.com host.
$googleLimits = [
    ['max_requests' => 20, 'request_interval' => 1],
    ['max_requests' => 100, 'request_interval' => 120],
];

$rules = new RequestLimitRuleset([
    'https://www.google.com' => $googleLimits,
]);

// Build a handler stack that applies the throttle described by $rules, wrap
// it in a Guzzle client and hand that client to the parser.
$stack = new HandlerStack();
$stack->setHandler(new CurlHandler());

$throttle = new ThrottleMiddleware($rules);

// Register the middleware exactly once by invoking the middleware object.
// Calling $throttle->handle() is the alternative spelling of the same thing;
// use one or the other, because pushing both adds the throttle to the stack
// twice.
$stack->push($throttle());

$client = new \GuzzleHttp\Client(['handler' => $stack]);

$parser = new SitemapParser();
$parser->setClient($client);

// Handler stack with the retry middleware on top of a cURL handler; the
// handler is supplied through the constructor.
$stack = new HandlerStack(new CurlHandler());
$stack->push(GuzzleRetryMiddleware::factory());

// Give the parser a client that sends its requests through that stack.
$client = new \GuzzleHttp\Client([
    'handler' => $stack,
]);
$parser = new SitemapParser();
$parser->setClient($client);

// Logging middleware built around a logger instance.
$logger = new Logger();
$logMiddleware = new LogMiddleware($logger);

// cURL-backed handler stack with the logging middleware pushed onto it.
$stack = new HandlerStack();
$stack->setHandler(new CurlHandler());
$stack->push($logMiddleware);

// Give the parser a client that sends its requests through that stack.
$client = new \GuzzleHttp\Client([
    'handler' => $stack,
]);
$parser = new SitemapParser();
$parser->setClient($client);

// GuzzleHttp request options, see
// http://docs.guzzlephp.org/en/latest/request-options.html
$guzzleOptions = [];

// URLs to ignore when parsing sitemaps that contain multiple other sitemaps.
// Exact match only.
$ignoredUrls = [];

$config = [
    // (bool) Disallow parsing of line-separated plain text
    'strict' => true,
    'guzzle' => $guzzleOptions,
    'url_black_list' => $ignoredUrls,
];

$parser = new SitemapParser('MyCustomUserAgent', $config);