PHP code example of unique / scraper

1. Go to this page and download the library: Download unique/scraper library. Choose the "require" download type.

2. Extract the ZIP file and open index.php.

3. Add this code to index.php.

    <?php
    require_once('vendor/autoload.php');

    /* Start to develop here. Best regards https://php-download.com/ */

unique / scraper example snippets


    class SiteItem implements \unique\scraper\interfaces\SiteItemInterface {
        
        protected $id;
        protected $url;
        protected $title;
        
        // @todo: implement setters and getters for $id, $url and $title
    }
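
For reference, a filled-in version of the SiteItem skeleton above could look like the minimal sketch below. Only setTitle() is actually used later in these snippets; the other accessor names are assumptions, so check SiteItemInterface for the definitive method signatures.

    class SiteItem implements \unique\scraper\interfaces\SiteItemInterface {

        protected $id;
        protected $url;
        protected $title;

        // Simple accessors for the three attributes (names are an assumption, except setTitle()):
        public function setId( $id ) { $this->id = $id; }
        public function getId() { return $this->id; }

        public function setUrl( ?string $url ) { $this->url = $url; }
        public function getUrl(): ?string { return $this->url; }

        public function setTitle( ?string $title ) { $this->title = $title; }
        public function getTitle(): ?string { return $this->title; }
    }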

    class ItemListDownloader extends \unique\scraper\AbstractItemListDownloader {
        
        protected function getNumberOfItemsInPage( \Symfony\Component\DomCrawler\Crawler $doc ): ?int {

            // We can hard-code the expected number of items per listing page,
            // or we could implement some logic of checking the website for the actual number.
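            // For example, a hypothetical count reusing the selector from getItems() below:
            // return $doc->filter( 'a.ad-item' )->count();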
            return 20;
        }

        protected function hasNextPage( \Symfony\Component\DomCrawler\Crawler $doc, int $current_page_num ): bool {

            // We could implement some logic of checking the page's paginator,
            // or we can just return true and let the scraper go through all of the listing
            // pages until it finds one that has no items in it. It will then stop automatically.
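            // A hypothetical paginator check might look like this (the selector is an assumption):
            // return $doc->filter( 'ul.pagination a.next' )->count() > 0;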
            
            return true;
        }

        function getListUrl( ?int $page_num ): string {

            return 'https://some.website.here/?page_num=' . $page_num;
        }

        function getTotalItems( \Symfony\Component\DomCrawler\Crawler $doc ): ?int {

            // If possible, return the total number of items across all of the listing pages
            // (otherwise, return null).
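            // For example (a hypothetical selector; adjust it to the real site):
            // return (int) $doc->filter( '.total-results' )->text();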
            return null;
        }

        function getItems( \Symfony\Component\DomCrawler\Crawler $doc ): iterable {

            // We define a selector where each matched element corresponds to a single ad.
            // The scraper will iterate over these elements and scrape each of them.
            // It doesn't need to be an <a> tag; you define your own logic of how to get
            // to the actual item page.
            
            return $doc->filter( 'a.ad-item' );
        }

        function getItemUrl( \DOMElement $item ): ?string {

            // Here, $item is one of the elements returned by the getItems() method.
            // We analyze it and return the URL used to scrape the item itself.
            return $item->getAttribute( 'href' );
        }

        function getItemId( string $url, \DOMElement $item ): string {

            // We return a string that uniquely identifies the ad.
            // This can later be used, for example, to skip ads that we already have in the DB.
            return $item->getAttribute( 'data-id' );
        }

        function getItemDownloader( string $url, string $id ): ?\unique\scraper\AbstractItemDownloader {

            return new ItemDownloader( 'https://some.website.here/' . $url, $id, $this, new SiteItem() );
        }
    }

    class ItemDownloader extends \unique\scraper\AbstractItemDownloader {
        
        protected function assignItemData( \Symfony\Component\DomCrawler\Crawler $doc ) {

            // We set all the attributes we need for our custom SiteItem object,
            // which can be accessed by the $this->item attribute.
            $this->item->setTitle( $doc->filter( 'h1' )->text() );
        }
    }

    // Alternatively, if the item pages return JSON instead of HTML, extend AbstractJsonItemDownloader.
    // Note: both versions are shown with the same class name here; only declare one of them.
    class ItemDownloader extends \unique\scraper\AbstractJsonItemDownloader {

        protected function assignItemData( array $json ) {

            // We set all the attributes we need for our custom SiteItem object,
            // which can be accessed by the $this->item attribute.
            $this->item->setTitle( $json['title'] );
        }
    }

    class ScraperController implements \unique\scraper\interfaces\ConsoleInterface {
        
        // @todo implement stdOut() and stdErr() methods for logging.
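
        // A minimal sketch of those logging methods (the signatures are an assumption here;
        // check \unique\scraper\interfaces\ConsoleInterface for the exact ones required):
        public function stdOut( string $text ) {

            echo $text;
        }

        public function stdErr( string $text ) {

            fwrite( STDERR, $text );
        }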
        
        public function actionRun() {
            
            $transport = new GuzzleHttp\Client();
            $log_container = new LogContainerConsole( $this );
            $downloader = new ItemListDownloader( SiteItem::class, $transport, $log_container );

            $downloader->on( \unique\scraper\AbstractItemListDownloader::EVENT_ON_ITEM_END, function ( \unique\scraper\events\ItemEndEvent $event ) {
                
                if ( $event->site_item ) {

                    // save() here stands for whatever persistence your SiteItem provides (e.g. saving it to a DB).
                    $event->site_item->save();
                }
            } );

            $downloader->scrape();
        }
    }