1. Go to this page and download the deravenedwriter/crawlengine library. Choose the download type require.
2. Extract the ZIP file and open index.php.
3. Add this code to index.php:
<?php
require_once('vendor/autoload.php');
/* Start to develop here. */
deravenedwriter/crawlengine example snippets
// First we need to add the namespace
use CrawlEngine\Engine;
// Now we initialize the main Engine class
$engine = new Engine();
/**
 * The Engine class accepts one optional integer parameter.
 * This parameter is used as the default timeout for all web requests made with that Engine instance.
 * So for example, if I want the default timeout to be 20 seconds, I would do that as follows:
 */
$engine = new Engine(20);
// by default, the timeout is 10 seconds
// First we need to add the namespace
use CrawlEngine\InputDetail;
/**
 * Now we initialize the InputDetail class.
 * In the example below, the InputDetail is initialized with a name,
 * which refers to the name of the input field in question, as follows:
 */
$input = new InputDetail('full_name');
/**
 * The name parameter is always compulsory for the instantiation of any InputDetail object.
 * The other parameters are optional and are generated automatically when the raw HTML
 * of the input element is supplied, so in this case I could just construct this InputDetail as follows:
 */
$input = new InputDetail("name", "", "<input name='name' type='text' value='Joe' placeholder='Input Your Name'/>");
/**
* So in this case, other values would be generated by the constructor, so:
* $input->name is equal to 'name'
* $input->value is equal to 'Joe'
* $input->type is equal to 'text'
* $input->placeholder is equal to 'Input Your Name'
*/
// You could also echo out the properties of an InputDetail Instance:
echo $input;
// The above would display as follows:
/**
* Input Detail:
* Name: name
* Value: Joe
* Placeholder: Input Your Name
* Type: text
*/
// First we need to add the namespace
use CrawlEngine\InputDetail;
use CrawlEngine\Engine;
$inputs = (new Engine())->getLoginFields('https://example.com/login');
// $inputs would contain an array of all the input tags
// found in the first form element on the webpage of the given uri,
// each in the form of an InputDetail instance,
// so we could display them as shown:
foreach ($inputs as $input) {
    echo $input;
}
// the above code would output as shown:
/**
* Input Detail:
* Name: email
* Value:
* Placeholder: Input Your Email
* Type: email
*
* Input Detail:
* Name: password
* Value:
* Placeholder: Input Your Password
* Type: password
*
* Input Detail:
* Name: _token
* Value: wi8AGQVAsR8sasNHcRFhgnVemspnNoRwmJfBQ0TH
* Placeholder:
* Type: hidden
*/
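// As a small usage sketch (an assumption built only on the InputDetail properties
// shown above, not on any further library API): since each entry exposes
// $input->name, $input->type and $input->value, we could pull out the hidden
// CSRF token field like this:
foreach ($inputs as $input) {
    if ($input->type === 'hidden' && $input->name === '_token') {
        $token = $input->value; // e.g. wi8AGQVAsR8sasNHcRFhgnVemspnNoRwmJfBQ0TH
    }
}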
$inputs = (new Engine())->getLoginFields('https://example.com/login', 2);
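// Note: the optional second argument (here 2) presumably selects which form on the
// page is read instead of the default first form; this is an assumption, as the
// snippets above do not spell out its meaning.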
// First we need to add the namespace
use CrawlEngine\InputDetail;
use CrawlEngine\Engine;
// We then create instances of the InputDetail class to carry the values the form needs, as follows:
$emailInput = new InputDetail('email', '[email protected]');
$passwordInput = new InputDetail('password','topSecretPassword');
// We could then arrange them in an array like so:
$formFields = [$emailInput, $passwordInput];
// We would then define the URI where the form page can be found:
$formPageUri = 'https://example.com/login';
// And the URI the form submits to:
$submitUri = 'https://example.com/login';
/**
 * After logging in, we would need to retrieve some information from some password-protected areas of the site.
 * Let's say these areas are located at https://example.com/dashboard and https://example.com/transactions.
 * We would also define them as follows:
*/
$contentPagesUri = ['https://example.com/dashboard', 'https://example.com/transactions'];
// after which we can then make our request as shown:
$engine = new Engine();
$crawlers = $engine->resolveRequest(
$formPageUri,
$submitUri,
$formFields,
$contentPagesUri
);
// $crawlers[0] will contain crawler object for https://example.com/dashboard
// $crawlers[1] will contain crawler object for https://example.com/transactions
// so I can then access values from the page as shown:
echo $crawlers[0]->filterXPath('//body/section/div/span')->text(); // this would output: '200432234233'
//or this:
echo $crawlers[0]->filter('body > section > div > span')->text(); // this would also output: '200432234233'
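// A further usage sketch, assuming every content page contains markup matching the
// selector used above (the selector itself is illustrative, not part of the library):
// loop over all returned crawlers and print a value from each page.
foreach ($crawlers as $crawler) {
    echo $crawler->filter('body > section > div > span')->text() . PHP_EOL;
}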