PHP code example of lenonleite / simple-crawler
1. Go to this page and download the library: Download lenonleite/simple-crawler library. Choose the download type "require".
2. Extract the ZIP file and open index.php.
3. Add this code to index.php:
<?php
require_once('vendor/autoload.php');
/* Start to develop here. Best regards https://php-download.com/ */
lenonleite / simple-crawler example snippets
use Lenonleite\SimpleCrawler;
$html = file_get_contents( 'teste.html' );
$general = new SimpleCrawler\General();
$result = $general->get_tags('div', $html );
array(4) {
[0]=>
string(29) "<div id="header" class="all">"
[1]=>
string(18) "<div id="content">"
[2]=>
string(32) "<div id="sidebar" class="right">"
[3]=>
string(17) "<div id='footer'>"
}
use Lenonleite\SimpleCrawler;
$html = '<div id="header" class="all">';
$general = new SimpleCrawler\General();
$result = $general->get_atribute_tag( $html );
array(3) {
["full"]=>
string(29) "<div id="header" class="all">"
["key"]=>
string(3) "div"
["value"]=>
string(23) "id="header" class="all""
}
use Lenonleite\SimpleCrawler;
$html[] = '<div id="header" class="all">';
$html[] = '<div id="content">';
$general = new SimpleCrawler\General();
$result = $general->get_attributes_array_tags( $html );
array(2) {
[0]=>
array(3) {
["full"]=>
string(29) "<div id="header" class="all">"
["key"]=>
string(3) "div"
["value"]=>
string(23) "id="header" class="all""
}
[1]=>
array(3) {
["full"]=>
string(18) "<div id="content">"
["key"]=>
string(3) "div"
["value"]=>
string(12) "id="content""
}
}
use Lenonleite\SimpleCrawler;
$html = file_get_contents( 'teste.html' );
$general = new SimpleCrawler\General();
$result = $general->get_data_tags( 'div', $html );
array(3) {
["tags"]=>
array(4) {
[0]=>
string(29) "<div id="header" class="all">"
[1]=>
string(18) "<div id="content">"
[2]=>
string(32) "<div id="sidebar" class="right">"
[3]=>
string(17) "<div id='footer'>"
}
["html"]=>
string(322) "<html>...</html>"
["tags_atributes"]=>
array(4) {
[0]=>
array(3) {
["full"]=>
string(29) "<div id="header" class="all">"
["key"]=>
string(3) "div"
["value"]=>
string(23) "id="header" class="all""
}
[1]=>
array(3) {
["full"]=>
string(18) "<div id="content">"
["key"]=>
string(3) "div"
["value"]=>
string(12) "id="content""
}
[2]=>
array(3) {
["full"]=>
string(32) "<div id="sidebar" class="right">"
["key"]=>
string(3) "div"
["value"]=>
string(26) "id="sidebar" class="right""
}
[3]=>
array(3) {
["full"]=>
string(17) "<div id='footer'>"
["key"]=>
string(3) "div"
["value"]=>
string(11) "id='footer'"
}
}
}
use Lenonleite\SimpleCrawler;
$html = file_get_contents( 'teste.html' );
$general = new SimpleCrawler\General();
$tag = 'div';
$attribute = 'id';
$value = 'sidebar';
$result = $general->get_html_between_tag_attr_and_value( $html, $tag, $attribute, $value );
array(1) {
[0]=>
string(51) "<div id="sidebar" class="right">
Sidebar
</div>"
}
use Lenonleite\SimpleCrawler;
$html = file_get_contents( 'teste.html' );
$general = new SimpleCrawler\General();
$tag = 'div';
$value = 'internal';
$result = $general->get_html_between_tag_attr_and_value( $html, $tag, $value );
array(2) {
[0]=>
string(64) "<div id="header" class="all internal">
<h1>Title</h1>
</div>"
[1]=>
string(60) "<div id="sidebar" class="right internal">
Sidebar
</div>"
}
use Lenonleite\SimpleCrawler;
$html = file_get_contents( 'teste.html' );
$general = new SimpleCrawler\General();
$tag = 'div';
$result = $general->get_html_between_tag( $html, $tag );
array(4) {
[0]=>
string(64) "<div id="header" class="all internal">
<h1>Title</h1>
</div>"
[1]=>
string(51) "<div id="content">
<p> Center right</p>
</div>"
[2]=>
string(60) "<div id="sidebar" class="right internal">
Sidebar
</div>"
[3]=>
string(25) "<div id='footer'>
</div>"
}
use Lenonleite\SimpleCrawler;
$html = file_get_contents( 'teste.html' );
$general = new SimpleCrawler\General();
$tag = 'div';
$name_class_or_id = 'sidebar';
$result = $general->get_html_between_tag_attr_id_or_class( $html, $tag, $name_class_or_id );
array(1) {
[0]=>
string(60) "<div id="sidebar" class="right internal">
Sidebar
</div>"
}
use Lenonleite\SimpleCrawler;
$general = new SimpleCrawler\General();
$tag = '<div id="header" class="all">';
$result = $general->get_attribute_tag( $tag );
array(3) {
["full"]=>
string(29) "<div id="header" class="all">"
["key"]=>
string(3) "div"
["value"]=>
string(23) "id="header" class="all""
}
use Lenonleite\SimpleCrawler;
$html_php = file_get_contents( 'teste_php_methods.html' );
$php = new SimpleCrawler\Php\Methods();
$result = $php->get_parameters( $html_php );
array(3) {
[0]=>
array(6) {
["type_methdd"]=>
string(6) "public"
["static"]=>
string(0) ""
["name_method"]=>
string(6) " error"
["atributes"]=>
array(1) {
[0]=>
string(8) "$message"
}
["internal_context"]=>
string(87) "
$this->CleanUp();
if (!isset($this->info['error'])) {
$this->info['error'] = array();
"
["all_context"]=>
string(121) "public function error($message) {
$this->CleanUp();
if (!isset($this->info['error'])) {
$this->info['error'] = array();
}"
}
[1]=>
array(6) {
["type_methdd"]=>
string(0) ""
["static"]=>
string(0) ""
["name_method"]=>
string(8) " warning"
["atributes"]=>
array(1) {
[0]=>
string(8) "$message"
}
["internal_context"]=>
string(51) "
$this->info['warning'][] = $message;
return true;
"
["all_context"]=>
string(81) "
function warning($message) {
$this->info['warning'][] = $message;
return true;
}"
}
[2]=>
array(6) {
["type_methdd"]=>
string(7) "private"
["static"]=>
string(7) "static "
["name_method"]=>
string(8) " warning"
["atributes"]=>
array(2) {
[0]=>
string(8) "$message"
[1]=>
string(6) "$error"
}
["internal_context"]=>
string(51) "
$this->info['warning'][] = $message;
return true;
"
["all_context"]=>
string(102) "private static function warning($message,$error) {
$this->info['warning'][] = $message;
return true;
}"
}
}
use Lenonleite\SimpleCrawler;
$html_txt = '<a href="https://www.w3schools.com">Visit W3Schools</a>';
$html = new SimpleCrawler\Html();
$result = $html->get_parameters( $html_txt );
array(1) {
[0]=>
string(25) "https://www.w3schools.com"
}
use Lenonleite\SimpleCrawler;
$html = file_get_contents( 'teste.html' );
$login = new SimpleCrawler\Login();
$result = $login->get_forms( $html );
array(1) {
[0]=>
array(3) {
["html"]=>
string(280) "<form action="/action_page.php" method="POST">
First name:<br>
<input type="text" name="firstname" value="Mickey"><br>
Last name:<br>
<input type="text" name="lastname" value="Mouse"><br><br>
<input type="submit" value="Submit">
</form>"
["fields"]=>
array(2) {
["tags"]=>
array(3) {
[0]=>
string(55) "<input type="text" name="firstname" value="Mickey"><br>"
[1]=>
string(57) "<input type="text" name="lastname" value="Mouse"><br><br>"
[2]=>
string(36) "<input type="submit" value="Submit">"
}
["tags_atributes"]=>
array(3) {
[0]=>
array(3) {
["full"]=>
string(51) "<input type="text" name="firstname" value="Mickey">"
["key"]=>
string(5) "input"
["value"]=>
string(43) "type="text" name="firstname" value="Mickey""
}
[1]=>
array(3) {
["full"]=>
string(49) "<input type="text" name="lastname" value="Mouse">"
["key"]=>
string(5) "input"
["value"]=>
string(41) "type="text" name="lastname" value="Mouse""
}
[2]=>
array(3) {
["full"]=>
string(36) "<input type="submit" value="Submit">"
["key"]=>
string(5) "input"
["value"]=>
string(28) "type="submit" value="Submit""
}
}
}
["form"]=>
array(2) {
["tags"]=>
array(1) {
[0]=>
string(46) "<form action="/action_page.php" method="POST">"
}
["tags_atributes"]=>
array(1) {
[0]=>
array(3) {
["full"]=>
string(46) "<form action="/action_page.php" method="POST">"
["key"]=>
string(4) "form"
["value"]=>
string(39) "action="/action_page.php" method="POST""
}
}
}
}
}