1. Go to the download page and download the mensbeam/html-parser library. Choose the download type "require".
2. Extract the ZIP file and open the index.php.
3. Add this code to the index.php.
<?php
require_once('vendor/autoload.php');
/* Start to develop here. Best regards https://php-download.com/ */

// Usage examples for the MensBeam HTML parser, run back to back as one script.
// Each class is imported exactly once: PHP raises a fatal "name is already in
// use" error when the same alias is imported twice in a single file.
use MensBeam\HTML\Parser;
use MensBeam\HTML\Parser\Config;

// --- Example 1: reading the encoding the parser settled on -----------------
echo Parser::parse('<!DOCTYPE html><b>Hello world!</b>')->encoding;
// prints "windows-1252"
echo Parser::parse('<!DOCTYPE html><meta charset="UTF-8"><b>Hello world!</b>')->encoding;
// prints "UTF-8"

// --- Example 2: supplying the encoding as the second argument --------------
// The second argument is given here as a bare encoding label.
echo Parser::parse("<!DOCTYPE html>\u{3088}", "UTF-8")
    ->document
    ->getElementsByTagName("body")[0]
    ->textContent;
// prints "よ"
// The second argument is given here as a Content-Type header value.
echo Parser::parse("<!DOCTYPE html>\u{3088}", "text/html; charset=utf-8")
    ->document
    ->getElementsByTagName("body")[0]
    ->textContent;
// also prints "よ"

// --- Example 3: configuring a fallback encoding ----------------------------
// No encoding is passed (null); the Config carries "Shift_JIS" as its
// encodingFallback, and the input bytes \x82\xE6 are printed as "よ".
$config = new Config();
$config->encodingFallback = "Shift_JIS";
echo Parser::parse("<!DOCTYPE html>\x82\xE6", null, $config)
    ->document
    ->getElementsByTagName("body")[0]
    ->textContent;
// also also prints "よ"

// --- Example 4: parsing fragments against different context elements -------
$config = new Config();
$config->htmlNamespace = true;
// set up two context nodes
$document = Parser::parse("<!DOCTYPE html><math></math>", "UTF-8", $config)->document;
$body = $document->getElementsByTagName("body")[0];
$math = $document->getElementsByTagName("math")[0];
echo $body->namespaceURI; // prints "http://www.w3.org/1999/xhtml"
echo $math->namespaceURI; // prints "http://www.w3.org/1998/Math/MathML"
// parse two identical fragments using different context elements
// NOTE(review): the second argument (0) is presumably the quirks-mode flag —
// confirm against the library's parseFragment documentation.
$htmlFragment = Parser::parseFragment($body, 0, "<mi>π</mi>", "UTF-8", $config);
$mathFragment = Parser::parseFragment($math, 0, "<mi>π</mi>", "UTF-8", $config);
echo $htmlFragment->firstChild->namespaceURI; // prints "http://www.w3.org/1999/xhtml"
echo $mathFragment->firstChild->namespaceURI; // prints "http://www.w3.org/1998/Math/MathML"

// --- Example 5: serializing a parsed tree back to HTML ----------------------
$document = Parser::parse("<!DOCTYPE html><a>Ook<p>Eek</a>")->document;
$body = $document->getElementsByTagName("body")[0];
echo Parser::serialize($document->documentElement); // prints "<html><head></head><body><a>Ook</a><p><a>Eek</a></p></body></html>"
echo Parser::serializeInner($body); // prints "<a>Ook</a><p><a>Eek</a></p>"
Loading please wait ...
Before you can download the PHP files, the dependencies must be resolved. This can take a few minutes. Please be patient.