PHP code example of mensbeam / html-parser

1. Go to the download page and download the mensbeam/html-parser library. Choose the download type "require".

2. Extract the ZIP file and open the index.php.

3. Add this code to the index.php.
    
        
<?php
// Load Composer's autoloader. The path is anchored to this file's directory
// (__DIR__) so the include does not depend on the current working directory
// or on the configured include_path.
require_once __DIR__ . '/vendor/autoload.php';

/* Start to develop here. Best regards https://php-download.com/ */

    

mensbeam / html-parser example snippets


public static MensBeam\HTML\Parser::parse(
    string $data,
    ?string $encodingOrContentType = null,
    ?MensBeam\HTML\Parser\Config $config = null
): MensBeam\HTML\Parser\Output

class MensBeam\HTML\DOMParser {
  public function parseFromString(
    string $string,
    string $type
  ): \DOMDocument
}

public static MensBeam\HTML\Parser::parseInto(
    string $data,
    \DOMDocument $document,
    ?string $encodingOrContentType = null,
    ?MensBeam\HTML\Parser\Config $config = null
): MensBeam\HTML\Parser\Output

public static MensBeam\HTML\Parser::parseFragment(
    DOMElement $contextElement,
    int $quirksMode,
    string $data,
    ?string $encodingOrContentType = null,
    ?MensBeam\HTML\Parser\Config $config = null
): DOMDocumentFragment

public static MensBeam\HTML\Parser::serialize(
    DOMNode $node,
    array $config = []
): string

public static MensBeam\HTML\Parser::serializeInner(
    DOMNode $node,
    array $config = []
): string

  use MensBeam\HTML\Parser;

  // No encoding hint is passed to parse(); each result reports the encoding
  // that was used for that input.
  $withoutMeta = Parser::parse('<!DOCTYPE html><b>Hello world!</b>');
  echo $withoutMeta->encoding;
  // prints "windows-1252"

  $withMeta = Parser::parse('<!DOCTYPE html><meta charset="UTF-8"><b>Hello world!</b>');
  echo $withMeta->encoding;
  // prints "UTF-8"
  

  use MensBeam\HTML\Parser;

  // The second argument may be a bare encoding label...
  $fromLabel = Parser::parse("<!DOCTYPE html>\u{3088}", "UTF-8");
  echo $fromLabel->document->getElementsByTagName("body")[0]->textContent;
  // prints "よ"

  // ...or a Content-Type value carrying a charset parameter.
  $fromContentType = Parser::parse("<!DOCTYPE html>\u{3088}", "text/html; charset=utf-8");
  echo $fromContentType->document->getElementsByTagName("body")[0]->textContent;
  // also prints "よ"
  

  use MensBeam\HTML\Parser;
  use MensBeam\HTML\Parser\Config;

  // Shift_JIS is supplied only as the fallback encoding; the encoding
  // argument of parse() itself is left null.
  $config = new Config();
  $config->encodingFallback = "Shift_JIS";

  $output = Parser::parse("<!DOCTYPE html>\x82\xE6", null, $config);
  $body = $output->document->getElementsByTagName("body")[0];
  echo $body->textContent;
  // also also prints "よ"
  

  use MensBeam\HTML\Parser;
  use MensBeam\HTML\Parser\Config;

  $config = new Config();
  $config->htmlNamespace = true;

  // Build one document that supplies both context elements: <body> and <math>.
  $output = Parser::parse("<!DOCTYPE html><math></math>", "UTF-8", $config);
  $document = $output->document;
  $body = $document->getElementsByTagName("body")[0];
  $math = $document->getElementsByTagName("math")[0];
  echo $body->namespaceURI; // prints "http://www.w3.org/1999/xhtml"
  echo $math->namespaceURI; // prints "http://www.w3.org/1998/Math/MathML"

  // Parse the same markup once per context element; only the context differs.
  $markup = "<mi>&pi;</mi>";
  $htmlFragment = Parser::parseFragment($body, 0, $markup, "UTF-8", $config);
  $mathFragment = Parser::parseFragment($math, 0, $markup, "UTF-8", $config);
  echo $htmlFragment->firstChild->namespaceURI; // prints "http://www.w3.org/1999/xhtml"
  echo $mathFragment->firstChild->namespaceURI; // prints "http://www.w3.org/1998/Math/MathML"
  

  use MensBeam\HTML\Parser;

  // Parse markup in which <a> is closed after <p> was opened inside it,
  // then serialize the resulting tree in two ways.
  $document = Parser::parse("<!DOCTYPE html><a>Ook<p>Eek</a>")->document;
  $body = $document->getElementsByTagName("body")[0];
  // serialize(): the output includes the given node's own tags.
  echo Parser::serialize($document->documentElement); // prints "<html><head></head><body><a>Ook</a><p><a>Eek</a></p></body></html>"
  // serializeInner(): the output covers only the given node's children.
  echo Parser::serializeInner($body); // prints "<a>Ook</a><p><a>Eek</a></p>"