PHP code example of valhook / dparse

1. Go to the download page ("Download valhook/dparse library") and download the library. Choose the download type "require".

2. Extract the ZIP file and open the index.php.

3. Add this code to the index.php.
    
        
<?php
require_once('vendor/autoload.php');

/* Start to develop here. Best regards https://php-download.com/ */

    

valhook / dparse example snippets


$dom = createdParseDOM($source, $args);

// Default option values.
// "method": just concatenate the URL and the HTTP body in the source parameter, and specify the HTTP method here.
// NOTE(review): "verify_peer" defaults to false; presumably TLS peer verification is off by default (confirm in the library).
$defaultargs = array(
    "method"              => "GET",
    "fake_user_agent"     => null,
    "fake_http_referer"   => null,
    "force_input_charset" => null,
    "output_charset"      => null,
    "strip_whitespaces"   => false,
    "connect_timeout"     => 10,
    "transfer_timeout"    => 40,
    "verify_peer"         => false,
    "http_auth_username"  => null,
    "http_auth_password"  => null,
    "cookie_file"         => null,
    "is_xml"              => false,
    "enable_logger"       => false,
);


$contents = dParseGetContents($source, $args);

$dom->getRawContent(); or $dom // output is a string
$dom->showRawContent(); // echoes the raw content
$dom->saveRawContent($filename); // writes the content to a file
$dom->getSize(); // output is an int of the byte size of the content
$dom->setWhitespaceStripping($bool); // Tells dParse to strip all extra whitespace whenever a string is returned or echoed.
$dom->getWhitespaceStripping(); // Get the current whitespace stripping status
$dom->setInputCharset($charset); // Tells dParse which charset should be used to interpret the document data; by default it is deduced from the HTTP/HTML headers
$dom->getInputCharset();
$dom->setOutputCharset($charset); // Tells dParse if a charset translation should be done when echoing or returning a string computed from the original DOM, by default no translation is done so the output charset is the same as the input's.
$dom->getOutputCharset();
$dom->getNoise(); // Returns an array of strings of unparsed data
        /* Noise regexes used by dParse, you may add yours at line 270 */
        $noise = array("'<!--(.*?)-->'is",
                        "'<!DOCTYPE(.*?)>'is",
                        "'<!\[CDATA\[(.*?)\]\]>'is",
                        "'(<\?)(.*?)(\

$nodes_that_are_images = $dom('img');
$nodes_that_are_link_with_btn_class = $dom('a.btn');

// Most of the CSS3 selecting standard is supported
$nodes = $dom('a + a');
$nodes = $dom('div ~ div');
$nodes = $dom('div > p');
$nodes = $dom('ul ul > li');
$nodes = $dom('input[type=text]');
$nodes = $dom('img[src^="https"]');
$nodes = $dom('img[src$=".jpg"]');
$nodes = $dom('a[href*=google]');
$nodes = $dom('body > *');
// Of course, it is no fun if you cannot combine them
$nodes = $dom('article[class*=post] section > div + div.classy, #some_id ~ .classy');

// Getting the root element
$rootnode = $dom->root();

// Remaining bugs
// Multiple classes are not supported, use:
$nodes = $dom('a[class="btn btn-primary"]') /* instead of */ $nodes = $dom('a.btn.btn-primary');

// Pseudo-selectors such as :not, :first-child, etc. are not yet supported

// For PHP < 5.3 users, use:
$dom->find('foo'); /* instead of */ $dom('foo');

$nodes->merge($othernodes); // Returns a new MetaNode Object containing the union of all the nodes from both MetaNodes
$nodes->length(); // Returns the number of nodes inside this meta node.
$nodes->eq($n); /* or */ $nodes->elem($n); // Returns the nth node of this MetaNode.
    // If n is a MetaNode or a node, it will return the intersection of both sets.

/* Example */
$dom('a')->text(); // will return array("foo", "bar", "baz", ...)

/* Example */
$dom('#unique-id')->text(); // will return "foo" and not array("foo")

foreach($nodes as $node) {
    // $node->do_something();
}

$node->_dom(); // Returns the DOM linked to this node.
$node->index(); // Returns the index of the node in the DOM. HTML is 0, HEAD is 1, TITLE can be 2 etc...
$node->length(); // Always returns 1; it is a compatibility abstraction with the MetaNode object.
$node->tagName(); or $node->name(); // Returns the tag name (a, li, div etc...)
$node->attributes(); // Returns a dictionary of the attributes
   /* Example:
    array(2) {
    ["href"]=>
    string(8) "#contact"
    ["class"]=>
    string(23) "btn border-button-black"
    }
    Therefore you will get an array of arrays if you call it from a MetaNode
    */
$node->XXXX; // Will return the content of an attribute, examples:
    $node->href;
    $node->src;
    $node->type;
    $node->attr('XXXX'); or $node->prop('XXXX'); /* it is the same as */ $node->XXXX;
$node->depth(); // Will return the depth (int) of the node inside the DOM
$node->breadcrumb(); // Will return the full path from the root element to this node
$node->breadcrumb_size(); // Returns the size of the breadcrumb
$node->breadcrumb_element($i); // Returns a sub-element of the node's breadcrumb
$node->val(); // Same as $node->value (support for textareas and selects will be added later, as the value attribute is irrelevant for them)
$node->html(); // Returns the inner HTML of the node as a string
$node->htmlLength();
$node->outerHTML(); // Returns the outer HTML of the node
$node->outerHTMLLength();
$node->text(); // Returns the inner text, therefore the inner HTML with HTML tags stripped
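$node->textLenght(); // NOTE(review): spelled "Lenght", unlike htmlLength() and outerHTMLLength(); confirm the actual method name in the library source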
$node; // This is the __toString method, it is the same as $node->outerHTML();

$node->find($smartselector); // Finds the subnodes of this node matching this CSS selector
$node->parent($smartselector = NULL); // Returns the first parent, or the parents that match the selector
$node->parents(); // Returns all parents
$node->parentsUntil($smartselector); // Return all the parents until the selector
$node->prev($smartselector = NULL); // Returns the first previous element, same depth level, or the previous one that matches the selector.
$node->prevAll();
$node->prevUntil($smartSelector);
$node->next($smartselector = NULL);
$node->nextAll();
$node->nextUntil($smartselector);
$node->children($smartselector = NULL); // If the selector is empty it returns all the children, if it is an int *i* it returns the first i children in the order of declaration inside the DOM, if it is a CSS selector or a MetaNode it returns the children that intersect with the CSS selector or the nodes.
$node->is($smartselector); // Returns itself (castable to true) for chaining purposes, or false, according to whether the node is part of the MetaNode or the results of the CSS query
$node->has($smartselector = NULL); or $node->hasChild($smartselector = NULL); // Returns itself or false
$node->hasParent($smartselector = NULL);
$node->hasPrev($smartselector = NULL);
$node->hasNext($smartselector = NULL);

$node->XXXX = "YYYY"; or $node->attr('XXXX', 'YYYY'); or $node->prop('XXXX', 'YYYY'); // Changes an attribute
$node->addClass($class);
$node->removeClass($class);
$node->setTagName($name); // Changes the tag name, ex span to div;

$dom->getLogger(); // Returns the logger object
$logger->isEnabled(); // Tells whether the logger is enabled
$logger->enable($bool); // Enables or disables the logger
$logger->getLogs(); // Returns an array of strings that are the logs
$logger->getLastLog(); // Returns the last entry in the logbook
$logger->clear(); // Clears all the logs
$logger->showLogs(); // Echoes all the logs
$logger->saveLogs($filename); // Writes all the logs to a file
$logger->log($message); // Logs a message if the logger is enabled


// Example: print the text of the headings found in a French Wikipedia article.
$wiki_root = "https://fr.wikipedia.org/wiki/";
$article = "Batman";
// Options are keyed by name ("option" => value). The former
// array("strip_whitespaces", true) built a list with keys 0 and 1,
// so the "strip_whitespaces" option was never actually set.
$doc = createDParseDOM($wiki_root.$article, array("strip_whitespaces" => true));
// Take the children of #bodyContent that are headings and collect their inner text.
$contents = $doc('#bodyContent')->children('h1, h2, h3, h4, h5, h6')->text();
print_r($contents);

// Example: collect the title and URL of every matching link on a YouTube search results page.
$youtube_root = "https://www.youtube.com/results?search_query=";
$search = urlencode("funny potato");
$doc = createDParseDOM($youtube_root.$search);
$doc->setWhitespaceStripping(true);

// Anchors inside an <h3> whose href contains "watch".
$links = $doc('h3 a[href*=watch]');

$out = array();
foreach ($links as $link) {
    $out[] = array("title" => $link->text(), "url" => $link->href);
}

print_r($out);