1. Go to this page and download the library: Download cacing69/cquery library. Choose the download type "require".
2. Extract the ZIP file and open the index.php.
3. Add this code to the index.php.
<?php
require_once('vendor/autoload.php');
/* Start to develop here. Best regards https://php-download.com/ */
// cacing69/cquery example snippets
// Read the sample document and hand its HTML to a Cquery instance.
$html = file_get_contents("src/Samples/sample.html");
$data = new Cacing69\Cquery\Cquery($html);

// The chain must start from $data (the instance created above); the snippet
// previously started from $query, which is never defined.
$result = $data
    ->from("#lorem .link") // next will be from("(#lorem .link) as el")
    ->define(
        "h1 as title",
        "a as description",
        "attr(href, a) as url", // get href attribute from all element at #lorem .link a
        "attr(class, a) as class"
    )
    // just imagine this is your table, and every element as your column
    ->filter("attr(class, a)", "has", "vip") // add some filter here
    // ->orFilter("attr(class, a)", "has", "super") // add another condition its has OR condition SQL
    // ->filter("attr(class, a)", "has", "blocked") // add another condition its has AND condition SQL
    ->get(); // -> return type is \Doctrine\Common\Collections\ArrayCollection
// Same query as the fluent example, expressed as a single raw query string.
$html = file_get_contents("src/Samples/sample.html");
$data = new Cacing69\Cquery\Cquery($html);

// The chain must start from $data (the instance created above); the snippet
// previously started from $query, which is never defined.
// The query text is kept exactly as written because it is passed to raw() verbatim.
$result = $data
    ->raw("
from (#lorem .link)
define
h1 as title,
a as description,
attr(href, a) as url,
attr(class, a) as class
filter
attr(class, a) has 'vip'
");
use Cacing69\Cquery\Definer;
$html = file_get_contents("src/Samples/sample.html");
$data = new Cacing69\Cquery\Cquery($html);

// Timestamp appended to every "col_2" value by the Definer callback below.
// It has to exist before the closure is created: the closure captures it by
// value through use ($date), and the snippet previously never defined it.
$date = date("Y-m-d H:i:s");

$result_1 = $data
    ->from("#lorem .link")
    ->define(
        "upper(h1) as title_upper",
        // Definer built from a selector ("a"), an alias ("col_2") and a callback
        // that post-processes each extracted value.
        new Definer("a", "col_2", function ($value) use ($date) {
            return "{$value} fetched on: {$date}";
        })
    )
    ->filter("attr(class, a)", "has", "vip")
    ->limit(2)
    ->get() // -> return type is \Doctrine\Common\Collections\ArrayCollection
    ->toArray();
// Filtering with a closure instead of an operator/value pair.
// The callback is given the matched "h1" node and keeps the row only when
// the node's text is exactly "Title 3".
$isThirdTitle = function ($node) {
    return "Title 3" === $node->text();
};

$result_2 = $data
    ->from("#lorem .link")
    ->define("reverse(h1) as title", "attr(href, a) as url")
    ->filter("h1", $isThirdTitle)
    ->get() // -> return type is \Doctrine\Common\Collections\ArrayCollection
    ->toArray();
// another example, to load data from url used browserkit
$url = "https://free-proxy-list.net/";

// The class is referenced by its qualified name: at this point in the file no
// `use Cacing69\Cquery\Cquery;` import is in effect, so the bare name `Cquery`
// would not resolve.
$data = new Cacing69\Cquery\Cquery($url);

$result_3 = $data
    ->from(".fpl-list")
    ->pick(
        "td:nth-child(1) as ip_address",
        "td:nth-child(4) as country",
        "td:nth-child(7) as https",
    )
    ->filter('td:nth-child(7)', "=", "no")
    ->limit(1)
    ->get() // -> return type is \Doctrine\Common\Collections\ArrayCollection
    ->toArray();
// another example, to load data from url used browserkit
$url = "http://quotes.toscrape.com/";

// Qualified class name: no `use Cacing69\Cquery\Cquery;` import is in effect
// at this point in the file, so the bare name `Cquery` would not resolve.
$data = new Cacing69\Cquery\Cquery($url);

$result_4 = $data
    ->from(".col-md-8 > .quote")
    ->define(
        "span.text as text",
        "span:nth-child(2) > small as author",
        "append_node(div > .tags, a) as tags",
    )
    ->get() // -> return type is \Doctrine\Common\Collections\ArrayCollection
    ->toArray();
// another example, to load data from url used browserkit
$url = "http://quotes.toscrape.com/";

// Qualified class name: no `use Cacing69\Cquery\Cquery;` import is in effect
// at this point in the file, so the bare name `Cquery` would not resolve.
$data = new Cacing69\Cquery\Cquery($url);

$result_5 = $data
    ->from(".col-md-8 > .quote")
    ->define(
        "span.text as text",
        "append_node(div > .tags, a) as tags.key", // grab child `a` on element `div > .tags` and place it into tags['key']
    )
    ->get() // -> return type is \Doctrine\Common\Collections\ArrayCollection
    ->toArray();
// another example, to load data from url used browserkit
$url = "http://quotes.toscrape.com/";

// Qualified class name: no `use Cacing69\Cquery\Cquery;` import is in effect
// at this point in the file, so the bare name `Cquery` would not resolve.
$data = new Cacing69\Cquery\Cquery($url);

$result_6 = $data
    ->from(".col-md-8 > .quote")
    ->define(
        "span.text as text",
        "append_node(div > .tags, a) as _tags",
        "append_node(div > .tags, a) as tags.*.text",
        "append_node(div > .tags, attr(href, a)) as tags.*.url", // [*] means each index; for now it is limited to only one level
    )
    ->get() // -> return type is \Doctrine\Common\Collections\ArrayCollection
    ->toArray();
// how to use replace with single string
// NOTE(review): SAMPLE_HTML is not defined anywhere in this snippet; it must be
// defined beforehand as the path (or URL) of the HTML document to load.
$content = file_get_contents(SAMPLE_HTML);

// Qualified class name: no `use Cacing69\Cquery\Cquery;` import is in effect
// at this point in the file, so the bare name `Cquery` would not resolve.
$data = new Cacing69\Cquery\Cquery($content);

$result = $data
    ->from(".col-md-8 > .quote")
    ->define(
        "replace('The', 'Lorem', span.text) as text",
    )
    ->get();
// how to use replace with array arguments
// Qualified class name: no `use Cacing69\Cquery\Cquery;` import is in effect
// at this point in the file, so the bare name `Cquery` would not resolve.
// $content is the HTML string loaded by the preceding example.
$data_2 = new Cacing69\Cquery\Cquery($content);

$result = $data_2
    ->from(".col-md-8 > .quote")
    ->define(
        "replace(['The', 'are'], ['Please ', 'son'], span.text) as text",
        // "replace(['The', 'are'], ['Please'], span.text) as text", // or you can do this if just want to use single replacement
    )
    ->get();
// how to use replace with array arguments and single replacement
// $simpleHtml was previously used without being defined. It is loaded here from
// the sample document that the other "#lorem .link" examples read.
// NOTE(review): confirm this is the document the example was written against.
$simpleHtml = file_get_contents("src/Samples/sample.html");

// Qualified class name: no `use Cacing69\Cquery\Cquery;` import is in effect
// at this point in the file, so the bare name `Cquery` would not resolve.
$data_3 = new Cacing69\Cquery\Cquery($simpleHtml);

$result = $data_3
    ->from("#lorem .link")
    ->define("replace(['Title', '331'], 'LOREM', h1) as title")
    ->get();
...->onObtainedResults(function ($results) {
    // You can do any operation on the obtained rows here.
    // This example adds a "sub" entry to every row and returns the modified rows.
    // The inner closure no longer captures $results, which it never used.
    return array_map(function ($_item) {
        $_item["sub"] = [
            "foo" => "bar"
        ];
        return $_item;
    }, $results);
})
use Cacing69\Cquery\Cquery;
use React\EventLoop\Loop;
use React\Http\Browser;
use Psr\Http\Message\ResponseInterface;
// Scrape a listing page, then fetch each row's detail page asynchronously
// to add a "price" field to the row.
$url = "http://www.classiccardatabase.com/postwar-models/Cadillac.php";
$data = new Cquery($url);
// Event loop and HTTP client; both are captured by the onObtainedResults callback below.
$loop = Loop::get();
$client = new Browser($loop);
// The detail (price) lives on another page, linked from each row's url.
$result = $data
->from(".content")
->define(
".car-model-link > a as name",
"replace('../', 'http://www.classiccardatabase.com/', attr(href, .car-model-link > a)) as url",
)
->filter("attr(href, .car-model-link > a)", "!=", "#")
->onObtainedResults(function ($results) use ($loop, $client){
// Author's note: 25 requests per chunk was the highest value that worked;
// with 30 per chunk some rows came back with null data.
$results = array_chunk($results, 25);
foreach ($results as $key => $_chunks) {
foreach ($_chunks as $_key => $_result) {
// Queue one GET request per row. $results is captured by reference so the
// then() callback can write the price back into the matching row.
$client
->get($_result["url"])
->then(function (ResponseInterface $response) use (&$results, $key, $_key) {
$detail = new Cquery((string) $response->getBody());
$resultDetail = $detail
->from(".spec")
->define(
".specleft tr:nth-child(1) > td.data as price"
)
->first();
// NOTE(review): the first() result is indexed without a null check — confirm
// what happens when the detail page has no ".spec" match.
$results[$key][$_key]["price"] = $resultDetail["price"];
});
}
// Run the loop once per chunk, after all of that chunk's requests are queued.
$loop->run();
}
// NOTE(review): $results is returned in its chunked form (the array_chunk
// output, a list of lists), not flattened — confirm callers expect that shape.
return $results;
})
->get();
// Submit a form on the loaded page before the query runs.
$url = "https://user-agents.net/random";
$data = new Cquery($url);

$result = $data
    // The browser type is written fully qualified: HttpBrowser is never imported
    // in this file, so the short name would resolve to a non-existent global
    // class and the type check would reject the argument.
    ->onContentLoaded(function (\Symfony\Component\BrowserKit\HttpBrowser $browser) {
        $browser->submitForm("Generate random list", [
            "limit" => 5,
        ]);

        return $browser;
    })
    ->from("section > article")
    ->define(
        "ol > li > a as user_agent",
    )
    ->get();
// Build and submit a DomCrawler Form manually on the loaded page.
$url = "https://id.wikipedia.org/wiki/Halaman_Utama";
$data = new Cquery($url);

$result = $data
    // $url must be captured with use ($url): PHP closures do not see variables of
    // the enclosing scope otherwise, so the Form previously received an undefined
    // variable as its current URI.
    // The Symfony types are written fully qualified because none of them is
    // imported in this file.
    ->onContentLoaded(function (
        \Symfony\Component\BrowserKit\HttpBrowser $browser,
        \Symfony\Component\DomCrawler\Crawler $crawler
    ) use ($url) {
        // This is a native function available in the dom-crawler.
        $form = new \Symfony\Component\DomCrawler\Form(
            $crawler->filter("#searchform")->getNode(0),
            $url
        );
        $browser->submit($form, [
            "search" => "sambas",
        ]);

        return $browser;
    })
    ->from("html")
    ->define(
        "title as title",
    )
    ->get();
// Follow a link on the loaded page before the query runs.
$url = "https://semver.org/";
$data = new Cquery($url);

$result = $data
    // The Symfony types are written fully qualified because neither HttpBrowser
    // nor Crawler is imported in this file. $crawler is not used by this example
    // but is kept in the callback signature.
    ->onContentLoaded(function (
        \Symfony\Component\BrowserKit\HttpBrowser $browser,
        \Symfony\Component\DomCrawler\Crawler $crawler
    ) {
        $browser->clickLink("Bahasa Indonesia (id)");

        return $browser;
    })
    ->from("#spec")
    ->define(
        "h2 as text",
    )
    ->get();
Loading please wait ...
Before you can download the PHP files, the dependencies should be resolved. This can take some minutes. Please be patient.