1. Go to the download page and download the didix16/php-grammar library, choosing the download type "require".
2. Extract the ZIP file and open the index.php.
3. Add this code to the index.php.
<?php
require_once('vendor/autoload.php');
/* Start to develop here. Best regards https://php-download.com/ */
didix16 / php-grammar example snippets
composer
// APILexer.php
use didix16\Grammar\Lexer;
use didix16\Grammar\Token;
class APILexer extends Lexer
{
    /**
     * Token identifiers.
     *
     * Ids 0 and 1 are not declared here: they are reserved for the "n/a" and
     * "<EOF>" entries of $tokenNames, so the language tokens start at 2.
     */
    const T_ACTION = 2;
    const T_WHITESPACE = 3;
    const T_ARG = 4;
    const T_NEWLINE = 5;

    /**
     * General regular expression used to cut the raw input into words.
     *
     * Alternatives, in order: an identifier-like word followed by a space,
     * a single space, a run of characters other than space / LF / CR, or an
     * optional line feed.
     */
    const REG_TOKEN = '/([A-Za-z_][A-Za-z0-9_\-]*)(?= )|( )|([^ \n\r]+)|(\n?)/';

    /**
     * String representation of each token, indexed by token id. The order
     * matters: it must match the ids declared in the token identifiers.
     *
     * Index 0 must always be "n/a" and index 1 must always be "<EOF>".
     */
    public static $tokenNames = ["n/a", "<EOF>", "T_ACTION", "T_WHITESPACE", "T_ARG", "T_NEWLINE"];

    /**
     * Regular expressions that identify each token.
     *
     * Each word extracted with REG_TOKEN is compared against these patterns
     * to decide which language token it is.
     */
    const REG_T_ACTION = '/^[a-zA-Z_][a-zA-Z0-9_]*$/';
    const REG_T_WHITESPACE = '/^ $/';
    const REG_T_ARG = '/^([^ \n]+)$/';
    // The terminating semicolon is required; without it the class does not parse.
    const REG_T_NEWLINE = '/^(\n)$/';

    /**
     * Returns the next word ahead of the current lexer pointer without
     * changing the lexer state.
     *
     * @return string The word matched by REG_TOKEN, or self::LAMBDA when
     *                nothing matches.
     */
    protected function lookahead(): string
    {
        $word = self::LAMBDA;

        // NOTE(review): consume() already leaves $this->p just past the current
        // word, so adding strlen($this->word) here may skip input. Confirm
        // against the base Lexer's pointer semantics before relying on this.
        $offset = $this->p + strlen($this->word);

        if (0 != preg_match(self::REG_TOKEN, $this->input, $matches, PREG_OFFSET_CAPTURE, $offset)) {
            $word = $matches[0][0];
        }

        return $word;
    }

    /**
     * Consumes one word from the input string and moves the internal pointer
     * to just after it. When nothing matches, the current word becomes
     * self::LAMBDA and the pointer is left untouched.
     *
     * @return Lexer This lexer, to allow chaining.
     */
    public function consume(): Lexer
    {
        if (0 != preg_match(self::REG_TOKEN, $this->input, $matches, PREG_OFFSET_CAPTURE, $this->p)) {
            // With PREG_OFFSET_CAPTURE each match is a [text, byte offset] pair.
            $this->word = $matches[0][0];
            $this->p = $matches[0][1] + strlen($this->word);
        } else {
            $this->word = self::LAMBDA;
        }

        return $this;
    }

    /**
     * Checks whether the word $text is an action.
     *
     * @param string $text Word to test.
     * @return bool True when $text matches REG_T_ACTION.
     */
    protected function isAction($text)
    {
        return 1 === preg_match(self::REG_T_ACTION, $text);
    }

    /**
     * Checks whether the word $text is a single whitespace character.
     *
     * @param string $text Word to test.
     * @return bool True when $text matches REG_T_WHITESPACE.
     */
    protected function isWhitespace($text)
    {
        return 1 === preg_match(self::REG_T_WHITESPACE, $text);
    }

    /**
     * Checks whether the word $text is an argument.
     *
     * @param string $text Word to test.
     * @return bool True when $text matches REG_T_ARG.
     */
    protected function isArg($text)
    {
        return 1 === preg_match(self::REG_T_ARG, $text);
    }

    /**
     * Checks whether the word $text is a new line character.
     *
     * @param string $text Word to test.
     * @return bool True when $text matches REG_T_NEWLINE.
     */
    protected function isNewLine($text)
    {
        return 1 === preg_match(self::REG_T_NEWLINE, $text);
    }

    /**
     * Returns the next token identified by this lexer.
     *
     * @return Token
     * @throws \Exception When the current word matches none of the token patterns.
     */
    public function nextToken(): Token
    {
        // Keep the word being processed: consume() overwrites $this->word.
        $word = $this->word;

        if ($this->word != self::LAMBDA) {
            // Actions and arguments look very similar, so they are told apart
            // by context rather than by pattern.
            if ($this->isAction($this->word) || $this->isArg($this->word)) {
                $lastTokenType = $this->lastToken()->getType();
                $this->consume();

                // A word that follows a whitespace token is an argument;
                // any other position makes it an action.
                $type = $lastTokenType !== self::T_WHITESPACE ? self::T_ACTION : self::T_ARG;

                return $this->returnToken($type, $word);
            }

            if ($this->isWhitespace($this->word)) {
                $this->consume();

                return $this->returnToken(self::T_WHITESPACE, $word);
            }

            if ($this->isNewLine($this->word)) {
                $this->consume();

                return $this->returnToken(self::T_NEWLINE, $word);
            }

            throw new \Exception("Invalid symbol [" . $word . "]");
        }

        // No word left: emit the end-of-file token using its "<EOF>" name.
        return $this->returnToken(self::EOF_TYPE, self::$tokenNames[1]);
    }

    /**
     * Given a token type, returns its token name representation.
     *
     * @param int $tokenType One of the ids used as index of $tokenNames.
     * @return string
     */
    public function getTokenName($tokenType): string
    {
        return APILexer::$tokenNames[$tokenType];
    }
}
// APIParser.php
use didix16\Grammar\Lexer;
use didix16\Grammar\Parser;
/**
* Grammar: S (Syntax) is the entrypoint
* ------------------------------------------------------
* S := LineList
* Line := Action Whitespace Argument
* LineList := Line [NewLine LineList]
* Action := /^[a-zA-Z_][a-zA-Z0-9_]*$/
* Argument := /^[^ \n]+$/
* Whitespace := /^ $/
* NewLine := /^\n$/
*/
class APIParser extends Parser
{
    /**
     * Entry point of the grammar.
     *
     * S := LineList
     *
     * Parses a list of lines, requires the end-of-file token right after it
     * and returns whatever getTokens() yields.
     */
    public function parse(): array
    {
        $this->lineList();
        $this->match(Lexer::EOF_TYPE);

        return $this->getTokens();
    }

    /**
     * LineList := Line [NewLine LineList]
     *
     * Written as a loop: one line is always parsed, and every following
     * new line token must be followed by another line.
     */
    public function lineList()
    {
        $this->line();

        while (APILexer::T_NEWLINE === $this->lookahead->getType()) {
            $this->newLine();
            $this->line();
        }
    }

    /**
     * Line := Action Whitespace Argument
     */
    public function line()
    {
        $this->action();
        $this->whitespace();
        $this->argument();
    }

    /**
     * Terminal rule. Action := /^[a-zA-Z_][a-zA-Z0-9_]*$/
     */
    public function action()
    {
        $this->match(APILexer::T_ACTION);
    }

    /**
     * Terminal rule. Whitespace := /^ $/
     */
    public function whitespace()
    {
        $this->match(APILexer::T_WHITESPACE);
    }

    /**
     * Terminal rule. Argument := /^[^ \n]+$/
     */
    public function argument()
    {
        $this->match(APILexer::T_ARG);
    }

    /**
     * Terminal rule. NewLine := /^\n$/
     */
    public function newLine()
    {
        $this->match(APILexer::T_NEWLINE);
    }
}