PHP code example of didix16 / php-grammar

1. Go to the download page for the didix16/php-grammar library and choose the download type "require".

2. Extract the ZIP file and open the index.php.

3. Add this code to the index.php.
    
        
<?php
// Load the Composer autoloader relative to this file, so the include does not
// depend on the include path or on the current working directory.
require_once __DIR__ . '/vendor/autoload.php';

/* Start to develop here. Best regards https://php-download.com/ */

    

didix16 / php-grammar example snippets


composer require didix16/php-grammar



// APILexer.php

use didix16\Grammar\Lexer;
use didix16\Grammar\Token;

/**
 * Lexer for a tiny line-oriented "API script" language in which every line has
 * the form "<action> <argument>".
 *
 * It splits the raw input into words with REG_TOKEN and classifies each word as
 * T_ACTION, T_WHITESPACE, T_ARG or T_NEWLINE.
 */
class APILexer extends Lexer {

    /**
     * Token identifiers.
     *
     * Each value is also the index of the token's name inside self::$tokenNames.
     */
    const T_ACTION = 2;
    const T_WHITESPACE = 3;
    const T_ARG = 4;
    const T_NEWLINE = 5;

    /**
     * General regular expression used to cut the raw text into words.
     *
     * Alternatives, in order: an identifier that is followed by a space, a single
     * space, a run of characters that are neither space nor line break, and an
     * optional "\n".
     *
     * NOTE(review): the last alternative can match the empty string, so a "\r" in
     * the input (excluded from the third alternative) yields an empty word — confirm
     * whether CRLF input needs to be supported.
     */
    const REG_TOKEN = '/([A-Za-z_][A-Za-z0-9_\-]*)(?= )|( )|([^ \n\r]+)|(\n?)/';

    /**
     * String representation of each token, indexed by token ID, so the order
     * must match the token identifiers declared above.
     *
     * Index 0 must always be "n/a" and index 1 must always be "<EOF>".
     */
    static $tokenNames = ["n/a", "<EOF>", "T_ACTION", "T_WHITESPACE", "T_ARG", "T_NEWLINE"];

    /**
     * Regular expressions that identify each token.
     * A word found through REG_TOKEN is tested against them to decide which of
     * the language tokens it is.
     */
    const REG_T_ACTION = '/^[a-zA-Z_][a-zA-Z0-9_]*$/';
    const REG_T_WHITESPACE = '/^ $/';
    const REG_T_ARG = '/^([^ \n]+)$/';
    const REG_T_NEWLINE = '/^(\n)$/';

    /**
     * Returns the word that REG_TOKEN finds ahead of the current lexer pointer,
     * without moving the pointer.
     *
     * NOTE(review): consume() already leaves $this->p just past the current word,
     * and this method adds strlen($this->word) on top of that — confirm the
     * intended offset against the base Lexer.
     *
     * @return string the word found, or self::LAMBDA when nothing matches
     */
    protected function lookahead(): string
    {
        $offset = $this->p + strlen($this->word);

        // preg_match() yields 1 on a match, 0 on no match and false on error.
        if (1 === preg_match(self::REG_TOKEN, $this->input, $matches, PREG_OFFSET_CAPTURE, $offset)) {
            return $matches[0][0];
        }

        return self::LAMBDA;
    }

    /**
     * Reads the next word of the input into $this->word and moves the internal
     * pointer just past it. When nothing matches, the current word becomes
     * self::LAMBDA and the pointer stays where it is.
     *
     * @return Lexer this lexer, for chaining
     */
    public function consume(): Lexer
    {
        if (1 === preg_match(self::REG_TOKEN, $this->input, $matches, PREG_OFFSET_CAPTURE, $this->p)) {
            // $matches[0] is [matched text, byte offset] because of PREG_OFFSET_CAPTURE.
            $this->word = $matches[0][0];
            $this->p = $matches[0][1] + strlen($this->word);
        } else {
            $this->word = self::LAMBDA;
        }

        return $this;
    }

    /**
     * Check if word $text is an action
     */
    protected function isAction($text){

        return 1 === preg_match(self::REG_T_ACTION, $text);
    }

    /**
     * Check if word $text is a whitespace character
     */
    protected function isWhitespace($text){

        return 1 === preg_match(self::REG_T_WHITESPACE, $text);
    }

    /**
     * Check if word $text is an argument
     */
    protected function isArg($text){

        return 1 === preg_match(self::REG_T_ARG, $text);
    }

    /**
     * Check if word $text is a new line character
     */
    protected function isNewLine($text){

        return 1 === preg_match(self::REG_T_NEWLINE, $text);
    }

    /**
     * Returns the next token identified by this lexer.
     *
     * The current word is classified, the lexer advances to the following word,
     * and the token built from the classified word is returned. Once the current
     * word is self::LAMBDA an EOF token is returned instead.
     *
     * @return Token
     * @throws \Exception when the current word matches none of the token patterns
     */
    public function nextToken(): Token {

        // current word being processed
        $word = $this->word;

        if ($word == self::LAMBDA) {
            return $this->returnToken(self::EOF_TYPE, self::$tokenNames[1]);
        }

        // Actions and args look very similar, so they are told apart by position.
        if ($this->isAction($word) || $this->isArg($word)) {

            // Must be read before consuming/returning the new token.
            $lastTokenType = $this->lastToken()->getType();

            $this->consume();

            // A word that directly follows a whitespace token is an argument;
            // any other word is an action.
            $type = $lastTokenType !== self::T_WHITESPACE ? self::T_ACTION : self::T_ARG;

            return $this->returnToken($type, $word);
        }

        if ($this->isWhitespace($word)) {

            $this->consume();
            return $this->returnToken(self::T_WHITESPACE, $word);
        }

        if ($this->isNewLine($word)) {

            $this->consume();
            return $this->returnToken(self::T_NEWLINE, $word);
        }

        throw new \Exception("Invalid symbol [" . $word . "]");
    }

    /**
     * Given a token type, returns its token name representation
     * @return string
     */
    public function getTokenName($tokenType): string {

        return self::$tokenNames[$tokenType];
    }
}



// APIParser.php

use didix16\Grammar\Lexer;
use didix16\Grammar\Parser;

/**
 * Recursive-descent parser for the API script language tokenized by APILexer.
 *
 * Grammar (S is the start symbol):
 * ------------------------------------------------------
 * S            := LineList
 * LineList     := Line [NewLine LineList]
 * Line         := Action Whitespace Argument
 * Action       := /^[a-zA-Z_][a-zA-Z0-9_]*$/
 * Whitespace   := /^ $/
 * Argument     := /^[^ \n]+/
 * NewLine      := /^\n$/
 */
class APIParser extends Parser {

    /**
     * Entry point of the grammar.
     *
     * S := LineList
     *
     * Parses every line, requires the end-of-file token afterwards and returns
     * whatever getTokens() reports.
     */
    public function parse(): array {

        $this->lineList();
        $this->match(Lexer::EOF_TYPE);

        return $this->getTokens();
    }

    /**
     * LineList := Line [NewLine LineList]
     *
     * Parses one line and, only if a newline token comes next, the newline and
     * the remaining lines.
     */
    public function lineList(){

        $this->line();

        // No newline ahead: this was the last line of the list.
        if ( $this->lookahead->getType() !== APILexer::T_NEWLINE ){
            return;
        }

        $this->newLine();
        $this->lineList();
    }

    /**
     * Line := Action Whitespace Argument
     */
    public function line(){

        $this->action();
        $this->whitespace();
        $this->argument();
    }

    /**
     * Terminal: Action := ^[a-zA-Z_][a-zA-Z0-9_]*$
     */
    public function action(){

        $this->match(APILexer::T_ACTION);
    }

    /**
     * Terminal: Whitespace := /^ $/
     */
    public function whitespace(){

        $this->match(APILexer::T_WHITESPACE);
    }

    /**
     * Terminal: Argument := /^[^ \n]+/
     */
    public function argument(){

        $this->match(APILexer::T_ARG);
    }

    /**
     * Terminal: NewLine := ^\n$
     */
    public function newLine(){

        $this->match(APILexer::T_NEWLINE);
    }
}



// Build the script from single-quoted lines so that "$mySuperService" stays
// literal text; inside a double-quoted string PHP would interpolate it as an
// (undefined) variable and the argument would be empty. Joining with "\n" gives
// exactly one newline character between lines, as the lexer's T_NEWLINE expects.
$script = implode("\n", [
    'GET https://some-awesome-api.com/endpoint?token=TOKEN',
    'PIPETO $mySuperService',
    'SAVEDB localhost:10000',
]);


$lexer = new APILexer($script);
$parser = new APIParser($lexer);

$tokens = $parser->parse();

// var_dump($tokens) should be:

array(12) {
  [0]=>
  object(Token)#4 (2) {
    ["value":protected]=>
    string(3) "GET"
    ["type":protected]=>
    int(2)
  }
  [1]=>
  object(Token)#2 (2) {
    ["value":protected]=>
    string(1) " "
    ["type":protected]=>
    int(3)
  }
  [2]=>
  object(Token)#5 (2) {
    ["value":protected]=>
    string(49) "https://some-awesome-api.com/endpoint?token=TOKEN"
    ["type":protected]=>
    int(4)
  }
  [3]=>
  object(Token)#6 (2) {
    ["value":protected]=>
    string(1) "
"
    ["type":protected]=>
    int(5)
  }
  [4]=>
  object(Token)#7 (2) {
    ["value":protected]=>
    string(6) "PIPETO"
    ["type":protected]=>
    int(2)
  }
  [5]=>
  object(Token)#8 (2) {
    ["value":protected]=>
    string(1) " "
    ["type":protected]=>
    int(3)
  }
  [6]=>
  object(Token)#9 (2) {
    ["value":protected]=>
    string(15) "$mySuperService"
    ["type":protected]=>
    int(4)
  }
  [7]=>
  object(Token)#10 (2) {
    ["value":protected]=>
    string(1) "
"
    ["type":protected]=>
    int(5)
  }
  [8]=>
  object(Token)#11 (2) {
    ["value":protected]=>
    string(6) "SAVEDB"
    ["type":protected]=>
    int(2)
  }
  [9]=>
  object(Token)#12 (2) {
    ["value":protected]=>
    string(1) " "
    ["type":protected]=>
    int(3)
  }
  [10]=>
  object(Token)#13 (2) {
    ["value":protected]=>
    string(15) "localhost:10000"
    ["type":protected]=>
    int(4)
  }
  [11]=>
  object(Token)#14 (2) {
    ["value":protected]=>
    string(5) "<EOF>"
    ["type":protected]=>
    int(1)
  }
}