PHP code example of benbjurstrom / markdown-object
2. Go to this page and download the library: Download benbjurstrom/markdown-object library. Choose the download type "require".
2. Extract the ZIP file and open the index.php.
3. Add this code to the index.php.
<?php
// Load Composer's autoloader relative to this file, so the script works
// no matter which directory it is launched from.
require_once __DIR__ . '/vendor/autoload.php';
/* Start to develop here. Best regards https://php-download.com/ */
benbjurstrom / markdown-object example snippets
use League\CommonMark\Environment\Environment;
use League\CommonMark\Parser\MarkdownParser;
use League\CommonMark\Extension\CommonMark\CommonMarkCoreExtension;
use League\CommonMark\Extension\Table\TableExtension;
use BenBjurstrom\MarkdownObject\Build\MarkdownObjectBuilder;
use BenBjurstrom\MarkdownObject\Tokenizer\TikTokenizer;
// 1) Parse Markdown with CommonMark
// Register the core CommonMark rules and the table extension before creating the parser.
$env = new Environment();
$env->addExtension(new CommonMarkCoreExtension());
$env->addExtension(new TableExtension());
$parser = new MarkdownParser($env);

$filename = 'guide.md';
$markdown = file_get_contents($filename);
if ($markdown === false) {
    // file_get_contents() reports failure by returning false; stop here rather
    // than handing a non-string value to the parser.
    throw new \RuntimeException("Unable to read Markdown file: {$filename}");
}
$doc = $parser->parse($markdown);
// 2) Build the structured model from the parsed document, passing the original
//    filename and source text along with a tokenizer
$builder = new MarkdownObjectBuilder();
$tokenizer = TikTokenizer::forModel('gpt-3.5-turbo');
$mdObj = $builder->build($doc, $filename, $markdown, $tokenizer);

// 3) Emit hierarchically-packed chunks
$chunks = $mdObj->toMarkdownChunks(target: 512, hardCap: 1024);
foreach ($chunks as $chunk) {
    // Source position tracking: the starting line is appended only when line
    // information is available for this chunk.
    $lines = $chunk->sourcePosition->lines;
    $lineSuffix = $lines === null ? '' : " | Line: {$lines->startLine}";
    $trail = implode(' › ', $chunk->breadcrumb);

    // Header (id, token count, optional line), breadcrumb trail, then the chunk body.
    echo "---\n",
        "Chunk: {$chunk->id} | {$chunk->tokenCount} tokens{$lineSuffix}\n",
        "{$trail}\n",
        "---\n\n",
        "{$chunk->markdown}\n\n";
}
/*
---
Chunk: 1 | 163 tokens | Line: 1
guide.md › Getting Started
---
# Getting Started
Welcome to the Markdown Object demo! This tool helps you visualize how markdown is parsed and chunked.
## Features
### Real-time Processing
Type or paste markdown in the left pane and see the results instantly.
### Hierarchical Chunking
Content is automatically organized into semantic chunks that keep related information together…
---
Chunk: 2 | 287 tokens | Line: 18
guide.md › Getting Started › Advanced Options
---
## Advanced Options
Configure chunking parameters to see how different settings affect the output.
### Token Limits
Adjust the target and hard cap values to control chunk sizes…
*/
// Serialize the model to JSON (pretty-printed via the JSON_PRETTY_PRINT flag)
$json = $mdObj->toJson(JSON_PRETTY_PRINT);
// Deserialize from that JSON string using the static factory on the MarkdownObject model class
$copy = \BenBjurstrom\MarkdownObject\Model\MarkdownObject::fromJson($json);
use BenBjurstrom\MarkdownObject\Tokenizer\TikTokenizer;
// Pick a tokenizer. The two assignments below are alternatives: as written,
// the second one replaces the first.
$tokenizer = TikTokenizer::forModel('gpt-4');        // select by model name
$tokenizer = TikTokenizer::forEncoding('p50k_base'); // or select by encoding name

// Hand the same tokenizer to both build() and toMarkdownChunks()
$mdObj = $builder->build($doc, $filename, $markdown, $tokenizer);
$chunks = $mdObj->toMarkdownChunks(target: 512, hardCap: 1024, tok: $tokenizer);
// Chunk with tighter limits; the last two arguments are optional.
// NOTE: $customTokenizer is not defined in this snippet — supply your own tokenizer instance.
$chunks = $mdObj->toMarkdownChunks(
    target: 256,               // smaller target for content splitting
    hardCap: 512,              // smaller hard cap for hierarchy
    tok: $customTokenizer,     // optional: use a different tokenizer
    repeatTableHeaders: false, // optional: don't repeat headers in split tables
);
// Build-time token count: the sum of the per-node counts, with no separators included
echo $mdObj->tokenCount; // e.g., 155
// Chunk:
Loading please wait ...
Before you can download the PHP files, the dependencies must be resolved. This can take a few minutes. Please be patient.