PHP code example of llm-html-extractor / symfony-bundle

1. Go to this page and download the library: Download the llm-html-extractor/symfony-bundle library. Choose the download type "require".

2. Extract the ZIP file and open index.php.

3. Add this code to index.php:
    
        
<?php
// Load Composer's autoloader relative to this file, so the include does not
// depend on the current working directory or on include_path.
require_once __DIR__ . '/vendor/autoload.php';

/* Start developing here. Best regards https://php-download.com/ */

    

llm-html-extractor / symfony-bundle example snippets


use LlmHtmlExtractor\SymfonyBundle\Attribute\AsLlmExtractableProperty;

/**
 * Extraction target describing an article.
 *
 * Every promoted constructor property carries an AsLlmExtractableProperty
 * attribute whose string argument states what should be extracted into it.
 * All four values are plain strings; nothing in this class validates them
 * (for example, the date format requested for $publishedAt is not enforced here).
 *
 * NOTE(review): how the bundle reads these attributes is not visible in this
 * snippet; presumably the string is used as the per-field instruction — confirm
 * against the bundle documentation.
 */
class ArticleExtractionResult
{
    public function __construct(
        // Title of the article.
        #[AsLlmExtractableProperty('Extract the article title')]
        public string $title,

        // Name of the article's author.
        #[AsLlmExtractableProperty('Extract the author name')]
        public string $author,

        // Publication date, kept as a string; the instruction asks for YYYY-MM-DD.
        #[AsLlmExtractableProperty('Extract publication date in YYYY-MM-DD format')]
        public string $publishedAt,

        // Main body text of the article.
        #[AsLlmExtractableProperty('Extract the main article content')]
        public string $content,
    ) {}
}

use LlmHtmlExtractor\SymfonyBundle\Extractor\ExtractionHandler;

/**
 * Service that turns article HTML into an ArticleExtractionResult by
 * delegating to the injected ExtractionHandler.
 */
class ArticleScraper
{
    /**
     * @param ExtractionHandler $extractionHandler Handler that is given the target class name and the HTML.
     */
    public function __construct(
        private ExtractionHandler $extractionHandler,
    ) {}

    /**
     * Runs the extraction handler for ArticleExtractionResult on the given markup.
     *
     * @param string $html HTML handed to the handler unchanged.
     *
     * @return ArticleExtractionResult The handler's result; the declared return
     *                                 type makes PHP reject any other value.
     */
    public function scrape(string $html): ArticleExtractionResult
    {
        $targetClass = ArticleExtractionResult::class;
        $extracted = $this->extractionHandler->handle($targetClass, $html);

        return $extracted;
    }
}

use LlmHtmlExtractor\SymfonyBundle\Extractor\FromHtmlExtractorInterface;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\DependencyInjection\Attribute\AutoconfigureTag;

/**
 * Extractor that collects the href of every <a> element whose href contains ".pdf".
 *
 * Tagged as 'llm_extractor.extractor' with priority 50 via AutoconfigureTag.
 */
#[AutoconfigureTag('llm_extractor.extractor', ['priority' => 50])]
class CustomPdfUrlExtractor implements FromHtmlExtractorInterface
{
    /**
     * Gathers the href attribute of each anchor matched by the XPath query.
     *
     * @param string $html    Markup to search.
     * @param array  $context Not read by this implementation.
     *
     * @return mixed The array produced by Crawler::each(): one href value per matching anchor.
     */
    public function extract(string $html, array $context = []): mixed
    {
        $document = new Crawler($html);
        $pdfAnchors = $document->filterXPath('//a[contains(@href, ".pdf")]');

        return $pdfAnchors->each(
            static function (Crawler $anchor): ?string {
                return $anchor->attr('href');
            }
        );
    }

    /**
     * Tells whether this extractor handles the given class/property pair.
     *
     * @param string $className    Fully-qualified class name being populated.
     * @param string $propertyName Property name being populated.
     *
     * @return bool True only for the 'pdfUrls' property of ArticleExtractionResult.
     */
    public function supports(string $className, string $propertyName): bool
    {
        if ($className !== ArticleExtractionResult::class) {
            return false;
        }

        return $propertyName === 'pdfUrls';
    }
}