1. Go to this page and download the library: "Download helgesverre/extractor library". Choose the download type "require".
2. Extract the ZIP file and open the index.php.
3. Add this code to the index.php.
<?php
require_once('vendor/autoload.php');
/* Start to develop here. Best regards https://php-download.com/ */
// helgesverre/extractor example snippets
use HelgeSverre\Extractor\Facades\Extractor;
use HelgeSverre\Extractor\Facades\Text;
use Illuminate\Support\Facades\Storage;
// Read the menu image through the Storage facade.
$image = Storage::get("restaurant_menu.png");

// Extract text from images
$textFromImage = Text::textract($image);

// Extract structured data from plain text.
// Entries without a key ('restaurantName') only name a field, while
// 'key' => 'text' entries pair a field name with a short description.
// 'dishes' is given as a nested array of such entries.
$menu = Extractor::fields($textFromImage,
    fields: [
        'restaurantName',
        'phoneNumber',
        'dishes' => [
            'name' => 'name of the dish',
            'description' => 'description of the dish',
            'price' => 'price of the dish as a number',
        ],
    ],
    model: "gpt-3.5-turbo-1106",
    maxTokens: 4000,
);
// Load the sample CV from disk and pass its contents to Text::pdf().
$sample = Text::pdf(
    file_get_contents(__DIR__.'/../samples/helge-cv.pdf')
);

// Pull candidate details out of the CV text. 'workHistory' is given as a
// nested array of field names and descriptions.
// NOTE(review): no `use` statement for Engine is visible in this snippet;
// confirm the class is imported before running it.
$data = Extractor::fields(
    $sample,
    model: Engine::GPT_3_TURBO_1106,
    fields: [
        'name' => 'the name of the candidate',
        'email',
        'certifications' => 'list of certifications, if any',
        'workHistory' => [
            'companyName',
            'from' => 'Y-m-d if available, Year only if not, null if missing',
            'to' => 'Y-m-d if available, Year only if not, null if missing',
            'text',
        ],
    ],
);
use HelgeSverre\Extractor\Text\ImageContent;
// Build an ImageContent from a path on disk; only the path string is passed in here.
$imagePath = __DIR__ . '/../samples/sample-image.jpg';
$imageContent = ImageContent::file($imagePath);
use HelgeSverre\Extractor\Text\ImageContent;
// Read the image file ourselves and hand its contents to ImageContent::raw().
// NOTE(review): file_get_contents() returns false on failure and this snippet does not check for that.
$rawImageData = file_get_contents(__DIR__ . '/../samples/sample-image.jpg');
$imageContent = ImageContent::raw($rawImageData);
use HelgeSverre\Extractor\Text\ImageContent;
// Build an ImageContent from a URL string instead of local file data.
$imageUrl = 'https://example.com/sample-image.jpg';
$imageContent = ImageContent::url($imageUrl);
namespace App\Extractors;
use HelgeSverre\Extractor\Extraction\Extractor;use HelgeSverre\Extractor\Text\TextContent;
/**
 * Extractor whose prompt asks for a fixed set of job-posting fields.
 */
class JobPostingExtractor extends Extractor
{
    /**
     * Build the prompt text for one job posting.
     *
     * @param string|TextContent $input The job posting; it is interpolated into the prompt as a string.
     * @return string The complete prompt, ending with "OUTPUT IN JSON:\n".
     */
    public function prompt(string|TextContent $input): string
    {
        $key = $this->expectedOutputKey();

        // Each entry is one line of the prompt; empty strings become the
        // blank lines between sections once everything is joined with "\n".
        $promptLines = [
            "Extract the following fields from the job posting below:",
            "- jobTitle: The title or designation of the job.",
            "- companyName: The name of the company or organization posting the job.",
            "- location: The geographical location or workplace where the job is based.",
            "- jobType: The nature of employment (e.g., Full-time, Part-time, Contract).",
            "- description: A brief summary or detailed description of the job.",
            "- applicationDeadline: The closing date for applications, if specified.",
            "",
            "The output should be a JSON object under the key '{$key}'.",
            "",
            "INPUT STARTS HERE",
            "",
            "{$input}",
            "",
            "OUTPUT IN JSON:",
            "", // produces the trailing newline
        ];

        return implode("\n", $promptLines);
    }

    /**
     * JSON key that the prompt tells the model to place its output under.
     */
    public function expectedOutputKey(): string
    {
        return 'extractedData';
    }
}
use HelgeSverre\Extractor\Extractor;
// Register the custom extractor under the name "job-posting"; the closure
// builds a new JobPostingExtractor whenever it is called.
Extractor::extend("job-posting", function () {
    return new JobPostingExtractor();
});
use HelgeSverre\Extractor\Facades\Text;
use HelgeSverre\Extractor\Extractor;
// Obtain the job posting content from its URL via Text::web().
$jobPostingContent = Text::web("https://www.finn.no/job/fulltime/ad.html?finnkode=329443482");
// Run the extractor identified by the name 'job-posting' on that content.
$extractedData = Extractor::extract('job-posting', $jobPostingContent);
// Or you can specify the class-string instead
// ex: Extractor::extract(JobPostingExtractor::class, $jobPostingContent);
// $extractedData now contains structured information from the job posting
namespace App\Extractors;
use HelgeSverre\Extractor\Extraction\Concerns\HasValidation;
use HelgeSverre\Extractor\Extraction\Extractor;
/**
 * Job posting extractor that declares validation rules for its output.
 *
 * The rules are presumably consumed by the HasValidation trait; verify
 * against the trait before relying on specifics.
 */
class JobPostingExtractor extends Extractor
{
    use HasValidation;

    /**
     * Validation rules for the extracted fields, keyed by field name.
     *
     * Every rule name used here ('required', 'string', 'numeric', 'date') is
     * a standard Laravel validation rule.
     *
     * @return array<string, list<string>>
     */
    public function rules(): array
    {
        return [
            'jobTitle' => ['required', 'string'],
            'companyName' => ['required', 'string'],
            'location' => ['required', 'string'],
            'jobType' => ['required', 'string'],
            'salary' => ['required', 'numeric'],
            'description' => ['required', 'string'],
            'applicationDeadline' => ['required', 'date'],
        ];
    }
}
namespace App\Extractors;
use DateTime;
use App\Extractors\JobPostingDto;
use HelgeSverre\Extractor\Extraction\Concerns\HasDto;
use HelgeSverre\Extractor\Extraction\Extractor;
use Spatie\LaravelData\Data;
/**
 * Data object holding the fields of a single job posting.
 *
 * All properties are public and set through constructor promotion.
 */
class JobPostingDto extends Data
{
    /**
     * @param string    $jobTitle            Title of the job.
     * @param string    $companyName         Name of the company.
     * @param string    $location            Location of the job.
     * @param string    $jobType             Type of employment.
     * @param int|float $salary              Salary as a number.
     * @param string    $description         Description of the job.
     * @param DateTime  $applicationDeadline Application deadline.
     */
    public function __construct(
        public string $jobTitle,
        public string $companyName,
        public string $location,
        public string $jobType,
        public int|float $salary,
        public string $description,
        public DateTime $applicationDeadline,
    ) {}
}
/**
 * Job posting extractor that names JobPostingDto as its data class.
 */
class JobPostingExtractor extends Extractor
{
    use HasDto;

    /**
     * Class name of the DTO associated with this extractor.
     *
     * @return string Always JobPostingDto::class.
     */
    public function dataClass(): string
    {
        return JobPostingDto::class;
    }

    /**
     * Whether the extractor yields a collection.
     *
     * @return bool Always false.
     */
    public function isCollection(): bool
    {
        return false;
    }
}
// Delete the file from the S3 bucket
// The callback receives the file path and removes that file from the
// 'textract' storage disk.
TextractUsingS3Upload::cleanupFileUsing(function (string $filePath) {
    Storage::disk('textract')->delete($filePath);
});