1. Go to this page and download the library: Download codename/parquet library. Choose the download type you require.
2. Extract the ZIP file and open the index.php.
3. Add this code to the index.php.
<?php
require_once('vendor/autoload.php');
/* Start to develop here. Best regards https://php-download.com/ */
// codename/parquet example snippets
use codename\parquet\ParquetReader;

// Example: open a parquet file and inspect its metadata and schema.

// Open the file stream (read-only in this example).
$fileStream = fopen(__DIR__.'/test.parquet', 'r');
if ($fileStream === false) {
    // fopen() returns false on failure; stop here instead of handing a
    // non-resource value to the reader.
    throw new \RuntimeException('Unable to open test.parquet for reading');
}

// Open the parquet file reader on top of the stream.
$parquetReader = new ParquetReader($fileStream);

// Print custom metadata or do other stuff with it.
print_r($parquetReader->getCustomMetadata());

// Get the file schema (available straight after opening the parquet reader);
// however, get only the data fields, as only they contain data values.
$dataFields = $parquetReader->schema->GetDataFields();
for($i = 0; $i < $parquetReader->getRowGroupCount(); $i++)
{
// create row group reader
$groupReader = $parquetReader->OpenRowGroupReader($i);
// read all columns inside each row group (you have an option to read only
//
use codename\parquet\ParquetWriter;
use codename\parquet\data\Schema;
use codename\parquet\data\DataField;
use codename\parquet\data\DataColumn;

// Example: write two columns ('id', 'city') into a single row group.

// Create data columns with schema metadata and the data you need.
$idColumn = new DataColumn(
    DataField::createFromType('id', 'integer'), // NOTE: this is a little bit different to C# due to the type system of PHP
    [ 1, 2 ]
);
$cityColumn = new DataColumn(
    DataField::createFromType('city', 'string'),
    [ "London", "Derby" ]
);

// Create the file schema from the fields of both columns.
$schema = new Schema([$idColumn->getField(), $cityColumn->getField()]);

// Create the file handle with the w+ flag, to create a new file - if it
// doesn't exist yet - or truncate it, if it exists.
$fileStream = fopen(__DIR__.'/test.parquet', 'w+');
if ($fileStream === false) {
    // fopen() returns false on failure; do not pass that on to the writer.
    throw new \RuntimeException('Unable to open test.parquet for writing');
}
$parquetWriter = new ParquetWriter($schema, $fileStream);

// Optional: write custom metadata.
$metadata = ['author'=>'santa', 'date'=>'2020-01-01'];
$parquetWriter->setCustomMetadata($metadata);

// Create a new row group in the file and write both columns into it.
$groupWriter = $parquetWriter->CreateRowGroup();
$groupWriter->WriteColumn($idColumn);
$groupWriter->WriteColumn($cityColumn);

// As we have no 'using' in PHP, finish() methods are implemented
// for ParquetWriter and ParquetRowGroupWriter.
$groupWriter->finish(); // finish inner writer(s)
$parquetWriter->finish(); // finish the parquet writer last

// Release the handle once both writers have been finished.
fclose($fileStream);
use codename\parquet\helper\ParquetDataIterator;

// Example: walk over a parquet file one row at a time.
foreach (ParquetDataIterator::fromFile('your-parquet-file.parquet') as $dataset) {
    // Each $dataset is an associative array in which the data of all
    // columns has already been combined back into a row-like structure.
}
use codename\parquet\helper\ParquetDataWriter;

// Example: write row-like records with ParquetDataWriter.

// Schema with the two fields every record below provides.
$schema = new Schema([
    DataField::createFromType('id', 'integer'),
    DataField::createFromType('name', 'string'),
]);

// Use 'w+' so the file is created if it doesn't exist yet and truncated if it
// does; 'r+' fails when the file is missing and leaves old content in place.
$handle = fopen('sample.parquet', 'w+');
if ($handle === false) {
    // fopen() returns false on failure; do not pass that on to the writer.
    throw new \RuntimeException('Unable to open sample.parquet for writing');
}
$dataWriter = new ParquetDataWriter($handle, $schema);

// Add two records at once.
$dataToWrite = [
    [ 'id' => 1, 'name' => 'abc' ],
    [ 'id' => 2, 'name' => 'def' ],
];
$dataWriter->putBatch($dataToWrite);

// Add a third, single one.
$dataWriter->put([ 'id' => 3, 'name' => 'ghi' ]);

$dataWriter->finish(); // Don't forget to finish at some point.
fclose($handle); // You may close the handle, if you have to.
use codename\parquet\helper\ParquetDataIterator;
use codename\parquet\helper\ParquetDataWriter;
// Nested field types used by the schema below.
// NOTE(review): assumed to live next to DataField in the data namespace - confirm.
use codename\parquet\data\MapField;
use codename\parquet\data\StructField;
use codename\parquet\data\ListField;

// Example: round-trip a dataset with nested fields (map, struct, list).

$schema = new Schema([
    DataField::createFromType('id', 'integer'),
    // Map field: string key, struct value.
    new MapField(
        'aMapField',
        DataField::createFromType('someKey', 'string'),
        StructField::createWithFieldArray(
            'aStructField',
            [
                DataField::createFromType('anInteger', 'integer'),
                DataField::createFromType('aString', 'string'),
            ]
        )
    ),
    // Struct field at the root level of the schema.
    StructField::createWithFieldArray(
        'rootLevelStructField',
        [
            DataField::createFromType('anotherInteger', 'integer'),
            DataField::createFromType('anotherString', 'string'),
        ]
    ),
    // List field with integer elements.
    new ListField(
        'aListField',
        DataField::createFromType('someInteger', 'integer'),
    )
]);

// Use 'w+' so the file is created if it doesn't exist yet and truncated if it
// does; 'r+' fails when the file is missing and leaves old content in place.
$handle = fopen('complex.parquet', 'w+');
if ($handle === false) {
    // fopen() returns false on failure; do not pass that on to the writer.
    throw new \RuntimeException('Unable to open complex.parquet for writing');
}
$dataWriter = new ParquetDataWriter($handle, $schema);

$dataToWrite = [
    // This is a single dataset:
    [
        'id' => 1,
        'aMapField' => [
            'key1' => [ 'anInteger' => 123, 'aString' => 'abc' ],
            'key2' => [ 'anInteger' => 456, 'aString' => 'def' ],
        ],
        'rootLevelStructField' => [
            'anotherInteger' => 7,
            'anotherString' => 'in paradise'
        ],
        'aListField' => [ 1, 2, 3 ]
    ],
    // ... add more datasets as you wish.
];
$dataWriter->putBatch($dataToWrite);
$dataWriter->finish();

// Release the write handle before the file is opened again for reading.
fclose($handle);

$iterateMe = ParquetDataIterator::fromFile('complex.parquet');

// E.g. write back into a full-blown PHP array:
$readData = [];
foreach ($iterateMe as $dataset) {
    $readData[] = $dataset;
}

// And now compare this to the original data supplied -
// manually, by print_r, var_dump, assertions, comparisons or whatever you like.
Loading please wait ...
Before you can download the PHP files, the dependencies must be resolved. This can take a few minutes. Please be patient.