PHP code example of codename / parquet

1. Go to this page and download the library: Download codename/parquet library. Choose the download type "require".

2. Extract the ZIP file and open the index.php.

3. Add this code to the index.php.
    
        
<?php
// Load Composer's autoloader relative to this file, so the script works
// regardless of the current working directory or include_path.
require_once __DIR__ . '/vendor/autoload.php';

/* Start to develop here. Best regards https://php-download.com/ */

    

codename / parquet example snippets


use codename\parquet\ParquetReader;

// open file stream (in this example for reading only)
$fileStream = fopen(__DIR__.'/test.parquet', 'r');
if ($fileStream === false) {
  // fopen() returns false on failure; stop before handing it to the reader
  throw new \RuntimeException('Unable to open test.parquet for reading');
}

// open parquet file reader
$parquetReader = new ParquetReader($fileStream);

// Print custom metadata or do other stuff with it
print_r($parquetReader->getCustomMetadata());

// get file schema (available straight after opening parquet reader)
// however, get only data fields as only they contain data values
$dataFields = $parquetReader->schema->GetDataFields();

// enumerate through row groups in this file
// (the count is fetched once instead of on every loop iteration)
$rowGroupCount = $parquetReader->getRowGroupCount();
for ($i = 0; $i < $rowGroupCount; $i++) {
  // create row group reader
  $groupReader = $parquetReader->OpenRowGroupReader($i);

  // read all columns inside each row group (you have an option to read only
  // the columns you need, by passing just those fields).
  // NOTE(review): the original snippet was cut off at this point; ReadColumn()
  // and getData() are assumed from the library's reader API - confirm against
  // the installed version.
  foreach ($dataFields as $field) {
    $column = $groupReader->ReadColumn($field);

    // Print the column's values or do other stuff with them
    print_r($column->getData());
  }
}

// release the file handle once reading is done
fclose($fileStream);

use codename\parquet\ParquetWriter;

use codename\parquet\data\Schema;
use codename\parquet\data\DataField;
use codename\parquet\data\DataColumn;

// Build the data columns: each one pairs a field definition (name + type)
// with the values to store.
// NOTE: this is a little bit different to C# due to the type system of PHP
$idColumn = new DataColumn(DataField::createFromType('id', 'integer'), [1, 2]);
$cityColumn = new DataColumn(DataField::createFromType('city', 'string'), ['London', 'Derby']);

// The file schema is assembled from the fields of the columns above.
$schema = new Schema([
  $idColumn->getField(),
  $cityColumn->getField(),
]);

// 'w+' creates the file if it doesn't exist yet, or truncates it if it does.
$fileStream = fopen(__DIR__.'/test.parquet', 'w+');

$parquetWriter = new ParquetWriter($schema, $fileStream);

// Custom metadata is optional.
$parquetWriter->setCustomMetadata(['author' => 'santa', 'date' => '2020-01-01']);

// Open a new row group in the file and write both columns into it.
$groupWriter = $parquetWriter->CreateRowGroup();
$groupWriter->WriteColumn($idColumn);
$groupWriter->WriteColumn($cityColumn);

// PHP has no C#-style 'using' block, so ParquetWriter and
// ParquetRowGroupWriter provide finish() methods that are called explicitly:
// inner writer(s) first, the parquet writer last.
$groupWriter->finish();
$parquetWriter->finish();

use codename\parquet\helper\ParquetDataIterator;

// Create an iterator over the rows of the given parquet file.
$iterateMe = ParquetDataIterator::fromFile('your-parquet-file.parquet');

foreach ($iterateMe as $row) {
  // $row is an associative array that already combines
  // the data of all columns back into a row-like structure.
}

use codename\parquet\helper\ParquetDataWriter;

// describe the columns of the file to be written
$schema = new Schema([
  DataField::createFromType('id', 'integer'),
  DataField::createFromType('name', 'string'),
]);

// Open with 'w+': creates the file if it doesn't exist yet, truncates it otherwise.
// ('r+' fails when the file is missing and does not truncate existing content.)
$handle = fopen('sample.parquet', 'w+');
if ($handle === false) {
  // fopen() returns false on failure; stop before handing it to the writer
  throw new \RuntimeException('Unable to open sample.parquet for writing');
}
$dataWriter = new ParquetDataWriter($handle, $schema);

// add two records at once
$dataToWrite = [
  [ 'id' => 1, 'name' => 'abc' ],
  [ 'id' => 2, 'name' => 'def' ],
];
$dataWriter->putBatch($dataToWrite);

// we add a third, single one
$dataWriter->put([ 'id' => 3, 'name' => 'ghi' ]);

$dataWriter->finish(); // Don't forget to finish at some point.
fclose($handle); // You may close the handle, if you have to.

use codename\parquet\data\ListField;
use codename\parquet\data\MapField;
use codename\parquet\data\StructField;
use codename\parquet\helper\ParquetDataIterator;
use codename\parquet\helper\ParquetDataWriter;

// Schema mixing a plain data field with map, struct and list fields.
$schema = new Schema([
  DataField::createFromType('id', 'integer'),
  // map: string keys, each value being a struct with two members
  new MapField(
    'aMapField',
    DataField::createFromType('someKey', 'string'),
    StructField::createWithFieldArray(
      'aStructField', // the field name and the member array are separate arguments
      [
        DataField::createFromType('anInteger', 'integer'),
        DataField::createFromType('aString', 'string'),
      ]
    )
  ),
  // struct placed directly at the root level of the schema
  StructField::createWithFieldArray(
    'rootLevelStructField',
    [
      DataField::createFromType('anotherInteger', 'integer'),
      DataField::createFromType('anotherString', 'string'),
    ]
  ),
  // list of integers
  new ListField(
    'aListField',
    DataField::createFromType('someInteger', 'integer'),
  )
]);

// Open with 'w+': creates the file if it doesn't exist yet, truncates it otherwise.
// ('r+' fails when the file is missing and does not truncate existing content.)
$handle = fopen('complex.parquet', 'w+');
if ($handle === false) {
  // fopen() returns false on failure; stop before handing it to the writer
  throw new \RuntimeException('Unable to open complex.parquet for writing');
}
$dataWriter = new ParquetDataWriter($handle, $schema);

$dataToWrite = [
  // This is a single dataset:
  [
    'id' => 1,
    'aMapField' => [
      'key1' => [ 'anInteger' => 123, 'aString' => 'abc' ],
      'key2' => [ 'anInteger' => 456, 'aString' => 'def' ],
    ],
    'rootLevelStructField' => [
      'anotherInteger' => 7,
      'anotherString' => 'in paradise'
    ],
    'aListField' => [ 1, 2, 3 ]
  ],
  // ... add more datasets as you wish.
];
$dataWriter->putBatch($dataToWrite);
$dataWriter->finish();

// release the write handle now that the writer has been finished
fclose($handle);

$iterateMe = ParquetDataIterator::fromFile('complex.parquet');

// e.g. collect every dataset into a full-blown, sequentially indexed PHP array
// (keys are discarded, which matches appending each dataset one by one):
$readData = iterator_to_array($iterateMe, false);

// Now compare $readData to the original data supplied -
// manually, via print_r, var_dump, assertions, comparisons or whatever you like.