PHP code example of permafrost-dev / text-classifier
1. Go to this page and download the library: "Download permafrost-dev/text-classifier library". Choose the download type "require".
2. Extract the ZIP file and open index.php.
3. Add this code to index.php.
<?php
// Load the autoloader from the extracted vendor/ directory before using any library class.
require_once 'vendor/autoload.php';
/* Start to develop here. Best regards https://php-download.com/ */
permafrost-dev / text-classifier example snippets
use Permafrost\TextClassifier\TextClassifier;
use Permafrost\TextClassifier\Classifiers\NaiveBayes;
use Permafrost\TextClassifier\Pipelines\TextProcessingPipeline;
use Permafrost\TextClassifier\Tokenizers\EmailAddressTokenizer;
use Permafrost\TextClassifier\Processors\EmailAddressNormalizer;
// Example: classify e-mail addresses with a Naive-Bayes classifier.
// Each input goes through the EmailAddressNormalizer processor and is then
// split into tokens by the EmailAddressTokenizer.
$processors = new TextProcessingPipeline([
    new EmailAddressNormalizer(),
]);

$tc = new TextClassifier($processors, [new EmailAddressTokenizer()], new NaiveBayes());

// Train the classifier in place. The return value is deliberately not assigned
// back to $tc, so the example works whether or not trainFromFile() returns the
// classifier instance.
$tc->trainFromFile(__DIR__ . '/email-train.txt');

// Placeholder addresses on reserved example domains; substitute the addresses
// you actually want to classify.
$emails = [
    'john.doe@example.com',
    'support@example.org',
];

// Print one line per address with the label chosen by the classifier.
foreach ($emails as $email) {
    echo "classification for '$email': " . $tc->classify($email) . PHP_EOL;
}
use Skyeng\Lemmatizer;
use Permafrost\TextClassifier\TextClassifier;
use Permafrost\TextClassifier\Classifiers\NaiveBayes;
use Permafrost\TextClassifier\Processors\TextLemmatizer;
use Permafrost\TextClassifier\Tokenizers\BasicTokenizer;
use Permafrost\TextClassifier\Tokenizers\NGramTokenizer;
use Permafrost\TextClassifier\Processors\StopwordRemover;
use Permafrost\TextClassifier\Processors\BasicTextNormalizer;
use Permafrost\TextClassifier\Pipelines\TextProcessingPipeline;
// Example: classify short phrases using separate processing steps for
// training and for classification.

// Training steps: the lemmatizer adds the lemmas of each token, which enlarges
// the training data for keyword-style tokens; basic normalization follows.
$lemmatizerStep = new TextLemmatizer(new Lemmatizer());
$stepsForTraining = [$lemmatizerStep, new BasicTextNormalizer()];

// Classification steps: phrases are being processed here, so stopwords are
// stripped in addition to the basic normalization.
$stepsForClassifying = [new StopwordRemover(), new BasicTextNormalizer()];

// Two tokenizers are combined: word-based tokens plus trigrams (N=3), giving a
// mix of whole keywords and partial keywords to match against.
$tokenizerSet = [new BasicTokenizer(), new NGramTokenizer(3)];

$pipeline = new TextProcessingPipeline($stepsForTraining, $stepsForClassifying);
$bayes = new NaiveBayes(); // Naive-Bayes is the classification algorithm

$sentimentClassifier = new TextClassifier($pipeline, $tokenizerSet, $bayes);
$sentimentClassifier->trainFromFile(__DIR__ . '/sentiment-train.txt');

$samplePhrases = [
    'this is fantastic',
    'this is terrible',
];

// Emit "<phrase> - <label>" for every sample phrase.
foreach ($samplePhrases as $sample) {
    $label = $sentimentClassifier->classify($sample);
    echo "{$sample} - {$label}" . PHP_EOL;
}
Loading, please wait ...
Before you can download the PHP files, the dependencies must be resolved. This can take a few minutes. Please be patient.