1. Go to this page and download the library: Download cywolf/nlp-tools library. Choose the download type you require.
2. Extract the ZIP file and open the index.php.
3. Add this code to the index.php.
<?php
require_once('vendor/autoload.php');
/* Start developing here. Best regards, https://php-download.com/ */
cywolf / nlp-tools example snippets
use Cywolf\NlpTools\Service\StopWordsFactory;
/**
 * Example consumer showing how to look up stop words through StopWordsFactory.
 */
class YourClass
{
    /** Factory that hands out a stop-word list for a language code. */
    protected StopWordsFactory $stopWordsFactory;

    /**
     * @param StopWordsFactory $stopWordsFactory Injected stop-word factory.
     */
    public function __construct(StopWordsFactory $stopWordsFactory)
    {
        $this->stopWordsFactory = $stopWordsFactory;
    }

    /**
     * Walks through the stop-word API: fetch the list for one language,
     * test a single word against it, then read the whole list.
     */
    public function stopWordsExample(): void
    {
        $languageCode = 'fr';
        $candidateWord = 'le';

        // Obtain the stop-word list for the chosen language.
        $stopWordList = $this->stopWordsFactory->getStopWords($languageCode);

        // Membership test for a single word.
        if ($stopWordList->isStopWord($candidateWord)) {
            // The candidate is a stop word; react to it here.
        }

        // The full list is available as well.
        $completeList = $stopWordList->getStopWords();
    }
}
use Cywolf\NlpTools\Service\LanguageDetectionService;
/**
 * Example consumer showing how to detect the language of a text.
 */
class YourClass
{
    /** Service used to detect the language of a text. */
    protected LanguageDetectionService $languageDetector;

    /**
     * @param LanguageDetectionService $languageDetector Injected detection service.
     */
    public function __construct(LanguageDetectionService $languageDetector)
    {
        $this->languageDetector = $languageDetector;
    }

    /**
     * Runs language detection on a fixed English sample sentence.
     *
     * @return string The detected language code; 'en' is expected for this sample.
     */
    public function detectionExample(): string
    {
        $sample = "This is an example of English text";

        $detectedLanguage = $this->languageDetector->detectLanguage($sample);

        return $detectedLanguage;
    }
}
use Cywolf\NlpTools\Service\TextAnalysisService;
/**
 * Example consumer showing the basic text-analysis operations.
 */
class YourClass
{
    /** Service providing tokenization, stemming and stop-word removal. */
    protected TextAnalysisService $textAnalyzer;

    /**
     * @param TextAnalysisService $textAnalyzer Injected analysis service.
     */
    public function __construct(TextAnalysisService $textAnalyzer)
    {
        $this->textAnalyzer = $textAnalyzer;
    }

    /**
     * Tokenizes, stems and strips stop words from a fixed sample sentence.
     *
     * @return array Results keyed by 'tokens', 'stemmed' and 'cleaned'.
     */
    public function analysisExample(): array
    {
        $sample = "Here is an example text to analyze";
        $language = 'en';

        // Array elements are evaluated top to bottom, so the service calls
        // run in the order: tokenize, stem, removeStopWords.
        return [
            'tokens' => $this->textAnalyzer->tokenize($sample),
            'stemmed' => $this->textAnalyzer->stem($sample, $language),
            'cleaned' => $this->textAnalyzer->removeStopWords($sample, $language)
        ];
    }
}
use Cywolf\NlpTools\Service\TextVectorizerService;
/**
 * Example consumer showing text vectorization and similarity calculations.
 */
class YourClass
{
    /** Service that builds vectors from texts and compares them. */
    protected TextVectorizerService $vectorizer;

    /**
     * @param TextVectorizerService $vectorizer Injected vectorizer service.
     */
    public function __construct(TextVectorizerService $vectorizer)
    {
        $this->vectorizer = $vectorizer;
    }

    /**
     * Vectorizes three sample documents and compares them.
     *
     * @return array Results keyed by 'tfidf', 'dtm', 'similarity' and 'matrix'.
     */
    public function vectorizationExample(): array
    {
        $documents = [
            "This is the first document to analyze",
            "A second document with different content",
            "And finally a third example"
        ];
        $language = 'en';
        $result = [];

        // TF-IDF vectors for all documents.
        $result['tfidf'] = $this->vectorizer->createTfIdfVectors($documents, $language);

        // Document-term matrix for the same documents.
        $result['dtm'] = $this->vectorizer->createDocumentTermMatrix($documents, $language);

        // Cosine similarity between the first two TF-IDF vectors.
        $result['similarity'] = $this->vectorizer->cosineSimilarity(
            $result['tfidf']['vectors'][0],
            $result['tfidf']['vectors'][1]
        );

        // Similarity matrix over every TF-IDF vector.
        $result['matrix'] = $this->vectorizer->calculateSimilarityMatrix($result['tfidf']['vectors']);

        return $result;
    }
}
use Cywolf\NlpTools\Service\TextClusteringService;
/**
 * Example consumer showing the three clustering strategies.
 */
class YourClass
{
    /** Service that groups texts into clusters. */
    protected TextClusteringService $clustering;

    /**
     * @param TextClusteringService $clustering Injected clustering service.
     */
    public function __construct(TextClusteringService $clustering)
    {
        $this->clustering = $clustering;
    }

    /**
     * Clusters five sample sentences with each available strategy.
     *
     * @return array Results keyed by 'kmeans', 'hierarchical' and 'similarity'.
     */
    public function clusteringExample(): array
    {
        $sentences = [
            "The cat sleeps on the couch",
            "My dog plays in the garden",
            "I like cats and domestic felines",
            "The dog is man's best friend",
            "Pets bring joy"
        ];
        $language = 'en';

        $groupCount = 2;            // k for k-means
        $distanceThreshold = 0.6;   // used by hierarchical clustering
        $similarityThreshold = 0.7; // used by similarity-based clustering

        // Array elements are evaluated top to bottom: k-means first, then
        // hierarchical, then similarity-based clustering.
        return [
            'kmeans' => $this->clustering->kMeansClustering($sentences, $groupCount, $language),
            'hierarchical' => $this->clustering->hierarchicalClustering(
                $sentences,
                $distanceThreshold,
                $language
            ),
            'similarity' => $this->clustering->similarityBasedClustering(
                $sentences,
                $similarityThreshold,
                $language
            )
        ];
    }
}
use Cywolf\NlpTools\Service\TopicModelingService;
/**
 * Example consumer showing topic, term and key-phrase extraction.
 */
class YourClass
{
    /** Service that extracts topics, terms and key phrases from texts. */
    protected TopicModelingService $topicModeling;

    /**
     * @param TopicModelingService $topicModeling Injected topic-modeling service.
     */
    public function __construct(TopicModelingService $topicModeling)
    {
        $this->topicModeling = $topicModeling;
    }

    /**
     * Extracts topics and representative terms from five sample sentences,
     * and key phrases from the first sentence.
     *
     * @return array Results keyed by 'topics', 'terms' and 'key_phrases'.
     */
    public function topicsExample(): array
    {
        $documents = [
            "The new economic policy favors local businesses",
            "The government announces an economic recovery plan",
            "Researchers have discovered a new medical treatment",
            "A scientific study reveals the impact of climate on health",
            "The stock market saw a strong rise following economic announcements"
        ];

        $topicCount = 2;     // number of topics to extract
        $termsPerTopic = 5;  // number of terms per topic
        $termCount = 10;     // number of representative terms to extract
        $phraseCount = 3;    // number of key phrases to extract

        $result = [];

        // Topics across the whole document set.
        $result['topics'] = $this->topicModeling->extractTopics($documents, $topicCount, $termsPerTopic);

        // Representative terms for the group of texts.
        $result['terms'] = $this->topicModeling->extractTopicTerms($documents, $termCount);

        // Key phrases from a single text (the first document).
        $result['key_phrases'] = $this->topicModeling->extractKeyPhrases($documents[0], $phraseCount);

        return $result;
    }
}
namespace YourVendor\YourExtension\Service;
use Cywolf\NlpTools\Service\TextAnalysisService;
use Cywolf\NlpTools\Service\LanguageDetectionService;
use Cywolf\NlpTools\Service\TextClusteringService;
use Cywolf\NlpTools\Service\TopicModelingService;
/**
 * Facade that combines language detection, text analysis, clustering and
 * topic modeling behind two convenience methods.
 */
class TextProcessingService
{
    /** Tokenization, stemming and stop-word removal. */
    protected TextAnalysisService $textAnalyzer;

    /** Language detection for a single text. */
    protected LanguageDetectionService $languageDetector;

    /** Clustering of several texts. */
    protected TextClusteringService $clustering;

    /** Topic and key-phrase extraction. */
    protected TopicModelingService $topicModeling;

    /**
     * @param TextAnalysisService      $textAnalyzer     Injected analysis service.
     * @param LanguageDetectionService $languageDetector Injected detection service.
     * @param TextClusteringService    $clustering       Injected clustering service.
     * @param TopicModelingService     $topicModeling    Injected topic-modeling service.
     */
    public function __construct(
        TextAnalysisService $textAnalyzer,
        LanguageDetectionService $languageDetector,
        TextClusteringService $clustering,
        TopicModelingService $topicModeling
    ) {
        $this->textAnalyzer = $textAnalyzer;
        $this->languageDetector = $languageDetector;
        $this->clustering = $clustering;
        $this->topicModeling = $topicModeling;
    }

    /**
     * Analyzes a single text. The language is detected first and then passed
     * to stemming, stop-word removal and key-phrase extraction.
     *
     * @param string $text Text to analyze.
     *
     * @return array Results keyed by 'language', 'tokens', 'stemmed',
     *               'without_stopwords' and 'key_phrases'.
     */
    public function processText(string $text): array
    {
        $detectedLanguage = $this->languageDetector->detectLanguage($text);

        $analysis = ['language' => $detectedLanguage];
        $analysis['tokens'] = $this->textAnalyzer->tokenize($text);
        $analysis['stemmed'] = $this->textAnalyzer->stem($text, $detectedLanguage);
        $analysis['without_stopwords'] = $this->textAnalyzer->removeStopWords($text, $detectedLanguage);
        $analysis['key_phrases'] = $this->topicModeling->extractKeyPhrases($text, 3, $detectedLanguage);

        return $analysis;
    }

    /**
     * Analyzes a set of texts: k-means clustering into 3 groups followed by
     * extraction of 3 topics.
     *
     * @param array $texts Texts to cluster and model.
     *
     * @return array Results keyed by 'clusters' and 'topics'.
     */
    public function analyzeMultipleTexts(array $texts): array
    {
        $groupAndTopicCount = 3;

        // Array elements are evaluated top to bottom: clustering, then topics.
        return [
            'clusters' => $this->clustering->kMeansClustering($texts, $groupAndTopicCount),
            'topics' => $this->topicModeling->extractTopics($texts, $groupAndTopicCount)
        ];
    }
}
use TYPO3\CMS\Core\Cache\CacheManager;
use Cywolf\NlpTools\Service\TextAnalysisService;
/**
 * Example controller showing how to attach a TYPO3 cache to the text
 * analysis service before using it.
 */
class YourController
{
    /** Service providing tokenization; accepts a cache via setCache(). */
    protected TextAnalysisService $textAnalyzer;

    /** TYPO3 cache manager used to look up the cache by identifier. */
    protected CacheManager $cacheManager;

    /**
     * @param TextAnalysisService $textAnalyzer Injected analysis service.
     * @param CacheManager        $cacheManager Injected TYPO3 cache manager.
     */
    public function __construct(
        TextAnalysisService $textAnalyzer,
        CacheManager $cacheManager
    ) {
        $this->textAnalyzer = $textAnalyzer;
        $this->cacheManager = $cacheManager;
    }

    /**
     * Fetches the 'nlp_tools' cache, hands it to the analysis service and
     * then tokenizes a sample text.
     */
    public function yourAction(): void
    {
        // Get the cache
        $cache = $this->cacheManager->getCache('nlp_tools');

        // Pass it to the service for faster calculations
        $this->textAnalyzer->setCache($cache);

        // Sample input; $text must be defined before use, otherwise PHP
        // raises an undefined-variable warning and passes null to tokenize().
        // Replace it with the text your action actually needs to analyze.
        $text = 'Here is an example text to analyze';

        // Use the service normally
        $tokens = $this->textAnalyzer->tokenize($text);
    }
}
Loading, please wait ...
Before you can download the PHP files, the dependencies must be resolved. This can take a few minutes. Please be patient.