Lucene++ - a full-featured, C++ search engine
API Documentation


Loading...
Searching...
No Matches
PorterStemmer.h
Go to the documentation of this file.
1
2// Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3// Distributable under the terms of either the Apache License (Version 2.0)
4// or the GNU Lesser General Public License.
6
7#ifndef PORTERSTEMMER_H
8#define PORTERSTEMMER_H
9
10#include "LuceneObject.h"
11
12namespace Lucene {
13
27public:
29 virtual ~PorterStemmer();
30
32
33protected:
34 wchar_t* b; // buffer for word to be stemmed
35 int32_t k; // offset to the end of the string
36 int32_t j; // a general offset into the string
37 int32_t i; // initial length of word
38 bool dirty;
39
40public:
41 bool stem(CharArray word);
42
46 bool stem(wchar_t* b, int32_t k);
47
48 wchar_t* getResultBuffer();
49 int32_t getResultLength();
50
51protected:
53 bool cons(int32_t i);
54
63 int32_t m();
64
67
69 bool doublec(int32_t j);
70
76 bool cvc(int32_t i);
77
79 bool ends(const wchar_t* s);
80
82 void setto(const wchar_t* s);
83
84 void r(const wchar_t* s);
85
105 void step1ab();
106
108 void step1c();
109
112 void step2();
113
115 void step3();
116
118 void step4();
119
121 void step5();
122};
123
124}
125
126#endif
#define LUCENE_CLASS(Name)
Definition LuceneObject.h:24
Base class for all Lucene classes.
Definition LuceneObject.h:31
This is the Porter stemming algorithm, coded up as thread-safe ANSI C by the author.
Definition PorterStemmer.h:26
void step1ab()
step1ab() gets rid of plurals and -ed or -ing, e.g. caresses -> caress, ponies -> poni, cats -> cat.
bool stem(CharArray word)
void step1c()
Turns terminal y to i when there is another vowel in the stem.
bool dirty
Definition PorterStemmer.h:38
bool stem(wchar_t *b, int32_t k)
In stem(b, k), b is a wchar_t pointer, and the string to be stemmed is from b[0] to b[k] inclusive....
void r(const wchar_t *s)
int32_t i
Definition PorterStemmer.h:37
void step4()
Takes off -ant, -ence etc., in context vcvc<v>.
void setto(const wchar_t *s)
Sets (j+1),...k to the characters in the string s, readjusting k.
bool ends(const wchar_t *s)
Returns true if 0,...k ends with the string s.
int32_t j
Definition PorterStemmer.h:36
int32_t m()
Measures the number of consonant sequences between 0 and j. If c is a consonant sequence and v a vowe...
bool cons(int32_t i)
Returns true if b[i] is a consonant. ('b' means 'z->b', but here and below we drop 'z->' in comments.)
wchar_t * getResultBuffer()
bool doublec(int32_t j)
Return true if j,(j-1) contain a double consonant.
void step3()
Deals with -ic-, -full, -ness etc., using a strategy similar to step2.
int32_t k
Definition PorterStemmer.h:35
void step5()
Removes a final -e if m() > 1, and changes -ll to -l if m() > 1.
bool cvc(int32_t i)
Return true if i-2,i-1,i has the form consonant - vowel - consonant and also if the second c is not w, x or y.
bool vowelinstem()
Return true if 0,...j contains a vowel.
wchar_t * b
Definition PorterStemmer.h:34
void step2()
Maps double suffixes to single ones, so -ization (= -ize plus -ation) maps to -ize etc....
Definition AbstractAllTermDocs.h:12

clucene.sourceforge.net