Lucene++ - a full-featured, c++ search engine
API Documentation


Loading...
Searching...
No Matches
Token.h
Go to the documentation of this file.
1
2// Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3// Distributable under the terms of either the Apache License (Version 2.0)
4// or the GNU Lesser General Public License.
6
7#ifndef TOKEN_H
8#define TOKEN_H
9
10#include "Attribute.h"
11#include "AttributeSource.h"
12
13namespace Lucene {
14
/// A Token is an occurrence of a term from the text of a field.  It carries the term's text
/// (held in a resizable character buffer), the start and end offsets of the term, a lexical
/// type string, a flags bitset, an optional payload and a position increment.
///
/// NOTE(review): this listing was recovered from a rendered documentation page that dropped
/// every comment line and several declarations.  The numbered declarations marked "restored"
/// below were re-inserted from the page's own member index; comment lines are unnumbered.
76class LPPAPI Token : public Attribute {
77public:
    /// Constructs a Token with null text.  (restored)
79 Token();
80
    /// Constructs a Token with null text and start and end offsets.
84 Token(int32_t start, int32_t end);
85
    /// Constructs a Token with null text and start and end offsets plus the Token type.
90 Token(int32_t start, int32_t end, const String& type);
91
    /// Constructs a Token with null text and start and end offsets plus flags.
96 Token(int32_t start, int32_t end, int32_t flags);
97
    /// Constructs a Token with the given term text, start and end offsets.  The type
    /// defaults to "word".
103 Token(const String& text, int32_t start, int32_t end);
104
    /// Constructs a Token with the given term text, start and end offsets and type.
111 Token(const String& text, int32_t start, int32_t end, const String& type);
112
    /// Constructs a Token with the given term text, start and end offsets and flags.
119 Token(const String& text, int32_t start, int32_t end, int32_t flags);
120
    /// Constructs a Token with the given term buffer (offset and length), start and end offsets.
122 Token(CharArray startTermBuffer, int32_t termBufferOffset, int32_t termBufferLength, int32_t start, int32_t end);
123
124 virtual ~Token();
125
    // NOTE(review): restored by inference - the page references the LUCENE_CLASS(Name) macro
    // and listing line 126 is the only unexplained gap here; confirm against the header.
126 LUCENE_CLASS(Token);
127
128public:
    /// Default lexical type for tokens (type() documents the default as "word").
129 static const String& DEFAULT_TYPE();
130
131protected:
    // Presumably the smallest term buffer that will be allocated - TODO confirm in Token.cpp.
132 static const int32_t MIN_BUFFER_SIZE;
133
    // Term text storage and the number of valid characters in it.
134 CharArray _termBuffer;
135 int32_t _termLength;
    // Start/end offsets of the term.  (_startOffset restored)
136 int32_t _startOffset;
137 int32_t _endOffset;
138 String _type;
139 int32_t flags;
    // (payload and positionIncrement restored)
140 PayloadPtr payload;
141 int32_t positionIncrement;
142
143public:
    /// Set the position increment.  This determines the position of this token relative to
    /// the previous Token.
163 virtual void setPositionIncrement(int32_t positionIncrement);
164
    /// Returns the position increment of this Token.
167 virtual int32_t getPositionIncrement();
168
    /// Returns the Token's term text.
174 virtual String term();
175
    /// Copies the contents of buffer, starting at offset for length characters, into the
    /// termBuffer array.
180 virtual void setTermBuffer(const wchar_t* buffer, int32_t offset, int32_t length);
181
    /// Copies the contents of buffer into the termBuffer array.
184 virtual void setTermBuffer(const String& buffer);
185
    /// Copies the contents of buffer, starting at offset and continuing for length
    /// characters, into the termBuffer array.
190 virtual void setTermBuffer(const String& buffer, int32_t offset, int32_t length);
191
    /// Returns the internal termBuffer character array which you can then directly alter.
195 virtual CharArray termBuffer();
196
    /// Optimized implementation of termBuffer.
198 virtual wchar_t* termBufferArray();
199
    /// Grows the termBuffer to at least size newSize, preserving the existing content.
206 virtual CharArray resizeTermBuffer(int32_t newSize);
207
    /// Return number of valid characters (length of the term) in the termBuffer array.
209 virtual int32_t termLength();
210
    /// Set number of valid characters (length of the term) in the termBuffer array.
215 virtual void setTermLength(int32_t length);
216
    /// Returns this Token's starting offset, the position of the first character of the token.
222 virtual int32_t startOffset();
223
    /// Set the starting offset.
226 virtual void setStartOffset(int32_t offset);
227
    /// Returns this Token's ending offset, one greater than the position of the last
    /// character of the token.
230 virtual int32_t endOffset();
231
    /// Set the ending offset.
234 virtual void setEndOffset(int32_t offset);
235
    /// Set the starting and ending offset.
238 virtual void setOffset(int32_t startOffset, int32_t endOffset);
239
    /// Returns this Token's lexical type.  Defaults to "word".
241 virtual String type();
242
    /// Set the lexical type.
245 virtual void setType(const String& type);
246
    /// Get the bitset for any bits that have been set.  This is distinct from type().
252 virtual int32_t getFlags();
253
    /// Set the flags bitset.
255 virtual void setFlags(int32_t flags);
256
    /// Returns this Token's payload.  (restored)
258 virtual PayloadPtr getPayload();
259
    /// Sets this Token's payload.
261 virtual void setPayload(const PayloadPtr& payload);
262
    /// Returns a string representation of the object.
263 virtual String toString();
264
    /// Resets the term text, payload, flags, positionIncrement, startOffset, endOffset and
    /// token type to their defaults.
266 virtual void clear();
267
    /// Shallow clone.  (restored)
268 virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr());
269
    /// Makes a clone, but replaces the term buffer and start/end offset in the process.
272 TokenPtr clone(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset);
273
    /// All values used for computation of hashCode() should be checked here for equality.
274 virtual bool equals(const LuceneObjectPtr& other);
    /// Returns a hash code for this Token.
275 virtual int32_t hashCode();
276
    /// Shorthand for calling clear, setTermBuffer(char[], int, int), setStartOffset,
    /// setEndOffset, setType.
280 TokenPtr reinit(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset, const String& newType);
281
    /// As above, for the overload that takes no type argument.
285 TokenPtr reinit(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset);
286
    /// Shorthand for calling clear, setTermBuffer(String), setStartOffset, setEndOffset, setType.
290 TokenPtr reinit(const String& newTerm, int32_t newStartOffset, int32_t newEndOffset, const String& newType);
291
    /// Shorthand for calling clear, setTermBuffer, setStartOffset, setEndOffset, setType,
    /// using the given sub-range of newTerm.
295 TokenPtr reinit(const String& newTerm, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset, const String& newType);
296
    /// Shorthand for calling clear, setTermBuffer(String), setStartOffset, setEndOffset,
    /// setType on Token::DEFAULT_TYPE.
300 TokenPtr reinit(const String& newTerm, int32_t newStartOffset, int32_t newEndOffset);
301
    /// Shorthand for calling clear, setTermBuffer(String, int, int), setStartOffset,
    /// setEndOffset, for the overload that takes no type argument.
305 TokenPtr reinit(const String& newTerm, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset);
306
    /// Copy the prototype token's fields into this one.  Note: Payloads are shared.
308 void reinit(const TokenPtr& prototype);
309
    /// Copy the prototype token's fields into this one, with a different term.
    /// Note: Payloads are shared.
311 void reinit(const TokenPtr& prototype, const String& newTerm);
312
    /// Copy the prototype token's fields into this one, with a different term.
    /// Note: Payloads are shared.
314 void reinit(const TokenPtr& prototype, CharArray newTermBuffer, int32_t offset, int32_t length);
315
    /// Copies the values from this Attribute into the passed-in target attribute.
316 virtual void copyTo(const AttributePtr& target);
317
    /// Convenience factory that returns Token as implementation for the basic attributes.
    /// (restored)
319 static AttributeFactoryPtr TOKEN_ATTRIBUTE_FACTORY();
320
321protected:
    /// Construct Token and initialize values.
323 void ConstructToken(int32_t start, int32_t end, const String& type, int32_t flags);
324
    /// Allocates a buffer char[] of at least newSize, without preserving the existing content.
328 void growTermBuffer(int32_t newSize);
329
    // (restored; undocumented on the page)
330 void initTermBuffer();
331
    /// Like clear() but doesn't clear termBuffer/text.  (restored)
333 void clearNoTermBuffer();
334};
335
/// Creates a TokenAttributeFactory returning Token as instance for the basic attributes;
/// other attribute requests are presumably forwarded to the delegate factory (TODO confirm
/// in Token.cpp).
///
/// NOTE(review): the class header, constructor and delegate member were missing from this
/// listing and are restored from the page's member index (class at listing line 338, delegate
/// at 346).  The base class is inferred from the createAttributeInstance override and the
/// AttributeSource.h include; lines 341 and 343 are inferred from the numbering gaps and the
/// parallel layout of Token (destructor, blank, LUCENE_CLASS).  Confirm all three against the
/// real header.
338class LPPAPI TokenAttributeFactory : public AttributeFactory {
339public:
    /// @param delegate factory stored in the delegate member below.
340 TokenAttributeFactory(const AttributeFactoryPtr& delegate);
341 virtual ~TokenAttributeFactory();
342
343 LUCENE_CLASS(TokenAttributeFactory);
344
345protected:
    // Factory supplied at construction.
346 AttributeFactoryPtr delegate;
347
348public:
    /// Returns an Attribute for the given class name.
349 virtual AttributePtr createAttributeInstance(const String& className);
    /// Return whether two objects are equal.
350 virtual bool equals(const LuceneObjectPtr& other);
    /// Return hash code for this object.
351 virtual int32_t hashCode();
352};
353
354}
355
356#endif
#define LUCENE_CLASS(Name)
Definition LuceneObject.h:24
Definition AttributeSource.h:14
Base class for Attributes that can be added to an AttributeSource.
Definition Attribute.h:18
Creates a TokenAttributeFactory returning Token as instance for the basic attributes and for all other attributes calls the given delegate factory.
Definition Token.h:338
TokenAttributeFactory(const AttributeFactoryPtr &delegate)
virtual AttributePtr createAttributeInstance(const String &className)
returns an Attribute.
virtual int32_t hashCode()
Return hash code for this object.
AttributeFactoryPtr delegate
Definition Token.h:346
virtual bool equals(const LuceneObjectPtr &other)
Return whether two objects are equal.
A Token is an occurrence of a term from the text of a field. It consists of a term's text,...
Definition Token.h:76
virtual String type()
Returns this Token's lexical type. Defaults to "word".
int32_t positionIncrement
Definition Token.h:141
int32_t _endOffset
Definition Token.h:137
Token()
Constructs a Token with null text.
virtual void setTermLength(int32_t length)
Set number of valid characters (length of the term) in the termBuffer array. Use this to truncate the...
virtual bool equals(const LuceneObjectPtr &other)
All values used for computation of hashCode() should be checked here for equality.
void reinit(const TokenPtr &prototype)
Copy the prototype token's fields into this one. Note: Payloads are shared.
virtual CharArray resizeTermBuffer(int32_t newSize)
Grows the termBuffer to at least size newSize, preserving the existing content. Note: If the next ope...
virtual wchar_t * termBufferArray()
Optimized implementation of termBuffer.
virtual int32_t hashCode()
Subclasses must implement this method and should compute a hashCode similar to this:
virtual int32_t endOffset()
Returns this Token's ending offset, one greater than the position of the last character corresponding...
virtual int32_t getPositionIncrement()
Returns the position increment of this Token.
int32_t _termLength
Definition Token.h:135
virtual void setTermBuffer(const String &buffer, int32_t offset, int32_t length)
Copies the contents of buffer, starting at offset and continuing for length characters,...
virtual void setOffset(int32_t startOffset, int32_t endOffset)
Set the starting and ending offset.
virtual void setFlags(int32_t flags)
virtual void setPayload(const PayloadPtr &payload)
Sets this Token's payload.
Token(int32_t start, int32_t end, int32_t flags)
Constructs a Token with null text and start and end offsets plus flags.
int32_t _startOffset
Definition Token.h:136
void initTermBuffer()
String _type
Definition Token.h:138
static AttributeFactoryPtr TOKEN_ATTRIBUTE_FACTORY()
Convenience factory that returns Token as implementation for the basic attributes.
void clearNoTermBuffer()
Like clear() but doesn't clear termBuffer/text.
Token(int32_t start, int32_t end)
Constructs a Token with null text and start and end offsets.
virtual CharArray termBuffer()
Returns the internal termBuffer character array which you can then directly alter....
static const int32_t MIN_BUFFER_SIZE
Definition Token.h:132
Token(const String &text, int32_t start, int32_t end, int32_t flags)
Constructs a Token with the given term text, start and end offsets and flags. NOTE: for better indexi...
Token(const String &text, int32_t start, int32_t end, const String &type)
Constructs a Token with the given term text, start and end offsets and type. NOTE: for better indexin...
virtual ~Token()
void ConstructToken(int32_t start, int32_t end, const String &type, int32_t flags)
Construct Token and initialize values.
virtual void clear()
Resets the term text, payload, flags, and positionIncrement, startOffset, endOffset and token type to...
TokenPtr reinit(const String &newTerm, int32_t newStartOffset, int32_t newEndOffset, const String &newType)
Shorthand for calling clear, setTermBuffer(String), setStartOffset, setEndOffset, setType.
virtual void setStartOffset(int32_t offset)
Set the starting offset.
virtual int32_t getFlags()
Get the bitset for any bits that have been set. This is completely distinct from type(),...
TokenPtr reinit(const String &newTerm, int32_t newStartOffset, int32_t newEndOffset)
Shorthand for calling clear, setTermBuffer(String), setStartOffset, setEndOffset, setType on Token::DEFAULT_TYPE.
TokenPtr clone(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset)
Makes a clone, but replaces the term buffer and start/end offset in the process. This is more efficie...
Token(CharArray startTermBuffer, int32_t termBufferOffset, int32_t termBufferLength, int32_t start, int32_t end)
Constructs a Token with the given term buffer (offset and length), start and end offsets.
void reinit(const TokenPtr &prototype, CharArray newTermBuffer, int32_t offset, int32_t length)
Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
virtual int32_t startOffset()
Returns this Token's starting offset, the position of the first character corresponding to this token...
void reinit(const TokenPtr &prototype, const String &newTerm)
Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
Token(const String &text, int32_t start, int32_t end)
Constructs a Token with the given term text, start and end offsets. The type defaults to "word".
virtual int32_t termLength()
Return number of valid characters (length of the term) in the termBuffer array.
PayloadPtr payload
Definition Token.h:140
void growTermBuffer(int32_t newSize)
Allocates a buffer char[] of at least newSize, without preserving the existing content....
CharArray _termBuffer
Definition Token.h:134
TokenPtr reinit(const String &newTerm, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset)
Shorthand for calling clear, setTermBuffer(String, int, int), setStartOffset, setEndOffset,...
Token(int32_t start, int32_t end, const String &type)
Constructs a Token with null text and start and end offsets plus the Token type.
virtual void setPositionIncrement(int32_t positionIncrement)
Set the position increment. This determines the position of this token relative to the previous Token...
TokenPtr reinit(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset, const String &newType)
Shorthand for calling clear, setTermBuffer(char[], int, int), setStartOffset, setEndOffset,...
virtual PayloadPtr getPayload()
Returns this Token's payload.
virtual String toString()
Returns a string representation of the object.
TokenPtr reinit(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset)
Shorthand for calling clear, setTermBuffer(char[], int, int), setStartOffset, setEndOffset,...
virtual LuceneObjectPtr clone(const LuceneObjectPtr &other=LuceneObjectPtr())
Shallow clone. Subclasses must override this if they need to clone any members deeply.
TokenPtr reinit(const String &newTerm, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset, const String &newType)
Shorthand for calling clear, setTermBuffer(String), setStartOffset, setEndOffset, setType.
virtual void setTermBuffer(const wchar_t *buffer, int32_t offset, int32_t length)
Copies the contents of buffer, starting at offset for length characters, into the termBuffer array.
virtual void setType(const String &type)
Set the lexical type.
virtual void setTermBuffer(const String &buffer)
Copies the contents of buffer into the termBuffer array.
static const String & DEFAULT_TYPE()
virtual String term()
Returns the Token's term text.
int32_t flags
Definition Token.h:139
virtual void copyTo(const AttributePtr &target)
Copies the values from this Attribute into the passed-in target attribute. The target implementation ...
virtual void setEndOffset(int32_t offset)
Set the ending offset.
Definition AbstractAllTermDocs.h:12
boost::shared_ptr< LuceneObject > LuceneObjectPtr
Definition LuceneTypes.h:539
boost::shared_ptr< Token > TokenPtr
Definition LuceneTypes.h:59
boost::shared_ptr< Payload > PayloadPtr
Definition LuceneTypes.h:198
boost::shared_ptr< AttributeFactory > AttributeFactoryPtr
Definition LuceneTypes.h:519
boost::shared_ptr< Attribute > AttributePtr
Definition LuceneTypes.h:518

clucene.sourceforge.net