Lucene++ - a full-featured, c++ search engine
API Documentation
This is a helper class to generate prefix-encoded representations for numerical values and supplies converters to represent double values as sortable integers/longs. More...
#include <NumericUtils.h>
Public Member Functions | |
virtual | ~NumericUtils () |
virtual String | getClassName () |
boost::shared_ptr< NumericUtils > | shared_from_this () |
![]() | |
virtual | ~LuceneObject () |
virtual void | initialize () |
Called directly after instantiation to create objects that depend on this object being fully constructed. | |
virtual LuceneObjectPtr | clone (const LuceneObjectPtr &other=LuceneObjectPtr()) |
Return clone of this object. | |
virtual int32_t | hashCode () |
Return hash code for this object. | |
virtual bool | equals (const LuceneObjectPtr &other) |
Return whether two objects are equal. | |
virtual int32_t | compareTo (const LuceneObjectPtr &other) |
Compare two objects. | |
virtual String | toString () |
Returns a string representation of the object. | |
![]() | |
virtual | ~LuceneSync () |
virtual SynchronizePtr | getSync () |
Return this object synchronize lock. | |
virtual LuceneSignalPtr | getSignal () |
Return this object signal. | |
virtual void | lock (int32_t timeout=0) |
Lock this object using an optional timeout. | |
virtual void | unlock () |
Unlock this object. | |
virtual bool | holdsLock () |
Returns true if this object is currently locked by current thread. | |
virtual void | wait (int32_t timeout=0) |
Wait for signal using an optional timeout. | |
virtual void | notifyAll () |
Notify all threads waiting for signal. | |
Static Public Member Functions | |
static String | _getClassName () |
static int32_t | longToPrefixCoded (int64_t val, int32_t shift, CharArray buffer) |
Returns prefix coded bits after reducing the precision by shift bits. This method is used by NumericTokenStream. | |
static String | longToPrefixCoded (int64_t val, int32_t shift) |
Returns prefix coded bits after reducing the precision by shift bits. This method is used by LongRangeBuilder. | |
static String | longToPrefixCoded (int64_t val) |
This is a convenience method that returns the prefix coded bits of a long without reducing the precision. It can be used to store the full precision value as a stored field in the index. To decode, use prefixCodedToLong. | |
static int32_t | intToPrefixCoded (int32_t val, int32_t shift, CharArray buffer) |
Returns prefix coded bits after reducing the precision by shift bits. This method is used by NumericTokenStream. | |
static String | intToPrefixCoded (int32_t val, int32_t shift) |
Returns prefix coded bits after reducing the precision by shift bits. This method is used by IntRangeBuilder. | |
static String | intToPrefixCoded (int32_t val) |
This is a convenience method that returns the prefix coded bits of an int without reducing the precision. It can be used to store the full precision value as a stored field in the index. To decode, use prefixCodedToInt. | |
static int64_t | prefixCodedToLong (const String &prefixCoded) |
Returns a long from prefixCoded characters. Rightmost bits will be zero for lower precision codes. This method can be used to decode eg. a stored field. | |
static int32_t | prefixCodedToInt (const String &prefixCoded) |
Returns an int from prefixCoded characters. Rightmost bits will be zero for lower precision codes. This method can be used to decode eg. a stored field. | |
static int64_t | doubleToSortableLong (double val) |
Converts a double value to a sortable signed long. The value is converted by getting its IEEE 754 floating-point "double format" bit layout and then swapping some bits, so that the result can be compared as an int64_t. The precision is not reduced by this, but the value can easily be used as an int64_t. | |
static String | doubleToPrefixCoded (double val) |
Convenience method: this just returns: longToPrefixCoded(doubleToSortableLong(val)) | |
static double | sortableLongToDouble (int64_t val) |
Converts a sortable long back to a double. | |
static double | prefixCodedToDouble (const String &val) |
Convenience method: this just returns: sortableLongToDouble(prefixCodedToLong(val)) | |
static void | splitLongRange (const LongRangeBuilderPtr &builder, int32_t precisionStep, int64_t minBound, int64_t maxBound) |
Splits an int64_t range recursively. You may implement a builder that adds clauses to a BooleanQuery for each call to its LongRangeBuilder#addRange(String,String) method. This method is used by NumericRangeQuery. | |
static void | splitIntRange (const IntRangeBuilderPtr &builder, int32_t precisionStep, int32_t minBound, int32_t maxBound) |
Splits an int32_t range recursively. You may implement a builder that adds clauses to a BooleanQuery for each call to its IntRangeBuilder#addRange(String,String) method. This method is used by NumericRangeQuery . | |
static void | splitRange (const LuceneObjectPtr &builder, int32_t valSize, int32_t precisionStep, int64_t minBound, int64_t maxBound) |
This helper does the splitting for both 32 and 64 bit. | |
static void | addRange (const LuceneObjectPtr &builder, int32_t valSize, int64_t minBound, int64_t maxBound, int32_t shift) |
Helper that delegates to correct range builder. | |
Static Public Attributes | |
static const int32_t | PRECISION_STEP_DEFAULT |
The default precision step used by NumericField , NumericTokenStream , NumericRangeQuery , and NumericRangeFilter as default. | |
static const wchar_t | SHIFT_START_LONG |
Longs are stored at lower precision by shifting off lower bits. The shift count is stored as SHIFT_START_LONG + shift in the first character. | |
static const int32_t | BUF_SIZE_LONG |
The maximum term length (used for char[] buffer size) for encoding long values. | |
static const wchar_t | SHIFT_START_INT |
Integers are stored at lower precision by shifting off lower bits. The shift count is stored as SHIFT_START_INT + shift in the first character. | |
static const int32_t | BUF_SIZE_INT |
The maximum term length (used for char[] buffer size) for encoding int values. | |
Additional Inherited Members | |
![]() | |
LuceneObject () | |
![]() | |
SynchronizePtr | objectLock |
LuceneSignalPtr | objectSignal |
This is a helper class to generate prefix-encoded representations for numerical values and supplies converters to represent double values as sortable integers/longs.
To quickly execute range queries in Apache Lucene, a range is divided recursively into multiple intervals for searching: The center of the range is searched only with the lowest possible precision in the trie, while the boundaries are matched more exactly. This reduces the number of terms dramatically.
This class generates terms to achieve this: First, the numerical integer values need to be converted to strings. For that, integer values (32 bit or 64 bit) are made unsigned and the bits are converted to ASCII chars, with 7 bits per char. The resulting string is sortable like the original integer value. Each value is also prefixed (in the first char) by the shift value (number of bits removed) used during encoding.
To also index floating point numbers, this class supplies two methods to convert them to integer values by changing their bit layout: doubleToSortableLong, doubleToSortableInt. You will have no precision loss by converting floating point numbers to integers and back (only that the integer form is not usable). Other data types like dates can easily be converted to longs or ints (e.g. date to long).
For easy usage, the trie algorithm is implemented for indexing inside NumericTokenStream
that can index int, long, and double. For querying, NumericRangeQuery
and NumericRangeFilter
implement the query part for the same data types.
This class can also be used to generate lexicographically sortable (according to std::string#compare) representations of numeric data types for other usages (e.g. sorting).
|
virtual |
|
inlinestatic |
|
static |
Helper that delegates to correct range builder.
|
static |
Convenience method: this just returns: longToPrefixCoded(doubleToSortableLong(val))
|
static |
Converts a double value to a sortable signed long. The value is converted by getting its IEEE 754 floating-point "double format" bit layout and then swapping some bits, so that the result can be compared as an int64_t. The precision is not reduced by this, but the value can easily be used as an int64_t.
|
inlinevirtual |
|
static |
This is a convenience method that returns the prefix coded bits of an int without reducing the precision. It can be used to store the full precision value as a stored field in the index. To decode, use prefixCodedToInt.
|
static |
Returns prefix coded bits after reducing the precision by shift bits. This method is used by IntRangeBuilder.
val | the numeric value |
shift | how many bits to strip from the right |
|
static |
Returns prefix coded bits after reducing the precision by shift bits. This method is used by NumericTokenStream.
val | the numeric value |
shift | how many bits to strip from the right |
buffer | that will contain the encoded chars, must be at least of BUF_SIZE_INT length |
|
static |
This is a convenience method that returns the prefix coded bits of a long without reducing the precision. It can be used to store the full precision value as a stored field in the index. To decode, use prefixCodedToLong.
|
static |
Returns prefix coded bits after reducing the precision by shift bits. This method is used by LongRangeBuilder.
val | the numeric value |
shift | how many bits to strip from the right |
|
static |
Returns prefix coded bits after reducing the precision by shift bits. This method is used by NumericTokenStream.
val | the numeric value |
shift | how many bits to strip from the right |
buffer | that will contain the encoded chars, must be at least of BUF_SIZE_LONG length |
|
static |
Convenience method: this just returns: sortableLongToDouble(prefixCodedToLong(val))
|
static |
Returns an int from prefixCoded characters. Rightmost bits will be zero for lower precision codes. This method can be used to decode eg. a stored field.
|
static |
Returns a long from prefixCoded characters. Rightmost bits will be zero for lower precision codes. This method can be used to decode eg. a stored field.
|
inline |
|
static |
Converts a sortable long back to a double.
|
static |
Splits an int32_t range recursively. You may implement a builder that adds clauses to a BooleanQuery
for each call to its IntRangeBuilder#addRange(String,String)
method. This method is used by NumericRangeQuery
.
|
static |
Splits an int64_t range recursively. You may implement a builder that adds clauses to a BooleanQuery
for each call to its LongRangeBuilder#addRange(String,String)
method. This method is used by NumericRangeQuery
.
|
static |
This helper does the splitting for both 32 and 64 bit.
|
static |
The maximum term length (used for char[] buffer size) for encoding int values.
|
static |
The maximum term length (used for char[] buffer size) for encoding long values.
|
static |
The default precision step used by NumericField
, NumericTokenStream
, NumericRangeQuery
, and NumericRangeFilter
as default.
|
static |
Integers are stored at lower precision by shifting off lower bits. The shift count is stored as SHIFT_START_INT + shift in the first character.
|
static |
Longs are stored at lower precision by shifting off lower bits. The shift count is stored as SHIFT_START_LONG + shift in the first character.