Lucene++ - a full-featured, c++ search engine
API Documentation


Loading...
Searching...
No Matches
TermVectorsReader.h
Go to the documentation of this file.
1
2// Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3// Distributable under the terms of either the Apache License (Version 2.0)
4// or the GNU Lesser General Public License.
6
7#ifndef TERMVECTORSREADER_H
8#define TERMVECTORSREADER_H
9
10#include "TermVectorMapper.h"
11
12namespace Lucene {
13
/// Declaration of TermVectorsReader as rendered in this source listing.
/// NOTE(review): the listing skips several numbered lines of the original header; the member
/// index on this page places fieldInfos (44), tvx (46), tvd (47), tvf (48) and docStoreOffset (53)
/// on some of those lines, so this view of the class is incomplete.
14class LPPAPI TermVectorsReader : public LuceneObject {
15public:
17 TermVectorsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fieldInfos);
18 TermVectorsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fieldInfos,
19 int32_t readBufferSize, int32_t docStoreOffset = -1, int32_t size = 0);
21
23
24public:
/// NOTE: if you make a new format, it must be larger than the current format.
26 static const int32_t FORMAT_VERSION;
27
/// Changes to speed up bulk merging of term vectors.
29 static const int32_t FORMAT_VERSION2;
30
/// Changed strings to UTF8 with length-in-bytes not length-in-chars.
32 static const int32_t FORMAT_UTF8_LENGTH_IN_BYTES;
33
/// NOTE: always change this if you switch to a new format.
35 static const int32_t FORMAT_CURRENT;
36
/// The size in bytes that the FORMAT_VERSION will take up at the beginning of each file.
38 static const int32_t FORMAT_SIZE;
39
40 static const uint8_t STORE_POSITIONS_WITH_TERMVECTOR;
41 static const uint8_t STORE_OFFSET_WITH_TERMVECTOR;
42
43protected:
45
49 int32_t _size;
50 int32_t numTotalDocs;
51
54
55 int32_t format;
56
57public:
60
63
65
/// Retrieve the length (in bytes) of the tvd and tvf entries for the next numDocs documents,
/// beginning at startDocID (the brief in the member index is truncated; presumably the results
/// are written into tvdLengths and tvfLengths -- TODO confirm against the implementation).
69 void rawDocs(Collection<int32_t> tvdLengths, Collection<int32_t> tvfLengths, int32_t startDocID, int32_t numDocs);
70
71 void close();
72
74 int32_t size();
75
76 void get(int32_t docNum, const String& field, const TermVectorMapperPtr& mapper);
77
/// Retrieve the term vector for the given document and field.
83 TermFreqVectorPtr get(int32_t docNum, const String& field);
84
90
91 void get(int32_t docNumber, const TermVectorMapperPtr& mapper);
92
94
95protected:
96 void ConstructReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fieldInfos, int32_t readBufferSize, int32_t docStoreOffset, int32_t size);
97
98 void seekTvx(int32_t docNum);
99
100 int32_t checkValidFormat(const IndexInputPtr& in);
101
/// Reads the String[] fields; you have to pre-seek tvd to the right point.
103 Collection<String> readFields(int32_t fieldCount);
104
107
110
114 void readTermVector(const String& field, int64_t tvfPointer, const TermVectorMapperPtr& mapper);
115};
116
147
148}
149
150#endif
#define LUCENE_CLASS(Name)
Definition LuceneObject.h:24
Utility template class to handle collections that can be safely copied and shared.
Definition Collection.h:17
Base class for all Lucene classes.
Definition LuceneObject.h:31
Models the existing parallel array structure.
Definition TermVectorsReader.h:118
int32_t currentPosition
Definition TermVectorsReader.h:130
Collection< Collection< int32_t > > positions
Definition TermVectorsReader.h:128
bool storingPositions
Definition TermVectorsReader.h:132
String field
Definition TermVectorsReader.h:133
TermFreqVectorPtr materializeVector()
Construct the vector.
Collection< int32_t > termFreqs
Definition TermVectorsReader.h:127
Collection< String > terms
Definition TermVectorsReader.h:126
virtual void map(const String &term, int32_t frequency, Collection< TermVectorOffsetInfoPtr > offsets, Collection< int32_t > positions)
Map the Term Vector information into your own structure.
Collection< Collection< TermVectorOffsetInfoPtr > > offsets
Definition TermVectorsReader.h:129
bool storingOffsets
Definition TermVectorsReader.h:131
virtual void setExpectations(const String &field, int32_t numTerms, bool storeOffsets, bool storePositions)
Tell the mapper what to expect with regard to field, number of terms, offset and position storage....
The TermVectorMapper can be used to map Term Vectors into your own structure instead of the parallel ...
Definition TermVectorMapper.h:18
Definition TermVectorsReader.h:14
int32_t checkValidFormat(const IndexInputPtr &in)
FieldInfosPtr fieldInfos
Definition TermVectorsReader.h:44
IndexInputPtr getTvfStream()
Used for bulk copy when merging.
static const int32_t FORMAT_VERSION
NOTE: if you make a new format, it must be larger than the current format.
Definition TermVectorsReader.h:26
int32_t format
Definition TermVectorsReader.h:55
void readTermVector(const String &field, int64_t tvfPointer, const TermVectorMapperPtr &mapper)
void readTermVectors(Collection< String > fields, Collection< int64_t > tvfPointers, const TermVectorMapperPtr &mapper)
Collection< TermFreqVectorPtr > readTermVectors(int32_t docNum, Collection< String > fields, Collection< int64_t > tvfPointers)
static const int32_t FORMAT_CURRENT
NOTE: always change this if you switch to a new format.
Definition TermVectorsReader.h:35
TermFreqVectorPtr get(int32_t docNum, const String &field)
Retrieve the term vector for the given document and field.
static const uint8_t STORE_OFFSET_WITH_TERMVECTOR
Definition TermVectorsReader.h:41
static const uint8_t STORE_POSITIONS_WITH_TERMVECTOR
Definition TermVectorsReader.h:40
static const int32_t FORMAT_VERSION2
Changes to speed up bulk merging of term vectors.
Definition TermVectorsReader.h:29
IndexInputPtr tvd
Definition TermVectorsReader.h:47
void get(int32_t docNumber, const TermVectorMapperPtr &mapper)
void rawDocs(Collection< int32_t > tvdLengths, Collection< int32_t > tvfLengths, int32_t startDocID, int32_t numDocs)
Retrieve the length (in bytes) of the tvd and tvf entries for the next numDocs starting with startDoc...
IndexInputPtr getTvdStream()
Used for bulk copy when merging.
Collection< String > readFields(int32_t fieldCount)
Reads the String[] fields; you have to pre-seek tvd to the right point.
void seekTvx(int32_t docNum)
TermVectorsReader(const DirectoryPtr &d, const String &segment, const FieldInfosPtr &fieldInfos, int32_t readBufferSize, int32_t docStoreOffset=-1, int32_t size=0)
static const int32_t FORMAT_SIZE
The size in bytes that the FORMAT_VERSION will take up at the beginning of each file.
Definition TermVectorsReader.h:38
Collection< int64_t > readTvfPointers(int32_t fieldCount)
Reads the long[] offsets into TVF; you have to pre-seek tvx/tvd to the right point.
TermVectorsReader(const DirectoryPtr &d, const String &segment, const FieldInfosPtr &fieldInfos)
IndexInputPtr tvx
Definition TermVectorsReader.h:46
Collection< TermFreqVectorPtr > get(int32_t docNum)
Return all term vectors stored for this document, or null if they could not be read in.
int32_t numTotalDocs
Definition TermVectorsReader.h:50
int32_t docStoreOffset
The docID offset where our docs begin in the index file. This will be 0 if we have our own private fi...
Definition TermVectorsReader.h:53
int32_t _size
Definition TermVectorsReader.h:49
void ConstructReader(const DirectoryPtr &d, const String &segment, const FieldInfosPtr &fieldInfos, int32_t readBufferSize, int32_t docStoreOffset, int32_t size)
void get(int32_t docNum, const String &field, const TermVectorMapperPtr &mapper)
virtual LuceneObjectPtr clone(const LuceneObjectPtr &other=LuceneObjectPtr())
Return clone of this object.
IndexInputPtr tvf
Definition TermVectorsReader.h:48
static const int32_t FORMAT_UTF8_LENGTH_IN_BYTES
Changed strings to UTF8 with length-in-bytes not length-in-chars.
Definition TermVectorsReader.h:32
Definition AbstractAllTermDocs.h:12
boost::shared_ptr< LuceneObject > LuceneObjectPtr
Definition LuceneTypes.h:539
boost::shared_ptr< FieldInfos > FieldInfosPtr
Definition LuceneTypes.h:127
boost::shared_ptr< TermVectorMapper > TermVectorMapperPtr
Definition LuceneTypes.h:254
boost::shared_ptr< IndexInput > IndexInputPtr
Definition LuceneTypes.h:493
boost::shared_ptr< Directory > DirectoryPtr
Definition LuceneTypes.h:489
boost::shared_ptr< TermFreqVector > TermFreqVectorPtr
Definition LuceneTypes.h:237

clucene.sourceforge.net