Lucene++ - a full-featured, c++ search engine
API Documentation
This class accepts multiple added documents and directly writes a single segment file. It does this more efficiently than creating a single segment per document (with DocumentWriter) and doing standard merges on those segments. More...
#include <DocumentsWriter.h>
Public Member Functions | |
DocumentsWriter (const DirectoryPtr &directory, const IndexWriterPtr &writer, const IndexingChainPtr &indexingChain) | |
virtual | ~DocumentsWriter () |
virtual String | getClassName () |
boost::shared_ptr< DocumentsWriter > | shared_from_this () |
virtual void | initialize () |
Called directly after instantiation to create objects that depend on this object being fully constructed. | |
PerDocBufferPtr | newPerDocBuffer () |
Create and return a new DocWriterBuffer. | |
void | updateFlushedDocCount (int32_t n) |
int32_t | getFlushedDocCount () |
void | setFlushedDocCount (int32_t n) |
bool | hasProx () |
Returns true if any of the fields in the current buffered docs have omitTermFreqAndPositions==false. | |
void | setInfoStream (const InfoStreamPtr &infoStream) |
If non-null, various details of indexing are printed here. | |
void | setMaxFieldLength (int32_t maxFieldLength) |
void | setSimilarity (const SimilarityPtr &similarity) |
void | setRAMBufferSizeMB (double mb) |
Set how much RAM we can use before flushing. | |
double | getRAMBufferSizeMB () |
void | setMaxBufferedDocs (int32_t count) |
Set max buffered docs, which means we will flush by doc count instead of by RAM usage. | |
int32_t | getMaxBufferedDocs () |
String | getSegment () |
Get current segment name we are writing. | |
int32_t | getNumDocsInRAM () |
Returns how many docs are currently buffered in RAM. | |
String | getDocStoreSegment () |
Returns the current doc store segment we are writing to. | |
int32_t | getDocStoreOffset () |
Returns the doc offset into the shared doc store for the current buffered docs. | |
String | closeDocStore () |
Closes the current open doc stores and returns the doc store segment name. This returns null if there are no buffered documents. | |
HashSet< String > | abortedFiles () |
void | message (const String &message) |
HashSet< String > | openFiles () |
Returns Collection of files in use by this instance, including any flushed segments. | |
HashSet< String > | closedFiles () |
void | addOpenFile (const String &name) |
void | removeOpenFile (const String &name) |
void | setAborting () |
void | abort () |
Called if we hit an exception at a bad time (when updating the index files) and must discard all currently buffered docs. This resets our state, discarding any docs added since last flush. | |
bool | pauseAllThreads () |
Returns true if an abort is in progress. | |
void | resumeAllThreads () |
bool | anyChanges () |
void | initFlushState (bool onlyDocStore) |
int32_t | flush (bool _closeDocStore) |
Flush all pending docs to a new segment. | |
HashSet< String > | getFlushedFiles () |
void | createCompoundFile (const String &segment) |
Build compound file for the segment we just flushed. | |
bool | setFlushPending () |
Set flushPending if it is not already set and returns whether it was set. This is used by IndexWriter to trigger a single flush even when multiple threads are trying to do so. | |
void | clearFlushPending () |
void | pushDeletes () |
void | close () |
void | initSegmentName (bool onlyDocStore) |
DocumentsWriterThreadStatePtr | getThreadState (const DocumentPtr &doc, const TermPtr &delTerm) |
Returns a free (idle) ThreadState that may be used for indexing this one document. This call also pauses if a flush is pending. If delTerm is non-null then we buffer this deleted term after the thread state has been acquired. | |
bool | addDocument (const DocumentPtr &doc, const AnalyzerPtr &analyzer) |
Returns true if the caller (IndexWriter) should now flush. | |
bool | updateDocument (const TermPtr &t, const DocumentPtr &doc, const AnalyzerPtr &analyzer) |
bool | updateDocument (const DocumentPtr &doc, const AnalyzerPtr &analyzer, const TermPtr &delTerm) |
int32_t | getNumBufferedDeleteTerms () |
MapTermNum | getBufferedDeleteTerms () |
void | remapDeletes (const SegmentInfosPtr &infos, Collection< Collection< int32_t > > docMaps, Collection< int32_t > delCounts, const OneMergePtr &merge, int32_t mergeDocCount) |
Called whenever a merge has completed and the merged segments had deletions. | |
bool | bufferDeleteTerms (Collection< TermPtr > terms) |
bool | bufferDeleteTerm (const TermPtr &term) |
bool | bufferDeleteQueries (Collection< QueryPtr > queries) |
bool | bufferDeleteQuery (const QueryPtr &query) |
bool | deletesFull () |
bool | doApplyDeletes () |
void | setMaxBufferedDeleteTerms (int32_t maxBufferedDeleteTerms) |
int32_t | getMaxBufferedDeleteTerms () |
bool | hasDeletes () |
bool | applyDeletes (const SegmentInfosPtr &infos) |
bool | doBalanceRAM () |
void | waitForWaitQueue () |
int64_t | getRAMUsed () |
IntArray | getIntBlock (bool trackAllocations) |
void | bytesAllocated (int64_t numBytes) |
void | bytesUsed (int64_t numBytes) |
void | recycleIntBlocks (Collection< IntArray > blocks, int32_t start, int32_t end) |
CharArray | getCharBlock () |
void | recycleCharBlocks (Collection< CharArray > blocks, int32_t numBlocks) |
String | toMB (int64_t v) |
void | balanceRAM () |
We have four pools of RAM: Postings, byte blocks (holds freq/prox posting data), char blocks (holds characters in the term) and per-doc buffers (stored fields/term vectors). Different docs require varying amounts of storage from these four classes. | |
![]() | |
virtual | ~LuceneObject () |
virtual LuceneObjectPtr | clone (const LuceneObjectPtr &other=LuceneObjectPtr()) |
Return clone of this object. | |
virtual int32_t | hashCode () |
Return hash code for this object. | |
virtual bool | equals (const LuceneObjectPtr &other) |
Return whether two objects are equal. | |
virtual int32_t | compareTo (const LuceneObjectPtr &other) |
Compare two objects. | |
virtual String | toString () |
Returns a string representation of the object. | |
![]() | |
virtual | ~LuceneSync () |
virtual SynchronizePtr | getSync () |
Return this object synchronize lock. | |
virtual LuceneSignalPtr | getSignal () |
Return this object signal. | |
virtual void | lock (int32_t timeout=0) |
Lock this object using an optional timeout. | |
virtual void | unlock () |
Unlock this object. | |
virtual bool | holdsLock () |
Returns true if this object is currently locked by current thread. | |
virtual void | wait (int32_t timeout=0) |
Wait for signal using an optional timeout. | |
virtual void | notifyAll () |
Notify all threads waiting for signal. | |
Static Public Member Functions | |
static String | _getClassName () |
static IndexingChainPtr | getDefaultIndexingChain () |
Data Fields | |
INTERNAL | : IndexWriterWeakPtr _writer |
DirectoryPtr | directory |
IndexingChainPtr | indexingChain |
String | segment |
int32_t | numDocsInStore |
bool | flushPending |
bool | bufferIsFull |
InfoStreamPtr | infoStream |
int32_t | maxFieldLength |
SimilarityPtr | similarity |
DocConsumerPtr | consumer |
HashSet< String > | _openFiles |
HashSet< String > | _closedFiles |
WaitQueuePtr | waitQueue |
SkipDocWriterPtr | skipDocWriter |
ByteBlockAllocatorPtr | byteBlockAllocator |
ByteBlockAllocatorPtr | perDocAllocator |
int64_t | numBytesAlloc |
int64_t | numBytesUsed |
TermPtr | lastDeleteTerm |
Static Public Attributes | |
static const int32_t | OBJECT_HEADER_BYTES |
Coarse estimates used to measure RAM usage of buffered deletes. | |
static const int32_t | POINTER_NUM_BYTE |
static const int32_t | INT_NUM_BYTE |
static const int32_t | CHAR_NUM_BYTE |
static const int32_t | BYTES_PER_DEL_TERM |
Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER). Entry is object with Term key, BufferedDeletes.Num val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT). Term is object with String field and String text (OBJ_HEADER + 2*POINTER). We don't count Term's field since it's interned. Term's text is String (OBJ_HEADER + 4*INT + POINTER + OBJ_HEADER + string.length*CHAR). BufferedDeletes.num is OBJ_HEADER + INT. | |
static const int32_t | BYTES_PER_DEL_DOCID |
Rough logic: del docIDs are List<Integer>. Say list allocates ~2X size (2*POINTER). Integer is OBJ_HEADER + int. | |
static const int32_t | BYTES_PER_DEL_QUERY |
Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER). Entry is object with Query key, Integer val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT). Query we often undercount (say 24 bytes). Integer is OBJ_HEADER + INT. | |
static const int32_t | BYTE_BLOCK_SHIFT |
Initial chunk size of the shared byte[] blocks used to store postings data. | |
static const int32_t | BYTE_BLOCK_SIZE |
static const int32_t | BYTE_BLOCK_MASK |
static const int32_t | BYTE_BLOCK_NOT_MASK |
static const int32_t | CHAR_BLOCK_SHIFT |
Initial chunk size of the shared char[] blocks used to store term text. | |
static const int32_t | CHAR_BLOCK_SIZE |
static const int32_t | CHAR_BLOCK_MASK |
static const int32_t | MAX_TERM_LENGTH |
static const int32_t | INT_BLOCK_SHIFT |
Initial chunk size of the shared int[] blocks used to store postings data. | |
static const int32_t | INT_BLOCK_SIZE |
static const int32_t | INT_BLOCK_MASK |
static const int32_t | PER_DOC_BLOCK_SIZE |
Protected Member Functions | |
void | doAfterFlush () |
Reset after a flush. | |
bool | allThreadsIdle () |
void | waitReady (const DocumentsWriterThreadStatePtr &state) |
bool | timeToFlushDeletes () |
bool | checkDeleteTerm (const TermPtr &term) |
bool | applyDeletes (const IndexReaderPtr &reader, int32_t docIDStart) |
void | addDeleteTerm (const TermPtr &term, int32_t docCount) |
void | addDeleteDocID (int32_t docID) |
Buffer a specific docID for deletion. Currently only used when we hit an exception when adding a document. | |
void | addDeleteQuery (const QueryPtr &query, int32_t docID) |
void | finishDocument (const DocumentsWriterThreadStatePtr &perThread, const DocWriterPtr &docWriter) |
Does the synchronized work to finish/flush the inverted document. | |
![]() | |
LuceneObject () | |
Protected Attributes | |
String | docStoreSegment |
int32_t | docStoreOffset |
int32_t | nextDocID |
int32_t | numDocsInRAM |
Collection< DocumentsWriterThreadStatePtr > | threadStates |
MapThreadDocumentsWriterThreadState | threadBindings |
int32_t | pauseThreads |
bool | aborting |
DocFieldProcessorPtr | docFieldProcessor |
BufferedDeletesPtr | deletesInRAM |
Deletes done after the last flush; these are discarded on abort. | |
BufferedDeletesPtr | deletesFlushed |
Deletes done before the last flush; these are still kept on abort. | |
int32_t | maxBufferedDeleteTerms |
The max number of delete terms that can be buffered before they must be flushed to disk. | |
int64_t | ramBufferSize |
How much RAM we can use before flushing. This is 0 if we are flushing by doc count instead. | |
int64_t | waitQueuePauseBytes |
int64_t | waitQueueResumeBytes |
int64_t | freeTrigger |
If we've allocated 5% over our RAM budget, we then free down to 95%. | |
int64_t | freeLevel |
int32_t | maxBufferedDocs |
Flush @ this number of docs. If ramBufferSize is non-zero we will flush by RAM usage instead. | |
int32_t | flushedDocCount |
How many docs already flushed to index. | |
bool | closed |
HashSet< String > | _abortedFiles |
List of files that were written before last abort() | |
SegmentWriteStatePtr | flushState |
Collection< IntArray > | freeIntBlocks |
Collection< CharArray > | freeCharBlocks |
![]() | |
SynchronizePtr | objectLock |
LuceneSignalPtr | objectSignal |
Static Protected Attributes | |
static const int32_t | MAX_THREAD_STATE |
Max # ThreadState instances; if there are more threads than this they share ThreadStates. | |
This class accepts multiple added documents and directly writes a single segment file. It does this more efficiently than creating a single segment per document (with DocumentWriter) and doing standard merges on those segments.
Each added document is passed to the DocConsumer
, which in turn processes the document and interacts with other consumers in the indexing chain. Certain consumers, like StoredFieldsWriter
and TermVectorsTermsWriter
, digest a document and immediately write bytes to the "doc store" files (ie, they do not consume RAM per document, except while they are processing the document).
Other consumers, eg FreqProxTermsWriter
and NormsWriter
, buffer bytes in RAM and flush only when a new segment is produced.
Once we have used our allowed RAM buffer, or the number of added docs is large enough (in the case we are flushing by doc count instead of RAM usage), we create a real segment and flush it to the Directory.
Threads: Multiple threads are allowed into addDocument at once. There is an initial synchronized call to getThreadState which allocates a ThreadState for this thread. The same thread will get the same ThreadState over time (thread affinity) so that if there are consistent patterns (for example each thread is indexing a different content source) then we make better use of RAM. Then processDocument is called on that ThreadState without synchronization (most of the "heavy lifting" is in this call). Finally the synchronized "finishDocument" is called to flush changes to the directory.
When flush is called by IndexWriter we forcefully idle all threads and flush only once they are all idle. This means you can call flush with a given thread even while other threads are actively adding/deleting documents.
Exceptions: Because this class directly updates in-memory posting lists, and flushes stored fields and term vectors directly to files in the directory, there are certain limited times when an exception can corrupt this state. For example, a disk full while flushing stored fields leaves this file in a corrupt state. Or, an std::bad_alloc exception while appending to the in-memory posting lists can corrupt that posting list. We call such exceptions "aborting exceptions". In these cases we must call abort() to discard all docs added since the last flush.
All other exceptions ("non-aborting exceptions") can still partially update the index structures. These updates are consistent, but, they represent only a part of the document seen up until the exception was hit. When this happens, we immediately mark the document as deleted so that the document is always atomically ("all or none") added to the index.
Lucene::DocumentsWriter::DocumentsWriter | ( | const DirectoryPtr & | directory, |
const IndexWriterPtr & | writer, | ||
const IndexingChainPtr & | indexingChain | ||
) |
|
virtual |
|
inlinestatic |
void Lucene::DocumentsWriter::abort | ( | ) |
Called if we hit an exception at a bad time (when updating the index files) and must discard all currently buffered docs. This resets our state, discarding any docs added since last flush.
HashSet< String > Lucene::DocumentsWriter::abortedFiles | ( | ) |
|
protected |
Buffer a specific docID for deletion. Currently only used when we hit an exception when adding a document.
|
protected |
|
protected |
bool Lucene::DocumentsWriter::addDocument | ( | const DocumentPtr & | doc, |
const AnalyzerPtr & | analyzer | ||
) |
Returns true if the caller (IndexWriter) should now flush.
void Lucene::DocumentsWriter::addOpenFile | ( | const String & | name | ) |
|
protected |
bool Lucene::DocumentsWriter::anyChanges | ( | ) |
|
protected |
bool Lucene::DocumentsWriter::applyDeletes | ( | const SegmentInfosPtr & | infos | ) |
void Lucene::DocumentsWriter::balanceRAM | ( | ) |
We have four pools of RAM: Postings, byte blocks (holds freq/prox posting data), char blocks (holds characters in the term) and per-doc buffers (stored fields/term vectors). Different docs require varying amounts of storage from these four classes.
For example, docs with many unique single-occurrence short terms will use up the Postings RAM and hardly any of the other two. Whereas docs with very large terms will use a lot of char blocks RAM and relatively less of the other two. This method just frees allocations from the pools once we are over-budget, which balances the pools to match the current docs.
bool Lucene::DocumentsWriter::bufferDeleteQueries | ( | Collection< QueryPtr > | queries | ) |
bool Lucene::DocumentsWriter::bufferDeleteQuery | ( | const QueryPtr & | query | ) |
bool Lucene::DocumentsWriter::bufferDeleteTerm | ( | const TermPtr & | term | ) |
bool Lucene::DocumentsWriter::bufferDeleteTerms | ( | Collection< TermPtr > | terms | ) |
void Lucene::DocumentsWriter::bytesAllocated | ( | int64_t | numBytes | ) |
void Lucene::DocumentsWriter::bytesUsed | ( | int64_t | numBytes | ) |
|
protected |
void Lucene::DocumentsWriter::clearFlushPending | ( | ) |
void Lucene::DocumentsWriter::close | ( | ) |
HashSet< String > Lucene::DocumentsWriter::closedFiles | ( | ) |
String Lucene::DocumentsWriter::closeDocStore | ( | ) |
Closes the current open doc stores and returns the doc store segment name. This returns null if there are no buffered documents.
void Lucene::DocumentsWriter::createCompoundFile | ( | const String & | segment | ) |
Build compound file for the segment we just flushed.
bool Lucene::DocumentsWriter::deletesFull | ( | ) |
|
protected |
Reset after a flush.
bool Lucene::DocumentsWriter::doApplyDeletes | ( | ) |
bool Lucene::DocumentsWriter::doBalanceRAM | ( | ) |
|
protected |
Does the synchronized work to finish/flush the inverted document.
int32_t Lucene::DocumentsWriter::flush | ( | bool | _closeDocStore | ) |
Flush all pending docs to a new segment.
MapTermNum Lucene::DocumentsWriter::getBufferedDeleteTerms | ( | ) |
CharArray Lucene::DocumentsWriter::getCharBlock | ( | ) |
|
inlinevirtual |
|
static |
int32_t Lucene::DocumentsWriter::getDocStoreOffset | ( | ) |
Returns the doc offset into the shared doc store for the current buffered docs.
String Lucene::DocumentsWriter::getDocStoreSegment | ( | ) |
Returns the current doc store segment we are writing to.
int32_t Lucene::DocumentsWriter::getFlushedDocCount | ( | ) |
HashSet< String > Lucene::DocumentsWriter::getFlushedFiles | ( | ) |
IntArray Lucene::DocumentsWriter::getIntBlock | ( | bool | trackAllocations | ) |
int32_t Lucene::DocumentsWriter::getMaxBufferedDeleteTerms | ( | ) |
int32_t Lucene::DocumentsWriter::getMaxBufferedDocs | ( | ) |
int32_t Lucene::DocumentsWriter::getNumBufferedDeleteTerms | ( | ) |
int32_t Lucene::DocumentsWriter::getNumDocsInRAM | ( | ) |
Returns how many docs are currently buffered in RAM.
double Lucene::DocumentsWriter::getRAMBufferSizeMB | ( | ) |
int64_t Lucene::DocumentsWriter::getRAMUsed | ( | ) |
String Lucene::DocumentsWriter::getSegment | ( | ) |
Get current segment name we are writing.
DocumentsWriterThreadStatePtr Lucene::DocumentsWriter::getThreadState | ( | const DocumentPtr & | doc, |
const TermPtr & | delTerm | ||
) |
Returns a free (idle) ThreadState that may be used for indexing this one document. This call also pauses if a flush is pending. If delTerm is non-null then we buffer this deleted term after the thread state has been acquired.
bool Lucene::DocumentsWriter::hasDeletes | ( | ) |
bool Lucene::DocumentsWriter::hasProx | ( | ) |
Returns true if any of the fields in the current buffered docs have omitTermFreqAndPositions==false.
void Lucene::DocumentsWriter::initFlushState | ( | bool | onlyDocStore | ) |
|
virtual |
Called directly after instantiation to create objects that depend on this object being fully constructed.
Reimplemented from Lucene::LuceneObject.
void Lucene::DocumentsWriter::initSegmentName | ( | bool | onlyDocStore | ) |
void Lucene::DocumentsWriter::message | ( | const String & | message | ) |
PerDocBufferPtr Lucene::DocumentsWriter::newPerDocBuffer | ( | ) |
Create and return a new DocWriterBuffer.
HashSet< String > Lucene::DocumentsWriter::openFiles | ( | ) |
Returns Collection of files in use by this instance, including any flushed segments.
bool Lucene::DocumentsWriter::pauseAllThreads | ( | ) |
Returns true if an abort is in progress.
void Lucene::DocumentsWriter::pushDeletes | ( | ) |
void Lucene::DocumentsWriter::recycleCharBlocks | ( | Collection< CharArray > | blocks, |
int32_t | numBlocks | ||
) |
void Lucene::DocumentsWriter::recycleIntBlocks | ( | Collection< IntArray > | blocks, |
int32_t | start, | ||
int32_t | end | ||
) |
void Lucene::DocumentsWriter::remapDeletes | ( | const SegmentInfosPtr & | infos, |
Collection< Collection< int32_t > > | docMaps, | ||
Collection< int32_t > | delCounts, | ||
const OneMergePtr & | merge, | ||
int32_t | mergeDocCount | ||
) |
Called whenever a merge has completed and the merged segments had deletions.
void Lucene::DocumentsWriter::removeOpenFile | ( | const String & | name | ) |
void Lucene::DocumentsWriter::resumeAllThreads | ( | ) |
void Lucene::DocumentsWriter::setAborting | ( | ) |
void Lucene::DocumentsWriter::setFlushedDocCount | ( | int32_t | n | ) |
bool Lucene::DocumentsWriter::setFlushPending | ( | ) |
Set flushPending if it is not already set and returns whether it was set. This is used by IndexWriter to trigger a single flush even when multiple threads are trying to do so.
void Lucene::DocumentsWriter::setInfoStream | ( | const InfoStreamPtr & | infoStream | ) |
If non-null, various details of indexing are printed here.
void Lucene::DocumentsWriter::setMaxBufferedDeleteTerms | ( | int32_t | maxBufferedDeleteTerms | ) |
void Lucene::DocumentsWriter::setMaxBufferedDocs | ( | int32_t | count | ) |
Set max buffered docs, which means we will flush by doc count instead of by RAM usage.
void Lucene::DocumentsWriter::setMaxFieldLength | ( | int32_t | maxFieldLength | ) |
void Lucene::DocumentsWriter::setRAMBufferSizeMB | ( | double | mb | ) |
Set how much RAM we can use before flushing.
void Lucene::DocumentsWriter::setSimilarity | ( | const SimilarityPtr & | similarity | ) |
|
inline |
|
protected |
String Lucene::DocumentsWriter::toMB | ( | int64_t | v | ) |
bool Lucene::DocumentsWriter::updateDocument | ( | const DocumentPtr & | doc, |
const AnalyzerPtr & | analyzer, | ||
const TermPtr & | delTerm | ||
) |
bool Lucene::DocumentsWriter::updateDocument | ( | const TermPtr & | t, |
const DocumentPtr & | doc, | ||
const AnalyzerPtr & | analyzer | ||
) |
void Lucene::DocumentsWriter::updateFlushedDocCount | ( | int32_t | n | ) |
void Lucene::DocumentsWriter::waitForWaitQueue | ( | ) |
|
protected |
INTERNAL Lucene::DocumentsWriter::__pad0__ |
|
protected |
List of files that were written before last abort()
HashSet<String> Lucene::DocumentsWriter::_closedFiles |
HashSet<String> Lucene::DocumentsWriter::_openFiles |
|
protected |
bool Lucene::DocumentsWriter::bufferIsFull |
|
static |
|
static |
|
static |
Initial chunk size of the shared byte[] blocks used to store postings data.
|
static |
ByteBlockAllocatorPtr Lucene::DocumentsWriter::byteBlockAllocator |
|
static |
Rough logic: del docIDs are List<Integer>. Say list allocates ~2X size (2*POINTER). Integer is OBJ_HEADER + int.
|
static |
|
static |
Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER). Entry is object with Term key, BufferedDeletes.Num val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT). Term is object with String field and String text (OBJ_HEADER + 2*POINTER). We don't count Term's field since it's interned. Term's text is String (OBJ_HEADER + 4*INT + POINTER + OBJ_HEADER + string.length*CHAR). BufferedDeletes.num is OBJ_HEADER + INT.
|
static |
|
static |
Initial chunk size of the shared char[] blocks used to store term text.
|
static |
|
static |
|
protected |
DocConsumerPtr Lucene::DocumentsWriter::consumer |
|
protected |
Deletes done before the last flush; these are still kept on abort.
|
protected |
Deletes done after the last flush; these are discarded on abort.
DirectoryPtr Lucene::DocumentsWriter::directory |
|
protected |
|
protected |
|
protected |
|
protected |
How many docs already flushed to index.
bool Lucene::DocumentsWriter::flushPending |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
If we've allocated 5% over our RAM budget, we then free down to 95%.
IndexingChainPtr Lucene::DocumentsWriter::indexingChain |
InfoStreamPtr Lucene::DocumentsWriter::infoStream |
|
static |
|
static |
Initial chunk size of the shared int[] blocks used to store postings data.
|
static |
|
static |
TermPtr Lucene::DocumentsWriter::lastDeleteTerm |
|
static |
|
staticprotected |
Max # ThreadState instances; if there are more threads than this they share ThreadStates.
|
protected |
The max number of delete terms that can be buffered before they must be flushed to disk.
|
protected |
Flush @ this number of docs. If ramBufferSize is non-zero we will flush by RAM usage instead.
int32_t Lucene::DocumentsWriter::maxFieldLength |
|
protected |
int64_t Lucene::DocumentsWriter::numBytesAlloc |
int64_t Lucene::DocumentsWriter::numBytesUsed |
|
protected |
int32_t Lucene::DocumentsWriter::numDocsInStore |
|
static |
Coarse estimates used to measure RAM usage of buffered deletes.
|
protected |
|
static |
ByteBlockAllocatorPtr Lucene::DocumentsWriter::perDocAllocator |
|
static |
|
protected |
How much RAM we can use before flushing. This is 0 if we are flushing by doc count instead.
String Lucene::DocumentsWriter::segment |
SimilarityPtr Lucene::DocumentsWriter::similarity |
SkipDocWriterPtr Lucene::DocumentsWriter::skipDocWriter |
|
protected |
|
protected |
WaitQueuePtr Lucene::DocumentsWriter::waitQueue |
|
protected |
|
protected |