7#ifndef DOCUMENTSWRITER_H
8#define DOCUMENTSWRITER_H
#define LUCENE_CLASS(Name)
Definition LuceneObject.h:24
Definition DocumentsWriter.h:497
int32_t blockSize
Definition DocumentsWriter.h:508
virtual ByteArray getByteBlock(bool trackAllocations)
Allocate another byte[] from the shared pool.
DocumentsWriterWeakPtr _docWriter
Definition DocumentsWriter.h:505
ByteBlockAllocator(const DocumentsWriterPtr &docWriter, int32_t blockSize)
virtual void recycleByteBlocks(Collection< ByteArray > blocks, int32_t start, int32_t end)
Return byte[]'s to the pool.
virtual void recycleByteBlocks(Collection< ByteArray > blocks)
Collection< ByteArray > freeByteBlocks
Definition DocumentsWriter.h:509
virtual ~ByteBlockAllocator()
Definition ByteBlockPool.h:54
Utility template class to handle collections that can be safely copied and shared.
Definition Collection.h:17
This is the current indexing chain: DocConsumer / DocConsumerPerThread --> code: DocFieldProcessor / ...
Definition DocumentsWriter.h:447
virtual DocConsumerPtr getChain(const DocumentsWriterPtr &documentsWriter)
virtual ~DefaultIndexingChain()
Definition DocumentsWriter.h:356
InfoStreamPtr infoStream
Definition DocumentsWriter.h:367
String maxTermPrefix
Definition DocumentsWriter.h:371
SimilarityPtr similarity
Definition DocumentsWriter.h:368
virtual bool testPoint(const String &name)
Only called by asserts.
DocumentPtr doc
Definition DocumentsWriter.h:370
AnalyzerPtr analyzer
Definition DocumentsWriter.h:365
int32_t maxFieldLength
Definition DocumentsWriter.h:366
int32_t docID
Definition DocumentsWriter.h:369
DocumentsWriterWeakPtr _docWriter
Definition DocumentsWriter.h:364
Consumer returns this on each doc. This holds any state that must be flushed synchronized "in docID o...
Definition DocumentsWriter.h:402
DocWriterPtr next
Definition DocumentsWriter.h:410
virtual void setNext(const DocWriterPtr &next)
virtual int64_t sizeInBytes()=0
int32_t docID
Definition DocumentsWriter.h:411
This class accepts multiple added documents and directly writes a single segment file....
Definition DocumentsWriter.h:54
void setMaxFieldLength(int32_t maxFieldLength)
int32_t nextDocID
Definition DocumentsWriter.h:65
HashSet< String > openFiles()
Returns Collection of files in use by this instance, including any flushed segments.
DocumentsWriterThreadStatePtr getThreadState(const DocumentPtr &doc, const TermPtr &delTerm)
Returns a free (idle) ThreadState that may be used for indexing this one document....
ByteBlockAllocatorPtr perDocAllocator
Definition DocumentsWriter.h:178
void recycleIntBlocks(Collection< IntArray > blocks, int32_t start, int32_t end)
static const int32_t BYTES_PER_DEL_QUERY
Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER)....
Definition DocumentsWriter.h:132
bool bufferDeleteQueries(Collection< QueryPtr > queries)
void finishDocument(const DocumentsWriterThreadStatePtr &perThread, const DocWriterPtr &docWriter)
Does the synchronized work to finish/flush the inverted document.
SegmentWriteStatePtr flushState
Definition DocumentsWriter.h:106
String closeDocStore()
Closes the currently open doc stores and returns the doc store segment name. This returns null if there ...
double getRAMBufferSizeMB()
void setRAMBufferSizeMB(double mb)
Set how much RAM we can use before flushing.
int32_t numDocsInStore
Definition DocumentsWriter.h:160
String getSegment()
Get current segment name we are writing.
PerDocBufferPtr newPerDocBuffer()
Create and return a new DocWriterBuffer.
int32_t getDocStoreOffset()
Returns the doc offset into the shared doc store for the current buffered docs.
static const int32_t INT_BLOCK_SHIFT
Initial chunks size of the shared int[] blocks used to store postings data.
Definition DocumentsWriter.h:148
ByteBlockAllocatorPtr byteBlockAllocator
Definition DocumentsWriter.h:177
SimilarityPtr similarity
Definition DocumentsWriter.h:167
HashSet< String > _closedFiles
Definition DocumentsWriter.h:172
DirectoryPtr directory
Definition DocumentsWriter.h:156
void doAfterFlush()
Reset after a flush.
static const int32_t CHAR_BLOCK_SHIFT
Initial chunk size of the shared char[] blocks used to store term text.
Definition DocumentsWriter.h:141
bool bufferDeleteTerms(Collection< TermPtr > terms)
int32_t getMaxBufferedDocs()
MapThreadDocumentsWriterThreadState threadBindings
Definition DocumentsWriter.h:71
int64_t waitQueueResumeBytes
Definition DocumentsWriter.h:90
int32_t getNumDocsInRAM()
Returns how many docs are currently buffered in RAM.
BufferedDeletesPtr deletesFlushed
Deletes done before the last flush; these are still kept on abort.
Definition DocumentsWriter.h:82
bool aborting
Definition DocumentsWriter.h:74
int64_t numBytesAlloc
Definition DocumentsWriter.h:180
int64_t numBytesUsed
Definition DocumentsWriter.h:181
void setMaxBufferedDeleteTerms(int32_t maxBufferedDeleteTerms)
bool timeToFlushDeletes()
String docStoreSegment
Definition DocumentsWriter.h:62
void recycleCharBlocks(Collection< CharArray > blocks, int32_t numBlocks)
static const int32_t MAX_TERM_LENGTH
Definition DocumentsWriter.h:145
static IndexingChainPtr getDefaultIndexingChain()
bool pauseAllThreads()
Returns true if an abort is in progress.
IndexingChainPtr indexingChain
Definition DocumentsWriter.h:157
static const int32_t BYTE_BLOCK_MASK
Definition DocumentsWriter.h:137
bool closed
Definition DocumentsWriter.h:102
void removeOpenFile(const String &name)
int64_t ramBufferSize
How much RAM we can use before flushing. This is 0 if we are flushing by doc count instead.
Definition DocumentsWriter.h:88
int32_t pauseThreads
Definition DocumentsWriter.h:73
int32_t maxBufferedDocs
Flush @ this number of docs. If ramBufferSize is non-zero we will flush by RAM usage instead.
Definition DocumentsWriter.h:97
virtual void initialize()
Called directly after instantiation to create objects that depend on this object being fully construc...
InfoStreamPtr infoStream
Definition DocumentsWriter.h:165
bool updateDocument(const DocumentPtr &doc, const AnalyzerPtr &analyzer, const TermPtr &delTerm)
void addOpenFile(const String &name)
void abort()
Called if we hit an exception at a bad time (when updating the index files) and must discard all curr...
bool updateDocument(const TermPtr &t, const DocumentPtr &doc, const AnalyzerPtr &analyzer)
int32_t flush(bool _closeDocStore)
Flush all pending docs to a new segment.
HashSet< String > abortedFiles()
int32_t maxBufferedDeleteTerms
The max number of delete terms that can be buffered before they must be flushed to disk.
Definition DocumentsWriter.h:85
static const int32_t BYTES_PER_DEL_DOCID
Rough logic: del docIDs are List<Integer>. Say list allocates ~2X size (2*POINTER)....
Definition DocumentsWriter.h:127
void setInfoStream(const InfoStreamPtr &infoStream)
If non-null, various details of indexing are printed here.
void addDeleteTerm(const TermPtr &term, int32_t docCount)
int64_t waitQueuePauseBytes
Definition DocumentsWriter.h:89
int32_t flushedDocCount
How many docs already flushed to index.
Definition DocumentsWriter.h:100
bool applyDeletes(const SegmentInfosPtr &infos)
HashSet< String > _openFiles
Definition DocumentsWriter.h:171
Collection< IntArray > freeIntBlocks
Definition DocumentsWriter.h:108
bool bufferDeleteTerm(const TermPtr &term)
bool addDocument(const DocumentPtr &doc, const AnalyzerPtr &analyzer)
Returns true if the caller (IndexWriter) should now flush.
DocConsumerPtr consumer
Definition DocumentsWriter.h:169
static const int32_t CHAR_BLOCK_SIZE
Definition DocumentsWriter.h:142
static const int32_t INT_BLOCK_MASK
Definition DocumentsWriter.h:150
void initSegmentName(bool onlyDocStore)
void bytesUsed(int64_t numBytes)
int32_t getNumBufferedDeleteTerms()
int64_t freeTrigger
If we've allocated 5% over our RAM budget, we then free down to 95%.
Definition DocumentsWriter.h:93
static const int32_t CHAR_BLOCK_MASK
Definition DocumentsWriter.h:143
void updateFlushedDocCount(int32_t n)
void bytesAllocated(int64_t numBytes)
String segment
Definition DocumentsWriter.h:158
static const int32_t INT_NUM_BYTE
Definition DocumentsWriter.h:115
static const int32_t BYTE_BLOCK_SIZE
Definition DocumentsWriter.h:136
int32_t getFlushedDocCount()
static const int32_t POINTER_NUM_BYTE
Definition DocumentsWriter.h:114
Collection< DocumentsWriterThreadStatePtr > threadStates
Definition DocumentsWriter.h:70
static const int32_t CHAR_NUM_BYTE
Definition DocumentsWriter.h:116
static const int32_t OBJECT_HEADER_BYTES
Coarse estimates used to measure RAM usage of buffered deletes.
Definition DocumentsWriter.h:113
void remapDeletes(const SegmentInfosPtr &infos, Collection< Collection< int32_t > > docMaps, Collection< int32_t > delCounts, const OneMergePtr &merge, int32_t mergeDocCount)
Called whenever a merge has completed and the merged segments had deletions.
void waitReady(const DocumentsWriterThreadStatePtr &state)
bool bufferIsFull
Definition DocumentsWriter.h:163
virtual ~DocumentsWriter()
TermPtr lastDeleteTerm
Definition DocumentsWriter.h:184
void setFlushedDocCount(int32_t n)
void balanceRAM()
We have four pools of RAM: Postings, byte blocks (holds freq/prox posting data), char blocks (holds c...
bool flushPending
Definition DocumentsWriter.h:162
void setMaxBufferedDocs(int32_t count)
Set max buffered docs, which means we will flush by doc count instead of by RAM usage.
HashSet< String > closedFiles()
int32_t numDocsInRAM
Definition DocumentsWriter.h:66
Collection< CharArray > freeCharBlocks
Definition DocumentsWriter.h:109
static const int32_t INT_BLOCK_SIZE
Definition DocumentsWriter.h:149
bool bufferDeleteQuery(const QueryPtr &query)
BufferedDeletesPtr deletesInRAM
Deletes done after the last flush; these are discarded on abort.
Definition DocumentsWriter.h:79
void initFlushState(bool onlyDocStore)
int32_t docStoreOffset
Definition DocumentsWriter.h:63
MapTermNum getBufferedDeleteTerms()
static const int32_t BYTE_BLOCK_SHIFT
Initial chunks size of the shared byte[] blocks used to store postings data.
Definition DocumentsWriter.h:135
DocFieldProcessorPtr docFieldProcessor
Definition DocumentsWriter.h:76
static const int32_t MAX_THREAD_STATE
Max # ThreadState instances; if there are more threads than this they share ThreadStates.
Definition DocumentsWriter.h:69
static const int32_t BYTES_PER_DEL_TERM
Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER)....
Definition DocumentsWriter.h:123
HashSet< String > _abortedFiles
List of files that were written before last abort()
Definition DocumentsWriter.h:105
IntArray getIntBlock(bool trackAllocations)
int32_t maxFieldLength
Definition DocumentsWriter.h:166
static const int32_t BYTE_BLOCK_NOT_MASK
Definition DocumentsWriter.h:138
void addDeleteQuery(const QueryPtr &query, int32_t docID)
String getDocStoreSegment()
Returns the current doc store segment we are writing to.
DocumentsWriter(const DirectoryPtr &directory, const IndexWriterPtr &writer, const IndexingChainPtr &indexingChain)
WaitQueuePtr waitQueue
Definition DocumentsWriter.h:174
SkipDocWriterPtr skipDocWriter
Definition DocumentsWriter.h:175
HashSet< String > getFlushedFiles()
bool setFlushPending()
Set flushPending if it is not already set and returns whether it was set. This is used by IndexWriter...
bool hasProx()
Returns true if any of the fields in the current buffered docs have omitTermFreqAndPositions==false.
bool applyDeletes(const IndexReaderPtr &reader, int32_t docIDStart)
bool checkDeleteTerm(const TermPtr &term)
int32_t getMaxBufferedDeleteTerms()
void message(const String &message)
void addDeleteDocID(int32_t docID)
Buffer a specific docID for deletion. Currently only used when we hit an exception when adding a docum...
void createCompoundFile(const String &segment)
Build compound file for the segment we just flushed.
static const int32_t PER_DOC_BLOCK_SIZE
Definition DocumentsWriter.h:152
void setSimilarity(const SimilarityPtr &similarity)
int64_t freeLevel
Definition DocumentsWriter.h:94
Utility template class to handle hash set collections that can be safely copied and shared.
Definition HashSet.h:17
The IndexingChain must define the getChain(DocumentsWriter) method which returns the DocConsumer that...
Definition DocumentsWriter.h:423
virtual DocConsumerPtr getChain(const DocumentsWriterPtr &documentsWriter)=0
Base class for all Lucene classes.
Definition LuceneObject.h:31
RAMFile buffer for DocWriters.
Definition DocumentsWriter.h:381
PerDocBuffer(const DocumentsWriterPtr &docWriter)
DocumentsWriterWeakPtr _docWriter
Definition DocumentsWriter.h:389
void recycle()
Recycle the bytes used.
virtual ByteArray newBuffer(int32_t size)
Allocate bytes used from shared pool.
File used as buffer in RAMDirectory.
Definition RAMFile.h:15
Definition DocumentsWriter.h:457
virtual int64_t sizeInBytes()
Definition DocumentsWriter.h:469
WaitQueue(const DocumentsWriterPtr &docWriter)
int32_t nextWriteDocID
Definition DocumentsWriter.h:481
void writeDocument(const DocWriterPtr &doc)
bool add(const DocWriterPtr &doc)
int32_t numWaiting
Definition DocumentsWriter.h:483
int32_t nextWriteLoc
Definition DocumentsWriter.h:482
int64_t waitingBytes
Definition DocumentsWriter.h:484
Collection< DocWriterPtr > waiting
Definition DocumentsWriter.h:480
DocumentsWriterWeakPtr _docWriter
Definition DocumentsWriter.h:477
Definition AbstractAllTermDocs.h:12
boost::shared_ptr< InfoStream > InfoStreamPtr
Definition LuceneTypes.h:532
boost::shared_ptr< SkipDocWriter > SkipDocWriterPtr
Definition LuceneTypes.h:226
boost::shared_ptr< Query > QueryPtr
Definition LuceneTypes.h:420
boost::weak_ptr< IndexWriter > IndexWriterWeakPtr
Definition LuceneTypes.h:160
boost::shared_ptr< OneMerge > OneMergePtr
Definition LuceneTypes.h:192
boost::shared_ptr< DocFieldProcessor > DocFieldProcessorPtr
Definition LuceneTypes.h:115
boost::shared_ptr< DocumentsWriter > DocumentsWriterPtr
Definition LuceneTypes.h:123
boost::shared_ptr< SegmentWriteState > SegmentWriteStatePtr
Definition LuceneTypes.h:222
boost::shared_ptr< DocConsumer > DocConsumerPtr
Definition LuceneTypes.h:106
boost::shared_ptr< Similarity > SimilarityPtr
Definition LuceneTypes.h:435
boost::shared_ptr< Analyzer > AnalyzerPtr
Definition LuceneTypes.h:20
boost::weak_ptr< DocumentsWriter > DocumentsWriterWeakPtr
Definition LuceneTypes.h:123
boost::shared_ptr< Term > TermPtr
Definition LuceneTypes.h:233
boost::shared_ptr< BufferedDeletes > BufferedDeletesPtr
Definition LuceneTypes.h:87
boost::shared_ptr< IndexingChain > IndexingChainPtr
Definition LuceneTypes.h:156
boost::shared_ptr< Directory > DirectoryPtr
Definition LuceneTypes.h:489
boost::shared_ptr< ByteBlockAllocator > ByteBlockAllocatorPtr
Definition LuceneTypes.h:88
boost::shared_ptr< IndexReader > IndexReaderPtr
Definition LuceneTypes.h:157
boost::shared_ptr< DocWriter > DocWriterPtr
Definition LuceneTypes.h:125
boost::shared_ptr< DocumentsWriterThreadState > DocumentsWriterThreadStatePtr
Definition LuceneTypes.h:124
boost::shared_ptr< Document > DocumentPtr
Definition LuceneTypes.h:74
boost::shared_ptr< PerDocBuffer > PerDocBufferPtr
Definition LuceneTypes.h:199
boost::shared_ptr< SegmentInfos > SegmentInfosPtr
Definition LuceneTypes.h:210
boost::shared_ptr< WaitQueue > WaitQueuePtr
Definition LuceneTypes.h:265
boost::shared_ptr< IndexWriter > IndexWriterPtr
Definition LuceneTypes.h:160