public abstract class FileResourceCrawler extends java.lang.Object implements java.util.concurrent.Callable<IFileProcessorFutureResult>
| Modifier and Type | Field and Description |
|---|---|
| private int | added |
| protected static int | ADDED |
| private int | considered |
| private DocumentSelector | documentSelector |
| private boolean | hasCompletedCrawling |
| private boolean | isActive |
| protected static org.slf4j.Logger | LOG |
| private long | maxConsecWaitInMillis |
| private int | maxFilesToAdd |
| private int | maxFilesToConsider |
| private int | numConsumers |
| private static long | PAUSE_INCREMENT_MILLIS |
| private java.util.concurrent.ArrayBlockingQueue<FileResource> | queue |
| private boolean | shutDownNoPoison |
| protected static int | SKIPPED |
| protected static int | STOP_NOW |
| private boolean | timedOut |
| Constructor and Description |
|---|
| FileResourceCrawler(java.util.concurrent.ArrayBlockingQueue<FileResource> queue, int numConsumers) |
| Modifier and Type | Method and Description |
|---|---|
| FileResourceCrawlerFutureResult | call() |
| int | getAdded() |
| int | getConsidered() |
| boolean | isActive()<br>If the crawler stops for any reason, it is no longer active. |
| boolean | isQueueEmpty()<br>Use sparingly. |
| protected boolean | select(Metadata m) |
| void | setDocumentSelector(DocumentSelector documentSelector) |
| void | setMaxConsecWaitInMillis(long maxConsecWaitInMillis) |
| void | setMaxFilesToAdd(int maxFilesToAdd)<br>Maximum number of files to add. |
| void | setMaxFilesToConsider(int maxFilesToConsider)<br>Maximum number of files to consider. |
| private void | shutdown() |
| void | shutDownNoPoison()<br>Set to true to shut down the FileResourceCrawler without adding poison. |
| abstract void | start()<br>Implement this to control the addition of FileResources. |
| protected int | tryToAdd(FileResource fileResource) |
| boolean | wasTimedOut()<br>Returns whether the crawler timed out while trying to add a resource to the queue. |
protected static final org.slf4j.Logger LOG
protected static final int SKIPPED
protected static final int ADDED
protected static final int STOP_NOW
private volatile boolean hasCompletedCrawling
private volatile boolean shutDownNoPoison
private volatile boolean isActive
private volatile boolean timedOut
private static final long PAUSE_INCREMENT_MILLIS
private int maxFilesToAdd
private int maxFilesToConsider
private final java.util.concurrent.ArrayBlockingQueue<FileResource> queue
private final int numConsumers
private long maxConsecWaitInMillis
private DocumentSelector documentSelector
private int added
private int considered
public FileResourceCrawler(java.util.concurrent.ArrayBlockingQueue<FileResource> queue, int numConsumers)
Parameters:
queue - shared queue
numConsumers - number of consumers (needs to know how many poisons to add when done)
public abstract void start()
throws java.lang.InterruptedException
Implement this to control the addition of FileResources. Call
tryToAdd(org.apache.tika.batch.FileResource)
to add FileResources to the queue.
Throws:
java.lang.InterruptedException
public FileResourceCrawlerFutureResult call()
Specified by:
call in interface java.util.concurrent.Callable<IFileProcessorFutureResult>
protected int tryToAdd(FileResource fileResource) throws java.lang.InterruptedException
Parameters:
fileResource - resource to add
Throws:
java.lang.InterruptedException
private void shutdown()
throws java.lang.InterruptedException
Throws:
java.lang.InterruptedException
public boolean isActive()
public void setMaxConsecWaitInMillis(long maxConsecWaitInMillis)
public void setDocumentSelector(DocumentSelector documentSelector)
public int getConsidered()
protected boolean select(Metadata m)
public void setMaxFilesToAdd(int maxFilesToAdd)
If maxFilesToAdd < 0 (default),
then this crawler will add all documents.
Parameters:
maxFilesToAdd - maximum number of files to add to the queue
public void setMaxFilesToConsider(int maxFilesToConsider)
If maxFilesToConsider < 0 (default), then this crawler
will add all documents.
Parameters:
maxFilesToConsider - maximum number of files to consider adding to the queue
public boolean isQueueEmpty()
public boolean wasTimedOut()
public int getAdded()
public void shutDownNoPoison()