public class ExternalParser extends AbstractParser
| Modifier and Type | Class and Description |
|---|---|
static interface |
ExternalParser.LineConsumer
Consumer contract
|
| Modifier and Type | Field and Description |
|---|---|
private java.lang.String[] |
command
The external command to invoke.
|
private ExternalParser.LineConsumer |
ignoredLineConsumer
A consumer for ignored lines.
|
static java.lang.String |
INPUT_FILE_TOKEN
The token which, if present in the command string, will
be replaced with the input filename.
|
private java.util.Map<java.util.regex.Pattern,java.lang.String> |
metadataPatterns
Regular Expressions to run over STDOUT to
extract Metadata.
|
static java.lang.String |
OUTPUT_FILE_TOKEN
The token which, if present in the command string, will
be replaced with the output filename.
|
private static long |
serialVersionUID |
private java.util.Set<MediaType> |
supportedTypes
Media types supported by the external program.
|
| Constructor and Description |
|---|
ExternalParser() |
| Modifier and Type | Method and Description |
|---|---|
static boolean |
check(java.lang.String[] checkCmd,
int... errorValue) |
static boolean |
check(java.lang.String checkCmd,
int... errorValue)
Checks to see if the command can be run.
|
private void |
extractMetadata(java.io.InputStream stream,
Metadata metadata) |
private void |
extractOutput(java.io.InputStream stream,
XHTMLContentHandler xhtml)
Starts a thread that extracts the contents of the standard output
stream of the given process to the given XHTML content handler.
|
java.lang.String[] |
getCommand() |
ExternalParser.LineConsumer |
getIgnoredLineConsumer()
Gets the consumer for ignored lines.
|
java.util.Map<java.util.regex.Pattern,java.lang.String> |
getMetadataExtractionPatterns() |
java.util.Set<MediaType> |
getSupportedTypes() |
java.util.Set<MediaType> |
getSupportedTypes(ParseContext context)
Returns the set of media types supported by this parser when used
with the given parse context.
|
private static void |
ignoreStream(java.io.InputStream stream)
Starts a thread that reads and discards the contents of the
standard stream of the given process.
|
private static java.lang.Thread |
ignoreStream(java.io.InputStream stream,
boolean waitForDeath)
Starts a thread that reads and discards the contents of the
standard stream of the given process.
|
void |
parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context)
Executes the configured external command and passes the given document
stream as a simple XHTML document to the given SAX content handler.
|
private void |
parse(TikaInputStream stream,
XHTMLContentHandler xhtml,
Metadata metadata,
TemporaryResources tmp) |
private void |
sendInput(java.lang.Process process,
java.io.InputStream stream)
Starts a thread that sends the contents of the given input stream
to the standard input stream of the given process.
|
void |
setCommand(java.lang.String... command)
Sets the command to be run.
|
void |
setIgnoredLineConsumer(ExternalParser.LineConsumer ignoredLineConsumer)
Set a consumer for the lines ignored by the parse functions
|
void |
setMetadataExtractionPatterns(java.util.Map<java.util.regex.Pattern,java.lang.String> patterns)
Sets the map of regular expression patterns and Metadata
keys.
|
void |
setSupportedTypes(java.util.Set<MediaType> supportedTypes) |
parse
private static final long serialVersionUID
public static final java.lang.String INPUT_FILE_TOKEN
public static final java.lang.String OUTPUT_FILE_TOKEN
private java.util.Set<MediaType> supportedTypes
private java.util.Map<java.util.regex.Pattern,java.lang.String> metadataPatterns
private java.lang.String[] command
Runtime.exec(String[])
private ExternalParser.LineConsumer ignoredLineConsumer
public java.util.Set<MediaType> getSupportedTypes(ParseContext context)
Parser
context - parse context
public java.util.Set<MediaType> getSupportedTypes()
public void setSupportedTypes(java.util.Set<MediaType> supportedTypes)
public java.lang.String[] getCommand()
public void setCommand(java.lang.String... command)
INPUT_FILE_TOKEN or OUTPUT_FILE_TOKEN
if the command needs filenames.
Runtime.exec(String[])
public ExternalParser.LineConsumer getIgnoredLineConsumer()
public void setIgnoredLineConsumer(ExternalParser.LineConsumer ignoredLineConsumer)
ignoredLineConsumer - consumer instance
public java.util.Map<java.util.regex.Pattern,java.lang.String> getMetadataExtractionPatterns()
public void setMetadataExtractionPatterns(java.util.Map<java.util.regex.Pattern,java.lang.String> patterns)
public void parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context)
throws java.io.IOException,
org.xml.sax.SAXException,
TikaException
setMetadataExtractionPatterns(Map)
has been called to set patterns.
stream - the document stream (input)
handler - handler for the XHTML SAX events (output)
metadata - document metadata (input and output)
context - parse context
java.io.IOException - if the document stream could not be read
org.xml.sax.SAXException - if the SAX events could not be processed
TikaException - if the document could not be parsed
private void parse(TikaInputStream stream, XHTMLContentHandler xhtml, Metadata metadata, TemporaryResources tmp) throws java.io.IOException, org.xml.sax.SAXException, TikaException
java.io.IOException
org.xml.sax.SAXException
TikaException
private void extractOutput(java.io.InputStream stream,
XHTMLContentHandler xhtml)
throws org.xml.sax.SAXException,
java.io.IOException
process - process
xhtml - XHTML content handler
org.xml.sax.SAXException - if the XHTML SAX events could not be handled
java.io.IOException - if an input error occurred
private void sendInput(java.lang.Process process,
java.io.InputStream stream)
process - process
stream - input stream
private static void ignoreStream(java.io.InputStream stream)
stream - stream to be ignored
private static java.lang.Thread ignoreStream(java.io.InputStream stream,
boolean waitForDeath)
stream - stream to be sent to a black hole (a.k.a. null)
waitForDeath - when true, the caller thread will be blocked until the new thread dies.
private void extractMetadata(java.io.InputStream stream,
Metadata metadata)
public static boolean check(java.lang.String checkCmd,
int... errorValue)
checkCmd - The check command to run
errorValue - What is considered an error value?
public static boolean check(java.lang.String[] checkCmd,
int... errorValue)