| Modifier and Type | Method and Description |
|---|---|
java.lang.String |
Tika.parseToString(java.io.File file)
Parses the given file and returns the extracted text content.
|
java.lang.String |
Tika.parseToString(java.io.InputStream stream)
Parses the given document and returns the extracted text content.
|
java.lang.String |
Tika.parseToString(java.io.InputStream stream,
Metadata metadata)
Parses the given document and returns the extracted text content.
|
java.lang.String |
Tika.parseToString(java.io.InputStream stream,
Metadata metadata,
int maxLength)
Parses the given document and returns the extracted text content.
|
java.lang.String |
Tika.parseToString(java.nio.file.Path path)
Parses the file at the given path and returns the extracted text content.
|
java.lang.String |
Tika.parseToString(java.net.URL url)
Parses the resource at the given URL and returns the extracted
text content.
|
| Modifier and Type | Method and Description |
|---|---|
(package private) ConfigurableThreadPoolExecutor |
TikaConfig.ExecutorServiceXmlLoader.decorate(ConfigurableThreadPoolExecutor created,
org.w3c.dom.Element element) |
(package private) Parser |
TikaConfig.ParserXmlLoader.decorate(Parser created,
org.w3c.dom.Element element) |
(package private) abstract T |
TikaConfig.XmlLoader.decorate(T created,
org.w3c.dom.Element element) |
private static java.io.InputStream |
TikaConfig.getConfigInputStream(java.lang.String config,
ServiceLoader serviceLoader) |
private static java.util.List<org.w3c.dom.Element> |
TikaConfig.getTopLevelElementChildren(org.w3c.dom.Element element,
java.lang.String parentName,
java.lang.String childrenName) |
static <T> Param<T> |
Param.load(java.io.InputStream stream) |
(package private) T |
TikaConfig.XmlLoader.loadOne(org.w3c.dom.Element element,
MimeTypes mimeTypes,
ServiceLoader loader) |
(package private) ConfigurableThreadPoolExecutor |
TikaConfig.ExecutorServiceXmlLoader.loadOne(org.w3c.dom.Element element,
MimeTypes mimeTypes,
ServiceLoader loader) |
(package private) CT |
TikaConfig.XmlLoader.loadOverall(org.w3c.dom.Element element,
MimeTypes mimeTypes,
ServiceLoader loader) |
private static java.util.Set<MediaType> |
TikaConfig.mediaTypesListFromDomElement(org.w3c.dom.Element node,
java.lang.String tag) |
(package private) ConfigurableThreadPoolExecutor |
TikaConfig.ExecutorServiceXmlLoader.preLoadOne(java.lang.Class<? extends ConfigurableThreadPoolExecutor> loadedClass,
java.lang.String classname,
MimeTypes mimeTypes) |
(package private) Detector |
TikaConfig.DetectorXmlLoader.preLoadOne(java.lang.Class<? extends Detector> loadedClass,
java.lang.String classname,
MimeTypes mimeTypes) |
(package private) EncodingDetector |
TikaConfig.EncodingDetectorXmlLoader.preLoadOne(java.lang.Class<? extends EncodingDetector> loadedClass,
java.lang.String classname,
MimeTypes mimeTypes) |
(package private) Parser |
TikaConfig.ParserXmlLoader.preLoadOne(java.lang.Class<? extends Parser> loadedClass,
java.lang.String classname,
MimeTypes mimeTypes) |
(package private) abstract T |
TikaConfig.XmlLoader.preLoadOne(java.lang.Class<? extends T> loadedClass,
java.lang.String classname,
MimeTypes mimeTypes) |
(package private) Translator |
TikaConfig.TranslatorXmlLoader.preLoadOne(java.lang.Class<? extends Translator> loadedClass,
java.lang.String classname,
MimeTypes mimeTypes) |
void |
Param.save(java.io.OutputStream stream) |
private static MimeTypes |
TikaConfig.typesFromDomElement(org.w3c.dom.Element element) |
| Constructor and Description |
|---|
TikaConfig()
Creates a default Tika configuration.
|
TikaConfig(org.w3c.dom.Document document) |
TikaConfig(org.w3c.dom.Document document,
ServiceLoader loader) |
TikaConfig(org.w3c.dom.Element element) |
TikaConfig(org.w3c.dom.Element element,
java.lang.ClassLoader loader) |
TikaConfig(org.w3c.dom.Element element,
ServiceLoader loader) |
TikaConfig(java.io.File file) |
TikaConfig(java.io.File file,
ServiceLoader loader) |
TikaConfig(java.io.InputStream stream) |
TikaConfig(java.nio.file.Path path) |
TikaConfig(java.nio.file.Path path,
ServiceLoader loader) |
TikaConfig(java.lang.String file) |
TikaConfig(java.net.URL url) |
TikaConfig(java.net.URL url,
java.lang.ClassLoader loader) |
TikaConfig(java.net.URL url,
ServiceLoader loader) |
| Modifier and Type | Method and Description |
|---|---|
private static java.nio.charset.Charset |
AutoDetectReader.detect(java.io.InputStream input,
Metadata metadata,
java.util.List<EncodingDetector> detectors,
LoadErrorHandler handler) |
| Constructor and Description |
|---|
AutoDetectReader(java.io.InputStream stream) |
AutoDetectReader(java.io.InputStream stream,
Metadata metadata) |
AutoDetectReader(java.io.InputStream stream,
Metadata metadata,
EncodingDetector encodingDetector) |
AutoDetectReader(java.io.InputStream stream,
Metadata metadata,
java.util.List<EncodingDetector> detectors,
LoadErrorHandler handler) |
AutoDetectReader(java.io.InputStream stream,
Metadata metadata,
ServiceLoader loader) |
| Modifier and Type | Method and Description |
|---|---|
void |
Embedder.embed(Metadata metadata,
java.io.InputStream originalStream,
java.io.OutputStream outputStream,
ParseContext context)
Embeds related document metadata from the given metadata object into the
given output stream.
|
void |
ExternalEmbedder.embed(Metadata metadata,
java.io.InputStream inputStream,
java.io.OutputStream outputStream,
ParseContext context)
Executes the configured external command to embed the given metadata
into the document and writes the result to the given output stream.
|
| Modifier and Type | Class and Description |
|---|---|
class |
AccessPermissionException
Exception to be thrown when a document does not allow content extraction.
|
class |
EncryptedDocumentException |
class |
TikaConfigException
Exception thrown when there is an error
in the Tika config file and/or one or more of the parsers failed to initialize
from that erroneous config.
|
class |
TikaMemoryLimitException |
class |
UnsupportedFormatException
Parsers should throw this exception when they encounter
a file format that they do not support.
|
class |
ZeroByteFileException
Exception thrown by the AutoDetectParser when a file contains zero bytes.
|
| Modifier and Type | Method and Description |
|---|---|
void |
ContainerExtractor.extract(TikaInputStream stream,
ContainerExtractor recurseExtractor,
EmbeddedResourceHandler handler)
Processes a container file, and extracts all the embedded
resources from within it.
|
void |
ParserContainerExtractor.extract(TikaInputStream stream,
ContainerExtractor recurseExtractor,
EmbeddedResourceHandler handler) |
void |
ParserContainerExtractor.RecursiveParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler ignored,
Metadata metadata,
ParseContext context) |
| Modifier and Type | Method and Description |
|---|---|
private ForkClient |
ForkParser.acquireClient() |
java.lang.Throwable |
ForkClient.call(java.lang.String method,
java.lang.Object... args) |
void |
ForkParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
private void |
ForkClient.sendObject(java.lang.Object object,
java.util.List<ForkResource> resources)
Serializes the object first into an in-memory buffer and then
writes it to the output stream with a preceding size integer.
|
| Constructor and Description |
|---|
ForkClient(java.lang.ClassLoader loader,
java.lang.Object object,
java.util.List<java.lang.String> java,
long serverPulseMillis) |
| Modifier and Type | Class and Description |
|---|---|
static class |
EndianUtils.BufferUnderrunException |
| Modifier and Type | Method and Description |
|---|---|
void |
TemporaryResources.dispose()
Calls the
TemporaryResources.close() method and wraps the potential
IOException into a TikaException for convenience
when used within Tika. |
| Modifier and Type | Method and Description |
|---|---|
static LanguageProfilerBuilder |
LanguageProfilerBuilder.create(java.lang.String name,
java.io.InputStream is,
java.lang.String encoding)
Deprecated.
Creates a new language profile from a (preferably quite large, 5-10k
lines) text file.
|
float |
LanguageProfilerBuilder.getSimilarity(LanguageProfilerBuilder another)
Deprecated.
Calculates a score of how well NGramProfiles match each other.
|
| Modifier and Type | Method and Description |
|---|---|
java.lang.String |
Translator.translate(java.lang.String text,
java.lang.String targetLanguage)
Translate text to the given language.
This method attempts to auto-detect the source language of the text.
|
java.lang.String |
DefaultTranslator.translate(java.lang.String text,
java.lang.String targetLanguage)
Translate, using the first available service-loaded translator
|
java.lang.String |
Translator.translate(java.lang.String text,
java.lang.String sourceLanguage,
java.lang.String targetLanguage)
Translate text between given languages.
|
java.lang.String |
DefaultTranslator.translate(java.lang.String text,
java.lang.String sourceLanguage,
java.lang.String targetLanguage)
Translate, using the first available service-loaded translator
|
| Modifier and Type | Method and Description |
|---|---|
static Metadata |
JsonMetadata.fromJson(java.io.Reader reader)
Read metadata from reader.
|
static java.util.List<Metadata> |
JsonMetadataList.fromJson(java.io.Reader reader)
Reads a list of metadata objects from reader.
|
static void |
JsonMetadataList.toJson(java.util.List<Metadata> metadataList,
java.io.Writer writer)
Serializes a list of Metadata objects to Json.
|
static void |
JsonMetadata.toJson(Metadata metadata,
java.io.Writer writer)
Serializes a Metadata object to Json.
|
| Modifier and Type | Class and Description |
|---|---|
class |
MimeTypeException
A class to encapsulate MimeType related exceptions.
|
| Modifier and Type | Method and Description |
|---|---|
javax.xml.parsers.DocumentBuilder |
ParseContext.getDocumentBuilder()
Returns the DOM builder specified in this parsing context.
|
javax.xml.parsers.SAXParser |
ParseContext.getSAXParser()
Returns the SAX parser specified in this parsing context.
|
javax.xml.transform.Transformer |
ParseContext.getTransformer()
Returns the transformer specified in this parsing context.
|
org.xml.sax.XMLReader |
ParseContext.getXMLReader()
Returns the XMLReader specified in this parsing context.
|
void |
AbstractParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata)
Deprecated.
use the
Parser.parse(InputStream, ContentHandler, Metadata, ParseContext) method instead |
void |
AutoDetectParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata) |
void |
CryptoParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
void |
DelegatingParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context)
Looks up the delegate parser from the parsing context and
delegates the parse operation to it.
|
void |
ParserDecorator.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context)
Delegates the method call to the decorated parser.
|
void |
AutoDetectParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
void |
RecursiveParserWrapper.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler ignore,
Metadata metadata,
ParseContext context)
Acts like a regular parser except it ignores the ContentHandler
and it automatically sets/overwrites the embedded Parser in the
ParseContext object.
|
void |
RecursiveParserWrapper.EmbeddedParserDecorator.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler ignore,
Metadata metadata,
ParseContext context) |
void |
NetworkParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
void |
NetworkParser.ParsingTask.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
void |
Parser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context)
Parses a document stream into a sequence of XHTML SAX events.
|
void |
ParserPostProcessor.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context)
Forwards the call to the delegated parser and post-processes the
results as described above.
|
void |
CompositeParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context)
Delegates the call to the matching component parser.
|
void |
DigestingParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
void |
ErrorParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
private void |
NetworkParser.parse(TikaInputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
| Modifier and Type | Method and Description |
|---|---|
private java.util.List<AppleSingleFileParser.FieldInfo> |
AppleSingleFileParser.getSortedFieldInfoList(java.io.InputStream stream,
short numEntries) |
void |
AppleSingleFileParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
private long |
AppleSingleFileParser.processFieldEntries(java.io.InputStream stream,
java.util.List<AppleSingleFileParser.FieldInfo> fieldInfoList,
Metadata embeddedMetadata,
long bytesRead) |
private short |
AppleSingleFileParser.readThroughNumEntries(java.io.InputStream stream) |
| Modifier and Type | Method and Description |
|---|---|
void |
AudioParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
void |
MidiParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
| Modifier and Type | Method and Description |
|---|---|
void |
Pkcs7Parser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
void |
TSDParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
| Modifier and Type | Method and Description |
|---|---|
void |
EnviHeaderParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
| Modifier and Type | Method and Description |
|---|---|
void |
EpubParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
void |
EpubContentParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
| Modifier and Type | Method and Description |
|---|---|
static void |
ExternalParsersFactory.attachExternalParsers(TikaConfig config) |
static java.util.List<ExternalParser> |
ExternalParsersFactory.create() |
static java.util.List<ExternalParser> |
ExternalParsersFactory.create(ServiceLoader loader) |
static java.util.List<ExternalParser> |
ExternalParsersFactory.create(java.lang.String filename,
ServiceLoader loader) |
static java.util.List<ExternalParser> |
ExternalParsersFactory.create(java.net.URL... urls) |
void |
ExternalParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context)
Executes the configured external command and passes the given document
stream as a simple XHTML document to the given SAX content handler.
|
private void |
ExternalParser.parse(TikaInputStream stream,
XHTMLContentHandler xhtml,
Metadata metadata,
TemporaryResources tmp) |
static java.util.List<ExternalParser> |
ExternalParsersConfigReader.read(org.w3c.dom.Document document) |
static java.util.List<ExternalParser> |
ExternalParsersConfigReader.read(org.w3c.dom.Element element) |
static java.util.List<ExternalParser> |
ExternalParsersConfigReader.read(java.io.InputStream stream) |
private static ExternalParser |
ExternalParsersConfigReader.readParser(org.w3c.dom.Element parserDef)
Builds and returns an ExternalParser, or null if a check
command was given that didn't match.
|
| Constructor and Description |
|---|
CompositeExternalParser() |
CompositeExternalParser(MediaTypeRegistry registry) |
| Modifier and Type | Method and Description |
|---|---|
void |
GDALParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
| Modifier and Type | Method and Description |
|---|---|
void |
IptcAnpaParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata)
Deprecated.
This method will be removed in Apache Tika 1.0.
|
void |
IptcAnpaParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
| Modifier and Type | Method and Description |
|---|---|
void |
IWorkPackageParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
| Modifier and Type | Method and Description |
|---|---|
void |
IWork13PackageParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
| Modifier and Type | Method and Description |
|---|---|
private int |
StringsParser.doStrings(java.io.File input,
StringsConfig config,
XHTMLContentHandler xhtml)
Runs the "strings" command on the given file.
|
void |
StringsParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
| Modifier and Type | Method and Description |
|---|---|
void |
FLVParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
| Modifier and Type | Method and Description |
|---|---|
void |
XMLParser.parse(java.io.InputStream stream,
org.xml.sax.ContentHandler handler,
Metadata metadata,
ParseContext context) |
| Modifier and Type | Method and Description |
|---|---|
void |
SecureContentHandler.throwIfCauseOf(org.xml.sax.SAXException e)
Converts the given
SAXException to a corresponding
TikaException if it's caused by this instance detecting
a zip bomb. |
| Modifier and Type | Method and Description |
|---|---|
static javax.xml.parsers.DocumentBuilder |
XMLReaderUtils.getDocumentBuilder()
Returns the DOM builder specified in this parsing context.
|
static javax.xml.parsers.SAXParser |
XMLReaderUtils.getSAXParser()
Returns the SAX parser specified in this parsing context.
|
static javax.xml.transform.Transformer |
XMLReaderUtils.getTransformer()
Returns a new transformer.
The transformer instance is configured to use
secure XML processing. |
static org.xml.sax.XMLReader |
XMLReaderUtils.getXMLReader()
Returns the XMLReader specified in this parsing context.
|