extract
extract copied to clipboard
Ignore system files within archives
Ignore system files, such as .DS_Store, that are embedded in archives. Otherwise, an exception like the following is logged when the archive is parsed:
Sep 13, 2016 4:48:44 PM org.icij.extract.core.ParsingEmbeddedDocumentExtractor parseEmbedded
SEVERE: Unable to parse embedded document in document: Archive.zip.
org.apache.tika.exception.TikaException: Unsupported media type: multipart/appledouble.
at org.icij.extract.core.ErrorParser.parse(ErrorParser.java:55)
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120)
at org.apache.tika.parser.DelegatingParser.parse(DelegatingParser.java:72)
at org.icij.extract.core.ParsingEmbeddedDocumentExtractor.parseEmbedded(ParsingEmbeddedDocumentExtractor.java:101)
at org.apache.tika.parser.pkg.PackageParser.parseEntry(PackageParser.java:219)
at org.apache.tika.parser.pkg.PackageParser.parse(PackageParser.java:182)
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120)
at org.icij.extract.core.ParsingReader$ParsingTask.run(ParsingReader.java:267)
at org.icij.extract.core.TextParsingReader$ParsingTask.run(TextParsingReader.java:87)
at java.lang.Thread.run(Thread.java:745)