-
Type: Task
-
Status: Resolved
-
Priority: Major
-
Resolution: Fixed
-
Affects Version/s: None
-
Fix Version/s: 10.10-HF49, 11.x, 2021.5
-
Component/s: Core
-
Release Notes Summary:Possible OOM is prevented on XLSX fulltext extraction.
-
Tags:
-
Sprint:nxplatform #37
-
Story Points:2
Extracting Fulltext from XLS with POI is known to consume lots of memory since NXP-14416.
A fallback converter has been implemented for blobs bigger than 5MB.
The problem can still happen if the blob length is unknown and thus returned as -1, since -1 never exceeds the 5MB threshold.
This could explain why MAT analysis shows that the thread java.lang.Thread @ 0x64578e370 Nuxeo-Work-default-3:484498979951476.1028022363 keeps local variables with a total size of 4,863,244,520 bytes (97.22%).
Nuxeo-Work-default-3:484498979951476.1028022363 at org.apache.xmlbeans.impl.store.Cur.createElementXobj(Lorg/apache/xmlbeans/impl/store/Locale;Ljavax/xml/namespace/QName;Ljavax/xml/namespace/QName;)Lorg/apache/xmlbeans/impl/store/Xobj; (Cur.java:260) at org.apache.xmlbeans.impl.store.Cur$CurLoadContext.startElement(Ljavax/xml/namespace/QName;)V (Cur.java:2997) at org.apache.xmlbeans.impl.store.Locale.loadNode(Lorg/w3c/dom/Node;Lorg/apache/xmlbeans/impl/store/Locale$LoadContext;)V (Locale.java:1415) at org.apache.xmlbeans.impl.store.Locale.loadNodeChildren(Lorg/w3c/dom/Node;Lorg/apache/xmlbeans/impl/store/Locale$LoadContext;)V (Locale.java:1398) at org.apache.xmlbeans.impl.store.Locale.loadNode(Lorg/w3c/dom/Node;Lorg/apache/xmlbeans/impl/store/Locale$LoadContext;)V (Locale.java:1440) at org.apache.xmlbeans.impl.store.Locale.loadNodeChildren(Lorg/w3c/dom/Node;Lorg/apache/xmlbeans/impl/store/Locale$LoadContext;)V (Locale.java:1398) at org.apache.xmlbeans.impl.store.Locale.loadNode(Lorg/w3c/dom/Node;Lorg/apache/xmlbeans/impl/store/Locale$LoadContext;)V (Locale.java:1440) at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Lorg/w3c/dom/Node;Lorg/apache/xmlbeans/SchemaType;Lorg/apache/xmlbeans/XmlOptions;)Lorg/apache/xmlbeans/XmlObject; (Locale.java:1380) at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Lorg/apache/xmlbeans/SchemaTypeLoader;Lorg/w3c/dom/Node;Lorg/apache/xmlbeans/SchemaType;Lorg/apache/xmlbeans/XmlOptions;)Lorg/apache/xmlbeans/XmlObject; (Locale.java:1365) at org.apache.xmlbeans.impl.schema.SchemaTypeLoaderBase.parse(Lorg/w3c/dom/Node;Lorg/apache/xmlbeans/SchemaType;Lorg/apache/xmlbeans/XmlOptions;)Lorg/apache/xmlbeans/XmlObject; (SchemaTypeLoaderBase.java:370) at org.apache.poi.POIXMLTypeLoader.parse(Ljava/io/InputStream;Lorg/apache/xmlbeans/SchemaType;Lorg/apache/xmlbeans/XmlOptions;)Lorg/apache/xmlbeans/XmlObject; (POIXMLTypeLoader.java:164) at 
org.openxmlformats.schemas.spreadsheetml.x2006.main.CTPivotCacheRecords$Factory.parse(Ljava/io/InputStream;Lorg/apache/xmlbeans/XmlOptions;)Lorg/openxmlformats/schemas/spreadsheetml/x2006/main/CTPivotCacheRecords; (Unknown Source) at org.apache.poi.xssf.usermodel.XSSFPivotCacheRecords.readFrom(Ljava/io/InputStream;)V (XSSFPivotCacheRecords.java:64) at org.apache.poi.xssf.usermodel.XSSFPivotCacheRecords.<init>(Lorg/apache/poi/openxml4j/opc/PackagePart;)V (XSSFPivotCacheRecords.java:55) at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Ljava/lang/reflect/Constructor;[Ljava/lang/Object;)Ljava/lang/Object; (Native Method) at sun.reflect.NativeConstructorAccessorImpl.newInstance([Ljava/lang/Object;)Ljava/lang/Object; (NativeConstructorAccessorImpl.java:62) at sun.reflect.DelegatingConstructorAccessorImpl.newInstance([Ljava/lang/Object;)Ljava/lang/Object; (DelegatingConstructorAccessorImpl.java:45) at java.lang.reflect.Constructor.newInstance([Ljava/lang/Object;)Ljava/lang/Object; (Constructor.java:423) at org.apache.poi.xssf.usermodel.XSSFFactory.createDocumentPart(Ljava/lang/Class;[Ljava/lang/Class;[Ljava/lang/Object;)Lorg/apache/poi/POIXMLDocumentPart; (XSSFFactory.java:56) at org.apache.poi.POIXMLFactory.createDocumentPart(Lorg/apache/poi/POIXMLDocumentPart;Lorg/apache/poi/openxml4j/opc/PackagePart;)Lorg/apache/poi/POIXMLDocumentPart; (POIXMLFactory.java:60) at org.apache.poi.POIXMLDocumentPart.read(Lorg/apache/poi/POIXMLFactory;Ljava/util/Map;)V (POIXMLDocumentPart.java:580) at org.apache.poi.POIXMLDocumentPart.read(Lorg/apache/poi/POIXMLFactory;Ljava/util/Map;)V (POIXMLDocumentPart.java:592) at org.apache.poi.POIXMLDocument.load(Lorg/apache/poi/POIXMLFactory;)V (POIXMLDocument.java:165) at org.apache.poi.xssf.usermodel.XSSFWorkbook.<init>(Lorg/apache/poi/openxml4j/opc/OPCPackage;)V (XSSFWorkbook.java:270) at 
org.nuxeo.ecm.core.convert.plugins.text.extractors.XLX2TextConverter.convert(Lorg/nuxeo/ecm/core/api/blobholder/BlobHolder;Ljava/util/Map;)Lorg/nuxeo/ecm/core/api/blobholder/BlobHolder; (XLX2TextConverter.java:67) at org.nuxeo.ecm.core.convert.service.ConversionServiceImpl.convert(Ljava/lang/String;Lorg/nuxeo/ecm/core/api/blobholder/BlobHolder;Ljava/util/Map;)Lorg/nuxeo/ecm/core/api/blobholder/BlobHolder; (ConversionServiceImpl.java:332) at org.nuxeo.ecm.core.convert.plugins.text.extractors.FullTextConverter.convert(Lorg/nuxeo/ecm/core/api/blobholder/BlobHolder;Ljava/util/Map;)Lorg/nuxeo/ecm/core/api/blobholder/BlobHolder; (FullTextConverter.java:70) at org.nuxeo.ecm.core.convert.service.ConversionServiceImpl.convert(Ljava/lang/String;Lorg/nuxeo/ecm/core/api/blobholder/BlobHolder;Ljava/util/Map;)Lorg/nuxeo/ecm/core/api/blobholder/BlobHolder; (ConversionServiceImpl.java:332) at org.nuxeo.ecm.core.storage.FulltextExtractorWork.blobToText(Lorg/nuxeo/ecm/core/api/Blob;)Ljava/lang/String; (FulltextExtractorWork.java:284) at org.nuxeo.ecm.core.storage.FulltextExtractorWork$$Lambda$977.apply(Ljava/lang/Object;)Ljava/lang/Object; (Unknown Source) at java.util.Map.computeIfAbsent(Ljava/lang/Object;Ljava/util/function/Function;)Ljava/lang/Object; (Map.java:957) at org.nuxeo.ecm.core.storage.FulltextExtractorWork.extractAndUpdateBinaryText()V (FulltextExtractorWork.java:235) at org.nuxeo.ecm.core.storage.FulltextExtractorWork.extractAndUpdate()V (FulltextExtractorWork.java:181) at org.nuxeo.ecm.core.storage.FulltextExtractorWork.work()V (FulltextExtractorWork.java:145) at org.nuxeo.ecm.core.work.AbstractWork.runWorkWithTransaction()V (AbstractWork.java:493) at org.nuxeo.ecm.core.work.AbstractWork.run()V (AbstractWork.java:383) at org.nuxeo.ecm.core.work.WorkHolder.run()V (WorkHolder.java:57) at java.util.concurrent.ThreadPoolExecutor.runWorker(Ljava/util/concurrent/ThreadPoolExecutor$Worker;)V (ThreadPoolExecutor.java:1149) at 
java.util.concurrent.ThreadPoolExecutor$Worker.run()V (ThreadPoolExecutor.java:624) at java.lang.Thread.run()V (Thread.java:748)