-
Type: Bug
-
Status: Resolved
-
Priority: Major
-
Resolution: Won't Fix
-
Affects Version/s: 7.10, 8.10, 9.2
-
Component/s: Query & PageProvider
-
Backlog priority:900
-
Sprint:nxGang Sprint 9.3.5, nxGang Sprint 9.3.6
-
Story Points:3
When trying to retrieve all the results of an NXQL query using the Repository.Query operation in an Automation Scripting script, as described in the blog post https://www.nuxeo.com/blog/paginated-query-with-javaScript-automation/, two problems were detected:
- If the pageSize parameter is not provided, the first page of results is always returned, whatever value of the currentPageIndex parameter is passed to the Repository.Query operation.
- As described in the blog post, and given that the Repository.Query operation returns a list of documents, the only way to exit the loop that retrieves all the result pages is to expect a list with NO documents to be returned after the last page has been retrieved. This never happens: the operation always returns the last page of results, which causes an infinite loop.
Here are 2 unit tests demonstrating both problems:
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import java.util.HashMap; import java.util.List; import java.util.Map; import org.junit.Test; import org.junit.runner.RunWith; import org.nuxeo.ecm.automation.AutomationService; import org.nuxeo.ecm.automation.OperationContext; import org.nuxeo.ecm.automation.test.AutomationFeature; import org.nuxeo.ecm.core.api.CoreSession; import org.nuxeo.ecm.core.api.DocumentModel; import org.nuxeo.ecm.core.api.DocumentModelList; import org.nuxeo.ecm.core.api.impl.DocumentModelListImpl; import org.nuxeo.ecm.platform.query.api.PageProviderService; import org.nuxeo.runtime.test.runner.Features; import org.nuxeo.runtime.test.runner.FeaturesRunner; import com.google.inject.Inject; @RunWith(FeaturesRunner.class) @Features({ AutomationFeature.class }) public class SUPNXP20891Test { private static final int NBR_DOCS = 3000; private static final int PAGE_SIZE = 1000; @Inject protected CoreSession session; @Inject PageProviderService pps; @Inject AutomationService as; @Test public void testServiceIsDeployed() { // assertNotNull(pps); assertNotNull(as); } /** * This unit test demonstrates that operation 'Repository.Query' does not return * paginated results if parameter 'pageSize' is not set, it always returns * the first page whatever the value of parameter 'currentPageIndex'. 
* See blog post https://www.nuxeo.com/blog/paginated-query-with-javaScript-automation/ * * @throws Exception */ @Test public void testAutomationWithoutPageSizeParameter() throws Exception { for (int i=0; i < NBR_DOCS;i++) { DocumentModel doc = session.createDocumentModel("File"); doc.setPathInfo("/", "File " + i); doc.setPropertyValue("dc:source", "me"); doc = session.createDocument(doc); if (i > 0 && (i % 400) == 0) { session.save(); System.out.println("save session " + i + " docs created"); } } session.save(); OperationContext ctx = new OperationContext(session); Map<String, Object> args = new HashMap<String, Object>(); args.put("query", "SELECT * FROM Document WHERE dc:source = 'me'"); int pageIndex = 0; int totalNbr = 0; DocumentModelList docs = new DocumentModelListImpl(); String prevId = ""; do { if (pageIndex > 0) { prevId = docs.get(0).getId(); } args.put("currentPageIndex", pageIndex); docs = (DocumentModelList) as.run(ctx, "Repository.Query", args); System.out.println("Nbr docs: " + docs.size()); System.out.println(" Total nbr of docs: " + docs.totalSize()); System.out.println(" Page index: " + pageIndex); if (!docs.isEmpty()) { System.out.println(" 1st doc ID of set: " + docs.get(0).getId()); } pageIndex++; totalNbr += docs.size(); assertFalse("First document's ID of current page is the same of the first document of previous page, should not happen.", prevId.equals(docs.get(0).getId())); } while (docs.size() > 0); assertEquals(NBR_DOCS, totalNbr); } /** * This unit test demonstrates that operation 'Repository.Query' does not return * a page with 0 document after the last page of results, as it is supposed to, * because it is the only way in an automation scripting to determine all the results * were returned. 
* See blog post https://www.nuxeo.com/blog/paginated-query-with-javaScript-automation/ * * @throws Exception */ @Test public void testAutomationWithPageSizeParameter() throws Exception { for (int i=0; i < NBR_DOCS;i++) { DocumentModel doc = session.createDocumentModel("File"); doc.setPathInfo("/", "File " + i); doc.setPropertyValue("dc:source", "me"); doc = session.createDocument(doc); if (i > 0 && (i % 400) == 0) { session.save(); System.out.println("save session " + i + " docs created"); } } session.save(); OperationContext ctx = new OperationContext(session); Map<String, Object> args = new HashMap<String, Object>(); args.put("query", "SELECT * FROM Document WHERE dc:source = 'me'"); args.put("pageSize", PAGE_SIZE); // MUST be set but overridden (if higher) by env. config. variable 'org.nuxeo.ecm.core.max.results' or config. variable 'nuxeo.pageprovider.default-max-page-size' // disable results limit with env. config. variable 'org.nuxeo.ecm.core.limit.results=false' int pageIndex = 0; int totalNbr = 0; DocumentModelList docs = new DocumentModelListImpl(); String prevId = ""; do { if (pageIndex > 0) { prevId = docs.get(0).getId(); } args.put("currentPageIndex", pageIndex); docs = (DocumentModelList) as.run(ctx, "Repository.Query", args); System.out.println("Nbr docs: " + docs.size()); System.out.println(" Total nbr of docs: " + docs.totalSize()); System.out.println(" Page index: " + pageIndex); if (!docs.isEmpty()) { System.out.println(" 1st doc ID of set: " + docs.get(0).getId()); } pageIndex++; totalNbr += docs.size(); assertFalse("First document's ID of current page is the same of the first document of previous page, should not happen.", prevId.equals(docs.get(0).getId())); } while (docs.size() > 0); assertEquals(NBR_DOCS, totalNbr); } }
Here is a javascript sample for case 1:
// Case 1: page through Document.Query WITHOUT a 'pageSize' parameter.
// Requests successive pages by incrementing 'currentPageIndex' and logs the size
// and first document ID of each page.
var pageNr = 0;
var res;
var totalNbr = 0;
var prevId = '';
do {
  if (pageNr > 0) {
    // the loop only continues after a non-empty page, so get(0) is safe here
    prevId = res.get(0).id;
  }
  res = Document.Query(null, {
    'query': 'SELECT * FROM Document',
    'currentPageIndex': '' + pageNr
  });
  Console.warn('#size: ' + res.length + '/' + pageNr);
  // guard: an empty page has no first document to log
  if (res.length > 0) {
    Console.warn('1st doc ID: ' + res.get(0).id);
  }
  totalNbr += res.length;
  pageNr++;
} while (
  res.length > 0 &&
  prevId != res.get(0).id // needed to avoid infinite loop as res.length is never 0
);
Here is a javascript sample for case 2:
// Case 2: page through Document.Query WITH an explicit 'pageSize' parameter.
// Requests successive pages by incrementing 'currentPageIndex' and logs the size
// and first document ID of each page.
var pageNr = 0;
var res;
var totalNbr = 0;
var prevId = '';
do {
  if (pageNr > 0) {
    // the loop only continues after a non-empty page, so get(0) is safe here
    prevId = res.get(0).id;
  }
  res = Document.Query(null, {
    'query': 'SELECT * FROM Document',
    // MUST be set to get paginated results, overridden (if higher) by 'nuxeo.pageprovider.default-max-page-size'
    'pageSize': 1000,
    'currentPageIndex': '' + pageNr
  });
  Console.warn('#size: ' + res.length + '/' + pageNr);
  // guard: an empty page has no first document to log
  if (res.length > 0) {
    Console.warn('1st doc ID: ' + res.get(0).id);
  }
  totalNbr += res.length;
  pageNr++;
} while (
  res.length > 0 &&
  prevId != res.get(0).id // needed to avoid infinite loop as res.length is never 0
);