Uploaded image for project: 'Nuxeo Platform'
  1. Nuxeo Platform
  2. NXP-22969

Fix operation 'Repository.Query' when retrieving paginated result

    XMLWordPrintable

    Details

      Description

      When trying to retrieve all the results of an NXQL query using the operation Repository.Query in an automation script, as described in the blog post https://www.nuxeo.com/blog/paginated-query-with-javaScript-automation/, 2 problems were detected:

      1. if parameter pageSize is not provided, whatever the value of parameter currentPageIndex provided to the operation Repository.Query, the first page of result is always returned
      2. as described in the blog post, and given that operation Repository.Query returns a list of documents, the only way to exit the loop that retrieves all the result pages is to wait for a list with NO documents to be returned after the last page. This never happens: the operation keeps returning the last page of results, which causes an infinite loop

      Here are 2 unit tests demonstrating both problems:

      import static org.junit.Assert.assertEquals;
      import static org.junit.Assert.assertFalse;
      import static org.junit.Assert.assertNotNull;
      
      import java.util.HashMap;
      import java.util.List;
      import java.util.Map;
      
      import org.junit.Test;
      import org.junit.runner.RunWith;
      import org.nuxeo.ecm.automation.AutomationService;
      import org.nuxeo.ecm.automation.OperationContext;
      import org.nuxeo.ecm.automation.test.AutomationFeature;
      import org.nuxeo.ecm.core.api.CoreSession;
      import org.nuxeo.ecm.core.api.DocumentModel;
      import org.nuxeo.ecm.core.api.DocumentModelList;
      import org.nuxeo.ecm.core.api.impl.DocumentModelListImpl;
      import org.nuxeo.ecm.platform.query.api.PageProviderService;
      import org.nuxeo.runtime.test.runner.Features;
      import org.nuxeo.runtime.test.runner.FeaturesRunner;
      
      import com.google.inject.Inject;
      
      /**
       * Tests exercising pagination of operation 'Repository.Query' through the automation service.
       * See blog post https://www.nuxeo.com/blog/paginated-query-with-javaScript-automation/
       */
      @RunWith(FeaturesRunner.class)
      @Features({ AutomationFeature.class })
      public class SUPNXP20891Test {

          private static final int NBR_DOCS = 3000;

          private static final int PAGE_SIZE = 1000;

          /** The session is saved every time this many documents have been created. */
          private static final int SAVE_INTERVAL = 400;

          private static final String QUERY = "SELECT * FROM Document WHERE dc:source = 'me'";

          @Inject protected CoreSession session;

          @Inject PageProviderService pps;

          @Inject AutomationService as;

          /**
           * Checks that the automation service was injected.
           * The injected {@link PageProviderService} is not asserted here.
           */
          @Test public void testServiceIsDeployed() {
              assertNotNull(as);
          }

          /**
           * This unit test demonstrates that operation 'Repository.Query' does not return
           * paginated results if parameter 'pageSize' is not set, it always returns
           * the first page whatever the value of parameter 'currentPageIndex'.
           * See blog post https://www.nuxeo.com/blog/paginated-query-with-javaScript-automation/
           *
           * @throws Exception if creating the documents or running the operation fails
           */
          @Test public void testAutomationWithoutPageSizeParameter() throws Exception {
              createTestDocuments();
              Map<String, Object> args = new HashMap<String, Object>();
              args.put("query", QUERY);
              assertEquals(NBR_DOCS, countDocsOverAllPages(args));
          }

          /**
           * This unit test demonstrates that operation 'Repository.Query' does not return
           * a page with 0 document after the last page of results, as it is supposed to,
           * because it is the only way in an automation scripting to determine all the results
           * were returned.
           * See blog post https://www.nuxeo.com/blog/paginated-query-with-javaScript-automation/
           *
           * @throws Exception if creating the documents or running the operation fails
           */
          @Test public void testAutomationWithPageSizeParameter() throws Exception {
              createTestDocuments();
              Map<String, Object> args = new HashMap<String, Object>();
              args.put("query", QUERY);
              args.put("pageSize", PAGE_SIZE); // MUST be set but overridden (if higher) by env. config. variable 'org.nuxeo.ecm.core.max.results' or config. variable 'nuxeo.pageprovider.default-max-page-size'
              // disable results limit with env. config. variable 'org.nuxeo.ecm.core.limit.results=false'
              assertEquals(NBR_DOCS, countDocsOverAllPages(args));
          }

          /**
           * Creates {@link #NBR_DOCS} 'File' documents under the root, each with
           * 'dc:source' set to 'me', saving the session periodically and once at the end.
           */
          private void createTestDocuments() {
              for (int i = 0; i < NBR_DOCS; i++) {
                  DocumentModel doc = session.createDocumentModel("File");
                  doc.setPathInfo("/", "File " + i);
                  doc.setPropertyValue("dc:source", "me");
                  session.createDocument(doc);
                  if (i > 0 && (i % SAVE_INTERVAL) == 0) {
                      session.save();
                      System.out.println("save session " + i + " docs created");
                  }
              }
              session.save();
          }

          /**
           * Runs 'Repository.Query' with an increasing 'currentPageIndex' until an empty
           * page is returned, and sums the sizes of all the pages received.
           * Fails as soon as a page starts with the same document as the previous page.
           *
           * @param args the operation parameters; 'currentPageIndex' is overwritten on each call
           * @return the total number of documents received over all pages
           * @throws Exception if running the operation fails
           */
          private int countDocsOverAllPages(Map<String, Object> args) throws Exception {
              OperationContext ctx = new OperationContext(session);
              int pageIndex = 0;
              int totalNbr = 0;
              String prevId = "";
              DocumentModelList docs;
              do {
                  args.put("currentPageIndex", pageIndex);
                  docs = (DocumentModelList) as.run(ctx, "Repository.Query", args);
                  System.out.println("Nbr docs: " + docs.size());
                  System.out.println(" Total nbr of docs: " + docs.totalSize());
                  System.out.println(" Page index: " + pageIndex);
                  // Only look at the first document when there is one: the expected
                  // terminating page is empty and docs.get(0) would throw on it.
                  if (!docs.isEmpty()) {
                      String firstId = docs.get(0).getId();
                      System.out.println(" 1st doc ID of set: " + firstId);
                      assertFalse("First document's ID of current page is the same of the first document of previous page, should not happen.", prevId.equals(firstId));
                      prevId = firstId;
                  }
                  pageIndex++;
                  totalNbr += docs.size();
              } while (!docs.isEmpty());
              return totalNbr;
          }
      }
      

      Here is a javascript sample for case 1:

        // Case 1: requests successive pages without passing 'pageSize'.
        var pageNr = 0;
        var res;
        var totalNbr = 0;
        var prevId = '';
        do {
          if (pageNr > 0) {
            // res still holds the previous page here; the loop condition
            // guarantees it is not empty.
            prevId = res.get(0).id;
          }
          res = Document.Query(null, { 'query': 'SELECT * FROM Document',
            'currentPageIndex': ''+pageNr
          });
          Console.warn('#size: ' + res.length + '/' + pageNr);
          // Guard the access: an empty page has no first document.
          if (res.length > 0) {
            Console.warn('1st doc ID: ' + res.get(0).id);
          }
          totalNbr += res.length;
          pageNr++;
        } while (res.length > 0
                 && prevId != res.get(0).id // needed to avoid infinite loop as res.length is never 0
          );
      

      Here is a javascript sample for case 2:

        // Case 2: requests successive pages with an explicit 'pageSize'.
        var pageNr = 0;
        var res;
        var totalNbr = 0;
        var prevId = '';
        do {
          if (pageNr > 0) {
            // res still holds the previous page here; the loop condition
            // guarantees it is not empty.
            prevId = res.get(0).id;
          }
          res = Document.Query(null, { 'query': 'SELECT * FROM Document',
            'pageSize': 1000, // MUST be set to get paginated results, overridden (if higher) by 'nuxeo.pageprovider.default-max-page-size'
            'currentPageIndex': ''+pageNr
          });
          Console.warn('#size: ' + res.length + '/' + pageNr);
          // Guard the access: an empty page has no first document.
          if (res.length > 0) {
            Console.warn('1st doc ID: ' + res.get(0).id);
          }
          totalNbr += res.length;
          pageNr++;
        } while (res.length > 0
                 && prevId != res.get(0).id // needed to avoid infinite loop as res.length is never 0
          );
      

        Attachments

          Issue Links

            Activity

              People

              • Votes:
                0 Vote for this issue
                Watchers:
                5 Start watching this issue

                Dates

                • Created:
                  Updated:
                  Resolved:

                  Time Tracking

                  Estimated:
                  Original Estimate - Not Specified
                  Not Specified
                  Remaining:
                  Remaining Estimate - 0 minutes
                  0m
                  Logged:
                  Time Spent - 4 days, 4 hours
                  4d 4h