Uploaded image for project: 'Nuxeo Drive '
  1. Nuxeo Drive
  2. NXDRIVE-2497

[Direct Edit] Fix removal of disappeared temporary downloaded file

    XML | Word | Printable

    Details

    • Type: Bug
    • Status: Resolved
    • Priority: Minor
    • Resolution: Fixed
    • Affects Version/s: 4.4.4
    • Fix Version/s: 4.5.1
    • Component/s: Direct Edit

      Description

      The following two tests fail on every run with a FileNotFoundError raised from DirectEdit._download() (full traceback below):

      $ python -m pytest -n0 tests/old_functional/test_direct_edit.py::TestDirectEdit::test_corrupted_download
      $ python -m pytest -n0 tests/old_functional/test_direct_edit.py::TestDirectEditNoSync::test_corrupted_download
      
          def test_corrupted_download(self):
              """Test corrupted downloads that finally works."""
          
              def request(*args, **kwargs):
                  """We need to inspect headers to catch if "Range" is defined.
                  If that header is set, it means that a download is resumed, and it should not as
                  a corrupted download must be restarted from ground.
                  """
                  headers = kwargs.get("headers", {})
                  assert "Range" not in headers
                  return original_request(*args, **kwargs)
          
              def save_to_file(*args, **kwargs):
                  """Make the download raise a CorruptedFile error for tries 1 and 2."""
                  nonlocal try_count
                  try_count += 1
          
                  if try_count < 2:
                      file_out = args[2]
                      file_out.write_bytes(b"invalid data")
                  else:
                      original_save_to_file(*args, **kwargs)
          
              original_save_to_file = self.engine_1.remote.operations.save_to_file
              original_request = self.engine_1.remote.client.request
          
              # Create the test file, it should be large enough to trigger chunk downloads (here 26 MiB)
              filename = "download corrupted.txt"
              doc_id = self.remote.make_file_with_blob(
                  "/", filename, b"Some content." * 1024 * 1024 * 2
              )
          
              # Start Direct Edit'ing the document
              with patch.object(
                  self.engine_1.remote.operations, "save_to_file", new=save_to_file
              ):
                  with patch.object(self.engine_1.remote.client, "request", new=request):
                      try_count = 0
                      url = f"nxfile/default/{doc_id}/file:content/{filename}"
      >               file = self.direct_edit._prepare_edit(
                          self.nuxeo_url, doc_id, download_url=url
                      )
      
      doc_id     = '2e3d157e-24b5-4522-b8b2-51e2b92c57f2'
      filename   = 'download corrupted.txt'
      original_request = <bound method NuxeoClient.request of NuxeoClient<host='http://192.168.2.39:8080/nuxeo/', version='11.5-SNAPSHOT'>>
      original_save_to_file = <bound method API.save_to_file of <nuxeo.operations.API object at 0x7f9b209c0250>>
      request    = <function MixinTests.test_corrupted_download.<locals>.request at 0x7f9b209d9e50>
      save_to_file = <function MixinTests.test_corrupted_download.<locals>.save_to_file at 0x7f9b209d9ee0>
      self       = <tests.old_functional.test_direct_edit.TestDirectEdit testMethod=test_corrupted_download>
      try_count  = 1
      url        = ('nxfile/default/2e3d157e-24b5-4522-b8b2-51e2b92c57f2/file:content/download '
       'corrupted.txt')
      
      tests/old_functional/test_direct_edit.py:742: 
      _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
      
      self = <DirectEdit id=2909881>, server_url = 'http://192.168.2.39:8080/nuxeo'
      doc_id = '2e3d157e-24b5-4522-b8b2-51e2b92c57f2'
      
          def _prepare_edit(
              self,
              server_url: str,
              doc_id: str,
              /,
              *,
              user: str = None,
              download_url: str = None,
          ) -> Optional[Path]:
              start_time = current_milli_time()
              engine = self._get_engine(server_url, doc_id=doc_id, user=user)
              if not engine:
                  return None
          
              # Avoid any link with the engine, remote_doc are not cached so we
              # can do that
              info = self._get_info(engine, doc_id)
              if not info:
                  return None
          
              if not self.use_autolock:
                  log.warning(
                      "Server-side document locking is disabled: you are not protected against concurrent updates."
                  )
          
              url = None
              url_info: Dict[str, str] = {}
              if download_url:
                  import re
          
                  urlmatch = re.match(
                      r"([^\/]+\/){3}(?P<xpath>.+)\/(?P<filename>[^\?]*).*",
                      download_url,
                      re.I,
                  )
                  if urlmatch:
                      url_info = urlmatch.groupdict()
          
                  url = server_url
                  if not url.endswith("/"):
                      url += "/"
                  url += download_url
          
              xpath = url_info.get("xpath")
              if not xpath and info.doc_type == "Note":
                  xpath = "note:note"
              elif not xpath or xpath == "blobholder:0":
                  xpath = "file:content"
              blob = info.get_blob(xpath)
              if not blob:
                  log.warning(
                      f"No blob associated with xpath {xpath!r} for file {info.path!r}"
                  )
                  return None
          
              filename = blob.name
              self.directEditStarting.emit(engine.hostname, filename)
          
              # Create local structure
              folder_name = safe_filename(f"{doc_id}_{xpath}")
              dir_path = self._folder / folder_name
              dir_path.mkdir(exist_ok=True)
          
              log.info(f"Editing {filename!r}")
              file_path = dir_path / filename
              file_out = self._get_tmp_file(doc_id, filename)
          
              try:
                  # Download the file
      >           tmp_file = self._download(
                      engine, info, file_path, file_out, blob, xpath, url=url
                  )
      
      blob       = Blob(name='download corrupted.txt', digest='ed4704e997b9e6bddbedb38926c106b2', digest_algorithm='md5', size=27262976, mimetype='text/plain', data='http://192.168.2.39:8080/nuxeo/nxfile/default/2e3d157e-24b5-4522-b8b2-51e2b92c57f2/file:content/download%20corrupted.txt?changeToken=2-0')
      dir_path   = PosixPath('/private/var/folders/zg/vkhwn19d0cz9vvq_t8rwq68c0000gn/T/c5f3cd2d/1/conf/edit/2e3d157e-24b5-4522-b8b2-51e2b92c57f2_file-content')
      doc_id     = '2e3d157e-24b5-4522-b8b2-51e2b92c57f2'
      download_url = ('nxfile/default/2e3d157e-24b5-4522-b8b2-51e2b92c57f2/file:content/download '
       'corrupted.txt')
      engine     = <Engine name='192.168.2.39', server_url='http://192.168.2.39:8080/nuxeo/', has_token=True, is_offline=False, uid='9e9e32a8608b11eba0411e00621ed686', type='NXDRIVE'>
      file_out   = PosixPath('/private/var/folders/zg/vkhwn19d0cz9vvq_t8rwq68c0000gn/T/c5f3cd2d/1/conf/edit/2e3d157e-24b5-4522-b8b2-51e2b92c57f2.dl/download corrupted.txt')
      file_path  = PosixPath('/private/var/folders/zg/vkhwn19d0cz9vvq_t8rwq68c0000gn/T/c5f3cd2d/1/conf/edit/2e3d157e-24b5-4522-b8b2-51e2b92c57f2_file-content/download corrupted.txt')
      filename   = 'download corrupted.txt'
      folder_name = '2e3d157e-24b5-4522-b8b2-51e2b92c57f2_file-content'
      info       = NuxeoDocumentInfo(root=None, name='download corrupted.txt', uid='2e3d157e-24b5-4522-b8b2-51e2b92c57f2', parent_uid=None, path='/default-domain/workspaces/test_corrupted_download-darwin-76769/download corrupted.txt', folderish=False, last_modification_time=datetime.datetime(2021, 1, 27, 10, 37, 29, 185000, tzinfo=tzutc()), last_contributor='ndt-hailey-37078', repository='default', doc_type='File', version='0.0', state='project', is_trashed=False, is_proxy=False, is_version=False, lock_owner=None, lock_created=None, permissions=['Write', 'WriteVersion', 'ReadProperties', 'ReadSecurity', 'Remove', 'ReadVersion', 'Read', 'WriteLifeCycle', 'ReadChildren', 'AddChildren', 'ReadLifeCycle', 'RemoveChildren', 'ReviewParticipant', 'ReadWrite', 'Browse', 'WriteProperties', 'ManageWorkflows'], properties={'uid:uid': None, 'uid:major_version': 0, 'uid:minor_version': 0, 'thumb:thumbnail': None, 'file:content': {'name': 'download corrupted.txt', 'mime-type': 'text/plain', 'encoding': None, 'digestAlgorithm': 'MD5', 'digest': 'ed4704e997b9e6bddbedb38926c106b2', 'length': '27262976', 'data': 'http://192.168.2.39:8080/nuxeo/nxfile/default/2e3d157e-24b5-4522-b8b2-51e2b92c57f2/file:content/download%20corrupted.txt?changeToken=2-0'}, 'common:icon-expanded': None, 'common:icon': '/icons/text.png', 'files:files': [], 'dc:description': None, 'dc:language': None, 'dc:coverage': None, 'dc:valid': None, 'dc:creator': 'ndt-hailey-37078', 'dc:modified': '2021-01-27T10:37:29.185Z', 'dc:lastContributor': 'ndt-hailey-37078', 'dc:rights': None, 'dc:expired': None, 'dc:format': None, 'dc:created': '2021-01-27T10:37:24.162Z', 'dc:title': 'download corrupted.txt', 'dc:issued': None, 'dc:nature': None, 'dc:subjects': [], 'dc:contributors': ['ndt-hailey-37078'], 'dc:source': None, 'dc:publisher': None, 'relatedtext:relatedtextresources': [], 'nxtag:tags': []})
      re         = <module 're' from '/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/re.py'>
      self       = <DirectEdit id=2909881>
      server_url = 'http://192.168.2.39:8080/nuxeo'
      start_time = 1611743845934
      url        = ('http://192.168.2.39:8080/nuxeo/nxfile/default/2e3d157e-24b5-4522-b8b2-51e2b92c57f2/file:content/download '
       'corrupted.txt')
      url_info   = {'filename': 'download corrupted.txt', 'xpath': 'file:content'}
      urlmatch   = <re.Match object; span=(0, 87), match='nxfile/default/2e3d157e-24b5-4522-b8b2-51e2b92c57>
      user       = None
      xpath      = 'file:content'
      
      nxdrive/direct_edit.py:517: 
      _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
      
      self = <DirectEdit id=2909881>
      engine = <Engine name='192.168.2.39', server_url='http://192.168.2.39:8080/nuxeo/', has_token=True, is_offline=False, uid='9e9e32a8608b11eba0411e00621ed686', type='NXDRIVE'>
      info = NuxeoDocumentInfo(root=None, name='download corrupted.txt', uid='2e3d157e-24b5-4522-b8b2-51e2b92c57f2', parent_uid=Non...ndt-hailey-37078'], 'dc:source': None, 'dc:publisher': None, 'relatedtext:relatedtextresources': [], 'nxtag:tags': []})
      file_path = PosixPath('/private/var/folders/zg/vkhwn19d0cz9vvq_t8rwq68c0000gn/T/c5f3cd2d/1/conf/edit/2e3d157e-24b5-4522-b8b2-51e2b92c57f2_file-content/download corrupted.txt')
      file_out = PosixPath('/private/var/folders/zg/vkhwn19d0cz9vvq_t8rwq68c0000gn/T/c5f3cd2d/1/conf/edit/2e3d157e-24b5-4522-b8b2-51e2b92c57f2.dl/download corrupted.txt')
      blob = Blob(name='download corrupted.txt', digest='ed4704e997b9e6bddbedb38926c106b2', digest_algorithm='md5', size=27262976, ...:8080/nuxeo/nxfile/default/2e3d157e-24b5-4522-b8b2-51e2b92c57f2/file:content/download%20corrupted.txt?changeToken=2-0')
      xpath = 'file:content'
      
          def _download(
              self,
              engine: "Engine",
              info: NuxeoDocumentInfo,
              file_path: Path,
              file_out: Path,
              blob: Blob,
              xpath: str,
              /,
              *,
              url: str = None,
          ) -> Optional[Path]:
              # Close to processor method - should try to refactor ?
              pair = None
              kwargs: Dict[str, Any] = {}
          
              if blob.digest:
                  # The digest is available in the Blob, use it and disable parameters check
                  # as 'digest' is not a recognized param for the Blob.Get operation.
                  kwargs["digest"] = blob.digest
                  kwargs["check_params"] = False
          
                  pair = engine.dao.get_valid_duplicate_file(blob.digest)
          
              # Remove the eventual temporary file. We do not want to be able to resume an
              # old download because of several issues and does not make sens for that feature.
              # See NXDRIVE-2112 and NXDRIVE-2116 for more context.
              file_out.unlink(missing_ok=True)
          
              if pair:
                  existing_file_path = engine.local.abspath(pair.local_path)
                  try:
                      # copyfile() is used to prevent metadata copy
                      shutil.copyfile(existing_file_path, file_out)
                  except FileNotFoundError:
                      pair = None
                  else:
                      log.info(
                          f"Local file matches remote digest {blob.digest!r}, "
                          f"copied it from {existing_file_path!r}"
                      )
                      if pair.is_readonly():
                          log.info(f"Unsetting readonly flag on copied file {file_out!r}")
                          unset_path_readonly(file_out)
          
              if not pair:
                  if url:
                      try:
                          for try_count in range(self._error_threshold):
                              try:
                                  engine.remote.download(
                                      quote(url, safe="/:"),
                                      file_path,
                                      file_out,
                                      blob.digest,
                                      callback=self.stop_client,
                                      is_direct_edit=True,
                                      engine_uid=engine.uid,
                                  )
                                  break
                              except CorruptedFile:
                                  self.directEditError.emit(
                                      "DIRECT_EDIT_CORRUPTED_DOWNLOAD_RETRY", []
                                  )
          
                                  # Remove the faultive tmp file
      >                           file_out.unlink()
      
      blob       = Blob(name='download corrupted.txt', digest='ed4704e997b9e6bddbedb38926c106b2', digest_algorithm='md5', size=27262976, mimetype='text/plain', data='http://192.168.2.39:8080/nuxeo/nxfile/default/2e3d157e-24b5-4522-b8b2-51e2b92c57f2/file:content/download%20corrupted.txt?changeToken=2-0')
      engine     = <Engine name='192.168.2.39', server_url='http://192.168.2.39:8080/nuxeo/', has_token=True, is_offline=False, uid='9e9e32a8608b11eba0411e00621ed686', type='NXDRIVE'>
      file_out   = PosixPath('/private/var/folders/zg/vkhwn19d0cz9vvq_t8rwq68c0000gn/T/c5f3cd2d/1/conf/edit/2e3d157e-24b5-4522-b8b2-51e2b92c57f2.dl/download corrupted.txt')
      file_path  = PosixPath('/private/var/folders/zg/vkhwn19d0cz9vvq_t8rwq68c0000gn/T/c5f3cd2d/1/conf/edit/2e3d157e-24b5-4522-b8b2-51e2b92c57f2_file-content/download corrupted.txt')
      info       = NuxeoDocumentInfo(root=None, name='download corrupted.txt', uid='2e3d157e-24b5-4522-b8b2-51e2b92c57f2', parent_uid=None, path='/default-domain/workspaces/test_corrupted_download-darwin-76769/download corrupted.txt', folderish=False, last_modification_time=datetime.datetime(2021, 1, 27, 10, 37, 29, 185000, tzinfo=tzutc()), last_contributor='ndt-hailey-37078', repository='default', doc_type='File', version='0.0', state='project', is_trashed=False, is_proxy=False, is_version=False, lock_owner=None, lock_created=None, permissions=['Write', 'WriteVersion', 'ReadProperties', 'ReadSecurity', 'Remove', 'ReadVersion', 'Read', 'WriteLifeCycle', 'ReadChildren', 'AddChildren', 'ReadLifeCycle', 'RemoveChildren', 'ReviewParticipant', 'ReadWrite', 'Browse', 'WriteProperties', 'ManageWorkflows'], properties={'uid:uid': None, 'uid:major_version': 0, 'uid:minor_version': 0, 'thumb:thumbnail': None, 'file:content': {'name': 'download corrupted.txt', 'mime-type': 'text/plain', 'encoding': None, 'digestAlgorithm': 'MD5', 'digest': 'ed4704e997b9e6bddbedb38926c106b2', 'length': '27262976', 'data': 'http://192.168.2.39:8080/nuxeo/nxfile/default/2e3d157e-24b5-4522-b8b2-51e2b92c57f2/file:content/download%20corrupted.txt?changeToken=2-0'}, 'common:icon-expanded': None, 'common:icon': '/icons/text.png', 'files:files': [], 'dc:description': None, 'dc:language': None, 'dc:coverage': None, 'dc:valid': None, 'dc:creator': 'ndt-hailey-37078', 'dc:modified': '2021-01-27T10:37:29.185Z', 'dc:lastContributor': 'ndt-hailey-37078', 'dc:rights': None, 'dc:expired': None, 'dc:format': None, 'dc:created': '2021-01-27T10:37:24.162Z', 'dc:title': 'download corrupted.txt', 'dc:issued': None, 'dc:nature': None, 'dc:subjects': [], 'dc:contributors': ['ndt-hailey-37078'], 'dc:source': None, 'dc:publisher': None, 'relatedtext:relatedtextresources': [], 'nxtag:tags': []})
      kwargs     = {'check_params': False, 'digest': 'ed4704e997b9e6bddbedb38926c106b2'}
      pair       = None
      self       = <DirectEdit id=2909881>
      try_count  = 0
      url        = ('http://192.168.2.39:8080/nuxeo/nxfile/default/2e3d157e-24b5-4522-b8b2-51e2b92c57f2/file:content/download '
       'corrupted.txt')
      xpath      = 'file:content'
      
      nxdrive/direct_edit.py:360: 
      _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
      
      self = PosixPath('/private/var/folders/zg/vkhwn19d0cz9vvq_t8rwq68c0000gn/T/c5f3cd2d/1/conf/edit/2e3d157e-24b5-4522-b8b2-51e2b92c57f2.dl/download corrupted.txt')
      missing_ok = False
      
          def unlink(self, missing_ok=False):
              """
              Remove this file or link.
              If the path is a directory, use rmdir() instead.
              """
              try:
      >           self._accessor.unlink(self)
      E           FileNotFoundError: [Errno 2] No such file or directory: '/private/var/folders/zg/vkhwn19d0cz9vvq_t8rwq68c0000gn/T/c5f3cd2d/1/conf/edit/2e3d157e-24b5-4522-b8b2-51e2b92c57f2.dl/download corrupted.txt'
      

      Fix

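      Use the missing_ok=True kwarg: in DirectEdit._download() (nxdrive/direct_edit.py), change the file_out.unlink() call in the "except CorruptedFile" handler to file_out.unlink(missing_ok=True), so that a temporary download file that has already disappeared no longer raises FileNotFoundError and the download can be retried. The same method already does this when it removes the temporary file before starting the download.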

        Attachments

          Issue Links

            Activity

              People

              • Votes:
                0 Vote for this issue
                Watchers:
                2 Start watching this issue

                Dates

                • Created:
                  Updated:
                  Resolved:

                  Time Tracking

                  Estimated:
                  Original Estimate - Not Specified
                  Not Specified
                  Remaining:
                  Remaining Estimate - 0 minutes
                  0m
                  Logged:
                  Time Spent - 5 minutes
                  5m