【问题标题】:Downloading a directory tree with ftplib(使用 ftplib 下载目录树)
【发布时间】:2011-02-06 00:01:55
【问题描述】:

下面的代码不会下载子目录的内容;我该怎样才能把子目录也一并下载下来?

import ftplib
import configparser
import os

# Names of the remote sub-directories found in the FTP listing.
# add_directory() appends to this list; makeDir() and getFiles() read it.
directories = []

def add_directory(line):
    """Record the directory named on one line of an FTP ``LIST`` reply.

    Intended as the callback for ``ftp.retrlines('LIST', add_directory)``.
    The name is appended to the module-level ``directories`` list. Lines
    that do not start with ``d`` (not a directory) or that have fewer than
    nine columns are ignored.

    Parameters:
        line: one line of a Unix ``ls -l`` style listing, for example
            ``drwxr-xr-x 2 user group 4096 Jan 01 12:00 my folder``.
    """
    if not line.startswith('d'):
        return
    # Split on the first eight runs of whitespace only, so a directory
    # name that itself contains spaces stays in one piece.
    bits = line.split(None, 8)
    if len(bits) < 9:
        # Not the nine-column layout this parser understands.
        return
    directories.append(bits[8])

def makeDir(archiveTo):
    """Create a local folder under ``archiveTo`` for every known directory.

    Iterates the module-level ``directories`` list. A folder that already
    exists is reported on stdout and left untouched.

    Parameters:
        archiveTo: local folder that receives the backup. It is created
            on demand if it does not exist yet.
    """
    for dirname in directories:
        newDir = os.path.join(archiveTo, dirname)
        if os.path.isdir(newDir):
            print("Directory \"" + dirname + "\" already exists!")
        else:
            # makedirs (unlike mkdir) also creates archiveTo itself when
            # the backup target has not been created yet.
            os.makedirs(newDir)

def getFiles(archiveTo, ftp):
    """Download every plain file of the current remote folder.

    Entries returned by ``ftp.nlst()`` whose name appears in the
    module-level ``directories`` list are skipped, so the contents of
    sub-directories are NOT downloaded by this function.

    Parameters:
        archiveTo: local folder the files are written into.
        ftp: object providing ``nlst()`` and ``retrbinary()``; the calling
            code passes a logged-in ``ftplib.FTP`` already positioned in
            the remote folder to back up.
    """
    for filename in ftp.nlst():
        if filename in directories:
            continue
        # The context manager closes the local file even when the
        # transfer fails half-way.
        with open(os.path.join(archiveTo, filename), 'wb') as local_file:
            ftp.retrbinary('RETR %s' % filename, local_file.write)

def runBackups():
    """Back up every FTP connection described in ``connections.ini``.

    Each section of the INI file must provide the options ``uri``,
    ``username``, ``password``, ``backuppath`` and ``archiveto``. For each
    section the function logs in, changes to ``backuppath``, recreates the
    remote sub-directories locally and downloads the files of that folder.

    Raises:
        configparser.Error: if a section lacks one of the options.
        ftplib.all_errors: connection, login and transfer failures are
            not caught here.
    """
    # Load INI
    filename = 'connections.ini'
    # ConfigParser replaces SafeConfigParser, which was deprecated in
    # Python 3.2 and removed in Python 3.12.
    config = configparser.ConfigParser()
    config.read(filename)

    for section in config.sections():
        # Load settings
        uri = config.get(section, "uri")
        username = config.get(section, "username")
        password = config.get(section, "password")
        backupPath = config.get(section, "backuppath")
        archiveTo = config.get(section, "archiveto")

        # Forget the directories recorded for the previous connection;
        # otherwise they would be created (and skipped) for this one too.
        directories.clear()

        # Start back-ups. Leaving the with-block ends the FTP session
        # even when one of the steps raises.
        with ftplib.FTP(uri) as ftp:
            ftp.login(username, password)
            ftp.cwd(backupPath)

            # Map directory tree
            ftp.retrlines('LIST', add_directory)

            # Make directories locally
            makeDir(archiveTo)

            # Gather files
            getFiles(archiveTo, ftp)

    print()
    print("Back-ups complete.")
    print()

【问题讨论】:

    标签: python ftp ftplib


    【解决方案1】:

    使用 ftp.mlsd() 代替 ftp.nlst():

    import sys
    import ftplib
    import os
    from ftplib import FTP
    
    
    def fetchFiles(ftp, path, destination, overwrite=True):
        """Recursively download a remote folder over FTP.

        The remote folder ``path`` is recreated below ``destination``
        (``/Folder/`` fetched into ``D:/Data/`` ends up in
        ``D:/Data/Folder/``) and every sub-folder is mirrored the same way.

        Parameters
        ----------
        ftp         : ftplib.FTP object, connected and logged in
        path        : string ('/dir/folder/'), absolute remote folder
        destination : string ('D:/dir/folder/'), local folder where the
                      tree will be saved
        overwrite   : bool - overwrite a file if it already exists locally

        Raises
        ------
        SystemExit  : if the server refuses to change into ``path``
        """
        if not path.endswith('/'):
            path += '/'

        try:
            ftp.cwd(path)
        except ftplib.error_perm:
            # invalid entry (ensure input form: "/dir/folder/")
            print("error: could not change to " + path)
            sys.exit("ending session")

        # Local errors are no longer swallowed together with the "folder
        # already exists" case: only an existing folder is tolerated.
        local_dir = os.path.join(destination, path.lstrip('/'))
        if not os.path.isdir(local_dir):
            os.makedirs(local_dir)
            print('New folder made: ' + local_dir)
        print('Current folder: ' + path)

        # Read the whole listing now: the recursive calls below change the
        # working directory on the server.
        entries = list(ftp.mlsd())

        for name, facts in entries:
            kind = facts.get('type', '')
            # The "current dir" / "parent dir" entries are optional and may
            # appear at any position, so filter them by type instead of
            # assuming the first entry is the current folder.
            if kind.lower() in ('cdir', 'pdir'):
                continue
            if kind.lower() == 'file':
                fullpath = os.path.join(local_dir, name)
                if not overwrite and os.path.isfile(fullpath):
                    continue
                with open(fullpath, 'wb') as f:
                    # Absolute remote path: after a sub-folder has been
                    # visited the server is no longer positioned in `path`.
                    ftp.retrbinary('RETR ' + path + name, f.write)
                print(name + '  downloaded')
            elif kind.lower() == 'dir':
                fetchFiles(ftp, path + name + '/', destination, overwrite)
            else:
                print('Unknown type: ' + kind)
    
    
    if __name__ == "__main__":

        # Placeholder host, credentials and folders: replace before running.
        source = r'/Folder/'
        dest = r'D:/Data/'
        # FTP is a context manager: leaving the block ends the session and
        # closes the socket even when fetchFiles() raises or exits.
        with FTP('ftp address') as ftp:
            ftp.login('user', 'password')
            fetchFiles(ftp, source, dest, overwrite=True)
    

    【讨论】:

      【解决方案2】:

      使用 ftputil,一个快速的解决方案可能是:

      def download(folder):
          """Mirror the remote ``folder`` and everything below it.

          Relies on a module-level ``ftp`` object that offers ``walk()``,
          ``path.join()`` and ``download()`` (presumably an
          ``ftputil.FTPHost`` -- confirm against the calling code).

          NOTE(review): the remote path is reused unchanged as the local
          path, so a relative ``folder`` is created below the current
          working directory and an absolute one at that absolute location.
          """
          for dirpath, subdirs, filenames in ftp.walk(folder):
              print("Creating dir " + dirpath)
              # makedirs + exist_ok: re-running the download, or starting
              # from a nested folder such as "a/b", must not fail.
              os.makedirs(dirpath, exist_ok=True)
              for subdir in subdirs:
                  print("Subdirs " + subdir)
              for filename in filenames:
                  print(r"Copying File {0} \ {1}".format(dirpath, filename))
                  ftp.download(ftp.path.join(dirpath, filename), os.path.join(dirpath, filename))
      

      【讨论】:

        【解决方案3】:

        这是一个非常古老的问题,但我有一个类似的需求,我想以一种非常笼统的方式来满足。我最终编写了自己的解决方案,对我来说效果很好。我已经把它放在 Gist 上 https://gist.github.com/Jwely/ad8eb800bacef9e34dd775f9b3aad987

        并将其粘贴在下面,以防该 Gist 日后无法访问。

        示例用法:

        import ftplib
        # mysite, username, password, remote_dir and local_dir are placeholders:
        # they are not defined in this snippet and must be supplied by the caller.
        ftp = ftplib.FTP(mysite, username, password)
        # download_ftp_tree() is defined in the script listed further below.
        download_ftp_tree(ftp, remote_dir, local_dir)
        

        上面的代码将在 ftp 主机上寻找一个名为“remote_dir”的目录,然后将该目录及其全部内容复制到“local_dir”中。 它调用下面的脚本。

        import ftplib
        import os
        
        def _is_ftp_dir(ftp_handle, name, guess_by_extension=True):
            """Return True when ``name`` is a directory on the FTP server.

            :param ftp_handle: object providing ``pwd()`` and ``cwd()``, normally
                an authenticated ftplib.FTP instance
            :param name: remote path to probe
            :param guess_by_extension: when truthy, a name with a "." in the
                fourth to last position (a three character extension) is taken
                to be a file without asking the server
            :return: True for a directory, False otherwise
            """
            # Much faster than trying to change into every item, and right most
            # of the time. Names shorter than four characters cannot carry such
            # an extension and must not be indexed at [-4].
            if guess_by_extension and len(name) >= 4 and name[-4] == '.':
                return False

            original_cwd = ftp_handle.pwd()     # remember the current working directory
            try:
                ftp_handle.cwd(name)            # only a directory can be entered
            except ftplib.Error:
                # The server rejected the change: treat the item as a file.
                # Network failures (OSError/EOFError) are left to propagate.
                return False
            ftp_handle.cwd(original_cwd)        # set it back to what it was
            return True
        
        
        def _make_parent_dir(fpath):
            """Ensure the parent directory of ``fpath`` exists.

            All missing intermediate directories are created. A path without a
            directory component (a file in the current working directory) needs
            nothing and is left alone.

            :param fpath: path of the file about to be written
            """
            dirname = os.path.dirname(fpath)
            # An empty dirname used to recurse forever: os.mkdir('') can never
            # succeed and os.path.exists('') is always False.
            if dirname and not os.path.exists(dirname):
                os.makedirs(dirname, exist_ok=True)
                print("created {0}".format(dirname))
        
        
        def _download_ftp_file(ftp_handle, name, dest, overwrite):
            """Fetch one remote file ``name`` and store it locally as ``dest``.

            The parent directory of ``dest`` is created first. An existing
            ``dest`` is kept unless ``overwrite`` is exactly ``True``.
            """
            _make_parent_dir(dest)

            # Guard clause: nothing to do for a file we already have.
            if os.path.exists(dest) and overwrite is not True:
                print("already exists: {0}".format(dest))
                return

            with open(dest, 'wb') as local_file:
                ftp_handle.retrbinary("RETR {0}".format(name), local_file.write)
            print("downloaded: {0}".format(dest))
        
        
        def _mirror_ftp_dir(ftp_handle, name, overwrite, guess_by_extension):
            """Replicate a directory on an ftp server recursively.

            :param ftp_handle: an authenticated ftplib.FTP instance
            :param name: remote directory to copy; every item is stored locally
                under the same relative path the server reports for it
            :param overwrite: passed on to _download_ftp_file
            :param guess_by_extension: passed on to _is_ftp_dir
            """
            for item in ftp_handle.nlst(name):
                # Some servers include "." and ".." in the listing; following
                # them would recurse without end.
                if item.rstrip('/').rsplit('/', 1)[-1] in ('.', '..'):
                    continue
                # Forward the flag: it used to be dropped here, so callers
                # could never switch the extension heuristic off.
                if _is_ftp_dir(ftp_handle, item, guess_by_extension):
                    _mirror_ftp_dir(ftp_handle, item, overwrite, guess_by_extension)
                else:
                    _download_ftp_file(ftp_handle, item, item, overwrite)
        
        
        def download_ftp_tree(ftp_handle, path, destination, overwrite=False, guess_by_extension=True):
            """
            Downloads an entire directory tree from an ftp server to the local destination

            :param ftp_handle: an authenticated ftplib.FTP instance
            :param path: the folder on the ftp server to download
            :param destination: the local directory to store the copied folder; created if missing
            :param overwrite: set to True to force re-download of all files, even if they appear to exist already
            :param guess_by_extension: It takes a while to explicitly check if every item is a directory or a file.
                if this flag is set to True, it will assume any file ending with a three character extension ".???" is
                a file and not a directory. Set to False if some folders may have a "." in their names -4th position.

            NOTE(review): remote items are written to the local path the server reports for them, relative to
            ``destination``; an absolute ``path`` would presumably yield absolute local paths -- confirm before use.
            """
            os.makedirs(destination, exist_ok=True)
            # The mirror writes relative paths, so work from inside the
            # destination, but give the caller its working directory back.
            previous_cwd = os.getcwd()
            os.chdir(destination)
            try:
                _mirror_ftp_dir(ftp_handle, path, overwrite, guess_by_extension)
            finally:
                os.chdir(previous_cwd)
        

        【讨论】:

        • 太棒了,效果好极了。应该把它做成一个库!
        【解决方案4】:

        这应该可以解决问题:)

        import sys
        import ftplib
        import os
        from ftplib import FTP
        # Module-level connection used by downloadFiles() below; the host
        # and credentials are placeholders to be replaced with real values.
        ftp=FTP("ftp address")
        ftp.login("user","password")
        
        def downloadFiles(path,destination):
            """Recursively mirror the remote folder ``path`` below ``destination``.

            Uses the module-level ``ftp`` connection. Both arguments are strings
            of the form "/dir/folder/something/"; ``path`` is the absolute path
            of the root FOLDER of the file tree to download. "/a/b/" fetched
            into "/local/" is stored in "/local/a/b/".

            Raises SystemExit when the server refuses to change into ``path``.
            """
            try:
                ftp.cwd(path)
            except ftplib.error_perm:
                #invalid entry (ensure input form: "/dir/folder/something/")
                print("error: could not change to "+path)
                sys.exit("ending session")

            #clone path to destination
            local_dir=os.path.join(destination,path.lstrip("/"))
            if not os.path.isdir(local_dir):
                os.makedirs(local_dir)
                print(local_dir+" built")

            #list children (read once, before any recursion moves the server cwd):
            for name in ftp.nlst():
                if name in (".",".."):
                    continue
                remote=path+name
                try:
                    #this will check if the entry is a folder:
                    ftp.cwd(remote+"/")
                except ftplib.error_perm:
                    #not a folder with accessible content: download it.
                    #Absolute remote path, because the server cwd may still be
                    #inside a sub-folder visited earlier; the file goes into the
                    #mirrored folder instead of the destination root.
                    with open(os.path.join(local_dir,name),"wb") as local_file:
                        ftp.retrbinary("RETR "+remote,local_file.write)
                    print(name+" downloaded")
                else:
                    #a folder: explore it
                    downloadFiles(remote+"/",destination)
            return
        
        source="/ftproot/folder_i_want/"
        dest="/systemroot/where_i_want_it/"
        try:
            downloadFiles(source,dest)
        finally:
            #always end the FTP session, even when the download aborts
            ftp.quit()
        

        【讨论】:

        • 另外,您可能不想在第一个 except ftplib.error_perm 分支中直接退出,以防您的用户帐户对个别目录没有访问权限;把那里改成 pass 就可以了。
        【解决方案5】:

        这至少不是一件简单的事。在最简单的情况下,您只需假设服务器上只有文件和目录。但情况并非总是如此,还可能存在软链接、硬链接以及 Windows 风格的快捷方式。软链接和目录快捷方式尤其成问题,因为它们可能造成目录的循环引用(递归目录),这会让实现得比较简单的 FTP 抓取器不知所措。

        你将如何处理这样的递归目录取决于你的需要;您可能根本不关注软链接,或者您可能会尝试检测递归链接。检测递归链接本身就很棘手,你不能可靠地做到这一点。

        【讨论】:

          【解决方案6】:

          这是另一种选择:您可以尝试使用 ftputil 包,然后用它遍历(walk)远程目录并获取您的文件。

          【讨论】:

          • 真的复杂到需要我考虑使用一个包的地步吗?我觉得最多只需要 10 行代码。
          猜你喜欢
          • 1970-01-01
          • 1970-01-01
          • 1970-01-01
          • 1970-01-01
          • 1970-01-01
          • 1970-01-01
          • 1970-01-01
          • 2013-06-27
          相关资源
          最近更新 更多