如何使用 Python 下载特定的 Google Drive 文件夹？答案

【问题标题】：How to download specific Google Drive folder using Python?如何使用 Python 下载特定的 Google Drive 文件夹？
【发布时间】：2016-12-24 12:16:34
【问题描述】：

我正在尝试从 Google 云端硬盘下载特定文件夹。

我试过这个例子 http://www.mwclearning.com/?p=1608 但它从 G-Drive 下载所有文件。

例如：假设我在 Google 云端硬盘中有两个文件夹：

A 文件夹有 -> 1、2 两个文件
B 文件夹有 -> 3、4、5 三个文件

如果我想下载文件夹 A，那么应该只下载 1、2 这两个文件。

任何建议或帮助都会很有帮助。

提前致谢。

【问题讨论】：

如果您不介意调用命令行工具，我喜欢使用 gdrive（github.com/prasmussen/gdrive），不过我是把要下载/上传的文件硬编码在脚本里的。

标签： python-2.7 google-drive-api google-api-python-client

【解决方案1】：

使用从 Drive API 下载的 credentials.json 进行下载：

from __future__ import print_function
import pickle
import os
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from oauth2client import client
from oauth2client import tools
from oauth2client.file import Storage
from apiclient.http import MediaFileUpload, MediaIoBaseDownload
import io
from apiclient import errors
from apiclient import http
import logging

from apiclient import discovery

# OAuth scopes requested when the user authorizes this script.
# If modifying these scopes, delete the file token.pickle: main() caches the
# granted credentials there and reuses them on later runs.
SCOPES = ['https://www.googleapis.com/auth/drive']


# To list folders
def listfolders(service, filid, des):
    """Recursively download the contents of a Drive folder into ``des``.

    All children of the Drive folder ``filid`` are listed, following
    ``nextPageToken`` so that folders with more entries than one result page
    are not truncated. Sub-folders are created below ``des`` when missing and
    walked recursively; every other item is passed to ``downloadfiles``.

    Args:
        service: Drive v3 service object.
        filid: ID of the Drive folder whose children are listed.
        des: Local directory that receives the content. It must already
            exist, because only the sub-folders are created here.

    Returns:
        List of the item dicts (``id``, ``name``, ``mimeType``) found directly
        inside ``filid``, across all result pages.
    """
    folder = []
    page_token = None
    while True:
        results = service.files().list(
            pageSize=1000, q="'" + filid + "' in parents",
            fields="nextPageToken, files(id, name, mimeType)",
            pageToken=page_token).execute()
        folder.extend(results.get('files', []))
        page_token = results.get('nextPageToken')
        if not page_token:
            break

    for item in folder:
        if item['mimeType'] == 'application/vnd.google-apps.folder':
            target = des + "/" + item['name']
            if not os.path.isdir(target):
                os.mkdir(target)
            print(item['name'])
            # Descend until only plain files are left.
            listfolders(service, item['id'], target)
        else:
            downloadfiles(service, item['id'], item['name'], des)
            print(item['name'])
    return folder


# To Download Files
def downloadfiles(service, dowid, name, dfilespath):
    """Download one Drive file to ``dfilespath + "/" + name``.

    The content is streamed chunk by chunk straight into the target file, so
    memory use does not grow with the size of the file.

    Args:
        service: Drive v3 service object.
        dowid: ID of the Drive file to download.
        name: File name to use inside ``dfilespath``.
        dfilespath: Existing local directory that receives the file.

    NOTE(review): ``get_media`` is used for every item; Google-native
    documents (Docs, Sheets, ...) presumably need ``files().export`` instead —
    confirm against the Drive API documentation.
    """
    request = service.files().get_media(fileId=dowid)
    with io.open(dfilespath + "/" + name, 'wb') as f:
        downloader = MediaIoBaseDownload(f, request)
        done = False
        while done is False:
            status, done = downloader.next_chunk()
            print("Download %d%%." % int(status.progress() * 100))

def main():
    """Authorize against the Drive v3 API and download one Drive folder.

    Credentials are cached in ``token.pickle``. When they are missing or no
    longer valid they are refreshed, or obtained through the local-server
    OAuth flow using ``credentials.json``, and then written back to the cache.

    The children of the configured folder are mirrored below ``./Folder``:
    sub-folders recursively through ``listfolders`` and plain files through
    ``downloadfiles``.
    """
    creds = None
    # The file token.pickle stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    # NOTE: unpickling can execute arbitrary code; only load a token.pickle
    # that this script wrote itself.
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)  # credentials.json download from drive API
            creds = flow.run_local_server()
        # Save the credentials for the next run
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)

    service = build('drive', 'v3', credentials=creds)

    # Enter the ID of the folder to download (taken from its shared link).
    # The inner single quotes are part of the Drive query built below.
    folder_id = "'PAST YOUR SHARED FOLDER ID'"

    # Collect every result page; a single request returns at most one page.
    items = []
    page_token = None
    while True:
        results = service.files().list(
            pageSize=1000, q=folder_id + " in parents",
            fields="nextPageToken, files(id, name, mimeType)",
            pageToken=page_token).execute()
        items.extend(results.get('files', []))
        page_token = results.get('nextPageToken')
        if not page_token:
            break

    if not items:
        print('No files found.')
        return

    print('Files:')
    base_dir = os.getcwd() + "/Folder"
    if not os.path.isdir(base_dir):
        os.mkdir(base_dir)

    for item in items:
        if item['mimeType'] == 'application/vnd.google-apps.folder':
            folderpath = base_dir + "/" + item['name']
            if not os.path.isdir(folderpath):
                os.mkdir(folderpath)
            listfolders(service, item['id'], folderpath)
        else:
            # Plain files at the top level go directly into ./Folder, the same
            # layout listfolders uses for files inside sub-folders.
            downloadfiles(service, item['id'], item['name'], base_dir)

# Start the download only when this file is run as a script, not on import.
if __name__ == '__main__':
    main()

【讨论】：

我可以下载带有公共链接的文件夹吗？
是的，您可以通过提供您在共享链接中获得的文件夹 ID 来下载
这里是文件夹 ID 的更精确说明：ploi.io/documentation/mysql/…

【解决方案2】：

尝试检查 Google Drive API documentation，您可以在此处查看使用 Python 执行文件下载的示例代码。

# Download a single Drive file into an in-memory buffer, chunk by chunk.
# `drive_service` is an authorized Drive service object created elsewhere.
file_id = '0BwwA4oUTeiV1UVNwOHItT0xfa2M'
request = drive_service.files().get_media(fileId=file_id)
fh = io.BytesIO()
downloader = MediaIoBaseDownload(fh, request)
done = False
while done is False:
    status, done = downloader.next_chunk()
    # Call form of print works on both Python 2 and Python 3.
    print("Download %d%%." % int(status.progress() * 100))

文件夹部分可以查看here获取方法。

更多信息，您可以查看tutorial和YT video。

【讨论】：

我找到了这个link，它通过指定fileID选择特定的Gdrive文件夹来解决问题
如果有人（像我一样）想知道 drive_service 是从哪里来的：您需要先安装库（例如 pip install google-api-python-client -t app/lib），然后可以使用 from lib.apiclient import discovery 和 lang_service = discovery.build('drive', 'v3') 获取它。这段代码改编自here，它更深入地解释了这些东西。
如何获取文件ID？或者它应该是文件夹ID

【解决方案3】：

这里只是专门处理递归下载文件夹的代码。

我试图保持重点，省略了教程中已经描述的代码。我希望您已经拥有要下载的文件夹ID。

elif not itemType.startswith('application/'): 部分用于跳过任何 Drive 格式的文档。但是，检查过于简单，因此您可能需要改进或删除它。

from __future__ import print_function
import pickle
import os.path
import io
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request

# OAuth scope for this snippet: read-only Drive access, since the code below
# only lists and downloads files.
# NOTE(review): presumably consumed by getCredentials(), which is not shown.
# If modifying these scopes, delete the file token.pickle.
SCOPES = ['https://www.googleapis.com/auth/drive.readonly']

def main():
    """Entry point, modelled on the quickStart.py example at
    https://developers.google.com/drive/api/v3/quickstart/python

    Builds a Drive v3 service from the credentials returned by
    ``getCredentials()`` and hands the configured folder to
    ``downloadFolder``.
    """
    drive = build('drive', 'v3', credentials=getCredentials())

    # Fill in both values before running.
    source_folder_id = ""
    target_dir = ""
    downloadFolder(drive, source_folder_id, target_dir)


def downloadFolder(service, fileId, destinationFolder):
    """Recursively download the Drive folder ``fileId`` into a local directory.

    ``destinationFolder`` is created when it does not exist yet. All children
    of the Drive folder are listed, following ``nextPageToken`` so that folders
    with more entries than one result page are not truncated. Sub-folders are
    handled by a recursive call; other items go to ``downloadFile`` unless
    their MIME type starts with ``application/``.

    Args:
        service: Drive v3 service object.
        fileId: ID of the Drive folder to download.
        destinationFolder: Local directory that receives the content; its
            parent directory must exist.
    """
    if not os.path.isdir(destinationFolder):
        os.mkdir(destinationFolder)

    items = []
    pageToken = None
    while True:
        results = service.files().list(
            pageSize=300,
            # Documented operand order for the parents collection.
            q="'{0}' in parents".format(fileId),
            fields="nextPageToken, files(id, name, mimeType)",
            pageToken=pageToken
            ).execute()
        items.extend(results.get('files', []))
        pageToken = results.get('nextPageToken')
        if not pageToken:
            break

    for item in items:
        itemName = item['name']
        itemId = item['id']
        itemType = item['mimeType']
        filePath = destinationFolder + "/" + itemName

        if itemType == 'application/vnd.google-apps.folder':
            print("Stepping into folder: {0}".format(filePath))
            downloadFolder(service, itemId, filePath)  # Recursive call
        elif not itemType.startswith('application/'):
            downloadFile(service, itemId, filePath)
        else:
            # Deliberately coarse filter meant to skip Drive-format documents;
            # it also skips every other application/* type (PDF, zip, ...).
            print("Unsupported file: {0}".format(itemName))


def downloadFile(service, fileId, filePath):
    """Stream the Drive file ``fileId`` into the local file ``filePath``.

    The directory that will contain ``filePath`` has to exist already.
    Progress is printed after each chunk that reports a status.
    """
    print("-> Downloading file with id: {0} name: {1}".format(fileId, filePath))
    media_request = service.files().get_media(fileId=fileId)

    # The context manager closes the file even when a chunk download raises.
    with io.FileIO(filePath, mode='wb') as target:
        fetcher = MediaIoBaseDownload(target, media_request, chunksize=1024*1024)

        finished = False
        while finished is False:
            progress, finished = fetcher.next_chunk(num_retries=2)
            if progress:
                print("Download %d%%." % int(progress.progress() * 100))
        print("Download Complete!")

【讨论】：

【解决方案4】：

请按照步骤 5-7 下载 tutorial link for downloading 中指定的 'client_id.json' 文件

在代码的最后一行，通过右键单击文件夹并启用共享链接，将“folder_id”更改为要从驱动器下载的文件夹的 ID。 id 将是“id=”之后的 URL 的一部分，并将“savepath”更改为您要将下载的文件夹保存到系统上的路径。

from __future__ import print_function

from googleapiclient import discovery
from httplib2 import Http
from oauth2client import file, client, tools
import os, io
from apiclient.http import MediaFileUpload, MediaIoBaseDownload

# OAuth scope handed to the authorization flow below.
SCOPES = 'https://www.googleapis.com/auth/drive'
# Credentials are looked up in storage.json first.
store = file.Storage('storage.json')
creds = store.get()
# Nothing stored, or the stored credentials are invalid: run the OAuth flow
# with client_id.json. The store is passed to run_flow — presumably so the new
# credentials are saved for the next run (confirm against oauth2client docs).
if not creds or creds.invalid:
    flow = client.flow_from_clientsecrets('client_id.json', SCOPES)
    creds = tools.run_flow(flow, store)
# Module-level Drive v3 service used by retaining_folder_structure().
DRIVE = discovery.build('drive', 'v3', http=creds.authorize(Http()))

def retaining_folder_structure(query, filepath):
    """Download everything matched by ``query`` into ``filepath``, keeping the tree.

    All result pages of the Drive query are collected, so folders with more
    entries than one page are not truncated. For each matched folder a local
    directory of the same name is created under ``filepath`` (when missing)
    and its children are processed recursively. Every other item is streamed
    chunk by chunk into ``filepath + '/' + name``.

    Args:
        query: Drive search query, e.g. ``"'<folder id>' in parents"``.
        filepath: Existing local directory that receives the content.
    """
    items = []
    page_token = None
    while True:
        results = DRIVE.files().list(
            fields="nextPageToken, files(id, name, kind, mimeType)",
            q=query, pageToken=page_token).execute()
        items.extend(results.get('files', []))
        page_token = results.get('nextPageToken')
        if not page_token:
            break

    for item in items:
        path = filepath + '/' + item['name']
        if item['mimeType'] == 'application/vnd.google-apps.folder':
            if not os.path.isdir(path):
                os.mkdir(path)
            retaining_folder_structure("'%s' in parents" % (item['id']), path)
        else:
            request = DRIVE.files().get_media(fileId=item['id'])
            # Write straight to disk instead of holding the file in memory.
            with io.open(path, 'wb') as f:
                downloader = MediaIoBaseDownload(f, request)
                done = False
                while done is False:
                    status, done = downloader.next_chunk()
                    print("Download %d%%." % int(status.progress() * 100))

# Replace folder_id with the Drive folder ID and savepath with an existing
# local directory before running.
retaining_folder_structure("'folder_id' in parents",'savepath')

【讨论】：

我今天正好需要这些，但遗憾的是，由于我不是 G Suite 的成员，Google 在 OAuth 同意屏幕中只允许我选择“外部”用户类型，这意味着我需要把应用发布到商店（或者他们所说的什么地方）并将其公开；如果不这样做，我就无法获得使用您的代码所需的凭据，真可惜！有什么解决方法吗？