如果您使用 Python 从 Google Cloud Storage 下载数据,并希望在本地保持相同的文件夹结构,请参考我用 Python 编写的下面这段代码。
选项 1
import logging
import os
import shlex

from google.cloud import storage
def findOccurrences(s, ch):
    """Return the indices at which ``ch`` occurs in ``s``.

    Used to locate the '/' separators in a blob path so that the matching
    folders can be created in local storage.
    """
    positions = []
    for index, letter in enumerate(s):
        if letter == ch:
            positions.append(index)
    return positions
def _relative_blob_name(blob_name, prefix):
    """Return ``blob_name`` with the leading ``prefix`` and any leading '/' removed."""
    # Strip only the leading prefix: str.replace would also delete later
    # occurrences of the same text inside the object name.
    if blob_name.startswith(prefix):
        blob_name = blob_name[len(prefix):]
    return blob_name.lstrip("/")


def download_from_bucket(bucket_name, blob_path, local_path):
    """Download every object under a bucket prefix, mirroring its folder tree.

    Args:
        bucket_name: Bucket name, without the ``gs://`` scheme.
        blob_path: Object-name prefix inside the bucket, e.g. ``training/data``.
            A trailing slash is optional.
        local_path: Local directory that receives the files; it is created
            if it does not exist.
    """
    # Create this folder locally
    os.makedirs(local_path, exist_ok=True)

    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)

    for blob in bucket.list_blobs(prefix=blob_path):
        # Names ending in "/" are skipped: they denote folders, not files.
        if blob.name.endswith("/"):
            continue

        relative_name = _relative_blob_name(blob.name, blob_path)
        if not relative_name:
            # The prefix itself names a single object; keep its base name
            # instead of trying to write onto the directory path.
            relative_name = blob.name.rsplit("/", 1)[-1]

        # Object names always use "/"; rebuild the path with the local
        # separator and drop empty segments produced by doubled slashes.
        segments = [part for part in relative_name.split("/") if part]
        downloadpath = os.path.join(local_path, *segments)

        # Create all intermediate folders in one call; exist_ok avoids a
        # failure when several blobs share the same folder.
        os.makedirs(os.path.dirname(downloadpath), exist_ok=True)

        logging.info(downloadpath)
        blob.download_to_filename(downloadpath)

    logging.info('Blob {} downloaded to {}.'.format(blob_path, local_path))
# Example invocation: adjust the three values below before running.
bucket_name, blob_path, local_dir = (
    'google-cloud-storage-bucket-name',  # bucket name only, do not use gs://
    'training/data',                     # blob path in the bucket where the data is stored
    'local-folder name',                 # local folder that receives the training data
)
download_from_bucket(bucket_name, blob_path, local_path=local_dir)
选项 2:使用 gsutil sdk
下面是通过 python 程序执行此操作的另一种选择。
def download_bucket_objects(bucket_name, blob_path, local_path):
    """Recursively copy a bucket folder to a local path by shelling out to gsutil.

    Args:
        bucket_name: Bucket name, without the ``gs://`` scheme.
        blob_path: Folder (object-name prefix) inside the bucket.
        local_path: Local destination handed to ``gsutil cp -r``.

    Returns:
        The command line that was passed to the shell. The exit status of
        gsutil is not checked.
    """
    # Quote both operands so that spaces or shell metacharacters in a name
    # cannot split the argument or inject extra commands. shlex.quote targets
    # POSIX shells and returns already-safe strings unchanged.
    source = shlex.quote(
        "gs://{bucketname}/{blobpath}".format(bucketname=bucket_name, blobpath=blob_path)
    )
    destination = shlex.quote(local_path)
    command = "gsutil cp -r {source} {destination}".format(
        source=source, destination=destination
    )
    os.system(command)
    return command
选项 3 - 没有 python,直接使用终端和谷歌 SDK
先决条件:已安装并初始化 Google Cloud SDK($ gcloud init)
命令参考以下链接:
https://cloud.google.com/storage/docs/gsutil/commands/cp