【发布时间】:2020-06-14 15:44:24
【问题描述】:
我是 Python 新手,确信可以优化以下内容,但是我在脚本的最后一步遇到了问题。
目标不是下载之前已下载的文件。此时我将下载记录在一个名为 download_history.log 的文件中
因此,我需要在此处实施检查以执行以下检查日志 - 如果它存在于日志中,则不执行任何操作,如果不存在则移动到下一个文件下载文件并将其登录到文件中。
任何帮助将不胜感激。
#!/usr/bin/env python3
import boto
import sys, os
import zipfile
import shutil
import glob
import re
from boto.s3.key import Key
from boto.exception import S3ResponseError
#Make the download files
DOWNLOAD_LOCATION_PATH = os.path.expanduser("~") + "/AWSSplunk/Downloads/"
if not os.path.exists(DOWNLOAD_LOCATION_PATH):
print ("Making download directory")
os.mkdir(DOWNLOAD_LOCATION_PATH)
#Delete Output Folder if it exsists
OUTPUT_FOLDER = os.path.expanduser("~") + "/AWSSplunk/Output/"
shutil.rmtree(OUTPUT_FOLDER)
#Define the AWS Bucket
def backup_s3_folder():
BUCKET_NAME = "my-bucket-name"
AWS_ACCESS_KEY_ID= os.getenv("##################")
AWS_ACCESS_SECRET_KEY = os.getenv("#########################")
conn = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_ACCESS_SECRET_KEY)
bucket = conn.get_bucket(BUCKET_NAME)
#goto through the list of files
bucket_list = bucket.list()
for l in bucket_list:
key_string = str(l.key)
s3_path = DOWNLOAD_LOCATION_PATH + key_string
try:
# Add files to the log file
print ("Downloading file ", key_string)
file_object = open('download_history.log', 'a')
file_object.write(key_string)
file_object.write("\n")
# Working code
file_object.close()
l.get_contents_to_filename(s3_path)
except (OSError,S3ResponseError) as e:
pass
# check if the file has been downloaded locally
if not os.path.exists(s3_path):
try:
os.makedirs(s3_path)
except OSError as exc:
# let guard againts race conditions
import errno
if exc.errno != errno.EEXIST:
raise
if __name__ == '__main__':
backup_s3_folder()
# Start the unzipping process
print("Unzipping Starting")
dir_path = os.path.expanduser("~") + "/AWSSplunk/Downloads/"
for path, dir_list, file_list in os.walk(dir_path):
for file_name in file_list:
if file_name.endswith(".zip"):
abs_file_path = os.path.join(path, file_name)
parent_path = os.path.split(abs_file_path)[0]
output_folder_name = os.path.splitext(abs_file_path)[0]
output_path = os.path.join(parent_path, output_folder_name)
zip_obj = zipfile.ZipFile(abs_file_path, 'r')
zip_obj.extractall(output_path)
zip_obj.close()
print("Unzipping Completed")
# Start moving files to output
print("Moving Files")
FILE_LOCATION_PATH = os.path.expanduser("~") + "/AWSSplunk/Output/"
if not os.path.exists(FILE_LOCATION_PATH):
print ("Making download directory")
os.mkdir(FILE_LOCATION_PATH)
# .log files move
for root, dirs, files in os.walk(dir_path):
for file in files:
if file.endswith('.log'):
count = 1
destination_file = os.path.join(FILE_LOCATION_PATH, file)
while os.path.exists(destination_file):
destination_file = os.path.join(FILE_LOCATION_PATH, f"{file}_{count}")
count += 1
shutil.move(os.path.join(root, file), destination_file)
# .txt files move
for root, dirs, files in os.walk(dir_path):
for file in files:
if file.endswith('.txt'):
count = 1
destination_file = os.path.join(FILE_LOCATION_PATH, file)
while os.path.exists(destination_file):
destination_file = os.path.join(FILE_LOCATION_PATH, f"{file}_{count}")
count += 1
shutil.move(os.path.join(root, file), destination_file)
# .json files move
for root, dirs, files in os.walk(dir_path):
for file in files:
if file.endswith('.json'):
count = 1
destination_file = os.path.join(FILE_LOCATION_PATH, file)
while os.path.exists(destination_file):
destination_file = os.path.join(FILE_LOCATION_PATH, f"{file}_{count}")
count += 1
shutil.move(os.path.join(root, file), destination_file)
print("Files Move Complete")
# Delete Directory
print("Cleaning up Downloads Directory")
shutil.rmtree(DOWNLOAD_LOCATION_PATH)
# Remove EFR Audit Logs stratinbg with 2020
print("Remove the encrypted Audit Logs")
pattern = "^(2020)"
FILE_LOCATION_PATH = os.path.expanduser("~") + "/AWSSplunk/Output/"
for root, dirs, files in os.walk(FILE_LOCATION_PATH):
for file in filter(lambda x: re.match(pattern, x), files):
os.remove(os.path.join(root, file))
# Remove EFR Audit Logs stratinbg with EFR
pattern = "^(EFR)"
FILE_LOCATION_PATH = os.path.expanduser("~") + "/AWSSplunk/Output/"
for root, dirs, files in os.walk(FILE_LOCATION_PATH):
for file in filter(lambda x: re.match(pattern, x), files):
os.remove(os.path.join(root, file))
# Remove EFR Audit Logs stratinbg with 2019
pattern = "^(2019)"
FILE_LOCATION_PATH = os.path.expanduser("~") + "/AWSSplunk/Output/"
for root, dirs, files in os.walk(FILE_LOCATION_PATH):
for file in filter(lambda x: re.match(pattern, x), files):
os.remove(os.path.join(root, file))
# Script clean up
print("Script Complete")
#with open("download_history.log", "a") as myfile:
# myfile.write('New Line\n')
【问题讨论】:
-
如果你想在 download_history.log 中搜索要下载的文件名,你可以像这个答案建议的那样做:stackoverflow.com/a/4940068/8446061
标签: python amazon-web-services amazon-s3 aws-sdk boto3