本章内容

  1、rawlog处理

  2、域名item接口刷新

  3、备案结果查询

  4、多级域名中取主域

  5、发送邮件

  6、通过API获取CDN edge IP

  7、多线程下载

 

 

1、rawlog处理

  脚本里面涉及的内容

  1、使用多CPU处理

  2、UTC与GMT-8时间的转换

  3、一个目录下对子目录的文件遍历

    工作中，Python脚本所在目录下面有多个panther-*目录

  4、gzip文件的读取处理

Parsing Per-Hit (PerHit) Log using Python3 (incl. Multi-Thread version)

#!/usr/bin/env python3
# coding: utf-8
 
import os
import sys
import traceback
import re
import gzip
from datetime import datetime
from dateutil import tz
 
# Directory that main() lists for per-date sub-directories; the converted
# output files are written directly into this same directory.
base_path = "/home/xuanjia/static.trthi.com"
# Leading part of every output file name built in main().
file_name_prefix = "F114BC2216604A2C93AF5F6821168CA5_"
# Trailing part of every output file name built in main(). The identifier's
# spelling ("sufix") is left as-is because main() refers to it by this name.
file_name_sufix = "_pca_cn_cas_001.log"
 
def conv_date(input_date, input_hour):
    """Shift a UTC date/hour pair into Asia/Shanghai time.

    Args:
        input_date: UTC calendar date formatted as ``YYYYMMDD``.
        input_hour: UTC hour of day formatted as ``HH``.

    Returns:
        A two-item list ``[date, hour]`` holding the equivalent
        Asia/Shanghai date (``YYYYMMDD``) and hour (``HH``).

    Raises:
        ValueError: if the inputs do not match the ``%Y%m%d %H`` layout.
    """
    utc_moment = datetime.strptime(input_date + " " + input_hour, "%Y%m%d %H")
    utc_moment = utc_moment.replace(tzinfo=tz.gettz('UTC'))
    shanghai_moment = utc_moment.astimezone(tz.gettz('Asia/Shanghai'))
    return [shanghai_moment.strftime(layout) for layout in ("%Y%m%d", "%H")]
 
# Names of the space-separated fields of one per-hit log line, in file order.
_PER_HIT_FIELDS = (
    "Event-Type", "Site-ID", "Date", "Time", "C-IP", "CS-UserName",
    "S-SiteName", "S-ComputerName", "S-IP", "S-Port", "CS-Method", "CS-URI",
    "CS-URI-Query", "SC-Status", "SC-Win32-Status", "SC-Bytes", "CS-Bytes",
    "Time-Taken", "CS-Version", "CS-Host", "CS-UserAgent", "CS-Cookie",
    "CS-Referer", "SC-Sub-Status", "CS-Range", "SC-Initial", "SC-Complete",
    "SC-ContentType", "Protocol", "SC-Bytes-Body",
    "Bytes-Origin-Uncompressed", "C-RemotePort",
)


def _is_date_dir(parent, name):
    """Return True if ``parent/name`` is a directory named like ``YYYYMMDD``."""
    if not os.path.isdir(os.path.join(parent, name)):
        return False
    try:
        datetime.strptime(name, "%Y%m%d")
    except ValueError:
        return False
    return True


def _format_access_line(input_line, utc_zone, local_zone):
    """Convert one per-hit log line into an access-log style line.

    Args:
        input_line: raw line; fields are separated by single spaces.
        utc_zone: tzinfo the line's Date/Time fields are interpreted in.
        local_zone: tzinfo the timestamp is converted to before formatting.

    Returns:
        The converted line, without a trailing newline.

    Raises:
        ValueError: if the line has fewer fields than ``_PER_HIT_FIELDS`` or
            its Date/Time fields do not parse.
    """
    fields = input_line.split(' ')
    if len(fields) < len(_PER_HIT_FIELDS):
        raise ValueError("expected at least {0} fields, got {1}".format(
            len(_PER_HIT_FIELDS), len(fields)))
    hit = dict(zip(_PER_HIT_FIELDS, fields))

    local_stamp = (
        datetime.strptime(hit["Date"] + " " + hit["Time"], "%Y-%m-%d %H:%M:%S")
        .replace(tzinfo=utc_zone)
        .astimezone(local_zone)
        .strftime("%d/%b/%Y:%H:%M:%S +08:00")
    )
    return " ".join([
        hit["C-IP"],
        "-",
        "-",
        "[" + local_stamp + "]",
        "\"" + hit["CS-Method"],
        "http://" + hit["CS-Host"] + hit["CS-URI"] + "?" + hit["CS-URI-Query"],
        hit["CS-Version"] + "\"",
        hit["SC-Status"],
        hit["SC-Bytes"],
        "\"" + hit["CS-Referer"] + "\"",
        "\"" + hit["CS-UserAgent"] + "\"",
        "\"-\"",
        hit["S-IP"],
    ])


def _convert_file(input_path, output_f, utc_zone, local_zone):
    """Append the converted lines of one gzip per-hit log to ``output_f``.

    A malformed line is reported and skipped so the rest of the file is still
    converted. Any other failure (unreadable or corrupt gzip, write error) is
    reported with a traceback and the file path, and the file is abandoned.
    """
    try:
        with gzip.open(input_path, 'rt', encoding='utf-8') as input_f:
            for line_no, input_line in enumerate(input_f, 1):
                try:
                    converted = _format_access_line(
                        input_line, utc_zone, local_zone)
                except ValueError as e:
                    print("{0}:{1}: skipped malformed line: {2}".format(
                        input_path, line_no, e))
                    continue
                output_f.write(converted + '\n')
    except Exception:
        # Best-effort batch job: report the broken file and move on.
        traceback.print_exc(file=sys.stdout)
        print(input_path)


def main():
    """Convert the gzip per-hit logs under ``base_path`` into hourly files.

    Every ``YYYYMMDD`` sub-directory of ``base_path`` is treated as one UTC
    day. For each UTC hour of each day, one output file is written into
    ``base_path``; its name carries the Asia/Shanghai date and hour returned
    by ``conv_date``. It receives the converted lines of every file below the
    day directory whose name contains ``_<date>_<hour>_`` and does not
    contain ``_upstream_``.

    Entries of ``base_path`` that are not date-named directories are ignored;
    this includes the output files of an earlier run, which live in the same
    directory. The function returns normally instead of calling ``exit()``.
    """
    # Resolve the time zones once instead of twice per log line.
    utc_zone = tz.gettz('UTC')
    local_zone = tz.gettz('Asia/Shanghai')

    array_subdirs = [
        name for name in os.listdir(base_path)
        if _is_date_dir(base_path, name)
    ]
    array_hours = ['{0:02}'.format(i) for i in range(24)]

    for hour in array_hours:
        for subdir in array_subdirs:
            f_name = conv_date(subdir, hour)
            print(f_name)
            output_path = os.path.join(
                base_path,
                file_name_prefix + f_name[0] + "_" + f_name[1]
                + file_name_sufix)
            wanted_tag = "_" + subdir + "_" + hour + "_"
            # The input is decoded as UTF-8, so write the output as UTF-8 too
            # rather than in whatever the locale default happens to be.
            with open(output_path, 'w', encoding='utf-8') as output_f:
                for root, _dirs, files in os.walk(
                        os.path.join(base_path, subdir)):
                    for file_name in files:
                        if "_upstream_" in file_name:
                            continue
                        if wanted_tag not in file_name:
                            continue
                        _convert_file(os.path.join(root, file_name),
                                      output_f, utc_zone, local_zone)
 
# Run the conversion only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
Single thread version:

相关文章:

  • 2022-12-23
  • 2021-10-26
  • 2022-01-21
  • 2021-05-12
  • 2021-05-22
  • 2021-06-26
  • 2022-01-18
  • 2022-12-23
猜你喜欢
  • 2022-12-23
  • 2021-06-17
  • 2022-12-23
  • 2022-02-05
  • 2022-02-24
  • 2022-02-10
  • 2022-02-15
相关资源
相似解决方案