【问题标题】:Python Requests Stream Via Tor - Connection Dies（Python 通过 Tor 流式请求 —— 连接中断）
【发布时间】:2020-12-06 00:49:48
【问题描述】:

我正在使用 python 请求库以多处理方式在“onionurl”下载文件,以从 tor 服务下载许多文件。

下面是我目前使用的相关代码。

但是，这些文件下载一两分钟后连接就会中断。流式下载停止时没有抛出任何错误，只是打印出“closing text file”。这导致无法完整下载托管在这些 onion 服务器上的文件——每个文件都有数百 GB。

对于解决此问题的任何帮助将不胜感激。

    session = requests.session()
    session.proxies = {}
    session.proxies['http'] = 'socks5h://localhost:9050'
    session.proxies['https'] = 'socks5h://localhost:9050'
    #print(onionurlforrequest)
    

    url = onionurl

    try:
        if not os.path.isdir(foldername):
            os.makedirs(foldername)
        # download the body of response by chunk, not immediately
        with session.get(url, stream=True, verify=False, timeout=1000000) as response:
            # get the total file size
            file_size = int(response.headers.get("Content-Length", 0))
            print(file_size)
            # get the file name

            filename = dataloc


            with open(filename, "wb") as text_file: 
                for chunk in response.iter_content(chunk_size=1024):

                    text_file.write(chunk)
 
                    if (file_size  > 1000000):
                        filesizemb = file_size / 1000000
                    else:
                        filesizemb = 1
            print("closing text file")
            text_file.close()

【问题讨论】:

    标签: python python-3.x python-requests multiprocessing tor


    【解决方案1】:

    设法解决它,只需接受连接将断开并编写一个新函数以在确切的偏移量处恢复下载,该问题的理论在此问题中进行了解释 - How to resume file download in Python?

    我的代码(警告,混乱):

    def onionrequestthreadeddataleakdownloadresume(onionurl, resume_byte_pos):
        """Resume a partially downloaded file from a Tor onion service.

        Parameters:
            onionurl: two-element list ``[companyname, url]`` (the same shape
                the multiprocessing pool passes to the initial downloader).
            resume_byte_pos: number of bytes already on disk; sent as an HTTP
                ``Range`` header so the server continues from that offset.

        The data is appended to
        ``/media/archangel/Elements/clop/dataleaks/<companyname>/<url basename>``.
        All exceptions are logged/printed and swallowed so the caller's retry
        loop can simply re-check the on-disk size and call this again.
        """
        print("rerunning")
        companyname = onionurl[0]
        onionurl = onionurl[1]

        dataloc = '/media/archangel/Elements/clop/dataleaks/'
        foldername = dataloc
        dataloc = dataloc + companyname + "/"
        try:
            if not os.path.isdir(dataloc):
                os.mkdir(dataloc)
        except Exception as e:
            print(e)
            print("folder not created")

        # Final on-disk path: <company folder>/<basename of the URL>.
        dataloc = dataloc + os.path.basename(onionurl)

        try:
            # Route everything through the local Tor SOCKS proxy; socks5h
            # makes the proxy itself resolve the .onion hostname.
            session = requests.session()
            session.proxies = {
                'http': 'socks5h://localhost:9050',
                'https': 'socks5h://localhost:9050',
            }

            print("dataloc")
            print(dataloc)
            print("onionurl")
            print(onionurl)
            url = onionurl

            try:
                if not os.path.isdir(foldername):
                    os.makedirs(foldername)

                # Watchdog so a stalled Tor circuit cannot hang this worker
                # forever.  NOTE(review): ``Timeout`` is assumed to be
                # imported elsewhere in this file -- confirm its origin.
                try:
                    timeout = Timeout(20)
                    timeout.start()
                except Exception as ex:
                    print(ex)

                # 'Accept-Encoding': None disables transparent compression so
                # Content-Length matches the bytes written to disk; Range asks
                # the server to continue at the current offset.
                resume_header = {'Accept-Encoding': None,
                                 'Range': 'bytes=%d-' % resume_byte_pos}
                try:
                    with session.get(url, stream=True, verify=False,
                                     headers=resume_header, timeout=600) as response:
                        # NOTE(review): on a 206 partial response this is the
                        # size of the REMAINING bytes, not the whole file.
                        file_size = int(response.headers['Content-Length'])
                        print(file_size)
                        try:
                            # Append mode: the resumed bytes extend the
                            # partial file already on disk.
                            with open(dataloc, "ab") as text_file:
                                for chunk in response.iter_content(chunk_size=1024 * 1024):
                                    # Skip keep-alive chunks; flush each write
                                    # so progress survives a dropped circuit.
                                    if chunk:
                                        text_file.write(chunk)
                                        text_file.flush()
                        except Exception as ex:
                            logging.error(f'write failed with error: {ex}')
                            print(ex)

                        print("exited with for file")
                except Exception as ex:
                    logging.error(f'Request failed with error: {ex}')
                    print(ex)
            except Exception as ex:
                logging.error(f'Attempt failed with error: {ex}')
                print(ex)

            print("closing text file")
        except Exception as e:
            print("FAILED DOWNLOAD 5")
            print(e)
    
    
    
    
    
    
    
    
    
    
    
    
    def _resume_until_complete(companyname, onionurl, filename, file_size):
        """Re-invoke the resume downloader until the file on disk reaches
        ``file_size`` bytes.

        Consolidates the three copy-pasted retry loops of the original.  Like
        the original, this can loop indefinitely if the server ignores Range
        requests or reports an inconsistent Content-Length.
        """
        file_size_offline = Path(filename).stat().st_size
        while file_size_offline != file_size:
            print(file_size_offline)
            print(file_size)
            print("file size incomplete")
            try:
                onionrequestthreadeddataleakdownloadresume(
                    [companyname, onionurl], file_size_offline)
            except Exception as ex:
                print("redownload failed")
                print(ex)
            file_size_offline = Path(filename).stat().st_size
        print("LOOP FINISHED")


    def onionrequestthreadeddataleakdownload2(onionurl):
        """Download one file from a Tor onion service, resuming until complete.

        Parameters:
            onionurl: two-element list ``[companyname, url]``.

        Returns:
            ``[local_path, basename, url, size_mb]`` on success, or ``None``
            when the download could not even be started.  Errors are
            printed/logged instead of raised so a multiprocessing pool worker
            never dies on a bad URL.

        Fixes over the original: the three duplicated retry loops are replaced
        by ``_resume_until_complete``, and ``file_size`` is checked before use
        so a failed first request no longer raises a silently-swallowed
        NameError.
        """
        companyname = onionurl[0]
        onionurl = onionurl[1]

        dataloc = '/media/archangel/Elements/clop/dataleaks/'
        foldername = dataloc
        dataloc = dataloc + companyname + "/"
        try:
            if not os.path.isdir(dataloc):
                os.mkdir(dataloc)
        except Exception as e:
            print(e)
            print("folder not created")

        filenamebasename = os.path.basename(onionurl)
        dataloc = dataloc + filenamebasename

        try:
            # All traffic through the local Tor SOCKS proxy; socks5h lets the
            # proxy resolve the .onion hostname.
            session = requests.session()
            session.proxies = {
                'http': 'socks5h://localhost:9050',
                'https': 'socks5h://localhost:9050',
            }

            print("dataloc")
            print(dataloc)
            print("onionurl")
            print(onionurl)
            url = onionurl

            try:
                print("url")
                print(url)
                if not os.path.isdir(foldername):
                    os.makedirs(foldername)

                # Watchdog so a stalled circuit cannot hang this worker.
                # NOTE(review): ``Timeout`` is assumed to be imported
                # elsewhere in this file -- confirm its origin.
                try:
                    timeout = Timeout(20)
                    timeout.start()
                except Exception as ex:
                    print(ex)

                # file_size stays None when the request never returns headers.
                file_size = None
                filesizemb = 1
                # Disable transparent compression so Content-Length matches
                # the bytes actually written to disk.
                headersac = {'Accept-Encoding': None}
                try:
                    with session.get(url, stream=True, verify=False,
                                     headers=headersac, timeout=600) as response:
                        file_size = int(response.headers['Content-Length'])
                        # Size in whole MB, clamped to a minimum of 1.
                        if file_size > 1000000:
                            filesizemb = file_size / 1000000
                        print(file_size)
                        try:
                            with open(dataloc, "wb") as text_file:
                                for chunk in response.iter_content(chunk_size=1024 * 1024):
                                    # Skip keep-alive chunks; flush so partial
                                    # progress survives a dropped connection.
                                    if chunk:
                                        text_file.write(chunk)
                                        text_file.flush()
                        except Exception as ex:
                            logging.error(f'write failed with error: {ex}')
                            print(ex)
                except Exception as ex:
                    logging.error(f'request failed with error: {ex}')
                    print(ex)
                    print("exited with for file")

                if file_size is None:
                    # Never got a response -- nothing on disk to resume from.
                    return None

                # Keep resuming until the on-disk size matches the server's.
                _resume_until_complete(companyname, onionurl, dataloc, file_size)

                print(file_size)
                print(dataloc)
                # [location on external drive, filename, onion url, size in MB]
                return [dataloc, filenamebasename, url, filesizemb]
            except Exception as e:
                print("FAILED DOWNLOAD 2")
                print(e)
        except Exception as e:
            print("FAILED DOWNLOAD 5")
            print(e)
    

    【讨论】:

      猜你喜欢
      • 2019-06-04
      • 2022-11-26
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2020-04-20
      • 2019-09-16
      • 2018-10-26
      • 2018-03-08
      相关资源
      最近更新 更多