【问题标题】:Pivot table automation with Python(使用 Python 进行数据透视表自动化)
【发布时间】:2022-10-25 15:31:22
【问题描述】:

我正在尝试使用 Python 自动创建数据透视表,但运行代码后出现错误:NameError: name 'run_excel' is not defined(名称 “run_excel” 未定义)。我的脚本是参照这篇文章编写的:https://towardsdatascience.com/automate-excel-with-python-pivot-table-899eab993966 。有人可以向我解释如何解决这个问题吗?(原帖此处附有一张截图:enter image description here)

这是我的代码:

import win32com.client as win32
import pandas as pd
import numpy as np
from pathlib import Path
import re
import sys
win32c = win32.constants

# Load the CSV export and drop the rows that contain missing values.
df = pd.read_csv(
    "act 05.01.2022.csv",
    encoding='unicode_escape',
    engine='python',
).dropna()

# Write the cleaned data to the workbook that main() later hands to run_excel.
df.to_excel(
    r"C:\Users\szczepan.czarniak\Desktop\Visual Studio\1\act 05.01.2022.xlsx",
    sheet_name='act 05.01.2022',
    index=False,
)

def pivot_table(wb: object, ws1: object, pt_ws: object, ws_name: str, pt_name: str, pt_rows: list, pt_filters: list, pt_fields: list):
    """
    Create a pivot table on ``pt_ws`` from the used range of ``ws1``.

    wb = workbook reference
    ws1 = worksheet that contains the source data
    pt_ws = worksheet object that receives the pivot table
    ws_name = pivot table worksheet name (used in the destination address)
    pt_name = name given to the pivot table
    pt_rows, pt_filters, pt_fields: values selected for filling the pivot table
    (each pt_fields item is [field name, caption, calculation constant, number format])

    NOTE(review): as indented here, ``run_excel`` is defined *inside* this
    function, and the worksheet setup that follows it runs as part of this
    function's body as well.
    """

    # pivot table location: anchor row is the number of page filters plus two
    pt_loc = len(pt_filters) + 2

    # grab the pivot table source data
    pc = wb.PivotCaches().Create(SourceType=win32c.xlDatabase, SourceData=ws1.UsedRange)

    # create the pivot table object
    pc.CreatePivotTable(TableDestination=f'{ws_name}!R{pt_loc}C1', TableName=pt_name)

    # select the pivot table worksheet and the cell where the pivot table is created
    pt_ws.Select()
    pt_ws.Cells(pt_loc, 1).Select()

    # Sets the filters and rows of the pivot table (no column fields are configured)
    for field_list, field_r in ((pt_filters, win32c.xlPageField), 
                                (pt_rows, win32c.xlRowField)):
        for i, value in enumerate(field_list):
            pt_ws.PivotTables(pt_name).PivotFields(value).Orientation = field_r
            pt_ws.PivotTables(pt_name).PivotFields(value).Position = i + 1

    # Sets the Values of the pivot table
    for field in pt_fields:
        pt_ws.PivotTables(pt_name).AddDataField(pt_ws.PivotTables(pt_name).PivotFields(field[0]), field[1], field[2]).NumberFormat = field[3]

    # Visibility: True or False
    pt_ws.PivotTables(pt_name).ShowValuesRow = True
    pt_ws.PivotTables(pt_name).ColumnGrand = True

    # NOTE(review): this def is indented one level, so run_excel is a local name
    # of pivot_table. The module-level main() calls run_excel and cannot see it,
    # which matches the reported "NameError: name 'run_excel' is not defined".
    def run_excel(f_path: Path, f_name: str, sheet_name: str):

        filename = f_path / f_name

    # create excel object
        excel = win32.gencache.EnsureDispatch('Excel.Application')

    # excel can be visible or not
        excel.Visible = True  # False

    # try except for file / path
    # NOTE(review): com_error is not imported anywhere in this script.
        try:
            wb = excel.Workbooks.Open(filename)
        except com_error as e:
            if e.excepinfo[5] == -2146827284:
                print(f'Failed to open spreadsheet.  Invalid filename or location: {filename}')
        # NOTE(review): at this indentation the else pairs with the try (it runs
        # when no exception occurred), not with the if above.
        else:
            raise e
        # NOTE(review): at function-body level, so it is reached on every path
        # that does not raise.
        sys.exit(1)

    # set worksheet
    # NOTE(review): from here on the code is back at pivot_table's own indentation,
    # so it uses pivot_table's wb argument and ends by calling pivot_table again.
    ws1 = wb.Sheets('act 05.01.2022')

    # Setup and call pivot_table
    ws2_name = 'pivot_table'
    wb.Sheets.Add().Name = ws2_name
    ws2 = wb.Sheets(ws2_name)

    # update the pt_name, pt_rows, pt_filters, pt_fields at your preference
    pt_name = 'example'  # pivot table name, must be a string
    pt_rows = ['decision_date']  # rows of pivot table, must be a list
    # pt_cols = []  # columns of pivot table, must be a list
    pt_filters = ['open_loan_count_all', 'days_between_repayments']  # filter to be applied on pivot table, must be a list
    # items of each pt_fields entry -- [0]: field name, [1]: pivot table column name, [2]: calculation method, [3]: number format
    pt_fields = [['id', 'Total id', win32c.xlCount, '0']]
    # calculation method: xlAverage, xlSum, xlCount
    pivot_table(wb, ws1, ws2, ws2_name, pt_name, pt_rows, pt_filters, pt_fields)
    wb.Save() # save the pivot table created
#    wb.Close(True)
#    excel.Quit()
def main():
    """Hand the exported workbook and the name of its data sheet to run_excel."""
    # Excel file (change to your Excel file name)
    workbook_file = r"C:\Users\szczepan.czarniak\Desktop\Visual Studio\1\act 05.01.2022.xlsx"
    # Folder of the file: the current working directory
    # (for a file located somewhere else use e.g. Path(r'c:\...\Documents'))
    workbook_dir = Path.cwd()
    # Sheet name for data (update with the sheet name from your file)
    data_sheet = 'act 05.01.2022'

    run_excel(workbook_dir, workbook_file, data_sheet)
    
main()

# Re-open the workbook from the current working directory, list the page-filter
# cells of the pivot table and clear the filters on its filter fields.
f_path = Path.cwd()
f_name = 'act 05.01.2022.xlsx'
filename = f_path / f_name
# create excel object
excel = win32.gencache.EnsureDispatch('Excel.Application')
# excel can be visible or not
excel.Visible = True  # False
# str(): hand COM a plain string so the call does not depend on how pywin32
# converts pathlib.Path objects
wb = excel.Workbooks.Open(str(filename))
# Look the table up by the name it was created with rather than by cell: its
# anchor row is len(pt_filters) + 2, so a fixed "A3" only fits a single filter.
pvtTable = wb.Sheets("pivot_table").PivotTables("example")

page_range_item = [str(cell) for cell in pvtTable.PageRange]

print(page_range_item)
# PivotFields() accepts one field identifier per call, so clear each field separately.
for field_name in ('open_loan_count_all', 'days_between_repayments'):
    pvtTable.PivotFields(field_name).ClearAllFilters()

【问题讨论】:

    标签: python excel


    【解决方案1】:

    您的 run_excel 函数是在 pivot_table 函数内部定义的(缩进多了一级),因此它只是 pivot_table 的局部名称,在 main() 函数的作用域中不可见,于是出现 NameError。以下代码未经测试,但已修正缩进格式。

    import win32com.client as win32
    import pandas as pd
    import numpy as np
    from pathlib import Path
    import re
    import sys
    win32c = win32.constants
    
    # Load the CSV export.
    df = pd.read_csv("act 05.01.2022.csv", encoding='unicode_escape', engine='python')

    # Drop the rows that contain missing values.
    df = df.dropna()

    # Write the cleaned data to the workbook used for the pivot table.
    # The backslashes of the Windows path are restored here; without them the
    # raw string is not a valid location.
    df.to_excel(r"C:\Users\szczepan.czarniak\Desktop\Visual Studio\1\act 05.01.2022.xlsx", sheet_name='act 05.01.2022', index=False)
    
    def pivot_table(wb: object, ws1: object, pt_ws: object, ws_name: str, pt_name: str, pt_rows: list, pt_filters: list, pt_fields: list):
        """
        Create a pivot table named ``pt_name`` on ``pt_ws`` from the used range of ``ws1``.

        wb: workbook reference used to create the pivot cache
        ws1: worksheet that contains the source data
        pt_ws: worksheet object that receives the pivot table
        ws_name: name of the pivot table worksheet, used in the destination address
        pt_name: name given to the pivot table
        pt_rows: field names placed as row fields
        pt_filters: field names placed as page (filter) fields
        pt_fields: value fields, each ``[field name, caption, calculation constant, number format]``
        """
        # Anchor row of the table: number of page filters plus two
        # (presumably to leave room for the filter rows above it).
        first_row = len(pt_filters) + 2

        # Build the pivot cache from the source sheet and create the table from it.
        cache = wb.PivotCaches().Create(SourceType=win32c.xlDatabase, SourceData=ws1.UsedRange)
        cache.CreatePivotTable(TableDestination=f'{ws_name}!R{first_row}C1', TableName=pt_name)

        # Select the pivot table worksheet and the table's anchor cell.
        pt_ws.Select()
        pt_ws.Cells(first_row, 1).Select()

        table = pt_ws.PivotTables(pt_name)

        # Page filters first, then row fields; positions count from 1 within each group.
        placements = (
            (pt_filters, win32c.xlPageField),
            (pt_rows, win32c.xlRowField),
        )
        for names, orientation in placements:
            for position, name in enumerate(names, start=1):
                pivot_field = table.PivotFields(name)
                pivot_field.Orientation = orientation
                pivot_field.Position = position

        # Add the value fields and apply each one's number format.
        for spec in pt_fields:
            data_field = table.AddDataField(table.PivotFields(spec[0]), spec[1], spec[2])
            data_field.NumberFormat = spec[3]

        # Visibility switches of the finished table.
        table.ShowValuesRow = True
        table.ColumnGrand = True
    
    def run_excel(f_path: Path, f_name: str, sheet_name: str):
        """
        Open a workbook in Excel, add a 'pivot_table' sheet and build the pivot table on it.

        f_path: directory that contains the workbook
        f_name: workbook file name (an absolute path takes precedence over f_path when joined)
        sheet_name: name of the worksheet that holds the source data

        Exits the process with status 1 after printing a message when opening the
        file fails with COM error code -2146827284; any other COM error is re-raised.
        The workbook is saved and left open in the visible Excel instance.
        """
        # Imported locally because the script's import block does not provide it;
        # without it the except clause below fails with a NameError of its own.
        from pywintypes import com_error

        filename = f_path / f_name

        # create excel object
        excel = win32.gencache.EnsureDispatch('Excel.Application')

        # excel can be visible or not
        excel.Visible = True  # False

        # try except for file / path
        try:
            # str(): hand COM a plain string so the call does not depend on how
            # pywin32 converts pathlib.Path objects
            wb = excel.Workbooks.Open(str(filename))
        except com_error as e:
            if e.excepinfo[5] != -2146827284:
                raise
            print(f'Failed to open spreadsheet.  Invalid filename or location: {filename}')
            sys.exit(1)

        # set worksheet: use the caller's sheet name instead of a hard-coded one
        ws1 = wb.Sheets(sheet_name)

        # Setup and call pivot_table
        ws2_name = 'pivot_table'
        wb.Sheets.Add().Name = ws2_name
        ws2 = wb.Sheets(ws2_name)

        # update the pt_name, pt_rows, pt_filters, pt_fields at your preference
        # NOTE(review): the field names below are the ones used in the question's
        # script -- confirm they match the columns of your data.
        pt_name = 'example'  # pivot table name, must be a string
        pt_rows = ['decision_date']  # rows of pivot table, must be a list
        pt_filters = ['open_loan_count_all', 'days_between_repayments']  # filter to be applied on pivot table, must be a list
        # items of each pt_fields entry -- [0]: field name, [1]: pivot table column name,
        # [2]: calculation method (xlAverage, xlSum, xlCount), [3]: number format
        pt_fields = [['id', 'Total id', win32c.xlCount, '0']]  # must be a list of lists
        pivot_table(wb, ws1, ws2, ws2_name, pt_name, pt_rows, pt_filters, pt_fields)
        wb.Save()  # save the pivot table created
        # The workbook and Excel are deliberately left open; to close them call
        # wb.Close(True) and excel.Quit() here.
    def main():
        """Call run_excel for the 'act 05.01.2022' workbook and its data sheet."""
        # sheet name for data
        sheet_name = 'act 05.01.2022'  # update with sheet name from your file
        # file path
        f_path = Path.cwd()  # file in current working directory
        # f_path = Path(r'c:\...\Documents')  # file located somewhere else
        # excel file; the backslashes of the Windows path are restored here
        # (without them the raw string is not a valid location)
        f_name = r"C:\Users\szczepan.czarniak\Desktop\Visual Studio\1\act 05.01.2022.xlsx"  # change to your Excel file name

        # function calls
        run_excel(f_path, f_name, sheet_name)
        
    main()

    # Re-open the workbook from the current working directory, list the page-filter
    # cells of the pivot table and clear the filters on its filter fields.
    f_path = Path.cwd()
    f_name = 'act 05.01.2022.xlsx'
    filename = f_path / f_name
    # create excel object
    excel = win32.gencache.EnsureDispatch('Excel.Application')
    # excel can be visible or not
    excel.Visible = True  # False
    # str(): hand COM a plain string so the call does not depend on how pywin32
    # converts pathlib.Path objects
    wb = excel.Workbooks.Open(str(filename))
    # Look the table up by the name it was created with rather than by cell: its
    # anchor row is len(pt_filters) + 2, so a fixed "A3" only fits a single filter.
    pvtTable = wb.Sheets("pivot_table").PivotTables("example")

    page_range_item = [str(cell) for cell in pvtTable.PageRange]

    print(page_range_item)
    # PivotFields() accepts one field identifier per call, so clear each field separately.
    for field_name in ('open_loan_count_all', 'days_between_repayments'):
        pvtTable.PivotFields(field_name).ClearAllFilters()
    

    【讨论】:

    • 我对这段代码有同样的错误
    • 你能给我看完整脚本的截图吗?也许您在复制/粘贴时遇到问题
    【解决方案2】:
    ### Pivot the dataset: 'Date' as index, one column per 'Country', 'NewConfirmed' as values
    pivot_df = df.pivot(
        index=['Date'],
        columns='Country',
        values=['NewConfirmed'],
    )
    ## Rename the columns to the sorted unique country names
    country_names = df['Country'].sort_values().unique()
    pivot_df.columns = country_names
    

    【讨论】:

      猜你喜欢
      • 2022-11-14
      • 2013-05-15
      • 2016-08-14
      • 1970-01-01
      • 2018-12-08
      • 1970-01-01
      • 1970-01-01
      • 2023-04-04
      • 1970-01-01
      相关资源
      最近更新 更多