按钮命令 Tkinter Python 的问题答案

【问题标题】：Problem with Button Command Tkinter Python（Tkinter Python 按钮命令的问题）
【发布时间】：2011-07-30 17:51:14
【问题描述】：

我有一个程序，它在 SEC Edgar 数据库中搜索年度报告（10-K），并在列表框中列出 40 个不同的项目。现在我想创建一个“Next 40”按钮，让列表框显示接下来的 40 项，下面的代码实现了这一点：

def Next():
    """Show the result page that follows the first page for the entered ticker.

    Downloads the first EDGAR result page for the ticker in ``entryWidget``,
    pulls the target URL out of the markup of its "Next 40" button, downloads
    that page and replaces the listbox contents with the filing descriptions
    found in its ``nowrap`` cells.

    The lookup always starts from the first result page, so this can only
    ever reach the page directly after it.
    """
    global entryWidget

    page = 'http://www.sec.gov/cgi-bin/browse-edgar?company=&match=&CIK=' + entryWidget.get().strip() + '&filenum=&State=&Country=&SIC=&owner=exclude&Find=Find+Companies&action=getcompany'
    first_soup = BeautifulSoup(urllib.urlopen(page).read())

    # The element whose value is "Next 40" embeds the follow-up URL in its
    # markup; cut out everything from '/cgi' through 'count=40'.
    button_markup = str(first_soup.find(value="Next 40"))
    start = button_markup.find('/cgi')
    stop = button_markup.find('count=40') + len('count=40')
    nextpage = 'http://www.sec.gov' + button_markup[start:stop]

    next_soup = BeautifulSoup(urllib.urlopen(nextpage).read())
    nowrap_markup = str(next_soup.findAll(nowrap=True))

    parser = MyParser()
    parser.parse(nowrap_markup)

    # Keep only real filing descriptions: drop the "Documents" link captions
    # and entries that start with three digits and a dash.
    filinglist = [
        s for s in parser.get_descriptions()
        if s != 'Documents'
        and s != 'Documents Interactive Data'
        and not re.match(r'\d{3}-', s)
    ]

    # Collected but not used yet in this function.
    linklist = [s for s in parser.get_hyperlinks() if not s.startswith('/cgi-')]

    Lb1.delete(0, END)
    for position, description in enumerate(filinglist):
        Lb1.insert(position, description)

如您所见，按下按钮时，它会先读取原始链接（page），然后在该 html 页面上查找“Next 40”超链接，接着解析新的 html 文档（nextpage），并获取项目名称和相关链接。这段代码能够成功地从原始页面转到下一页，但它只能显示这一个下一页。

那么，怎样才能在每次按下“Next 40”按钮时，把 (nextpage) 当作新的原始 (page)，从而列出 (nextnextpage) html 文档中的项目呢？抱歉，如果这令人困惑，我真的不知道还能怎样解释。

下面是我要解析的实际站点链接，作为补充说明：http://www.sec.gov/cgi-bin/browse-edgar ... getcompany 我希望我的“Next 40”按钮能够不断地从该站点的“Next 40”按钮中取得 html 超链接。

这是我的整个程序代码，以备不时之需：

import BeautifulSoup
from BeautifulSoup import BeautifulSoup
import urllib
import sgmllib
from Tkinter import *
import tkMessageBox
import re

class MyParser(sgmllib.SGMLParser):
    """SGML parser that gathers ``nowrap`` table-cell text and link targets.

    After ``parse`` has run, ``get_descriptions`` returns one string per
    ``<td>`` that carried a ``nowrap`` attribute and contained text, and
    ``get_hyperlinks`` returns the ``href`` value of every ``<a>`` tag seen.
    """

    def __init__(self, verbose=0):
        """Initialise the base parser and the empty result lists."""
        sgmllib.SGMLParser.__init__(self, verbose)
        self.descriptions = []
        self.hyperlinks = []
        # Flags are kept as 0/1 integers.
        self.inside_td_element = 0
        self.starting_description = 0

    def parse(self, psoup):
        """Feed the markup string ``psoup`` to the parser and finish parsing."""
        self.feed(psoup)
        self.close()

    def start_td(self, attributes):
        """Start capturing text when the opening ``<td>`` has ``nowrap``."""
        if any(name == "nowrap" for name, _ in attributes):
            self.inside_td_element = 1
            self.starting_description = 1

    def end_td(self):
        """Stop capturing text at the closing ``</td>``."""
        self.inside_td_element = 0

    def start_a(self, attributes):
        """Record every ``href`` attribute value of an ``<a>`` tag."""
        self.hyperlinks.extend(
            value for name, value in attributes if name == "href"
        )

    def handle_data(self, data):
        """Append text found inside a captured cell to the current description."""
        if not self.inside_td_element:
            return
        if self.starting_description:
            # First text chunk of this cell opens a new description entry.
            self.descriptions.append(data)
            self.starting_description = 0
        else:
            # Later chunks of the same cell extend the entry just opened.
            self.descriptions[-1] += data

    def get_descriptions(self):
        """Return the list of collected cell texts."""
        return self.descriptions

    def get_hyperlinks(self):
        """Return the list of collected ``href`` values."""
        return self.hyperlinks

def Submit():
    """Search EDGAR for the entered ticker and list the first page of filings.

    Reads the ticker from ``entryWidget``. If the entry is empty, an error
    dialog is shown and nothing else happens. Otherwise the first result page
    is downloaded, the text of its ``nowrap`` cells is extracted with
    ``MyParser`` and filtered, and the listbox is refilled with the remaining
    descriptions. Afterwards the Download and Next buttons are enabled and
    the Previous button is disabled, because the first page is showing.
    """
    global entryWidget

    ticker = entryWidget.get().strip()
    if ticker == "":
        tkMessageBox.showerror("Tkinter Entry Widget", "Enter a text value")
        return

    page = 'http://www.sec.gov/cgi-bin/browse-edgar?company=&match=&CIK=' + ticker + '&filenum=&State=&Country=&SIC=&owner=exclude&Find=Find+Companies&action=getcompany'
    sock = urllib.urlopen(page)
    try:
        raw = sock.read()
    finally:
        # Release the connection even if reading the response fails.
        sock.close()

    soup = BeautifulSoup(raw)
    psoup = str(soup.findAll(nowrap=True))
    myparser = MyParser()
    myparser.parse(psoup)

    # Keep only real filing descriptions: drop the "Documents" link captions
    # and entries that start with three digits and a dash.
    filinglist = [
        s for s in myparser.get_descriptions()
        if s != 'Documents'
        and s != 'Documents Interactive Data'
        and not re.match(r'\d{3}-', s)
    ]

    # Collected but not used yet in this function.
    linklist = [s for s in myparser.get_hyperlinks() if not s.startswith('/cgi-')]

    # Clear results of an earlier search first, as Next() and Previous() do;
    # otherwise a second search stacks new rows on top of the old ones.
    Lb1.delete(0, END)
    for position, description in enumerate(filinglist):
        Lb1.insert(position, description)

    downloadbutton.configure(state=NORMAL)
    nextbutton.configure(state=NORMAL)
    # A fresh search always shows the first page, so there is no previous page.
    previousbutton.configure(state=DISABLED)

def Next():
    """Show the result page that follows the first page for the entered ticker.

    Downloads the first EDGAR result page for the ticker in ``entryWidget``,
    pulls the target URL out of the markup of its "Next 40" button, downloads
    that page and replaces the listbox contents with the filing descriptions
    found in its ``nowrap`` cells. Finally the Previous button is enabled and
    the Next button is disabled.

    The lookup always starts from the first result page, so this can only
    ever reach the page directly after it.
    """
    global entryWidget

    page = 'http://www.sec.gov/cgi-bin/browse-edgar?company=&match=&CIK=' + entryWidget.get().strip() + '&filenum=&State=&Country=&SIC=&owner=exclude&Find=Find+Companies&action=getcompany'
    first_soup = BeautifulSoup(urllib.urlopen(page).read())

    # The element whose value is "Next 40" embeds the follow-up URL in its
    # markup; cut out everything from '/cgi' through 'count=40'.
    button_markup = str(first_soup.find(value="Next 40"))
    start = button_markup.find('/cgi')
    stop = button_markup.find('count=40') + len('count=40')
    nextpage = 'http://www.sec.gov' + button_markup[start:stop]

    next_soup = BeautifulSoup(urllib.urlopen(nextpage).read())
    nowrap_markup = str(next_soup.findAll(nowrap=True))

    parser = MyParser()
    parser.parse(nowrap_markup)

    # Keep only real filing descriptions: drop the "Documents" link captions
    # and entries that start with three digits and a dash.
    filinglist = [
        s for s in parser.get_descriptions()
        if s != 'Documents'
        and s != 'Documents Interactive Data'
        and not re.match(r'\d{3}-', s)
    ]

    # Collected but not used yet in this function.
    linklist = [s for s in parser.get_hyperlinks() if not s.startswith('/cgi-')]

    Lb1.delete(0, END)
    for position, description in enumerate(filinglist):
        Lb1.insert(position, description)

    previousbutton.configure(state=NORMAL)
    nextbutton.configure(state=DISABLED)

def Previous():
    """Return the listbox to the first result page for the entered ticker.

    Downloads the first EDGAR result page for the ticker in ``entryWidget``
    again, replaces the listbox contents with the filing descriptions found
    in its ``nowrap`` cells, then enables the Next button and disables the
    Previous button.
    """
    global entryWidget

    page = 'http://www.sec.gov/cgi-bin/browse-edgar?company=&match=&CIK=' + entryWidget.get().strip() + '&filenum=&State=&Country=&SIC=&owner=exclude&Find=Find+Companies&action=getcompany'
    first_soup = BeautifulSoup(urllib.urlopen(page).read())
    nowrap_markup = str(first_soup.findAll(nowrap=True))

    parser = MyParser()
    parser.parse(nowrap_markup)

    # Keep only real filing descriptions: drop the "Documents" link captions
    # and entries that start with three digits and a dash.
    filinglist = [
        s for s in parser.get_descriptions()
        if s != 'Documents'
        and s != 'Documents Interactive Data'
        and not re.match(r'\d{3}-', s)
    ]

    # Collected but not used yet in this function.
    linklist = [s for s in parser.get_hyperlinks() if not s.startswith('/cgi-')]

    Lb1.delete(0, END)
    for position, description in enumerate(filinglist):
        Lb1.insert(position, description)

    nextbutton.configure(state=NORMAL)
    previousbutton.configure(state=DISABLED)

if __name__ == "__main__":
    # Main window. The widgets below stay module-level names because the
    # button callbacks (Submit, Next, Previous) refer to them as globals.
    root = Tk()
    root.title("SEC Edgar Search")
    root.configure(padx=10, pady=25)

    # Container frames: one packed at the top, two packed against the bottom.
    top = Frame(root)
    bottom = Frame(root)
    bottom2 = Frame(root)
    top.pack(side=TOP)
    bottom.pack(side=BOTTOM, fill=BOTH, expand=True)
    bottom2.pack(side=BOTTOM, fill=BOTH, expand=True)

    # Ticker entry with its label.
    textFrame = Frame(root)
    entryLabel = Label(textFrame, text="Ticker symbol:")
    entryLabel.pack(side=TOP)
    entryWidget = Entry(textFrame, width=15)
    entryWidget.pack(side=LEFT)
    textFrame.pack()

    # Result list, wired to a vertical scrollbar in both directions.
    scrollbar = Scrollbar(root)
    scrollbar.pack(side=RIGHT, fill=Y)
    Lb1 = Listbox(root, width=20, height=15, yscrollcommand=scrollbar.set, selectmode=EXTENDED)
    Lb1.pack()
    scrollbar.config(command=Lb1.yview)

    # Submit and Download are packed into `bottom2`.
    submitbutton = Button(root, text="Submit", command=Submit)
    submitbutton.pack(in_=bottom2, side=TOP)

    # Download has no command attached and starts disabled.
    downloadbutton = Button(root, text="Download", state=DISABLED)
    downloadbutton.pack(in_=bottom2, side=TOP)

    # Paging buttons are packed into `bottom`; both start disabled.
    previousbutton = Button(root, text="Previous 40", command=Previous, state=DISABLED)
    previousbutton.pack(in_=bottom, side=LEFT)

    nextbutton = Button(root, text="Next 40", command=Next, state=DISABLED)
    nextbutton.pack(in_=bottom, side=LEFT)

    root.mainloop()

【问题讨论】：

标签： python button tkinter

【解决方案1】：

使用 Application 类而不是全局变量。目前，您总是在下载第一页。您的应用程序类应该缓存当前页面的“汤”（soup），next 方法用它从“Next 40”表单按钮中获取 onClick 值：

class Application(Frame):
    """Application frame that keeps the current page's soup in ``self.soup``.

    This is a sketch: parts marked ``#...`` are elided. ``submit`` stores the
    soup of the page it fetched, and ``next`` both reads and replaces that
    cached soup, so each call continues from the page shown last.
    """
    def __init__(self, parent=None):
        """Create the frame inside ``parent``, pack it and build its widgets.

        Only the three container frames and the Submit button are shown; the
        remaining widgets are elided.
        """
        Frame.__init__(self, parent)
        self.pack()

        self.top = Frame(self)
        self.bottom = Frame(self)
        self.bottom2 = Frame(self)
        self.top.pack(side=TOP)
        self.bottom.pack(side=BOTTOM, fill=BOTH, expand=True)
        self.bottom2.pack(side=BOTTOM, fill=BOTH, expand=True)
        #... 
        # The button command is the bound method, so Tkinter calls it with
        # this instance as ``self``.
        self.submitbutton = Button(self, text="Submit", command=self.submit)
        self.submitbutton.pack(in_=self.bottom2, side=TOP)
        #...

    #...

    def submit(self):
        """Build the search URL for the ticker in ``self.entryWidget``.

        The download and parsing steps are elided; the soup of the fetched
        page is meant to be stored in ``self.soup`` (placeholder below).
        """
        page = ('http://www.sec.gov/cgi-bin/browse-edgar?company=&match=&CIK=' + 
                 self.entryWidget.get().strip() + 
                '&filenum=&State=&Country=&SIC=&owner=exclude' 
                '&Find=Find+Companies&action=getcompany')
        #...
        self.soup = ...

    def next(self):
        """Fetch the page behind the cached page's "Next 40" element.

        The URL is cut out of the markup of the element whose value is
        "Next 40" in ``self.soup``; the downloaded page then replaces
        ``self.soup``. Listing the results is elided.
        """
        #...
        #there must be a better way than this to extract the onclick value
        #but I don't use/know BeautifulSoup to help with this part

        npar = str(self.soup.find(value="Next 40"))
        # Slice from '/cgi' through the end of 'count=40'.
        index1 = npar.find('/cgi')
        index2 = npar.find('count=40') + len('count=40')  
        page = 'http://www.sec.gov' + npar[index1:index2]

        sock = urllib.urlopen(page)
        raw = sock.read()
        # Cache the new page so the following call starts from it.
        self.soup = BeautifulSoup(raw)

        #...

if __name__ == '__main__':
    # Create and configure the main window in one step.
    root = Tk()
    root.title("SEC Edgar Search")
    root.configure(padx=10, pady=25)

    # Build the application frame inside the window and run the event loop.
    app = Application(root)
    app.mainloop()

    # Tear the window down once the event loop has returned.
    root.destroy()

对于每个新页面，onClick 链接中的 &Start 参数都会更新。因此，您也可以在类中维护一个递增的计数器，而无需解析当前的汤来获取该值。

【讨论】：

我尝试用如下代码创建一个新类：class Application(): def submit(self): ... 等等。但我不断收到这个异常：Exception in Tkinter callback Traceback (most recent call last): File "C:\Python27\lib\lib-tk\Tkinter.py", line 1410, in __call__ return self.func(*args) TypeError: unbound method Submit() must be called with Application instance as first argument (got nothing instead)。知道是什么原因造成的吗？