【发布时间】:2014-04-04 20:33:58
【问题描述】:
您好，我正在尝试使用 Google App Engine 创建一个学术项目。我想通过 Google API 实现的主要功能是：搜索用户输入的查询并返回结果，以便对结果做进一步的分析。我首先尝试了 xgoogle，但它在 Google App Engine 上出现导入错误（相同的代码在本地可以运行）。即使 xgoogle 能正常工作，我也不知道如何获取不同网站的内容。我想知道是否有办法将 Google 搜索结果作为文档返回。谢谢。
下面的代码使用正则表达式在 HTML 代码中查找内容，但它不适用于 HTML 布局不同的网站。谢谢。
#!/usr/bin/env python
from html import HTML_PAGE
import webapp2
import jinja2
import os
import re
import sys
from os import walk
from google.appengine.api import search
from google.appengine.ext import ndb
from urllib import urlopen
from cgi import parse_qs
from xgoogle.search import GoogleSearch, SearchError
# Single HTML_PAGE instance shared by the request handlers in this module;
# they call its pageChange() method to obtain the markup they write out.
page = HTML_PAGE()
class MainPage(webapp2.RequestHandler):
    """Handler for GET requests that writes the default page markup."""

    def get(self):
        """Write the output of ``page.pageChange()`` to the response."""
        markup = page.pageChange()
        self.response.out.write(markup)
class SearchFile():
    """Search the files directly inside ``dataFolder`` for a substring.

    The file list is collected once, at construction time. Only the top
    level of ``dataFolder`` is scanned; sub-directories are ignored.
    """

    def __init__(self, userInput=''):
        """Remember the search string and list the files to search.

        Args:
            userInput: Substring to look for in each line of each file.
        """
        self.__input = userInput
        self.__result = {}
        self.__files = []
        self.__filenames = []
        # walk() yields the top directory first; stopping after that first
        # tuple restricts the listing to files directly in dataFolder.
        for dirpath, _dirnames, filenames in walk("dataFolder"):
            for name in filenames:
                self.__files.append(os.path.join(dirpath, name))
                self.__filenames.append(name)
            break

    def outPutData(self):
        """Return a dict mapping file name to its first matching line.

        Each file is read line by line and reading stops at the first line
        containing the search string. Files without a match are omitted.
        The returned dict is the instance's own result dict, so entries
        persist across repeated calls.
        """
        for name, path in zip(self.__filenames, self.__files):
            # The with-block closes the file, including on early break.
            with open(path) as f:
                for line in f:
                    if self.__input in line:
                        self.__result[name] = line
                        break
        return self.__result
class SearchFileHandle(webapp2.RequestHandler):
    """Handle the search form and write three result sections.

    The sections are, in order: matches from the local files, matching
    news headlines, and web search results.
    """

    def post(self):
        """Write the search results for the submitted ``input`` field.

        Output is written to the response in this order:
          1. the page markup from ``page.pageChange(userInput)``;
          2. for every local file match, a link to the file followed by
             the cropped snippet, with whole-word keyword matches in bold;
          3. for every matching news headline, a link to the article;
          4. every result returned by the web search.
        """
        # NOTE(review): under Python 2, str() raises UnicodeEncodeError when
        # the submitted value contains non-ASCII characters -- confirm
        # whether non-ASCII queries need to be supported.
        userInput = str(self.request.get('input'))
        out = self.response.out
        out.write(page.pageChange(userInput))

        # Keywords are the space-separated tokens of the query. A snippet
        # word is highlighted only when it equals one of them exactly.
        keywords = set(userInput.split(" "))

        # Named fileSearch (not "search") so the imported
        # google.appengine.api.search module is not shadowed.
        fileSearch = SearchFile(userInput)
        tramText = CropText()
        for key, value in fileSearch.outPutData().iteritems():
            keyBold = "<b>%s</b><br>" % (key)
            keyLink = "<a href = \"dataFolder/%s\" name =\"%s\"> %s </a>" % (key, key, keyBold)
            out.write(keyLink)
            resultContain = tramText.tram(value, userInput)
            for word in resultContain.split(" "):
                if not word:
                    # Consecutive spaces produce empty tokens; emit nothing.
                    continue
                if word in keywords:
                    out.write(" <b>%s</b> " % (word))
                else:
                    out.write(" %s " % (word))
            out.write("<br><br><br>")

        news = TakeNews()
        for key, value in news.websiteRead(userInput).iteritems():
            keyBold = "<b>%s</b><br>" % (key)
            keyLink = "<a href = %s> %s </a>" % (value, keyBold)
            out.write(keyLink)
            out.write("<br><br><br>")

        googleSearch = WebSearch(userInput)
        for res in googleSearch.returnResult():
            out.write(res)
            out.write("<br><br><br>")
class CropText():
    """Extract the sentence fragment surrounding a search term."""

    def tram(self, text, word):
        """Return the part of ``text`` around the first match of ``word``.

        The match is case-insensitive. It starts at a space, lazily spans
        up to a space immediately before ``word``, and after ``word``
        requires a space and runs lazily to the next ``.`` (inclusive).

        Args:
            text: String to search in.
            word: Literal term to look for (regex metacharacters are
                escaped).

        Returns:
            The matched fragment, or ``''`` when there is no match.
        """
        regex = r"( .*? )" + re.escape(word) + r"( .*?\.)"
        # Search once and reuse the match object.
        match = re.search(regex, text, re.IGNORECASE)
        if match is None:
            return ''
        return match.group()
class TakeNews():
    """Scrape headline titles and links from a news site's front page."""

    def __init__(self):
        """Start with the default site and no scraped headlines."""
        self.__website = 'http://www.bloomberg.com'
        # Filled by websiteRead() with the lists returned by re.findall.
        self.__topNews = []
        self.__topNewsTitle = []

    def setWebsite(self, website):
        """Set the site to scrape, completing a missing scheme/host prefix.

        Values that already contain ``http://www`` or start with
        ``http://`` / ``https://`` are stored unchanged. Otherwise:
          * ``'.example.com'``    becomes ``'http://www.example.com'``
          * ``'www.example.com'`` becomes ``'http://www.example.com'``
          * ``'example.com'``     becomes ``'http://www.example.com'``

        Args:
            website: Site address, with or without the ``http://www.`` part.
        """
        if 'http://www' in website or website.startswith(('http://', 'https://')):
            pass
        elif website.startswith('.'):
            website = 'http://www' + website
        elif website.startswith('www.'):
            website = 'http://' + website
        else:
            website = 'http://www.' + website
        self.__website = website

    def websiteRead(self, userInput):
        """Fetch the site and return the headlines whose link matches.

        Args:
            userInput: Substring that must occur in a headline's link
                (the ``href`` value, not the title) for it to be included.

        Returns:
            A dict mapping headline title to ``<website>/<href>``.
        """
        response = urlopen(self.__website)
        try:
            webpage = response.read()
        finally:
            # Release the connection even when read() fails.
            response.close()
        pathFinderTopNewsTitle = re.compile('<a class=\"icon-article-headline\".*<span class=\'headline\'>(.*)</span>')
        pathFinderTopNews = re.compile('<a class=\"icon-article-headline\" data-id=.* data-type=.* href=\"(.*)\"><span class=\'headline\'>')
        self.__topNewsTitle = re.findall(pathFinderTopNewsTitle, webpage)
        self.__topNews = re.findall(pathFinderTopNews, webpage)
        result = {}
        # The two patterns are matched independently and may yield lists of
        # different length; zip pairs them positionally and stops at the
        # shorter one instead of indexing past its end.
        for title, link in zip(self.__topNewsTitle, self.__topNews):
            if userInput in link:
                result[title] = self.__website + "/" + link
        return result
class WebSearch():
    """Run a stored query through ``GoogleSearch``."""

    def __init__(self, word):
        """Keep ``word`` as the query to search for."""
        self.__search = word

    def returnResult(self):
        """Return ``get_results()`` of a GoogleSearch for the stored query.

        The searcher's ``results_per_page`` is set to 200 before the call.
        """
        engine = GoogleSearch(self.__search)
        engine.results_per_page = 200
        results = engine.get_results()
        return results
def main():
    """Call ``run()`` on the module-level ``app``."""
    app.run()


# Route table: "/" is served by MainPage, "/searchFile" by SearchFileHandle.
app = webapp2.WSGIApplication(
    [
        ('/', MainPage),
        ('/searchFile', SearchFileHandle),
    ],
    debug=True,
)


if __name__ == "__main__":
    main()
【问题讨论】:
-
请发布错误的完整回溯（traceback）。另外，你不能在 App Engine 上使用 os.walk。
-
@aschmid00 实际上这部分代码可以正常工作，os.walk 用于本地文档搜索。错误信息是：from xgoogle.search import GoogleSearch, SearchError ImportError: No module named xgoogle.search
-
您的代码中没有出现 xgoogle。请用错误的完整回溯更新问题。
-
@aschmid00 已更新。谢谢
-
@aschmid00 我发现我没有将模块添加到app的文件夹中
标签: python google-app-engine google-api