【发布时间】:2019-02-03 14:24:24
【问题描述】:
我现在要道歉,因为我确信我的问题格式和我提供的信息与本网站的预期不符。我已经编写 SQL 和 VBA 数年了,我正在尝试学习第三种语言来提高我的技能。随着时间的推移,我会变得更好。
现在我的问题...
我正在尝试用一段在 BasketballReference.com 上有效的代码来抓取一系列表格,但是在 NBA.com 上,这段代码什么也没有返回。进一步排查后发现,make_soup 返回的 soup 对象中并不包含我在浏览器中检查表格时看到的 tr 和 td 标签。下面是我正在使用的代码,并附上我的 csv 文件内容作为参考。
import urllib
import urllib.request
from bs4 import BeautifulSoup
import os
import csv
import time
def make_soup(url):
    """Fetch *url* and return its content parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to download.

    Returns:
        A ``BeautifulSoup`` object built with the stdlib ``html.parser``.

    Raises:
        urllib.error.URLError: If the page cannot be retrieved.
    """
    # The response is a context manager; closing it releases the socket
    # instead of leaking one connection per scraped page.
    with urllib.request.urlopen(url) as thepage:
        # BeautifulSoup consumes the stream while constructing the tree, so
        # parsing must happen before the response is closed.
        soupdata = BeautifulSoup(thepage, "html.parser")
    return soupdata
# Number of key rows to scrape; the original loop was hard-coded to five.
MAX_PAGES = 5

# Load the scrape parameters. Each data row of PlayTypeKey.csv holds
# PlayType, Season, Mode (per-mode) and OffDef, in that column order.
with open('PlayTypeKey.csv', 'r', newline='') as PlaytypeData:
    csv_reader = csv.reader(PlaytypeData)
    next(csv_reader, None)  # skip the header row; tolerate an empty file
    # Ignore blank or short lines rather than failing with an IndexError.
    key_rows = [row[:4] for row in csv_reader if len(row) >= 4]

scraped_rows = []
# Slicing (instead of indexing 0..4) keeps the five-page cap but no longer
# raises when the key file has fewer than five rows.
for play_type, season, per_mode, off_def in key_rows[:MAX_PAGES]:
    soup = make_soup(
        "http://stats.nba.com/players/" + play_type
        + "/?Season=" + season
        + "&SeasonType=Regular%20Season&PerMode=" + per_mode
        + "&OD=" + off_def
    )
    for record in soup.findAll('tr'):
        cells = [data.text for data in record.findAll('td')]
        if not cells:
            # Rows without <td> cells (e.g. header rows) carry no player data.
            continue
        # Season and play type are separate columns, matching the header
        # below; the previous prefix fused them and lost the first character.
        scraped_rows.append(",".join([season, play_type] + cells))

# Every data row is preceded by a newline, so the header needs none itself.
playerdatasaved = "".join("\n" + row for row in scraped_rows)

header = "Season,PlayType,PLAYER,TEAM,GP,POSS,FREQ,PPP,PTS,FGM,FGA,FG%,EFG%,FT-Freq,TO-Freq,SF-Freq,AND ONE-Freq,SCORE-Freq,PERCENTILE"
# The context manager guarantees the output is flushed and closed.
with open(os.path.expanduser("BasketballPlayTypeData.csv"), "wb") as out_file:
    out_file.write(bytes(header, encoding="ascii", errors='ignore'))
    out_file.write(bytes(playerdatasaved, encoding="ascii", errors='ignore'))
PlayTypeKey.csv 数据:
PlayType Season Mode OffDef
isolation 2015-16 Totals offensive
isolation 2016-17 Totals offensive
isolation 2017-18 Totals offensive
transition 2015-16 Totals offensive
transition 2016-17 Totals offensive
transition 2017-18 Totals offensive
以我有限的排错能力来看,当我用该 URL 创建 soup 对象时,表格数据并没有被返回。打印 soup 的文本时,我得到了下面这些内容……
window.NREUM||(NREUM={}),__nr_require=function(e,t,n){function r(n){if(!t[n]){var o=t[n]={exports:{}};e[n][0].call(o.exports,function(t){var o=e[n][1][t];return r(o||t)},o,o.exports)}return t[n].exports}if("function"==typeof __nr_require)return __nr_require;for(var o=0;o<n.length;o++)r(n[o]);return r}({1:[function(e,t,n){function r(){}function o(e,t,n){return function(){return i(e,[f.now()].concat(u(arguments)),t?null:this,n),t?void 0:this}}var i=e("handle"),a=e(2),u=e(3),c=e("ee").get("tracer"),f=e("loader"),s=NREUM;"undefined"==typeof window.newrelic&&(newrelic=s);var p=["setPageViewName","setCustomAttribute","setErrorHandler","finished","addToTrace","inlineHit","addRelease"],d="api-",l=d+"ixn-";a(p,function(e,t){s[t]=o(d+t,!0,"api")}),s.addPageAction=o(d+"addPageAction",!0),s.setCurrentRouteName=o(d+"routeName",!0),t.exports=newrelic,s.interaction=function(){return(new r).get()};var m=r.prototype={createTracer:function(e,t){var n={},r=this,o="function"==typeof t;return i(l+"tracer",[f.now(),e,n],r),function(){if(c.emit((o?"":"no-")+"fn-start",[f.now(),r,o],n),o)try{return t.apply(this,arguments)}catch(e){throw c.emit("fn-err",[arguments,this,e],n),e}finally{c.emit("fn-end",[f.now()],n)}}}};a("setName,setAttribute,save,ignore,onEnd,getContext,end,get".split(","),function(e,t){m[t]=o(l+t)}),newrelic.noticeError=function(e){"string"==typeof e&&(e=new Error(e)),i("err",[e,f.now()])}},{}],2:[function(e,t,n){function r(e,t){var n=[],r="",i=0;for(r in e)o.call(e,r)&&(n[i]=t(r,e[r]),i+=1);return n}var o=Object.prototype.hasOwnProperty;t.exports=r},{}],3:[function(e,t,n){function r(e,t,n){t||(t=0),"undefined"==typeof n&&(n=e?e.length:0);for(var r=-1,o=n-t||0,i=Array(o<0?0:o);++r<o;)i[r]=e[t+r];return i}t.exports=r},{}],4:[function(e,t,n){t.exports={exists:"undefined"!=typeof window.performance&&window.performance.timing&&"undefined"!=typeof window.performance.timing.navigationStart}},{}],ee:[function(e,t,n){function r(){}function o(e){function t(e){return e&&e instanceof 
r?e:e?c(e,u,i):i()}function n(n,r,o,i){if(!d.aborted||i){e&&e(n,r,o);for(var a=t(o),u=m(n),c=u.length,f=0;f<c;f++)u[f].apply(a,r);var p=s[y[n]];return p&&p.push([b,n,r,a]),a}}function l(e,t){v[e]=m(e).concat(t)}function m(e){return v[e]||[]}function w(e){return p[e]=p[e]||o(n)}function g(e,t){f(e,function(e,n){t=t||"feature",y[n]=t,t in s||(s[t]=[])})}var v={},y={},b={on:l,emit:n,get:w,listeners:m,context:t,buffer:g,abort:a,aborted:!1};return b}function i(){return new r}function a(){(s.api||s.feature)&&(d.aborted=!0,s=d.backlog={})}var u="nr@context",c=e("gos"),f=e(2),s={},p={},d=t.exports=o();d.backlog=s},{}],gos:[function(e,t,n){function r(e,t,n){if(o.call(e,t))return e[t];var r=n();if(Object.defineProperty&&Object.keys)try{return Object.defineProperty(e,t,{value:r,writable:!0,enumerable:!1}),r}catch(i){}return e[t]=r,r}var o=Object.prototype.hasOwnProperty;t.exports=r},{}],handle:[function(e,t,n){function r(e,t,n,r){o.buffer([e],r),o.emit(e,t,n)}var o=e("ee").get("handle");t.exports=r,r.ee=o},{}],id:[function(e,t,n){function r(e){var t=typeof e;return!e||"object"!==t&&"function"!==t?-1:e===window?0:a(e,i,function(){return o++})}var o=1,i="nr@id",a=e("gos");t.exports=r},{}],loader:[function(e,t,n){function r(){if(!x++){var e=h.info=NREUM.info,t=d.getElementsByTagName("script")[0];if(setTimeout(s.abort,3e4),!(e&&e.licenseKey&&e.applicationID&&t))return s.abort();f(y,function(t,n){e[t]||(e[t]=n)}),c("mark",["onload",a()+h.offset],null,"api");var n=d.createElement("script");n.src="https://"+e.agent,t.parentNode.insertBefore(n,t)}}function o(){"complete"===d.readyState&&i()}function i(){c("mark",["domContent",a()+h.offset],null,"api")}function a(){return E.exists&&performance.now?Math.round(performance.now()):(u=Math.max((new Date).getTime(),u))-h.offset}var u=(new 
Date).getTime(),c=e("handle"),f=e(2),s=e("ee"),p=window,d=p.document,l="addEventListener",m="attachEvent",w=p.XMLHttpRequest,g=w&&w.prototype;NREUM.o={ST:setTimeout,SI:p.setImmediate,CT:clearTimeout,XHR:w,REQ:p.Request,EV:p.Event,PR:p.Promise,MO:p.MutationObserver};var v=""+location,y={beacon:"bam.nr-data.net",errorBeacon:"bam.nr-data.net",agent:"js-agent.newrelic.com/nr-1071.min.js"},b=w&&g&&g[l]&&!/CriOS/.test(navigator.userAgent),h=t.exports={offset:u,now:a,origin:v,features:{},xhrWrappable:b};e(1),d[l]?(d[l]("DOMContentLoaded",i,!1),p[l]("load",r,!1)):(d[m]("onreadystatechange",o),p[m]("onload",r)),c("mark",["firstbyte",u],null,"api");var x=0,E=e(4)},{}]},{},["loader"]);
NBA.com/Stats | Players Isolation
window.nbaStatsRoute = '/players/isolation/';
Scores
Schedule
News
Video
Standings
Stats
Players
Teams
NBA LEAGUE PASS
Store
Tickets
Stats Home
{{ player.name }}
Profile
Traditional
Advanced
Misc
Scoring
Usage
Shooting
Career
Box Scores
Advanced Box Scores Traditional
Advanced Box Scores Advanced
Advanced Box Scores Misc
Advanced Box Scores Scoring
Advanced Box Scores Usage
Shots Dashboard
Rebounds Dashboard
Passes Dashboard
Defense Dashboard
{{ team.city }} {{ team.name }}
Profile
Traditional
Advanced
Four Factors
Misc
Scoring
Opponent
Shooting
Lineups Traditional
Lineups Advanced
Lineups Four Factors
Lineups Misc
Lineups Scoring
Lineups Opponent
Players Traditional
Players Advanced
Players Misc
Players Scoring
Players Usage
On/Off Court Traditional
On/Off Court Advanced
On/Off Court Four Factors
On/Off Court Misc
On/Off Court Scoring
On/Off Court Opponent
On/Off Court Summary
Shots Dashboard
Rebounds Dashboard
Passes Dashboard
Box Scores
Advanced Box Scores Traditional
Advanced Box Scores Advanced
Advanced Box Scores Four Factors
Advanced Box Scores Misc
Advanced Box Scores Scoring
Seasons
Year Over Year
Franchise Leaders
Historic Roster
On Off Court Compare
Traditional
Advanced
Misc
Scoring
Four Factors
Opponent
{{ game.info }}
Box Score
Advanced
Misc
Scoring
Usage
Four Factors
Player Tracking
Play By Play
Shotchart
Game Charts
Hustle
Fantasy Compare
Traditional
Advanced
Misc
Scoring
Impact
Traditional
Advanced
Misc
Scoring
Four Factors
Defense
Players
Players Home
Player Index
Traditional Stats
Regular Season Leaders
Hustle Leaders
Advanced Leaders
All Time Leaders
General
Traditional
Advanced
Misc
Scoring
Usage
Opponent
Defense
Clutch
Clutch Traditional
Clutch Advanced
Clutch Misc
Clutch Scoring
Clutch Usage
Defense
Defense Overall
Defending Shots Overall
Defending 3 Pointers
Defending 2 Pointers
Defending Less Than 6 FT
Defending Less Than 10 FT
Defending More Than 15 FT
Opponent Shooting
By Play Type
Play Type
Transition
Isolation
Ball Handler
Roll Man
Post Up
Spot Up
Hand Off
Cut
Off Screen
Put Backs
Misc
Tracking
Catch & Shoot
Defensive Impact
Drives
Passing
Touches
Pull Up Shooting
Rebounding
Offensive Rebounding
Defensive Rebounding
Shooting Efficiency
Speed & Distance
Elbow Touches
Post Ups
Paint Touches
Shooting
Shots by General Range
Shots by Shotclock
Shots by Dribbles
Shots by Touch Time
Shots by Closest Defender
Shots by Closest Defender (+10 FT)
Shooting
Opponent Shooting
Hustle Stats
Bios
Box Scores
Advanced Box Scores
Traditional
Advanced
Misc
Scoring
Usage
Teams
Teams Home
All Teams
{{ t.city }} {{ t.name }}
Traditional Stats
Hustle Leaders
Advanced Leaders
General
Traditional
Advanced
Misc
Four Factors
Scoring
Opponent
Defense
Clutch
Clutch Traditional
Clutch Advanced
Clutch Misc
Clutch Four Factors
Clutch Scoring
Clutch Opponent
Defense
Defense Overall
Defending Shots Overall
Defending 3 Pointers
Defending 2 Pointers
Defending Less Than 6 FT
Defending Less Than 10 FT
Defending More Than 15 FT
Overall Opponent Shooting
By Play Type
Play Type
Transition
Isolation
Ball Handler
Roll Man
Post Up
Spot Up
Hand Off
Cut
Off Screen
Put Backs
Misc
Tracking
Catch & Shoot
Defensive Impact
Drives
Passing
Touches
Pull Up Shooting
Rebounding
Offensive Rebounding
Defensive Rebounding
Shooting Efficiency
Speed & Distance
Elbow Touches
Post Ups
Paint Touches
Shooting
Shots By General Range
Shots By Shotclock
Shots By Dribbles
Shots By Touch Time
Shots By Closest Defender
Shots By Closest Defender (+10 FT)
Shooting
Opponent Shooting
Overall Opponent Shooting
Opponent Shots By General Range
Opponent Shots By Shotclock
Opponent Shots By Dribbles
Opponent Shots By Touch Time
Opponent Shots By Closest Defender
Opponent Shots By Closest Defender (+10 FT)
Hustle Stats
Box Scores
Advanced Box Scores
Traditional
Advanced
Four Factors
Misc
Scoring
All Time Leaders
Finals
2018 Draft Combine
Hustle Leaders
Advanced Leaders
Scores
NBA Schedule
Summer League Schedule
Standings
Franchise Leaders
{{ t.city }} {{ t.name }}
Fantasy
Fantasy News
Fantasy Compare
Fantasy Mock Draft
Fantasy Cumulative Ranking
Lineups
Traditional
Advanced
Misc
Four Factors
Scoring
Opponent
On/Off Court by Team
{{ t.city }} {{ t.name }}
Draft & Combine
2018 Draft Combine
Draft History
Combine Spot Up Shooting
Combine Non-Stationary Shooting
Combine Strength & Agility
Combine Anthro
Tools
Player Box Score Search
Team Box Score Search
Player Streak Search
Team Streak Search
Player Compare
Fantasy Compare
Impact
Stats 101
Franchise History
Statistical Minimums
Glossary
Transactions
Fantasy News
Articles
Weekly Stats Archive
FAQ
What's New
Video & Tracking Status
Video Rulebook
Contact Us
Events
2018 Finals
2018 Playoffs
2018 Summer League
NBA Africa Game 2017
All-Star Infographics
All-Star Friday Infographic
All-Star Saturday Infographic
All-Star Sunday Infographic
2018 Draft Combine
Advanced Stats
Stats Home
Players
Teams
Advanced
Scores
Schedule
All Time Leaders
Search For A Player or Team
No Results Found
{{ ::item.city }} {{ ::item.name }}
{{ ::item.fn }} {{ ::item.ln }}
{{ ::item.fn }} {{ ::item.ln }}
See More Results
Stats Home
/
Players
/
Playtype
/
Isolation
{{ alpha }}
Sortable Player Stats
Sortable Team Stats
{{ betaText }}
Official Leaders
Advanced Leaders
Hustle Leaders
General
Clutch
Playtype
Tracking
Defense Dashboard
Shot Dashboard
Box Scores
Advanced Box Scores
Shooting
Opponent Shooting
Hustle
All Time Summary
All Time Leaders
Bios
Players List
{{ gammaText }}
Traditional
Advanced
Misc
Scoring
Usage
Opponent
Defense
{{ gammaText }}
Traditional
Advanced
Misc
Scoring
Usage
{{ gammaText }}
Transition
Isolation
Pick & Roll Ball Handler
Pick & Roll Roll Man
Post Up
Spot Up
Handoff
Cut
Off Screen
Putbacks
Misc
{{ gammaText }}
Drives
Defensive Impact
Catch & Shoot
Passing
Touches
Pull Up Shooting
Rebounding
Offensive Rebounding
Defensive Rebounding
Shooting Efficiency
Speed & Distance
Elbow Touches
Post Ups
Paint Touches
{{ gammaText }}
Overall
3 Pointers
2 Pointers
< 6ft.
< 10ft.
> 15ft.
{{ gammaText }}
General
Shotclock
Dribbles
Touch Time
Closest Defender
Closest Defender +10
{{ gammaText }}
Traditional
Advanced
Misc
Scoring
Usage
Advanced Filters
Custom Filters
GP
Games Played
Poss
Possessions
Freq
Frequency
PPP
Points Per Possession
PTS
Games Played
PTS
Points
FGM
Field Goals Made
FGA
Field Goals Attempted
FG%
Field Goal Percentage
eFG%
Effective Field Goal Percentage
FT Freq
Free Throw Frequency
TO FREQ
Turnover Frequency
SF Freq
Shooting Foul Frequency
And One Freq
And One Frequency
Score Freq
Score Frequency
Minimum of 10 min/game and 10 possessions per play type to qualify.
provided by Synergy
Copyright © 2018 NBA Media Ventures, LLC. All rights reserved.
Privacy Policy | Terms of Use | Ad Choices
NBA.com is part of Turner Sports Digital, part of the Turner Sports & Entertainment Digital Network.
window.esi_ = {
country: "US",
tz: "EST",
region: "NC",
browser: "OTHER",
nbarid: "1",
nbarn: "United States"
};
if (typeof _satellite !== 'undefined') { _satellite.pageBottom(); }
//Set a listener for GPTRenderComplete Event
AdFuel.addEvent(document, 'GPTRenderComplete', function(event) {
if (event.detail && !event.detail.isEmpty) {
var detail = event.detail;
//If ad_bnr_atf_01 is 728x90, dispatch the global marketing slot
if (detail.pos[0] === 'bnr_atf_01' && detail.renderedSize[0] === 728 && detail.renderedSize[1] === 90) {
AdFuel.queueRegistry('//ssl.cdn.turner.com/ads/nba2/singles/nba_global_mktg.js', {
dispatch: true,
});
}
}
});
//Dispatch nba_stats registry
AdFuel.queueRegistry('//i.cdn.turner.com/ads/nba2/nba_stats.js', {
dispatch: true
});
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-86643645-4', 'auto');
ga('send', 'pageview');
var hostname = window.location.hostname;
var accountIdOverride = "";
var nbaOmChannelPrefix = "";
nbaOmChannelPrefix="stats.nba.com";
//Set to use the omniture dev suite for our development hostnames, else prod
if (hostname === "dev.stats.nba.com" || hostname === "statsv3.nba.com" || hostname === "linuxpubstats.nba.com" || hostname == "linuxpubstatsdev.nba.com"){
accountIdOverride="nbag-n-league-dev";
}
else {
accountIdOverride="nbag-n-league";
}
_nba.analytics.setOverride("disableAutoPage",true);
_nba.analytics.setOverride("accountIds", accountIdOverride);
if (_nba && _nba.settings && _nba.settings.global) {
_nba.settings.global.experience="responsive";
}
window.NREUM||(NREUM={});NREUM.info={"beacon":"bam.nr-data.net","licenseKey":"09f0cb5c68","applicationID":"76210961","transactionName":"NgcGZUJQVxYEVEJcCQ9NMUNZHkkJBE5TRxVOCxdeXFBNDApZGVwIBQccH0BZSQ==","queueTime":0,"applicationTime":17,"atts":"GkAFEwpKRBg=","errorBeacon":"bam.nr-data.net","agent":""}
上面粘贴内容的底部是表格的列标题,但表格本身没有对应的文本或代码。为了不让这篇帖子过于冗长,我就不贴出打印 HTML 时得到的内容了,但我在其中搜索 tr 或 td 标签时没有得到任何结果。
提前感谢任何花时间研究此问题的人,我只想说这个网站对我来说已经非常有价值。
【问题讨论】:
-
适用于一个网站的代码不太可能直接适用于另一个网站。您能否编辑(edit)问题,以文本格式附上
PlayTypeKey.csv 文件中的几行内容?没有它很难重现您的问题。另外,您使用的是哪个版本的 Python? -
马丁感谢您的回复。我已经用文本替换了图像,并且我使用的是 Python 3.6。我还应该提到,该表被分成几个需要导航的“页面”。我想这会增加难度,可能需要像 RoboBrowser 这样的东西,但我真的很想一次解决一个问题。
-
您需要的信息实际上是由页面通过 JavaScript 发起的请求返回的。这些数据先以 JSON 格式返回,再由页面转换为 HTML。因此,最好的方法是直接请求该 JSON 数据来提取您的统计数据。
标签: python html web-scraping beautifulsoup python-beautifultable