您可以直接查询网站的 API 端点来获取所需的表格数据。
方法如下:
import json
import requests
# Fetch the labour-pool record for a single region/occupation pair and
# pretty-print the JSON the API returns.
region_id = "01"
occupation_id = "110"
url = f"https://www.ccq.org/api/labourpools?regionId={region_id}&occupationId={occupation_id}"
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:95.0) Gecko/20100101 Firefox/95.0",
    # NOTE(review): presumably the endpoint expects AJAX-style requests;
    # verify whether this header is actually required.
    "X-Requested-With": "XMLHttpRequest",
}
# requests applies no timeout by default, so without one a stalled server
# would block this script indefinitely.
response = requests.get(url, headers=headers, timeout=30)
# Surface HTTP errors (4xx/5xx) directly instead of letting an error page
# fail later as a confusing JSON decoding error.
response.raise_for_status()
data = response.json()
print(json.dumps(data, indent=2))
输出:
[
{
"Id": "01",
"Name": "Iles de la Madeleine",
"Occupations": [
{
"Id": "110",
"Name": "Briqueteur-ma\u00e7on",
"Pool": {
"IsOpen": true,
"IsLessThan10": true,
"IsLessThan30": true
}
}
],
"EffectiveDate": "17 janvier 2022"
}
]
编辑:
如果您想获取所有地区和职业的表格,可以构造所有可能的 API 请求 URL,然后逐一获取数据。
方法如下:
import json
import requests
from bs4 import BeautifulSoup
# Page whose dropdowns list every region and occupation id.
base_url = "https://www.ccq.org/fr-CA/qualification-acces-industrie/bassins-main-oeuvre/etat-bassins-main-oeuvre"
# API endpoint prefix; the query-string parameters are appended to it.
api_url = "https://www.ccq.org/api/labourpools?"
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:95.0) Gecko/20100101 Firefox/95.0",
    # NOTE(review): presumably the endpoint expects AJAX-style requests;
    # verify whether this header is actually required.
    "X-Requested-With": "XMLHttpRequest",
}
# Seconds to wait for each HTTP response. requests applies no timeout by
# default, so without one a stalled server would block the script forever.
REQUEST_TIMEOUT = 30


def get_ids(id_value: str) -> list:
    """Return the ``value`` of every option except the first in a ``<select>``.

    Reads the module-level ``soup`` (the parsed page assigned by the script
    body below), so it must only be called after ``soup`` has been set.

    Args:
        id_value: The ``id`` attribute of the ``<select>`` element to read.

    Returns:
        The ``value`` attributes of the select's ``<option>`` elements,
        in document order, skipping the first one.

    Raises:
        ValueError: If the page contains no ``<select>`` with that id.
    """
    select = soup.find("select", {"id": id_value})
    if select is None:
        # Without this guard the failure would be an opaque AttributeError
        # on None, e.g. when the page layout changes.
        raise ValueError(f"no <select> element with id {id_value!r} found on the page")
    # The first <option> is skipped -- presumably a placeholder entry;
    # verify against the live page.
    return [option["value"] for option in select.find_all("option")[1:]]


def build_query_urls(region_ids, occupation_ids) -> list:
    """Return one API URL per (region, occupation) pair.

    Regions vary slowest: every occupation is listed for the first region
    before moving on to the next region.
    """
    return [
        f"{api_url}regionId={region_id}&occupationId={occupation_id}"
        for region_id in region_ids
        for occupation_id in occupation_ids
    ]


if __name__ == "__main__":
    with requests.Session() as session:
        # Set the headers once on the session so every request sends them.
        session.headers.update(headers)

        page = session.get(base_url, timeout=REQUEST_TIMEOUT)
        # Fail on an HTTP error rather than parsing an error page.
        page.raise_for_status()
        soup = BeautifulSoup(page.text, "lxml")

        region_ids = get_ids("dropdown-region")
        occupation_ids = get_ids("dropdown-occupation")
        all_query_urls = build_query_urls(region_ids, occupation_ids)

        for query_url in all_query_urls[:2]:  # remove [:2] to get all combinations
            response = session.get(query_url, timeout=REQUEST_TIMEOUT)
            # Surface HTTP errors instead of a confusing JSON decoding error.
            response.raise_for_status()
            data = response.json()
            print(json.dumps(data, indent=2))
这应该输出两个条目:
[
{
"Id": "01",
"Name": "Iles de la Madeleine",
"Occupations": [
{
"Id": "110",
"Name": "Briqueteur-ma\u00e7on",
"Pool": {
"IsOpen": true,
"IsLessThan10": true,
"IsLessThan30": true
}
}
],
"EffectiveDate": "17 janvier 2022"
}
]
[
{
"Id": "01",
"Name": "Iles de la Madeleine",
"Occupations": [
{
"Id": "130",
"Name": "Calorifugeur",
"Pool": {
"IsOpen": true,
"IsLessThan10": true,
"IsLessThan30": true
}
}
],
"EffectiveDate": "17 janvier 2022"
}
]