这是一个 Python 脚本,用于列出并克隆所有领先于原仓库(即包含原仓库所没有的提交)的 fork。
它不使用 API。所以它不会受到速率限制,也不需要身份验证。但如果 GitHub 网站设计发生变化,可能需要调整。
与其他答案中显示 ZIP 文件链接的小书签不同,此脚本还保存有关提交的信息,因为它使用 git clone 并创建一个带有概述的 commits.htm 文件。
import requests, re, os, sys, time
def content_from_url(url, timeout=30):
    """Download *url* with an HTTP GET and return the raw response body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds ``requests`` waits on the server before giving up.
            Without a timeout a stalled connection would block the whole
            scraping run indefinitely.

    Returns:
        The undecoded response body (``Response.content``). No HTTP status
        check is made, so error pages are returned like any other page.

    Raises:
        Whatever ``requests.get`` raises on connection failure or timeout;
        nothing is caught here.
    """
    # TODO handle internet being off and stuff
    return requests.get(url, timeout=timeout).content
def _page_text(raw):
    """Return a downloaded page as the native ``str`` type for regex matching."""
    if isinstance(raw, str):
        # Python 2: the body is already a (byte) str, use it unchanged.
        return raw
    # Python 3: the body is bytes; the patterns below are str patterns.
    return raw.decode('utf-8', 'replace')


def clone_ahead_forks(forklist_url):
    """Clone the root repository and every listed fork that is ahead of it.

    The page at *forklist_url* is saved as ``forklist.htm`` in the current
    working directory and scanned for repository links. Each linked page is
    fetched and searched for an "N commit(s) ahead[, M commit(s) behind]"
    note. A repository is kept when that note is found, or when it is the
    first one seen without such a note (labelled ``root repo``). For every
    kept repository a directory ``<owner> (<aheadness>)`` is created in the
    current working directory; the repository's ``/commits`` page is saved
    there as ``commits.htm`` and ``git clone`` is run inside it.

    Args:
        forklist_url: URL of the page that lists the forks.

    Raises:
        OSError: if a target directory cannot be created (for example
            because it already exists).
    """
    import subprocess  # function-scope import keeps this block self-contained

    forklist_htm = content_from_url(forklist_url)
    # Binary mode stores the page byte-for-byte and accepts the body on both
    # Python 2 (str) and Python 3 (bytes).
    with open("forklist.htm", "wb") as forklist_file:
        forklist_file.write(forklist_htm)

    is_root = True
    # not working if there are no forks: '<a class="(Link--secondary)?" href="(/([^/"]*)/[^/"]*)">'
    for match in re.finditer('<a (class=""|data-pjax="#js-repo-pjax-container") href="(/([^/"]*)/[^/"]*)">', _page_text(forklist_htm)):
        fork_url = 'https://github.com'+match.group(2)
        fork_owner_login = match.group(3)
        fork_htm = _page_text(content_from_url(fork_url))

        match2 = re.search('<div class="d-flex flex-auto">[^<]*?([0-9]+ commits? ahead(, [0-9]+ commits? behind)?)', fork_htm)
        # TODO if website design changes, fallback onto checking whether 'ahead'/'behind'/'even with' appear only once on the entire page - in that case they are not part of the username etc.

        # One progress dot per inspected repository; flush so it shows up
        # immediately even though no newline has been written yet.
        sys.stdout.write('.')
        sys.stdout.flush()

        if not (match2 or is_root):
            continue

        if match2:
            aheadness = match2.group(1)  # for example '1 commit ahead, 2 commits behind'
        else:
            aheadness = 'root repo'
            is_root = False  # for subsequent iterations

        target_dir = fork_owner_login+' ('+aheadness+')'
        print(target_dir)

        os.mkdir(target_dir)
        os.chdir(target_dir)
        try:
            # save commits.htm
            commits_htm = content_from_url(fork_url+'/commits')
            with open("commits.htm", "wb") as commits_file:
                commits_file.write(commits_htm)

            # git clone -- pass an argument list so that text scraped from
            # the page is never interpreted by a shell.
            try:
                subprocess.call(['git', 'clone', fork_url+'.git'])
            except OSError as err:
                # git could not be started; report it and carry on, as the
                # original shell invocation would have done.
                sys.stderr.write('could not run git: %s\n' % err)
            print('')

            # no need to recurse into forks of forks because they are all listed on the initial page and being traversed already
        finally:
            # Always step back out, so a failure in one fork does not leave
            # the process inside that fork's directory.
            os.chdir('..')
# Driver: for every GitHub repository URL listed in repo_urls.txt (one per
# line), collect the root repository and its ahead forks into a directory
# named "<repo> (<user>)" below the directory the script was started from.
base_path = os.getcwd()
match_disk_letter = re.search(r'^([a-zA-Z]:\\)', base_path)

with open('repo_urls.txt') as url_file:
    for url in url_file:
        # Drop the line ending and any trailing slash; with a trailing slash
        # the pattern below would not match and the URL would be silently
        # ignored.
        url = url.strip().rstrip('/')

        match = re.search(r'github\.com/([^/]*)/([^/]*)$', url)
        if not match:
            continue

        user_name = match.group(1)
        repo_name = match.group(2)
        print(repo_name)

        dirname_for_forks = repo_name+' ('+user_name+')'
        if os.path.exists(dirname_for_forks):
            print(dirname_for_forks+' already exists, skipping.')
            continue

        url += "/network/members"  # page that lists the forks

        TMP_DIR = 'tmp_'+time.strftime("%Y%m%d-%H%M%S")
        if match_disk_letter:  # if Windows, i.e. if path starts with A:\ or so, run git in A:\tmp_... instead of .\tmp_..., in order to prevent "filename too long" errors
            TMP_DIR = match_disk_letter.group(1)+TMP_DIR
        print(TMP_DIR)

        os.mkdir(TMP_DIR)
        os.chdir(TMP_DIR)
        try:
            clone_ahead_forks(url)
        finally:
            # Return to the start directory even if cloning failed, so the
            # process is never left inside the temporary directory.
            os.chdir(base_path)
        print('')

        # Only reached when cloning finished without an exception: give the
        # temporary directory its final name.
        os.rename(TMP_DIR, dirname_for_forks)
如果你创建文件 repo_urls.txt,并写入如下内容(可以放多个网址,每行一个):
https://github.com/cifkao/tonnetz-viz
然后您将获得以下目录,每个目录都包含相应的克隆存储库:
tonnetz-viz (cifkao)
    bakaiadam (2 commits ahead)
    chumo (2 commits ahead, 4 commits behind)
    cifkao (root repo)
    codedot (76 commits ahead, 27 commits behind)
    k-hatano (41 commits ahead)
    shimafuri (11 commits ahead, 8 commits behind)
如果不起作用,请尝试该脚本的早期版本。