Want to grab every cloud-drive resource from pages 1-200 of YYDS 源码网 (www.yydsym.com) in one go? Today I'm sharing a Python script of under a hundred lines: install two packages and it's ready to run, automatically extracting each post's title plus the real external link. It supports the seven major pan providers and is fully open source!
1. What the Script Does
- Crawls list pages 1-200 in order
- Opens each detail page and pulls pan links from both the sidebar download buttons and the post body
- Automatically follows `goto?down=xxx` redirects to reach the real external link (see the sketch right after this list)
- Supports Lanzou / Baidu / Quark / 123pan / Aliyun / Xunlei / Tencent (the `PAN_KEYS` tuple in the code also matches 189 Cloud)
- Save format: `title | pan link`, plain txt you can copy from directly
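Here is how the redirect step works in isolation — a minimal sketch using the same `requests` call as the full script further down; the short link below is a made-up example:

```python
import requests

# hypothetical short link copied from a sidebar download button
short = "https://www.yydsym.com/goto?down=abc123"

# don't follow the redirect automatically; the real pan URL is in the Location header
r = requests.get(short, allow_redirects=False, timeout=10)
if r.status_code in (301, 302):
    print(r.headers["Location"])  # e.g. a lanzou / quark / baidu link
```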
2. Environment
| Item | Version |
|---|---|
| Python | 3.7+ |
| Dependencies | requests, beautifulsoup4 |
One-command install:

```bash
pip install requests beautifulsoup4
```
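Optionally, a quick sanity check that both packages import and the site is reachable — a minimal sketch reusing the list-page URL and the article selector from the script below:

```python
import requests
from bs4 import BeautifulSoup

# fetch list page 1 and count the article cards the scraper will parse
resp = requests.get("https://www.yydsym.com/page/1", timeout=10)
resp.raise_for_status()
soup = BeautifulSoup(resp.text, "html.parser")
print(len(soup.select("article.post-grid")), "posts on page 1")
```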
3. Full Script

```python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Scrape www.yydsym.com
Output format: title | pan link
Output file:   title_pan_links.txt on the Desktop
"""
import os
import time
import random
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

MAX_PAGE = 200          # number of list pages to crawl (the 1-200 range described above)
SLEEP = lambda: random.uniform(1.0, 2.0)   # random delay between requests, in seconds
TIMEOUT = 10
LIST_URL = "https://www.yydsym.com/page/{}"
SAVE_FILE = os.path.join(os.path.expanduser("~"), "Desktop", "title_pan_links.txt")

HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
}

# substrings that identify a supported pan provider in an href
PAN_KEYS = ("lanzou", "lanzn", "pan.baidu", "quark.cn", "123pan.com",
            "alipan.com", "xunlei.com", "cloud.189.cn", "tc.qq.com")


def get_list(page):
    """Fetch one list page and return a list of (title, detail_url) tuples."""
    url = LIST_URL.format(page)
    resp = requests.get(url, headers=HEADERS, timeout=TIMEOUT)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")
    items = []
    for art in soup.select('article.post-grid'):
        a = art.select_one('h2.entry-title a')
        if not a:
            continue
        title = a.get_text(strip=True)
        link = urljoin(url, a["href"])
        items.append((title, link))
    return items


def extract_pan(html):
    """Collect pan links from a detail page: sidebar buttons plus in-body <a> tags."""
    soup = BeautifulSoup(html, "html.parser")
    pans = set()
    # sidebar buttons: follow the goto?down=xxx redirect to the real link
    for a in soup.select('a[href*="goto?down="]'):
        short = urljoin("https://www.yydsym.com", a["href"])
        try:
            r = requests.get(short, headers=HEADERS, allow_redirects=False, timeout=TIMEOUT)
            real = r.headers.get("Location", "") if r.status_code in (301, 302) else short
            if real:
                pans.add(real)
        except Exception:
            continue
    # every <a> in the body whose href contains a known pan domain
    for a in soup.find_all("a", href=True):
        href = a["href"].strip()
        if any(k in href for k in PAN_KEYS):
            pans.add(href)
    return pans


def main():
    # load links saved on a previous run so reruns don't write duplicates
    exist = set()
    if os.path.isfile(SAVE_FILE):
        with open(SAVE_FILE, "r", encoding="utf-8") as f:
            for line in f:
                if "|" in line:
                    exist.add(line.split("|", 1)[1].strip())

    print("🔍 Crawling list pages...")
    for p in range(1, MAX_PAGE + 1):
        try:
            items = get_list(p)
            print(f"  page {p:3d} | {len(items):2d} posts")
        except Exception as e:
            print(f"  page {p:3d} | error: {e}")
            continue
        time.sleep(SLEEP())

        print("  scanning detail pages...")
        for title, url in items:
            try:
                html = requests.get(url, headers=HEADERS, timeout=TIMEOUT).text
                pans = extract_pan(html)
                new = 0
                for u in pans:
                    if u in exist:
                        continue
                    new += 1
                    with open(SAVE_FILE, "a", encoding="utf-8") as f:
                        f.write(f"{title} | {u}\n")
                    exist.add(u)
                if new:
                    print(f"    +{new} new | {title[:30]}...")
            except Exception as e:
                print(f"    skipped | {e}")
            time.sleep(SLEEP())

    print(f"\n✅ All done! Saved to → {SAVE_FILE}")


if __name__ == "__main__":
    main()
```
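To run it, save the script as something like `yydsym_scraper.py` (any filename works) and launch it with `python yydsym_scraper.py`. Results accumulate in `title_pan_links.txt` on your Desktop; because the script reloads already-saved links on startup, you can stop it at any time and rerun it without producing duplicate lines.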