Article

想一次性把 YYDS 源码网 1-100 页的网盘资源全部扒下来？今天分享一个不到 100 行的 Python 脚本，装好两个依赖后双击即可运行，自动提取标题+真实外链，支持 7 大网盘，完全开源！

1. 脚本功能

顺序抓取 1-100 页文章列表（页数由脚本中的 MAX_PAGE 控制）
进入详情页提取「侧边栏按钮 + 正文」网盘链接
自动跟随 goto?down=xxx 跳转，拿到真实外链
支持蓝奏云 / 百度 / 夸克 / 123 / 阿里 / 迅雷 / 腾讯（脚本的 PAN_KEYS 中还包含天翼云盘 cloud.189.cn）
保存格式：标题 | 网盘链接（txt 可直接复制）

2. 运行环境

项目	版本
Python	3.7+
依赖	requests、beautifulsoup4

一键安装：

♾️ bash 代码:

pip install requests beautifulsoup4

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
抓取 www.yydsym.com
保存格式：标题 | 网盘链接
输出文件：桌面 title_pan_links.txt
"""
import os
import time
import random
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Crawl settings. List pages 1..MAX_PAGE (inclusive) are visited by main().
MAX_PAGE = 100
# Returns a random pause length between 1 and 2 seconds; called before each
# time.sleep() so the requests are not evenly spaced.
SLEEP = lambda: random.uniform(1.0, 2.0)
# Timeout in seconds passed to requests.get().
TIMEOUT = 10
# List-page URL template; "{}" is replaced by the page number.
LIST_URL = "https://www.yydsym.com/page/{}"
# Output file: title_pan_links.txt in the current user's Desktop folder.
SAVE_FILE = os.path.join(os.path.expanduser("~"), "Desktop", "title_pan_links.txt")

# Request headers sent with every HTTP call (desktop Chrome User-Agent).
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
}

# Substrings that mark an href as a net-disk share link.
PAN_KEYS = (
    "lanzou", "lanzn", "pan.baidu", "quark.cn", "123pan.com",
    "alipan.com", "xunlei.com", "cloud.189.cn", "tc.qq.com",
)

def get_list(page):
    """Fetch one list page and return the articles found on it.

    Args:
        page: Page number substituted into ``LIST_URL``.

    Returns:
        A list of ``(title, link)`` tuples, one for each
        ``article.post-grid`` element that contains an
        ``h2.entry-title a`` anchor with an ``href``. ``link`` is made
        absolute against the list-page URL.

    Raises:
        requests.RequestException: If the request fails or the server
            answers with an error status (via ``raise_for_status``).
    """
    url = LIST_URL.format(page)
    resp = requests.get(url, headers=HEADERS, timeout=TIMEOUT)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    items = []
    for art in soup.select("article.post-grid"):
        a = art.select_one("h2.entry-title a")
        # Skip cards without a title anchor, and anchors without an href,
        # instead of raising KeyError and losing the whole page.
        href = a.get("href") if a else None
        if not href:
            continue
        title = a.get_text(strip=True)
        items.append((title, urljoin(url, href)))
    return items

def extract_pan(html):
    """Collect net-disk links from the HTML of one article detail page.

    Two sources are scanned:

    1. Anchors whose ``href`` contains ``goto?down=``. Each one is requested
       without following redirects; on a redirect response the ``Location``
       target is recorded, otherwise the ``goto`` URL itself is recorded.
    2. Every ``<a href>`` whose ``href`` contains one of ``PAN_KEYS``.

    Args:
        html: HTML text of the detail page.

    Returns:
        A set of link strings (possibly empty).
    """
    soup = BeautifulSoup(html, "html.parser")
    pans = set()

    # Jump buttons: resolve the site's goto?down=... short links.
    for a in soup.select('a[href*="goto?down="]'):
        short = urljoin("https://www.yydsym.com", a["href"])
        try:
            r = requests.get(short, headers=HEADERS, allow_redirects=False, timeout=TIMEOUT)
        except requests.RequestException:
            # Best effort: one unreachable short link must not lose the rest.
            continue
        location = r.headers.get("Location", "")
        if r.status_code in (301, 302, 303, 307, 308) and location:
            # Location may be relative; resolve it against the short link.
            pans.add(urljoin(short, location))
        else:
            # No usable redirect: keep the short link so the entry is not lost.
            pans.add(short)

    # Direct links: any anchor whose href points at a known net-disk host.
    for a in soup.find_all("a", href=True):
        href = a["href"].strip()
        if any(k in href for k in PAN_KEYS):
            pans.add(href)
    return pans

def main():
    """Crawl list pages 1..MAX_PAGE and append new links to ``SAVE_FILE``.

    Links already present in ``SAVE_FILE`` are loaded first, so re-running
    the script only appends links it has not written before. Each output
    line has the form ``title | link``. Errors on a list page or a detail
    page are printed and that page is skipped; the crawl continues.
    """
    exist = set()
    if os.path.isfile(SAVE_FILE):
        with open(SAVE_FILE, "r", encoding="utf-8") as f:
            for line in f:
                # Split on the LAST "|" so a title that itself contains "|"
                # does not corrupt the stored link.
                _, sep, link = line.rpartition("|")
                if sep:
                    exist.add(link.strip())

    # The Desktop folder may not exist (e.g. on a server or a relocated
    # profile); create it so the first write does not fail.
    os.makedirs(os.path.dirname(SAVE_FILE), exist_ok=True)

    print("🔍 开始抓取列表页...")
    for p in range(1, MAX_PAGE + 1):
        try:
            items = get_list(p)
        except Exception as e:
            print(f"  第 {p:3d} 页 | 错误：{e}")
            # Pause on failure too, so a failing site is not hit back-to-back.
            time.sleep(SLEEP())
            continue
        print(f"  第 {p:3d} 页 | 本页 {len(items):2d} 篇")
        time.sleep(SLEEP())

        print("     扫描详情页...")
        for title, url in items:
            try:
                resp = requests.get(url, headers=HEADERS, timeout=TIMEOUT)
                # Do not parse an error page as if it were the article.
                resp.raise_for_status()
                fresh = [u for u in extract_pan(resp.text) if u not in exist]
                if fresh:
                    # One open per article rather than one per link.
                    with open(SAVE_FILE, "a", encoding="utf-8") as f:
                        for u in fresh:
                            f.write(f"{title} | {u}\n")
                    exist.update(fresh)
                    print(f"       +{len(fresh)} 条 | {title[:30]}...")
            except Exception as e:
                # Top-level boundary: report and move on to the next article.
                print(f"       跳过 | {e}")
            time.sleep(SLEEP())

    print(f"\n✅ 全部完成！文件已保存 → {SAVE_FILE}")

# Run the crawl only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()

现在已有 14 次阅读，0 条评论，1 人点赞

日	一	二	三	四	五	六
			1	2	3	4
5	6	7	8	9	10	11
12	13	14	15	16	17	18
19	20	21	22	23	24	25
26	27	28	29	30	31

一键提取 YYDS 源码网 100 页网盘链接！Python 脚本开源

1. 脚本功能

2. 运行环境

!/usr/bin/env python3

-- coding: utf-8 --