import requests
import datetime
import time
import os
import smtplib
from email.message import EmailMessage
from habanero import Crossref
import markdown as md
from weasyprint import HTML, CSS

# ================= 1. Configuration =================
# SECURITY: secrets are read from the environment instead of being committed
# to source control. The credentials that used to be embedded in this file
# must be treated as leaked and rotated. Non-secret identifiers keep their
# previous values as defaults and may be overridden through the environment.
FEISHU_APP_ID = os.environ.get("FEISHU_APP_ID", "cli_a9d25c8530785cc8")
FEISHU_APP_SECRET = os.environ.get("FEISHU_APP_SECRET", "")
SPREADSHEET_TOKEN = os.environ.get("SPREADSHEET_TOKEN", "JyyEsR8tYh9Q2rt08v7cogWznJg")
SHEET_ID = os.environ.get("SHEET_ID", "3105e6")

DS_API_KEY = os.environ.get("DS_API_KEY", "")
DS_API_URL = "https://api.deepseek.com/chat/completions"

SERP_API_KEY = os.environ.get("SERP_API_KEY", "")

MAIL_SENDER = os.environ.get("MAIL_SENDER", "lmyleo7@163.com")  # sender's 163.com mailbox
MAIL_APP_PASSWORD = os.environ.get("MAIL_APP_PASSWORD", "")  # 163.com SMTP authorization code
MAIL_RECIPIENT = os.environ.get("MAIL_RECIPIENT", "kwei@zju.edu.cn")
MAIL_SMTP_SERVER = "smtp.163.com"
MAIL_SMTP_PORT = 465

# Keywords searched one by one against every configured journal.
TARGET_KEYWORDS = [
    "Silicon Photonics",
    "Photonic Integrated Circuits",
    "Nanophotonics",
    "Metasurface",
    "Metamaterials",
    "Computational Imaging",
    "Plasmonics",
    "Optoelectronics",
    "Diffractive Optics",
    "Optical Interconnects",
    "On-chip Photonics",
    "Flat Optics",
]

# ================= 2.
# Core functions =================


def _clean_cell(row, index):
    """Return cell *index* of *row* as a stripped string, or None if absent or blank."""
    if index >= len(row) or not row[index]:
        return None
    return str(row[index]).strip() or None


def get_journal_configs():
    """[STEP 1] Load the journal configuration from a Feishu spreadsheet.

    Obtains a tenant access token, then reads range ``A2:C50`` of sheet
    ``SHEET_ID``. The first three cells of each row are used as journal name,
    ISSN and e-ISSN respectively; rows without a name are ignored.

    Returns:
        list[dict]: One ``{"name", "issn", "e_issn"}`` dict per usable row
        (``issn`` / ``e_issn`` are ``None`` when the cell is empty). An empty
        list is returned on any failure -- this step is best-effort.
    """
    print("\n--- [STEP 1] 正在读取飞书期刊配置 ---")
    try:
        auth_url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal"
        # A timeout keeps an unresponsive endpoint from hanging the whole run.
        token_res = requests.post(
            auth_url,
            json={"app_id": FEISHU_APP_ID, "app_secret": FEISHU_APP_SECRET},
            timeout=30,
        ).json()
        token = token_res.get("tenant_access_token")
        if not token:
            # Fail here rather than sending "Bearer None" to the sheets API.
            raise RuntimeError(
                f"no tenant_access_token in auth response: {token_res.get('msg')}"
            )

        read_url = (
            "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/"
            f"{SPREADSHEET_TOKEN}/values_batch_get"
        )
        headers = {"Authorization": f"Bearer {token}"}
        res = requests.get(
            read_url,
            params={"ranges": f"{SHEET_ID}!A2:C50"},
            headers=headers,
            timeout=30,
        ).json()

        # ``or`` guards against explicit nulls in the JSON payload.
        value_ranges = (res.get("data") or {}).get("valueRanges") or []
        if not value_ranges:
            return []

        configs = []
        for row in value_ranges[0].get("values") or []:
            if not row:
                continue
            name = _clean_cell(row, 0)
            if not name:
                continue
            configs.append({
                "name": name,
                "issn": _clean_cell(row, 1),
                "e_issn": _clean_cell(row, 2),
            })
        print(f"✅ 成功加载 {len(configs)} 条有效期刊配置")
        return configs
    except Exception as e:  # top-level boundary of a best-effort step
        print(f"❌ 飞书配置读取失败: {e}")
        return []


def fetch_dois_crossref(journal_configs):
    """[STEP 2] Query Crossref keyword by keyword for papers of the last 7 days.

    For every journal and every entry of ``TARGET_KEYWORDS`` one Crossref
    ``works`` query is issued. When the journal has an ISSN (e-ISSN preferred)
    it is used as a filter; otherwise the journal name is added to the query.

    Args:
        journal_configs: Dicts with ``name``, ``issn`` and ``e_issn`` keys.

    Returns:
        list[dict]: Unique papers as ``{"title_en", "doi", "journal",
        "hit_keyword"}``; a failed query is reported and skipped.
    """
    print("\n--- [STEP 2] 正在 Crossref 检索 (按关键词逐一扫描) ---")
    cr = Crossref(mailto="hextorize@gmail.com")
    start_date = (datetime.date.today() - datetime.timedelta(days=7)).strftime("%Y-%m-%d")

    tasks = []
    seen_dois = set()  # lower-cased DOIs, so case variants count as duplicates

    for j in journal_configs:
        print(f"\n >>> 正在扫描期刊: {j['name']}")
        target_issn = j.get('e_issn') or j.get('issn')

        for kw in TARGET_KEYWORDS:
            print(f" - 搜索关键词: {kw}...", end="", flush=True)
            filters = {'from-pub-date': start_date}
            if target_issn:
                filters['issn'] = target_issn
                query_str = kw
            else:
                # Without an ISSN the journal name has to be part of the query.
                query_str = f'"{j["name"]}" {kw}'

            try:
                # At most 3 hits per keyword keeps the downstream workload small.
                res = cr.works(filter=filters, query=query_str, limit=3, select="title,DOI")
                items = res['message'].get('items', [])
            except Exception as e:
                # Skip this keyword but say why instead of hiding the cause.
                print(f" 跳过 (请求异常: {e})")
                continue

            new_found = 0
            for art in items:
                doi = art.get('DOI')
                if not doi or doi.lower() in seen_dois:
                    continue
                seen_dois.add(doi.lower())
                tasks.append({
                    "title_en": art['title'][0] if art.get('title') else "No Title",
                    "doi": doi,
                    "journal": j['name'],
                    "hit_keyword": kw,  # which keyword produced this hit
                })
                new_found += 1
            print(f" 新找到 {new_found} 篇")

    print(f"\n✅ 扫描结束,共锁定 {len(tasks)} 篇唯一论文")
    return tasks


def enrich_content_serpapi(tasks):
    """[STEP 3] Attach a Google Scholar snippet to every task via SerpApi.

    Sets ``task['snippet_en']`` in place to the first organic result's
    snippet, or to a placeholder when nothing is found or the request fails.

    Args:
        tasks: Task dicts carrying at least a ``doi`` key.

    Returns:
        list[dict]: The same ``tasks`` list, mutated in place.
    """
    print("\n--- [STEP 3] 正在通过 SerpApi 提取核心内容 ---")
    total = len(tasks)
    for i, task in enumerate(tasks):
        print(f" [{i + 1}/{total}] 检索 DOI: {task['doi']}...", end="", flush=True)
        try:
            params = {
                "engine": "google_scholar",
                "q": f"DOI:{task['doi']}",
                "api_key": SERP_API_KEY,
                "hl": "en",
            }
            res = requests.get("https://serpapi.com/search.json", params=params, timeout=30)
            res.raise_for_status()
            results = res.json().get("organic_results", [])
            if results:
                task['snippet_en'] = results[0].get("snippet", "No abstract snippet available.")
            else:
                task['snippet_en'] = "No content found."
            print(" [OK]")
        except Exception as e:
            # Store only the exception type: the full message of a requests
            # error contains the request URL, which includes the API key, and
            # this field is later sent to DeepSeek and written to the report.
            task['snippet_en'] = f"Search Error: {type(e).__name__}"
            print(" [Error]")
        if i < total - 1:
            time.sleep(0.6)  # small pause between requests to respect rate limits
    return tasks


# Labels requested from DeepSeek in the prompt, mapped to the task keys they fill.
_DS_REPLY_FIELDS = (
    ("中文标题", "title_zh"),
    ("中文摘要", "snippet_zh"),
    ("AI总结", "summary_zh"),
)


def _parse_ds_reply(content):
    """Split a DeepSeek reply into ``{task_key: text}`` for the labels found in it."""
    parsed = {}
    current = None
    for raw_line in content.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        # Ignore Markdown decoration in front of a label ("**", "#", "-", ">").
        probe = line.lstrip("*#-> ")
        for label, key in _DS_REPLY_FIELDS:
            if not probe.startswith(label):
                continue
            rest = probe[len(label):].lstrip("* ")
            # Accept both the ASCII and the full-width colon after the label.
            if rest[:1] in (":", ":"):
                current = key
                parsed[key] = rest[1:].strip("* ")
                break
        else:
            # Not a label line: it continues the field currently being read.
            if current is not None:
                parsed[current] = f"{parsed[current]}\n{line}".strip()
    return parsed


def summarize_with_ds(tasks):
    """[STEP 4] Ask DeepSeek to translate title/abstract and summarise each task.

    On success the keys ``title_zh``, ``snippet_zh`` and ``summary_zh`` are
    added to the task (only those present in the reply). A failed request is
    reported and leaves the task unchanged.

    Args:
        tasks: Task dicts with ``title_en`` and, normally, ``snippet_en``.

    Returns:
        list[dict]: The same ``tasks`` list, mutated in place.
    """
    print("\n--- [STEP 4] 正在请求 DeepSeek 处理内容 ---")
    headers = {"Authorization": f"Bearer {DS_API_KEY}", "Content-Type": "application/json"}

    for i, task in enumerate(tasks):
        print(f" > 处理第 {i + 1} 篇...", end="", flush=True)
        prompt = (f"你是一个光学专家。请翻译以下论文标题和摘要为中文,并总结核心创新点(200字内)。\n"
                  f"标题: {task['title_en']}\n摘要: {task.get('snippet_en', 'N/A')}\n"
                  f"格式要求:\n中文标题:xxx\n中文摘要:xxx\nAI总结:xxx")
        payload = {
            "model": "deepseek-chat",
            "messages": [{"role": "system", "content": "你是一个严谨的科研助手。"},
                         {"role": "user", "content": prompt}]
        }
        try:
            resp = requests.post(DS_API_URL, json=payload, headers=headers, timeout=30)
            resp.raise_for_status()
            content = resp.json()['choices'][0]['message']['content'].strip()
            task.update(_parse_ds_reply(content))
            print(" [完成]")
        except Exception as e:
            # Best-effort per paper: report the reason and move on.
            print(f" [失败] {type(e).__name__}: {e}")
    return tasks


def _display_title(task):
    """Return the Chinese title, falling back to the English one, then 'N/A'."""
    return task.get('title_zh') or task.get('title_en') or 'N/A'


def _render_report(tasks, today_str):
    """Build the full Markdown text of the weekly report."""
    parts = [f"# 光学行业学术动态周报 ({today_str})\n\n", "## 目录\n"]
    for idx, task in enumerate(tasks, start=1):
        parts.append(f"- [{idx}. {_display_title(task)}](#task-{idx})\n")
    parts.append("\n---\n\n")

    for idx, task in enumerate(tasks, start=1):
        parts += [
            # Explicit anchor so the table-of-contents links have a target.
            f'<a id="task-{idx}"></a>\n\n',
            f"### {idx}. {_display_title(task)}\n\n",
            # Two trailing spaces are a Markdown hard line break.
            f"**期刊**: {task['journal']} | **匹配关键词**: {task.get('hit_keyword', 'N/A')}  \n",
            f"**DOI**: [{task['doi']}](https://doi.org/{task['doi']})\n\n",
            "#### 【标题】\n",
            f"- **中文**: {task.get('title_zh', 'N/A')}\n",
            f"- **英文**: {task['title_en']}\n\n",
            "#### 【摘要原文】\n",
            f"> *{task.get('snippet_en', 'N/A')}*\n\n",
            "#### 【摘要翻译】\n",
            f"> {task.get('snippet_zh', 'N/A')}\n\n",
            "#### 【AI 深度总结】\n",
            f"{task.get('summary_zh', 'N/A')}\n\n",
            "---\n\n",
        ]
    return "".join(parts)


def save_to_markdown(tasks):
    """[STEP 5] Write the weekly report as ``<YYYY-MM-DD>周报.md`` in the cwd.

    The report has a linked table of contents followed by one section per
    task. Entries without a Chinese title are listed under their English one.

    Args:
        tasks: Task dicts with ``title_en``, ``doi`` and ``journal``; the
            translated fields and ``hit_keyword`` are optional.

    Returns:
        str: Absolute path of the written Markdown file.
    """
    print("\n--- [STEP 5] 正在生成 Markdown 报告 ---")
    today_str = datetime.date.today().strftime("%Y-%m-%d")
    file_name = f"{today_str}周报.md"

    with open(file_name, "w", encoding="utf-8") as f:
        f.write(_render_report(tasks, today_str))

    full_path = os.path.abspath(file_name)
    print(f"✅ Markdown 周报已保存: {full_path}")
    return full_path


# NOTE(review): markdown_to_pdf ([STEP 6]) starts at this point in the original
# file but runs past the visible chunk, so it is not rewritten here.
" f"