"""Import a Notion page outline into Box / Item / SubItem records.

The expected page layout, as implemented by ``parse_notion_blocks``:

* a top-level ``heading_2`` block starts a new box;
* a top-level ``bulleted_list_item`` under that heading is an item;
* a ``bulleted_list_item`` nested directly under an item is a sub-item.

Everything else is skipped and reported as a warning.
"""

from __future__ import annotations

import re
from dataclasses import dataclass, field
from typing import Any
from urllib.parse import urlparse

import requests
from requests import Response
from sqlalchemy.orm import Session

from app.db import init_db
from app.models import Box, Item, SubItem

NOTION_VERSION = "2026-03-11"
NOTION_API_BASE = "https://api.notion.com/v1"

# A Notion id: 32 hex digits, either bare or in dashed 8-4-4-4-12 form.
# Compiled once at import time instead of on every extract_page_id() call.
_PAGE_ID_PATTERN = re.compile(
    r"([0-9a-fA-F]{32}|[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})"
)

# Block types reported with the dedicated "media is not imported" warning.
_MEDIA_BLOCK_TYPES = frozenset({"image", "file", "video", "audio", "pdf"})


@dataclass(slots=True)
class ParsedSubItem:
    """A second-level bullet parsed from the page."""

    name: str
    # Never populated by the parser in this module; stays None unless set by a caller.
    note: str | None = None


@dataclass(slots=True)
class ParsedItem:
    """A first-level bullet parsed from the page, with its nested sub-items."""

    name: str
    # Never populated by the parser in this module; stays None unless set by a caller.
    note: str | None = None
    # Set by the parser to True exactly when at least one sub-item was parsed.
    is_container: bool = False
    subitems: list[ParsedSubItem] = field(default_factory=list)


@dataclass(slots=True)
class ParsedBox:
    """A ``heading_2`` section parsed from the page, with the items below it."""

    name: str
    # Never populated by the parser in this module; stays None unless set by a caller.
    note: str | None = None
    items: list[ParsedItem] = field(default_factory=list)


@dataclass(slots=True)
class ImportSummary:
    """Result of parsing a page: the parsed boxes plus any warnings raised."""

    boxes: list[ParsedBox]
    warnings: list[str] = field(default_factory=list)

    @property
    def box_count(self) -> int:
        """Number of parsed boxes."""
        return len(self.boxes)

    @property
    def item_count(self) -> int:
        """Total number of items across all boxes."""
        return sum(len(box.items) for box in self.boxes)

    @property
    def container_item_count(self) -> int:
        """Number of items flagged ``is_container`` across all boxes."""
        return sum(1 for box in self.boxes for item in box.items if item.is_container)

    @property
    def subitem_count(self) -> int:
        """Total number of sub-items across all items of all boxes."""
        return sum(len(item.subitems) for box in self.boxes for item in box.items)


class NotionClient:
    """Minimal Notion REST client backed by a ``requests.Session``.

    The client owns its HTTP session. Call ``close()`` when done, or use the
    client as a context manager so the session is released automatically.
    """

    def __init__(self, token: str):
        """Create a session that sends *token* as a bearer token on every request."""
        self.session = requests.Session()
        self.session.headers.update(
            {
                "Authorization": f"Bearer {token}",
                "Notion-Version": NOTION_VERSION,
            }
        )

    def __enter__(self) -> NotionClient:
        return self

    def __exit__(self, *exc_info: object) -> None:
        self.close()

    def close(self) -> None:
        """Close the underlying HTTP session."""
        self.session.close()

    def list_block_children(self, block_id: str) -> list[dict[str, Any]]:
        """Return all direct children of *block_id*, following pagination.

        Pages of up to 100 results are requested until the response stops
        reporting ``has_more``.

        Raises:
            RuntimeError: if any request returns an HTTP error status.
        """
        results: list[dict[str, Any]] = []
        next_cursor: str | None = None
        while True:
            params: dict[str, Any] = {"page_size": 100}
            if next_cursor:
                params["start_cursor"] = next_cursor
            response = self.session.get(
                f"{NOTION_API_BASE}/blocks/{block_id}/children",
                params=params,
                timeout=30,
            )
            self._raise_for_status(response)
            payload = response.json()
            results.extend(payload.get("results", []))
            next_cursor = payload.get("next_cursor")
            # Stop when there are no more pages. Also stop if the response
            # claims more pages but gives no cursor: without a cursor the next
            # request would fetch the first page again, looping forever.
            if not payload.get("has_more") or not next_cursor:
                break
        return results

    def _raise_for_status(self, response: Response) -> None:
        """Raise ``RuntimeError`` with the status code and body on an HTTP error."""
        try:
            response.raise_for_status()
        except requests.HTTPError as exc:
            message = response.text
            raise RuntimeError(f"Notion API 请求失败: {response.status_code} {message}") from exc


def extract_page_id(page_url: str) -> str:
    """Extract a Notion id from *page_url* and return it in dashed lowercase form.

    Every path segment is searched for a 32-hex-digit id (bare or dashed),
    followed by the URL fragment if there is one. When several ids are found
    the last one wins, so an id in the fragment takes precedence over one in
    the path.

    Raises:
        ValueError: if no id can be found in the path or the fragment.
    """
    parsed = urlparse(page_url.strip())
    candidates = [segment for segment in parsed.path.split("/") if segment]
    if parsed.fragment:
        candidates.append(parsed.fragment)

    matches: list[str] = []
    for candidate in candidates:
        matches.extend(_PAGE_ID_PATTERN.findall(candidate))
    if not matches:
        raise ValueError("无法从 Notion 页面 URL 中提取 page id")

    # Normalise to the canonical lowercase 8-4-4-4-12 layout.
    raw = matches[-1].replace("-", "").lower()
    return f"{raw[:8]}-{raw[8:12]}-{raw[12:16]}-{raw[16:20]}-{raw[20:]}"


def fetch_page_blocks(token: str, page_id: str) -> list[dict[str, Any]]:
    """Fetch the full block tree below *page_id* using a short-lived client.

    Returns the top-level blocks; each block dict carries its descendants
    under the extra ``"_children"`` key.

    Raises:
        RuntimeError: if any Notion API request returns an HTTP error status.
    """
    # The context manager closes the HTTP session even when a request fails.
    with NotionClient(token) as client:
        return _fetch_block_tree(client, page_id)


def _fetch_block_tree(client: NotionClient, block_id: str) -> list[dict[str, Any]]:
    """Recursively fetch the children of *block_id*.

    Each returned block dict is mutated in place: a ``"_children"`` key is
    added holding the block's own children (an empty list when the block
    reports no children). Recursion follows every block with ``has_children``
    set, regardless of block type.
    """
    blocks = client.list_block_children(block_id)
    for block in blocks:
        if block.get("has_children"):
            block["_children"] = _fetch_block_tree(client, block["id"])
        else:
            block["_children"] = []
    return blocks


def parse_notion_blocks(blocks: list[dict[str, Any]]) -> ImportSummary:
    """Turn a fetched top-level block list into boxes, items and sub-items.

    * ``heading_2`` with text starts a new box; an empty heading is skipped
      with a warning and does not change the current box.
    * ``bulleted_list_item`` becomes an item of the current box; a bullet seen
      before any heading is skipped with a warning.
    * Any other block type is skipped with a warning.
    """
    boxes: list[ParsedBox] = []
    warnings: list[str] = []
    current_box: ParsedBox | None = None

    for block in blocks:
        block_type = block.get("type")

        if block_type == "heading_2":
            heading_text = extract_block_text(block)
            if not heading_text:
                warnings.append("发现空的 heading_2,已跳过")
                continue
            current_box = ParsedBox(name=heading_text)
            boxes.append(current_box)
            continue

        if block_type == "bulleted_list_item":
            if current_box is None:
                warnings.append(
                    f"发现未归属到任何 heading_2 的一级 bullet:{extract_block_text(block) or '[空文本]'}"
                )
                continue
            parsed_item = _parse_item_block(block, warnings, level=1)
            if parsed_item is not None:
                current_box.items.append(parsed_item)
            continue

        warnings.extend(_warning_for_unsupported_block(block, level=0))

    return ImportSummary(boxes=boxes, warnings=warnings)


def _parse_item_block(
    block: dict[str, Any],
    warnings: list[str],
    *,
    level: int,
) -> ParsedItem | None:
    """Parse one first-level bullet and its direct children.

    Nested ``bulleted_list_item`` children become sub-items. Anything nested
    below a sub-item is ignored, and other child block types are skipped; both
    cases append messages to *warnings*.

    Args:
        block: the bullet block, with its children under ``"_children"``.
        warnings: list that warning messages are appended to (mutated).
        level: nesting level, used only in the empty-bullet warning text.

    Returns:
        The parsed item, or ``None`` when the bullet has no text.
    """
    item_name = extract_block_text(block)
    if not item_name:
        warnings.append(f"发现空的 bullet(层级 {level}),已跳过")
        return None

    subitems: list[ParsedSubItem] = []
    for child in block.get("_children", []):
        if child.get("type") != "bulleted_list_item":
            warnings.extend(
                _warning_for_unsupported_block(child, level=2, parent_name=item_name)
            )
            continue

        child_name = extract_block_text(child)
        if not child_name:
            warnings.append(f"发现空的二级 bullet(父项:{item_name}),已跳过")
            continue
        subitems.append(ParsedSubItem(name=child_name))

        # Only two bullet levels are imported; report everything deeper.
        if child.get("_children"):
            warnings.append(
                f"发现超出支持层级的三级内容(父项:{item_name} -> 子项:{child_name}),已忽略更深层级"
            )
            for deep_child in child["_children"]:
                warnings.extend(_warning_for_unsupported_block(deep_child, level=3))

    return ParsedItem(
        name=item_name,
        is_container=bool(subitems),
        subitems=subitems,
    )


def _warning_for_unsupported_block(
    block: dict[str, Any],
    *,
    level: int,
    parent_name: str | None = None,
) -> list[str]:
    """Build the warning for a skipped block, returned as a one-element list.

    Media block types get a dedicated message; every other type gets the
    generic "not handled" message. *parent_name*, when given, is included in
    the message prefix.
    """
    block_type = block.get("type", "unknown")
    text = extract_block_text(block) or "[无文本]"
    prefix = f"层级 {level} block"
    if parent_name:
        prefix += f"(父项:{parent_name})"
    if block_type in _MEDIA_BLOCK_TYPES:
        return [f"{prefix} 类型 {block_type} 已跳过(这版不导入图片或媒体):{text}"]
    return [f"{prefix} 类型 {block_type} 未按导入规则处理,已跳过:{text}"]


def extract_block_text(block: dict[str, Any]) -> str:
    """Return the concatenated, stripped ``plain_text`` of a block's rich text.

    The rich text is read from ``block[block["type"]]["rich_text"]``. A block
    with no type, no payload for its type, or no rich text yields ``""``.
    """
    block_type = block.get("type")
    # "or" fallbacks also cover keys that are present but null, which would
    # otherwise raise AttributeError / TypeError below.
    block_data = (block.get(block_type) or {}) if block_type else {}
    rich_text = block_data.get("rich_text") or []
    return "".join(part.get("plain_text", "") for part in rich_text).strip()


def print_summary(summary: ImportSummary) -> None:
    """Print the counts, a per-box breakdown and all warnings to stdout."""
    print()
    print("解析结果摘要")
    print(f"- Box: {summary.box_count}")
    print(f"- Item: {summary.item_count}")
    print(f"- 其中容器型 Item: {summary.container_item_count}")
    print(f"- SubItem: {summary.subitem_count}")
    print(f"- Warnings: {len(summary.warnings)}")
    print()

    for box in summary.boxes:
        container_names = [item.name for item in box.items if item.is_container]
        print(f"[Box] {box.name}")
        print(f" - Item 数量: {len(box.items)}")
        if container_names:
            print(f" - 容器型 Item: {', '.join(container_names)}")
            for item in box.items:
                if item.is_container:
                    print(f" * {item.name} -> SubItem {len(item.subitems)} 个")

    if summary.warnings:
        print()
        print("Warnings")
        for warning in summary.warnings:
            print(f"- {warning}")


def apply_import(summary: ImportSummary, db: Session) -> dict[str, int]:
    """Persist the parsed boxes, items and sub-items and commit the session.

    ``init_db()`` is called first. Every item and sub-item is created with
    ``quantity=1``. If anything fails before or during the commit, the session
    is rolled back and the original exception is re-raised, so the caller does
    not inherit a half-applied transaction.

    Returns:
        Counts of created rows under the keys ``"boxes"``, ``"items"`` and
        ``"subitems"``.
    """
    init_db()

    created_boxes = 0
    created_items = 0
    created_subitems = 0

    try:
        for parsed_box in summary.boxes:
            box = Box(name=parsed_box.name, note=parsed_box.note)
            db.add(box)
            db.flush()
            created_boxes += 1

            for parsed_item in parsed_box.items:
                item = Item(
                    box=box,
                    name=parsed_item.name,
                    note=parsed_item.note,
                    quantity=1,
                    is_container=parsed_item.is_container,
                )
                db.add(item)
                db.flush()
                created_items += 1

                for parsed_subitem in parsed_item.subitems:
                    subitem = SubItem(
                        parent_item=item,
                        name=parsed_subitem.name,
                        note=parsed_subitem.note,
                        quantity=1,
                    )
                    db.add(subitem)
                    created_subitems += 1

        db.commit()
    except Exception:
        # Discard the partially flushed import before propagating the error.
        db.rollback()
        raise

    return {
        "boxes": created_boxes,
        "items": created_items,
        "subitems": created_subitems,
    }