基于Legado书源规则生成Python代码

书源规则：https://celetor.github.io/teachme/Rule/source.html
DrissionPage查找规则：https://g1879.gitee.io/drissionpagedocs/get_elements/usage

Python代码基础抽象类

# -*- coding: utf-8 -*-
"""
@software: PyCharm
@file: base.py
@time: 2024/2/16 9:55
@author SuperLazyDog
"""
from abc import ABC, abstractmethod
from pydantic import BaseModel, Field
from typing import List, Optional
from DrissionPage import SessionPage


class SearchResult(BaseModel):
    # One entry of a book-search result list.
    # (Documented with comments rather than a docstring so the generated
    # pydantic schema is left untouched.)

    # Required: book title and link to the book's detail page.
    name: str = Field(default=..., title="书名")
    bookUrl: str = Field(default=..., title="书籍链接")

    # Optional: author name and cover image link; they default to None when
    # a source does not supply them.
    author: Optional[str] = Field(default=None, title="作者")
    coverUrl: Optional[str] = Field(default=None, title="封面链接")


class BookInfo(BaseModel):
    # Details of a single book as read from its detail page.
    # (Documented with comments rather than a docstring so the generated
    # pydantic schema is left untouched.)

    # Required: detail-page link, title, author and introduction text.
    url: str = Field(default=..., title="书籍链接")
    name: str = Field(default=..., title="书名")
    author: str = Field(default=..., title="作者")
    intro: str = Field(default=..., title="简介")

    # Optional: cover link, category, latest chapter name and word count
    # (kept as a string); each defaults to None.
    coverUrl: Optional[str] = Field(default=None, title="封面链接")
    kind: Optional[str] = Field(default=None, title="分类")
    lastChapter: Optional[str] = Field(default=None, title="最新章节")
    wordCount: Optional[str] = Field(default=None, title="字数")


class ChapterInfo(BaseModel):
    # One table-of-contents entry: the chapter title and the link to the
    # chapter page. Both fields are required.
    chapterName: str = Field(default=..., title="章节名")
    chapterUrl: str = Field(default=..., title="章节链接")


class Base(ABC):
    """
    Abstract interface of a book source.

    A concrete source supplies the two identifying properties and the four
    scraping operations declared below.
    """

    # Built once, when the class body is executed, with timeout=3. Being a
    # class attribute, the same SessionPage object is seen by every subclass
    # and instance that does not override ``page``.
    page = SessionPage(timeout=3)

    @property
    @abstractmethod
    def bookSourceName(self) -> str:
        """
        Name of the book source.

        :return: the source name
        """

    @property
    @abstractmethod
    def bookSourceUrl(self) -> str:
        """
        URL of the book source.

        :return: the source URL
        """

    @abstractmethod
    def searchBook(self, keyword: str) -> List[SearchResult]:
        """
        Search for books.

        :param keyword: search keyword
        :return: list of search results
        """

    @abstractmethod
    def getBookInfo(self, url: str) -> BookInfo:
        """
        Get the details of a book.

        :param url: link of the book
        :return: the book information
        """

    @abstractmethod
    def getChapterInfo(self, url: str, result=None) -> List[ChapterInfo]:
        """
        Get the chapter list of a book.

        :param url: link of the chapter-list page
        :param result: result list (defaults to None)
        :return: list of chapter entries
        """

    @abstractmethod
    def getContent(self, info: ChapterInfo, content: str = None) -> str:
        """
        Get the text of a chapter.

        :param info: chapter information
        :param content: chapter content (defaults to None)
        :return: the chapter text
        """

69书吧示例

书源规则

{
    "bookSourceComment": "/*\nBy_zhbyjm7783\neval(String(source.bookSourceComment))\n*/\n\t\nvar error=/429 Too Many Requests/;\nwhile(error.test(result)){\n\tcookie.removeCookie(baseUrl);\n\tresult=java.ajax(baseUrl);\n\t}\n\t\tresult;",
    "bookSourceGroup": "源仓库",
    "bookSourceName": "69书吧[自写]",
    "bookSourceType": 0,
    "bookSourceUrl": "https://69shu.net",
    "bookUrlPattern": "https://69shu.net/\\d+.html",
    "customOrder": -240,
    "enabled": true,
    "enabledCookieJar": false,
    "enabledExplore": true,
    "exploreUrl": "首页::/\n排行::/top/lastupdate/{{page}}/\n周点击榜::/top/weekvisit/{{page}}/\n月点击榜::/top/monthvisit/{{page}}/\n总点击榜::/top/allvisit/{{page}}/\n推荐榜::/top/allvote/{{page}}/\n必看榜::/hot/\n完本::/quanben/{{page}}/",
    "header": "",
    "lastUpdateTime": 1701848258494,
    "loginUrl": "",
    "respondTime": 109239,
    "ruleBookInfo": {
        "author": "[property=\"og:novel:author\"]@content",
        "coverUrl": "[property=\"og:image\"]@content",
        "init": "",
        "intro": "[property=\"og:description\"]@content",
        "kind": "[property~=status|update_time]@content",
        "lastChapter": "[property=\"og:novel:latest_chapter_name\"]@content",
        "name": "[property=\"og:novel:book_name\"]@content",
        "tocUrl": "",
        "wordCount": ".word-count@text"
    },
    "ruleContent": {
        "content": "<js>\neval(String(source.bookSourceComment))\n</js>.novelcontent@html"
    },
    "ruleExplore": {
        "author": ".author@text",
        "bookList": ".xbk",
        "bookUrl": "a.0@href",
        "coverUrl": "img@data-src",
        "intro": ".intro@text",
        "name": ".listtext@h2@a@text"
    },
    "ruleSearch": {
        "author": "em@text",
        "bookList": ".search_list",
        "bookUrl": "a@href",
        "checkKeyWord": "",
        "coverUrl": "a@href<js>\nvar id = result.match(/(\\d+)\\/?$/)[1];\nvar idi=(id)\nvar iid = parseInt(idi/1000);\n'https://img.69shu.net/'+iid+'/'+idi+'/'+idi+'s.jpg';\n</js>",
        "intro": "",
        "kind": "",
        "lastChapter": "",
        "name": "a@text"
    },
    "ruleToc": {
        "chapterList": "<js>\neval(String(source.bookSourceComment))\n</js>.p2.1@li",
        "chapterName": "a@text",
        "chapterUrl": "a@href",
        "nextTocUrl": "option@value||text.下一页@href"
    },
    "searchUrl": "https://69shu.net/s.php,{\n  \"charset\": \"gbk\",\n  \"method\": \"POST\",\n  \"body\": \"s={{key}}&type=articlename\"\n}",
    "weight": 0
}

Python代码

import re
from utils import logger
from utils.decorator import retry
from ._base_ import Base, SearchResult, List, BookInfo, ChapterInfo


class Shu69(Base):
    """
    Book source for https://69shu.net ("69书吧[自写]").

    Every method loads a page into ``self.page`` and extracts fields from it
    with DrissionPage locators.
    """

    bookSourceUrl = "https://69shu.net"
    bookSourceName = "69书吧[自写]"

    @staticmethod
    def _coverUrl(bookUrl):
        """
        Build the cover image URL from the numeric id ending a book link.

        :param bookUrl: href of the book link, may be None
        :return: cover URL, or None when the link is missing or does not end
                 in digits (optionally followed by "/")
        """
        if not bookUrl:
            return None
        match = re.search(r"(\d+)/?$", bookUrl)
        if match is None:
            return None
        idi = int(match.group(1))
        # Covers are grouped in folders by id // 1000.
        return f"https://img.69shu.net/{idi // 1000}/{idi}/{idi}s.jpg"

    @retry()
    def searchBook(self, keyword: str) -> List[SearchResult]:
        """
        Search books by title.

        :param keyword: search text; sent GBK-encoded in the POST form
        :return: one SearchResult per ``.search_list`` element on the page
        """
        searchUrl = "https://69shu.net/s.php"
        data = {"s": keyword.encode("gbk"), "type": "articlename"}
        self.page.post(searchUrl, data=data)
        result = []
        for book in self.page.eles(".search_list"):
            # The source rule derives name, bookUrl and coverUrl from the
            # same <a> element, so it is looked up once.
            link = book.ele("tag:a")
            bookUrl = link.attr("href")
            authorEle = book.ele("tag:em")
            result.append(
                SearchResult(
                    name=link.text,
                    author=authorEle.text if authorEle else None,
                    coverUrl=self._coverUrl(bookUrl),
                    bookUrl=bookUrl,
                )
            )
        return result

    @retry()
    def getBookInfo(self, url: str) -> BookInfo:
        """
        Read the book details from the page's ``property``/``content``
        attribute pairs.

        :param url: link of the book page
        :return: the populated BookInfo
        """
        self.page.get(url)
        page = self.page
        return BookInfo(
            url=url,
            name=page.ele("@property=og:novel:book_name").attr("content"),
            author=page.ele("@property=og:novel:author").attr("content"),
            coverUrl=page.ele("@property=og:image").attr("content"),
            intro=page.ele("@property=og:description").attr("content"),
            kind=page.ele("@|property:status@|property:update_time").attr(
                "content"
            ),
            lastChapter=page.ele("@property=og:novel:latest_chapter_name").attr(
                "content"
            ),
            wordCount=page.ele(".word-count").text,
        )

    @retry()
    def getChapterInfo(self, url: str, result=None) -> List[ChapterInfo]:
        """
        Collect the chapter list, following next-page links recursively.

        :param url: link of the chapter-list page
        :param result: list the chapters are appended to; a new list is
                       created when None
        :return: the list holding every chapter collected so far
        """
        if result is None:
            result = []
        self.page.get(url)
        page = self.page
        chapterList = page.ele(".p2", index=2).eles("tag:li")

        # The next-page link has to be read before recursing, because the
        # recursive call loads another document into the same page object.
        # NOTE(review): the source rule is "option@value"; a bare "option"
        # locator may be treated by DrissionPage as a text match rather than
        # a tag match - confirm which one is intended.
        option = page.ele("option")
        if option:
            nextTocUrl = option.attr("value")
        else:
            nextLink = page.ele("text=下一页")
            nextTocUrl = nextLink.attr("href") if nextLink else None

        for chapter in chapterList:
            link = chapter.ele("tag:a")
            result.append(
                ChapterInfo(
                    chapterName=link.text,
                    chapterUrl=link.attr("href"),
                )
            )

        # A link pointing back at the current page would recurse forever.
        if nextTocUrl and nextTocUrl != url:
            logger.debug(f"nextTocUrl: {nextTocUrl}")
            self.getChapterInfo(nextTocUrl, result)
        return result

    @retry()
    def getContent(self, info: ChapterInfo, content: str = None) -> str:
        """
        Fetch the text of a chapter.

        :param info: chapter whose ``chapterUrl`` is loaded
        :param content: text to prepend to the result, may be None
        :return: ``content`` followed by the non-blank lines of the
                 ``.novelcontent`` element, each stripped and ending in a
                 newline
        """
        self.page.get(info.chapterUrl)
        stripped = (
            raw.strip() for raw in self.page.ele(".novelcontent").text.split("\n")
        )
        return (content or "") + "".join(f"{line}\n" for line in stripped if line)

万相书城

书源规则

{
    "bookSourceGroup": "源仓库",
    "bookSourceName": "万相书城",
    "bookSourceType": 0,
    "bookSourceUrl": "https://www.wxscn.com/",
    "customOrder": 0,
    "enabled": true,
    "enabledCookieJar": false,
    "enabledExplore": false,
    "lastUpdateTime": 1698759595973,
    "respondTime": 34062,
    "ruleBookInfo": {
        "author": "class.col-md-8 col-sm-6 dark.0@tag.a.0@text",
        "intro": "class.col-sm-11 col-xs-10@text",
        "lastChapter": "class.col-md-8 col-sm-6 dark.1@tag.a.0@text",
        "name": "class.book-name@text",
        "tocUrl": "class.panel-footer visible-xs visible-sm@href"
    },
    "ruleContent": {
        "content": "id.cont-body@text",
        "nextContentUrl": "class.col-md-6 text-center@tag.a.2@href"
    },
    "ruleSearch": {
        "author": "",
        "bookList": "class.table table-condensed@tag.tr!0",
        "bookUrl": "a.0@href",
        "checkKeyWord": "",
        "name": "a.0@text"
    },
    "ruleToc": {
        "chapterList": "class.col-md-6 item",
        "chapterName": "class.col-md-6 item@tag.a@text",
        "chapterUrl": "class.col-md-6 item@tag.a@href"
    },
    "searchUrl": "https://www.wxscn.com/plus/search.php?q={{key}}",
    "weight": 0
}

Python代码


from utils.decorator import retry
from ._base_ import Base, SearchResult, List, BookInfo, ChapterInfo


class Wxscn(Base):
    """
    Book source for https://www.wxscn.com/ ("万相书城").

    Every method loads a page into ``self.page`` and extracts fields from it
    with DrissionPage locators.
    """

    bookSourceName = "万相书城"
    bookSourceUrl = "https://www.wxscn.com/"

    @retry()
    def searchBook(self, keyword: str) -> List[SearchResult]:
        """
        Search books by keyword.

        :param keyword: search text, sent as the ``q`` query parameter
        :return: one SearchResult (name and link only) per result row
        """
        searchUrl = "https://www.wxscn.com/plus/search.php"
        params = {"q": keyword}
        self.page.get(searchUrl, params=params)
        # The first <tr> is skipped, as the source rule ("tag.tr!0") excludes
        # index 0 - presumably the table header.
        rows = self.page.ele("@class:table table-condensed").eles("tag=tr")[1:]
        result = []
        for book in rows:
            link = book.ele("tag=a")
            result.append(
                SearchResult(
                    name=link.text,
                    bookUrl=link.attr("href"),
                )
            )
        return result

    @retry()
    def getBookInfo(self, url: str) -> BookInfo:
        """
        Read the book details from the book page.

        :param url: link of the book page
        :return: BookInfo with name, author, intro and latest chapter
        :raises IndexError: when fewer than two detail elements, or no link
                            inside them, are found
        """
        self.page.get(url)
        # Author is the first link of the first matching element, the latest
        # chapter the first link of the second one.
        details = self.page.eles("@class:col-md-8 col-sm-6 dark")
        return BookInfo(
            url=url,
            name=self.page.ele("@class:book-name").text.strip(),
            author=details[0].eles("tag=a")[0].text.strip(),
            intro=self.page.ele("@class:col-sm-11 col-xs-10").text.strip(),
            lastChapter=details[1].eles("tag=a")[0].text,
        )

    @retry()
    def getChapterInfo(self, url: str, result=None) -> List[ChapterInfo]:
        """
        Collect the chapter list from a single page.

        :param url: link of the chapter-list page
        :param result: list the chapters are appended to; a new list is
                       created when None
        :return: the list with the chapters of this page appended
        """
        if result is None:
            result = []
        self.page.get(url)
        for chapter in self.page.eles("@class:col-md-6 item"):
            link = chapter.ele("tag=a")
            result.append(
                ChapterInfo(
                    chapterName=link.text,
                    chapterUrl=link.attr("href"),
                )
            )
        return result

    @retry()
    def getContent(self, info: ChapterInfo, content: str = None) -> str:
        """
        Fetch the text of a chapter.

        :param info: chapter whose ``chapterUrl`` is loaded
        :param content: text to prepend to the result, may be None
        :return: ``content`` followed by the non-blank lines of the
                 ``cont-body`` element, each stripped and ending in a newline
        """
        self.page.get(info.chapterUrl)
        stripped = (
            raw.strip() for raw in self.page.ele("@id=cont-body").text.split("\n")
        )
        return (content or "") + "".join(f"{line}\n" for line in stripped if line)

笔趣岛

书源规则

{
    "bookSourceGroup": "源仓库",
    "bookSourceName": "笔趣岛",
    "bookSourceType": 0,
    "bookSourceUrl": "https://m.biqudao.cc",
    "bookUrlPattern": "https?://m.biqudao.cc/book/[\\d_]+/",
    "customOrder": 0,
    "enabled": true,
    "enabledCookieJar": true,
    "enabledExplore": true,
    "exploreUrl": "全部小说::/xclass/0/{{page}}.html\n玄幻小说::/xclass/1/{{page}}.html\n修真小说::/xclass/2/{{page}}.html\n都市小说::/xclass/3/{{page}}.html\n穿越小说::/xclass/4/{{page}}.html\n网游小说::/xclass/5/{{page}}.html\n科幻小说::/xclass/6/{{page}}.html\n其他小说::/xclass/7/{{page}}.html\n全本小说::/quanben_{{page}}.html",
    "header": "{\"User-Agent\": \"Mozilla/5.0 (Linux; Android 9) Mobile Safari/537.36\"}",
    "lastUpdateTime": 1701773629170,
    "respondTime": 45348,
    "ruleBookInfo": {
        "author": "p.author@text",
        "coverUrl": ".synopsisArea_detail img@src",
        "intro": "p.review@text##简介：",
        "kind": ".synopsisArea_detail p.1:2:3@text##.*：",
        "lastChapter": ".synopsisArea_detail a.-1@text",
        "name": "span.title@text"
    },
    "ruleContent": {
        "content": "#chaptercontent@textNodes",
        "nextContentUrl": "text.下一页@href",
        "replaceRegex": "##\\s*{{try{chapter.title}catch(e){\"\"} }}.*\\s*"
    },
    "ruleExplore": {
        "author": "p.author@text",
        "bookList": ".hot_sale",
        "bookUrl": "a@href",
        "coverUrl": "img@data-original",
        "intro": "p.review@text##简介：",
        "kind": "0",
        "name": "p.title@text"
    },
    "ruleSearch": {
        "author": "p.author.0@text##.*\\|",
        "bookList": ".hot_sale",
        "bookUrl": "a@href",
        "checkKeyWord": "剑来",
        "coverUrl": "@js:\"https://m.biqudao.cc/files/article/image/24/24458/24458s.jpg\"",
        "kind": "p.author@text##\\|.*",
        "lastChapter": "p.author.1@text##.*\\|\\s*更新：",
        "name": "p.title@text"
    },
    "ruleToc": {
        "chapterList": ".directoryArea.1@p a",
        "chapterName": "text",
        "chapterUrl": "href",
        "nextTocUrl": "option@value||text.下一页@href"
    },
    "searchUrl": "/s.php,{\n  \"body\": \"keyword={{key}}&t=1\",\n  \"method\": \"POST\"\n}",
    "weight": 0
}

Python代码


from utils import logger
from utils.decorator import retry
import re
from ._base_ import Base, SearchResult, List, BookInfo, ChapterInfo


class BiQuDao(Base):
    """
    Book source for https://m.biqudao.cc ("笔趣岛").

    Every method loads a page into ``self.page`` and extracts fields from it
    with DrissionPage locators.
    """

    bookSourceName = "笔趣岛"

    bookSourceUrl = "https://m.biqudao.cc"

    @retry()
    def searchBook(self, keyword: str) -> List[SearchResult]:
        """
        Search books by keyword.

        :param keyword: search text, sent as the ``keyword`` form field
        :return: one SearchResult per ``hot_sale`` element on the page
        """
        searchUrl = "https://m.biqudao.cc/s.php"
        data = {"keyword": keyword, "t": 1}
        self.page.post(searchUrl, data=data)
        result = []
        for book in self.page.eles("@class:hot_sale"):
            authorText = book.ele("tag=p@class:author").text
            result.append(
                SearchResult(
                    name=book.ele("tag=p@class:title").text,
                    bookUrl=book.ele("tag=a").attr("href"),
                    # Source rule "##.*\|": keep what follows the last "|".
                    # rpartition yields the whole text when there is no "|",
                    # where indexing the result of split() would raise.
                    author=authorText.rpartition("|")[2],
                )
            )
        return result

    @retry()
    def getBookInfo(self, url: str) -> BookInfo:
        """
        Read the book details from the book page.

        :param url: link of the book page
        :return: the populated BookInfo
        """
        self.page.get(url)
        detail = self.page.ele("@class:synopsisArea_detail")
        return BookInfo(
            url=url,
            name=self.page.ele("tag=span@class:title").text,
            author=self.page.ele("tag=p@class:author").text.split("|")[0],
            intro=self.page.ele("tag=p@class:review").text.replace("简介：", ""),
            coverUrl=detail.ele("tag=img").attr("src"),
            # Source rule "##.*：": drop the label up to the last "：". The
            # whole text is kept when no "：" is present.
            kind=detail.ele("tag=p", index=3).text.rpartition("：")[2],
            # NOTE(review): the source rule reads the last <a> ("a.-1") while
            # this reads the last <p> - confirm which element is intended.
            lastChapter=detail.ele("tag=p", index=-1).text,
        )

    @retry()
    def getChapterInfo(self, url: str, result=None) -> List[ChapterInfo]:
        """
        Collect the chapter list, following next-page links recursively.

        :param url: link of the chapter-list page
        :param result: list the chapters are appended to; a new list is
                       created when None
        :return: the list holding every chapter collected so far
        """
        if result is None:
            result = []
        self.page.get(url)
        page = self.page

        # The next-page link has to be read before recursing, because the
        # recursive call loads another document into the same page object.
        # NOTE(review): the source rule is "option@value"; a bare "option"
        # locator may be treated by DrissionPage as a text match rather than
        # a tag match - confirm which one is intended.
        option = page.ele("option")
        if option:
            nextTocUrl = option.attr("value")
        else:
            nextLink = page.ele("text=下一页")
            nextTocUrl = nextLink.attr("href") if nextLink else None

        for chapter in page.ele("@class:directoryArea", index=2).eles("tag=a"):
            result.append(
                ChapterInfo(
                    chapterName=chapter.text,
                    chapterUrl=chapter.attr("href"),
                )
            )

        # A link pointing back at the current page would recurse forever.
        if nextTocUrl and nextTocUrl != url:
            logger.debug(f"nextTocUrl: {nextTocUrl}")
            self.getChapterInfo(nextTocUrl, result)
        return result

    @retry()
    def getContent(self, info: ChapterInfo, content: str = None) -> str:
        """
        Fetch the text of a chapter, following "下一页" links recursively.

        :param info: chapter to load; it is not modified
        :param content: text gathered from previous pages, may be None
        :return: ``content`` followed by the cleaned text of this page and of
                 every following page
        """
        self.page.get(info.chapterUrl)
        nextLink = self.page.ele("text=下一页")
        nextContentUrl = nextLink.attr("href") if nextLink else None

        stripped = (
            raw.strip()
            for raw in self.page.ele("@id=chaptercontent").text.split("\n")
        )
        text = "".join(f"{line}\n" for line in stripped if line)

        # Remove the repeated chapter title. The title is escaped so regex
        # metacharacters in it are matched literally, and an empty title is
        # skipped because the pattern would then match the entire text.
        if info.chapterName:
            text = re.sub(rf"\s*{re.escape(info.chapterName)}.*\s*", "", text)
        content = (content or "") + text

        # A link pointing back at the current page would recurse forever.
        if nextContentUrl and nextContentUrl != info.chapterUrl:
            logger.debug(f"nextContentUrl: {nextContentUrl}")
            # Recurse with a fresh ChapterInfo so the caller's object keeps
            # its original chapterUrl.
            nextInfo = ChapterInfo(
                chapterName=info.chapterName,
                chapterUrl=nextContentUrl,
            )
            return self.getContent(nextInfo, content)
        return content