JMComic-Crawler-Python 自动侦测新章节

trafficstars

写了一段自动侦测是否有新章节、并自动下载所有新章节的code。只需要在list上输入相对应的本子id、章节id即可。更新完後会显示最新的章节id，完成後把新的章节id再带入list，就可以循环使用。分享给有需要的人。

import jmcomic 

# Load the crawler settings from the config file, then build the client from them.
option = jmcomic.create_option('D:/config.yml')
client = option.build_jm_client()

def find_update(albums, id):
    """Return the chapters that come after the chapter whose ``photo_id`` equals ``id``.

    ``albums`` is iterated once, in order; every element must expose a
    ``photo_id`` attribute. The matching chapter itself is not included.
    An empty list is returned when nothing matches or when the match is the
    last element.
    """
    chapters = iter(albums)
    # Consume the iterator up to and including the marker chapter ...
    for chapter in chapters:
        if chapter.photo_id == id:
            break
    # ... so whatever is still left is exactly what follows the marker.
    return list(chapters)

def check_download(album_id, photo_id):
    """Download every chapter of ``album_id`` that comes after ``photo_id``.

    The album is fetched through the module-level ``client`` and each new
    chapter is downloaded with the module-level ``option``.

    Returns a dict ``{"album_id": ..., "photo_id": ...}`` carrying the id of
    the last chapter downloaded, or ``None`` when no chapter was downloaded.
    """
    album = client.get_album_detail(album_id)
    latest_id = ""
    # Download each new chapter and remember the id of the last one
    for chapter in find_update(album, photo_id):
        # Pass the option built from the config file so the download honours
        # it; the call previously omitted it.
        jmcomic.download_photo(chapter.photo_id, option)
        latest_id = chapter.photo_id
    # Report the newest id only when something was actually downloaded
    if latest_id != photo_id and latest_id != "":
        return {"album_id": album_id, "photo_id": latest_id}
    return None

def start(list):
    """Check every watch-list entry and collect the entries that were updated.

    ``list`` is an iterable of dicts with ``'album_id'`` and ``'photo_id'``
    keys. Each entry is handed to ``check_download``; the non-``None``
    results are returned in the same order.
    """
    # Lazily run the check for each entry, one after another
    outcomes = (
        check_download(entry['album_id'], entry['photo_id'])
        for entry in list
    )
    # Keep only the entries for which new chapters were found
    return [outcome for outcome in outcomes if outcome is not None]

# Watch list: one entry per album to check.
# Named ``update_list`` so the builtin ``list`` is not shadowed at module scope.
update_list = [
    # album id, and the id of the chapter you are currently up to
    # (only chapters AFTER 'yyy' are fetched; 'yyy' itself is not)
    {"album_id": 'xxx', "photo_id": 'yyy'},
    # album id, and the id of the chapter you are currently up to
    # (only chapters AFTER 'bbb' are fetched; 'bbb' itself is not)
    {"album_id": 'aaa', "photo_id": 'bbb'},
]

result_list = start(update_list)

# Report the outcome; if nothing is printed, nothing was updated.
for item in result_list:
    print(f"漫画id {item['album_id']} 更新章节id至 {item['photo_id']}")

Aug 13 '23 11:08 jkdfzx

2023-08-30，该reply的代码已过时不建议使用，请使用下一个reply的代码

我提供一个我的版本

from jmcomic import *

# Watch list of albums to check for updates
# key: album_id
# value: photo_id (chapters up to and including this one are treated as already downloaded)
dic = {
    'xxx': 'yyy'
}


class MyJmDownloader(JmDownloader):
    """Downloader that skips chapters already covered by the watch list ``dic``."""

    def download_by_photo_detail(self, photo: JmPhotoDetail, client: JmcomicClient):
        """Download ``photo`` unless the watch list marks it as already fetched.

        Chapters of albums that are not in ``dic`` are always downloaded.
        For a watched album, only chapters whose id is numerically greater
        than the recorded id are downloaded; the recorded chapter itself is
        skipped. This relies on chapter ids increasing with chapter order.

        Raises ``ValueError`` if either id is not numeric.
        """
        album: JmAlbumDetail = photo.from_album

        # Album is not on the watch list: download as usual
        if album.album_id not in dic:
            return super().download_by_photo_detail(photo, client)

        photo_begin = dic[album.album_id]  # last chapter already downloaded
        photo_id = photo.photo_id

        # Compare as integers: as strings, '99' > '100' would be True and
        # chapters would be wrongly skipped or re-downloaded once the number
        # of digits differs.
        if int(photo_id) > int(photo_begin):
            return super().download_by_photo_detail(photo, client)

        # Chapter at or before the recorded one: skip it
        jm_debug('侦测更新清单', f'章节已下载过: {photo_id}')


# Replace jmcomic's default Downloader
# NOTE(review): the attribute name may differ between jmcomic versions
# (CLASS_DOWNLOADER is also seen) — confirm against the installed version.
JmModuleConfig.DOWNLOADER_CLASS = MyJmDownloader
# Download the albums listed in the watch list
download_album(dic.keys())

Aug 13 '23 12:08 hect0x7

我上面贴的第一版代码有些不严谨，我发现一些比较老的本子的前一些章节不一定是递增id的，使用你的find_update来挑选章节会更适用，如下

from jmcomic import *

# Watch list of albums to check for updates
# key: album_id
# value: photo_id (only chapters listed after this one are downloaded)
dic = {
    '145504': '290266'
}

# Create the option from a config file
option = create_option('your option.yml')


class FindUpdateDownloader(JmDownloader):
    """Downloader that narrows a watched album down to its not-yet-downloaded chapters."""

    def filter_iter_objs(self, iter_objs: DownloadIterObjs):
        """Filter albums through ``find_update``; pass anything else through untouched."""
        if isinstance(iter_objs, JmAlbumDetail):
            return self.find_update(iter_objs)
        return iter_objs

    def find_update(self, album: JmAlbumDetail):
        """Return the chapters of ``album`` listed after the chapter recorded in ``dic``.

        An album that is not in ``dic`` is returned as-is. For a watched
        album the result is a list; it is empty when the recorded chapter is
        the last one or does not appear in the album at all.
        """
        if album.album_id in dic:
            marker_id = dic[album.album_id]
            chapters = iter(album)
            # Skip everything up to and including the recorded chapter
            for chapter in chapters:
                if chapter.photo_id == marker_id:
                    break
            # Whatever remains comes after the recorded chapter
            return list(chapters)

        return album


# Replace jmcomic's default Downloader
JmModuleConfig.CLASS_DOWNLOADER = FindUpdateDownloader
# Download the albums listed in the watch list, using the option created above
download_album(dic.keys(), option)

Aug 13 '23 12:08 hect0x7

关于这个功能，新版本jmcomic支持了filter，可以更好地实现这一功能。我专门写了一个代码示例，感兴趣可以看下：https://github.com/hect0x7/JMComic-Crawler-Python/blob/master/usage/usage_feature_filter.py

Aug 22 '23 09:08 hect0x7

其实我後来改写了，主要是为了可以针对章节自动更新，连带章节id也做成自动替换了。写了一些自己想要的流程跟自动处理：先下载成 本子id/1、本子id/2……，然後自动移动到 自订标题名称/1、自订标题名称/2……，最後将json资料自动替换成最新的章节id。

主要code

import jmcomic
import shutil
import os
import json

def delete_empty_directories(directory_path):
    """Remove every empty directory beneath ``directory_path``, bottom-up.

    A directory that becomes empty because its empty children were removed
    is removed as well, and finally ``directory_path`` itself is removed if
    nothing is left inside it. Directories that still contain files are kept.

    A ``directory_path`` that is not an existing directory is ignored rather
    than raising, so the function is safe to call on an already-cleaned path.
    """
    if not os.path.isdir(directory_path):
        return

    # Walk bottom-up so children are dealt with before their parent is examined
    for root_dir, subdirectories, _files in os.walk(directory_path, topdown=False):
        for subdirectory in subdirectories:
            subdirectory_path = os.path.join(root_dir, subdirectory)
            if not os.listdir(subdirectory_path):  # the subdirectory is empty
                os.rmdir(subdirectory_path)

    if not os.listdir(directory_path):  # the top directory ended up empty too
        os.rmdir(directory_path)
        
def move_file(file_source, file_destination):
    """Move the whole tree under ``file_source`` into ``file_destination``.

    The folder structure is recreated under the destination (merging with
    whatever already exists there) and every file is moved across. The
    emptied source folders are deleted afterwards. Nothing happens when both
    paths are equal or the source does not exist.
    """
    if file_source == file_destination or not os.path.exists(file_source):
        return
    # Make sure the destination folder exists
    os.makedirs(file_destination, exist_ok=True)

    # Recursively move the content of the source folder into the destination
    for folder_name, subfolders, file_list in os.walk(file_source):
        # relpath instead of slicing by len(file_source) + 1: the slice cut
        # off the first character of every subfolder name when file_source
        # ended with a path separator.
        relative = os.path.relpath(folder_name, file_source)
        if relative == os.curdir:
            target_folder = file_destination
        else:
            target_folder = os.path.join(file_destination, relative)
        os.makedirs(target_folder, exist_ok=True)

        for file_name in file_list:
            shutil.move(
                os.path.join(folder_name, file_name),
                os.path.join(target_folder, file_name),
            )

        # Mirror the subfolders as well, so empty ones are recreated too
        for subfolder_name in subfolders:
            os.makedirs(os.path.join(target_folder, subfolder_name), exist_ok=True)

    # Remove the now-empty source folders
    delete_empty_directories(file_source)

def find_update(albums, id):
    """Return every chapter located after the one whose ``photo_id`` equals ``id``.

    ``albums`` is an iterable of objects exposing ``photo_id``. The matching
    chapter is excluded from the result. When no chapter matches, or the
    match is the final chapter, an empty list is returned.
    """
    chapters = list(albums)
    for position, chapter in enumerate(chapters):
        if chapter.photo_id == id:
            # Everything after the first match counts as new
            return chapters[position + 1:]
    return []

def check_download(album_id, photo_id, option, client):
    """Download what is new for ``album_id`` and report the newest chapter id.

    With an empty ``photo_id`` the whole album is downloaded; otherwise only
    the chapters listed after ``photo_id`` are. Downloads use ``option``; the
    album is looked up through ``client``.

    Returns ``{"album_id": ..., "photo_id": ...}`` with the id of the newest
    chapter, or ``None`` when the album has no chapters or nothing new was
    downloaded.
    """
    album = client.get_album_detail(album_id)
    pending = find_update(album, photo_id)

    # An album without chapters has nothing to offer
    if len(album) == 0:
        return None

    # Blank chapter id: fetch the whole album and report its last chapter
    if photo_id == "":
        jmcomic.download_album(album_id, option)
        return {"album_id": album_id, "photo_id": album[-1].photo_id}

    # Fetch each new chapter, remembering the id of the last one
    newest_id = ""
    for chapter in pending:
        jmcomic.download_photo(chapter.photo_id, option)
        newest_id = chapter.photo_id

    # Nothing was downloaded, or the newest id is the one already recorded
    if newest_id == "" or newest_id == photo_id:
        return None
    return {"album_id": album_id, "photo_id": newest_id}

def load_json(path):
    """Read the UTF-8 encoded JSON file at ``path`` and return the parsed data."""
    with open(path, mode="r", encoding="utf-8") as handle:
        return json.load(handle)

def save_json(path, data):
    """Write ``data`` to ``path`` as UTF-8 JSON (4-space indent, non-ASCII kept as-is).

    The data is serialized before the file is opened, so if ``data`` cannot
    be serialized the exception is raised while the existing file is still
    intact, instead of leaving it truncated or half-written.
    """
    serialized = json.dumps(data, ensure_ascii=False, indent=4)
    with open(path, "w", encoding="utf-8") as file:
        file.write(serialized)

def final_result(result, setting, setting_path):
    """Print each update, record the new chapter ids in ``setting`` and save it.

    For every entry of ``result`` the first ``setting["UpdateList"]`` entry
    with the same ``album_id`` gets its ``photo_id`` replaced. The settings
    are written to ``setting_path`` only when ``result`` is non-empty, so no
    output and no write means there was no update.
    """
    if len(result) == 0:
        return

    for update in result:
        # First watch-list entry that belongs to the updated album, if any
        target = next(
            (entry for entry in setting["UpdateList"]
             if entry["album_id"] == update["album_id"]),
            None,
        )
        if target is None:
            continue
        print("".join((
            target["title"],
            ": 漫畫id ",
            update["album_id"],
            " 更新章節id至 ",
            update["photo_id"],
        )))
        target["photo_id"] = update["photo_id"]

    save_json(setting_path, setting)

def check_update(list, configfile):
    """Check every watch-list entry, download what is new and rename the folders.

    ``list`` is an iterable of dicts with ``"album_id"``, ``"photo_id"`` and
    ``"title"`` keys; ``configfile`` is the path of the jmcomic option file.
    After a successful download the folder ``<base_dir>/<album_id>`` is moved
    to ``<base_dir>/<title>``.

    Returns the list of non-``None`` results from ``check_download``.
    """
    option = jmcomic.create_option(configfile)
    client = option.build_jm_client()
    # Build paths with os.path instead of hard-coded backslashes: this avoids
    # the doubled separator when base_dir already ends with "/" and keeps the
    # script usable outside Windows.
    base_dir = os.path.normpath(option.dir_rule.base_dir)

    result = []
    for item in list:
        # Run the detection for this entry
        item_result = check_download(item["album_id"], item["photo_id"], option, client)
        # Something new was downloaded: keep the newest chapter id
        if item_result is not None:
            result.append(item_result)
            move_file(
                os.path.join(base_dir, item["album_id"]),
                os.path.join(base_dir, item["title"]),
            )

    return result

def start(configfile, jsonfile):
    """Run a full update pass using files located next to this script.

    ``configfile`` is the jmcomic option file and ``jsonfile`` the settings
    file holding ``"UpdateList"``; both are resolved relative to the
    directory of this script. After the pass the settings file is rewritten
    with the newest chapter ids (only if something was updated).
    """
    script_path = os.path.dirname(os.path.abspath(__file__))
    # os.path.join instead of a hard-coded "\\" so the paths are valid on any OS
    setting_path = os.path.join(script_path, jsonfile)
    setting = load_json(setting_path)

    result = check_update(setting["UpdateList"], os.path.join(script_path, configfile))

    final_result(result, setting, setting_path)


start("config.yml", "test.json")

test.json和主要code放在一起即可

{
    "UpdateList": [
        {
            "album_id": "本子id",
            "photo_id": "章节id，留空就是抓整本",
            "title": "自订下载後的本子资料夹名称，如本子标题；下载预设设定dir_rule.rule必须是Bd_Aid_Pindex才可以"
        },
        {
            "album_id": "123",
            "photo_id": "456",
            "title": "AAA"
        }
    ]
}

config.yml和主要code放在一起即可

client:
    postman:
        meta_data:
            cookies:
                # placeholder: replace xxx with your own AVS cookie value
                AVS: xxx
dir_rule:
    # base folder for downloads; the script renames album folders inside it
    base_dir: D:/下载/
    # must be Bd_Aid_Pindex for the script's automatic folder renaming to work
    rule: Bd_Aid_Pindex

Aug 22 '23 13:08 jkdfzx

JMComic-Crawler-Python JMComic-Crawler-Python copied to clipboard

自动侦测新章节

2023-08-30，该reply的代码已过时不建议使用，请使用下一个reply的代码

JMComic-Crawler-Python
JMComic-Crawler-Python copied to clipboard