JMComic-Crawler-Python icon indicating copy to clipboard operation
JMComic-Crawler-Python copied to clipboard

自动侦测新章节

Open jkdfzx opened this issue 2 years ago • 4 comments
trafficstars

写了一段自动侦测是否有新章节、并自动下载所有新章节的代码。只需要在 list 中填入相对应的本子id、章节id即可。更新完後会显示最新的章节id;完成後把新的章节id再填回 list,就可以循环使用。分享给有需要的人。

import jmcomic 

# Load the download option from the config file, then build the client from it.
option = jmcomic.create_option('D:/config.yml')
client = option.build_jm_client()

def find_update(albums, id):
    """Return every item of ``albums`` that comes after the chapter ``id``.

    ``albums`` is iterated once, in order. Items are skipped up to and
    including the first one whose ``photo_id`` equals ``id``; everything
    after it is returned as a list. If no item matches, the result is empty.
    """
    remaining = iter(albums)

    # Consume items until the known chapter has been passed.
    for chapter in remaining:
        if chapter.photo_id == id:
            break

    # Whatever the iterator still holds is newer than the known chapter.
    return [*remaining]

def check_download(album_id, photo_id):
    """Download every chapter of ``album_id`` that follows ``photo_id``.

    The album detail is fetched through the module-level ``client`` and the
    chapters after ``photo_id`` are selected with ``find_update``.

    Returns ``{"album_id": ..., "photo_id": <id of the last chapter downloaded>}``
    when at least one chapter was downloaded, otherwise ``None``.
    """
    album = client.get_album_detail(album_id)
    targets = find_update(album, photo_id)

    latest_id = ""
    for item in targets:
        # Pass the module-level option explicitly; without it the download
        # does not use the settings loaded from the config file.
        jmcomic.download_photo(item.photo_id, option)
        latest_id = item.photo_id

    # Report a result only when something new was actually downloaded.
    if latest_id != photo_id and latest_id != "":
        return {"album_id": album_id, "photo_id": latest_id}
    return None

def start(list):
    """Check every entry of the watch list and collect the ones that updated.

    Each entry is a dict with ``'album_id'`` and ``'photo_id'`` keys. Entries
    are processed in order through ``check_download``; only the non-``None``
    results (albums that had new chapters) are returned.
    """
    # Lazy generator: entries are still checked one at a time, in order.
    outcomes = (
        check_download(entry['album_id'], entry['photo_id'])
        for entry in list
    )
    return [outcome for outcome in outcomes if outcome is not None]

# Watch list: one entry per album.
# "photo_id" is the chapter already downloaded; only the chapters after it are
# fetched (that chapter itself is excluded).
# Named update_list rather than list so the builtin list() stays usable in this module.
update_list = [
    {"album_id": 'xxx', "photo_id": 'yyy'},
    {"album_id": 'aaa', "photo_id": 'bbb'},
]

result_list = start(update_list)

# Report the results; if nothing is printed, no album had new chapters.
for item in result_list:
    print("漫画id " + item['album_id'] + " 更新章节id至 " + item['photo_id'])

jkdfzx avatar Aug 13 '23 11:08 jkdfzx

2023-08-30,该reply的代码已过时不建议使用,请使用下一个reply的代码

我提供一个我的版本

from jmcomic import *

# Watch list of albums to check for updates
# key: album_id
# value: photo_id (only chapters after this one are downloaded)
dic = {
    'xxx': 'yyy'
}


class MyJmDownloader(JmDownloader):
    """Downloader that skips chapters at or before the id recorded in ``dic``.

    Albums that are not keys of ``dic`` are downloaded without filtering.
    """

    def download_by_photo_detail(self, photo: JmPhotoDetail, client: JmcomicClient):
        """Download ``photo`` unless the watch list marks it as already downloaded.

        Returns whatever the parent implementation returns when the chapter is
        downloaded, and ``None`` when it is skipped.
        """
        album: JmAlbumDetail = photo.from_album

        # Album is not on the watch list: download normally.
        if album.album_id not in dic:
            return super().download_by_photo_detail(photo, client)

        # Only chapters after the recorded one are wanted (the recorded one is
        # excluded). This relies on newer chapters having larger ids.
        photo_begin = dic[album.album_id]
        photo_id = photo.photo_id

        # Compare numerically: as plain strings '999' > '1000' would be True.
        # Fall back to the raw comparison if either id is not an integer.
        try:
            is_new = int(photo_id) > int(photo_begin)
        except (TypeError, ValueError):
            is_new = photo_id > photo_begin

        if is_new:
            return super().download_by_photo_detail(photo, client)

        # Chapter at or before the recorded one: skip it.
        jm_debug('侦测更新清单', f'章节已下载过: {photo_id}')


# Replace jmcomic's default Downloader
# NOTE(review): the later snippet in this thread assigns JmModuleConfig.CLASS_DOWNLOADER
# instead; confirm which attribute the installed jmcomic version reads.
JmModuleConfig.DOWNLOADER_CLASS = MyJmDownloader
# Download the albums listed in the watch list
download_album(dic.keys())

hect0x7 avatar Aug 13 '23 12:08 hect0x7

我上面贴的第一版代码有些不严谨,我发现一些比较老的本子的前一些章节不一定是递增id的,使用你的find_update来挑选章节会更适用,如下

from jmcomic import *

# Watch list of albums to check for updates
# key: album_id
# value: photo_id (only chapters after this one are downloaded)
dic = {
    '145504': '290266'
}

# Create the option from a config file
option = create_option('your option.yml')


class FindUpdateDownloader(JmDownloader):
    """Downloader that keeps only the chapters after the one recorded in ``dic``."""

    def filter_iter_objs(self, iter_objs: DownloadIterObjs):
        """Filter albums through ``find_update``; pass anything else through unchanged."""
        if isinstance(iter_objs, JmAlbumDetail):
            return self.find_update(iter_objs)
        return iter_objs

    def find_update(self, album: JmAlbumDetail):
        """Return the chapters of ``album`` that follow the recorded chapter.

        An album that is not a key of ``dic`` is returned as-is. Otherwise the
        result is a list of the chapters after the first one whose
        ``photo_id`` equals the recorded id (empty if that id never appears).
        """
        if album.album_id not in dic:
            return album

        last_seen = dic[album.album_id]
        chapters = iter(album)

        # Skip everything up to and including the recorded chapter.
        for chapter in chapters:
            if chapter.photo_id == last_seen:
                break

        return [*chapters]


# Replace jmcomic's default Downloader
JmModuleConfig.CLASS_DOWNLOADER = FindUpdateDownloader
# Download the albums listed in the watch list
download_album(dic.keys(), option)

hect0x7 avatar Aug 13 '23 12:08 hect0x7

关于这个功能,新版本jmcomic支持了filter,可以更好的实现这一功能,我专门写了一个代码示例,感兴趣可以看下 https://github.com/hect0x7/JMComic-Crawler-Python/blob/master/usage/usage_feature_filter.py

hect0x7 avatar Aug 22 '23 09:08 hect0x7

其实我後来改写了,主要是为了可以针对章节自动更新,连章节id也做成自动替换。我加了一些自己想要的流程和自动处理:先下载成 本子id/1、本子id/2……,然後自动移动到 自订标题名称/1、自订标题名称/2……,最後将json资料中的章节id自动替换成最新的。

主要code

import jmcomic
import shutil
import os
import json

def delete_empty_directories(directory_path):
    """Remove every empty directory below ``directory_path``, then the path itself if empty.

    The tree is walked bottom-up, so a directory whose only content was empty
    subdirectories is removed as well.
    """
    for parent, child_names, _file_names in os.walk(directory_path, topdown=False):
        for child_name in child_names:
            child_path = os.path.join(parent, child_name)
            if os.listdir(child_path):
                # Still has content: keep it.
                continue
            os.rmdir(child_path)

    # Finally drop the top-level directory when nothing is left inside it.
    if os.listdir(directory_path):
        return
    os.rmdir(directory_path)
        
def move_file(file_source, file_destination):
    """Move the contents of the directory ``file_source`` into ``file_destination``.

    The directory layout below ``file_source`` is recreated under
    ``file_destination`` and each file is moved with ``shutil.move``. The
    emptied source tree is then removed with ``delete_empty_directories``.
    Nothing happens when both paths are equal or the source does not exist.
    """
    if file_source == file_destination or not os.path.exists(file_source):
        return

    # Make sure the destination root exists.
    os.makedirs(file_destination, exist_ok=True)

    for folder_name, subfolders, file_list in os.walk(file_source):
        # relpath instead of slicing by len(file_source) + 1: slicing drops a
        # character of the relative path when file_source ends with a separator.
        relative = os.path.relpath(folder_name, file_source)
        target_folder = os.path.normpath(os.path.join(file_destination, relative))
        os.makedirs(target_folder, exist_ok=True)

        for file_name in file_list:
            source_file = os.path.join(folder_name, file_name)
            target_file = os.path.join(target_folder, file_name)
            shutil.move(source_file, target_file)

        # Mirror the subdirectories so empty ones exist at the destination too.
        for subfolder_name in subfolders:
            os.makedirs(os.path.join(target_folder, subfolder_name), exist_ok=True)

    # The source now only holds empty directories; remove them.
    delete_empty_directories(file_source)

def find_update(albums, id):
    """Return every item of ``albums`` that comes after the chapter ``id``.

    ``albums`` is iterated once, in order. Items are skipped up to and
    including the first one whose ``photo_id`` equals ``id``; everything
    after it is returned as a list. If no item matches, the result is empty.
    """
    remaining = iter(albums)

    # Consume items until the known chapter has been passed.
    for chapter in remaining:
        if chapter.photo_id == id:
            break

    # Whatever the iterator still holds is newer than the known chapter.
    return [*remaining]

def check_download(album_id, photo_id,option,client):
    """Download what is new for ``album_id`` and report the newest chapter id.

    With an empty ``photo_id`` the whole album is downloaded and the id of
    its last chapter is reported. Otherwise only the chapters after
    ``photo_id`` are downloaded.

    Returns ``{"album_id": ..., "photo_id": <newest id>}`` when something was
    downloaded, and ``None`` when the album has no chapters or nothing new
    was found.
    """
    album = client.get_album_detail(album_id)
    pending = find_update(album, photo_id)

    # Album without chapters: nothing to do.
    if not len(album):
        return None

    # No known chapter recorded: fetch the complete album.
    if photo_id == "":
        jmcomic.download_album(album_id, option)
        return {"album_id": album_id, "photo_id": album[-1].photo_id}

    # Fetch each new chapter, remembering the id of the last one.
    newest = ""
    for chapter in pending:
        jmcomic.download_photo(chapter.photo_id, option)
        newest = chapter.photo_id

    if newest == photo_id or newest == "":
        return None
    return {"album_id": album_id, "photo_id": newest}

def load_json(path):
    """Read the UTF-8 encoded JSON file at ``path`` and return the parsed data."""
    with open(path, "r", encoding="utf-8") as handle:
        return json.load(handle)

def save_json(path, data):
    """Write ``data`` to ``path`` as UTF-8 JSON, 4-space indented, non-ASCII kept as-is."""
    serialized = json.dumps(data, ensure_ascii=False, indent=4)
    with open(path, "w", encoding="utf-8") as handle:
        handle.write(serialized)

def final_result(result,setting,setting_path):
    """Print each update and store the new chapter ids back into the settings file.

    For every entry of ``result`` the first item of ``setting["UpdateList"]``
    with the same ``album_id`` gets its ``photo_id`` replaced. The settings
    are written to ``setting_path`` afterwards. With an empty ``result``
    nothing is printed or saved.
    """
    if len(result) <= 0:
        return

    for update in result:
        # Locate the first settings entry that belongs to this album.
        match = next(
            (entry for entry in setting["UpdateList"] if entry["album_id"] == update["album_id"]),
            None,
        )
        if match is None:
            continue
        print(match["title"]+": 漫畫id " + update["album_id"] + " 更新章節id至 " + update["photo_id"])
        match["photo_id"] = update["photo_id"]

    save_json(setting_path, setting)

def check_update(list,configfile):
    """Check every watch-list entry, download what is new and rename the folders.

    ``list`` holds dicts with ``"album_id"``, ``"photo_id"`` and ``"title"``;
    ``configfile`` is the path of the jmcomic option file. For each entry that
    had something downloaded, the folder ``<base_dir>/<album_id>`` is moved to
    ``<base_dir>/<title>``.

    Returns the list of non-``None`` results from ``check_download``.
    """
    option = jmcomic.create_option(configfile)
    client = option.build_jm_client()
    # Build paths with os.path instead of hard-coded backslashes, so this is
    # not Windows-only and a trailing "/" in base_dir does not double the separator.
    base_dir = os.path.normpath(option.dir_rule.base_dir)

    result = []
    for item in list:
        item_result = check_download(item["album_id"], item["photo_id"], option, client)
        # Only albums that were actually updated are recorded and moved.
        if item_result is not None:
            result.append(item_result)
            move_file(
                os.path.join(base_dir, item["album_id"]),
                os.path.join(base_dir, item["title"]),
            )

    return result

def start(configfile, jsonfile):
    """Run a full update using files located next to this script.

    ``configfile`` and ``jsonfile`` are file names relative to the script's
    directory. The JSON file supplies ``"UpdateList"``; after the update it
    is rewritten with the new chapter ids by ``final_result``.
    """
    script_path = os.path.dirname(os.path.abspath(__file__))
    # os.path.join instead of a hard-coded "\\" so the paths work on any OS.
    setting_path = os.path.join(script_path, jsonfile)
    setting = load_json(setting_path)

    result = check_update(setting["UpdateList"], os.path.join(script_path, configfile))

    final_result(result, setting, setting_path)

# Run the update; both file names are resolved relative to this script's directory.
start("config.yml", "test.json")

test.json和主要code放在一起即可

{
    "UpdateList": [
        {
            "album_id": "本子id",
            "photo_id": "章节id,留空就是抓整本",
            "title": "自订下载後的本子资料夹名称,如本子标题;下载预设设定dir_rule.rule必须是Bd_Aid_Pindex才可以"
        },
        {
            "album_id": "123",
            "photo_id": "456",
            "title": "AAA"
        }
    ]
}

config.yml和主要code放在一起即可

client:
    postman:
        meta_data:
            cookies:
                AVS: xxx
dir_rule:
    base_dir: D:/下载/
    rule: Bd_Aid_Pindex

jkdfzx avatar Aug 22 '23 13:08 jkdfzx