JMComic-Crawler-Python
JMComic-Crawler-Python copied to clipboard
自动侦测新章节
trafficstars
写了一段自动侦测是否有新章节、并自动下载所有新章节的代码。只需要在 list 里填入相对应的本子 id、章节 id 即可。更新完后会显示最新的章节 id;完成后把新的章节 id 再填回 list,就可以循环使用。分享给有需要的人。
import jmcomic
# Load the crawler option from the config file, then build the client that
# is used to look up album details.
option = jmcomic.create_option('D:/config.yml')
client = option.build_jm_client()
def find_update(albums, id):
    """Return every chapter that comes after chapter ``id`` in ``albums``.

    Args:
        albums: Iterable of chapter objects exposing a ``photo_id``
            attribute; chapters are taken in the order it yields them.
        id: Id of the chapter that has already been downloaded. It is
            compared as text, so an integer id still matches a string
            ``photo_id``.

    Returns:
        list: The chapters following the first one whose ``photo_id`` equals
        ``id`` (that chapter itself is excluded). Empty when ``id`` is the
        last chapter or does not occur at all.
    """
    wanted = str(id)
    chapters = iter(albums)
    # Consume chapters up to and including the marker chapter ...
    for chapter in chapters:
        if str(chapter.photo_id) == wanted:
            break
    # ... whatever the iterator still holds comes after the marker.
    # Unpacking is used instead of list(): this script rebinds the name
    # ``list`` at module level.
    return [*chapters]
def check_download(album_id, photo_id):
    """Download every chapter after ``photo_id`` and report the newest one.

    Uses the module-level ``client`` and ``option`` objects and the
    ``find_update`` helper.

    Args:
        album_id: Id of the album to inspect.
        photo_id: Id of the last chapter already downloaded; only chapters
            listed after it are fetched.

    Returns:
        dict | None: ``{"album_id": ..., "photo_id": ...}`` carrying the id
        of the last chapter downloaded, or ``None`` when nothing new was
        found.
    """
    album = client.get_album_detail(album_id)
    new_chapters = find_update(album, photo_id)

    latest_id = ""
    for chapter in new_chapters:
        # Pass the configured option explicitly so the download honours the
        # settings loaded from the config file instead of library defaults.
        jmcomic.download_photo(chapter.photo_id, option)
        latest_id = chapter.photo_id

    if latest_id != photo_id and latest_id != "":
        return {"album_id": album_id, "photo_id": latest_id}
    return None
def start(list):
    """Check every watched album and collect the ones that were updated.

    Args:
        list: Iterable of dicts with ``album_id`` and ``photo_id`` keys.

    Returns:
        list: The non-``None`` results of ``check_download``, in input order.
    """
    outcomes = (
        check_download(entry['album_id'], entry['photo_id'])
        for entry in list
    )
    # Albums without a new chapter come back as None and are dropped.
    return [outcome for outcome in outcomes if outcome is not None]
# Albums to watch. Each entry gives the album id and the id of the last
# chapter already downloaded; only the chapters after that one are fetched.
# Named ``update_list`` rather than ``list`` so the builtin stays usable.
update_list = [
    # Album 'xxx': fetch only the chapters after 'yyy' (excluding 'yyy').
    {"album_id": 'xxx', "photo_id": 'yyy'},
    # Album 'aaa': fetch only the chapters after 'bbb' (excluding 'bbb').
    {"album_id": 'aaa', "photo_id": 'bbb'},
]
result_list = start(update_list)
# Nothing is printed for albums that had no new chapter.
for item in result_list:
    # An f-string also copes with ids that are not str.
    print(f"漫画id {item['album_id']} 更新章节id至 {item['photo_id']}")
2023-08-30,该reply的代码已过时不建议使用,请使用下一个reply的代码
我提供一个我的版本
from jmcomic import *
# Watch list for update detection.
# key: album_id
# value: photo_id (only chapters after this one are downloaded)
dic = {
    'xxx': 'yyy'
}
class MyJmDownloader(JmDownloader):
    """Downloader that skips chapters at or before the recorded chapter id.

    Albums that are not keys of the module-level ``dic`` are downloaded
    without any filtering.
    """

    def download_by_photo_detail(self, photo: JmPhotoDetail, client: JmcomicClient):
        """Download ``photo`` only if it is newer than the recorded chapter.

        Args:
            photo: Chapter about to be downloaded.
            client: Client forwarded unchanged to the parent implementation.

        Returns:
            Whatever the parent ``download_by_photo_detail`` returns, or
            ``None`` when the chapter is skipped.
        """
        album: JmAlbumDetail = photo.from_album

        # Album is not on the watch list: download normally.
        if album.album_id not in dic:
            return super().download_by_photo_detail(photo, client)

        # Only chapters after the recorded one are wanted. This relies on
        # later chapters generally having a larger photo_id.
        photo_begin = dic[album.album_id]
        photo_id = photo.photo_id

        try:
            # Compare numerically: as plain strings '99999' > '290266' is
            # True, which would re-download an older chapter.
            is_newer = int(photo_id) > int(photo_begin)
        except (TypeError, ValueError):
            # Ids that are not numeric keep the original comparison.
            is_newer = photo_id > photo_begin

        if is_newer:
            return super().download_by_photo_detail(photo, client)

        # Chapter is at or before the recorded one: already downloaded.
        jm_debug('侦测更新清单', f'章节已下载过: {photo_id}')
        return None
# Replace jmcomic's default downloader with the filtering subclass.
# NOTE(review): a later snippet in this file assigns
# JmModuleConfig.CLASS_DOWNLOADER instead — confirm which attribute the
# installed jmcomic version actually reads.
JmModuleConfig.DOWNLOADER_CLASS = MyJmDownloader
# Download every album on the watch list.
download_album(dic.keys())
我上面贴的第一版代码有些不严谨,我发现一些比较老的本子的前一些章节不一定是递增id的,使用你的find_update来挑选章节会更适用,如下
from jmcomic import *
# Watch list for update detection.
# key: album_id
# value: photo_id (only chapters listed after this one are downloaded)
dic = {
    '145504': '290266'
}
# Build the option from a configuration file.
option = create_option('your option.yml')
class FindUpdateDownloader(JmDownloader):
    """Downloader that narrows a watched album down to its unseen chapters."""

    def filter_iter_objs(self, iter_objs: DownloadIterObjs):
        """Route albums through ``find_update``; pass anything else through."""
        if isinstance(iter_objs, JmAlbumDetail):
            return self.find_update(iter_objs)
        return iter_objs

    def find_update(self, album: JmAlbumDetail):
        """Return the chapters of ``album`` listed after the recorded chapter.

        Args:
            album: Album whose chapters are about to be downloaded.

        Returns:
            ``album`` itself when its id is not a key of the module-level
            ``dic``; otherwise a list of the chapters that follow the one
            whose ``photo_id`` equals ``dic[album.album_id]`` (empty when
            that chapter is last or absent).
        """
        if album.album_id not in dic:
            return album

        marker = dic[album.album_id]
        remaining = iter(album)
        # Skip everything up to and including the recorded chapter.
        for photo in remaining:
            if photo.photo_id == marker:
                break
        # What is left in the iterator is exactly the newer chapters.
        return [*remaining]
# Replace jmcomic's default downloader with the filtering subclass.
JmModuleConfig.CLASS_DOWNLOADER = FindUpdateDownloader
# Download every album on the watch list, using the option built above.
download_album(dic.keys(), option)
关于这个功能,新版本jmcomic支持了filter,可以更好的实现这一功能,我专门写了一个代码示例,感兴趣可以看下 https://github.com/hect0x7/JMComic-Crawler-Python/blob/master/usage/usage_feature_filter.py
其实我后来改写了,主要是为了可以针对章节自动更新,连带章节 id 也做成自动替换了。另外写了一些自己想要的流程跟自动处理:先下载成 本子id/1、本子id/2……,然后自动移动到 自订标题名称/1、自订标题名称/2……,最后将 json 资料自动替换成最新的章节 id。
主要code
import jmcomic
import shutil
import os
import json
def delete_empty_directories(directory_path):
    """Remove every empty directory under ``directory_path``, then the root.

    Directories are visited bottom-up, so a directory whose only content was
    empty subdirectories ends up empty and is removed as well. Directories
    that still contain files are left in place.

    Args:
        directory_path: Root directory to prune. A path that is not an
            existing directory is ignored instead of raising.
    """
    if not os.path.isdir(directory_path):
        # os.listdir() below would raise for a missing or non-directory path.
        return

    for root_dir, subdirectories, _files in os.walk(directory_path, topdown=False):
        for subdirectory in subdirectories:
            subdirectory_path = os.path.join(root_dir, subdirectory)
            if os.path.islink(subdirectory_path):
                # os.rmdir() cannot remove a symlink; leave links alone.
                continue
            if not os.listdir(subdirectory_path):
                os.rmdir(subdirectory_path)

    # Finally drop the root itself if nothing is left inside it.
    if not os.listdir(directory_path):
        os.rmdir(directory_path)
def move_file(file_source, file_destination):
    """Move the contents of one directory tree into another.

    Files are moved one by one into the mirrored location under
    ``file_destination``; the emptied source directories are then removed
    with ``delete_empty_directories``.

    Args:
        file_source: Directory whose contents are moved. Nothing happens
            when it does not exist.
        file_destination: Directory that receives the contents; created if
            missing. Nothing happens when it equals ``file_source``.
    """
    if file_source == file_destination or not os.path.exists(file_source):
        return

    # Make sure the destination root exists.
    os.makedirs(file_destination, exist_ok=True)

    # Walk the source tree and mirror it under the destination.
    for folder_name, subfolders, file_list in os.walk(file_source):
        # relpath is correct whether or not file_source ends with a path
        # separator; slicing by len(file_source) + 1 drops a character of
        # the sub-path when it does.
        relative = os.path.relpath(folder_name, file_source)
        if relative == os.curdir:
            target_folder = file_destination
        else:
            target_folder = os.path.join(file_destination, relative)
        os.makedirs(target_folder, exist_ok=True)

        for file_name in file_list:
            shutil.move(
                os.path.join(folder_name, file_name),
                os.path.join(target_folder, file_name),
            )

        # Create the sub-directories up front so that directories without
        # files are mirrored too.
        for subfolder_name in subfolders:
            os.makedirs(os.path.join(target_folder, subfolder_name), exist_ok=True)

    # Remove the now-empty source directories.
    delete_empty_directories(file_source)
def find_update(albums, id):
    """Return every chapter that comes after chapter ``id`` in ``albums``.

    Args:
        albums: Iterable of chapter objects exposing a ``photo_id``
            attribute; chapters are taken in the order it yields them.
        id: Id of the chapter that has already been downloaded. It is
            compared as text, so an integer id read from the JSON settings
            still matches a string ``photo_id``.

    Returns:
        list: The chapters following the first one whose ``photo_id`` equals
        ``id`` (that chapter itself is excluded). Empty when ``id`` is the
        last chapter or does not occur at all.
    """
    wanted = str(id)
    chapters = iter(albums)
    # Consume chapters up to and including the marker chapter ...
    for chapter in chapters:
        if str(chapter.photo_id) == wanted:
            break
    # ... whatever the iterator still holds comes after the marker.
    return [*chapters]
def check_download(album_id, photo_id, option, client):
    """Download what is new in an album and report its newest chapter id.

    Args:
        album_id: Id of the album to inspect.
        photo_id: Id of the last chapter already downloaded, or ``""`` to
            download the whole album.
        option: Option object handed to the jmcomic download functions.
        client: Client used to fetch the album detail.

    Returns:
        dict | None: ``{"album_id": ..., "photo_id": ...}`` with the id of
        the newest chapter downloaded, or ``None`` when the album has no
        chapters or nothing new was found.
    """
    album = client.get_album_detail(album_id)
    pending = find_update(album, photo_id)

    if len(album) == 0:
        return None

    # An empty photo_id means "fetch the whole album".
    if photo_id == "":
        jmcomic.download_album(album_id, option)
        return {"album_id": album_id, "photo_id": album[-1].photo_id}

    # Otherwise fetch only the chapters after photo_id, remembering the last.
    newest = ""
    for chapter in pending:
        jmcomic.download_photo(chapter.photo_id, option)
        newest = chapter.photo_id

    if newest == photo_id or newest == "":
        return None
    return {"album_id": album_id, "photo_id": newest}
def load_json(path):
    """Read the JSON file at ``path`` and return the decoded value.

    The file is decoded as UTF-8; a leading byte-order mark, which some
    Windows editors add when saving, is accepted and ignored.

    Args:
        path: Location of the JSON file.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # "utf-8-sig" reads plain UTF-8 as well as UTF-8 with a BOM; json.load
    # rejects text that still starts with a BOM.
    with open(path, "r", encoding="utf-8-sig") as file:
        return json.load(file)
def save_json(path, data):
    """Write ``data`` to ``path`` as UTF-8 JSON with a 4-space indent.

    Non-ASCII characters are written as-is rather than escaped.

    Args:
        path: Location of the file to (over)write.
        data: JSON-serialisable value.

    Raises:
        TypeError: If ``data`` is not JSON-serialisable; the existing file
            is left untouched in that case.
        OSError: If the file cannot be written.
    """
    # Serialise before opening: mode "w" truncates the file immediately, so
    # a failure in the middle of json.dump would destroy the old content.
    text = json.dumps(data, ensure_ascii=False, indent=4)
    with open(path, "w", encoding="utf-8") as file:
        file.write(text)
def final_result(result, setting, setting_path):
    """Report the updated albums and persist their new chapter ids.

    Args:
        result: List of ``{"album_id", "photo_id"}`` dicts describing the
            albums that were updated.
        setting: Settings dict whose ``"UpdateList"`` entries are updated
            in place.
        setting_path: File the settings are written back to.
    """
    # No output and no write when nothing was updated.
    # NOTE(review): the source's indentation was lost; the save is assumed
    # to sit under the non-empty check, which is otherwise redundant.
    if len(result) == 0:
        return

    for item in result:
        for target in setting["UpdateList"]:
            if target["album_id"] != item["album_id"]:
                continue
            message = target["title"] + ": 漫畫id " + item["album_id"]
            message = message + " 更新章節id至 " + item["photo_id"]
            print(message)
            target["photo_id"] = item["photo_id"]
            # Only the first matching entry is updated.
            break

    save_json(setting_path, setting)
def check_update(list, configfile):
    """Download the new chapters of every watched album and file them.

    Args:
        list: Iterable of dicts with ``album_id``, ``photo_id`` and
            ``title`` keys.
        configfile: Path of the jmcomic option file.

    Returns:
        list: One ``{"album_id", "photo_id"}`` dict per album that received
        new chapters.
    """
    option = jmcomic.create_option(configfile)
    client = option.build_jm_client()
    # normpath switches to the platform's separator and drops a trailing
    # one, replacing the hand-written "/" -> "\\" rewrite that only worked
    # on Windows and produced a doubled separator.
    save_path = os.path.normpath(option.dir_rule.base_dir)

    result = []
    for item in list:
        # Look for and download new chapters.
        item_result = check_download(item["album_id"], item["photo_id"], option, client)
        # Remember the newest chapter id when something was downloaded.
        if item_result is not None:
            result.append(item_result)
        # Move <base_dir>/<album_id> to <base_dir>/<title>. move_file
        # returns early when the source folder does not exist, so this is a
        # no-op for albums that had nothing to download.
        move_file(
            os.path.join(save_path, str(item["album_id"])),
            os.path.join(save_path, item["title"]),
        )
    return result
def start(configfile, jsonfile):
    """Run a full update using files located next to this script.

    Loads the settings, downloads the new chapters of every album in their
    ``"UpdateList"``, then reports and saves the new chapter ids.

    Args:
        configfile: File name of the jmcomic option file, relative to the
            directory containing this script.
        jsonfile: File name of the JSON settings file, relative to the same
            directory.
    """
    script_path = os.path.dirname(os.path.abspath(__file__))
    # os.path.join picks the right separator on every platform; the
    # hard-coded "\\" only worked on Windows.
    setting_path = os.path.join(script_path, jsonfile)
    setting = load_json(setting_path)
    result = check_update(setting["UpdateList"], os.path.join(script_path, configfile))
    final_result(result, setting, setting_path)
# Run the updater; both file names are resolved relative to this script's
# own directory inside start().
start("config.yml", "test.json")
test.json和主要code放在一起即可
{
"UpdateList": [
{
"album_id": "本子id",
"photo_id": "章节id,留空就是抓整本",
"title": "自订下载後的本子资料夹名称,如本子标题;下载预设设定dir_rule.rule必须是Bd_Aid_Pindex才可以"
},
{
"album_id": "123",
"photo_id": "456",
"title": "AAA"
}
]
}
config.yml和主要code放在一起即可
client:
  postman:
    meta_data:
      cookies:
        AVS: xxx
dir_rule:
  base_dir: D:/下载/
  rule: Bd_Aid_Pindex