pySmartDL
How to resume a partially downloaded file in pySmartDL
I would like to resume downloads from the previously downloaded part. Right now, each download starts from the beginning. How can I check for the already-downloaded data...
I'm also interested in whether this is possible. Can a download continue if it is abruptly stopped/killed midway?
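For anyone who just needs the mechanics while this is unsupported: check how many bytes of the file are already on disk, then ask the server for only the missing tail with an HTTP Range header. A minimal sketch using plain requests; the function name and chunk size are made up, and it assumes the server honors range requests:

import os
import requests

def resume_download(url, dest):
    """Fetch url into dest, continuing from whatever is already on disk."""
    offset = os.path.getsize(dest) if os.path.exists(dest) else 0
    headers = {'Range': f'bytes={offset}-'} if offset else {}
    with requests.get(url, headers=headers, stream=True) as r:
        # 206 means the server honored the Range request; a plain 200
        # means it sent the whole file, so start over from byte zero.
        mode = 'ab' if r.status_code == 206 else 'wb'
        with open(dest, mode) as f:
            for chunk in r.iter_content(64 * 1024):
                f.write(chunk)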
This library does not support this feature.
I'll work on it.
Hi @iTaybb , am I correct that this is still not implemented? And am I correct that this feature would/could handle lengthy disconnections? My boss noticed that if he turned off the internet connection and then reconnected it, an active download would start from scratch rather than picking up from where it had left off.
Hi there. It's not implemented yet. I've started working on it (on a side branch) but it is still buggy. Might take some weeks to make it stable.
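Until native support lands, the same Range trick can be wrapped in a retry loop so a dropped connection resumes instead of restarting. A sketch only, reusing the hypothetical resume_download helper above:

import time
import requests

def download_with_retries(url, dest, max_retries=10):
    # Keep retrying through disconnections; each attempt picks up from
    # whatever resume_download already wrote to disk.
    for attempt in range(max_retries):
        try:
            resume_download(url, dest)  # hypothetical helper sketched above
            return
        except requests.ConnectionError:
            time.sleep(2 ** attempt)  # back off before the next attempt
    raise RuntimeError(f'giving up on {url} after {max_retries} attempts')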
Looks like the issue with the current implementation is related to a race condition in the recursive threading: https://stackoverflow.com/questions/51879070/python-executor-spawn-tasks-from-done-callback-recursively-submit-tasks
test_hash (__main__.TestSmartDL) ... Exception in thread Thread-9:
Traceback (most recent call last):
File "C:\Users\brand\AppData\Local\Programs\Python\Python36\lib\threading.py", line 916, in _bootstrap_inner
self.run()
File "C:\Users\brand\AppData\Local\Programs\Python\Python36\lib\threading.py", line 864, in run
self._target(*self._args, **self._kwargs)
File "c:\users\brand\source\pysmartdl\pySmartDL\pySmartDL.py", line 645, in post_threadpool_actions
SmartDLObj.try_next_mirror(HashFailedException(os.path.basename(dest_path), hash, SmartDLObj.hash_code))
File "c:\users\brand\source\pysmartdl\pySmartDL\pySmartDL.py", line 339, in try_next_mirror
self.start()
File "c:\users\brand\source\pysmartdl\pySmartDL\pySmartDL.py", line 292, in start
self.logger
File "c:\users\brand\source\pysmartdl\pySmartDL\utils.py", line 284, in submit
future = super().submit(fn, *args, **kwargs)
File "C:\Users\brand\AppData\Local\Programs\Python\Python36\lib\concurrent\futures\thread.py", line 117, in submit
raise RuntimeError('cannot schedule new futures after shutdown')
RuntimeError: cannot schedule new futures after shutdown
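For context, this RuntimeError is what concurrent.futures raises whenever submit() is called on an executor whose shutdown() has already run. In the traceback above, try_next_mirror() restarts the download through the same pool that the post-download callback just tore down. A stripped-down reproduction (illustrative names, not pySmartDL's actual internals):

from concurrent.futures import ThreadPoolExecutor

pool = ThreadPoolExecutor(max_workers=2)
future = pool.submit(lambda: 'download chunk')
pool.shutdown(wait=True)  # all workers finished, pool is now closed

# The retry path effectively does this: it submits new work to the
# pool that was just shut down.
pool.submit(lambda: 'retry')  # RuntimeError: cannot schedule new futures after shutdown

One way out would be to build a fresh executor for each retry attempt instead of reusing the torn-down one.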
Would it be possible to add functionality that uses a different URL only if the first one fails, and continues downloading from where it left off? This would be useful for files whose links have an expiry time: even after a link expires, we could supply a new link and continue from where we left off. The current mirror function checks beforehand whether the second URL is valid even when the first one is valid; it would be great if it only checked the second URL when the first one fails or returns an error.
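A lazy fallback of that kind might look roughly like this: only touch the next mirror once the current one errors out, and send the current byte offset in a Range header so the transfer continues instead of restarting. A sketch only; the mirror list, timeout, and chunk size are placeholders:

import os
import requests

def download_with_fallback(mirrors, dest):
    """Try each mirror in order; on failure, resume from the current offset."""
    for url in mirrors:
        offset = os.path.getsize(dest) if os.path.exists(dest) else 0
        try:
            headers = {'Range': f'bytes={offset}-'} if offset else {}
            with requests.get(url, headers=headers, stream=True, timeout=30) as r:
                r.raise_for_status()
                with open(dest, 'ab' if offset else 'wb') as f:
                    for chunk in r.iter_content(64 * 1024):
                        f.write(chunk)
            return True                  # finished without raising
        except requests.RequestException:
            continue                     # this mirror failed or expired; try the next
    return False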
import json
import random
import time
from math import inf
from pathlib import Path
from threading import Thread

import psutil
import requests
from requests.adapters import HTTPAdapter
from requests.sessions import Session


class Port_Getter:
    """Hands out random local ports that are not already in use."""

    @staticmethod
    def busyports():
        # Skip connections without a local address (laddr can be empty).
        return set(c.laddr.port for c in psutil.net_connections() if c.laddr)

    def __init__(self):
        self.assigned = set()

    def randomport(self):
        # Stay above 1023: binding privileged ports needs root.
        port = random.randint(1024, 65535)
        while port in Port_Getter.busyports() or port in self.assigned:
            port = random.randint(1024, 65535)
        self.assigned.add(port)
        return port


class Adapter(HTTPAdapter):
    """Transport adapter that binds connections to a fixed source port."""

    def __init__(self, port, *args, **kwargs):
        self._source_port = port
        super().__init__(*args, **kwargs)

    def init_poolmanager(self, connections, maxsize, block=False, **pool_kwargs):
        # The original code stored the port but never used it; passing
        # source_address here makes urllib3 actually bind to it.
        pool_kwargs['source_address'] = ('', self._source_port)
        super().init_poolmanager(connections, maxsize, block, **pool_kwargs)


class UserSession(Session):
    portassigner = Port_Getter()

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Disable keep-alive so every request really uses its own socket.
        self.headers.update({'connection': 'close'})
        self.setport()

    def setport(self):
        port = UserSession.portassigner.randomport()
        self.mount('http://', Adapter(port))
        self.mount('https://', Adapter(port))


class Multidown:
    """Downloads one byte range of the file in its own thread."""

    def __init__(self, dic, id):
        self.count = 0
        self.completed = False
        self.id = id
        self.dic = dic  # shared state, periodically dumped to .progress.json
        self.position = self.getval('position')

    def getval(self, key):
        return self.dic[self.id][key]

    def setval(self, key, val):
        self.dic[self.id][key] = val

    def worker(self):
        filepath = self.getval('filepath')
        path = Path(filepath)
        end = self.getval('end')
        # If a .part file already exists, resume: shift the start of the
        # requested range past the bytes that are already on disk.
        if not path.exists():
            start = self.getval('start')
        else:
            self.count = path.stat().st_size
            start = self.getval('start') + self.count
        url = self.getval('url')
        self.position = start
        with open(path, 'ab+') as f:
            if self.count != self.getval('length'):
                s = UserSession()
                r = s.get(url, headers={'range': 'bytes={0}-{1}'.format(start, end)}, stream=True)
                while True:
                    if self.dic['paused']:
                        r.connection.close()
                        r.close()
                        s.close()
                        break
                    if (chunk := next(r.iter_content(128 * 1024), None)):
                        f.write(chunk)
                        self.count += len(chunk)
                        self.position += len(chunk)
                        self.setval('count', self.count)
                        self.setval('position', self.position)
                    else:
                        break
        if self.count == self.getval('length'):
            self.completed = True
            self.setval('completed', True)


class Singledown:
    """Fallback single-connection download (no resume support)."""

    def __init__(self):
        self.count = 0
        # The monitor loop in Downloader reads .completed, so Singledown
        # needs the attribute too (the original omitted it).
        self.completed = False

    def worker(self, url, path):
        with requests.get(url, stream=True) as r:
            with path.open('wb') as file:
                for chunk in r.iter_content(1048576):
                    if chunk:
                        self.count += len(chunk)
                        file.write(chunk)
        self.completed = True


class Downloader:
    def __init__(self):
        self.dic = dict()
        self.workers = []
        self.progress = 0
        self.alive = True
        self.dic['paused'] = False

    def download(self, url, filepath, num_connections):
        f_path = filepath + '.progress.json'
        bcontinue = Path(f_path).exists()  # a progress file means we can resume
        singlethread = False
        threads = []
        path = Path(filepath)
        head = requests.head(url)
        headers = head.headers
        total = headers.get('content-length')
        # .get() instead of ["Content-Length"]: a missing header must fall
        # through to the single-threaded path, not raise KeyError.
        size = int(total or 0) // 1000000  # 1 MB = 1,000,000 bytes
        if size < 50:
            num_connections = 5
        folder = '/'.join(filepath.split('/')[:-1])
        Path(folder).mkdir(parents=True, exist_ok=True)
        if not total:
            print(f'Cannot find the total length of the content of {url}, '
                  'the file will be downloaded using a single thread.')
            print('Download started!')
            sd = Singledown()
            th = Thread(target=sd.worker, args=(url, path))
            self.workers.append(sd)
            th.start()
            total = inf
            singlethread = True
        else:
            total = int(total)
            if not headers.get('accept-ranges'):
                print('Server does not support the `range` parameter, '
                      'the file will be downloaded using a single thread.')
                print('Download started!')
                sd = Singledown()  # was self.Singledown(), which does not exist
                th = Thread(target=sd.worker, args=(url, path))  # was sd.singledown
                self.workers.append(sd)
                th.start()
                singlethread = True
            else:
                if bcontinue:
                    # JSON keys are strings; turn the numeric segment ids back into ints.
                    progress = json.loads(
                        Path(f_path).read_text(),
                        object_hook=lambda d: {int(k) if k.isdigit() else k: v
                                               for k, v in d.items()})
                segment = total / num_connections
                print('Download started!')
                self.dic['total'] = total
                self.dic['connections'] = num_connections
                for i in range(num_connections):
                    if not bcontinue:
                        start = int(segment * i)
                        end = int(segment * (i + 1)) - (i != num_connections - 1)
                        position = start
                        length = end - start + (i != num_connections - 1)
                    else:
                        start = progress[i]['start']
                        end = progress[i]['end']
                        position = progress[i]['position']
                        length = progress[i]['length']
                    self.dic[i] = {
                        'start': start,
                        'position': position,
                        'end': end,
                        'filepath': filepath + '.' + str(i).zfill(2) + '.part',
                        'count': 0,
                        'length': length,
                        'url': url,
                        'completed': False
                    }
                for i in range(num_connections):
                    md = Multidown(self.dic, i)
                    th = Thread(target=md.worker)
                    threads.append(th)
                    th.start()
                    self.workers.append(md)
                Path(f_path).write_text(json.dumps(self.dic, indent=4))
        downloaded = 0
        totalMiB = total / 1048576
        while True:
            Path(f_path).write_text(json.dumps(self.dic, indent=4))
            status = sum(i.completed for i in self.workers)
            downloaded = sum(i.count for i in self.workers)
            doneMiB = downloaded / 1048576
            try:
                self.progress = (doneMiB * 100) / totalMiB
            except ZeroDivisionError:
                self.progress = 0
            if self.dic['paused']:
                break
            if status == len(self.workers):
                if not singlethread:
                    # All segments are done: stitch the .part files together.
                    BLOCKSIZE = 4096
                    BLOCKS = 1024
                    CHUNKSIZE = BLOCKSIZE * BLOCKS
                    with path.open('wb') as dest:
                        for i in range(num_connections):
                            file = filepath + '.' + str(i).zfill(2) + '.part'
                            with Path(file).open('rb') as f:
                                while (chunk := f.read(CHUNKSIZE)):
                                    dest.write(chunk)
                            Path(file).unlink()
                break
            time.sleep(0.04)
        status = sum(i.completed for i in self.workers)
        if status == len(self.workers):
            print('Download completed!')
            Path(f_path).unlink()
        else:
            print('Download interrupted!')


if __name__ == "__main__":
    d = Downloader()
    url = "https://gamedownloads.rockstargames.com/public/installer/Rockstar-Games-Launcher.exe"
    d.download(url, "./Downloads/rockstar.exe", 4)
By using something like this, you can resume a partially downloaded file.
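For anyone trying the script above, pausing and resuming can be exercised roughly like this. Illustrative only: download() blocks, so it runs in its own thread here, and the second call picks the transfer back up from the .progress.json and .part files left on disk:

from threading import Thread
import time

d = Downloader()
url = "https://gamedownloads.rockstargames.com/public/installer/Rockstar-Games-Launcher.exe"

# Start the download in the background, then pause it after a few seconds.
t = Thread(target=d.download, args=(url, "./Downloads/rockstar.exe", 4))
t.start()
time.sleep(5)
d.dic['paused'] = True   # workers see this flag and stop; progress stays on disk
t.join()
time.sleep(1)            # give the worker threads a moment to wind down

# A fresh Downloader resumes from the saved .progress.json and .part files.
d2 = Downloader()
d2.download(url, "./Downloads/rockstar.exe", 4)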