onenote_export
onenote_export copied to clipboard
Process crashed with `AttributeError: 'MyHTMLParser' object has no attribute 'attrs'`
The program seems to be downloading attached images or something.
This is an Ubuntu 20.10 desktop, running Python 3.8.6 in a Python venv.
Traceback (most recent call last)
-
File "/venv/lib/python3.8/site-packages/flask/app.py", line 2464, in
__call__
def __call__(self, environ, start_response): """The WSGI server calls the Flask application object as the WSGI application. This calls :meth:`wsgi_app` which can be wrapped to applying middleware.""" return self.wsgi_app(environ, start_response) def __repr__(self): return "<%s %r>" % (self.__class__.__name__, self.name)
-
File "/venv/lib/python3.8/site-packages/flask/app.py", line 2450, in
wsgi_app
try: ctx.push() response = self.full_dispatch_request() except Exception as e: error = e response = self.handle_exception(e) except: # noqa: B001 error = sys.exc_info()[1] raise return response(environ, start_response) finally:
-
File "/venv/lib/python3.8/site-packages/flask/app.py", line 1867, in
handle_exception
# if we want to repropagate the exception, we can attempt to # raise it with the whole traceback in case we can do that # (the function was actually called from the except part) # otherwise, we just raise the error again if exc_value is e: reraise(exc_type, exc_value, tb) else: raise e self.log_exception((exc_type, exc_value, tb)) server_error = InternalServerError()
-
File "/venv/lib/python3.8/site-packages/flask/_compat.py", line 39, in
reraise
import collections.abc as collections_abc def reraise(tp, value, tb=None): if value.__traceback__ is not tb: raise value.with_traceback(tb) raise value implements_to_string = _identity ![Open an interactive python shell in this frame][img41]else: iterkeys = lambda d: d.iterkeys()
-
File "/venv/lib/python3.8/site-packages/flask/app.py", line 2447, in
wsgi_app
ctx = self.request_context(environ) error = None try: try: ctx.push() response = self.full_dispatch_request() except Exception as e: error = e response = self.handle_exception(e) except: # noqa: B001 error = sys.exc_info()[1]
-
File "/venv/lib/python3.8/site-packages/flask/app.py", line 1952, in
full_dispatch_request
request_started.send(self) rv = self.preprocess_request() if rv is None: rv = self.dispatch_request() except Exception as e: rv = self.handle_user_exception(e) return self.finalize_request(rv) def finalize_request(self, rv, from_error_handler=False): """Given the return value from a view function this finalizes the request by converting it into a response and invoking the
-
File "/venv/lib/python3.8/site-packages/flask/app.py", line 1821, in
handle_user_exception
return self.handle_http_exception(e) handler = self._find_error_handler(e) if handler is None: reraise(exc_type, exc_value, tb) return handler(e) def handle_exception(self, e): """Handle an exception that did not have an error handler associated with it, or that was raised from an error handler.
-
File "/venv/lib/python3.8/site-packages/flask/_compat.py", line 39, in
reraise
import collections.abc as collections_abc def reraise(tp, value, tb=None): if value.__traceback__ is not tb: raise value.with_traceback(tb) raise value implements_to_string = _identity ![Open an interactive python shell in this frame][img85]else: iterkeys = lambda d: d.iterkeys()
-
File "/venv/lib/python3.8/site-packages/flask/app.py", line 1950, in
full_dispatch_request
self.try_trigger_before_first_request_functions() try: request_started.send(self) rv = self.preprocess_request() if rv is None: rv = self.dispatch_request() except Exception as e: rv = self.handle_user_exception(e) return self.finalize_request(rv) def finalize_request(self, rv, from_error_handler=False):
-
File "/venv/lib/python3.8/site-packages/flask/app.py", line 1936, in
dispatch_request
getattr(rule, "provide_automatic_options", False) and req.method == "OPTIONS" ): return self.make_default_options_response() # otherwise dispatch to the handler for that endpoint return self.view_functions[rule.endpoint](**req.view_args) def full_dispatch_request(self): """Dispatches the request and on top of that performs request pre and postprocessing as well as HTTP exception catching and error handling.
-
File "/onenote_export.py", line 236, in
main_logic
![Open an interactive python shell in this frame][img109]def main_logic(): code = flask.request.args['code'] token = application.acquire_token_by_authorization_code(code, scopes=scopes, redirect_uri=redirect_uri) graph_client = OAuth2Session(token=token) download_notebooks(graph_client, app.config['output_path'], app.config['select_path'], indent=0) print("Done!") return flask.render_template_string('<html>' '<head><title>Done</title></head>' '<body><p1><b>Done</b></p1></body>' '</html>')
-
File "/onenote_export.py", line 175, in
download_notebooks
nb_name = nb["displayName"] indent_print(indent, f'Opening notebook {nb_name}') sections = get_json(graph_client, nb['sectionsUrl']) section_groups = get_json(graph_client, nb['sectionGroupsUrl']) indent_print(indent + 1, f'Got {len(sections)} sections and {len(section_groups)} section groups.') download_sections(graph_client, sections, path / nb_name, select, indent=indent + 1) download_section_groups(graph_client, section_groups, path / nb_name, select, indent=indent + 1) ![Open an interactive python shell in this frame][img129]def download_section_groups(graph_client, section_groups, path, select=None, indent=0): section_groups, select = filter_items(section_groups, select, 'section groups', indent)
-
File "/onenote_export.py", line 196, in
download_sections
for sec in sections: sec_name = sec["displayName"] indent_print(indent, f'Opening section {sec_name}') pages = get_json(graph_client, sec['pagesUrl'] + '?pagelevel=true') indent_print(indent + 1, f'Got {len(pages)} pages.') download_pages(graph_client, pages, path / sec_name, select, indent=indent + 1) ![Open an interactive python shell in this frame][img139]def download_pages(graph_client, pages, path, select=None, indent=0): pages, select = filter_items(pages, select, 'pages', indent) pages = sorted([(page['order'], page) for page in pages])
-
File "/onenote_export.py", line 212, in
download_pages
if level == 0: page_dir = path / page_title else: page_dir = level_dirs[level - 1] / page_title level_dirs[level] = page_dir download_page(graph_client, page['contentUrl'], page_dir, indent=indent + 1) ![Open an interactive python shell in this frame][img150]def download_page(graph_client, page_url, path, indent=0): out_html = path / 'main.html' if out_html.exists():
-
File "/onenote_export.py", line 225, in
download_page
path.mkdir(parents=True, exist_ok=True) response = get(graph_client, page_url, indent=indent) if response is not None: content = response.text indent_print(indent, f'Got content of length {len(content)}') content = download_attachments(graph_client, content, path, indent=indent) with open(out_html, "w", encoding='utf-8') as f: f.write(content) ![Open an interactive python shell in this frame][img163]@app.route("/getToken")
-
File "/onenote_export.py", line 146, in
download_attachments
with open(attachment_dir / file_name, "wb") as f: f.write(data) props['data'] = "attachments/" + file_name return generate_html('object', props) content = re.sub(r"<img .*?\\/>", download_image, content, flags=re.DOTALL) content = re.sub(r"<object .*?\\/>", download_attachment, content, flags=re.DOTALL) return content ![Open an interactive python shell in this frame][img174]def indent_print(depth, text):
-
File "/usr/lib/python3.8/re.py", line 210, in
sub
non-overlapping occurrences of the pattern in string by the replacement repl. repl can be either a string or a callable; if a string, backslash escapes in it are processed. If it is a callable, it's passed the Match object and must return a replacement string to be used.""" return _compile(pattern, flags).sub(repl, string, count) ![Open an interactive python shell in this frame][img182]def subn(pattern, repl, string, count=0, flags=0): """Return a 2-tuple containing (new_string, number). new_string is the string obtained by replacing the leftmost non-overlapping occurrences of the pattern in the source
-
File "/onenote_export.py", line 108, in
download_image
def download_image(tag_match): # <img width="843" height="218.5" src="..." data-src-type="image/png" data-fullres-src="..." # data-fullres-src-type="image/png" /> parser = MyHTMLParser() parser.feed(tag_match[0]) props = parser.attrs image_url = props.get('data-fullres-src', props['src']) image_type = props.get('data-fullres-src-type', props['data-src-type']).split("/")[-1] file_name = ''.join(random.choice(string.ascii_lowercase) for _ in range(10)) + '.' + image_type req = get(graph_client, image_url, indent=indent) if req is None:
AttributeError: 'MyHTMLParser' object has no attribute 'attrs'
It looks to me like this is a bug in the upstream HTML parser. I was able to work around the issue by:
- opening OneNote,
- finding the image with the problematic alt text and
- editing it to remove the HTML code fragments, or more precisely: the angle brackets and quotes
<>' "
- (Select image >> right-click >> Alt text...)