IndexError using parse_form_data method
I would ultimately like a drop-in replacement for cgi.py, but that can come later; first, there is this issue.
There is an error I am receiving while using multipart and Apache2 via mod_cgid, mod_cgi
The code I am using:
from multipart import *
forms, files = parse_form_data(os.environ)
The POST command line I am using to cause the error:
curl -F "text=default" -F "filename=@/home/pc/Desktop/aa.png" -F "filename=@/home/pc/Desktop/bb.png" http://superhotpage.com
Therefore, I am getting this error report:
IndexError
A problem occurred in a Python script. Here is the sequence of function calls leading up to the error, in the order they occurred.
/var/superhotpage/www/html/SuperHotPage.com/index_superhotpage.py in
IndexError: list index out of range args = ('list index out of range',) with_traceback = <built-in method with_traceback of IndexError object>
However, if I use the code below, which does not call the parse_form_data method, it works. I am not suggesting that parse_form_data be replaced (though a simple how-to on the site would have helped a little); I am simply showing the code that correctly creates the two uploads on my computer.
from multipart import *
import email.parser
import urllib
# Module-level collections; MyCGI.FieldStorage() stores uploaded parts in
# `files`. `forms` is created alongside it.
forms = MultiDict()
files = MultiDict()
class MyCGI():
    """Small CGI request reader with a ``cgi.FieldStorage``-like ``getvalue``.

    Query-string and urlencoded-form fields are kept on the instance.
    Multipart parts that carry a filename (or are not buffered) are stored in
    the module-level ``files`` collection; other multipart parts are not
    stored anywhere.
    """

    # Class-level fallbacks; __init__ gives every instance its own values.
    _os_environ = []
    _escape = True
    _data = {}

    def __init__(self, os_environ, escape=True):
        """Remember the CGI environment and the escaping preference.

        Args:
            os_environ: Mapping of CGI variables (typically ``os.environ``).
            escape: When true, ``getvalue`` HTML-escapes returned values.
        """
        self._os_environ = os_environ
        self._escape = escape
        # Fresh dict per instance so nothing is shared through the class.
        self._data = {}

    @staticmethod
    def _find_boundary(content_type):
        """Return the ``boundary`` parameter of a Content-Type value, or ''."""
        for param in content_type.split(';')[1:]:
            key, sep, value = param.strip().partition('=')
            if sep and key.strip().lower() == 'boundary':
                # partition() keeps any '=' that is part of the boundary;
                # surrounding quotes are not part of the value.
                return value.strip().strip('"')
        return ''

    def FieldStorage(self):
        """Parse the current request according to its method and content type.

        GET requests populate the field data from ``QUERY_STRING``.
        POST requests are read from ``sys.stdin``: urlencoded bodies populate
        the field data, multipart bodies are iterated with
        ``MultipartParser`` and their file parts stored in ``files``.
        """
        # Local import: a bare ``import urllib`` does not guarantee that the
        # ``urllib.parse`` submodule has been loaded.
        from urllib.parse import parse_qs

        e = self._os_environ
        method = e.get('REQUEST_METHOD')
        if method == 'GET':
            self._data = parse_qs(e.get('QUERY_STRING', ''))
            return
        if method != 'POST' or 'CONTENT_TYPE' not in e:
            return

        content_type = e['CONTENT_TYPE']
        if 'multipart/form-data' in content_type:
            boundary = self._find_boundary(content_type)
            if not boundary or 'CONTENT_LENGTH' not in e:
                return
            # Read the length from the environ we were given, not os.environ.
            content_length = int(e['CONTENT_LENGTH'])
            stream = sys.stdin.buffer
            for part in MultipartParser(stream, boundary, content_length):
                if part.filename or not part.is_buffered():
                    files[part.name] = part
        elif 'application/x-www-form-urlencoded' in content_type:
            # Substring match so a trailing "; charset=..." is accepted too.
            self._data = parse_qs(sys.stdin.read(), keep_blank_values=True)

    def getvalue(self, arg_key, default=''):
        """Return the first value stored for ``arg_key``.

        Args:
            arg_key: Field name to look up.
            default: Returned unchanged when the field is absent.

        Returns:
            The field's value (the first item when it is a list),
            HTML-escaped when escaping is enabled.
        """
        # Local import keeps this block independent of the file's imports.
        from html import escape

        if arg_key not in self._data:
            return default
        value = self._data[arg_key]
        if isinstance(value, list):
            value = value[0]
        return escape(value) if self._escape else value
# Parse the incoming request, then write every part uploaded under the
# "filename" field to its own numbered file.
mycgi = MyCGI(os.environ)
mycgi.FieldStorage()
for x, upload in enumerate(files.getall('filename')):
    upload.save_as(f"""/home/pc/Desktop/vvv{x}.png""")
I am happy to help with resolving this error to use the parse_form_data method correctly.
Thank you, Stan
Originally posted by @ohlogic in https://github.com/defnull/multipart/issues/46#issuecomment-1507274138
I have edited the code. Until this IndexError can be resolved, here is the code I am using that works.
#!/usr/bin/python3
import os
import sys
import urllib
from html import escape
from multipart import MultipartParser, MultiDict, parse_options_header
# Module-level collections; MyCGI.FieldStorage() stores uploaded parts in
# `files`. `forms` is created alongside it.
forms = MultiDict()
files = MultiDict()
class MyCGI():
    """Small CGI request reader with a ``cgi.FieldStorage``-like ``getvalue``.

    Query-string and urlencoded-form fields are kept on the instance.
    Multipart parts that carry a filename (or are not buffered) are stored in
    the module-level ``files`` collection; other multipart parts are not
    stored anywhere.
    """

    # Class-level fallbacks; __init__ gives every instance its own values.
    _os_environ = []
    _escape = True
    _data = {}

    def __init__(self, os_environ, escape=True):
        """Remember the CGI environment and the escaping preference.

        Args:
            os_environ: Mapping of CGI variables (typically ``os.environ``).
            escape: When true, ``getvalue`` HTML-escapes returned values.
        """
        self._os_environ = os_environ
        self._escape = escape
        # Fresh dict per instance so nothing is shared through the class.
        self._data = {}

    def FieldStorage(self):
        """Parse the current request according to its method and content type.

        GET requests populate the field data from ``QUERY_STRING``.
        POST requests are read from ``sys.stdin``: urlencoded bodies populate
        the field data, multipart bodies are iterated with
        ``MultipartParser`` and their file parts stored in ``files``.
        """
        # Local import: a bare ``import urllib`` does not guarantee that the
        # ``urllib.parse`` submodule has been loaded.
        from urllib.parse import parse_qs

        e = self._os_environ
        method = e.get('REQUEST_METHOD')
        if method == 'GET':
            self._data = parse_qs(e.get('QUERY_STRING', ''))
            return
        if method != 'POST' or 'CONTENT_TYPE' not in e:
            return

        raw_content_type = e['CONTENT_TYPE']
        if 'multipart/form-data' in raw_content_type:
            _, options = parse_options_header(raw_content_type)
            boundary = options.get("boundary", "")
            # Without a boundary or a declared length there is nothing to parse.
            if not boundary or 'CONTENT_LENGTH' not in e:
                return
            # Read the length from the environ we were given, not os.environ.
            content_length = int(e['CONTENT_LENGTH'])
            stream = sys.stdin.buffer
            for part in MultipartParser(stream, boundary, content_length):
                if part.filename or not part.is_buffered():
                    files[part.name] = part
        elif 'application/x-www-form-urlencoded' in raw_content_type:
            self._data = parse_qs(sys.stdin.read(), keep_blank_values=True)

    def getvalue(self, arg_key, default=''):
        """Return the first value stored for ``arg_key``.

        Args:
            arg_key: Field name to look up.
            default: Returned unchanged when the field is absent.

        Returns:
            The field's value (the first item when it is a list),
            HTML-escaped when escaping is enabled.
        """
        if arg_key not in self._data:
            return default
        value = self._data[arg_key]
        if isinstance(value, list):
            value = value[0]
        return escape(value) if self._escape else value
usage:
# Parse the request, then read a single field by name.
mycgi = MyCGI(os.environ)
mycgi.FieldStorage()
user = mycgi.getvalue('user')
And uploading files something like: curl -F "text=default" -F "filename=@/home/computer/Desktop/aa.png" -F "filename=@/home/computer/Desktop/bb.png" http://example.com
# Write every part uploaded under the "filename" field to a numbered file.
for x, upload in enumerate(files.getall('filename')):
    upload.save_as(f"""/home/computer/Desktop/file{x}.png""")
parse_form_data expects a WSGI environment dict, not os.environ. WSGI is very similar to CGI, but not quite the same. The main difference is that instead of reading from sys.stdin, the request body is expected to be part of the WSGI environment dictionary (environ['wsgi.input']). You could build an improvised WSGI environment dictionary by cloning os.environ and adding sys.stdin to it. That would probably work in most CGI environments.
I'll leave this open because while this is not a bug, the error could be more helpful. Instead of an IndexError the parser should warn that the request body is empty.
Thank you.
import sys
import os
# https://github.com/defnull/multipart
import multipart
# Passing os.environ directly failed with
# IndexError @ len_first_line = len(lines[0])
#forms, files = multipart.parse_form_data(os.environ)

# Build an improvised WSGI environment instead: copy the CGI variables and
# attach the request body stream under "wsgi.input".
wsgi_env = dict(os.environ)
wsgi_env["wsgi.input"] = sys.stdin.buffer
forms, files = multipart.parse_form_data(wsgi_env)

some_key = forms["some_key"]
forms = None

attachment = files["attachment"]
attachment_filename = os.path.basename(attachment.filename)
# Rewind and read through the generic file API instead of calling
# getvalue(), which only exists on in-memory BytesIO objects; this works
# for any file object the part may be backed by.
attachment.file.seek(0)
attachment_bytes = attachment.file.read()
attachment_size = len(attachment_bytes)
attachment = None
files = None