amazon-product-review-scraper
amazon-product-review-scraper copied to clipboard
ValueError: arrays must all be same length
from amazon_product_review_scraper import amazon_product_review_scraper
review_scraper = amazon_product_review_scraper(amazon_site="amazon.in", product_asin="1475096062")
reviews_df = review_scraper.scrape()
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-48-ff7f097bdc9a> in <module>
1 from amazon_product_review_scraper import amazon_product_review_scraper
2 review_scraper = amazon_product_review_scraper(amazon_site="amazon.in", product_asin="1475096062")
----> 3 reviews_df = review_scraper.scrape()
4 reviews_df.head(5)
~/.local/lib/python3.7/site-packages/amazon_product_review_scraper/amazon_product_review_scraper.py in scrape(self)
72
73 # returning df
---> 74 return pd.DataFrame(self.reviews_dict)
75
76
~/.local/lib/python3.7/site-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
466
467 elif isinstance(data, dict):
--> 468 mgr = init_dict(data, index, columns, dtype=dtype)
469 elif isinstance(data, ma.MaskedArray):
470 import numpy.ma.mrecords as mrecords
~/.local/lib/python3.7/site-packages/pandas/core/internals/construction.py in init_dict(data, index, columns, dtype)
281 arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays
282 ]
--> 283 return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
284
285
~/.local/lib/python3.7/site-packages/pandas/core/internals/construction.py in arrays_to_mgr(arrays, arr_names, index, columns, dtype, verify_integrity)
76 # figure out the index, if necessary
77 if index is None:
---> 78 index = extract_index(arrays)
79 else:
80 index = ensure_index(index)
~/.local/lib/python3.7/site-packages/pandas/core/internals/construction.py in extract_index(data)
395 lengths = list(set(raw_lengths))
396 if len(lengths) > 1:
--> 397 raise ValueError("arrays must all be same length")
398
399 if have_dicts:
ValueError: arrays must all be same length
The error occurs because the columns in `self.reviews_dict` do not all have the same number of values:
>>> {a: len(b) for a, b in self.reviews_dict.items()}
{'date_info': 469, 'name': 469, 'title': 51, 'content': 469, 'rating': 51}
I had the same issue but managed to fix it by adding the following, in place of the existing `return pd.DataFrame(self.reviews_dict)` line,
df = pd.DataFrame.from_dict(self.reviews_dict, orient='index')
df = df.transpose()
return df
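in the `scrape()` function in amazon_product_review_scraper.py. Building the frame with `orient='index'` and then transposing pads the shorter columns with missing values instead of raising. Hope this helps!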
It doesn't really matter: if you run the scraper two or three times, you get blocked, so I gave up on scraping Amazon.
Hmm, strange. I've been running it loads of times and haven't been blocked!