Asking for n posts, but only getting 1 latest post
Calling the example, on 5, 10, 20, 100 posts, but only getting the latest post in the output.
But it still says 5/5 or 20/20 on the loading bar. Have not changed any code! Any help or insight is appreciated :)
Let mostrec be the variable which takes an integer input, giving the post number counted from the most recent (e.g. if you want to select the 2nd-latest post from any profile, mostrec = 2).
# `mostrec` is an integer ordinal counted from the most recent post
# (e.g. mostrec = 2 selects the 2nd-latest post on a profile).
# Instagram lays posts out in a 3-column grid, so convert the ordinal
# into a (row, column) pair.
# BUGFIX: the original `(mostrec // 3) + 1` row formula was off by one
# for multiples of 3 (mostrec = 3 gave row 2, but the 3rd post sits in
# row 1, column 3); `(mostrec + 2) // 3` is correct for every ordinal.
row = (mostrec + 2) // 3   # profile-grid row containing the post
column = mostrec % 3       # profile-grid column of the post
if column == 0:            # a third-column post yields 0, so remap it to 3
    column = 3
# e.g. for the 5th-latest post: row = 7 // 3 = 2, column = 5 % 3 = 2,
# so the post is in the 2nd column of the 2nd row.

# Use the row and column just computed to open that particular post
# with Selenium.
im = ('/html/body/div[1]/section/main/div/div[3]/article/div[1]/div/div['
      + str(row) + ']/div[' + str(column) + ']/a/div/div[2]')
img = browser.find_element_by_xpath(im)
See https://github.com/Ayushmanglani/ayush-projects/tree/master/python_projects/InstaBot for the complete source code.
Calling the example, on 5, 10, 20, 100 posts, but only getting the latest post in the output.
But it still says 5/5 or 20/20 on the loading bar. Have not changed any code! Any help or insight is appreciated :)
Hey, did you fix this issue? I've read the mostrec approach suggested by Ayushmanglani, but does it work?
Hey, no i was not able to fix the issue :(
Yeah, I got the same problem. Hope you can fix it soon! If I use posts instead of posts_full, it does grab all the posts specified by 'n', but unfortunately it doesn't grab the full caption text.
If you are still facing the issue, let me tell you my solution:
If you update the _get_posts_full method as follows:
def _get_posts_full(self, num):
    """Fetch full details for the latest ``num`` posts of the open profile.

    Opens the first post of the profile grid, then repeatedly clicks the
    right-pagination arrow, collecting datetime, images, likes/plays,
    likers, caption and comments for each post.

    :param num: number of posts to fetch, counted from the most recent.
    :return: list of post dicts sorted newest-first by ``"datetime"``.
    """

    @retry()
    def check_next_post(cur_key):
        # Advance to the next post.  On slow networks the arrow may not be
        # rendered yet, so raise RetryException to let @retry try again.
        arrow = browser.find_one("._65Bje.coreSpriteRightPaginationArrow")
        if arrow is None:
            raise RetryException()
        arrow.click()

    browser = self.browser
    browser.implicitly_wait(1)
    browser.scroll_down()

    # Open the first (latest) post of the profile grid.
    ele_post = browser.find_one(".v1Nh3 a")
    ele_post.click()

    dict_posts = {}
    pbar = tqdm(total=num)
    pbar.set_description("fetching")
    cur_key = None

    # Collect the post keys up front: the detail overlay no longer exposes
    # a stable per-post URL, so each fetched post is labelled from this list.
    all_posts = self._get_posts(num)
    # NOTE(review): i starts at 1 and the guard below is `i < num`, so
    # all_posts[0] is never used as a key and at most num - 1 detail pages
    # are fetched -- this mirrors the original workaround; confirm intended.
    i = 1

    for _ in range(num):
        dict_post = {}
        try:
            print("i: " + str(i) + " num: " + str(num))
            if i < num:
                dict_post["key"] = all_posts[i]['key']
                fetch_datetime(browser, dict_post)
                fetch_imgs(browser, dict_post)
                fetch_likes_plays(browser, dict_post)
                fetch_likers(browser, dict_post)
                fetch_caption(browser, dict_post)
                fetch_comments(browser, dict_post)
                print(json.dumps(dict_post, ensure_ascii=False))
                self.log(json.dumps(dict_post, ensure_ascii=False))
                dict_posts[browser.current_url] = dict_post
                pbar.update(1)
                check_next_post(all_posts[i]['key'])
                i = i + 1
        except RetryException:
            # BUGFIX: parentheses are required -- `+` binds tighter than
            # `or`, so the original `"..." + cur_key or ...` raised
            # TypeError whenever cur_key was None (which it always is here).
            sys.stderr.write(
                "\x1b[1;31m"
                + "Failed to fetch the post: "
                + (cur_key or 'URL not fetched')
                + "\x1b[0m"
                + "\n"
            )
            break
        except Exception:
            # BUGFIX: the conditional expression must be parenthesized --
            # previously the entire concatenation was its `if` branch.
            sys.stderr.write(
                "\x1b[1;31m"
                + "Failed to fetch the post: "
                + (cur_key if isinstance(cur_key, str) else 'URL not fetched')
                + "\x1b[0m"
                + "\n"
            )
            traceback.print_exc()

    pbar.close()
    posts = list(dict_posts.values())
    if posts:
        # Sort newest-first by the fetched datetime string.
        posts.sort(key=lambda post: post["datetime"], reverse=True)
    print("Alaa")
    print(json.dumps(posts, ensure_ascii=False))
    return posts
It works! thank you! However, the function for fetch_likers is not working.
If you are still facing the issue, let me tell you my solution:
If you update the _get_posts_full method as follows:
def _get_posts_full(self, num):
    """Fetch full details for the latest ``num`` posts of the open profile.

    Opens the first post of the profile grid, then repeatedly clicks the
    right-pagination arrow, collecting datetime, images, likes/plays,
    likers, caption and comments for each post.

    :param num: number of posts to fetch, counted from the most recent.
    :return: list of post dicts sorted newest-first by ``"datetime"``.
    """

    @retry()
    def check_next_post(cur_key):
        # Advance to the next post.  On slow networks the arrow may not be
        # rendered yet, so raise RetryException to let @retry try again.
        arrow = browser.find_one("._65Bje.coreSpriteRightPaginationArrow")
        if arrow is None:
            raise RetryException()
        arrow.click()

    browser = self.browser
    browser.implicitly_wait(1)
    browser.scroll_down()

    # Open the first (latest) post of the profile grid.
    ele_post = browser.find_one(".v1Nh3 a")
    ele_post.click()

    dict_posts = {}
    pbar = tqdm(total=num)
    pbar.set_description("fetching")
    cur_key = None

    # Collect the post keys up front: the detail overlay no longer exposes
    # a stable per-post URL, so each fetched post is labelled from this list.
    all_posts = self._get_posts(num)
    # NOTE(review): i starts at 1 and the guard below is `i < num`, so
    # all_posts[0] is never used as a key and at most num - 1 detail pages
    # are fetched -- this mirrors the original workaround; confirm intended.
    i = 1

    for _ in range(num):
        dict_post = {}
        try:
            print("i: " + str(i) + " num: " + str(num))
            if i < num:
                dict_post["key"] = all_posts[i]['key']
                fetch_datetime(browser, dict_post)
                fetch_imgs(browser, dict_post)
                fetch_likes_plays(browser, dict_post)
                fetch_likers(browser, dict_post)
                fetch_caption(browser, dict_post)
                fetch_comments(browser, dict_post)
                print(json.dumps(dict_post, ensure_ascii=False))
                self.log(json.dumps(dict_post, ensure_ascii=False))
                dict_posts[browser.current_url] = dict_post
                pbar.update(1)
                check_next_post(all_posts[i]['key'])
                i = i + 1
        except RetryException:
            # BUGFIX: parentheses are required -- `+` binds tighter than
            # `or`, so the original `"..." + cur_key or ...` raised
            # TypeError whenever cur_key was None (which it always is here).
            sys.stderr.write(
                "\x1b[1;31m"
                + "Failed to fetch the post: "
                + (cur_key or 'URL not fetched')
                + "\x1b[0m"
                + "\n"
            )
            break
        except Exception:
            # BUGFIX: the conditional expression must be parenthesized --
            # previously the entire concatenation was its `if` branch.
            sys.stderr.write(
                "\x1b[1;31m"
                + "Failed to fetch the post: "
                + (cur_key if isinstance(cur_key, str) else 'URL not fetched')
                + "\x1b[0m"
                + "\n"
            )
            traceback.print_exc()

    pbar.close()
    posts = list(dict_posts.values())
    if posts:
        # Sort newest-first by the fetched datetime string.
        posts.sort(key=lambda post: post["datetime"], reverse=True)
    print("Alaa")
    print(json.dumps(posts, ensure_ascii=False))
    return posts
If you are still facing the issue, let me tell you my solution:
If you update the _get_posts_full method as follows:
def _get_posts_full(self, num):
    """Fetch full details for the latest ``num`` posts of the open profile.

    Opens the first post of the profile grid, then repeatedly clicks the
    right-pagination arrow, collecting datetime, images, likes/plays,
    likers, caption and comments for each post.

    :param num: number of posts to fetch, counted from the most recent.
    :return: list of post dicts sorted newest-first by ``"datetime"``.
    """

    @retry()
    def check_next_post(cur_key):
        # Advance to the next post.  On slow networks the arrow may not be
        # rendered yet, so raise RetryException to let @retry try again.
        arrow = browser.find_one("._65Bje.coreSpriteRightPaginationArrow")
        if arrow is None:
            raise RetryException()
        arrow.click()

    browser = self.browser
    browser.implicitly_wait(1)
    browser.scroll_down()

    # Open the first (latest) post of the profile grid.
    ele_post = browser.find_one(".v1Nh3 a")
    ele_post.click()

    dict_posts = {}
    pbar = tqdm(total=num)
    pbar.set_description("fetching")
    cur_key = None

    # Collect the post keys up front: the detail overlay no longer exposes
    # a stable per-post URL, so each fetched post is labelled from this list.
    all_posts = self._get_posts(num)
    # NOTE(review): i starts at 1 and the guard below is `i < num`, so
    # all_posts[0] is never used as a key and at most num - 1 detail pages
    # are fetched -- this mirrors the original workaround; confirm intended.
    i = 1

    for _ in range(num):
        dict_post = {}
        try:
            print("i: " + str(i) + " num: " + str(num))
            if i < num:
                dict_post["key"] = all_posts[i]['key']
                fetch_datetime(browser, dict_post)
                fetch_imgs(browser, dict_post)
                fetch_likes_plays(browser, dict_post)
                fetch_likers(browser, dict_post)
                fetch_caption(browser, dict_post)
                fetch_comments(browser, dict_post)
                print(json.dumps(dict_post, ensure_ascii=False))
                self.log(json.dumps(dict_post, ensure_ascii=False))
                dict_posts[browser.current_url] = dict_post
                pbar.update(1)
                check_next_post(all_posts[i]['key'])
                i = i + 1
        except RetryException:
            # BUGFIX: parentheses are required -- `+` binds tighter than
            # `or`, so the original `"..." + cur_key or ...` raised
            # TypeError whenever cur_key was None (which it always is here).
            sys.stderr.write(
                "\x1b[1;31m"
                + "Failed to fetch the post: "
                + (cur_key or 'URL not fetched')
                + "\x1b[0m"
                + "\n"
            )
            break
        except Exception:
            # BUGFIX: the conditional expression must be parenthesized --
            # previously the entire concatenation was its `if` branch.
            sys.stderr.write(
                "\x1b[1;31m"
                + "Failed to fetch the post: "
                + (cur_key if isinstance(cur_key, str) else 'URL not fetched')
                + "\x1b[0m"
                + "\n"
            )
            traceback.print_exc()

    pbar.close()
    posts = list(dict_posts.values())
    if posts:
        # Sort newest-first by the fetched datetime string.
        posts.sort(key=lambda post: post["datetime"], reverse=True)
    print("Alaa")
    print(json.dumps(posts, ensure_ascii=False))
    return posts
it works! thank you! However, the function fetch_likers is not working!
Thx this is working but the caption is not correct.