twitterscraper
twitterscraper copied to clipboard
Collection of Followers, Followings of account Year Wise
Is there any way to collect the Followers, Followings, and Likes of a Twitter account year-wise, say at the end of December every year, starting from the day the account was created on Twitter?
Hi @NileshJorwar, I have a script which can scrape user account info, including location, number of tweets, number of followers, number of following, and number of likes. It can be expanded to also scrape the actual accounts of the followers and followings, but I don't think it is possible to know when the following started.
Hi @taspinar Can you make the script available to scrape the user info for the variables mentioned in above response?
@NileshJorwar . I will write a separate User class later in a proper way, but for now you can find the code below. Sorry for the messy code.
You will need to install the geopy library (`pip install geopy`) in order for this to work.
As you can see, you need to pass the output of the scrape process, i.e. the list of Tweet objects, to the scrape_user_info() function.
import re
import random
import requests
from collections import Counter
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
# Shared geocoder used by process_location(). Nominatim's usage policy asks
# for an application-specific User-Agent, and geopy 2.x raises
# ConfigurationError when none is given, so one is passed explicitly.
geolocator = Nominatim(user_agent='twitterscraper-user-info')

# Pool of browser User-Agent strings; scrape_user_info() picks one at random
# for every profile request.
HEADERS_LIST = [
    'Mozilla/5.0 (Windows; U; Windows NT 6.1; x64; fr; rv:1.9.2.13) Gecko/20101203 Firebird/3.6.13',
    'Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
    'Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201',
    'Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16',
    'Mozilla/5.0 (Windows NT 5.2; RW; rv:7.0a1) Gecko/20091211 SeaMonkey/9.23a1pre',
]

# Base URL to which parse_url() appends the user handle.
URL_INIT = 'https://twitter.com/'
def parse_url(tweet_user):
    """Return the profile URL for *tweet_user*.

    Any '@' characters at the start or end of the handle are removed before
    the handle is appended to URL_INIT.
    """
    handle = tweet_user.strip('@')
    return '{}{}'.format(URL_INIT, handle)
def _split_location(location):
    """Split a free-text location into trimmed, non-empty fragments."""
    if ',' in location:
        fragments = location.split(',')
    else:
        # A character class is used on purpose: an alternation that starts
        # with an empty branch ('|;|-...') matches the empty string, and on
        # Python 3.7+ re.split() would then split between every character.
        fragments = re.split(r'[|;\-/°#]', location)
    stripped = (fragment.strip() for fragment in fragments)
    return [fragment for fragment in stripped if fragment]


def process_location(location=None, geocode=None):
    """Geocode each fragment of a free-text profile location.

    The text is split on commas when it contains any, otherwise on the
    characters ``| ; - / ° #``. Every non-empty fragment is looked up
    separately.

    Args:
        location: Raw location text; ``None`` or an empty string is allowed.
        geocode: Optional callable ``geocode(query, timeout=...)`` used for
            the lookups. Defaults to the module-level ``geolocator.geocode``.

    Returns:
        A list with the truthy lookup results, or ``""`` when the input is
        empty or nothing could be resolved.
    """
    if not location:
        return ""
    if geocode is None:
        geocode = geolocator.geocode

    located_location = []
    for fragment in _split_location(location):
        try:
            match = geocode(fragment, timeout=100)
        except GeocoderTimedOut:
            # One slow lookup should not discard the fragments that did
            # resolve; treat a timeout as "no result" for this fragment.
            continue
        if match:
            located_location.append(match)
    return located_location or ""
def _profile_count(soup, nav_class):
    """Return the integer ``data-count`` of one profile navigation item.

    Returns ``""`` when the ``li`` item, its ``ProfileNav-value`` span, or a
    numeric ``data-count`` attribute is missing. A real count of 0 is
    returned as ``0``, not as ``""``.
    """
    nav_item = soup.find('li', nav_class)
    if nav_item is None:
        return ""
    value = nav_item.find('span', 'ProfileNav-value')
    if value is None:
        return ""
    try:
        return int(value['data-count'])
    except (KeyError, TypeError, ValueError):
        return ""


def scrape_user_info(list_tweets, request_timeout=30):
    """Scrape public profile statistics for every user in *list_tweets*.

    Args:
        list_tweets: Iterable of objects exposing a ``user`` attribute.
        request_timeout: Seconds to wait for each profile request, so one
            stalled connection cannot block the whole run.

    Returns:
        A dict keyed by user. Every value holds ``'no_occurences'`` (how many
        of the given tweets belong to that user). When the profile could be
        fetched it also holds ``'location'``, ``'processed_location'``,
        ``'no_likes'``, ``'no_followers'``, ``'no_following'`` and
        ``'no_tweets'``; a statistic that is absent from the page is ``""``.
    """
    users_by_frequency = Counter(tweet.user for tweet in list_tweets).most_common()
    dict_users = {user: {'no_occurences': count} for user, count in users_by_frequency}

    for user, info in dict_users.items():
        try:
            headers = {'User-Agent': random.choice(HEADERS_LIST)}
            response = requests.get(parse_url(user), headers=headers,
                                    timeout=request_timeout)
            # Error pages carry no profile markup; fail here so the user is
            # reported as not found instead of stored with empty fields.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'lxml')

            location_tag = soup.find('span', 'ProfileHeaderCard-locationText')
            location = location_tag.text.strip() if location_tag is not None else ''

            scraped = {
                'location': location,
                'processed_location': process_location(location),
                'no_likes': _profile_count(soup, 'ProfileNav-item--favorites'),
                'no_followers': _profile_count(soup, 'ProfileNav-item--followers'),
                'no_following': _profile_count(soup, 'ProfileNav-item--following'),
                'no_tweets': _profile_count(soup, 'ProfileNav-item--tweets'),
            }
        except Exception:
            # Best effort per user: network, HTTP and geocoding failures skip
            # this user only. Unlike a bare except, KeyboardInterrupt and
            # SystemExit still propagate.
            print("Not found for user {}".format(user))
            continue
        # Applied only after everything succeeded, so a failed user never
        # ends up with a half-filled record.
        info.update(scraped)
    return dict_users
@NileshJorwar hey, how can I get a list of all followers of one Twitter account? I want to know the IDs of all followers of one account.