twitterscraper icon indicating copy to clipboard operation
twitterscraper copied to clipboard

Collection of Followers, Followings of account Year Wise

Open NileshJorwar opened this issue 6 years ago • 4 comments

Is there any way that I can collect the Followers, Followings, Likes of Twitter account year wise , say at end of December every year from the day account was established on the Twitter?

NileshJorwar avatar Aug 29 '18 19:08 NileshJorwar

Hi @NileshJorwar I have a script which can scrape user account info including location, number of tweets, number of followers, number of following, and number of likes. It can be expanded to also scrape the actual accounts of the followers and followings, but I don't think it is possible to know when the following started.

taspinar avatar Oct 26 '18 15:10 taspinar

Hi @taspinar Can you make the script available to scrape the user info for the variables mentioned in above response?

NileshJorwar avatar Oct 29 '18 20:10 NileshJorwar

@NileshJorwar . I will write a separate User class later in a proper way, but for now you can find the code below. Sorry for the messy code. You will need to install the geopy library (pip install geopy) in order for this to work.

As you can see, you need to pass the output of the scrape process, i.e. the list of Tweet objects, to scrape_user_info().


import re
import random
import requests
from collections import Counter
from bs4 import BeautifulSoup

from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
# NOTE(review): GeocoderTimedOut is imported but never caught below.
# Module-level geocoder shared by process_location(); Nominatim is the free
# OpenStreetMap geocoding service.
geolocator = Nominatim()

 
# Pool of desktop-browser User-Agent strings; scrape_user_info() picks one at
# random per request so the traffic looks less like a single automated client.
HEADERS_LIST = [
    'Mozilla/5.0 (Windows; U; Windows NT 6.1; x64; fr; rv:1.9.2.13) Gecko/20101203 Firebird/3.6.13',
    'Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
    'Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201',
    'Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16',
    'Mozilla/5.0 (Windows NT 5.2; RW; rv:7.0a1) Gecko/20091211 SeaMonkey/9.23a1pre'
]

# Base URL prefix for all Twitter profile pages.
URL_INIT = 'https://twitter.com/'


def parse_url(tweet_user):
    """Build the profile-page URL for *tweet_user*, dropping any '@' prefix."""
    handle = tweet_user.strip('@')
    return URL_INIT + handle
 
def process_location(location = None):
    if not location:
        return ""
    if ',' in location:
        splitted_location = location.split(',')
    else:
        splitted_location = re.split('|;|-|/|°|#', location)
    if splitted_location:
        located_location = [geolocator.geocode(elem, timeout=100) for elem in splitted_location]
        located_location = list(filter(None, located_location))
    else:
        located_location = geolocator.geocode(location, timeout=100)
    return located_location or ""
    
 
def _nav_count(soup, item_class):
    """Return the integer 'data-count' of a ProfileNav item, or "" when absent.

    NOTE(review): the original code coerced a count of 0 to "" via `or ""`;
    that quirk is preserved for backward compatibility with downstream users.
    """
    item = soup.find('li', item_class)
    if not item:
        return ""
    value = item.find('span', 'ProfileNav-value')
    if not value or not value.has_attr('data-count'):
        return ""
    return int(value['data-count']) or ""


def scrape_user_info(list_tweets):
    """Scrape profile info for every distinct author in *list_tweets*.

    Parameters
    ----------
    list_tweets : list
        Tweet objects; each must expose a ``.user`` attribute (the handle).

    Returns
    -------
    dict
        Maps each username to a dict with keys 'no_occurences', 'location',
        'processed_location', 'no_likes', 'no_followers', 'no_following' and
        'no_tweets'.  Users whose profile page cannot be fetched or parsed
        keep only 'no_occurences'.
    """
    list_tweet_users = [tweet.user for tweet in list_tweets]
    # Count how many of the scraped tweets each user authored.
    dict_users = {user: {'no_occurences': count}
                  for user, count in Counter(list_tweet_users).most_common()}

    for user in dict_users:
        try:
            # Rotate user agents to reduce the chance of being rate limited.
            headers = {'User-Agent': random.choice(HEADERS_LIST)}
            response = requests.get(parse_url(user), headers=headers)
            # Fail early on HTTP errors (404 for deleted/suspended accounts).
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'lxml')

            # The location span is optional on a profile; a missing span now
            # degrades to '' instead of aborting the whole user (the original
            # raised AttributeError here and skipped every other field).
            location_span = soup.find('span', 'ProfileHeaderCard-locationText')
            location = location_span.text.strip('\n').strip() if location_span else ''

            dict_users[user]['location'] = location
            dict_users[user]['processed_location'] = process_location(location)
            dict_users[user]['no_likes'] = _nav_count(soup, 'ProfileNav-item--favorites')
            dict_users[user]['no_followers'] = _nav_count(soup, 'ProfileNav-item--followers')
            dict_users[user]['no_following'] = _nav_count(soup, 'ProfileNav-item--following')
            dict_users[user]['no_tweets'] = _nav_count(soup, 'ProfileNav-item--tweets')
        except (requests.RequestException, AttributeError, KeyError,
                TypeError, ValueError) as exc:
            # Narrowed from a bare `except:` (which also swallowed
            # KeyboardInterrupt/SystemExit and hid genuine bugs); the error is
            # now included in the message for debuggability.
            print("Not found for user {} ({})".format(user, exc))
            continue
    return dict_users

taspinar avatar Nov 04 '18 15:11 taspinar

@NileshJorwar hey, how can I get the full list of followers of one Twitter account? I want to know all follower IDs for one account.

lidd77 avatar Sep 01 '21 07:09 lidd77