Linguist 278: Programming for Linguists
Stanford Linguistics, Fall 2021
Christopher Potts
This notebook provides very basic functionality for using the Twitter API (Application Programming Interface) to download tweets, along with their metadata, that match a set of keywords. Check out Twitter's documentation for much more!
import requests
import pandas as pd
Apply for developer access at
https://developer.twitter.com/
and you'll be given a Bearer Token to use here:
BEARER_TOKEN = None  # Fill this in using the value given to you by Twitter.
# Guard against running this cell before the token is filled in:
assert BEARER_TOKEN is not None, "Please set BEARER_TOKEN to your Twitter Bearer Token."
HEADERS = {'Authorization': 'Bearer ' + BEARER_TOKEN}
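As an aside, if you'd rather not paste the token directly into the notebook, you could instead read it from an environment variable. This is just an optional sketch; the variable name TWITTER_BEARER_TOKEN is my own choice, not something Twitter requires:
import os

# Hypothetical alternative: set TWITTER_BEARER_TOKEN in your shell before
# launching Jupyter, then read it here instead of hard-coding the token.
BEARER_TOKEN = os.environ["TWITTER_BEARER_TOKEN"]
HEADERS = {'Authorization': 'Bearer ' + BEARER_TOKEN}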
def get_tweet_query_params(keywords, max_results=100):
    # Build a disjunctive keyword query, e.g. "Python OR Stanford":
    disj = " OR ".join(keywords)
    # See https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/tweet
    tweet_fields = [
        'author_id',
        'public_metrics',
        'created_at',
        'context_annotations',
        'conversation_id',
        'geo',
        'referenced_tweets',
        'reply_settings',
        'source']
    params = {
        # Exclude retweets and replies, and restrict to English-language tweets:
        'query': '-is:retweet -is:reply lang:en ({})'.format(disj),
        'tweet.fields': ",".join(tweet_fields),
        'max_results': max_results}
    return params
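To get a feel for the query syntax, here's what the function above builds for a pair of keywords (the comment shows the resulting query string):
params = get_tweet_query_params(["Python", "Stanford"], max_results=10)
params['query']
# => '-is:retweet -is:reply lang:en (Python OR Stanford)'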
def tweets2df(keywords, max_results=100):
    # The API raises an error if `max_results` isn't
    # in the range [10, 100], so clamp it to that range:
    max_results = max(10, min(100, max_results))
    url = "https://api.twitter.com/2/tweets/search/recent"
    params = get_tweet_query_params(keywords, max_results=max_results)
    response = requests.request(
        "GET",
        url,
        headers=HEADERS,
        params=params)
    # Raise an informative error for bad tokens, rate limits, etc.:
    response.raise_for_status()
    j = response.json()
    df = pd.json_normalize(j['data'])
    return df
tweet_df = tweets2df(["Python", "Stanford"], max_results=10)
tweet_df.head(2)
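Since public_metrics was included in the tweet fields, and pd.json_normalize flattens nested objects into dotted column names, the frame should include columns like public_metrics.retweet_count. Here is an illustrative way to look at the most-retweeted tweets in the sample; adjust the column names to whatever your frame actually contains:
# Columns like 'public_metrics.retweet_count' come from json_normalize
# flattening the nested 'public_metrics' object; they may vary by response.
metric_cols = [c for c in tweet_df.columns if c.startswith('public_metrics.')]
tweet_df.sort_values('public_metrics.retweet_count', ascending=False)[
    ['text', 'created_at'] + metric_cols].head(5)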
def get_user_query_params(author_ids):
    # See https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/user
    user_fields = [
        'name',
        'username']
    params = {
        # The users endpoint expects a comma-separated list of user ids:
        'ids': ",".join(author_ids),
        # 'name' and 'username' are returned by default, so requesting
        # them explicitly is optional:
        #'user.fields': ",".join(user_fields)
    }
    return params
def users2df(author_ids):
    url = "https://api.twitter.com/2/users"
    params = get_user_query_params(author_ids)
    response = requests.request(
        "GET",
        url,
        headers=HEADERS,
        params=params)
    # Raise an informative error for bad tokens, rate limits, etc.:
    response.raise_for_status()
    j = response.json()
    df = pd.json_normalize(j['data'])
    return df
user_df = users2df(tweet_df['author_id'])
user_df.head(2)
df = tweet_df.merge(user_df, how='left', left_on='author_id', right_on='id')
df.head(2)
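From here, a natural next step is to see how many tweets each user contributed and to save the merged frame for later analysis. This is just a sketch; the filename is arbitrary:
# Number of tweets per user in this small sample
# ('username' comes from the user metadata merged in above):
df['username'].value_counts().head()

# Save the merged tweets + user metadata for later work:
df.to_csv("tweets_with_users.csv", index=False)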