gorilla
gorilla copied to clipboard
Added Postman Top 50 to API Zoo
I scraped the Postman top 50 collections into the format specified in the doc. I didn't add any of the optional arguments because most of the collections did not have that info. Many of the Postman collections are also missing data, like a version or a description, and many have unresolved variables. I have an error log of these issues, so let me know if you want to see it, or if you want me to handle the errors in a different way. Currently, I just put "POST MISSING DESC", or something to that effect, in those fields. Let me know if you want anything changed.
@ShishirPatil sounds good, should be a quick fix.
For anyone else who wants to scrape Postman data, I have added the code below. Postman has an API you can call to download collection data in JSON form, which can then be parsed. I have two files, collection_download.py and process_collection.py.
import requests
import json
import os
# Postman API key; replace the placeholder before running.
PASS = 'put postman api key here'
# Auth header sent with every Postman API request.
headers = {'X-Api-Key': PASS}
# Collection IDs parsed from the local `top50` file (one Postman URL per line).
collection_ids = []
# Index to resume downloading from (useful after a partial/interrupted run).
start_ind = 39


def parse_top50():
    """Populate ``collection_ids`` from the ``top50`` file.

    Each line is expected to be a Postman collection URL; the collection id
    is the final path segment (everything after the last ``/``).
    """
    # FIX: context manager closes the file handle (the original leaked it).
    with open('top50') as f:
        for line in f:
            # Take the text after the last '/' and drop the trailing newline.
            collection_ids.append(line[line.rfind('/') + 1:].rstrip('\n'))
def download_links():
    """Download each collection from ``start_ind`` onward via the Postman API
    and write it as pretty-printed JSON under ``input/``.

    Relies on the module-level ``collection_ids``, ``start_ind`` and
    ``headers`` globals.
    """
    # `cid` instead of `id` — the original shadowed the builtin.
    for cid in collection_ids[start_ind:]:
        resp = requests.get(
            f'https://api.getpostman.com/collections/{cid}',
            headers=headers,
            timeout=30,  # FIX: don't hang forever on a stalled connection
        )
        collection = resp.json()['collection']
        # Strip '/' so the collection name is a safe file name.
        safe_name = collection['info']['name'].replace('/', '')
        with open(f'input/{safe_name}.json', 'w') as fp:
            json.dump(collection, fp, indent=4)
# Script entry: read the id list, then fetch each collection from Postman.
parse_top50()
download_links()
import json
import re
import os
# Attributed author for every API-Zoo entry produced by this script.
USERNAME = "opatothan"
# Per-collection error/warning counters: {collection_name: {message: count}}.
errlog = dict()
#UTILS
def version_in_name(name):
    """Return a version token such as ``'v3'`` found in *name*, or None."""
    match = re.search(r'v\d+', name)
    return match.group(0) if match else None
def log_error(error, collection_name):
    """Bump the count for *error* under *collection_name* in ``errlog``.

    Assumes ``errlog[collection_name]`` was already initialised (done by
    ``scrape_collection``).
    """
    counts = errlog[collection_name]
    counts[error] = counts.get(error, 0) + 1
def parse_api_call(collection_name, input_json, variables):
    """Convert one Postman request item into the API-Zoo dict format.

    *variables* maps ``{{placeholder}}`` names to their values for URL
    substitution. Returns the entry dict, or None (after logging the error)
    if the item is malformed in any way.
    """
    try:
        request = input_json['request']

        # Substitute {{var}} placeholders in the raw URL.
        url = request['url']['raw']
        for name, value in variables.items():
            url = url.replace('{{' + name + '}}', value)
        if '{{' in url:
            log_error("Warning: unresolved variables", collection_name)

        # Functionality text: first 20 space-separated words of the
        # description, or a placeholder when Postman has none.
        if 'description' in request:
            desc = ' '.join(request['description'].split(" ")[:20])
        else:
            desc = 'NO DESC IN POSTMAN'
            log_error("Warning: no description in postman", collection_name)

        entry = {
            'user_name': USERNAME,
            'api_name': f'{collection_name}: {input_json["name"]}',
            'api_call': url,
        }
        if 'version' in variables:
            entry['api_version'] = variables['version']
        else:
            entry['api_version'] = "NO VERSION IN POSTMAN"
            log_error("Warning: no version in postman", collection_name)
        entry['api_arguments'] = {
            key: request[key]
            for key in ("auth", "method", "header", "body")
            if key in request
        }
        entry['functionality'] = desc
        return entry
    except Exception as error:
        # Best-effort scrape: record the failure and skip this item.
        log_error(repr(error), collection_name)
        return None
#END UTILS
def scrape_collection(filename):
    """Parse one downloaded collection JSON from ``input/<filename>`` and
    write the extracted API calls to ``output/<collection name>.json``.

    Also initialises the ``errlog`` bucket for this collection.
    """
    api_calls = []
    # FIX: open the file named by `filename` (the original hard-coded a
    # broken path so the parameter was never used) and close it via `with`.
    with open(f'input/{filename}', encoding='utf8') as f:
        cur_collection = json.load(f)
    collection_name = cur_collection['info']['name']
    errlog[collection_name] = dict()

    # Collection-level {{variable}} definitions, used for URL substitution.
    variables = dict()
    if 'variable' in cur_collection:
        for var in cur_collection['variable']:
            variables[var['key']] = str(var['value'])
    if 'version' not in variables:
        # Fall back to a version token embedded in the name (e.g. "API v3").
        version = version_in_name(collection_name)
        if version is not None:
            variables['version'] = version

    def recurse(cur_json):
        # Folders carry an 'item' list; leaves are individual requests.
        if 'item' in cur_json:
            for item in cur_json['item']:
                recurse(item)
        else:
            res = parse_api_call(collection_name, cur_json, variables)
            if res is not None:
                api_calls.append(res)

    recurse(cur_collection)
    # Strip '/' so the collection name is a safe file name.
    with open(f'output/{collection_name.replace("/", "")}.json', 'w') as fp:
        json.dump(api_calls, fp, indent=4)
# Uncomment to process a fixed subset instead of the whole input directory:
#files = ["ActiveCampaign API v3.json", "Amadeus for Developers.json"]
# Process every downloaded collection file under input/.
files = os.listdir('input')
for x in files:
    scrape_collection(x)
# Persist the per-collection error/warning counts gathered during the run.
with open('output/errlog', 'w') as fp:
    json.dump(errlog, fp, indent=4)