gorilla icon indicating copy to clipboard operation
gorilla copied to clipboard

Added Postman Top 50 to API Zoo

Open opatothan opened this issue 1 year ago • 2 comments

I scraped the Postman top 50 collections into the format specified in the doc. I didn't add any of the optional arguments because most of the collections did not have that info. Many of the Postman collections are also missing data, like a version or a description. Many of these collections also have unresolved variables. I have an error log of these things, so let me know if you want to see it, or if you want me to handle the errors in a different way. Currently, I just have "POST MISSING DESC", or something to that effect, in those fields. Let me know if you want anything changed.

opatothan avatar Jan 28 '24 21:01 opatothan

@ShishirPatil sounds good, should be a quick fix.

opatothan avatar Feb 05 '24 19:02 opatothan

For anyone else who wants to scrape Postman data, I have added the code below. Postman has an API you can call to download collection data in JSON form, which can then be parsed. I have two files: collection_download.py and process_collection.py.

import requests
import json
import os

# --- collection_download.py: fetch Postman collections via the Postman API ---

# Postman API key, sent as the X-Api-Key header. Replace before running.
PASS = 'put postman api key here'
headers = {'X-Api-Key': PASS}
# Collection ids scraped from the 'top50' file; filled by parse_top50().
collection_ids = []
# Index into collection_ids to resume downloads from.
# NOTE(review): presumably left at 39 after a partially completed run — confirm.
start_ind = 39
def parse_top50():
    """Populate the global collection_ids list from the 'top50' file.

    Each line of 'top50' is a collection URL; the collection id is the
    segment after the last '/'.
    """
    # Context manager guarantees the file handle is closed (the original
    # opened it and never closed it); iterating the file directly avoids
    # materializing every line with readlines().
    with open('top50') as f:
        for line in f:
            collection_ids.append(line.rstrip('\n').rsplit('/', 1)[-1])


def download_links():
    """Download each collection from start_ind onward via the Postman API
    and write its JSON to input/<collection name>.json.

    Raises requests.HTTPError on a non-2xx response (e.g. bad API key or
    rate limiting) instead of crashing on a missing 'collection' key or
    silently writing an error payload to disk.
    """
    for i in range(start_ind, len(collection_ids)):
        coll_id = collection_ids[i]  # renamed: `id` shadows the builtin
        r = requests.get(
            f'https://api.getpostman.com/collections/{coll_id}',
            headers=headers,
            timeout=30,  # don't hang forever on a stalled connection
        )
        r.raise_for_status()
        collection = r.json()['collection']
        # '/' in a collection name would be treated as a path separator.
        safe_name = collection['info']['name'].replace('/', '')
        with open(f'input/{safe_name}.json', 'w') as fp:
            json.dump(collection, fp, indent=4)

# Script entry point: read the id list, then fetch every collection.
parse_top50()
download_links()
import json
import re
import os

# --- process_collection.py: convert downloaded collections to API Zoo format ---

# API Zoo username recorded in every generated entry.
USERNAME = "opatothan"

# Per-collection warning/error counts: {collection_name: {message: count}}.
errlog = dict()

#UTILS

def version_in_name(name):
    """Return the first 'v<digits>' token found in *name* (e.g. 'v3'),
    or None when the name carries no version marker."""
    found = re.search(r'v\d+', name)
    return found.group(0) if found else None

def log_error(error, collection_name):
    """Bump the counter for *error* under *collection_name* in the global
    errlog (the collection's sub-dict must already exist)."""
    counts = errlog[collection_name]
    counts[error] = counts.get(error, 0) + 1

def parse_api_call(collection_name, input_json, variables):
    """Convert one Postman request object into an API Zoo entry dict.

    Returns the entry dict, or None when the request cannot be parsed;
    all problems (unresolved {{vars}}, missing description/version,
    unexpected structure) are tallied via log_error.
    """
    try:
        request = input_json['request']

        # Substitute every known {{variable}} into the raw URL.
        url = request['url']['raw']
        for var_name, var_value in variables.items():
            url = url.replace('{{' + var_name + '}}', var_value)
        if '{{' in url:
            log_error("Warning: unresolved variables", collection_name)

        # Truncate descriptions to their first 20 whitespace-split words.
        if 'description' in request:
            desc = ' '.join(request['description'].split(" ")[:20])
        else:
            desc = 'NO DESC IN POSTMAN'
            log_error("Warning: no description in postman", collection_name)

        output = {
            'user_name': USERNAME,
            'api_name': f'{collection_name}: {input_json["name"]}',
            'api_call': url,
        }
        if 'version' in variables:
            output['api_version'] = variables['version']
        else:
            output['api_version'] = "NO VERSION IN POSTMAN"
            log_error("Warning: no version in postman", collection_name)
        output['api_arguments'] = {
            param: request[param]
            for param in ("auth", "method", "header", "body")
            if param in request
        }
        output['functionality'] = desc
        return output
    except Exception as error:
        # Deliberate best-effort: a malformed request is logged and skipped
        # rather than aborting the whole collection.
        log_error(repr(error), collection_name)
        return None

#END UTILS

def scrape_collection(filename):
    """Parse one downloaded collection file from input/ and write its API
    Zoo entries to output/<collection name>.json.

    Per-collection warnings/errors are recorded in the global errlog.
    """
    # BUG FIX: the path previously hard-coded a broken literal and ignored
    # `filename` entirely; open the file we were actually asked to process.
    # `with` also guarantees the handle is closed (it was leaked before).
    with open(f'input/{filename}', encoding='utf8') as f:
        cur_collection = json.load(f)
    collection_name = cur_collection['info']['name']
    errlog[collection_name] = dict()

    # Collect collection-level variables so {{var}} placeholders resolve.
    variables = dict()
    if 'variable' in cur_collection:
        for var in cur_collection['variable']:
            variables[var['key']] = str(var['value'])
    # Fall back to a version embedded in the name (e.g. "API v3").
    # BUG FIX: this fallback previously ran only when the collection declared
    # variables, so variable-less collections never got a version.
    name_version = version_in_name(collection_name)
    if 'version' not in variables and name_version is not None:
        variables['version'] = name_version

    api_calls = []

    def recurse(cur_json):
        # Folders carry an 'item' list; leaves are individual requests.
        if 'item' in cur_json:
            for item in cur_json['item']:
                recurse(item)
        else:
            res = parse_api_call(collection_name, cur_json, variables)
            if res is not None:
                api_calls.append(res)

    recurse(cur_collection)

    # '/' in the name would be treated as a path separator.
    with open(f'output/{collection_name.replace("/", "")}.json', 'w') as fp:
        json.dump(api_calls, fp, indent=4)
    
# Driver: convert every downloaded collection, then dump the error log.
for collection_file in os.listdir('input'):
    scrape_collection(collection_file)

with open('output/errlog', 'w') as fp:
    json.dump(errlog, fp, indent=4)

opatothan avatar Feb 06 '24 07:02 opatothan