#!/usr/bin/env python
"""Export asset and vulnerability data from the Rapid7 InsightVM API (v3).

The script authenticates with HTTP basic auth, downloads the asset list
(optionally with the tags of every asset), downloads the vulnerabilities of
every asset (optionally enriched with the vulnerability details) and writes
both tables to CSV and line-delimited JSON files in the working directory.
"""
import argparse
import json
import sys
import unicodedata

import pandas as pd
import requests
from urllib3.exceptions import InsecureRequestWarning

# Set the base URL for the InsightVM API
# NOTE(review): the host part of this URL is empty -- presumably the console
# hostname has to be filled in before the script can run; confirm.
base_url = 'https://:3780/api/3/'
enrich_asset_vulns = True  # lookup the vulns for each asset
tag_assets = True  # lookup the tags for each asset

# global variables for the data tables
assets_df = pd.DataFrame()
vulns_df = pd.DataFrame()
vuln_extra_df = pd.DataFrame()

# Fields removed from a vulnerability detail record before it is merged into
# the per-asset table: they would duplicate existing columns or carry HTML.
_VULN_DETAIL_DROP_COLUMNS = (
    'id',
    'links',
    'added',
    'modified',
    'published',
    'description.html',
)


class bcolors:
    """
    Helper class for color output on the console prompt
    """
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'


def _load_json(response, required_key: str, missing_message: str) -> dict:
    """
    Helper function to decode a JSON response body and check a required key

    The script is terminated with exit code 1 when the body is not valid
    JSON or when the required top-level key is missing.

    :param response: the HTTP response whose text is decoded
    :param required_key: top-level key that has to be present
    :param missing_message: message printed when the key is missing
    :return: the decoded JSON object
    """
    try:
        data = json.loads(response.text)
    except ValueError as e:  # json.JSONDecodeError is a ValueError
        print("Error: " + str(e))
        sys.exit(1)
    # an explicit check instead of ``assert``, which is removed under ``-O``
    if not isinstance(data, dict) or required_key not in data:
        print(missing_message)
        sys.exit(1)
    return data


def _column_values(frame: pd.DataFrame, column: str) -> list:
    """
    Helper function to read a column as a list, tolerating a missing column

    :param frame: the table to read from
    :param column: the column name
    :return: the column values, or one ``None`` per row if the column is absent
    """
    if column in frame.columns:
        return frame[column].tolist()
    return [None] * len(frame)


def _create_api_session(username: str, password: str) -> requests.Session:
    """
    Helper function to create a session with the Rapid7 InsightVM API

    Certificate verification is switched off on the session and the matching
    urllib3 warning is silenced.

    :param username: user name for HTTP basic authentication
    :param password: password for HTTP basic authentication
    :return: the configured session
    """
    # disable warnings for self-signed certificates
    requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)

    # Create a Session object and set the authentication credentials
    try:
        session = requests.Session()
        session.auth = (username, password)
        session.verify = False
    except Exception as e:
        print("Error: " + str(e))
        sys.exit(1)
    return session


def _initiate_pagination(session: requests.Session, api_endpoint: str):
    """
    Helper function to initiate pagination for the Rapid7 InsightVM API

    Requests the first page of the endpoint to learn the number of pages.
    When that request does not return HTTP 200 the status code is printed and
    a page count of 0 is returned, so callers simply have nothing to iterate.
    The script is terminated when the body carries no pagination data.

    :param session: the authenticated API session
    :param api_endpoint: endpoint path relative to the base URL
    :return: tuple of (number of pages, first response, URL with page size)
    """
    # Set the URL for the endpoint
    api_url = base_url + api_endpoint + '?size=200'

    # Make a GET request to the endpoint
    response = session.get(api_url)

    # Check the status code of the response
    if response.status_code != 200:
        print('Error: ' + str(response.status_code))
        return 0, response, api_url

    setup_data = _load_json(
        response, 'page',
        "Error: The API endpoint does not support pagination")
    try:
        page_limit = int(setup_data["page"]["totalPages"])
    except (KeyError, TypeError, ValueError) as e:
        print("Error: " + str(e))
        sys.exit(1)
    return page_limit, response, api_url


def _lookup_asset_tags(session: requests.Session, asset_id, hostname):
    """
    Helper function to look up the tag names of a single asset

    :param session: the authenticated API session
    :param asset_id: identifier of the asset
    :param hostname: host name of the asset, only used for console output
    :return: string form of the list of tag names (``['untagged']`` when the
             asset has no named tag), or None when the lookup failed
    """
    tags_url = base_url + 'assets/{id}/tags'.format(id=asset_id)
    response = session.get(tags_url)
    if response.status_code != 200:
        print("Error: " + str(response.status_code))
        return None

    print("\t\tLooking up tags for host {hostname}".format(hostname=hostname))
    tag_data = _load_json(
        response, 'resources', "resources or name key not found in response")
    tag_names = [
        str(tag['name'])
        for tag in tag_data['resources']
        if isinstance(tag, dict) and 'name' in tag
    ]
    # making a list of tags for each asset
    return str(tag_names or ['untagged'])


def create_rapid7_asset_list(session: requests.Session):
    """
    Function to create a list of assets from the Rapid7 InsightVM API

    Stores the result in the global ``assets_df`` and writes it to
    ``asset_list.csv`` and ``asset_list.json``. The script is terminated when
    no asset could be retrieved.

    :param session: the authenticated API session
    :return: None
    """
    global assets_df

    print("Retrieving asset data from Rapid7 InsightVM")
    page_limit, _, asset_url = _initiate_pagination(session, 'assets')

    # make a list to hold the asset data, one DataFrame per page
    asset_data = []
    print("Retrieving asset list...")
    for page in range(page_limit):
        # the API selects a page with the zero-based 'page' parameter;
        # the page size is already part of asset_url
        response = session.get(asset_url, params={'page': page})
        if response.status_code != 200:
            print('An error occurred:')
            print(response.status_code)
            continue

        data = _load_json(
            response, 'resources', "resources key not found in response")
        df = pd.json_normalize(data["resources"])
        if df.empty:
            continue

        if tag_assets:
            hostnames = _column_values(df, 'hostName')
            df['tags'] = [
                _lookup_asset_tags(session, asset_id, hostname)
                for asset_id, hostname in zip(df['id'].tolist(), hostnames)
            ]
            print("\tAsset tags retrieved")
        asset_data.append(df)

    if not asset_data:
        print("Error: No assets found")
        sys.exit(1)

    print("Building asset list...")
    assets_df = pd.concat(asset_data, ignore_index=True)

    print("Writing asset list to file...")
    name = "asset_list"
    assets_df.to_csv(name + '.csv', index=False)
    assets_df.to_json(name + '.json', orient='records', lines=True)


def _enrich_vuln_data(session: requests.Session, vuln_id: str):
    """
    Helper function to enrich the vulnerability data with additional information

    The script is terminated when the response body is not valid JSON or has
    no ``cvss`` key.

    :param session: the authenticated API session
    :param vuln_id: identifier of the vulnerability
    :return: one-row DataFrame with the flattened vulnerability details, or
             None when the request did not return HTTP 200
    """
    # https://help.rapid7.com/insightvm/en-us/api/index.html#operation/getVulnerability
    current_vuln_url = base_url + "vulnerabilities/{id}".format(id=vuln_id)
    response = session.get(current_vuln_url)
    if response.status_code != 200:
        print('An error occurred:')
        print(response.status_code)
        return None

    data = _load_json(response, 'cvss', "cvss key not found in response")
    return pd.json_normalize(data)


def _enrich_vuln_page(session: requests.Session, df: pd.DataFrame, asset_tags):
    """
    Helper function to add the vulnerability details to one page of results

    One detail record is fetched per row of ``df`` and the records are joined
    to ``df`` row by row. A failed lookup leaves the detail columns of that
    row empty.

    :param session: the authenticated API session
    :param df: one page of vulnerabilities of an asset, must have an id column
    :param asset_tags: tags value of the asset, stored in a ``tags`` column
    :return: new DataFrame with the detail columns appended
    """
    records = []
    for vuln_id in df["id"].tolist():
        detail = _enrich_vuln_data(session, vuln_id)
        if detail is None or detail.empty:
            records.append({})
            continue
        record = detail.iloc[0].to_dict()
        # we cannot have duplicate column names
        # we also drop html content in the tables
        for column in _VULN_DETAIL_DROP_COLUMNS:
            record.pop(column, None)
        records.append(record)

    # explicit index keeps one row per vulnerability even if all lookups failed
    extra_df = pd.DataFrame(records, index=range(len(records)))
    extra_df['tags'] = asset_tags
    return pd.concat([df.reset_index(drop=True), extra_df], axis=1)


def create_rapid7_vuln_asset_list(session: requests.Session):
    """
    Function to create a list of vulnerabilities from the Rapid7 InsightVM API

    Reads the assets from the global ``assets_df``, stores the result in the
    global ``vulns_df`` and writes it to ``vulns_list.csv`` and
    ``vulns_list.json``. The script is terminated when there are no assets.

    :param session: the authenticated API session
    :return: None
    """
    # https://help.rapid7.com/insightvm/en-us/api/index.html#operation/getAssetVulnerabilities
    global vulns_df

    # create a list of asset ids for the requests
    if assets_df.empty or 'id' not in assets_df.columns:
        print("Error: No assets found")
        sys.exit(1)
    asset_ids = assets_df['id'].tolist()
    asset_names = _column_values(assets_df, 'hostName')
    asset_tags = _column_values(assets_df, 'tags')

    # make a list to hold the vulnerability data per asset page
    # collect all lists into a DataFrame at the end
    vuln_data = []
    asset_count = len(asset_ids)
    for i, current_asset in enumerate(asset_ids):
        current_hostname = asset_names[i]
        current_asset_tags = asset_tags[i]
        endpoint = "assets/{id}/vulnerabilities".format(id=current_asset)
        page_limit, _, current_asset_url = _initiate_pagination(
            session, endpoint)

        for page in range(page_limit):
            print("Asset {} of {}".format(i + 1, asset_count))
            # the API selects a page with the zero-based 'page' parameter
            response = session.get(current_asset_url, params={'page': page})
            if response.status_code != 200:
                print('An error occurred:')
                print(response.status_code)
                continue

            data = _load_json(
                response, 'resources', "resources key not found in response")
            df = pd.json_normalize(data["resources"])
            if df.empty or 'id' not in df.columns:
                # an asset without findings is not a reason to stop the export
                print("\tNo vulnerabilities found for asset: {}".format(
                    current_hostname))
                continue
            df = df.assign(assetHostname=current_hostname)

            if enrich_asset_vulns:
                print("\tEnriching vulnerability data for asset: {}".format(
                    current_hostname))
                print("\tTags: {}".format(current_asset_tags))
                print("\t\t... processing page {} of {}".format(
                    page + 1, page_limit))
                # enrich the vulnerability data with additional information
                df = _enrich_vuln_page(session, df, current_asset_tags)
            vuln_data.append(df)

        print("\tData retrieved from asset {}\n".format(current_hostname))

    if not vuln_data:
        print("No vulnerability data retrieved")
        return

    print("Building vulnerability list...")
    vulns_df = pd.concat(vuln_data, ignore_index=True)

    print("Saving vulnerability data to file...")
    name = "vulns_list"
    vulns_df.to_csv(name + ".csv", index=False)
    vulns_df.to_json(name + ".json", orient='records', lines=True)


def main():
    """
    Main function to run the script
    """
    # create the args parser
    parser = argparse.ArgumentParser()
    parser.add_argument("--username", required=True)
    parser.add_argument("--password", required=True)
    args = parser.parse_args()

    print(bcolors.BOLD + bcolors.OKBLUE
          + "! Starting Rapid7 InsightVM API script !" + bcolors.ENDC)
    print(bcolors.WARNING + "Currently only tested for less than 200 assets"
          + bcolors.ENDC)
    print()

    try:
        # apply Unicode compatibility decomposition (NFKD) to the credentials
        username = unicodedata.normalize("NFKD", args.username)
        password = unicodedata.normalize("NFKD", args.password)
    except UnicodeError:
        print("Invalid Unicode string in username or password")
        sys.exit(1)

    # create the session before the try block so that the cleanup in
    # ``finally`` never sees an unbound name
    session = _create_api_session(username, password)
    del password
    try:
        create_rapid7_asset_list(session)
        create_rapid7_vuln_asset_list(session)
    except Exception as e:
        print("Error: " + str(e))
    finally:
        # close the session
        session.close()


if __name__ == '__main__':
    main()