#!/usr/bin/env python
# Provenance: insightVM-exporter.py as added in commit
# a9dec81e7338b63584383fece7c5feb2578aeb06
# (Author: marius, Date: Fri Dec 16 07:07:54 2022 +0000, "publishing code").
"""
Export asset and vulnerability data from the Rapid7 InsightVM API (v3).

The script authenticates with HTTP basic auth, downloads the asset list
(optionally with the tags of every asset), then downloads the vulnerabilities
of every asset (optionally enriched with the vulnerability details) and writes
both tables to ``asset_list.{csv,json}`` and ``vulns_list.{csv,json}`` in the
current working directory.
"""

import argparse
import json
import sys
import unicodedata

import pandas as pd
import requests
from urllib3.exceptions import InsecureRequestWarning

# Set the base URL for the InsightVM API.
# The host part is empty here: edit it, or pass --base-url on the command line.
base_url = 'https://:3780/api/3/'
enrich_asset_vulns = True  # lookup the vulns for each asset
tag_assets = True  # lookup the tags for each asset

# number of records requested per page of a paginated endpoint
PAGE_SIZE = 200

# columns of the vulnerability details that are dropped before merging them
# into the per-asset table (duplicate names and html content)
_ENRICHMENT_DROP_COLUMNS = (
    'id',
    'links',
    'added',
    'modified',
    'published',
    'description.html',
)

# global variables for the data tables
assets_df = pd.DataFrame()
vulns_df = pd.DataFrame()
vuln_extra_df = pd.DataFrame()  # kept for backward compatibility; unused


class bcolors:
    """
    Helper class for color output on the console prompt
    """
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'


def _create_api_session(username: str, password: str):
    """
    Helper function to create a session with the Rapid7 InsightVM API
    :param username: user name for HTTP basic auth
    :param password: password for HTTP basic auth
    :return: a ``requests.Session`` carrying the credentials
    """
    # disable warnings for self-signed certificates
    requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)

    try:
        session = requests.Session()
    except Exception as e:
        print("Error: " + str(e))
        sys.exit(1)

    session.auth = (username, password)
    # NOTE(review): certificate verification is switched off, so the
    # credentials are sent without authenticating the server. Point
    # ``session.verify`` at the console's CA bundle where one is available.
    session.verify = False

    return session


def _load_json(response):
    """
    Helper function to decode the JSON body of a response
    :param response: a ``requests.Response``
    :return: the decoded body; prints the error and exits when it is not JSON
    """
    try:
        return json.loads(response.text)
    except ValueError as e:
        print("Error: " + str(e))
        sys.exit(1)


def _initiate_pagination(session: requests.Session, api_endpoint: str):
    """
    Helper function to initiate pagination for the Rapid7 InsightVM API
    :param session: authenticated session
    :param api_endpoint: endpoint path relative to ``base_url``
    :return: tuple ``(page_limit, response, api_url)``: the number of pages,
             the response holding the first page and the URL that was
             requested. ``page_limit`` is 0 when the request failed, so
             callers simply have nothing to page through.
    """
    # Set the URL for the endpoint
    api_url = base_url + api_endpoint + '?size={size}'.format(size=PAGE_SIZE)

    # Make a GET request to the endpoint; this already returns the first page
    response = session.get(api_url)

    if response.status_code != 200:
        print('Error: ' + str(response.status_code))
        return 0, response, api_url

    setup_data = _load_json(response)
    try:
        page_limit = int(setup_data["page"]["totalPages"])
    except (KeyError, TypeError, ValueError):
        print("Error: The API endpoint does not support pagination")
        sys.exit(1)

    return page_limit, response, api_url


def _iter_resource_pages(session: requests.Session, api_endpoint: str):
    """
    Helper generator walking over all pages of a paginated endpoint
    :param session: authenticated session
    :param api_endpoint: endpoint path relative to ``base_url``
    :return: yields ``(page_index, page_count, resources)`` per page
    """
    page_limit, response, api_url = _initiate_pagination(session, api_endpoint)

    for page in range(page_limit):
        if page > 0:
            # The first page came with the initial request. The API selects
            # a page with the zero-based ``page`` parameter; ``size`` is
            # already part of the URL.
            response = session.get(api_url, params={'page': page})

        if response.status_code != 200:
            print('An error occurred:')
            print(response.status_code)
            # stop paging instead of retrying the failed request forever
            return

        data = _load_json(response)
        if not isinstance(data, dict) or 'resources' not in data:
            print("resources key not found in response")
            sys.exit(1)

        yield page, page_limit, data["resources"]


def _asset_labels(df: pd.DataFrame):
    """
    Helper function to build a printable name for every asset row
    :param df: asset table with an ``id`` column
    :return: list of strings: the hostName, else the ip, else ``asset <id>``
    """
    labels = ["asset {id}".format(id=asset_id) for asset_id in df['id'].tolist()]
    # later columns take precedence over earlier ones
    for column in ('ip', 'hostName'):
        if column not in df.columns:
            continue
        for position, value in enumerate(df[column].tolist()):
            # missing values are NaN (a float), never a string
            if isinstance(value, str) and value:
                labels[position] = value
    return labels


def _lookup_asset_tags(session: requests.Session, asset_id):
    """
    Helper function to look up the tag names of a single asset
    :param session: authenticated session
    :param asset_id: id of the asset
    :return: list of tag names (``["untagged"]`` when the asset has none),
             or None when the request failed
    """
    tags_url = base_url + 'assets/{id}/tags'.format(id=asset_id)
    response = session.get(tags_url)

    if response.status_code != 200:
        print("Error: " + str(response.status_code))
        return None

    tag_data = _load_json(response)
    if not isinstance(tag_data, dict) or 'resources' not in tag_data:
        print("resources or name key not found in response")
        sys.exit(1)

    names = [
        str(tag['name'])
        for tag in tag_data["resources"]
        if isinstance(tag, dict) and 'name' in tag
    ]
    return names or ["untagged"]


def create_rapid7_asset_list(session: requests.Session):
    """
    Function to create a list of assets from the Rapid7 InsightVM API

    Stores the table in the global ``assets_df`` and writes it to
    ``asset_list.csv`` and ``asset_list.json``.
    :param session: authenticated session
    :return: None
    """
    global assets_df

    print("Retrieving asset data from Rapid7 InsightVM")

    # make a list to hold the asset data, one DataFrame per page
    asset_data = []

    print("Retrieving asset list...")

    for _page, _page_count, resources in _iter_resource_pages(session, 'assets'):
        df = pd.json_normalize(resources)
        if df.empty or 'id' not in df.columns:
            continue

        if tag_assets:
            tags = []
            for asset_id, label in zip(df['id'].tolist(), _asset_labels(df)):
                print("\t\tLooking up tags for host {hostname}".format(hostname=label))
                names = _lookup_asset_tags(session, asset_id)
                # the list of tags is stored as its string representation;
                # a failed lookup leaves the cell empty
                tags.append(None if names is None else str(names))
            df['tags'] = tags

            print("\tAsset tags retrieved")

        asset_data.append(df)

    if not asset_data:
        print("Error: No assets found")
        sys.exit(1)

    print("Building asset list...")
    assets_df = pd.concat(asset_data, ignore_index=True)

    print("Writing asset list to file...")
    name = "asset_list"
    assets_df.to_csv(name + '.csv', index=False)
    assets_df.to_json(name + '.json', orient='records', lines=True)


def _enrich_vuln_data(session: requests.Session, vuln_id: str):
    """
    Helper function to enrich the vulnerability data with additional information
    :param session: authenticated session
    :param vuln_id: id of the vulnerability
    :return: one-row DataFrame with the flattened vulnerability details, or
             None when the request failed
    """
    # https://help.rapid7.com/insightvm/en-us/api/index.html#operation/getVulnerability

    current_vuln_url = base_url + "vulnerabilities/{id}".format(id=vuln_id)
    response = session.get(current_vuln_url)

    if response.status_code != 200:
        print('An error occurred:')
        print(response.status_code)
        return None

    data = _load_json(response)
    if not isinstance(data, dict) or 'cvss' not in data:
        print("cvss key not found in response")
        sys.exit(1)

    return pd.json_normalize(data)


def _enrich_page(session: requests.Session, df: pd.DataFrame, cache: dict):
    """
    Helper function to append the vulnerability details to one page of results
    :param session: authenticated session
    :param df: vulnerabilities of one asset (one row each, ``id`` column)
    :param cache: dict mapping vulnerability id to its details; filled here so
                  every vulnerability is downloaded only once per run
    :return: ``df`` with the detail columns appended row by row
    """
    records = []
    for vuln_id in df['id'].tolist():
        record = cache.get(vuln_id)
        if record is None:
            details = _enrich_vuln_data(session, vuln_id)
            if details is None or details.empty:
                # keep a placeholder so the rows stay aligned; failures are
                # not cached and get another chance on the next asset
                record = {}
            else:
                # we cannot have duplicate column names
                # we also drop html content in the tables
                details = details.drop(
                    columns=list(_ENRICHMENT_DROP_COLUMNS), errors='ignore')
                record = details.iloc[0].to_dict()
                cache[vuln_id] = record
        records.append(record)

    if not any(records):
        return df

    extra = pd.DataFrame(records, index=df.index)
    # guard against any remaining name clash with the asset-level columns
    clashes = [column for column in extra.columns if column in df.columns]
    extra = extra.drop(columns=clashes)

    # one concat for the whole page: row i of extra describes row i of df
    return pd.concat([df, extra], axis=1)


def create_rapid7_vuln_asset_list(session: requests.Session):
    """
    Function to create a list of vulnerabilities from the Rapid7 InsightVM API

    Reads the assets from the global ``assets_df``, stores the result in the
    global ``vulns_df`` and writes it to ``vulns_list.csv`` and
    ``vulns_list.json``.
    :param session: authenticated session
    :return: None
    """
    # https://help.rapid7.com/insightvm/en-us/api/index.html#operation/getAssetVulnerabilities

    global vulns_df

    if assets_df.empty or 'id' not in assets_df.columns:
        print("Error: No assets found")
        sys.exit(1)

    # create the per-asset lists for the requests
    asset_ids = assets_df['id'].tolist()
    asset_names = _asset_labels(assets_df)
    if 'tags' in assets_df.columns:
        # a failed tag lookup leaves a non-string (missing) value behind
        asset_tags = [
            tag if isinstance(tag, str) else ""
            for tag in assets_df['tags'].tolist()
        ]
    else:
        asset_tags = [""] * len(asset_ids)

    # make a list to hold the vulnerability data per asset page
    # collect all lists into a DataFrame at the end
    vuln_data = []
    vuln_cache = {}

    # loop over the assets and maintain an index i
    for i, current_asset in enumerate(asset_ids):
        current_hostname = asset_names[i]
        current_asset_tags = asset_tags[i]
        endpoint = "assets/{id}/vulnerabilities".format(id=current_asset)

        for page, page_count, resources in _iter_resource_pages(session, endpoint):
            print("Asset {} of {}".format(i + 1, len(asset_ids)))

            df = pd.json_normalize(resources)
            if df.empty or 'id' not in df.columns:
                # an asset without findings is not an error
                print("No vulnerabilities found for asset: " + current_hostname)
                continue

            df = df.assign(assetHostname=current_hostname)

            if enrich_asset_vulns:
                print("\tEnriching vulnerability data for asset: " + current_hostname)
                print("\tTags: " + current_asset_tags)
                print("\t\t... processing page {} of {}".format(page + 1, page_count))
                # enrich the vulnerability data with additional information
                df = _enrich_page(session, df, vuln_cache)
                df['tags'] = current_asset_tags

            vuln_data.append(df)

        print("\tData retrieved from asset {}\n".format(current_hostname))

    if not vuln_data:
        print("No vulnerabilities found, nothing to write")
        return

    print("Building vulnerability list...")
    vulns_df = pd.concat(vuln_data, ignore_index=True)

    print("Saving vulnerability data to file...")
    name = "vulns_list"
    vulns_df.to_csv(name + ".csv", index=False)
    vulns_df.to_json(name + ".json", orient='records', lines=True)


def main():
    """
    Main function to run the script
    """
    global base_url

    # create the args parser
    # NOTE(review): a password given on the command line is visible in the
    # process list and the shell history.
    parser = argparse.ArgumentParser()
    parser.add_argument("--username", required=True)
    parser.add_argument("--password", required=True)
    parser.add_argument(
        "--base-url", default=base_url,
        help="base URL of the InsightVM API, e.g. https://HOST:3780/api/3/")
    args = parser.parse_args()

    # endpoint paths are appended directly, so keep exactly one trailing slash
    base_url = args.base_url.rstrip('/') + '/'

    print(bcolors.BOLD + bcolors.OKBLUE + "! Starting Rapid7 InsightVM API script !" + bcolors.ENDC)
    print(bcolors.WARNING + "Currently only tested for less than 200 assets" + bcolors.ENDC)
    print()

    try:
        # apply Unicode NFKD normalization to the credentials
        # (this decomposes characters; it does not convert them to ASCII)
        username = unicodedata.normalize("NFKD", args.username)
        password = unicodedata.normalize("NFKD", args.password)
    except UnicodeError:
        # handle UnicodeError exception
        print("Invalid Unicode string in username or password")
        sys.exit(1)
    except Exception as e:
        print("Error: " + str(e))
        sys.exit(1)

    # created outside the try block so that ``finally`` never sees an
    # unbound session
    session = _create_api_session(username, password)
    del password

    try:
        create_rapid7_asset_list(session)
        create_rapid7_vuln_asset_list(session)
    except Exception as e:
        print("Error: " + str(e))
        # report the failure to the calling shell as well
        sys.exit(1)
    finally:
        # close the session
        session.close()


if __name__ == '__main__':
    main()