rapid7-insightvm-data-exporter/insightVM-exporter.py

368 lines
12 KiB
Python

#!/usr/bin/env python
import requests
from urllib3.exceptions import InsecureRequestWarning
import json
import pandas as pd
import argparse
import unicodedata
# Base URL for the InsightVM API; endpoint paths are appended to it as plain
# strings, so it must end with a slash. Replace the placeholder host before use.
base_url = 'https://<YOUR_INSIGHTVM_HOST>:3780/api/3/'
enrich_asset_vulns = True # when True, each asset vulnerability is looked up individually for extra detail
tag_assets = True # when True, the tags of every asset are looked up and stored in a 'tags' column
# Module-level data tables. assets_df is filled by create_rapid7_asset_list and
# read by create_rapid7_vuln_asset_list; vulns_df is filled by
# create_rapid7_vuln_asset_list.
assets_df = pd.DataFrame()
vulns_df = pd.DataFrame()
# NOTE(review): no function in this file declares this name global, so the
# module-level table appears to stay empty - confirm before relying on it.
vuln_extra_df = pd.DataFrame()
class bcolors:
    """
    ANSI escape sequences used to colour and style console output.

    Every attribute is a plain string; print one before the text it should
    affect and print ENDC afterwards to reset the terminal.
    """
    # reset
    ENDC = '\033[0m'

    # text attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # foreground colours
    FAIL = '\033[91m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    OKBLUE = '\033[94m'
    HEADER = '\033[95m'
def _create_api_session(username: str, password: str):
    """
    Helper function to create a session with the Rapid7 InsightVM API

    The returned session sends the credentials as HTTP basic authentication
    and has TLS certificate verification switched off, so consoles with a
    self-signed certificate can be reached. The matching urllib3 warning is
    silenced for the whole process.
    :param username: account name used for authentication
    :param password: password used for authentication
    :return: a configured ``requests.Session``
    """
    # disable warnings for self-signed certificates
    requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)
    try:
        session = requests.Session()
    except Exception as e:
        print("Error: " + str(e))
        raise SystemExit(1)
    # NOTE(review): verify=False accepts any certificate; point session.verify
    # at a CA bundle instead when the console certificate can be trusted.
    session.auth = (username, password)
    session.verify = False
    return session
def _initiate_pagination(session: requests.Session, api_endpoint: str):
    """
    Helper function to initiate pagination for the Rapid7 InsightVM API

    Requests the endpoint once with a page size of 200 and reads the number of
    pages from the ``page.totalPages`` field of the answer. The process is
    terminated with exit status 1 when the request is not answered with
    status 200, the body is not valid JSON, or the paging information is
    missing.
    :param session: authenticated session used for the request
    :param api_endpoint: endpoint path relative to ``base_url``
    :return: tuple of (total number of pages, the initial response, the full
        request URL including the ``size`` query parameter)
    """
    # Set the URL for the endpoint
    api_url = base_url + api_endpoint + '?size=200'
    response = session.get(api_url)
    if response.status_code != 200:
        # There is no page count to return for a failed request, so stop here
        # instead of falling through to the return statement.
        print('Error: ' + str(response.status_code))
        raise SystemExit(1)
    try:
        setup_data = json.loads(response.text)
        has_page_info = 'page' in setup_data
    except Exception as e:
        print("Error: " + str(e))
        raise SystemExit(1)
    if not has_page_info:
        print("Error: The API endpoint does not support pagination")
        raise SystemExit(1)
    try:
        page_limit = setup_data["page"]["totalPages"]
    except Exception as e:
        print("Error: " + str(e))
        raise SystemExit(1)
    return page_limit, response, api_url
def _fetch_asset_tag_names(session, asset_id, host_label):
    """
    Helper function to look up the tag names assigned to a single asset
    :param session: authenticated session used for the request
    :param asset_id: value of the asset's ``id`` field
    :param host_label: text shown for the asset in the progress message
    :return: list of tag names (``["untagged"]`` when the asset has none), or
        None when the request was not answered with status 200
    """
    tags_url = base_url + 'assets/{id}/tags'.format(id=asset_id)
    tag_response = session.get(tags_url)
    if tag_response.status_code != 200:
        print("Error: " + str(tag_response.status_code))
        return None
    try:
        tag_data = json.loads(tag_response.text)
        print("\t\tLooking up tags for host {hostname}".format(hostname=host_label))
        has_resources = 'resources' in tag_data
    except Exception as e:
        print("Error: " + str(e))
        raise SystemExit(1)
    if not has_resources:
        print("resources or name key not found in response")
        raise SystemExit(1)
    tag_names = [str(tag['name']) for tag in tag_data['resources']
                 if isinstance(tag, dict) and 'name' in tag]
    return tag_names or ["untagged"]


def create_rapid7_asset_list(session: requests.Session):
    """
    Function to create a list of assets from the Rapid7 InsightVM API

    Downloads every page of the ``assets`` endpoint, optionally adds a
    ``tags`` column (when ``tag_assets`` is set), stores the combined table in
    the module-level ``assets_df`` and writes it to ``asset_list.csv`` and
    ``asset_list.json``. The process is terminated with exit status 1 when a
    page cannot be retrieved or parsed.
    :param session: authenticated session used for all requests
    :return: None
    """
    global assets_df
    print("Retrieving asset data from Rapid7 InsightVM")
    # the initial request is only needed for the page count and the URL
    page_limit, _, asset_url = _initiate_pagination(session, 'assets')
    # one DataFrame per page, combined after the loop
    asset_data = []
    print("Retrieving asset list...")
    page_index = 0
    while True:
        # The API selects a page with the zero-based 'page' parameter; the
        # page size is already part of asset_url.
        page_response = session.get(asset_url, params={'page': page_index})
        if page_response.status_code != 200:
            print('An error occurred:')
            print(page_response.status_code)
            raise SystemExit(1)
        try:
            data = json.loads(page_response.text)
            has_resources = 'resources' in data
        except Exception as e:
            print("Error: " + str(e))
            raise SystemExit(1)
        if not has_resources:
            print("resources key not found in response")
            raise SystemExit(1)
        df = pd.json_normalize(data["resources"])
        if tag_assets:
            if not df.empty:
                has_host_names = 'hostName' in df.columns
                tag_column = []
                for index in df.index:
                    asset_id = df.loc[index, 'id']
                    host_label = df.loc[index, 'hostName'] if has_host_names else asset_id
                    tag_names = _fetch_asset_tag_names(session, asset_id, host_label)
                    # a failed lookup leaves the cell empty instead of inventing tags
                    tag_column.append(None if tag_names is None else str(tag_names))
                df['tags'] = tag_column
            print("\tAsset tags retrieved")
        asset_data.append(df)
        page_index += 1
        if page_index >= page_limit:
            break
    print("Building asset list...")
    assets_df = pd.concat(asset_data, ignore_index=True)
    print("Writing asset list to file...")
    name = "asset_list"
    assets_df.to_csv(name + '.csv', index=False)
    assets_df.to_json(name + '.json', orient='records', lines=True)
def _enrich_vuln_data(session: requests.Session, vuln_id: str):
    """
    Helper function to enrich the vulnerability data with additional information

    Requests the ``vulnerabilities/{id}`` endpoint and flattens the JSON answer
    into a DataFrame. The process is terminated with exit status 1 when the
    body cannot be parsed or has no ``cvss`` key.
    :param session: authenticated session used for the request
    :param vuln_id: identifier of the vulnerability to look up
    :return: DataFrame built from the answer, or None when the request was not
        answered with status 200
    """
    # https://help.rapid7.com/insightvm/en-us/api/index.html#operation/getVulnerability
    current_vuln_url = base_url + "vulnerabilities/{id}".format(id=vuln_id)
    response = session.get(current_vuln_url)
    if response.status_code != 200:
        print('An error occurred:')
        print(response.status_code)
        return None
    try:
        data = json.loads(response.text)
        has_cvss = 'cvss' in data
    except Exception as e:
        print("Error: " + str(e))
        raise SystemExit(1)
    if not has_cvss:
        print("cvss key not found in response")
        raise SystemExit(1)
    return pd.json_normalize(data)
def _vuln_detail_record(session, vuln_id, cache):
    """
    Helper function returning the detail fields of one vulnerability as a dict

    Results are kept in ``cache`` so a vulnerability shared by several assets
    is requested only once per run.
    :param session: authenticated session used for the request
    :param vuln_id: identifier of the vulnerability
    :param cache: dict mapping vulnerability ids to already fetched records
    :return: dict of column name to value; empty when the lookup failed
    """
    if vuln_id not in cache:
        details = _enrich_vuln_data(session, vuln_id)
        if details is None or details.empty:
            cache[vuln_id] = {}
        else:
            # we cannot have duplicate column names
            # we also drop html content in the tables
            unwanted = ['id', 'links', 'added', 'modified', 'published', 'description.html']
            details = details.drop(columns=unwanted, errors='ignore')
            cache[vuln_id] = details.iloc[0].to_dict()
    return cache[vuln_id]


def create_rapid7_vuln_asset_list(session: requests.Session):
    """
    Function to create a list of vulnerabilities from the Rapid7 InsightVM API

    For every asset in the module-level ``assets_df`` all pages of the
    ``assets/{id}/vulnerabilities`` endpoint are downloaded. When
    ``enrich_asset_vulns`` is set, each row is extended with the fields of the
    corresponding vulnerability and with the tags of the asset. The combined
    table is stored in the module-level ``vulns_df`` and written to
    ``vulns_list.csv`` and ``vulns_list.json``. Assets without vulnerabilities
    are skipped. The process is terminated with exit status 1 when there are
    no assets or a page cannot be retrieved or parsed.
    :param session: authenticated session used for all requests
    :return: None
    """
    # https://help.rapid7.com/insightvm/en-us/api/index.html#operation/getAssetVulnerabilities
    global vulns_df
    # one DataFrame per retrieved page, combined after the loops
    vuln_data = []
    # vulnerability details fetched so far, keyed by vulnerability id
    detail_cache = {}
    if assets_df.empty or 'id' not in assets_df.columns:
        print("Error: No assets found")
        raise SystemExit(1)
    asset_ids = assets_df['id'].tolist()
    asset_count = len(asset_ids)
    # hostName and tags are optional columns; fall back to empty values
    if 'hostName' in assets_df.columns:
        asset_names = assets_df['hostName'].tolist()
    else:
        asset_names = [None] * asset_count
    if 'tags' in assets_df.columns:
        asset_tags = assets_df['tags'].tolist()
    else:
        asset_tags = [None] * asset_count
    for i, (current_asset, current_hostname, current_asset_tags) in enumerate(
            zip(asset_ids, asset_names, asset_tags)):
        endpoint = "assets/{id}/vulnerabilities".format(id=current_asset)
        # the initial request is only needed for the page count and the URL
        page_limit, _, current_asset_url = _initiate_pagination(session, endpoint)
        page_index = 0
        while True:
            print("Asset {} of {}".format(i + 1, asset_count))
            # The API selects a page with the zero-based 'page' parameter; the
            # page size is already part of current_asset_url.
            response = session.get(current_asset_url, params={'page': page_index})
            if response.status_code != 200:
                print('An error occurred:')
                print(response.status_code)
                raise SystemExit(1)
            try:
                data = json.loads(response.text)
                has_resources = 'resources' in data
            except Exception as e:
                print("Error: " + str(e))
                raise SystemExit(1)
            if not has_resources:
                print("resources key not found in response")
                raise SystemExit(1)
            df = pd.json_normalize(data["resources"])
            df = df.assign(assetHostname=current_hostname)
            has_vulns = not df.empty and 'id' in df.columns
            if enrich_asset_vulns:
                print("\tEnriching vulnerability data for asset: {}".format(current_hostname))
                print("\tTags: {}".format(current_asset_tags))
                print("\t\t... processing page {} of {}".format(page_index + 1, page_limit))
                if has_vulns:
                    # One detail record per row, in row order, so the columns
                    # line up with the vulnerability they belong to.
                    records = [_vuln_detail_record(session, vuln_id, detail_cache)
                               for vuln_id in df["id"].tolist()]
                    extras = pd.DataFrame(records, index=df.index)
                    # we cannot have duplicate column names
                    clashes = [column for column in extras.columns if column in df.columns]
                    extras = extras.drop(columns=clashes)
                    extras['tags'] = current_asset_tags
                    df = pd.concat([df, extras], axis=1)
                else:
                    print("\t\tNo vulnerabilities found for asset: {}".format(current_hostname))
            page_index += 1
            if not df.empty:
                vuln_data.append(df)
            if page_index >= page_limit:
                break
        print("\tData retrieved from asset {}\n".format(current_hostname))
    print("Building vulnerability list...")
    # pd.concat rejects an empty list, which happens when no asset has findings
    vulns_df = pd.concat(vuln_data, ignore_index=True) if vuln_data else pd.DataFrame()
    print("Saving vulnerability data to file...")
    name = "vulns_list"
    vulns_df.to_csv(name + ".csv", index=False)
    vulns_df.to_json(name + ".json", orient='records', lines=True)
if __name__ == '__main__':
    # Script entry point: read the credentials from the command line, export
    # the asset list and then the vulnerability list. The exit status is 1
    # when any step fails.
    parser = argparse.ArgumentParser()
    parser.add_argument("--username", required=True)
    parser.add_argument("--password", required=True)
    args = parser.parse_args()
    print(bcolors.BOLD + bcolors.OKBLUE + "! Starting Rapid7 InsightVM API script !" + bcolors.ENDC)
    print(bcolors.WARNING + "Currently only tested for less than 200 assets" + bcolors.ENDC)
    print()
    try:
        # Apply Unicode compatibility decomposition (NFKD) to the credentials.
        # NOTE(review): this does not convert the text to ASCII; confirm that
        # the decomposed form is what the console expects for non-ASCII input.
        username = unicodedata.normalize("NFKD", args.username)
        password = unicodedata.normalize("NFKD", args.password)
    except UnicodeError:
        print("Invalid Unicode string in username or password")
        raise SystemExit(1)
    except Exception as e:
        print("Error: " + str(e))
        raise SystemExit(1)
    # Bound before the try block so the cleanup below also works when the
    # session could not be created.
    session = None
    try:
        session = _create_api_session(username, password)
        create_rapid7_asset_list(session)
        create_rapid7_vuln_asset_list(session)
    except Exception as e:
        print("Error: " + str(e))
        # report the failure to the calling shell as well
        raise SystemExit(1)
    finally:
        # close the session
        if session is not None:
            session.close()