data sorting

master
Daniel Mikula 2023-04-27 12:17:19 +02:00
parent 22ce1c53c1
commit 1952038cc0
1 changed file with 231 additions and 76 deletions


@@ -15,6 +15,8 @@ import requests
import urllib.parse
import time
import numpy as np
def get_slo(ENV, DTAPIToken, DTENV) -> pd.DataFrame:
# DTENV = base url
@@ -46,17 +48,21 @@ def build_params(params: typing.Dict) -> str:
# TODO: remove env parameter
def get_hosts_from_dynatrace(
env: str, token: str, env_url: str, params: typing.Dict, route: str
def get_data_from_dynatrace(
throttling: float | int,
token: str,
env_url: str,
params: typing.Dict | str,
route: str,
) -> typing.Dict:
"""
Sends a GET request to the Dynatrace API
Args:
env (str): Environment (euprod|naprod|cnprod)
throttling (float | int): Sleep duration in seconds, used to throttle requests
token (str): Token for dynatrace API
env_url (str): Url for the respective environment
params (typing.Dict): Parameters as dictionary as stated on dynatrace documentation
params (typing.Dict | str): Parameters as a dictionary (see the Dynatrace API documentation) or a pre-built query string
route (str): Route for the request
Returns:
@@ -65,7 +71,7 @@ def get_hosts_from_dynatrace(
# TODO: add support for the nextPageKey paging feature
time.sleep(0.5)
time.sleep(throttling)
if type(params) is dict:
params_string = f"?{build_params(params)}"
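# Usage sketch for get_data_from_dynatrace - illustrative only; the token and
# tenant URL below are placeholders, not values from this repo:
#
#   params = {"entitySelector": 'type("HOST")', "fields": "tags"}
#   data = get_data_from_dynatrace(
#       0.5, "<api-token>", "https://<tenant>.live.dynatrace.com", params, "entities"
#   )
#   hosts = data["entities"]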
@@ -91,6 +97,55 @@ def previous_week_range(date: datetime):
return start_date, end_date
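# (Body elided by this hunk. A plausible previous-week helper - an assumption,
# not necessarily this repo's code - would be:
#   start_date = date - datetime.timedelta(days=date.weekday() + 7)
#   end_date = start_date + datetime.timedelta(days=6)
# )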
def get_process_group_data(df: pd.DataFrame) -> typing.Dict:
"""
Fetches process group data from Dynatrace
Args:
df (pd.DataFrame): Dataframe with process group ids
Returns:
typing.Dict: Dictionary with unique process group data per hub
"""
hub_data = {}
with open("./environment.yaml") as file:
env_doc = yaml.safe_load(file)
for env, doc in env_doc.items():
token = dict(doc[2])
url = dict(doc[1])
hub_data[env] = {}
if config(token.get("env-token-name")) != "":
DTTOKEN = config(token.get("env-token-name"))
DTURL = url.get("env-url")
hub_data[env]["token"] = DTTOKEN
hub_data[env]["url"] = DTURL
unique_process_groups_per_hub = {}
unique_hubs = df["environment"].unique()
for hub in unique_hubs:
unique_process_groups_per_hub[hub] = {}
process_groups_unique = df.query("environment == @hub")["process_group_id"].unique()
for process_group in process_groups_unique:
params = {
"entitySelector": f'type("PROCESS_GROUP"),entityId("{process_group}")',
"fields": "firstSeenTms,tags",
}
data = get_data_from_dynatrace(
0.1, hub_data[hub]["token"], hub_data[hub]["url"], params, "entities"
)
unique_process_groups_per_hub[hub][process_group] = data["entities"]
return unique_process_groups_per_hub
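# The returned mapping is keyed hub -> process group id -> entity list, e.g.
# (shape only, illustrative values):
#
#   {
#       "euprod": {
#           "PROCESS_GROUP-...": [
#               {"displayName": "...", "firstSeenTms": ..., "tags": [...]}
#           ]
#       }
#   }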
def build_dataframe_for_report(report_items: typing.Dict) -> pd.DataFrame:
"""
Builds a pandas dataframe from the items received from Dynatrace
@@ -102,83 +157,130 @@ def build_dataframe_for_report(report_items: typing.Dict) -> pd.DataFrame:
pd.DataFrame: Contains data as requested for further processing
"""
df_data = [] # fill list with dictionary objects which contain requested data
df = pd.DataFrame(report_items)
process_group_data = get_process_group_data(df)
df = pd.DataFrame(
df_data,
columns=[
"slo_name",
"host_name",
"host_id",
"environment",
"paas",
"platform",
"process_group_id",
"process_group_name",
"namespace",
"licensing_tag_host",
"licensing_tag_process_group",
"first_seen_process_group",
"first_seen_host",
],
)
for hub in process_group_data:
for pgid in process_group_data[hub]:
if len(process_group_data[hub][pgid]) == 0:
# TODO: Custom device group returns null data - handling needed
print(f"ERROR: {hub} - {pgid} | no data returned from dynatrace")
else:
df.loc[
(df["environment"] == hub) & (df["process_group_id"] == pgid),
"process_group_name",
] = process_group_data[hub][pgid][0]["displayName"]
df.loc[
(df["environment"] == hub) & (df["process_group_id"] == pgid),
"first_seen_process_group",
] = process_group_data[hub][pgid][0]["firstSeenTms"]
print(df)
print("Writing to xlsx")
write_xlsx(df)
return df
def write_xlsx(df: pd.DataFrame) -> bool:
pass
def write_xlsx(df: pd.DataFrame) -> None:
filename = f"CoCo-APM-Report_{datetime.date.today()}.xlsx"
writer = pd.ExcelWriter(filename, engine="xlsxwriter")
df.to_excel(writer, sheet_name="hosts", index=False)
writer.close()
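# Equivalent idiom: pd.ExcelWriter is also a context manager, which saves and
# closes the workbook automatically:
#
#   with pd.ExcelWriter(filename, engine="xlsxwriter") as writer:
#       df.to_excel(writer, sheet_name="hosts", index=False)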
def develop_load_json():
with open("test-data.json", "r") as f:
with open("test-data-with-hosts-main.json", "r") as f:
data = json.loads(f.read())
with open("./environment.yaml") as file:
env_doc = yaml.safe_load(file)
df_data = []
for env, doc in env_doc.items():
# DEBUG
if env == "euprod":
token = dict(doc[2])
url = dict(doc[1])
for hub in data:
for slo in data[hub]:
slo_name = data[hub][slo]["sloname"]
if len(data[hub][slo]["services"]) > 0:
for service in data[hub][slo]["services"]:
if len(service["entities"]) > 0:
for entity in service["entities"]:
if "fromRelationships" in entity:
if "runsOnHost" in entity["fromRelationships"]:
for host in entity["fromRelationships"][
"runsOnHost"
]:
df_data_item = {
"slo_name": slo_name,
"host_name": host["details"]["displayName"],
"host_id": host["id"],
"environment": hub,
"process_group_id": "NaN",
"process_group_name": "NaN",
"licensing_tag_host": "NaN",
"licensing_tag_process_group": "NaN",
"first_seen_process_group": "NaN",
"first_seen_host": host["details"][
"firstSeenTms"
],
}
if config(token.get("env-token-name")) != "":
print("Gather data, hold on a minute")
DTTOKEN = config(token.get("env-token-name"))
DTURL = url.get("env-url")
for tag in host["details"]["tags"]:
if tag["key"] == "Platform":
df_data_item["platform"] = tag["value"]
if tag["key"] == "Namespace":
df_data_item["namespace"] = tag["value"]
if tag["key"] == "PaaS":
df_data_item["paas"] = tag["value"]
for slo in data[env]:
if len(data[env][slo]["services"]) == 0:
# DEBUG
print(f"ERROR: {slo} has no services")
else:
for service in data[env][slo]["services"]:
params = {
"entitySelector": f'type("SERVICE"),entityId("{service["entityId"]}")',
"fields": "fromRelationships,tags",
}
entities = get_hosts_from_dynatrace(
env, DTTOKEN, DTURL, params, "entities"
)
# TODO: "entities" may be empty - add a check.
service["entities"] = entities["entities"]
for hosts in service["entities"]:
if "fromRelationships" in hosts:
if "runsOnHost" in hosts["fromRelationships"]:
for host in hosts["fromRelationships"][
"runsOnHost"
]:
# TODO: make dynatrace call to /entities/{entityId}
print(f'{slo} - {host["id"]}')
# host_response = get_hosts_from_dynatrace(
# env, DTTOKEN, DTURL, host["id"], "entities"
# )
# host["details"] = host_response
# TODO: rework
if "runsOn" in entity["fromRelationships"]:
for process_group in entity[
"fromRelationships"
]["runsOn"]:
df_data_item[
"process_group_id"
] = process_group["id"]
with open("test-data-with-hosts-2.json", "w") as f:
f.write(json.dumps(data, indent=4))
df_data.append(df_data_item)
build_dataframe_for_report(df_data)
# with open("./environment.yaml") as file:
# env_doc = yaml.safe_load(file)
# for env, doc in env_doc.items():
# # DEBUG
# if env == "euprod":
# token = dict(doc[2])
# url = dict(doc[1])
# if config(token.get("env-token-name")) != "":
# print("Gather data, hold on a minute")
# DTTOKEN = config(token.get("env-token-name"))
# DTURL = url.get("env-url")
# for slo in data[env]:
# if len(data[env][slo]["services"]) == 0:
# # DEBUG
# print(f"ERROR: {slo} has no services")
# else:
# for service in data[env][slo]["services"]:
# params = {
# "entitySelector": f'type("SERVICE"),entityId("{service["entityId"]}")',
# "fields": "fromRelationships,tags",
# }
# entities = get_data_from_dynatrace(
# 0.5, DTTOKEN, DTURL, params, "entities"
# )
# # TODO: "entities" may be empty - add a check.
# service["entities"] = entities["entities"]
# for hosts in service["entities"]:
# if "fromRelationships" in hosts:
# if "runsOnHost" in hosts["fromRelationships"]:
# for host in hosts["fromRelationships"][
# "runsOnHost"
# ]:
# # TODO: make dynatrace call to /entities/{entityId}
# host_response = get_data_from_dynatrace(
# 0.5, DTTOKEN, DTURL, host["id"], "entities"
# )
# host["details"] = host_response
def check_if_service_already_exists(services: list, entity_id: str) -> bool:
@@ -189,6 +291,58 @@ def check_if_service_already_exists(services: list, entity_id: str) -> bool:
return result
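# (Body elided by this hunk. Judging by the signature alone, a plausible
# implementation - an assumption, not the repo's actual code - is:
#   result = any(service["entityId"] == entity_id for service in services)
# )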
def build_data_frame_data(data: typing.Dict) -> None:
df_data = []
for hub in data:
for slo in data[hub]:
slo_name = data[hub][slo]["sloname"]
if len(data[hub][slo]["services"]) > 0:
for service in data[hub][slo]["services"]:
if len(service["entities"]) > 0:
for entity in service["entities"]:
if "fromRelationships" in entity:
if "runsOnHost" in entity["fromRelationships"]:
for host in entity["fromRelationships"][
"runsOnHost"
]:
df_data_item = {
"slo_name": slo_name,
"host_name": host["details"]["displayName"],
"host_id": host["id"],
"environment": hub,
"process_group_id": "NaN",
"process_group_name": "NaN",
"licensing_tag_host": "NaN",
"licensing_tag_process_group": "NaN",
"first_seen_process_group": "NaN",
"first_seen_host": host["details"][
"firstSeenTms"
],
}
for tag in host["details"]["tags"]:
if tag["key"] == "Platform":
df_data_item["platform"] = tag["value"]
if tag["key"] == "Namespace":
df_data_item["namespace"] = tag["value"]
if tag["key"] == "PaaS":
df_data_item["paas"] = tag["value"]
# TODO: rework
if "runsOn" in entity["fromRelationships"]:
for process_group in entity[
"fromRelationships"
]["runsOn"]:
df_data_item[
"process_group_id"
] = process_group["id"]
df_data.append(df_data_item)
build_dataframe_for_report(df_data)
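# Input shape expected by build_data_frame_data above, reconstructed from its
# lookups (illustrative, field values elided):
#
#   {
#       "<hub>": {
#           "<slo>": {
#               "sloname": "...",
#               "services": [{
#                   "entities": [{
#                       "fromRelationships": {
#                           "runsOnHost": [{"id": "...", "details": {...}}],
#                           "runsOn": [{"id": "..."}]
#                       }
#                   }]
#               }]
#           }
#       }
#   }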
@timer
def main() -> None:
reportItem = {}
@@ -303,8 +457,8 @@ def main() -> None:
"entitySelector": f'type("SERVICE"),entityId("{service["entityId"]}")',
"fields": "fromRelationships,tags",
}
entities = get_hosts_from_dynatrace(
env, DTTOKEN, DTURL, params, "entities"
entities = get_data_from_dynatrace(
0.5, DTTOKEN, DTURL, params, "entities"
)
# TODO: "entities" may be empty - add a check.
service["entities"] = entities["entities"]
@@ -319,8 +473,8 @@ def main() -> None:
"fromRelationships"
]["runsOnHost"]:
host_response = (
get_hosts_from_dynatrace(
env,
get_data_from_dynatrace(
0.5,
DTTOKEN,
DTURL,
host["id"],
@@ -329,10 +483,11 @@ def main() -> None:
)
host["details"] = host_response
with open("test-data-with-hosts-main.json", "w") as f:
f.write(json.dumps(reportItem, indent=4))
build_data_frame_data(reportItem)
# with open("test-data-with-hosts-main.json", "w") as f:
# f.write(json.dumps(reportItem, indent=4))
if __name__ == "__main__":
main()
# develop_load_json()
# main()
develop_load_json()