data sorting

master
Daniel Mikula 2023-04-27 12:17:19 +02:00
parent 22ce1c53c1
commit 1952038cc0
1 changed file with 231 additions and 76 deletions


@@ -15,6 +15,8 @@ import requests
import urllib.parse
import time
import numpy as np
def get_slo(ENV, DTAPIToken, DTENV) -> pd.DataFrame:
# DTENV = base url
@@ -46,17 +48,21 @@ def build_params(params: typing.Dict) -> str:
# TODO: remove env parameter
def get_hosts_from_dynatrace(
env: str, token: str, env_url: str, params: typing.Dict, route: str
def get_data_from_dynatrace(
throttling: float | int,
token: str,
env_url: str,
params: typing.Dict | str,
route: str,
) -> typing.Dict:
"""
Sends a GET request to the Dynatrace API
Args:
env (str): Environment (euprod|naprod|cnprod)
throttling (float | int): Sleep duration in seconds, used to throttle requests
token (str): Token for dynatrace API
env_url (str): Url for the respective environment
params (typing.Dict): Parameters as dictionary as stated on dynatrace documentation
params (typing.Dict | str): Parameters as a dictionary (see the Dynatrace API documentation) or a pre-built query string
route (str): Route for the request
Returns:
@@ -65,7 +71,7 @@ def get_hosts_from_dynatrace(
# TODO: add support for the nextPageKey paging feature
time.sleep(0.5)
time.sleep(throttling)
if type(params) is dict:
params_string = f"?{build_params(params)}"
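# Usage sketch for get_data_from_dynatrace - illustrative only; the token and
# tenant URL below are placeholders, not values from this repo:
#
#   params = {"entitySelector": 'type("HOST")', "fields": "tags"}
#   data = get_data_from_dynatrace(
#       0.5, "<api-token>", "https://<tenant>.live.dynatrace.com", params, "entities"
#   )
#   hosts = data["entities"]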
@@ -91,6 +97,55 @@ def previous_week_range(date: datetime):
return start_date, end_date
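# (Body elided by this hunk. A plausible previous-week helper - an assumption,
# not necessarily this repo's code - would be:
#   start_date = date - datetime.timedelta(days=date.weekday() + 7)
#   end_date = start_date + datetime.timedelta(days=6)
# )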
def get_process_group_data(df: pd.DataFrame) -> typing.Dict:
"""
Fetches process group data from Dynatrace
Args:
df (pd.DataFrame): Dataframe with process group ids
Returns:
typing.Dict: Dictionary with unique process group data per hub
"""
hub_data = {}
with open("./environment.yaml") as file:
env_doc = yaml.safe_load(file)
for env, doc in env_doc.items():
token = dict(doc[2])
url = dict(doc[1])
hub_data[env] = {}
if config(token.get("env-token-name")) != "":
DTTOKEN = config(token.get("env-token-name"))
DTURL = url.get("env-url")
hub_data[env]["token"] = DTTOKEN
hub_data[env]["url"] = DTURL
unique_process_groups_per_hub = {}
unique_hubs = df["environment"].unique()
for hub in unique_hubs:
unique_process_groups_per_hub[hub] = {}
process_groups_unique = df.query("environment == @hub")["process_group_id"].unique()
for process_group in process_groups_unique:
params = {
"entitySelector": f'type("PROCESS_GROUP"),entityId("{process_group}")',
"fields": "firstSeenTms,tags",
}
data = get_data_from_dynatrace(
0.1, hub_data[hub]["token"], hub_data[hub]["url"], params, "entities"
)
unique_process_groups_per_hub[hub][process_group] = data["entities"]
return unique_process_groups_per_hub
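# The returned mapping is keyed hub -> process group id -> entity list, e.g.
# (shape only, illustrative values):
#
#   {
#       "euprod": {
#           "PROCESS_GROUP-...": [
#               {"displayName": "...", "firstSeenTms": ..., "tags": [...]}
#           ]
#       }
#   }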
def build_dataframe_for_report(report_items: typing.Dict) -> pd.DataFrame:
"""
Builds a pandas dataframe from the items received from Dynatrace
@@ -102,83 +157,130 @@ def build_dataframe_for_report(report_items: typing.Dict) -> pd.DataFrame:
pd.DataFrame: Contains data as requested for further processing
"""
df_data = [] # fill list with dictionary objects which contain requested data
df = pd.DataFrame(report_items)
process_group_data = get_process_group_data(df)
df = pd.DataFrame(
df_data,
columns=[
"slo_name",
"host_name",
"host_id",
"environment",
"paas",
"platform",
"process_group_id",
"process_group_name",
"namespace",
"licensing_tag_host",
"licensing_tag_process_group",
"first_seen_process_group",
"first_seen_host",
],
)
for hub in process_group_data:
for pgid in process_group_data[hub]:
if len(process_group_data[hub][pgid]) == 0:
# TODO: Custom device group returns null data - handling needed
print(f"ERROR: {hub} - {pgid} | no data returned from dynatrace")
else:
df.loc[
(df["environment"] == hub) & (df["process_group_id"] == pgid),
"process_group_name",
] = process_group_data[hub][pgid][0]["displayName"]
df.loc[
(df["environment"] == hub) & (df["process_group_id"] == pgid),
"first_seen_process_group",
] = process_group_data[hub][pgid][0]["firstSeenTms"]
print(df)
print("Writing to xlsx")
write_xlsx(df)
return df
def write_xlsx(df: pd.DataFrame) -> bool:
pass
def write_xlsx(df: pd.DataFrame) -> None:
filename = f"CoCo-APM-Report_{datetime.date.today()}.xlsx"
writer = pd.ExcelWriter(filename, engine="xlsxwriter")
df.to_excel(writer, sheet_name="hosts", index=False)
writer.close()
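# Equivalent idiom: pd.ExcelWriter is also a context manager, which saves and
# closes the workbook automatically:
#
#   with pd.ExcelWriter(filename, engine="xlsxwriter") as writer:
#       df.to_excel(writer, sheet_name="hosts", index=False)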
def develop_load_json():
with open("test-data.json", "r") as f:
with open("test-data-with-hosts-main.json", "r") as f:
data = json.loads(f.read())
with open("./environment.yaml") as file:
env_doc = yaml.safe_load(file)
df_data = []
for env, doc in env_doc.items():
# DEBUG
if env == "euprod":
token = dict(doc[2])
url = dict(doc[1])
for hub in data:
for slo in data[hub]:
slo_name = data[hub][slo]["sloname"]
if len(data[hub][slo]["services"]) > 0:
for service in data[hub][slo]["services"]:
if len(service["entities"]) > 0:
for entity in service["entities"]:
if "fromRelationships" in entity:
if "runsOnHost" in entity["fromRelationships"]:
for host in entity["fromRelationships"][
"runsOnHost"
]:
df_data_item = {
"slo_name": slo_name,
"host_name": host["details"]["displayName"],
"host_id": host["id"],
"environment": hub,
"process_group_id": "NaN",
"process_group_name": "NaN",
"licensing_tag_host": "NaN",
"licensing_tag_process_group": "NaN",
"first_seen_process_group": "NaN",
"first_seen_host": host["details"][
"firstSeenTms"
],
}
if config(token.get("env-token-name")) != "":
print("Gather data, hold on a minute")
DTTOKEN = config(token.get("env-token-name"))
DTURL = url.get("env-url")
for tag in host["details"]["tags"]:
if tag["key"] == "Platform":
df_data_item["platform"] = tag["value"]
if tag["key"] == "Namespace":
df_data_item["namespace"] = tag["value"]
if tag["key"] == "PaaS":
df_data_item["paas"] = tag["value"]
for slo in data[env]:
if len(data[env][slo]["services"]) == 0:
# DEBUG
print(f"ERROR: {slo} has no services")
else:
for service in data[env][slo]["services"]:
params = {
"entitySelector": f'type("SERVICE"),entityId("{service["entityId"]}")',
"fields": "fromRelationships,tags",
}
entities = get_hosts_from_dynatrace(
env, DTTOKEN, DTURL, params, "entities"
)
# TODO: "entities" may be empty - add a check.
service["entities"] = entities["entities"]
for hosts in service["entities"]:
if "fromRelationships" in hosts:
if "runsOnHost" in hosts["fromRelationships"]:
for host in hosts["fromRelationships"][
"runsOnHost"
]:
# TODO: make dynatrace call to /entities/{entityId}
print(f'{slo} - {host["id"]}')
# host_response = get_hosts_from_dynatrace(
# env, DTTOKEN, DTURL, host["id"], "entities"
# )
# host["details"] = host_response
# TODO: rework
if "runsOn" in entity["fromRelationships"]:
for process_group in entity[
"fromRelationships"
]["runsOn"]:
df_data_item[
"process_group_id"
] = process_group["id"]
with open("test-data-with-hosts-2.json", "w") as f:
f.write(json.dumps(data, indent=4))
df_data.append(df_data_item)
build_dataframe_for_report(df_data)
# with open("./environment.yaml") as file:
# env_doc = yaml.safe_load(file)
# for env, doc in env_doc.items():
# # DEBUG
# if env == "euprod":
# token = dict(doc[2])
# url = dict(doc[1])
# if config(token.get("env-token-name")) != "":
# print("Gather data, hold on a minute")
# DTTOKEN = config(token.get("env-token-name"))
# DTURL = url.get("env-url")
# for slo in data[env]:
# if len(data[env][slo]["services"]) == 0:
# # DEBUG
# print(f"ERROR: {slo} has no services")
# else:
# for service in data[env][slo]["services"]:
# params = {
# "entitySelector": f'type("SERVICE"),entityId("{service["entityId"]}")',
# "fields": "fromRelationships,tags",
# }
# entities = get_data_from_dynatrace(
# 0.5, DTTOKEN, DTURL, params, "entities"
# )
# # TODO: "entities" may be empty - add a check.
# service["entities"] = entities["entities"]
# for hosts in service["entities"]:
# if "fromRelationships" in hosts:
# if "runsOnHost" in hosts["fromRelationships"]:
# for host in hosts["fromRelationships"][
# "runsOnHost"
# ]:
# # TODO: make dynatrace call to /entities/{entityId}
# host_response = get_data_from_dynatrace(
# 0.5, DTTOKEN, DTURL, host["id"], "entities"
# )
# host["details"] = host_response
def check_if_service_already_exists(services: list, entity_id: str) -> bool:
@@ -189,6 +291,58 @@ def check_if_service_already_exists(services: list, entity_id: str) -> bool:
return result
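# (Body elided by this hunk. Judging by the signature alone, a plausible
# implementation - an assumption, not the repo's actual code - is:
#   result = any(service["entityId"] == entity_id for service in services)
# )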
def build_data_frame_data(data: typing.Dict) -> None:
df_data = []
for hub in data:
for slo in data[hub]:
slo_name = data[hub][slo]["sloname"]
if len(data[hub][slo]["services"]) > 0:
for service in data[hub][slo]["services"]:
if len(service["entities"]) > 0:
for entity in service["entities"]:
if "fromRelationships" in entity:
if "runsOnHost" in entity["fromRelationships"]:
for host in entity["fromRelationships"][
"runsOnHost"
]:
df_data_item = {
"slo_name": slo_name,
"host_name": host["details"]["displayName"],
"host_id": host["id"],
"environment": hub,
"process_group_id": "NaN",
"process_group_name": "NaN",
"licensing_tag_host": "NaN",
"licensing_tag_process_group": "NaN",
"first_seen_process_group": "NaN",
"first_seen_host": host["details"][
"firstSeenTms"
],
}
for tag in host["details"]["tags"]:
if tag["key"] == "Platform":
df_data_item["platform"] = tag["value"]
if tag["key"] == "Namespace":
df_data_item["namespace"] = tag["value"]
if tag["key"] == "PaaS":
df_data_item["paas"] = tag["value"]
# TODO: rework
if "runsOn" in entity["fromRelationships"]:
for process_group in entity[
"fromRelationships"
]["runsOn"]:
df_data_item[
"process_group_id"
] = process_group["id"]
df_data.append(df_data_item)
build_dataframe_for_report(df_data)
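# Input shape expected by build_data_frame_data above, reconstructed from its
# lookups (illustrative, field values elided):
#
#   {
#       "<hub>": {
#           "<slo>": {
#               "sloname": "...",
#               "services": [{
#                   "entities": [{
#                       "fromRelationships": {
#                           "runsOnHost": [{"id": "...", "details": {...}}],
#                           "runsOn": [{"id": "..."}]
#                       }
#                   }]
#               }]
#           }
#       }
#   }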
@timer
def main() -> None:
reportItem = {}
@@ -303,8 +457,8 @@ def main() -> None:
"entitySelector": f'type("SERVICE"),entityId("{service["entityId"]}")',
"fields": "fromRelationships,tags",
}
entities = get_hosts_from_dynatrace(
env, DTTOKEN, DTURL, params, "entities"
entities = get_data_from_dynatrace(
0.5, DTTOKEN, DTURL, params, "entities"
)
# TODO: "entities" may be empty - add a check.
service["entities"] = entities["entities"]
@@ -319,8 +473,8 @@ def main() -> None:
"fromRelationships"
]["runsOnHost"]:
host_response = (
get_hosts_from_dynatrace(
env,
get_data_from_dynatrace(
0.5,
DTTOKEN,
DTURL,
host["id"],
@@ -329,10 +483,11 @@ def main() -> None:
)
host["details"] = host_response
with open("test-data-with-hosts-main.json", "w") as f:
f.write(json.dumps(reportItem, indent=4))
build_data_frame_data(reportItem)
# with open("test-data-with-hosts-main.json", "w") as f:
# f.write(json.dumps(reportItem, indent=4))
if __name__ == "__main__":
main()
# develop_load_json()
# main()
develop_load_json()