From 003315bb91f1d0324db6480bb818a3a3460a929f Mon Sep 17 00:00:00 2001
From: prasanna <maddilaprasanna10@gmail.com>
Date: Wed, 24 Apr 2024 21:07:29 +0200
Subject: [PATCH 1/3] Simplified the training_iteration_metric_plot function

---
 examples/plots/generate_plots.py | 90 +++++++++-----------------------
 1 file changed, 24 insertions(+), 66 deletions(-)

diff --git a/examples/plots/generate_plots.py b/examples/plots/generate_plots.py
index cc494bb..6395733 100644
--- a/examples/plots/generate_plots.py
+++ b/examples/plots/generate_plots.py
@@ -48,30 +48,10 @@ def generate_report(pdf_plots, params, path, num_samples):
             + " }"
         )
         latex += add_line("\end{table}")
-        # latex += add_line(pd.DataFrame.from_dict(params[key]).to_latex())
-    # latex += add_line("For each of the 5 environments, we sampled %d choice configurations where we sampled the following choices independently and uniformly from the following ranges:" % (num_wus // 5))
     latex += add_line(
         "For each of the sampled choice configurations, we train %d agents with different random seeds and compute the performance metrics."
         % num_samples
     )
-    # latex += add_line("\begin{figure}[ht]")
-    # latex += add_line("\begin{center}")
-    # latex += add_line("\centerline{\includegraphics[width=0.45\textwidth]{final_arch/perf__gin_study_design_choice_value_policy_and_value_function_network.pdf}\hspace{1cm}\includegraphics[width=0.45\textwidth]{final_arch/frequency__gin_study_design_choice_value_policy_and_value_function_network.pdf}}")
-    # latex += add_line("\caption{Analysis of choice \choicet{mlpshared}: "+str(percentile)+ "th percentile of performance scores conditioned on choice (left) and distribution of choices in top "+str(100-percentile)+"\% of configurations (right).}")
-    # latex += add_line("\label{fig:final_arch__mlpshared}")
-    # latex += add_line("\end{center}")
-    # latex += add_line("\end{figure}")
-    # latex += add_line("\subsection{Results}")
-    # latex += add_line("\label{exp_results_EXPNAME}")
-    # latex += add_line("We report aggregate statistics of the experiment in Table~\ref{tab:EXPNAME_overview} as well as training curves in Figure~\ref{fig:EXPNAME_training_curves}.")
-    # last = "fig:EXPNAME_"+params[-1].replace(".","_")
-    # if ename == 'final_setup':
-    # last = "fig:final_setup2__gin_study_design_choice_value_batch_mode"
-    # latex += add_line("For each of the investigated choices in this experiment, we further provide a per-choice analysis in Figures~\ref{fig:EXPNAME_"+params[0].replace(".","_")+"}-\ref{"+last+"}.")
-    # t, pdf = plot_training_curves(frame)
-    #    html += t
-    #    pdf_plots.append(pdf)
-    #    atex += add_line("")
     latex += add_line("\section{Training plots}")
     latex += add_line(
         "Plots that consider training iterations.\\footnote{A training iteration includes parallel sample collection by the environment workers as well as loss calculation on the collected batch and a model update.}"
@@ -84,7 +64,6 @@ def generate_report(pdf_plots, params, path, num_samples):
     latex += add_line(
         "\caption{Training curves. Mean over different runs and 95\% confidence intervals bands. \label{fig:training_curves}}"
     )
-    # latex += add_line("\label{fig:training_curves}")
     latex += add_line("\end{center}")
     latex += add_line("\end{figure}")
     latex += add_line(
@@ -206,39 +185,24 @@ def training_iteration_metric_plot(
             counter = 0
             for ind in results.index:
                 for policy in results["info"][ind]["learner"].keys():
+                    # Get the metric value for this policy
+                    _row_results = results["info"][ind]["learner"][policy]
+                    row_metric = None
                     if (
-                        "learner_stats"
-                        in results["info"][ind]["learner"][policy].keys()
+                        "learner_stats" in _row_results.keys()
+                        and metric in _row_results["learner_stats"]
                     ):
-                        if (
-                            metric
-                            in results["info"][ind]["learner"][policy][
-                                "learner_stats"
-                            ].keys()
-                        ):
-                            new_row = {
-                                "Training iteration": counter,
-                                metric_title: results["info"][ind]["learner"][
-                                    policy
-                                ]["learner_stats"][metric],
-                                "Configuration": conf,
-                                "Policy": policy,
-                            }
-                            df.loc[len(df)] = new_row
-                    else:
-                        if (
-                            metric
-                            in results["info"][ind]["learner"][policy].keys()
-                        ):
-                            new_row = {
-                                "Training iteration": counter,
-                                metric_title: results["info"][ind]["learner"][
-                                    policy
-                                ][metric],
-                                "Configuration": conf,
-                                "Policy": policy,
-                            }
-                            df.loc[len(df)] = new_row
+                        row_metric = _row_results["learner_stats"][metric]
+                    elif metric in _row_results.keys():
+                        row_metric = _row_results[metric]
+
+                    # Define the new row
+                    df.loc[len(df)] = {
+                        "Training iteration": counter,
+                        metric_title: row_metric,
+                        "Configuration": conf,
+                        "Policy": policy,
+                    }
                 counter += 1
     plt.figure(figsize=(16, 6))
     if df["Policy"].nunique() == 1:  # single-RL
@@ -262,15 +226,6 @@ def training_iteration_metric_plot(
     return pdf
 
 
-def get_policies(all_results):
-    for conf in all_results.keys():
-        conf_results = all_results[conf]
-        for key in conf_results.keys():
-            results = conf_results[key]
-            # for ind in results.index:
-            # print(results['config'][ind]['policies'])
-
-
 def training_policies_reward_plot(all_results):
     df = pd.DataFrame(
         columns=[
@@ -299,8 +254,10 @@ def training_policies_reward_plot(all_results):
                             )
                             >= n_episodes_iter
                         ), f"Found number of episodes rewards in training iterations lower than the number of episodes in the iteration {ind}"
-                        # This can happen because rllib adds older historical episodes in order to reach the required smooting window "metrics_num_episodes_for_smoothing"
-                        # I saw in the code that these episodes are added at the beginning of the list
+                        # This can happen because RLlib adds older historical
+                        # episodes in order to reach the required smoothing
+                        # window "metrics_num_episodes_for_smoothing". In the
+                        # RLlib code, these episodes are added at the beginning of the list.
                         diff_n_episodes = (
                             len(
                                 results["sampler_results"][ind]["hist_stats"][
@@ -362,7 +319,8 @@ def evaluation_policies_mean_reward(all_results):
         for key in conf_results.keys():
             results = conf_results[key]
             # Plot the reward for the evaluation graph
-            # We get the evaluation interval to make sure that we only get evaluation data in iterations for which we performed evaluation
+            # We get the evaluation interval to make sure that we only get
+            # evaluation data in iterations for which we performed evaluation
             evaluation_interval = results["config"][0]["evaluation_interval"]
             # We get also the train_batch_size
             train_batch_size = results["config"][0]["train_batch_size"]
@@ -465,7 +423,8 @@ def evaluation_exploitability(all_results):
         for key in conf_results.keys():
             results = conf_results[key]
             # Plot the reward for the evaluation graph
-            # We get the evaluation interval to make sure that we only get evaluation data in iterations for which we performed evaluation
+            # We get the evaluation interval to make sure that we only get
+            # evaluation data in iterations for which we performed evaluation
             evaluation_interval = results["config"][0]["evaluation_interval"]
             i = 1
             evaluation_iteration = 1
@@ -584,7 +543,6 @@ if __name__ == "__main__":
         all_results[logdir] = results
         num_samples = len(results)
 
-    get_policies(all_results)
     # Check that each logdir provided exists
     # for logdir in logdirs:
     # print("Log directory ", logdir, " exists ", os.path.isdir(logdir))
-- 
GitLab
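
Patch 1 collapses two nested if/else branches into a single row_metric
lookup. Below is a minimal sketch of that lookup as a standalone
helper, assuming each per-policy learner entry behaves like a plain
dict (find_metric is an illustrative name, not part of the module):

    def find_metric(learner_result, metric):
        """Return a metric value, preferring the nested learner_stats."""
        stats = learner_result.get("learner_stats", {})
        if metric in stats:
            return stats[metric]
        # Fall back to a top-level entry; None if the metric is absent.
        return learner_result.get(metric)

For example, find_metric({"learner_stats": {"policy_loss": 0.3}},
"policy_loss") returns 0.3. Note that, unlike the nested version, the
flattened loop also records rows where the metric is absent
(row_metric stays None), which the old code skipped.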


From 73df2c610d405b0a64e5992da1af7d1359a67c8e Mon Sep 17 00:00:00 2001
From: prasanna <maddilaprasanna10@gmail.com>
Date: Wed, 24 Apr 2024 22:53:25 +0200
Subject: [PATCH 2/3] TODO: make evaluation_exploitability work

The recent update to RLlib moved the storage location of
exploitability within the results structure, so the exploitability
results/dataframes are always empty. This needs to be fixed.
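
A minimal sketch of the failure mode, using a made-up evaluation
record (eval_record and rows are illustrative names, not part of the
module): the key check that guards row creation never fires after the
update, so the dataframe stays empty.

    # One evaluation record as loaded from the results; after the
    # RLlib update it no longer carries "policy_exploitability".
    eval_record = {"episodes_this_iter": 10}
    rows = []
    if "policy_exploitability" in eval_record:
        rows.append({"Policy exploitability": 0.0})  # never reached
    assert rows == []  # the exploitability dataframe stays empty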
---
 examples/plots/generate_plots.py | 121 ++++++++++++++++---------------
 1 file changed, 63 insertions(+), 58 deletions(-)

diff --git a/examples/plots/generate_plots.py b/examples/plots/generate_plots.py
index 6395733..2fc625a 100644
--- a/examples/plots/generate_plots.py
+++ b/examples/plots/generate_plots.py
@@ -175,35 +175,39 @@ def publish_report(pdf_plots, path):
 def training_iteration_metric_plot(
     metric, metric_title, graph_title, all_results
 ):
-    df = pd.DataFrame(
-        columns=["Training iteration", metric_title, "Configuration", "Policy"]
-    )
-    for conf in all_results.keys():
-        conf_results = all_results[conf]
-        for key in conf_results.keys():
-            results = conf_results[key]
+    df = []
+    for conf, conf_results in all_results.items():
+        for key, results in conf_results.items():
             counter = 0
             for ind in results.index:
                 for policy in results["info"][ind]["learner"].keys():
-                    # Get the metric value for this policy
+                    # Get the metric value for this policy, or skip if keys not found
                     _row_results = results["info"][ind]["learner"][policy]
+                    _result_keys = getattr(_row_results, "keys", None)
+                    if not _result_keys:
+                        continue
+
+                    # Now, look up row_metric to fill the metric column
                     row_metric = None
                     if (
-                        "learner_stats" in _row_results.keys()
+                        "learner_stats" in _result_keys()
                         and metric in _row_results["learner_stats"]
                     ):
                         row_metric = _row_results["learner_stats"][metric]
                     elif metric in _row_results.keys():
                         row_metric = _row_results[metric]
 
-                    # Define the new row
-                    df.loc[len(df)] = {
-                        "Training iteration": counter,
-                        metric_title: row_metric,
-                        "Configuration": conf,
-                        "Policy": policy,
-                    }
+                    # Define the new row and append it to df
+                    df.append(
+                        {
+                            "Training iteration": counter,
+                            metric_title: row_metric,
+                            "Configuration": conf,
+                            "Policy": policy,
+                        }
+                    )
                 counter += 1
+    df = pd.DataFrame.from_dict(df)  # Convert to DataFrame
     plt.figure(figsize=(16, 6))
     if df["Policy"].nunique() == 1:  # single-RL
         sns.lineplot(
@@ -408,7 +412,7 @@ def evaluation_policies_mean_reward(all_results):
 
 
 def evaluation_exploitability(all_results):
-    # dfs = []
+    dfi, max_evaluation_iteration = [], 0
     df = pd.DataFrame(
         columns=[
             "Evaluation iteration",
@@ -417,52 +421,53 @@ def evaluation_exploitability(all_results):
             "Configuration",
         ]
     )
-    max_evaluation_iteration = 0
-    for conf in all_results.keys():
-        conf_results = all_results[conf]
-        for key in conf_results.keys():
-            results = conf_results[key]
+    breakpoint()
+    for conf, conf_results in all_results.items():
+        for key, results in conf_results.items():
             # Plot the reward for the evaluation graph
             # We get the evaluation interval to make sure that we only get
             # evaluation data in iterations for which we performed evaluation
+            i, evaluation_iteration = 1, 1
             evaluation_interval = results["config"][0]["evaluation_interval"]
-            i = 1
-            evaluation_iteration = 1
-            if "evaluation" in results.keys():
-                for ind in results["evaluation"].index:
-                    if i == evaluation_interval:
-                        i = 1
-                        if (
-                            "policy_exploitability"
-                            in results["evaluation"][ind].keys()
-                        ):
-                            for policy in results["evaluation"][ind][
+            if "evaluation" not in results.keys():
+                continue
+            eval_results = results["evaluation"]
+            for ind in eval_results.index:
+                # If not an eval iteration, continue
+                if i != evaluation_interval:
+                    i = i + 1
+                    continue
+
+                # else, reset the interval counter
+                # and check if exploitability data was stored.
+                i = 1
+                if not ("policy_exploitability" in eval_results[ind].keys()):
+                    continue  # Skip, exploitability not here...
+
+                # Now processing exploitability data ...
+                for policy in eval_results[ind][
+                    "policy_exploitability"
+                ].keys():
+                    dfi.append(
+                        {
+                            "Evaluation iteration": np.full(
+                                eval_results[ind]["episodes_this_iter"],
+                                evaluation_iteration,
+                            ),
+                            "Policy exploitability": eval_results[ind][
                                 "policy_exploitability"
-                            ].keys():
-                                dfi = pd.DataFrame(
-                                    {
-                                        "Evaluation iteration": np.full(
-                                            results["evaluation"][ind][
-                                                "episodes_this_iter"
-                                            ],
-                                            evaluation_iteration,
-                                        ),
-                                        "Policy exploitability": results[
-                                            "evaluation"
-                                        ][ind]["policy_exploitability"][
-                                            policy
-                                        ],
-                                        "Policy": policy,
-                                        "Configuration": conf,
-                                    }
-                                )
-                                df = pd.concat([df, dfi], ignore_index=True)
-                        evaluation_iteration = evaluation_iteration + 1
-                    else:
-                        i = i + 1
-                max_evaluation_iteration = max(
-                    max_evaluation_iteration, evaluation_iteration
-                )
+                            ][policy],
+                            "Policy": policy,
+                            "Configuration": conf,
+                        }
+                    )
+                evaluation_iteration = evaluation_iteration + 1
+            max_evaluation_iteration = max(
+                max_evaluation_iteration, evaluation_iteration
+            )
+
+    # TODO: df is always empty; expl moved to info.learner etc.
+    df = pd.DataFrame.from_dict(dfi)
     plt.figure(figsize=(16, 6))
     sns.lineplot(
         data=df,
-- 
GitLab
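
Patch 2 also switches the plotting helpers from growing a DataFrame row
by row to collecting plain dicts in a list. A short sketch of the
pattern with made-up rows; assigning df.loc[len(df)] = row reallocates
the frame on every insert, while a single conversion at the end is one
allocation:

    import pandas as pd

    rows = []
    for i in range(3):
        # Each dict mirrors the columns used in the plots.
        rows.append({"Training iteration": i, "Loss": 0.1 * i, "Policy": "p0"})
    df = pd.DataFrame(rows)  # one allocation instead of one per row

pd.DataFrame(rows) is the more direct spelling of the
pd.DataFrame.from_dict(df) call in the hunk; from_dict with the default
orient simply forwards a list to the regular constructor.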


From 2386f8e9c9179d85849ab72f2b1fc831a58e3953 Mon Sep 17 00:00:00 2001
From: ssmaddila <siva-sri-prasanna.maddila@inrae.fr>
Date: Thu, 25 Apr 2024 10:00:35 +0200
Subject: [PATCH 3/3] Fixed the exploitability graph

Exploitability is now read from the per-policy learner results under
results["info"] and plotted against num_env_steps_sampled, so the
exploitability dataframe is no longer always empty.
---
 examples/plots/generate_plots.py | 40 ++++++++++++++------------------
 1 file changed, 18 insertions(+), 22 deletions(-)

diff --git a/examples/plots/generate_plots.py b/examples/plots/generate_plots.py
index 2fc625a..0a95261 100644
--- a/examples/plots/generate_plots.py
+++ b/examples/plots/generate_plots.py
@@ -413,15 +413,13 @@ def evaluation_policies_mean_reward(all_results):
 
 def evaluation_exploitability(all_results):
     dfi, max_evaluation_iteration = [], 0
-    df = pd.DataFrame(
-        columns=[
-            "Evaluation iteration",
-            "Policy exploitability",
-            "Policy",
-            "Configuration",
-        ]
-    )
-    breakpoint()
+    df_columns = [
+        "Evaluation iteration",
+        "Policy exploitability",
+        "Policy",
+        "Configuration",
+    ]
+    df = pd.DataFrame(columns=df_columns)
     for conf, conf_results in all_results.items():
         for key, results in conf_results.items():
             # Plot the reward for the evaluation graph
@@ -431,8 +429,8 @@ def evaluation_exploitability(all_results):
             evaluation_interval = results["config"][0]["evaluation_interval"]
             if "evaluation" not in results.keys():
                 continue
-            eval_results = results["evaluation"]
-            for ind in eval_results.index:
+            expl_results = results["info"]
+            for ind in expl_results.index:
                 # If not an eval iteration, continue
                 if i != evaluation_interval:
                     i = i + 1
@@ -441,22 +439,21 @@ def evaluation_exploitability(all_results):
                 # else, reset the interval counter
                 # and check if exploitability data was stored.
                 i = 1
-                if not ("policy_exploitability" in eval_results[ind].keys()):
+                if not "policy_exploitability" in expl_results[ind]["learner"]:
                     continue  # Skip, exploitability not here...
 
                 # Now processing exploitability data ...
-                for policy in eval_results[ind][
+                for policy in expl_results[ind]["learner"][
                     "policy_exploitability"
                 ].keys():
                     dfi.append(
                         {
-                            "Evaluation iteration": np.full(
-                                eval_results[ind]["episodes_this_iter"],
-                                evaluation_iteration,
-                            ),
-                            "Policy exploitability": eval_results[ind][
-                                "policy_exploitability"
-                            ][policy],
+                            "Evaluation iteration": results[
+                                "num_env_steps_sampled"
+                            ][ind],
+                            "Policy exploitability": expl_results[ind][
+                                "learner"
+                            ]["policy_exploitability"][policy],
                             "Policy": policy,
                             "Configuration": conf,
                         }
@@ -466,8 +463,7 @@ def evaluation_exploitability(all_results):
                 max_evaluation_iteration, evaluation_iteration
             )
 
-    # TODO: df is always empty; expl moved to info.learner etc.
-    df = pd.DataFrame.from_dict(dfi)
+    df = pd.concat([df, pd.DataFrame.from_dict(dfi)])
     plt.figure(figsize=(16, 6))
     sns.lineplot(
         data=df,
-- 
GitLab
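
A hedged sketch of the two result layouts involved, with made-up
values, to show what moved: Patch 2 still read exploitability from the
per-iteration "evaluation" entry, while Patch 3 reads it from the
per-policy learner info and plots it against num_env_steps_sampled.

    # Before the RLlib update (what the old code assumed):
    old_result = {
        "evaluation": {
            "policy_exploitability": {"policy_0": 0.12},
            "episodes_this_iter": 10,
        },
    }
    # After the update (what Patch 3 reads): exploitability now lives
    # under info -> learner, keyed per policy.
    new_result = {
        "info": {"learner": {"policy_exploitability": {"policy_0": 0.12}}},
        "num_env_steps_sampled": 4000,
    }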