From 17a066c9900adc9ff52d8131bb40e2f4f45cfe9d Mon Sep 17 00:00:00 2001
From: Malte Tammena
Date: Mon, 8 Jul 2024 12:57:05 +0200
Subject: [PATCH] adjust scripts for loop-full run

---
 scripts/decode-result-folder.py | 141 ++++++++++++++++++++++----------
 scripts/test.py                 |   5 +-
 2 files changed, 103 insertions(+), 43 deletions(-)

diff --git a/scripts/decode-result-folder.py b/scripts/decode-result-folder.py
index b8bc15f..6bcb309 100755
--- a/scripts/decode-result-folder.py
+++ b/scripts/decode-result-folder.py
@@ -1,62 +1,120 @@
 #!/usr/bin/env python3
-import glob
 import json
 import os
 import argparse
 import csv
+
 parser = argparse.ArgumentParser()
 parser.add_argument("-d", "--directory")
 parser.add_argument("-o", "--output")
+# The jobinfo file
+# Generated with `sacct -j JOB_ID --format=all --json > jobinfo.json`
+parser.add_argument("-j", "--jobinfo")
+# File list that was used when scheduling the task
+parser.add_argument("-l", "--file-list", dest="file_list")
 args = parser.parse_args()
 
 # Path to the folder
 folder_path = args.directory if args.directory is not None else "output"
 output = args.output if args.output is not None else "all.csv"
+jobinfo_path = args.jobinfo if args.jobinfo is not None else "jobinfo.json"
+file_list_path = args.file_list if args.file_list is not None else "acyclic.list"
 
 
 def run():
-    count = 0
-    out = []
-    # Using glob to match all .json files
-    for file_path in glob.glob(os.path.join(folder_path, "*.json")):
-        try:
-            # Open and read the contents of the file
-            with open(file_path, "r", encoding="utf-8") as json_file:
-                (
-                    _ident,
-                    atom_count,
-                    assumption_ratio,
-                    max_rules_per_head,
-                    max_rule_size,
-                    _idx,
-                ) = file_path.split("_")
-                data = json.load(json_file)["results"]
-                aba2sat, aspforaba = (
-                    (data[0], data[1])
-                    if (
-                        data[0]["command"] == "aba2sat"
-                        and data[1]["command"] == "aspforaba"
-                    )
-                    else (data[1], data[0])
-                )
-                speedup = float(aspforaba['mean']) / float(aba2sat['mean'])
-                out.append({
-                    "atom_count": atom_count,
-                    "assumption_ratio": assumption_ratio,
-                    "max_rules_per_head": max_rules_per_head,
-                    "max_rule_size": max_rule_size,
-                    "time_ours": aba2sat["mean"],
-                    "time_theirs": aspforaba['mean'],
-                    "stddev": aba2sat['stddev'],
-                    "speedup": speedup,
-                })
-                if count > 700:
-                    break
-                count += 1
-        except Exception:
-            print(f'Failed to read {file_path}. Skipping..')
+    # Read the list of files to match each task id with its input file
+    with open(file_list_path, 'r') as file_list:
+        files = [file.strip() for file in file_list.readlines()]
+    # Open the generated jobinfo
+    with open(jobinfo_path, 'r') as jobinfo_file:
+        out = []
+        jobs = json.load(jobinfo_file)["jobs"]
+        for job in jobs:
+            array_id = job["array"]["job_id"]
+            task_id = job["array"]["task_id"]["number"]
+            status = job["derived_exit_code"]["status"][0]
+            flags = ",".join(job["flags"])
+            state = job["state"]["current"][0]
+            file = os.path.basename(files[task_id - 1])
+
+            # Extract the requested memory
+            mem_requested = [item["count"] for item in job["tres"]["requested"] if item["type"] == "mem"][0]
+
+            (
+                _ident,
+                atom_count,
+                assumption_ratio,
+                max_rules_per_head,
+                max_rule_size,
+                #loop_percent,
+                _idx_with_file_end,
+            ) = file.split("_")
+            loop_percent = 0
+
+            aba2sat_result_file = f"{file}-aba2sat-result"
+            aspforaba_result_file = f"{file}-aspforaba-result"
+            hyperfine_file = f"{file}-hyperfine.json"
+
+            solved_correctly = False
+            speedup = None
+            time_ours = None
+            time_theirs = None
+            stddev = None
+            # Only override the values if the job ended successfully; this guarantees correct values
+            if state == "COMPLETED":
+                try:
+                    aba2sat_result_path = os.path.join(folder_path, aba2sat_result_file)
+                    aspforaba_result_path = os.path.join(folder_path, aspforaba_result_file)
+                    with open(aba2sat_result_path, 'r') as aba2sat_result, open(aspforaba_result_path, 'r') as aspforaba_result:
+                        aba2sat = aba2sat_result.read()
+                        aspforaba = aspforaba_result.read()
+                        if aba2sat == aspforaba:
+                            solved_correctly = True
+                except Exception:
+                    print("No result files")
+                hyperfine_path = os.path.join(folder_path, hyperfine_file)
+                try:
+                    with open(hyperfine_path, 'r') as hyperfine:
+                        data = json.load(hyperfine)["results"]
+                        aba2sat, aspforaba = (
+                            (data[0], data[1])
+                            if (
+                                data[0]["command"] == "aba2sat"
+                                and data[1]["command"] == "aspforaba"
+                            )
+                            else (data[1], data[0])
+                        )
+                        speedup = float(aspforaba['mean']) / float(aba2sat['mean'])
+                        time_ours = aba2sat["mean"]
+                        time_theirs = aspforaba['mean']
+                        stddev = aba2sat['stddev']
+                except Exception:
+                    print("No hyperfine file")
+
+            out.append({
+                "array_id": array_id,
+                "task_id": task_id,
+                "mem_requested": mem_requested,
+                "status": status,
+                "flags": flags,
+                "state": state,
+                "file": file,
+
+                "atom_count": atom_count,
+                "assumption_ratio": assumption_ratio,
+                "max_rules_per_head": max_rules_per_head,
+                "max_rule_size": max_rule_size,
+                "loop_percent": loop_percent,
+
+                "solved_correctly": solved_correctly,
+
+                "time_ours": time_ours,
+                "time_theirs": time_theirs,
+                "stddev": stddev,
+                "speedup": speedup,
+            })
     if len(out) > 0:
         with open(output, 'w') as output_file:
             output_file.write
@@ -66,4 +124,5 @@ def run():
     else:
         print('Empty set')
 
+
 run()
diff --git a/scripts/test.py b/scripts/test.py
index 6b9f5fb..66a40e7 100755
--- a/scripts/test.py
+++ b/scripts/test.py
@@ -7,6 +7,7 @@ import seaborn as sns
 def read_and_visualize(csv_file):
     # Read the CSV file
     df = pd.read_csv(csv_file)
+    df = df[df['state'] == 'COMPLETED']
 
     # Display the first few rows of the dataframe
     print(df.head())
@@ -21,8 +22,8 @@ def read_and_visualize(csv_file):
     # ax.set_xscale('log')
     # ax.set_yscale('log')
 
-    plt.xlabel("aba2sat")
-    plt.ylabel("ASPforABA")
+    plt.xlabel("aba2sat (t [s])")
+    plt.ylabel("ASPforABA (t [s])")
     plt.legend()
     plt.show()
 
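
Note on the unchanged CSV-writing tail of run(): it only appears above as hunk context (ending in `output_file.write`), so the actual write logic is not part of this patch. For orientation, here is a minimal sketch of how rows shaped like the dicts collected in `out` could be flushed to the CSV that scripts/test.py reads, using the csv module the script already imports. The helper name `write_rows` is hypothetical and this is not necessarily the repository's implementation.

import csv

def write_rows(output_path, rows):
    # Write the collected per-job dicts as one CSV; column order follows the first row.
    if len(rows) > 0:
        with open(output_path, 'w', newline='') as output_file:
            writer = csv.DictWriter(output_file, fieldnames=rows[0].keys())
            writer.writeheader()
            writer.writerows(rows)
    else:
        print('Empty set')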