adjust scripts for loop-full run

This commit is contained in:
Malte Tammena 2024-07-08 12:57:05 +02:00
parent a9fae50bb8
commit 17a066c990
2 changed files with 103 additions and 43 deletions

View file

@@ -1,62 +1,120 @@
#!/usr/bin/env python3
"""Aggregate per-job benchmark results into a single CSV.

Reads a Slurm ``sacct`` JSON job report plus the file list that was used
when scheduling the array job, and collects the outcomes into one CSV.
"""
import json
import os
import argparse
import csv

# Defaults live on the parser itself instead of post-hoc
# `x if x is not None else ...` ternaries — same observable behavior.
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--directory", default="output")
parser.add_argument("-o", "--output", default="all.csv")
# The jobinfo file
# Generated with `sacct -j JOB_ID --format=all --json > jobinfo.json`
parser.add_argument("-j", "--jobinfo", default="jobinfo.json")
# File list that was used when scheduling the task
parser.add_argument("-l", "--file-list", dest="file_list", default="acyclic.list")
args = parser.parse_args()

# Path to the folder containing the per-job result files.
folder_path = args.directory
# Path of the aggregated CSV to write.
output = args.output
# Path of the sacct JSON report.
jobinfo_path = args.jobinfo
# Path of the scheduling file list (indexed by array task id).
file_list_path = args.file_list
def run(): def run():
count = 0 # Read the list of files to match with their id
out = [] with open(file_list_path, 'r') as file_list:
# Using glob to match all .json files files = [file.strip() for file in file_list.readlines()]
for file_path in glob.glob(os.path.join(folder_path, "*.json")): # Open the generated jobinfo
try: with open(jobinfo_path, 'r') as jobinfo_file:
# Open and read the contents of the file out = []
with open(file_path, "r", encoding="utf-8") as json_file: jobs = json.load(jobinfo_file)["jobs"]
( for job in jobs:
_ident, array_id = job["array"]["job_id"]
atom_count, task_id = job["array"]["task_id"]["number"]
assumption_ratio, status = job["derived_exit_code"]["status"][0]
max_rules_per_head, flags = ",".join(job["flags"])
max_rule_size, state = job["state"]["current"][0]
_idx, file = os.path.basename(files[task_id - 1])
) = file_path.split("_")
data = json.load(json_file)["results"] # Extract the requested memory
aba2sat, aspforaba = ( mem_requested = [item["count"] for item in job["tres"]["requested"] if item["type"] == "mem"][0]
(data[0], data[1])
if ( (
data[0]["command"] == "aba2sat" _ident,
and data[1]["command"] == "aspforaba" atom_count,
) assumption_ratio,
else (data[1], data[0]) max_rules_per_head,
) max_rule_size,
speedup = float(aspforaba['mean']) / float(aba2sat['mean']) #loop_percent,
out.append({ _idx_with_file_end,
"atom_count": atom_count, ) = file.split("_")
"assumption_ratio": assumption_ratio, loop_percent = 0
"max_rules_per_head": max_rules_per_head,
"max_rule_size": max_rule_size, aba2sat_result_file = f"{file}-aba2sat-result"
"time_ours": aba2sat["mean"], aspforaba_result_file = f"{file}-aspforaba-result"
"time_theirs": aspforaba['mean'], hyperfine_file = f"{file}-hyperfine.json"
"stddev": aba2sat['stddev'],
"speedup": speedup, solved_correctly = False
}) speedup = None
if count > 700: time_ours = None
break time_theirs = None
count += 1 stddev = None
except Exception: # Only override the values if the job successfully ended, this guarantees correct values
print(f'Failed to read {file_path}. Skipping..') if state == "COMPLETED":
try:
aba2sat_result_path = os.path.join(folder_path, aba2sat_result_file)
aspforaba_result_path = os.path.join(folder_path, aspforaba_result_file)
with open(aba2sat_result_path, 'r') as aba2sat_result, open(aspforaba_result_path, 'r') as aspforaba_result:
aba2sat = aba2sat_result.read()
aspforaba = aspforaba_result.read()
if aba2sat == aspforaba:
solved_correctly = True
except Exception:
print("No result files")
hyperfine_path = os.path.join(folder_path, hyperfine_file)
try:
with open(hyperfine_path, 'r') as hyperfine:
data = json.load(hyperfine)["results"]
aba2sat, aspforaba = (
(data[0], data[1])
if (
data[0]["command"] == "aba2sat"
and data[1]["command"] == "aspforaba"
)
else (data[1], data[0])
)
speedup = float(aspforaba['mean']) / float(aba2sat['mean'])
time_ours = aba2sat["mean"]
time_theirs = aspforaba['mean']
stddev = aba2sat['stddev']
except Exception:
print("No hyperfine file")
out.append({
"array_id": array_id,
"task_id": task_id,
"mem_requested": mem_requested,
"status": status,
"flags": flags,
"state": state,
"file": file,
"atom_count": atom_count,
"assumption_ratio": assumption_ratio,
"max_rules_per_head": max_rules_per_head,
"max_rule_size": max_rule_size,
"loop_percent": loop_percent,
"solved_correctly": solved_correctly,
"time_ours": time_ours,
"time_theirs": time_theirs,
"stddev": stddev,
"speedup": speedup,
})
if len(out) > 0: if len(out) > 0:
with open(output, 'w') as output_file: with open(output, 'w') as output_file:
output_file.write output_file.write
@@ -66,4 +124,5 @@ def run():
else: else:
print('Empty set') print('Empty set')
run() run()

View file

@@ -7,6 +7,7 @@ import seaborn as sns
def read_and_visualize(csv_file): def read_and_visualize(csv_file):
# Read the CSV file # Read the CSV file
df = pd.read_csv(csv_file) df = pd.read_csv(csv_file)
df = df[df['state'] == 'COMPLETED']
# Display the first few rows of the dataframe # Display the first few rows of the dataframe
print(df.head()) print(df.head())
@@ -21,8 +22,8 @@ def read_and_visualize(csv_file):
# ax.set_xscale('log') # ax.set_xscale('log')
# ax.set_yscale('log') # ax.set_yscale('log')
plt.xlabel("aba2sat") plt.xlabel("aba2sat (t [s])")
plt.ylabel("ASPforABA") plt.ylabel("ASPforABA (t [s])")
plt.legend() plt.legend()
plt.show() plt.show()