small script adjustments to make the cluster happy

This commit is contained in:
Malte Tammena 2024-05-25 10:15:27 +02:00
parent 27ed3caf0b
commit ad54f0755f
5 changed files with 190 additions and 172 deletions

1
.gitignore vendored
View file

@ -5,3 +5,4 @@ target/
scripts/__pycache__
acyclic
output-*
all.csv

View file

@ -21,6 +21,7 @@ def run():
out = []
# Using glob to match all .json files
for file_path in glob.glob(os.path.join(folder_path, "*.json")):
try:
# Open and read the contents of the file
with open(file_path, "r", encoding="utf-8") as json_file:
(
@ -53,6 +54,8 @@ def run():
if count > 700:
break
count += 1
except Exception:
print(f'Failed to read {file_path}. Skipping..')
with open(output, 'w') as output_file:
output_file.write
writer = csv.DictWriter(output_file, fieldnames=out[0].keys())

View file

@ -3,12 +3,26 @@
# Batch script to run on the sc.uni-leipzig.de cluster. I used:
# sbatch -a "1-$(cat acyclic.list | wc -l)" ./scripts/sc-batch.sh
# Somehow all paths used in spawned processes need to be absolute,
# there's probably a good explanation, but I don't have it
FILE_LIST=acyclic.list
# Pick line `$SLURM_ARRAY_TASK_ID` from the FILE_LIST
# This will probably cause issues if more processes are allocated
# than lines in the FILE_LIST, but who knows
file="$(pwd)/$(awk "NR == $SLURM_ARRAY_TASK_ID" "$FILE_LIST")"
# Read the extra argument
arg=$(cat "$file.asm")
# Make sure we get all the data in one central place
OUTPUT_DIR="$(pwd)/output"
export OUTPUT_DIR
./validate --file "$file" --arg "$arg" --time --problem dc-co
# This assumes that `validate` accepts the --no-rm flag. The script
# itself does not accept that flag, but the default bundler used by
# `nix bundle .#validate` recognizes it. It is required here to
# prevent the fastest process from cleaning up the extracted package;
# slower processes, or those allocated later, *will* fail without
# the flag.
./validate --no-rm // --file "$file" --arg "$arg" --time --problem dc-co

View file

@ -11,7 +11,7 @@ def read_and_visualize(csv_file):
# Display the first few rows of the dataframe
print(df.head())
# Identify all the properties (assuming they are all columns except for 'runtime')
# Identify all the properties (assuming they are all columns except for some timings)
properties = [col for col in df.columns if col != 'speedup' and col != 'time' and col != 'stddev']
# Pairplot to see general pairwise relationships, may help to understand the overall relationship between properties and runtime
@ -22,20 +22,20 @@ def read_and_visualize(csv_file):
# Create scatter plots for each property against speedup
for prop in properties:
plt.figure(figsize=(10, 6))
sns.scatterplot(x=df[prop], y=df['runtime'])
plt.title(f'Impact of {prop} on Runtime')
sns.scatterplot(x=df[prop], y=df['speedup'])
plt.title(f'Impact of {prop} on Speedup')
plt.xlabel(prop)
plt.ylabel('Runtime')
plt.show()
plt.ylabel('Speedup')
# Create box plots for categorical properties if any (e.g., difficulty level or type) against speedup
for prop in properties:
if df[prop].dtype == 'object':
plt.figure(figsize=(10, 6))
sns.boxplot(x=df[prop], y=df['runtime'])
plt.title(f'Impact of {prop} on Runtime')
sns.boxplot(x=df[prop], y=df['speedup'])
plt.title(f'Impact of {prop} on Speedup')
plt.xlabel(prop)
plt.ylabel('Runtime')
plt.ylabel('Speedup')
plt.show()
# Example usage

View file

@ -85,7 +85,7 @@ TIME_COMMANDS=
while [[ $# -gt 0 ]]; do
case $1 in
-h | --help)
print_help_and_exit
print_help_and_exit ""
;;
--aspforaba)
shift
@ -150,7 +150,7 @@ case "$PROBLEM" in
dc-co | DC-CO)
if [ -n "$ABA_FILE_DIR" ]; then
# run for every file found in the directory
for file in "$ABA_FILE_DIR"/*."$ABA_FILE_EXT"; do
for file in $(find "$ABA_FILE_DIR" -type f -iname "*.$ABA_FILE_EXT" | shuf); do
ABA_FILE="$file" ADDITIONAL_ARG="$(cat "$file.asm")" run_dc_co
done
else