small script adjustments to make the cluster happy

2024-05-25 10:15:27 +02:00 · 2024-05-25 10:15:27 +02:00 · ad54f0755f
parent 27ed3caf0b
commit ad54f0755f
5 changed files with 190 additions and 172 deletions
--- a/.gitignore
+++ b/.gitignore
@ -5,3 +5,4 @@ target/
 scripts/__pycache__
 acyclic
 output-*
+all.csv
--- a/scripts/decode-result-folder.py
+++ b/scripts/decode-result-folder.py
@ -21,6 +21,7 @@ def run():
    out = []
    # Using glob to match all .json files
    for file_path in glob.glob(os.path.join(folder_path, "*.json")):
+        try:
            # Open and read the contents of the file
            with open(file_path, "r", encoding="utf-8") as json_file:
                (
@ -53,6 +54,8 @@ def run():
                if count > 700:
                    break
            count += 1
+        except Exception:
+            print(f'Failed to read {file_path}. Skipping..')
    with open(output, 'w') as output_file:
        output_file.write
        writer = csv.DictWriter(output_file, fieldnames=out[0].keys())
--- a/scripts/sc-batch.sh
+++ b/scripts/sc-batch.sh
@ -3,12 +3,26 @@
 # Batch script to run on sc.uni-leipzig.de cluster, i used
 # sbatch -a "1-$(cat acyclic.list | wc -l)" ./scripts/sc-batch.sh

+# Somehow all paths used in spawned processes need to be absolute,
+# there's probably a good explanation, but I don't have it
+
 FILE_LIST=acyclic.list

+# Pick line `$SLURM_ARRAY_TASK_ID` from the FILE_LIST
+# If the array has more tasks than FILE_LIST has lines, awk prints
+# nothing for the extra tasks and `file` ends up as just "$(pwd)/"
 file="$(pwd)/$(awk "NR == $SLURM_ARRAY_TASK_ID" "$FILE_LIST")"
+# Read the extra argument
 arg=$(cat "$file.asm")

+# Make sure we get all the data in one central place
 OUTPUT_DIR="$(pwd)/output"
-
 export OUTPUT_DIR
-./validate --file "$file" --arg "$arg" --time --problem dc-co
+
+# This assumes that `validate` accepts the --no-rm flag. It is not
+# a flag of the script itself, but one recognized by the default
+# bundler that `nix bundle .#validate` uses. It is required here
+# to prevent the fastest process from cleaning up the extracted
+# package. Slower processes, or those allocated later, *will* fail
+# without the flag.
+./validate --no-rm // --file "$file" --arg "$arg" --time --problem dc-co
--- a/scripts/test.py
+++ b/scripts/test.py
@ -11,7 +11,7 @@ def read_and_visualize(csv_file):
    # Display the first few rows of the dataframe
    print(df.head())

-    # Identify all the properties (assuming they are all columns except for 'runtime')
+    # Identify all the properties (assuming they are all columns except 'speedup', 'time' and 'stddev')
    properties = [col for col in df.columns if col != 'speedup' and col != 'time' and col != 'stddev']

    # Pairplot to see general pairwise relationships, may help to understand the overall relationship between properties and runtime
@ -22,20 +22,20 @@ def read_and_visualize(csv_file):
    # Create scatter plots for each property against runtime
    for prop in properties:
        plt.figure(figsize=(10, 6))
-        sns.scatterplot(x=df[prop], y=df['runtime'])
-        plt.title(f'Impact of {prop} on Runtime')
+        sns.scatterplot(x=df[prop], y=df['speedup'])
+        plt.title(f'Impact of {prop} on Speedup')
        plt.xlabel(prop)
-        plt.ylabel('Runtime')
-        plt.show()
+        plt.ylabel('Speedup')

    # Create box plots for categorical properties if any (e.g., difficulty level or type) against runtime
    for prop in properties:
        if df[prop].dtype == 'object':
            plt.figure(figsize=(10, 6))
-            sns.boxplot(x=df[prop], y=df['runtime'])
-            plt.title(f'Impact of {prop} on Runtime')
+            sns.boxplot(x=df[prop], y=df['speedup'])
+            plt.title(f'Impact of {prop} on Speedup')
            plt.xlabel(prop)
-            plt.ylabel('Runtime')
+            plt.ylabel('Speedup')
+
    plt.show()

 # Example usage
--- a/scripts/validate.sh
+++ b/scripts/validate.sh
@ -85,7 +85,7 @@ TIME_COMMANDS=
 while [[ $# -gt 0 ]]; do
 	case $1 in
 		-h | --help)
-    print_help_and_exit
+			print_help_and_exit ""
 			;;
 		--aspforaba)
 			shift
@ -150,7 +150,7 @@ case "$PROBLEM" in
 	dc-co | DC-CO)
 		if [ -n "$ABA_FILE_DIR" ]; then
 			# run for every file found in the directory
-    for file in "$ABA_FILE_DIR"/*."$ABA_FILE_EXT"; do
+			for file in $(find "$ABA_FILE_DIR" -type f -iname "*.$ABA_FILE_EXT" | shuf); do
 				ABA_FILE="$file" ADDITIONAL_ARG="$(cat "$file.asm")" run_dc_co
 			done
 		else