small script adjustments to make the cluster happy

2024-05-25 10:15:27 +02:00 · 2024-05-25 10:15:27 +02:00 · ad54f0755f
parent 27ed3caf0b
commit ad54f0755f
5 changed files with 190 additions and 172 deletions
--- a/.gitignore
+++ b/.gitignore
@ -5,3 +5,4 @@ target/
 scripts/__pycache__
 acyclic
 output-*
 all.csv
--- a/scripts/decode-result-folder.py
+++ b/scripts/decode-result-folder.py
@ -21,38 +21,41 @@ def run():
    out = []
    # Using glob to match all .json files
    for file_path in glob.glob(os.path.join(folder_path, "*.json")):
-        # Open and read the contents of the file
+        try:
-        with open(file_path, "r", encoding="utf-8") as json_file:
+            # Open and read the contents of the file
-            (
+            with open(file_path, "r", encoding="utf-8") as json_file:
-                _ident,
+                (
-                atom_count,
+                    _ident,
-                assumption_ratio,
+                    atom_count,
-                max_rules_per_head,
+                    assumption_ratio,
-                max_rule_size,
+                    max_rules_per_head,
-                _idx,
+                    max_rule_size,
-            ) = file_path.split("_")
+                    _idx,
-            data = json.load(json_file)["results"]
+                ) = file_path.split("_")
-            aba2sat, aspforaba = (
+                data = json.load(json_file)["results"]
-                (data[0], data[1])
+                aba2sat, aspforaba = (
-                if (
+                    (data[0], data[1])
-                    data[0]["command"] == "aba2sat"
+                    if (
-                    and data[1]["command"] == "aspforaba"
+                        data[0]["command"] == "aba2sat"
                        and data[1]["command"] == "aspforaba"
                    )
                    else (data[1], data[0])
                )
-                else (data[1], data[0])
+                speedup = float(aspforaba['mean']) / float(aba2sat['mean'])
-            )
+                out.append({
-            speedup = float(aspforaba['mean']) / float(aba2sat['mean'])
+                    "atom_count": atom_count,
-            out.append({
+                    "assumption_ratio": assumption_ratio,
-                "atom_count": atom_count,
+                    "max_rules_per_head": max_rules_per_head,
-                "assumption_ratio": assumption_ratio,
+                    "max_rule_size": max_rule_size,
-                "max_rules_per_head": max_rules_per_head,
+                    "time": aba2sat["mean"],
-                "max_rule_size": max_rule_size,
+                    "stddev": aba2sat['stddev'],
-                "time": aba2sat["mean"],
+                    "speedup": speedup,
-                "stddev": aba2sat['stddev'],
+                })
-                "speedup": speedup,
+                if count > 700:
-            })
+                    break
-            if count > 700:
+            count += 1
-                break
+        except Exception:
-        count += 1
+            print(f'Failed to read {file_path}. Skipping..')
    with open(output, 'w') as output_file:
        output_file.write
        writer = csv.DictWriter(output_file, fieldnames=out[0].keys())
--- a/scripts/sc-batch.sh
+++ b/scripts/sc-batch.sh
@ -3,12 +3,26 @@
 # Batch script to run on the sc.uni-leipzig.de cluster, I used
 # sbatch -a "1-$(cat acyclic.list | wc -l)" ./scripts/sc-batch.sh
 # Somehow all paths used in spawned processes need to be absolute,
 # there's probably a good explanation, but I don't have it
 FILE_LIST=acyclic.list
 # Pick line `$SLURM_ARRAY_TASK_ID` from the FILE_LIST
 # This will probably cause issues if more processes are allocated
 # than lines in the FILE_LIST, but who knows
 file="$(pwd)/$(awk "NR == $SLURM_ARRAY_TASK_ID" "$FILE_LIST")"
 # Read the extra argument
 arg=$(cat "$file.asm")
 # Make sure we get all the data in one central place
 OUTPUT_DIR="$(pwd)/output"
 export OUTPUT_DIR
-./validate --file "$file" --arg "$arg" --time --problem dc-co
+
 # This assumes that `validate` accepts the --no-rm flag,
 # which is not a flag the script itself accepts, but one recognized by
 # the default bundler that `nix bundle .#validate` uses. Required here
 # to prevent the fastest process from cleaning up the extracted
 # package. Slower processes or those allocated later *will* fail
 # without the flag.
 ./validate --no-rm // --file "$file" --arg "$arg" --time --problem dc-co
--- a/scripts/test.py
+++ b/scripts/test.py
@ -11,7 +11,7 @@ def read_and_visualize(csv_file):
    # Display the first few rows of the dataframe
    print(df.head())
-    # Identify all the properties (assuming they are all columns except for 'runtime')
+    # Identify all the properties (assuming they are all columns except for some timings)
    properties = [col for col in df.columns if col != 'speedup' and col != 'time' and col != 'stddev']
    # Pairplot to see general pairwise relationships, may help to understand the overall relationship between properties and runtime
@ -22,21 +22,21 @@ def read_and_visualize(csv_file):
    # Create scatter plots for each property against runtime
    for prop in properties:
        plt.figure(figsize=(10, 6))
-        sns.scatterplot(x=df[prop], y=df['runtime'])
+        sns.scatterplot(x=df[prop], y=df['speedup'])
-        plt.title(f'Impact of {prop} on Runtime')
+        plt.title(f'Impact of {prop} on Speedup')
        plt.xlabel(prop)
-        plt.ylabel('Runtime')
+        plt.ylabel('Speedup')
        plt.show()
    # Create box plots for categorical properties if any (e.g., difficulty level or type) against runtime
    for prop in properties:
        if df[prop].dtype == 'object':
            plt.figure(figsize=(10, 6))
-            sns.boxplot(x=df[prop], y=df['runtime'])
+            sns.boxplot(x=df[prop], y=df['speedup'])
-            plt.title(f'Impact of {prop} on Runtime')
+            plt.title(f'Impact of {prop} on Speedup')
            plt.xlabel(prop)
-            plt.ylabel('Runtime')
+            plt.ylabel('Speedup')
-            plt.show()
+
    plt.show()
 # Example usage
 csv_file = 'all.csv'  # Replace with your actual CSV file path
--- a/scripts/validate.sh
+++ b/scripts/validate.sh
@ -4,70 +4,70 @@
 # best used through the bundled nix version `nix run .#validate`
 print_help_and_exit() {
-  if [ -n "$1" ]; then
+	if [ -n "$1" ]; then
-    printf "%s\n\n" "$1"
+		printf "%s\n\n" "$1"
-  fi
+	fi
-  printf "Usage: validate [OPTIONS] \n"
+	printf "Usage: validate [OPTIONS] \n"
-  printf "\n"
+	printf "\n"
-  printf "Options:\n"
+	printf "Options:\n"
-  printf "  --aspforaba\n"
+	printf "  --aspforaba\n"
-  printf "          Binary to use when calling aspforaba\n"
+	printf "          Binary to use when calling aspforaba\n"
-  printf "  -p, --problem\n"
+	printf "  -p, --problem\n"
-  printf "          The problem to solve\n"
+	printf "          The problem to solve\n"
-  printf "  -a, --arg\n"
+	printf "  -a, --arg\n"
-  printf "          The additional argument for the problem\n"
+	printf "          The additional argument for the problem\n"
-  printf "  -f, --file\n"
+	printf "  -f, --file\n"
-  printf "          The file containing the problem in ABA format\n"
+	printf "          The file containing the problem in ABA format\n"
-  printf "  -t, --time\n"
+	printf "  -t, --time\n"
-  printf "          Execute hyperfine to determine runtimes\n"
+	printf "          Execute hyperfine to determine runtimes\n"
-  printf "  --files-from\n"
+	printf "  --files-from\n"
-  printf "          Use the following dir to read files, specify a single file with --file instead\n"
+	printf "          Use the following dir to read files, specify a single file with --file instead\n"
-  exit 1
+	exit 1
 }
 format_time() {
-  COMMAND="$1"
+	COMMAND="$1"
-  FILE="$2"
+	FILE="$2"
-  mean=$(jq ".results[]   | select(.command == \"$COMMAND\") | (.mean   * 1000)" "$FILE")
+	mean=$(jq ".results[]   | select(.command == \"$COMMAND\") | (.mean   * 1000)" "$FILE")
-  stddev=$(jq ".results[] | select(.command == \"$COMMAND\") | (.stddev * 1000)" "$FILE")
+	stddev=$(jq ".results[] | select(.command == \"$COMMAND\") | (.stddev * 1000)" "$FILE")
-  printf "%7.3f±%7.3fms" "$mean" "$stddev"
+	printf "%7.3f±%7.3fms" "$mean" "$stddev"
 }
 run_dc_co() {
-  OUTPUT_DIR=${OUTPUT_DIR:-$(mktemp -d)}
+	OUTPUT_DIR=${OUTPUT_DIR:-$(mktemp -d)}
-  mkdir -p "$OUTPUT_DIR"
+	mkdir -p "$OUTPUT_DIR"
-  JSON_FILE="$OUTPUT_DIR/$(basename "$ABA_FILE")-hyperfine.json"
+	JSON_FILE="$OUTPUT_DIR/$(basename "$ABA_FILE")-hyperfine.json"
-  # Restrict memory to 20GB
+	# Restrict memory to 20GB
-  ulimit -v 20000000
+	ulimit -v 20000000
-  if [ -z "$ADDITIONAL_ARG" ]; then
+	if [ -z "$ADDITIONAL_ARG" ]; then
-    print_help_and_exit "Parameter --arg is missing!"
+		print_help_and_exit "Parameter --arg is missing!"
-  fi
+	fi
-  if [ -z "$ABA_FILE" ]; then
+	if [ -z "$ABA_FILE" ]; then
-    print_help_and_exit "Parameter --file is missing!"
+		print_help_and_exit "Parameter --file is missing!"
-  fi
+	fi
-  printf "===== %s ==== " "$(basename "$ABA_FILE")"
+	printf "===== %s ==== " "$(basename "$ABA_FILE")"
-  our_result=$("$ABA2SAT" --max-loops 0 --file "$ABA_FILE" dc-co --query "$ADDITIONAL_ARG" | tee "$OUTPUT_DIR/$(basename "$ABA_FILE")-aba2sat-result")
+	our_result=$("$ABA2SAT" --max-loops 0 --file "$ABA_FILE" dc-co --query "$ADDITIONAL_ARG" | tee "$OUTPUT_DIR/$(basename "$ABA_FILE")-aba2sat-result")
-  other_result=$("$ASPFORABA" --file "$ABA_FILE" --problem DC-CO --query "$ADDITIONAL_ARG" | tee "$OUTPUT_DIR/$(basename "$ABA_FILE")-aspforaba-result")
+	other_result=$("$ASPFORABA" --file "$ABA_FILE" --problem DC-CO --query "$ADDITIONAL_ARG" | tee "$OUTPUT_DIR/$(basename "$ABA_FILE")-aspforaba-result")
-  if [ "$our_result" != "$other_result" ]; then
+	if [ "$our_result" != "$other_result" ]; then
-    printf "❌\n"
+		printf "❌\n"
-  else
+	else
-    printf "✅\n"
+		printf "✅\n"
-  fi
+	fi
-  printf "Argument: %s\n" "$ADDITIONAL_ARG"
+	printf "Argument: %s\n" "$ADDITIONAL_ARG"
-  if [ -n "$TIME_COMMANDS" ]; then
+	if [ -n "$TIME_COMMANDS" ]; then
-    $HYPERFINE --shell=none \
+		$HYPERFINE --shell=none \
-      --export-json "$JSON_FILE" \
+			--export-json "$JSON_FILE" \
-      --command-name "aba2sat" \
+			--command-name "aba2sat" \
-      "$ABA2SAT --max-loops 0 --file \"$ABA_FILE\" dc-co --query \"$ADDITIONAL_ARG\"" \
+			"$ABA2SAT --max-loops 0 --file \"$ABA_FILE\" dc-co --query \"$ADDITIONAL_ARG\"" \
-      --command-name "aspforaba" \
+			--command-name "aspforaba" \
-      "$ASPFORABA --file \"$ABA_FILE\" --problem DC-CO --query \"$ADDITIONAL_ARG\"" 1>/dev/null 2>&1
+			"$ASPFORABA --file \"$ABA_FILE\" --problem DC-CO --query \"$ADDITIONAL_ARG\"" 1>/dev/null 2>&1
-    printf "Our:      %3s %30s\n" "$our_result" "$(format_time "aba2sat" "$JSON_FILE")"
+		printf "Our:      %3s %30s\n" "$our_result" "$(format_time "aba2sat" "$JSON_FILE")"
-    printf "Their:    %3s %30s\n" "$other_result" "$(format_time "aspforaba" "$JSON_FILE")"
+		printf "Their:    %3s %30s\n" "$other_result" "$(format_time "aspforaba" "$JSON_FILE")"
-  else
+	else
-    printf "Our:      %3s\n" "$our_result"
+		printf "Our:      %3s\n" "$our_result"
-    printf "Their:    %3s\n" "$other_result"
+		printf "Their:    %3s\n" "$other_result"
-  fi
+	fi
 }
@ -83,82 +83,82 @@ ADDITIONAL_ARG=
 TIME_COMMANDS=
 while [[ $# -gt 0 ]]; do
-  case $1 in
+	case $1 in
-  -h | --help)
+		-h | --help)
-    print_help_and_exit
+			print_help_and_exit ""
-    ;;
+			;;
-  --aspforaba)
+		--aspforaba)
-    shift
+			shift
-    ASPFORABA=$1
+			ASPFORABA=$1
-    shift
+			shift
-    ;;
+			;;
-  -p | --problem)
+		-p | --problem)
-    shift
+			shift
-    PROBLEM=$1
+			PROBLEM=$1
-    shift
+			shift
-    ;;
+			;;
-  -f | --file)
+		-f | --file)
-    if [ -n "$ABA_FILE_DIR" ]; then
+			if [ -n "$ABA_FILE_DIR" ]; then
-      print_help_and_exit "Parameters --file and --files-from cannot be mixed"
+				print_help_and_exit "Parameters --file and --files-from cannot be mixed"
-    fi
+			fi
-    shift
+			shift
-    ABA_FILE=$1
+			ABA_FILE=$1
-    shift
+			shift
-    ;;
+			;;
-  --files-from)
+		--files-from)
-    if [ -n "$ABA_FILE" ]; then
+			if [ -n "$ABA_FILE" ]; then
-      print_help_and_exit "Parameters --file and --files-from cannot be mixed"
+				print_help_and_exit "Parameters --file and --files-from cannot be mixed"
-    fi
+			fi
-    if [ -n "$ADDITIONAL_ARG" ]; then
+			if [ -n "$ADDITIONAL_ARG" ]; then
-      print_help_and_exit "Parameters --arg and --files-from cannot be mixed"
+				print_help_and_exit "Parameters --arg and --files-from cannot be mixed"
-    fi
+			fi
-    shift
+			shift
-    ABA_FILE_DIR=$1
+			ABA_FILE_DIR=$1
-    shift
+			shift
-    ;;
+			;;
-  -a | --arg)
+		-a | --arg)
-    if [ -n "$ABA_FILE_DIR" ]; then
+			if [ -n "$ABA_FILE_DIR" ]; then
-      print_help_and_exit "Parameters --arg and --files-from cannot be mixed"
+				print_help_and_exit "Parameters --arg and --files-from cannot be mixed"
-    fi
+			fi
-    shift
+			shift
-    ADDITIONAL_ARG=$1
+			ADDITIONAL_ARG=$1
-    shift
+			shift
-    ;;
+			;;
-  -t | --time)
+		-t | --time)
-    shift
+			shift
-    TIME_COMMANDS=yes
+			TIME_COMMANDS=yes
-    ;;
+			;;
-  --aba2sat)
+		--aba2sat)
-    shift
+			shift
-    ABA2SAT=$1
+			ABA2SAT=$1
-    shift
+			shift
-    ;;
+			;;
-  -*)
+		-*)
-    echo "Unknown option $1"
+			echo "Unknown option $1"
-    print_help_and_exit
+			print_help_and_exit
-    ;;
+			;;
-  *)
+		*)
-    POSITIONAL_ARGS+=("$1") # save positional arg
+			POSITIONAL_ARGS+=("$1") # save positional arg
-    shift                   # past argument
+			shift                   # past argument
-    ;;
+			;;
-  esac
+	esac
 done
 set -- "${POSITIONAL_ARGS[@]}" # restore positional parameters
 case "$PROBLEM" in
-dc-co | DC-CO)
+	dc-co | DC-CO)
-  if [ -n "$ABA_FILE_DIR" ]; then
+		if [ -n "$ABA_FILE_DIR" ]; then
-    # run for every file found in the directory
+			# run for every file found in the directory
-    for file in "$ABA_FILE_DIR"/*."$ABA_FILE_EXT"; do
+			for file in $(find "$ABA_FILE_DIR" -type f -iname "*.$ABA_FILE_EXT" | shuf); do
-      ABA_FILE="$file" ADDITIONAL_ARG="$(cat "$file.asm")" run_dc_co
+				ABA_FILE="$file" ADDITIONAL_ARG="$(cat "$file.asm")" run_dc_co
-    done
+			done
-  else
+		else
-    # run for the single configured file
+			# run for the single configured file
-    run_dc_co
+			run_dc_co
-  fi
+		fi
-  ;;
+		;;
-*)
+	*)
-  print_help_and_exit "Problem $PROBLEM is not supported"
+		print_help_and_exit "Problem $PROBLEM is not supported"
-  ;;
+		;;
 esac