From ad54f0755fbd46d346a79c4ace2e4107eac669ca Mon Sep 17 00:00:00 2001 From: Malte Tammena Date: Sat, 25 May 2024 10:15:27 +0200 Subject: [PATCH] small script adjustments to make the cluster happy --- .gitignore | 1 + scripts/decode-result-folder.py | 65 ++++---- scripts/sc-batch.sh | 18 ++- scripts/test.py | 18 +-- scripts/validate.sh | 260 ++++++++++++++++---------------- 5 files changed, 190 insertions(+), 172 deletions(-) diff --git a/.gitignore b/.gitignore index 9753bd0..4378c03 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ target/ scripts/__pycache__ acyclic output-* +all.csv diff --git a/scripts/decode-result-folder.py b/scripts/decode-result-folder.py index 2b07fc0..8ebfec5 100755 --- a/scripts/decode-result-folder.py +++ b/scripts/decode-result-folder.py @@ -21,38 +21,41 @@ def run(): out = [] # Using glob to match all .json files for file_path in glob.glob(os.path.join(folder_path, "*.json")): - # Open and read the contents of the file - with open(file_path, "r", encoding="utf-8") as json_file: - ( - _ident, - atom_count, - assumption_ratio, - max_rules_per_head, - max_rule_size, - _idx, - ) = file_path.split("_") - data = json.load(json_file)["results"] - aba2sat, aspforaba = ( - (data[0], data[1]) - if ( - data[0]["command"] == "aba2sat" - and data[1]["command"] == "aspforaba" + try: + # Open and read the contents of the file + with open(file_path, "r", encoding="utf-8") as json_file: + ( + _ident, + atom_count, + assumption_ratio, + max_rules_per_head, + max_rule_size, + _idx, + ) = file_path.split("_") + data = json.load(json_file)["results"] + aba2sat, aspforaba = ( + (data[0], data[1]) + if ( + data[0]["command"] == "aba2sat" + and data[1]["command"] == "aspforaba" + ) + else (data[1], data[0]) ) - else (data[1], data[0]) - ) - speedup = float(aspforaba['mean']) / float(aba2sat['mean']) - out.append({ - "atom_count": atom_count, - "assumption_ratio": assumption_ratio, - "max_rules_per_head": max_rules_per_head, - "max_rule_size": max_rule_size, - "time": aba2sat["mean"], - "stddev": aba2sat['stddev'], - "speedup": speedup, - }) - if count > 700: - break - count += 1 + speedup = float(aspforaba['mean']) / float(aba2sat['mean']) + out.append({ + "atom_count": atom_count, + "assumption_ratio": assumption_ratio, + "max_rules_per_head": max_rules_per_head, + "max_rule_size": max_rule_size, + "time": aba2sat["mean"], + "stddev": aba2sat['stddev'], + "speedup": speedup, + }) + if count > 700: + break + count += 1 + except Exception: + print(f'Failed to read {file_path}. Skipping..') with open(output, 'w') as output_file: output_file.write writer = csv.DictWriter(output_file, fieldnames=out[0].keys()) diff --git a/scripts/sc-batch.sh b/scripts/sc-batch.sh index 9ac2c32..e6fcc5e 100755 --- a/scripts/sc-batch.sh +++ b/scripts/sc-batch.sh @@ -3,12 +3,26 @@ # Batch script to run on sc.uni-leipzig.de cluster, i used # sbatch -a "1-$(cat acyclic.list | wc -l)" ./scripts/sc-batch.sh +# Somehow all paths used in spawned processes need to be absolute, +# there's probably a good explanation, but I don't have it + FILE_LIST=acyclic.list +# Pick line `$SLURM_ARRAY_TASK_ID` from the FILE_LIST +# This will probably cause issues if more processes are allocated +# than lines in the FILE_LIST, but who knows file="$(pwd)/$(awk "NR == $SLURM_ARRAY_TASK_ID" "$FILE_LIST")" +# Read the extra argument arg=$(cat "$file.asm") +# Make sure we get all the data in one central place OUTPUT_DIR="$(pwd)/output" - export OUTPUT_DIR -./validate --file "$file" --arg "$arg" --time --problem dc-co + +# This assumes that `validate` accepts the --no-rm flag, +# which is not a flag the script accepts, but recognized by +# the default bundler `nix bundle .#validate` uses. Required here +# to prevent the fastest process from cleaning the extracted +# package. Slower processes or those allocated later *will* fail +# without the flag +./validate --no-rm // --file "$file" --arg "$arg" --time --problem dc-co diff --git a/scripts/test.py b/scripts/test.py index 24673de..de60ac5 100755 --- a/scripts/test.py +++ b/scripts/test.py @@ -11,7 +11,7 @@ def read_and_visualize(csv_file): # Display the first few rows of the dataframe print(df.head()) - # Identify all the properties (assuming they are all columns except for 'runtime') + # Identify all the properties (assuming they are all columns except for some timings) properties = [col for col in df.columns if col != 'speedup' and col != 'time' and col != 'stddev'] # Pairplot to see general pairwise relationships, may help to understand the overall relationship between properties and runtime @@ -22,21 +22,21 @@ def read_and_visualize(csv_file): # Create scatter plots for each property against runtime for prop in properties: plt.figure(figsize=(10, 6)) - sns.scatterplot(x=df[prop], y=df['runtime']) - plt.title(f'Impact of {prop} on Runtime') + sns.scatterplot(x=df[prop], y=df['speedup']) + plt.title(f'Impact of {prop} on Speedup') plt.xlabel(prop) - plt.ylabel('Runtime') - plt.show() + plt.ylabel('Speedup') # Create box plots for categorical properties if any (e.g., difficulty level or type) against runtime for prop in properties: if df[prop].dtype == 'object': plt.figure(figsize=(10, 6)) - sns.boxplot(x=df[prop], y=df['runtime']) - plt.title(f'Impact of {prop} on Runtime') + sns.boxplot(x=df[prop], y=df['speedup']) + plt.title(f'Impact of {prop} on Speedup') plt.xlabel(prop) - plt.ylabel('Runtime') - plt.show() + plt.ylabel('Speedup') + + plt.show() # Example usage csv_file = 'all.csv' # Replace with your actual CSV file path diff --git a/scripts/validate.sh b/scripts/validate.sh index b4c7f40..000f11d 100755 --- a/scripts/validate.sh +++ b/scripts/validate.sh @@ -4,70 +4,70 @@ # best used through the bundled nix version `nix run .#validate` print_help_and_exit() { - if [ -n "$1" ]; then - printf "%s\n\n" "$1" - fi - printf "Usage: validate [OPTIONS] \n" - printf "\n" - printf "Options:\n" - printf " --aspforaba\n" - printf " Binary to use when calling aspforaba\n" - printf " -p, --problem\n" - printf " The problem to solve\n" - printf " -a, --arg\n" - printf " The additional argument for the problem\n" - printf " -f, --file\n" - printf " The file containing the problem in ABA format\n" - printf " -t, --time\n" - printf " Execute hyperfine to determine runtimes\n" - printf " --files-from\n" - printf " Use the following dir to read files, specify a single file with --file instead\n" - exit 1 + if [ -n "$1" ]; then + printf "%s\n\n" "$1" + fi + printf "Usage: validate [OPTIONS] \n" + printf "\n" + printf "Options:\n" + printf " --aspforaba\n" + printf " Binary to use when calling aspforaba\n" + printf " -p, --problem\n" + printf " The problem to solve\n" + printf " -a, --arg\n" + printf " The additional argument for the problem\n" + printf " -f, --file\n" + printf " The file containing the problem in ABA format\n" + printf " -t, --time\n" + printf " Execute hyperfine to determine runtimes\n" + printf " --files-from\n" + printf " Use the following dir to read files, specify a single file with --file instead\n" + exit 1 } format_time() { - COMMAND="$1" - FILE="$2" - mean=$(jq ".results[] | select(.command == \"$COMMAND\") | (.mean * 1000)" "$FILE") - stddev=$(jq ".results[] | select(.command == \"$COMMAND\") | (.stddev * 1000)" "$FILE") - printf "%7.3f±%7.3fms" "$mean" "$stddev" + COMMAND="$1" + FILE="$2" + mean=$(jq ".results[] | select(.command == \"$COMMAND\") | (.mean * 1000)" "$FILE") + stddev=$(jq ".results[] | select(.command == \"$COMMAND\") | (.stddev * 1000)" "$FILE") + printf "%7.3f±%7.3fms" "$mean" "$stddev" } run_dc_co() { - OUTPUT_DIR=${OUTPUT_DIR:-$(mktemp -d)} - mkdir -p "$OUTPUT_DIR" - JSON_FILE="$OUTPUT_DIR/$(basename "$ABA_FILE")-hyperfine.json" - # Restrict memory to 20GB - ulimit -v 20000000 - if [ -z "$ADDITIONAL_ARG" ]; then - print_help_and_exit "Parameter --arg is missing!" - fi - if [ -z "$ABA_FILE" ]; then - print_help_and_exit "Parameter --file is missing!" - fi - printf "===== %s ==== " "$(basename "$ABA_FILE")" - our_result=$("$ABA2SAT" --max-loops 0 --file "$ABA_FILE" dc-co --query "$ADDITIONAL_ARG" | tee "$OUTPUT_DIR/$(basename "$ABA_FILE")-aba2sat-result") - other_result=$("$ASPFORABA" --file "$ABA_FILE" --problem DC-CO --query "$ADDITIONAL_ARG" | tee "$OUTPUT_DIR/$(basename "$ABA_FILE")-aspforaba-result") - if [ "$our_result" != "$other_result" ]; then - printf "❌\n" - else - printf "✅\n" - fi - printf "Argument: %s\n" "$ADDITIONAL_ARG" + OUTPUT_DIR=${OUTPUT_DIR:-$(mktemp -d)} + mkdir -p "$OUTPUT_DIR" + JSON_FILE="$OUTPUT_DIR/$(basename "$ABA_FILE")-hyperfine.json" + # Restrict memory to 20GB + ulimit -v 20000000 + if [ -z "$ADDITIONAL_ARG" ]; then + print_help_and_exit "Parameter --arg is missing!" + fi + if [ -z "$ABA_FILE" ]; then + print_help_and_exit "Parameter --file is missing!" + fi + printf "===== %s ==== " "$(basename "$ABA_FILE")" + our_result=$("$ABA2SAT" --max-loops 0 --file "$ABA_FILE" dc-co --query "$ADDITIONAL_ARG" | tee "$OUTPUT_DIR/$(basename "$ABA_FILE")-aba2sat-result") + other_result=$("$ASPFORABA" --file "$ABA_FILE" --problem DC-CO --query "$ADDITIONAL_ARG" | tee "$OUTPUT_DIR/$(basename "$ABA_FILE")-aspforaba-result") + if [ "$our_result" != "$other_result" ]; then + printf "❌\n" + else + printf "✅\n" + fi + printf "Argument: %s\n" "$ADDITIONAL_ARG" - if [ -n "$TIME_COMMANDS" ]; then - $HYPERFINE --shell=none \ - --export-json "$JSON_FILE" \ - --command-name "aba2sat" \ - "$ABA2SAT --max-loops 0 --file \"$ABA_FILE\" dc-co --query \"$ADDITIONAL_ARG\"" \ - --command-name "aspforaba" \ - "$ASPFORABA --file \"$ABA_FILE\" --problem DC-CO --query \"$ADDITIONAL_ARG\"" 1>/dev/null 2>&1 - printf "Our: %3s %30s\n" "$our_result" "$(format_time "aba2sat" "$JSON_FILE")" - printf "Their: %3s %30s\n" "$other_result" "$(format_time "aspforaba" "$JSON_FILE")" - else - printf "Our: %3s\n" "$our_result" - printf "Their: %3s\n" "$other_result" - fi + if [ -n "$TIME_COMMANDS" ]; then + $HYPERFINE --shell=none \ + --export-json "$JSON_FILE" \ + --command-name "aba2sat" \ + "$ABA2SAT --max-loops 0 --file \"$ABA_FILE\" dc-co --query \"$ADDITIONAL_ARG\"" \ + --command-name "aspforaba" \ + "$ASPFORABA --file \"$ABA_FILE\" --problem DC-CO --query \"$ADDITIONAL_ARG\"" 1>/dev/null 2>&1 + printf "Our: %3s %30s\n" "$our_result" "$(format_time "aba2sat" "$JSON_FILE")" + printf "Their: %3s %30s\n" "$other_result" "$(format_time "aspforaba" "$JSON_FILE")" + else + printf "Our: %3s\n" "$our_result" + printf "Their: %3s\n" "$other_result" + fi } @@ -83,82 +83,82 @@ ADDITIONAL_ARG= TIME_COMMANDS= while [[ $# -gt 0 ]]; do - case $1 in - -h | --help) - print_help_and_exit - ;; - --aspforaba) - shift - ASPFORABA=$1 - shift - ;; - -p | --problem) - shift - PROBLEM=$1 - shift - ;; - -f | --file) - if [ -n "$ABA_FILE_DIR" ]; then - print_help_and_exit "Parameters --file and --files-from cannot be mixed" - fi - shift - ABA_FILE=$1 - shift - ;; - --files-from) - if [ -n "$ABA_FILE" ]; then - print_help_and_exit "Parameters --file and --files-from cannot be mixed" - fi - if [ -n "$ADDITIONAL_ARG" ]; then - print_help_and_exit "Parameters --arg and --files-from cannot be mixed" - fi - shift - ABA_FILE_DIR=$1 - shift - ;; - -a | --arg) - if [ -n "$ABA_FILE_DIR" ]; then - print_help_and_exit "Parameters --arg and --files-from cannot be mixed" - fi - shift - ADDITIONAL_ARG=$1 - shift - ;; - -t | --time) - shift - TIME_COMMANDS=yes - ;; - --aba2sat) - shift - ABA2SAT=$1 - shift - ;; - -*) - echo "Unknown option $1" - print_help_and_exit - ;; - *) - POSITIONAL_ARGS+=("$1") # save positional arg - shift # past argument - ;; - esac + case $1 in + -h | --help) + print_help_and_exit "" + ;; + --aspforaba) + shift + ASPFORABA=$1 + shift + ;; + -p | --problem) + shift + PROBLEM=$1 + shift + ;; + -f | --file) + if [ -n "$ABA_FILE_DIR" ]; then + print_help_and_exit "Parameters --file and --files-from cannot be mixed" + fi + shift + ABA_FILE=$1 + shift + ;; + --files-from) + if [ -n "$ABA_FILE" ]; then + print_help_and_exit "Parameters --file and --files-from cannot be mixed" + fi + if [ -n "$ADDITIONAL_ARG" ]; then + print_help_and_exit "Parameters --arg and --files-from cannot be mixed" + fi + shift + ABA_FILE_DIR=$1 + shift + ;; + -a | --arg) + if [ -n "$ABA_FILE_DIR" ]; then + print_help_and_exit "Parameters --arg and --files-from cannot be mixed" + fi + shift + ADDITIONAL_ARG=$1 + shift + ;; + -t | --time) + shift + TIME_COMMANDS=yes + ;; + --aba2sat) + shift + ABA2SAT=$1 + shift + ;; + -*) + echo "Unknown option $1" + print_help_and_exit + ;; + *) + POSITIONAL_ARGS+=("$1") # save positional arg + shift # past argument + ;; + esac done set -- "${POSITIONAL_ARGS[@]}" # restore positional parameters case "$PROBLEM" in -dc-co | DC-CO) - if [ -n "$ABA_FILE_DIR" ]; then - # run for every file found in the directory - for file in "$ABA_FILE_DIR"/*."$ABA_FILE_EXT"; do - ABA_FILE="$file" ADDITIONAL_ARG="$(cat "$file.asm")" run_dc_co - done - else - # run for the single configured file - run_dc_co - fi - ;; -*) - print_help_and_exit "Problem $PROBLEM is not supported" - ;; + dc-co | DC-CO) + if [ -n "$ABA_FILE_DIR" ]; then + # run for every file found in the directory + for file in $(find "$ABA_FILE_DIR" -type f -iname "*.$ABA_FILE_EXT" | shuf); do + ABA_FILE="$file" ADDITIONAL_ARG="$(cat "$file.asm")" run_dc_co + done + else + # run for the single configured file + run_dc_co + fi + ;; + *) + print_help_and_exit "Problem $PROBLEM is not supported" + ;; esac