small script adjustments to make the cluster happy

This commit is contained in:
Malte Tammena 2024-05-25 10:15:27 +02:00
parent 27ed3caf0b
commit ad54f0755f
5 changed files with 190 additions and 172 deletions

1
.gitignore vendored
View file

@ -5,3 +5,4 @@ target/
scripts/__pycache__ scripts/__pycache__
acyclic acyclic
output-* output-*
all.csv

View file

@ -21,38 +21,41 @@ def run():
out = [] out = []
# Using glob to match all .json files # Using glob to match all .json files
for file_path in glob.glob(os.path.join(folder_path, "*.json")): for file_path in glob.glob(os.path.join(folder_path, "*.json")):
# Open and read the contents of the file try:
with open(file_path, "r", encoding="utf-8") as json_file: # Open and read the contents of the file
( with open(file_path, "r", encoding="utf-8") as json_file:
_ident, (
atom_count, _ident,
assumption_ratio, atom_count,
max_rules_per_head, assumption_ratio,
max_rule_size, max_rules_per_head,
_idx, max_rule_size,
) = file_path.split("_") _idx,
data = json.load(json_file)["results"] ) = file_path.split("_")
aba2sat, aspforaba = ( data = json.load(json_file)["results"]
(data[0], data[1]) aba2sat, aspforaba = (
if ( (data[0], data[1])
data[0]["command"] == "aba2sat" if (
and data[1]["command"] == "aspforaba" data[0]["command"] == "aba2sat"
and data[1]["command"] == "aspforaba"
)
else (data[1], data[0])
) )
else (data[1], data[0]) speedup = float(aspforaba['mean']) / float(aba2sat['mean'])
) out.append({
speedup = float(aspforaba['mean']) / float(aba2sat['mean']) "atom_count": atom_count,
out.append({ "assumption_ratio": assumption_ratio,
"atom_count": atom_count, "max_rules_per_head": max_rules_per_head,
"assumption_ratio": assumption_ratio, "max_rule_size": max_rule_size,
"max_rules_per_head": max_rules_per_head, "time": aba2sat["mean"],
"max_rule_size": max_rule_size, "stddev": aba2sat['stddev'],
"time": aba2sat["mean"], "speedup": speedup,
"stddev": aba2sat['stddev'], })
"speedup": speedup, if count > 700:
}) break
if count > 700: count += 1
break except Exception:
count += 1 print(f'Failed to read {file_path}. Skipping..')
with open(output, 'w') as output_file: with open(output, 'w') as output_file:
output_file.write output_file.write
writer = csv.DictWriter(output_file, fieldnames=out[0].keys()) writer = csv.DictWriter(output_file, fieldnames=out[0].keys())

View file

@ -3,12 +3,26 @@
# Batch script to run on the sc.uni-leipzig.de cluster, I used # Batch script to run on the sc.uni-leipzig.de cluster, I used
# sbatch -a "1-$(cat acyclic.list | wc -l)" ./scripts/sc-batch.sh # sbatch -a "1-$(cat acyclic.list | wc -l)" ./scripts/sc-batch.sh
# Somehow all paths used in spawned processes need to be absolute.
# There is probably a good explanation, but I don't have it.
FILE_LIST=acyclic.list FILE_LIST=acyclic.list
# Pick line `$SLURM_ARRAY_TASK_ID` from the FILE_LIST.
# If more array tasks are allocated than there are lines in the
# FILE_LIST, awk prints nothing and `file` is just "$(pwd)/",
# which will probably cause issues further down.
file="$(pwd)/$(awk "NR == $SLURM_ARRAY_TASK_ID" "$FILE_LIST")" file="$(pwd)/$(awk "NR == $SLURM_ARRAY_TASK_ID" "$FILE_LIST")"
# Read the extra argument
arg=$(cat "$file.asm") arg=$(cat "$file.asm")
# Make sure we get all the data in one central place
OUTPUT_DIR="$(pwd)/output" OUTPUT_DIR="$(pwd)/output"
export OUTPUT_DIR export OUTPUT_DIR
./validate --file "$file" --arg "$arg" --time --problem dc-co
# This assumes that `validate` accepts the --no-rm flag. It is
# not a flag of the script itself, but it is recognized by the
# default bundler that `nix bundle .#validate` uses. It is required
# here to prevent the fastest process from cleaning up the extracted
# package. Slower processes, or those allocated later, *will* fail
# without the flag.
./validate --no-rm // --file "$file" --arg "$arg" --time --problem dc-co

View file

@ -11,7 +11,7 @@ def read_and_visualize(csv_file):
# Display the first few rows of the dataframe # Display the first few rows of the dataframe
print(df.head()) print(df.head())
# Identify all the properties (assuming they are all columns except for 'runtime') # Identify all the properties (assuming they are all columns except for some timings)
properties = [col for col in df.columns if col != 'speedup' and col != 'time' and col != 'stddev'] properties = [col for col in df.columns if col != 'speedup' and col != 'time' and col != 'stddev']
# Pairplot to see general pairwise relationships, may help to understand the overall relationship between properties and runtime # Pairplot to see general pairwise relationships, may help to understand the overall relationship between properties and runtime
@ -22,21 +22,21 @@ def read_and_visualize(csv_file):
# Create scatter plots for each property against runtime # Create scatter plots for each property against runtime
for prop in properties: for prop in properties:
plt.figure(figsize=(10, 6)) plt.figure(figsize=(10, 6))
sns.scatterplot(x=df[prop], y=df['runtime']) sns.scatterplot(x=df[prop], y=df['speedup'])
plt.title(f'Impact of {prop} on Runtime') plt.title(f'Impact of {prop} on Speedup')
plt.xlabel(prop) plt.xlabel(prop)
plt.ylabel('Runtime') plt.ylabel('Speedup')
plt.show()
# Create box plots for categorical properties if any (e.g., difficulty level or type) against runtime # Create box plots for categorical properties if any (e.g., difficulty level or type) against runtime
for prop in properties: for prop in properties:
if df[prop].dtype == 'object': if df[prop].dtype == 'object':
plt.figure(figsize=(10, 6)) plt.figure(figsize=(10, 6))
sns.boxplot(x=df[prop], y=df['runtime']) sns.boxplot(x=df[prop], y=df['speedup'])
plt.title(f'Impact of {prop} on Runtime') plt.title(f'Impact of {prop} on Speedup')
plt.xlabel(prop) plt.xlabel(prop)
plt.ylabel('Runtime') plt.ylabel('Speedup')
plt.show()
plt.show()
# Example usage # Example usage
csv_file = 'all.csv' # Replace with your actual CSV file path csv_file = 'all.csv' # Replace with your actual CSV file path

View file

@ -4,70 +4,70 @@
# best used through the bundled nix version `nix run .#validate` # best used through the bundled nix version `nix run .#validate`
print_help_and_exit() { print_help_and_exit() {
if [ -n "$1" ]; then if [ -n "$1" ]; then
printf "%s\n\n" "$1" printf "%s\n\n" "$1"
fi fi
printf "Usage: validate [OPTIONS] \n" printf "Usage: validate [OPTIONS] \n"
printf "\n" printf "\n"
printf "Options:\n" printf "Options:\n"
printf " --aspforaba\n" printf " --aspforaba\n"
printf " Binary to use when calling aspforaba\n" printf " Binary to use when calling aspforaba\n"
printf " -p, --problem\n" printf " -p, --problem\n"
printf " The problem to solve\n" printf " The problem to solve\n"
printf " -a, --arg\n" printf " -a, --arg\n"
printf " The additional argument for the problem\n" printf " The additional argument for the problem\n"
printf " -f, --file\n" printf " -f, --file\n"
printf " The file containing the problem in ABA format\n" printf " The file containing the problem in ABA format\n"
printf " -t, --time\n" printf " -t, --time\n"
printf " Execute hyperfine to determine runtimes\n" printf " Execute hyperfine to determine runtimes\n"
printf " --files-from\n" printf " --files-from\n"
printf " Use the following dir to read files, specify a single file with --file instead\n" printf " Use the following dir to read files, specify a single file with --file instead\n"
exit 1 exit 1
} }
format_time() { format_time() {
COMMAND="$1" COMMAND="$1"
FILE="$2" FILE="$2"
mean=$(jq ".results[] | select(.command == \"$COMMAND\") | (.mean * 1000)" "$FILE") mean=$(jq ".results[] | select(.command == \"$COMMAND\") | (.mean * 1000)" "$FILE")
stddev=$(jq ".results[] | select(.command == \"$COMMAND\") | (.stddev * 1000)" "$FILE") stddev=$(jq ".results[] | select(.command == \"$COMMAND\") | (.stddev * 1000)" "$FILE")
printf "%7.3f±%7.3fms" "$mean" "$stddev" printf "%7.3f±%7.3fms" "$mean" "$stddev"
} }
run_dc_co() { run_dc_co() {
OUTPUT_DIR=${OUTPUT_DIR:-$(mktemp -d)} OUTPUT_DIR=${OUTPUT_DIR:-$(mktemp -d)}
mkdir -p "$OUTPUT_DIR" mkdir -p "$OUTPUT_DIR"
JSON_FILE="$OUTPUT_DIR/$(basename "$ABA_FILE")-hyperfine.json" JSON_FILE="$OUTPUT_DIR/$(basename "$ABA_FILE")-hyperfine.json"
# Restrict memory to 20GB # Restrict memory to 20GB
ulimit -v 20000000 ulimit -v 20000000
if [ -z "$ADDITIONAL_ARG" ]; then if [ -z "$ADDITIONAL_ARG" ]; then
print_help_and_exit "Parameter --arg is missing!" print_help_and_exit "Parameter --arg is missing!"
fi fi
if [ -z "$ABA_FILE" ]; then if [ -z "$ABA_FILE" ]; then
print_help_and_exit "Parameter --file is missing!" print_help_and_exit "Parameter --file is missing!"
fi fi
printf "===== %s ==== " "$(basename "$ABA_FILE")" printf "===== %s ==== " "$(basename "$ABA_FILE")"
our_result=$("$ABA2SAT" --max-loops 0 --file "$ABA_FILE" dc-co --query "$ADDITIONAL_ARG" | tee "$OUTPUT_DIR/$(basename "$ABA_FILE")-aba2sat-result") our_result=$("$ABA2SAT" --max-loops 0 --file "$ABA_FILE" dc-co --query "$ADDITIONAL_ARG" | tee "$OUTPUT_DIR/$(basename "$ABA_FILE")-aba2sat-result")
other_result=$("$ASPFORABA" --file "$ABA_FILE" --problem DC-CO --query "$ADDITIONAL_ARG" | tee "$OUTPUT_DIR/$(basename "$ABA_FILE")-aspforaba-result") other_result=$("$ASPFORABA" --file "$ABA_FILE" --problem DC-CO --query "$ADDITIONAL_ARG" | tee "$OUTPUT_DIR/$(basename "$ABA_FILE")-aspforaba-result")
if [ "$our_result" != "$other_result" ]; then if [ "$our_result" != "$other_result" ]; then
printf "❌\n" printf "❌\n"
else else
printf "✅\n" printf "✅\n"
fi fi
printf "Argument: %s\n" "$ADDITIONAL_ARG" printf "Argument: %s\n" "$ADDITIONAL_ARG"
if [ -n "$TIME_COMMANDS" ]; then if [ -n "$TIME_COMMANDS" ]; then
$HYPERFINE --shell=none \ $HYPERFINE --shell=none \
--export-json "$JSON_FILE" \ --export-json "$JSON_FILE" \
--command-name "aba2sat" \ --command-name "aba2sat" \
"$ABA2SAT --max-loops 0 --file \"$ABA_FILE\" dc-co --query \"$ADDITIONAL_ARG\"" \ "$ABA2SAT --max-loops 0 --file \"$ABA_FILE\" dc-co --query \"$ADDITIONAL_ARG\"" \
--command-name "aspforaba" \ --command-name "aspforaba" \
"$ASPFORABA --file \"$ABA_FILE\" --problem DC-CO --query \"$ADDITIONAL_ARG\"" 1>/dev/null 2>&1 "$ASPFORABA --file \"$ABA_FILE\" --problem DC-CO --query \"$ADDITIONAL_ARG\"" 1>/dev/null 2>&1
printf "Our: %3s %30s\n" "$our_result" "$(format_time "aba2sat" "$JSON_FILE")" printf "Our: %3s %30s\n" "$our_result" "$(format_time "aba2sat" "$JSON_FILE")"
printf "Their: %3s %30s\n" "$other_result" "$(format_time "aspforaba" "$JSON_FILE")" printf "Their: %3s %30s\n" "$other_result" "$(format_time "aspforaba" "$JSON_FILE")"
else else
printf "Our: %3s\n" "$our_result" printf "Our: %3s\n" "$our_result"
printf "Their: %3s\n" "$other_result" printf "Their: %3s\n" "$other_result"
fi fi
} }
@ -83,82 +83,82 @@ ADDITIONAL_ARG=
TIME_COMMANDS= TIME_COMMANDS=
while [[ $# -gt 0 ]]; do while [[ $# -gt 0 ]]; do
case $1 in case $1 in
-h | --help) -h | --help)
print_help_and_exit print_help_and_exit ""
;; ;;
--aspforaba) --aspforaba)
shift shift
ASPFORABA=$1 ASPFORABA=$1
shift shift
;; ;;
-p | --problem) -p | --problem)
shift shift
PROBLEM=$1 PROBLEM=$1
shift shift
;; ;;
-f | --file) -f | --file)
if [ -n "$ABA_FILE_DIR" ]; then if [ -n "$ABA_FILE_DIR" ]; then
print_help_and_exit "Parameters --file and --files-from cannot be mixed" print_help_and_exit "Parameters --file and --files-from cannot be mixed"
fi fi
shift shift
ABA_FILE=$1 ABA_FILE=$1
shift shift
;; ;;
--files-from) --files-from)
if [ -n "$ABA_FILE" ]; then if [ -n "$ABA_FILE" ]; then
print_help_and_exit "Parameters --file and --files-from cannot be mixed" print_help_and_exit "Parameters --file and --files-from cannot be mixed"
fi fi
if [ -n "$ADDITIONAL_ARG" ]; then if [ -n "$ADDITIONAL_ARG" ]; then
print_help_and_exit "Parameters --arg and --files-from cannot be mixed" print_help_and_exit "Parameters --arg and --files-from cannot be mixed"
fi fi
shift shift
ABA_FILE_DIR=$1 ABA_FILE_DIR=$1
shift shift
;; ;;
-a | --arg) -a | --arg)
if [ -n "$ABA_FILE_DIR" ]; then if [ -n "$ABA_FILE_DIR" ]; then
print_help_and_exit "Parameters --arg and --files-from cannot be mixed" print_help_and_exit "Parameters --arg and --files-from cannot be mixed"
fi fi
shift shift
ADDITIONAL_ARG=$1 ADDITIONAL_ARG=$1
shift shift
;; ;;
-t | --time) -t | --time)
shift shift
TIME_COMMANDS=yes TIME_COMMANDS=yes
;; ;;
--aba2sat) --aba2sat)
shift shift
ABA2SAT=$1 ABA2SAT=$1
shift shift
;; ;;
-*) -*)
echo "Unknown option $1" echo "Unknown option $1"
print_help_and_exit print_help_and_exit
;; ;;
*) *)
POSITIONAL_ARGS+=("$1") # save positional arg POSITIONAL_ARGS+=("$1") # save positional arg
shift # past argument shift # past argument
;; ;;
esac esac
done done
set -- "${POSITIONAL_ARGS[@]}" # restore positional parameters set -- "${POSITIONAL_ARGS[@]}" # restore positional parameters
case "$PROBLEM" in case "$PROBLEM" in
dc-co | DC-CO) dc-co | DC-CO)
if [ -n "$ABA_FILE_DIR" ]; then if [ -n "$ABA_FILE_DIR" ]; then
# run for every file found in the directory # run for every file found in the directory
for file in "$ABA_FILE_DIR"/*."$ABA_FILE_EXT"; do for file in $(find "$ABA_FILE_DIR" -type f -iname "*.$ABA_FILE_EXT" | shuf); do
ABA_FILE="$file" ADDITIONAL_ARG="$(cat "$file.asm")" run_dc_co ABA_FILE="$file" ADDITIONAL_ARG="$(cat "$file.asm")" run_dc_co
done done
else else
# run for the single configured file # run for the single configured file
run_dc_co run_dc_co
fi fi
;; ;;
*) *)
print_help_and_exit "Problem $PROBLEM is not supported" print_help_and_exit "Problem $PROBLEM is not supported"
;; ;;
esac esac