small script adjustments to make the cluster happy

This commit is contained in:
Malte Tammena 2024-05-25 10:15:27 +02:00
parent 27ed3caf0b
commit ad54f0755f
5 changed files with 190 additions and 172 deletions

1
.gitignore vendored
View file

@ -5,3 +5,4 @@ target/
scripts/__pycache__
acyclic
output-*
all.csv

View file

@ -21,38 +21,41 @@ def run():
out = []
# Using glob to match all .json files
for file_path in glob.glob(os.path.join(folder_path, "*.json")):
# Open and read the contents of the file
with open(file_path, "r", encoding="utf-8") as json_file:
(
_ident,
atom_count,
assumption_ratio,
max_rules_per_head,
max_rule_size,
_idx,
) = file_path.split("_")
data = json.load(json_file)["results"]
aba2sat, aspforaba = (
(data[0], data[1])
if (
data[0]["command"] == "aba2sat"
and data[1]["command"] == "aspforaba"
try:
# Open and read the contents of the file
with open(file_path, "r", encoding="utf-8") as json_file:
(
_ident,
atom_count,
assumption_ratio,
max_rules_per_head,
max_rule_size,
_idx,
) = file_path.split("_")
data = json.load(json_file)["results"]
aba2sat, aspforaba = (
(data[0], data[1])
if (
data[0]["command"] == "aba2sat"
and data[1]["command"] == "aspforaba"
)
else (data[1], data[0])
)
else (data[1], data[0])
)
speedup = float(aspforaba['mean']) / float(aba2sat['mean'])
out.append({
"atom_count": atom_count,
"assumption_ratio": assumption_ratio,
"max_rules_per_head": max_rules_per_head,
"max_rule_size": max_rule_size,
"time": aba2sat["mean"],
"stddev": aba2sat['stddev'],
"speedup": speedup,
})
if count > 700:
break
count += 1
speedup = float(aspforaba['mean']) / float(aba2sat['mean'])
out.append({
"atom_count": atom_count,
"assumption_ratio": assumption_ratio,
"max_rules_per_head": max_rules_per_head,
"max_rule_size": max_rule_size,
"time": aba2sat["mean"],
"stddev": aba2sat['stddev'],
"speedup": speedup,
})
if count > 700:
break
count += 1
except Exception:
print(f'Failed to read {file_path}. Skipping..')
with open(output, 'w') as output_file:
output_file.write
writer = csv.DictWriter(output_file, fieldnames=out[0].keys())

View file

@ -3,12 +3,26 @@
# Batch script to run on the sc.uni-leipzig.de cluster; I used
# sbatch -a "1-$(cat acyclic.list | wc -l)" ./scripts/sc-batch.sh
# Somehow all paths used in spawned processes need to be absolute,
# there's probably a good explanation, but I don't have it
FILE_LIST=acyclic.list
# Pick line `$SLURM_ARRAY_TASK_ID` from the FILE_LIST
# This will probably cause issues if more processes are allocated
# than lines in the FILE_LIST, but who knows
file="$(pwd)/$(awk "NR == $SLURM_ARRAY_TASK_ID" "$FILE_LIST")"
# Read the extra argument
arg=$(cat "$file.asm")
# Make sure we get all the data in one central place
OUTPUT_DIR="$(pwd)/output"
export OUTPUT_DIR
./validate --file "$file" --arg "$arg" --time --problem dc-co
# This assumes that `validate` accepts the --no-rm flag. The script
# itself does not accept that flag, but it is recognized by the
# default bundler that `nix bundle .#validate` uses. Required here
# to prevent the fastest process from cleaning up the extracted
# package. Slower processes, or those allocated later, *will* fail
# without the flag
./validate --no-rm // --file "$file" --arg "$arg" --time --problem dc-co

View file

@ -11,7 +11,7 @@ def read_and_visualize(csv_file):
# Display the first few rows of the dataframe
print(df.head())
# Identify all the properties (assuming they are all columns except for 'runtime')
# Identify all the properties (assuming they are all columns except the result columns 'speedup', 'time' and 'stddev')
properties = [col for col in df.columns if col != 'speedup' and col != 'time' and col != 'stddev']
# Pairplot to see general pairwise relationships, may help to understand the overall relationship between properties and runtime
@ -22,21 +22,21 @@ def read_and_visualize(csv_file):
# Create scatter plots for each property against runtime
for prop in properties:
plt.figure(figsize=(10, 6))
sns.scatterplot(x=df[prop], y=df['runtime'])
plt.title(f'Impact of {prop} on Runtime')
sns.scatterplot(x=df[prop], y=df['speedup'])
plt.title(f'Impact of {prop} on Speedup')
plt.xlabel(prop)
plt.ylabel('Runtime')
plt.show()
plt.ylabel('Speedup')
# Create box plots for categorical properties if any (e.g., difficulty level or type) against runtime
for prop in properties:
if df[prop].dtype == 'object':
plt.figure(figsize=(10, 6))
sns.boxplot(x=df[prop], y=df['runtime'])
plt.title(f'Impact of {prop} on Runtime')
sns.boxplot(x=df[prop], y=df['speedup'])
plt.title(f'Impact of {prop} on Speedup')
plt.xlabel(prop)
plt.ylabel('Runtime')
plt.show()
plt.ylabel('Speedup')
plt.show()
# Example usage
csv_file = 'all.csv' # Replace with your actual CSV file path

View file

@ -4,70 +4,70 @@
# best used through the bundled nix version `nix run .#validate`
# Print an optional message followed by the usage text, then exit.
#
# Arguments:
#   $1  optional message; when non-empty it is printed before the usage text
#
# Always terminates the script with exit status 1, including when it is
# called for -h/--help.
# NOTE(review): the option list below does not mention --aba2sat or -h/--help,
# although the argument parser further down accepts both.
# NOTE(review): the body appears twice in this listing (it looks like the old
# and the re-indented lines of a diff shown together); as written, everything
# after the first `exit 1` is unreachable.
print_help_and_exit() {
if [ -n "$1" ]; then
printf "%s\n\n" "$1"
fi
printf "Usage: validate [OPTIONS] \n"
printf "\n"
printf "Options:\n"
printf " --aspforaba\n"
printf " Binary to use when calling aspforaba\n"
printf " -p, --problem\n"
printf " The problem to solve\n"
printf " -a, --arg\n"
printf " The additional argument for the problem\n"
printf " -f, --file\n"
printf " The file containing the problem in ABA format\n"
printf " -t, --time\n"
printf " Execute hyperfine to determine runtimes\n"
printf " --files-from\n"
printf " Use the following dir to read files, specify a single file with --file instead\n"
exit 1
if [ -n "$1" ]; then
printf "%s\n\n" "$1"
fi
printf "Usage: validate [OPTIONS] \n"
printf "\n"
printf "Options:\n"
printf " --aspforaba\n"
printf " Binary to use when calling aspforaba\n"
printf " -p, --problem\n"
printf " The problem to solve\n"
printf " -a, --arg\n"
printf " The additional argument for the problem\n"
printf " -f, --file\n"
printf " The file containing the problem in ABA format\n"
printf " -t, --time\n"
printf " Execute hyperfine to determine runtimes\n"
printf " --files-from\n"
printf " Use the following dir to read files, specify a single file with --file instead\n"
exit 1
}
# Print "<mean>±<stddev>ms" for one benchmarked command.
#
# Arguments:
#   $1  value of the `.command` field to select from the results
#   $2  JSON file to read; must contain a top-level `results` array
#
# jq multiplies `.mean` and `.stddev` by 1000 and the output is labelled "ms",
# so the file presumably stores seconds (hyperfine's export format) — confirm.
# COMMAND, FILE, mean and stddev are not declared `local`, so they are set in
# the caller's scope.
# NOTE(review): the body appears twice in this listing (apparently the old and
# the re-indented lines of a diff shown together).
format_time() {
COMMAND="$1"
FILE="$2"
mean=$(jq ".results[] | select(.command == \"$COMMAND\") | (.mean * 1000)" "$FILE")
stddev=$(jq ".results[] | select(.command == \"$COMMAND\") | (.stddev * 1000)" "$FILE")
printf "%7.3f±%7.3fms" "$mean" "$stddev"
COMMAND="$1"
FILE="$2"
mean=$(jq ".results[] | select(.command == \"$COMMAND\") | (.mean * 1000)" "$FILE")
stddev=$(jq ".results[] | select(.command == \"$COMMAND\") | (.stddev * 1000)" "$FILE")
printf "%7.3f±%7.3fms" "$mean" "$stddev"
}
# Run the dc-co query on a single file with both solvers and compare them.
#
# Reads (from the surrounding script / environment):
#   ABA_FILE        problem file
#   ADDITIONAL_ARG  value passed as --query
#   ABA2SAT, ASPFORABA, HYPERFINE  executables to call
#   TIME_COMMANDS   when non-empty, also benchmark both solvers via $HYPERFINE
#   OUTPUT_DIR      where results are stored; defaults to a fresh `mktemp -d`
#
# Writes "<basename>-aba2sat-result", "<basename>-aspforaba-result" and, when
# timing, "<basename>-hyperfine.json" into OUTPUT_DIR. Prints ✅ when both
# solvers printed the same output and ❌ otherwise. When timing, hyperfine's
# own stdout/stderr are discarded; only the exported JSON is read back through
# format_time.
# NOTE(review): the --arg / --file presence checks run only after OUTPUT_DIR
# has been created and JSON_FILE has been derived from $ABA_FILE.
# NOTE(review): the statements appear twice in this listing (apparently the
# old and the re-indented lines of a diff shown together), in two groups.
run_dc_co() {
OUTPUT_DIR=${OUTPUT_DIR:-$(mktemp -d)}
mkdir -p "$OUTPUT_DIR"
JSON_FILE="$OUTPUT_DIR/$(basename "$ABA_FILE")-hyperfine.json"
# Restrict memory to 20GB
ulimit -v 20000000
if [ -z "$ADDITIONAL_ARG" ]; then
print_help_and_exit "Parameter --arg is missing!"
fi
if [ -z "$ABA_FILE" ]; then
print_help_and_exit "Parameter --file is missing!"
fi
printf "===== %s ==== " "$(basename "$ABA_FILE")"
our_result=$("$ABA2SAT" --max-loops 0 --file "$ABA_FILE" dc-co --query "$ADDITIONAL_ARG" | tee "$OUTPUT_DIR/$(basename "$ABA_FILE")-aba2sat-result")
other_result=$("$ASPFORABA" --file "$ABA_FILE" --problem DC-CO --query "$ADDITIONAL_ARG" | tee "$OUTPUT_DIR/$(basename "$ABA_FILE")-aspforaba-result")
if [ "$our_result" != "$other_result" ]; then
printf "❌\n"
else
printf "✅\n"
fi
printf "Argument: %s\n" "$ADDITIONAL_ARG"
OUTPUT_DIR=${OUTPUT_DIR:-$(mktemp -d)}
mkdir -p "$OUTPUT_DIR"
JSON_FILE="$OUTPUT_DIR/$(basename "$ABA_FILE")-hyperfine.json"
# Restrict memory to 20GB
ulimit -v 20000000
if [ -z "$ADDITIONAL_ARG" ]; then
print_help_and_exit "Parameter --arg is missing!"
fi
if [ -z "$ABA_FILE" ]; then
print_help_and_exit "Parameter --file is missing!"
fi
printf "===== %s ==== " "$(basename "$ABA_FILE")"
our_result=$("$ABA2SAT" --max-loops 0 --file "$ABA_FILE" dc-co --query "$ADDITIONAL_ARG" | tee "$OUTPUT_DIR/$(basename "$ABA_FILE")-aba2sat-result")
other_result=$("$ASPFORABA" --file "$ABA_FILE" --problem DC-CO --query "$ADDITIONAL_ARG" | tee "$OUTPUT_DIR/$(basename "$ABA_FILE")-aspforaba-result")
if [ "$our_result" != "$other_result" ]; then
printf "❌\n"
else
printf "✅\n"
fi
printf "Argument: %s\n" "$ADDITIONAL_ARG"
if [ -n "$TIME_COMMANDS" ]; then
$HYPERFINE --shell=none \
--export-json "$JSON_FILE" \
--command-name "aba2sat" \
"$ABA2SAT --max-loops 0 --file \"$ABA_FILE\" dc-co --query \"$ADDITIONAL_ARG\"" \
--command-name "aspforaba" \
"$ASPFORABA --file \"$ABA_FILE\" --problem DC-CO --query \"$ADDITIONAL_ARG\"" 1>/dev/null 2>&1
printf "Our: %3s %30s\n" "$our_result" "$(format_time "aba2sat" "$JSON_FILE")"
printf "Their: %3s %30s\n" "$other_result" "$(format_time "aspforaba" "$JSON_FILE")"
else
printf "Our: %3s\n" "$our_result"
printf "Their: %3s\n" "$other_result"
fi
if [ -n "$TIME_COMMANDS" ]; then
$HYPERFINE --shell=none \
--export-json "$JSON_FILE" \
--command-name "aba2sat" \
"$ABA2SAT --max-loops 0 --file \"$ABA_FILE\" dc-co --query \"$ADDITIONAL_ARG\"" \
--command-name "aspforaba" \
"$ASPFORABA --file \"$ABA_FILE\" --problem DC-CO --query \"$ADDITIONAL_ARG\"" 1>/dev/null 2>&1
printf "Our: %3s %30s\n" "$our_result" "$(format_time "aba2sat" "$JSON_FILE")"
printf "Their: %3s %30s\n" "$other_result" "$(format_time "aspforaba" "$JSON_FILE")"
else
printf "Our: %3s\n" "$our_result"
printf "Their: %3s\n" "$other_result"
fi
}
@ -83,82 +83,82 @@ ADDITIONAL_ARG=
TIME_COMMANDS=
# Parse the command-line options until no arguments are left.
#
# Options that take a value (--aspforaba, --problem, --file, --files-from,
# --arg, --aba2sat) consume two arguments: `shift`, read $1, `shift`.
# --time takes no value and only sets TIME_COMMANDS=yes.
# --file and --arg cannot be combined with --files-from; which error is
# reported depends on the order in which the options are given.
# Any other argument starting with '-' prints the usage and exits; everything
# else is appended to POSITIONAL_ARGS.
# NOTE(review): value-taking options do not check that a value actually
# follows, so a trailing `--file` leaves ABA_FILE empty.
# NOTE(review): the unknown-option branch calls print_help_and_exit without an
# argument while the --help branch of the second `case` passes "" — confirm
# whether the script runs with `set -u`, in which case the former would fail.
# NOTE(review): the `case` statement appears twice in this listing (apparently
# the old and the re-indented lines of a diff shown together).
while [[ $# -gt 0 ]]; do
case $1 in
-h | --help)
print_help_and_exit
;;
--aspforaba)
shift
ASPFORABA=$1
shift
;;
-p | --problem)
shift
PROBLEM=$1
shift
;;
-f | --file)
if [ -n "$ABA_FILE_DIR" ]; then
print_help_and_exit "Parameters --file and --files-from cannot be mixed"
fi
shift
ABA_FILE=$1
shift
;;
--files-from)
if [ -n "$ABA_FILE" ]; then
print_help_and_exit "Parameters --file and --files-from cannot be mixed"
fi
if [ -n "$ADDITIONAL_ARG" ]; then
print_help_and_exit "Parameters --arg and --files-from cannot be mixed"
fi
shift
ABA_FILE_DIR=$1
shift
;;
-a | --arg)
if [ -n "$ABA_FILE_DIR" ]; then
print_help_and_exit "Parameters --arg and --files-from cannot be mixed"
fi
shift
ADDITIONAL_ARG=$1
shift
;;
-t | --time)
shift
TIME_COMMANDS=yes
;;
--aba2sat)
shift
ABA2SAT=$1
shift
;;
-*)
echo "Unknown option $1"
print_help_and_exit
;;
*)
POSITIONAL_ARGS+=("$1") # save positional arg
shift # past argument
;;
esac
case $1 in
-h | --help)
print_help_and_exit ""
;;
--aspforaba)
shift
ASPFORABA=$1
shift
;;
-p | --problem)
shift
PROBLEM=$1
shift
;;
-f | --file)
if [ -n "$ABA_FILE_DIR" ]; then
print_help_and_exit "Parameters --file and --files-from cannot be mixed"
fi
shift
ABA_FILE=$1
shift
;;
--files-from)
if [ -n "$ABA_FILE" ]; then
print_help_and_exit "Parameters --file and --files-from cannot be mixed"
fi
if [ -n "$ADDITIONAL_ARG" ]; then
print_help_and_exit "Parameters --arg and --files-from cannot be mixed"
fi
shift
ABA_FILE_DIR=$1
shift
;;
-a | --arg)
if [ -n "$ABA_FILE_DIR" ]; then
print_help_and_exit "Parameters --arg and --files-from cannot be mixed"
fi
shift
ADDITIONAL_ARG=$1
shift
;;
-t | --time)
shift
TIME_COMMANDS=yes
;;
--aba2sat)
shift
ABA2SAT=$1
shift
;;
-*)
echo "Unknown option $1"
print_help_and_exit
;;
*)
POSITIONAL_ARGS+=("$1") # save positional arg
shift # past argument
;;
esac
done
# Restore the collected positional arguments, then dispatch on PROBLEM.
#
# Only dc-co / DC-CO is handled; any other value prints the usage with an
# error message and exits. With --files-from (ABA_FILE_DIR set), run_dc_co is
# called once per matching file, with ABA_FILE set to that file and
# ADDITIONAL_ARG read from "<file>.asm". Otherwise run_dc_co is called once
# with the values given on the command line.
# NOTE(review): two variants of the directory loop are visible here, a plain
# glob and a `find ... | shuf` pipeline (apparently the old and new lines of a
# diff shown together). The `find` variant also descends into subdirectories,
# matches the extension case-insensitively and visits files in random order.
# NOTE(review): `for file in $(find ...)` relies on word splitting, so paths
# containing whitespace or glob characters would be broken up — confirm that
# file names never contain such characters.
# NOTE(review): ABA_FILE_EXT is not assigned anywhere in the visible lines;
# presumably it is set earlier in the script — verify.
set -- "${POSITIONAL_ARGS[@]}" # restore positional parameters
case "$PROBLEM" in
dc-co | DC-CO)
if [ -n "$ABA_FILE_DIR" ]; then
# run for every file found in the directory
for file in "$ABA_FILE_DIR"/*."$ABA_FILE_EXT"; do
ABA_FILE="$file" ADDITIONAL_ARG="$(cat "$file.asm")" run_dc_co
done
else
# run for the single configured file
run_dc_co
fi
;;
*)
print_help_and_exit "Problem $PROBLEM is not supported"
;;
dc-co | DC-CO)
if [ -n "$ABA_FILE_DIR" ]; then
# run for every file found in the directory
for file in $(find "$ABA_FILE_DIR" -type f -iname "*.$ABA_FILE_EXT" | shuf); do
ABA_FILE="$file" ADDITIONAL_ARG="$(cat "$file.asm")" run_dc_co
done
else
# run for the single configured file
run_dc_co
fi
;;
*)
print_help_and_exit "Problem $PROBLEM is not supported"
;;
esac