small script adjustments to make the cluster happy
This commit is contained in:
parent
27ed3caf0b
commit
ad54f0755f
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -5,3 +5,4 @@ target/
|
|||
scripts/__pycache__
|
||||
acyclic
|
||||
output-*
|
||||
all.csv
|
||||
|
|
|
@ -21,38 +21,41 @@ def run():
|
|||
out = []
|
||||
# Using glob to match all .json files
|
||||
for file_path in glob.glob(os.path.join(folder_path, "*.json")):
|
||||
# Open and read the contents of the file
|
||||
with open(file_path, "r", encoding="utf-8") as json_file:
|
||||
(
|
||||
_ident,
|
||||
atom_count,
|
||||
assumption_ratio,
|
||||
max_rules_per_head,
|
||||
max_rule_size,
|
||||
_idx,
|
||||
) = file_path.split("_")
|
||||
data = json.load(json_file)["results"]
|
||||
aba2sat, aspforaba = (
|
||||
(data[0], data[1])
|
||||
if (
|
||||
data[0]["command"] == "aba2sat"
|
||||
and data[1]["command"] == "aspforaba"
|
||||
try:
|
||||
# Open and read the contents of the file
|
||||
with open(file_path, "r", encoding="utf-8") as json_file:
|
||||
(
|
||||
_ident,
|
||||
atom_count,
|
||||
assumption_ratio,
|
||||
max_rules_per_head,
|
||||
max_rule_size,
|
||||
_idx,
|
||||
) = file_path.split("_")
|
||||
data = json.load(json_file)["results"]
|
||||
aba2sat, aspforaba = (
|
||||
(data[0], data[1])
|
||||
if (
|
||||
data[0]["command"] == "aba2sat"
|
||||
and data[1]["command"] == "aspforaba"
|
||||
)
|
||||
else (data[1], data[0])
|
||||
)
|
||||
else (data[1], data[0])
|
||||
)
|
||||
speedup = float(aspforaba['mean']) / float(aba2sat['mean'])
|
||||
out.append({
|
||||
"atom_count": atom_count,
|
||||
"assumption_ratio": assumption_ratio,
|
||||
"max_rules_per_head": max_rules_per_head,
|
||||
"max_rule_size": max_rule_size,
|
||||
"time": aba2sat["mean"],
|
||||
"stddev": aba2sat['stddev'],
|
||||
"speedup": speedup,
|
||||
})
|
||||
if count > 700:
|
||||
break
|
||||
count += 1
|
||||
speedup = float(aspforaba['mean']) / float(aba2sat['mean'])
|
||||
out.append({
|
||||
"atom_count": atom_count,
|
||||
"assumption_ratio": assumption_ratio,
|
||||
"max_rules_per_head": max_rules_per_head,
|
||||
"max_rule_size": max_rule_size,
|
||||
"time": aba2sat["mean"],
|
||||
"stddev": aba2sat['stddev'],
|
||||
"speedup": speedup,
|
||||
})
|
||||
if count > 700:
|
||||
break
|
||||
count += 1
|
||||
except Exception:
|
||||
print(f'Failed to read {file_path}. Skipping..')
|
||||
with open(output, 'w') as output_file:
|
||||
output_file.write
|
||||
writer = csv.DictWriter(output_file, fieldnames=out[0].keys())
|
||||
|
|
|
@ -3,12 +3,26 @@
|
|||
# Batch script to run on the sc.uni-leipzig.de cluster; I used
|
||||
# sbatch -a "1-$(cat acyclic.list | wc -l)" ./scripts/sc-batch.sh
|
||||
|
||||
# Somehow all paths used in spawned processes need to be absolute,
|
||||
# there's probably a good explanation, but I don't have it
|
||||
|
||||
FILE_LIST=acyclic.list
|
||||
|
||||
# Pick line `$SLURM_ARRAY_TASK_ID` from the FILE_LIST
|
||||
# This will probably cause issues if more processes are allocated
|
||||
# than lines in the FILE_LIST, but who knows
|
||||
file="$(pwd)/$(awk "NR == $SLURM_ARRAY_TASK_ID" "$FILE_LIST")"
|
||||
# Read the extra argument
|
||||
arg=$(cat "$file.asm")
|
||||
|
||||
# Make sure we get all the data in one central place
|
||||
OUTPUT_DIR="$(pwd)/output"
|
||||
|
||||
export OUTPUT_DIR
|
||||
./validate --file "$file" --arg "$arg" --time --problem dc-co
|
||||
|
||||
# This assumes that `validate` accepts the --no-rm flag,
|
||||
# which is not a flag the script itself accepts, but one recognized by
|
||||
# the default bundler that `nix bundle .#validate` uses. Required here
|
||||
# to prevent the fastest process from cleaning the extracted
|
||||
# package. Slower processes or those allocated later *will* fail
|
||||
# without the flag
|
||||
./validate --no-rm // --file "$file" --arg "$arg" --time --problem dc-co
|
||||
|
|
|
@ -11,7 +11,7 @@ def read_and_visualize(csv_file):
|
|||
# Display the first few rows of the dataframe
|
||||
print(df.head())
|
||||
|
||||
# Identify all the properties (assuming they are all columns except for 'runtime')
|
||||
# Identify all the properties (assuming they are all columns except for some timings)
|
||||
properties = [col for col in df.columns if col != 'speedup' and col != 'time' and col != 'stddev']
|
||||
|
||||
# Pairplot to see general pairwise relationships, may help to understand the overall relationship between properties and runtime
|
||||
|
@ -22,21 +22,21 @@ def read_and_visualize(csv_file):
|
|||
# Create scatter plots for each property against speedup
|
||||
for prop in properties:
|
||||
plt.figure(figsize=(10, 6))
|
||||
sns.scatterplot(x=df[prop], y=df['runtime'])
|
||||
plt.title(f'Impact of {prop} on Runtime')
|
||||
sns.scatterplot(x=df[prop], y=df['speedup'])
|
||||
plt.title(f'Impact of {prop} on Speedup')
|
||||
plt.xlabel(prop)
|
||||
plt.ylabel('Runtime')
|
||||
plt.show()
|
||||
plt.ylabel('Speedup')
|
||||
|
||||
# Create box plots for categorical properties if any (e.g., difficulty level or type) against speedup
|
||||
for prop in properties:
|
||||
if df[prop].dtype == 'object':
|
||||
plt.figure(figsize=(10, 6))
|
||||
sns.boxplot(x=df[prop], y=df['runtime'])
|
||||
plt.title(f'Impact of {prop} on Runtime')
|
||||
sns.boxplot(x=df[prop], y=df['speedup'])
|
||||
plt.title(f'Impact of {prop} on Speedup')
|
||||
plt.xlabel(prop)
|
||||
plt.ylabel('Runtime')
|
||||
plt.show()
|
||||
plt.ylabel('Speedup')
|
||||
|
||||
plt.show()
|
||||
|
||||
# Example usage
|
||||
csv_file = 'all.csv' # Replace with your actual CSV file path
|
||||
|
|
|
@ -4,70 +4,70 @@
|
|||
# best used through the bundled nix version `nix run .#validate`
|
||||
|
||||
print_help_and_exit() {
|
||||
if [ -n "$1" ]; then
|
||||
printf "%s\n\n" "$1"
|
||||
fi
|
||||
printf "Usage: validate [OPTIONS] \n"
|
||||
printf "\n"
|
||||
printf "Options:\n"
|
||||
printf " --aspforaba\n"
|
||||
printf " Binary to use when calling aspforaba\n"
|
||||
printf " -p, --problem\n"
|
||||
printf " The problem to solve\n"
|
||||
printf " -a, --arg\n"
|
||||
printf " The additional argument for the problem\n"
|
||||
printf " -f, --file\n"
|
||||
printf " The file containing the problem in ABA format\n"
|
||||
printf " -t, --time\n"
|
||||
printf " Execute hyperfine to determine runtimes\n"
|
||||
printf " --files-from\n"
|
||||
printf " Use the following dir to read files, specify a single file with --file instead\n"
|
||||
exit 1
|
||||
if [ -n "$1" ]; then
|
||||
printf "%s\n\n" "$1"
|
||||
fi
|
||||
printf "Usage: validate [OPTIONS] \n"
|
||||
printf "\n"
|
||||
printf "Options:\n"
|
||||
printf " --aspforaba\n"
|
||||
printf " Binary to use when calling aspforaba\n"
|
||||
printf " -p, --problem\n"
|
||||
printf " The problem to solve\n"
|
||||
printf " -a, --arg\n"
|
||||
printf " The additional argument for the problem\n"
|
||||
printf " -f, --file\n"
|
||||
printf " The file containing the problem in ABA format\n"
|
||||
printf " -t, --time\n"
|
||||
printf " Execute hyperfine to determine runtimes\n"
|
||||
printf " --files-from\n"
|
||||
printf " Use the following dir to read files, specify a single file with --file instead\n"
|
||||
exit 1
|
||||
}
|
||||
|
||||
format_time() {
|
||||
COMMAND="$1"
|
||||
FILE="$2"
|
||||
mean=$(jq ".results[] | select(.command == \"$COMMAND\") | (.mean * 1000)" "$FILE")
|
||||
stddev=$(jq ".results[] | select(.command == \"$COMMAND\") | (.stddev * 1000)" "$FILE")
|
||||
printf "%7.3f±%7.3fms" "$mean" "$stddev"
|
||||
COMMAND="$1"
|
||||
FILE="$2"
|
||||
mean=$(jq ".results[] | select(.command == \"$COMMAND\") | (.mean * 1000)" "$FILE")
|
||||
stddev=$(jq ".results[] | select(.command == \"$COMMAND\") | (.stddev * 1000)" "$FILE")
|
||||
printf "%7.3f±%7.3fms" "$mean" "$stddev"
|
||||
}
|
||||
|
||||
run_dc_co() {
|
||||
OUTPUT_DIR=${OUTPUT_DIR:-$(mktemp -d)}
|
||||
mkdir -p "$OUTPUT_DIR"
|
||||
JSON_FILE="$OUTPUT_DIR/$(basename "$ABA_FILE")-hyperfine.json"
|
||||
# Restrict memory to 20GB
|
||||
ulimit -v 20000000
|
||||
if [ -z "$ADDITIONAL_ARG" ]; then
|
||||
print_help_and_exit "Parameter --arg is missing!"
|
||||
fi
|
||||
if [ -z "$ABA_FILE" ]; then
|
||||
print_help_and_exit "Parameter --file is missing!"
|
||||
fi
|
||||
printf "===== %s ==== " "$(basename "$ABA_FILE")"
|
||||
our_result=$("$ABA2SAT" --max-loops 0 --file "$ABA_FILE" dc-co --query "$ADDITIONAL_ARG" | tee "$OUTPUT_DIR/$(basename "$ABA_FILE")-aba2sat-result")
|
||||
other_result=$("$ASPFORABA" --file "$ABA_FILE" --problem DC-CO --query "$ADDITIONAL_ARG" | tee "$OUTPUT_DIR/$(basename "$ABA_FILE")-aspforaba-result")
|
||||
if [ "$our_result" != "$other_result" ]; then
|
||||
printf "❌\n"
|
||||
else
|
||||
printf "✅\n"
|
||||
fi
|
||||
printf "Argument: %s\n" "$ADDITIONAL_ARG"
|
||||
OUTPUT_DIR=${OUTPUT_DIR:-$(mktemp -d)}
|
||||
mkdir -p "$OUTPUT_DIR"
|
||||
JSON_FILE="$OUTPUT_DIR/$(basename "$ABA_FILE")-hyperfine.json"
|
||||
# Restrict memory to 20GB
|
||||
ulimit -v 20000000
|
||||
if [ -z "$ADDITIONAL_ARG" ]; then
|
||||
print_help_and_exit "Parameter --arg is missing!"
|
||||
fi
|
||||
if [ -z "$ABA_FILE" ]; then
|
||||
print_help_and_exit "Parameter --file is missing!"
|
||||
fi
|
||||
printf "===== %s ==== " "$(basename "$ABA_FILE")"
|
||||
our_result=$("$ABA2SAT" --max-loops 0 --file "$ABA_FILE" dc-co --query "$ADDITIONAL_ARG" | tee "$OUTPUT_DIR/$(basename "$ABA_FILE")-aba2sat-result")
|
||||
other_result=$("$ASPFORABA" --file "$ABA_FILE" --problem DC-CO --query "$ADDITIONAL_ARG" | tee "$OUTPUT_DIR/$(basename "$ABA_FILE")-aspforaba-result")
|
||||
if [ "$our_result" != "$other_result" ]; then
|
||||
printf "❌\n"
|
||||
else
|
||||
printf "✅\n"
|
||||
fi
|
||||
printf "Argument: %s\n" "$ADDITIONAL_ARG"
|
||||
|
||||
if [ -n "$TIME_COMMANDS" ]; then
|
||||
$HYPERFINE --shell=none \
|
||||
--export-json "$JSON_FILE" \
|
||||
--command-name "aba2sat" \
|
||||
"$ABA2SAT --max-loops 0 --file \"$ABA_FILE\" dc-co --query \"$ADDITIONAL_ARG\"" \
|
||||
--command-name "aspforaba" \
|
||||
"$ASPFORABA --file \"$ABA_FILE\" --problem DC-CO --query \"$ADDITIONAL_ARG\"" 1>/dev/null 2>&1
|
||||
printf "Our: %3s %30s\n" "$our_result" "$(format_time "aba2sat" "$JSON_FILE")"
|
||||
printf "Their: %3s %30s\n" "$other_result" "$(format_time "aspforaba" "$JSON_FILE")"
|
||||
else
|
||||
printf "Our: %3s\n" "$our_result"
|
||||
printf "Their: %3s\n" "$other_result"
|
||||
fi
|
||||
if [ -n "$TIME_COMMANDS" ]; then
|
||||
$HYPERFINE --shell=none \
|
||||
--export-json "$JSON_FILE" \
|
||||
--command-name "aba2sat" \
|
||||
"$ABA2SAT --max-loops 0 --file \"$ABA_FILE\" dc-co --query \"$ADDITIONAL_ARG\"" \
|
||||
--command-name "aspforaba" \
|
||||
"$ASPFORABA --file \"$ABA_FILE\" --problem DC-CO --query \"$ADDITIONAL_ARG\"" 1>/dev/null 2>&1
|
||||
printf "Our: %3s %30s\n" "$our_result" "$(format_time "aba2sat" "$JSON_FILE")"
|
||||
printf "Their: %3s %30s\n" "$other_result" "$(format_time "aspforaba" "$JSON_FILE")"
|
||||
else
|
||||
printf "Our: %3s\n" "$our_result"
|
||||
printf "Their: %3s\n" "$other_result"
|
||||
fi
|
||||
|
||||
}
|
||||
|
||||
|
@ -83,82 +83,82 @@ ADDITIONAL_ARG=
|
|||
TIME_COMMANDS=
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
-h | --help)
|
||||
print_help_and_exit
|
||||
;;
|
||||
--aspforaba)
|
||||
shift
|
||||
ASPFORABA=$1
|
||||
shift
|
||||
;;
|
||||
-p | --problem)
|
||||
shift
|
||||
PROBLEM=$1
|
||||
shift
|
||||
;;
|
||||
-f | --file)
|
||||
if [ -n "$ABA_FILE_DIR" ]; then
|
||||
print_help_and_exit "Parameters --file and --files-from cannot be mixed"
|
||||
fi
|
||||
shift
|
||||
ABA_FILE=$1
|
||||
shift
|
||||
;;
|
||||
--files-from)
|
||||
if [ -n "$ABA_FILE" ]; then
|
||||
print_help_and_exit "Parameters --file and --files-from cannot be mixed"
|
||||
fi
|
||||
if [ -n "$ADDITIONAL_ARG" ]; then
|
||||
print_help_and_exit "Parameters --arg and --files-from cannot be mixed"
|
||||
fi
|
||||
shift
|
||||
ABA_FILE_DIR=$1
|
||||
shift
|
||||
;;
|
||||
-a | --arg)
|
||||
if [ -n "$ABA_FILE_DIR" ]; then
|
||||
print_help_and_exit "Parameters --arg and --files-from cannot be mixed"
|
||||
fi
|
||||
shift
|
||||
ADDITIONAL_ARG=$1
|
||||
shift
|
||||
;;
|
||||
-t | --time)
|
||||
shift
|
||||
TIME_COMMANDS=yes
|
||||
;;
|
||||
--aba2sat)
|
||||
shift
|
||||
ABA2SAT=$1
|
||||
shift
|
||||
;;
|
||||
-*)
|
||||
echo "Unknown option $1"
|
||||
print_help_and_exit
|
||||
;;
|
||||
*)
|
||||
POSITIONAL_ARGS+=("$1") # save positional arg
|
||||
shift # past argument
|
||||
;;
|
||||
esac
|
||||
case $1 in
|
||||
-h | --help)
|
||||
print_help_and_exit ""
|
||||
;;
|
||||
--aspforaba)
|
||||
shift
|
||||
ASPFORABA=$1
|
||||
shift
|
||||
;;
|
||||
-p | --problem)
|
||||
shift
|
||||
PROBLEM=$1
|
||||
shift
|
||||
;;
|
||||
-f | --file)
|
||||
if [ -n "$ABA_FILE_DIR" ]; then
|
||||
print_help_and_exit "Parameters --file and --files-from cannot be mixed"
|
||||
fi
|
||||
shift
|
||||
ABA_FILE=$1
|
||||
shift
|
||||
;;
|
||||
--files-from)
|
||||
if [ -n "$ABA_FILE" ]; then
|
||||
print_help_and_exit "Parameters --file and --files-from cannot be mixed"
|
||||
fi
|
||||
if [ -n "$ADDITIONAL_ARG" ]; then
|
||||
print_help_and_exit "Parameters --arg and --files-from cannot be mixed"
|
||||
fi
|
||||
shift
|
||||
ABA_FILE_DIR=$1
|
||||
shift
|
||||
;;
|
||||
-a | --arg)
|
||||
if [ -n "$ABA_FILE_DIR" ]; then
|
||||
print_help_and_exit "Parameters --arg and --files-from cannot be mixed"
|
||||
fi
|
||||
shift
|
||||
ADDITIONAL_ARG=$1
|
||||
shift
|
||||
;;
|
||||
-t | --time)
|
||||
shift
|
||||
TIME_COMMANDS=yes
|
||||
;;
|
||||
--aba2sat)
|
||||
shift
|
||||
ABA2SAT=$1
|
||||
shift
|
||||
;;
|
||||
-*)
|
||||
echo "Unknown option $1"
|
||||
print_help_and_exit
|
||||
;;
|
||||
*)
|
||||
POSITIONAL_ARGS+=("$1") # save positional arg
|
||||
shift # past argument
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
set -- "${POSITIONAL_ARGS[@]}" # restore positional parameters
|
||||
|
||||
case "$PROBLEM" in
|
||||
dc-co | DC-CO)
|
||||
if [ -n "$ABA_FILE_DIR" ]; then
|
||||
# run for every file found in the directory
|
||||
for file in "$ABA_FILE_DIR"/*."$ABA_FILE_EXT"; do
|
||||
ABA_FILE="$file" ADDITIONAL_ARG="$(cat "$file.asm")" run_dc_co
|
||||
done
|
||||
else
|
||||
# run for the single configured file
|
||||
run_dc_co
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
print_help_and_exit "Problem $PROBLEM is not supported"
|
||||
;;
|
||||
dc-co | DC-CO)
|
||||
if [ -n "$ABA_FILE_DIR" ]; then
|
||||
# run for every file found in the directory
|
||||
for file in $(find "$ABA_FILE_DIR" -type f -iname "*.$ABA_FILE_EXT" | shuf); do
|
||||
ABA_FILE="$file" ADDITIONAL_ARG="$(cat "$file.asm")" run_dc_co
|
||||
done
|
||||
else
|
||||
# run for the single configured file
|
||||
run_dc_co
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
print_help_and_exit "Problem $PROBLEM is not supported"
|
||||
;;
|
||||
esac
|
||||
|
|
Loading…
Reference in a new issue