small script adjustments to make the cluster happy
This commit is contained in:
parent
27ed3caf0b
commit
ad54f0755f
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -5,3 +5,4 @@ target/
|
||||||
scripts/__pycache__
|
scripts/__pycache__
|
||||||
acyclic
|
acyclic
|
||||||
output-*
|
output-*
|
||||||
|
all.csv
|
||||||
|
|
|
@ -21,38 +21,41 @@ def run():
|
||||||
out = []
|
out = []
|
||||||
# Using glob to match all .json files
|
# Using glob to match all .json files
|
||||||
for file_path in glob.glob(os.path.join(folder_path, "*.json")):
|
for file_path in glob.glob(os.path.join(folder_path, "*.json")):
|
||||||
# Open and read the contents of the file
|
try:
|
||||||
with open(file_path, "r", encoding="utf-8") as json_file:
|
# Open and read the contents of the file
|
||||||
(
|
with open(file_path, "r", encoding="utf-8") as json_file:
|
||||||
_ident,
|
(
|
||||||
atom_count,
|
_ident,
|
||||||
assumption_ratio,
|
atom_count,
|
||||||
max_rules_per_head,
|
assumption_ratio,
|
||||||
max_rule_size,
|
max_rules_per_head,
|
||||||
_idx,
|
max_rule_size,
|
||||||
) = file_path.split("_")
|
_idx,
|
||||||
data = json.load(json_file)["results"]
|
) = file_path.split("_")
|
||||||
aba2sat, aspforaba = (
|
data = json.load(json_file)["results"]
|
||||||
(data[0], data[1])
|
aba2sat, aspforaba = (
|
||||||
if (
|
(data[0], data[1])
|
||||||
data[0]["command"] == "aba2sat"
|
if (
|
||||||
and data[1]["command"] == "aspforaba"
|
data[0]["command"] == "aba2sat"
|
||||||
|
and data[1]["command"] == "aspforaba"
|
||||||
|
)
|
||||||
|
else (data[1], data[0])
|
||||||
)
|
)
|
||||||
else (data[1], data[0])
|
speedup = float(aspforaba['mean']) / float(aba2sat['mean'])
|
||||||
)
|
out.append({
|
||||||
speedup = float(aspforaba['mean']) / float(aba2sat['mean'])
|
"atom_count": atom_count,
|
||||||
out.append({
|
"assumption_ratio": assumption_ratio,
|
||||||
"atom_count": atom_count,
|
"max_rules_per_head": max_rules_per_head,
|
||||||
"assumption_ratio": assumption_ratio,
|
"max_rule_size": max_rule_size,
|
||||||
"max_rules_per_head": max_rules_per_head,
|
"time": aba2sat["mean"],
|
||||||
"max_rule_size": max_rule_size,
|
"stddev": aba2sat['stddev'],
|
||||||
"time": aba2sat["mean"],
|
"speedup": speedup,
|
||||||
"stddev": aba2sat['stddev'],
|
})
|
||||||
"speedup": speedup,
|
if count > 700:
|
||||||
})
|
break
|
||||||
if count > 700:
|
count += 1
|
||||||
break
|
except Exception:
|
||||||
count += 1
|
print(f'Failed to read {file_path}. Skipping..')
|
||||||
with open(output, 'w') as output_file:
|
with open(output, 'w') as output_file:
|
||||||
output_file.write
|
output_file.write
|
||||||
writer = csv.DictWriter(output_file, fieldnames=out[0].keys())
|
writer = csv.DictWriter(output_file, fieldnames=out[0].keys())
|
||||||
|
|
|
@ -3,12 +3,26 @@
|
||||||
# Batch script to run on sc.uni-leipzig.de cluster, i used
|
# Batch script to run on sc.uni-leipzig.de cluster, i used
|
||||||
# sbatch -a "1-$(cat acyclic.list | wc -l)" ./scripts/sc-batch.sh
|
# sbatch -a "1-$(cat acyclic.list | wc -l)" ./scripts/sc-batch.sh
|
||||||
|
|
||||||
|
# Somehow all paths used in spawned processes need to be absolute,
|
||||||
|
# there's probably a good explanation, but I don't have it
|
||||||
|
|
||||||
FILE_LIST=acyclic.list
|
FILE_LIST=acyclic.list
|
||||||
|
|
||||||
|
# Pick line `$SLURM_ARRAY_TASK_ID` from FILE_LIST.
|
||||||
|
# If more array tasks are allocated than FILE_LIST has lines, awk
|
||||||
|
# prints nothing and `file` ends up as just "$(pwd)/".
|
||||||
file="$(pwd)/$(awk "NR == $SLURM_ARRAY_TASK_ID" "$FILE_LIST")"
|
file="$(pwd)/$(awk "NR == $SLURM_ARRAY_TASK_ID" "$FILE_LIST")"
|
||||||
|
# Read the extra argument
|
||||||
arg=$(cat "$file.asm")
|
arg=$(cat "$file.asm")
|
||||||
|
|
||||||
|
# Make sure we get all the data in one central place
|
||||||
OUTPUT_DIR="$(pwd)/output"
|
OUTPUT_DIR="$(pwd)/output"
|
||||||
|
|
||||||
export OUTPUT_DIR
|
export OUTPUT_DIR
|
||||||
./validate --file "$file" --arg "$arg" --time --problem dc-co
|
|
||||||
|
# This assumes that `validate` accepts the --no-rm flag. The script
|
||||||
|
# itself does not accept it, but the default bundler used by
|
||||||
|
# `nix bundle .#validate` recognizes it. The flag is required here
|
||||||
|
# to prevent the fastest process from cleaning up the extracted
|
||||||
|
# package. Slower processes, or those allocated later, *will* fail
|
||||||
|
# without the flag.
|
||||||
|
./validate --no-rm // --file "$file" --arg "$arg" --time --problem dc-co
|
||||||
|
|
|
@ -11,7 +11,7 @@ def read_and_visualize(csv_file):
|
||||||
# Display the first few rows of the dataframe
|
# Display the first few rows of the dataframe
|
||||||
print(df.head())
|
print(df.head())
|
||||||
|
|
||||||
# Identify all the properties (assuming they are all columns except for 'runtime')
|
# Identify all the properties (assuming they are all columns except 'speedup', 'time' and 'stddev')
|
||||||
properties = [col for col in df.columns if col != 'speedup' and col != 'time' and col != 'stddev']
|
properties = [col for col in df.columns if col != 'speedup' and col != 'time' and col != 'stddev']
|
||||||
|
|
||||||
# Pairplot to see general pairwise relationships, may help to understand the overall relationship between properties and runtime
|
# Pairplot to see general pairwise relationships, may help to understand the overall relationship between properties and runtime
|
||||||
|
@ -22,21 +22,21 @@ def read_and_visualize(csv_file):
|
||||||
# Create scatter plots for each property against runtime
|
# Create scatter plots for each property against runtime
|
||||||
for prop in properties:
|
for prop in properties:
|
||||||
plt.figure(figsize=(10, 6))
|
plt.figure(figsize=(10, 6))
|
||||||
sns.scatterplot(x=df[prop], y=df['runtime'])
|
sns.scatterplot(x=df[prop], y=df['speedup'])
|
||||||
plt.title(f'Impact of {prop} on Runtime')
|
plt.title(f'Impact of {prop} on Speedup')
|
||||||
plt.xlabel(prop)
|
plt.xlabel(prop)
|
||||||
plt.ylabel('Runtime')
|
plt.ylabel('Speedup')
|
||||||
plt.show()
|
|
||||||
|
|
||||||
# Create box plots for categorical properties if any (e.g., difficulty level or type) against runtime
|
# Create box plots for categorical properties if any (e.g., difficulty level or type) against runtime
|
||||||
for prop in properties:
|
for prop in properties:
|
||||||
if df[prop].dtype == 'object':
|
if df[prop].dtype == 'object':
|
||||||
plt.figure(figsize=(10, 6))
|
plt.figure(figsize=(10, 6))
|
||||||
sns.boxplot(x=df[prop], y=df['runtime'])
|
sns.boxplot(x=df[prop], y=df['speedup'])
|
||||||
plt.title(f'Impact of {prop} on Runtime')
|
plt.title(f'Impact of {prop} on Speedup')
|
||||||
plt.xlabel(prop)
|
plt.xlabel(prop)
|
||||||
plt.ylabel('Runtime')
|
plt.ylabel('Speedup')
|
||||||
plt.show()
|
|
||||||
|
plt.show()
|
||||||
|
|
||||||
# Example usage
|
# Example usage
|
||||||
csv_file = 'all.csv' # Replace with your actual CSV file path
|
csv_file = 'all.csv' # Replace with your actual CSV file path
|
||||||
|
|
|
@ -4,70 +4,70 @@
|
||||||
# best used through the bundled nix version `nix run .#validate`
|
# best used through the bundled nix version `nix run .#validate`
|
||||||
|
|
||||||
print_help_and_exit() {
|
print_help_and_exit() {
|
||||||
if [ -n "$1" ]; then
|
if [ -n "$1" ]; then
|
||||||
printf "%s\n\n" "$1"
|
printf "%s\n\n" "$1"
|
||||||
fi
|
fi
|
||||||
printf "Usage: validate [OPTIONS] \n"
|
printf "Usage: validate [OPTIONS] \n"
|
||||||
printf "\n"
|
printf "\n"
|
||||||
printf "Options:\n"
|
printf "Options:\n"
|
||||||
printf " --aspforaba\n"
|
printf " --aspforaba\n"
|
||||||
printf " Binary to use when calling aspforaba\n"
|
printf " Binary to use when calling aspforaba\n"
|
||||||
printf " -p, --problem\n"
|
printf " -p, --problem\n"
|
||||||
printf " The problem to solve\n"
|
printf " The problem to solve\n"
|
||||||
printf " -a, --arg\n"
|
printf " -a, --arg\n"
|
||||||
printf " The additional argument for the problem\n"
|
printf " The additional argument for the problem\n"
|
||||||
printf " -f, --file\n"
|
printf " -f, --file\n"
|
||||||
printf " The file containing the problem in ABA format\n"
|
printf " The file containing the problem in ABA format\n"
|
||||||
printf " -t, --time\n"
|
printf " -t, --time\n"
|
||||||
printf " Execute hyperfine to determine runtimes\n"
|
printf " Execute hyperfine to determine runtimes\n"
|
||||||
printf " --files-from\n"
|
printf " --files-from\n"
|
||||||
printf " Use the following dir to read files, specify a single file with --file instead\n"
|
printf " Use the following dir to read files, specify a single file with --file instead\n"
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
format_time() {
|
format_time() {
|
||||||
COMMAND="$1"
|
COMMAND="$1"
|
||||||
FILE="$2"
|
FILE="$2"
|
||||||
mean=$(jq ".results[] | select(.command == \"$COMMAND\") | (.mean * 1000)" "$FILE")
|
mean=$(jq ".results[] | select(.command == \"$COMMAND\") | (.mean * 1000)" "$FILE")
|
||||||
stddev=$(jq ".results[] | select(.command == \"$COMMAND\") | (.stddev * 1000)" "$FILE")
|
stddev=$(jq ".results[] | select(.command == \"$COMMAND\") | (.stddev * 1000)" "$FILE")
|
||||||
printf "%7.3f±%7.3fms" "$mean" "$stddev"
|
printf "%7.3f±%7.3fms" "$mean" "$stddev"
|
||||||
}
|
}
|
||||||
|
|
||||||
run_dc_co() {
|
run_dc_co() {
|
||||||
OUTPUT_DIR=${OUTPUT_DIR:-$(mktemp -d)}
|
OUTPUT_DIR=${OUTPUT_DIR:-$(mktemp -d)}
|
||||||
mkdir -p "$OUTPUT_DIR"
|
mkdir -p "$OUTPUT_DIR"
|
||||||
JSON_FILE="$OUTPUT_DIR/$(basename "$ABA_FILE")-hyperfine.json"
|
JSON_FILE="$OUTPUT_DIR/$(basename "$ABA_FILE")-hyperfine.json"
|
||||||
# Restrict memory to 20GB
|
# Restrict memory to 20GB
|
||||||
ulimit -v 20000000
|
ulimit -v 20000000
|
||||||
if [ -z "$ADDITIONAL_ARG" ]; then
|
if [ -z "$ADDITIONAL_ARG" ]; then
|
||||||
print_help_and_exit "Parameter --arg is missing!"
|
print_help_and_exit "Parameter --arg is missing!"
|
||||||
fi
|
fi
|
||||||
if [ -z "$ABA_FILE" ]; then
|
if [ -z "$ABA_FILE" ]; then
|
||||||
print_help_and_exit "Parameter --file is missing!"
|
print_help_and_exit "Parameter --file is missing!"
|
||||||
fi
|
fi
|
||||||
printf "===== %s ==== " "$(basename "$ABA_FILE")"
|
printf "===== %s ==== " "$(basename "$ABA_FILE")"
|
||||||
our_result=$("$ABA2SAT" --max-loops 0 --file "$ABA_FILE" dc-co --query "$ADDITIONAL_ARG" | tee "$OUTPUT_DIR/$(basename "$ABA_FILE")-aba2sat-result")
|
our_result=$("$ABA2SAT" --max-loops 0 --file "$ABA_FILE" dc-co --query "$ADDITIONAL_ARG" | tee "$OUTPUT_DIR/$(basename "$ABA_FILE")-aba2sat-result")
|
||||||
other_result=$("$ASPFORABA" --file "$ABA_FILE" --problem DC-CO --query "$ADDITIONAL_ARG" | tee "$OUTPUT_DIR/$(basename "$ABA_FILE")-aspforaba-result")
|
other_result=$("$ASPFORABA" --file "$ABA_FILE" --problem DC-CO --query "$ADDITIONAL_ARG" | tee "$OUTPUT_DIR/$(basename "$ABA_FILE")-aspforaba-result")
|
||||||
if [ "$our_result" != "$other_result" ]; then
|
if [ "$our_result" != "$other_result" ]; then
|
||||||
printf "❌\n"
|
printf "❌\n"
|
||||||
else
|
else
|
||||||
printf "✅\n"
|
printf "✅\n"
|
||||||
fi
|
fi
|
||||||
printf "Argument: %s\n" "$ADDITIONAL_ARG"
|
printf "Argument: %s\n" "$ADDITIONAL_ARG"
|
||||||
|
|
||||||
if [ -n "$TIME_COMMANDS" ]; then
|
if [ -n "$TIME_COMMANDS" ]; then
|
||||||
$HYPERFINE --shell=none \
|
$HYPERFINE --shell=none \
|
||||||
--export-json "$JSON_FILE" \
|
--export-json "$JSON_FILE" \
|
||||||
--command-name "aba2sat" \
|
--command-name "aba2sat" \
|
||||||
"$ABA2SAT --max-loops 0 --file \"$ABA_FILE\" dc-co --query \"$ADDITIONAL_ARG\"" \
|
"$ABA2SAT --max-loops 0 --file \"$ABA_FILE\" dc-co --query \"$ADDITIONAL_ARG\"" \
|
||||||
--command-name "aspforaba" \
|
--command-name "aspforaba" \
|
||||||
"$ASPFORABA --file \"$ABA_FILE\" --problem DC-CO --query \"$ADDITIONAL_ARG\"" 1>/dev/null 2>&1
|
"$ASPFORABA --file \"$ABA_FILE\" --problem DC-CO --query \"$ADDITIONAL_ARG\"" 1>/dev/null 2>&1
|
||||||
printf "Our: %3s %30s\n" "$our_result" "$(format_time "aba2sat" "$JSON_FILE")"
|
printf "Our: %3s %30s\n" "$our_result" "$(format_time "aba2sat" "$JSON_FILE")"
|
||||||
printf "Their: %3s %30s\n" "$other_result" "$(format_time "aspforaba" "$JSON_FILE")"
|
printf "Their: %3s %30s\n" "$other_result" "$(format_time "aspforaba" "$JSON_FILE")"
|
||||||
else
|
else
|
||||||
printf "Our: %3s\n" "$our_result"
|
printf "Our: %3s\n" "$our_result"
|
||||||
printf "Their: %3s\n" "$other_result"
|
printf "Their: %3s\n" "$other_result"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -83,82 +83,82 @@ ADDITIONAL_ARG=
|
||||||
TIME_COMMANDS=
|
TIME_COMMANDS=
|
||||||
|
|
||||||
while [[ $# -gt 0 ]]; do
|
while [[ $# -gt 0 ]]; do
|
||||||
case $1 in
|
case $1 in
|
||||||
-h | --help)
|
-h | --help)
|
||||||
print_help_and_exit
|
print_help_and_exit ""
|
||||||
;;
|
;;
|
||||||
--aspforaba)
|
--aspforaba)
|
||||||
shift
|
shift
|
||||||
ASPFORABA=$1
|
ASPFORABA=$1
|
||||||
shift
|
shift
|
||||||
;;
|
;;
|
||||||
-p | --problem)
|
-p | --problem)
|
||||||
shift
|
shift
|
||||||
PROBLEM=$1
|
PROBLEM=$1
|
||||||
shift
|
shift
|
||||||
;;
|
;;
|
||||||
-f | --file)
|
-f | --file)
|
||||||
if [ -n "$ABA_FILE_DIR" ]; then
|
if [ -n "$ABA_FILE_DIR" ]; then
|
||||||
print_help_and_exit "Parameters --file and --files-from cannot be mixed"
|
print_help_and_exit "Parameters --file and --files-from cannot be mixed"
|
||||||
fi
|
fi
|
||||||
shift
|
shift
|
||||||
ABA_FILE=$1
|
ABA_FILE=$1
|
||||||
shift
|
shift
|
||||||
;;
|
;;
|
||||||
--files-from)
|
--files-from)
|
||||||
if [ -n "$ABA_FILE" ]; then
|
if [ -n "$ABA_FILE" ]; then
|
||||||
print_help_and_exit "Parameters --file and --files-from cannot be mixed"
|
print_help_and_exit "Parameters --file and --files-from cannot be mixed"
|
||||||
fi
|
fi
|
||||||
if [ -n "$ADDITIONAL_ARG" ]; then
|
if [ -n "$ADDITIONAL_ARG" ]; then
|
||||||
print_help_and_exit "Parameters --arg and --files-from cannot be mixed"
|
print_help_and_exit "Parameters --arg and --files-from cannot be mixed"
|
||||||
fi
|
fi
|
||||||
shift
|
shift
|
||||||
ABA_FILE_DIR=$1
|
ABA_FILE_DIR=$1
|
||||||
shift
|
shift
|
||||||
;;
|
;;
|
||||||
-a | --arg)
|
-a | --arg)
|
||||||
if [ -n "$ABA_FILE_DIR" ]; then
|
if [ -n "$ABA_FILE_DIR" ]; then
|
||||||
print_help_and_exit "Parameters --arg and --files-from cannot be mixed"
|
print_help_and_exit "Parameters --arg and --files-from cannot be mixed"
|
||||||
fi
|
fi
|
||||||
shift
|
shift
|
||||||
ADDITIONAL_ARG=$1
|
ADDITIONAL_ARG=$1
|
||||||
shift
|
shift
|
||||||
;;
|
;;
|
||||||
-t | --time)
|
-t | --time)
|
||||||
shift
|
shift
|
||||||
TIME_COMMANDS=yes
|
TIME_COMMANDS=yes
|
||||||
;;
|
;;
|
||||||
--aba2sat)
|
--aba2sat)
|
||||||
shift
|
shift
|
||||||
ABA2SAT=$1
|
ABA2SAT=$1
|
||||||
shift
|
shift
|
||||||
;;
|
;;
|
||||||
-*)
|
-*)
|
||||||
echo "Unknown option $1"
|
echo "Unknown option $1"
|
||||||
print_help_and_exit
|
print_help_and_exit
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
POSITIONAL_ARGS+=("$1") # save positional arg
|
POSITIONAL_ARGS+=("$1") # save positional arg
|
||||||
shift # past argument
|
shift # past argument
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
set -- "${POSITIONAL_ARGS[@]}" # restore positional parameters
|
set -- "${POSITIONAL_ARGS[@]}" # restore positional parameters
|
||||||
|
|
||||||
case "$PROBLEM" in
|
case "$PROBLEM" in
|
||||||
dc-co | DC-CO)
|
dc-co | DC-CO)
|
||||||
if [ -n "$ABA_FILE_DIR" ]; then
|
if [ -n "$ABA_FILE_DIR" ]; then
|
||||||
# run for every file found in the directory
|
# run for every file found in the directory
|
||||||
for file in "$ABA_FILE_DIR"/*."$ABA_FILE_EXT"; do
|
for file in $(find "$ABA_FILE_DIR" -type f -iname "*.$ABA_FILE_EXT" | shuf); do
|
||||||
ABA_FILE="$file" ADDITIONAL_ARG="$(cat "$file.asm")" run_dc_co
|
ABA_FILE="$file" ADDITIONAL_ARG="$(cat "$file.asm")" run_dc_co
|
||||||
done
|
done
|
||||||
else
|
else
|
||||||
# run for the single configured file
|
# run for the single configured file
|
||||||
run_dc_co
|
run_dc_co
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
print_help_and_exit "Problem $PROBLEM is not supported"
|
print_help_and_exit "Problem $PROBLEM is not supported"
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
Loading…
Reference in a new issue