Abstract over Cache

Add a `Cache` trait and implement it for `Cacache` and `DummyCache`.
Fix tests to use the DummyCache, dropping old dependencies.
See #11.
This commit is contained in:
Malte Tammena 2021-10-25 22:18:56 +02:00
parent b21f921b03
commit 321037a8a8
11 changed files with 333 additions and 170 deletions

8
Cargo.lock generated
View file

@ -990,9 +990,9 @@ dependencies = [
"serde",
"serde_json",
"serde_plain",
"ssri",
"structopt",
"strum",
"temp-dir",
"terminal_size",
"textwrap 0.14.2",
"thiserror",
@ -1762,12 +1762,6 @@ dependencies = [
"unicode-xid",
]
[[package]]
name = "temp-dir"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af547b166dd1ea4b472165569fc456cfb6818116f854690b0ff205e636523dab"
[[package]]
name = "tempfile"
version = "3.2.0"

View file

@ -37,6 +37,6 @@ serde_json = "1.0"
itertools = "0.10"
[dev-dependencies]
temp-dir = "0.1"
pretty_assertions = "1.0"
ssri = "7.0"
assert_cmd = "2.0"

67
src/cache/cacache.rs vendored Normal file
View file

@ -0,0 +1,67 @@
use std::{io::Write, path::PathBuf};
use cacache::Metadata;
use itertools::Itertools;
use lazy_static::lazy_static;
use tracing::info;
use super::Cache;
use crate::{
error::{Error, Result},
request::Headers,
DIR,
};
lazy_static! {
/// Path to the cache.
static ref CACHE: PathBuf = DIR.cache_dir().into();
}
pub struct Cacache;
impl Cache for Cacache
where
Self: Sized,
{
fn init() -> Result<Self> {
Ok(Cacache)
}
fn write(&self, headers: &Headers, url: &str, text: &str) -> Result<()> {
let header_serialized = serde_json::to_value(headers.clone())
.map_err(|why| Error::Serializing(why, "writing headers to cache"))?;
let mut writer = cacache::WriteOpts::new()
.metadata(header_serialized)
.open_sync(&*CACHE, url)
.map_err(|why| Error::Cache(why, "opening for write"))?;
writer
.write_all(text.as_bytes())
.map_err(|why| Error::Io(why, "writing value"))?;
writer
.commit()
.map_err(|why| Error::Cache(why, "commiting write"))?;
info!("Updated cache for {:?}", url);
Ok(())
}
fn read(&self, meta: &Metadata) -> Result<String> {
cacache::read_hash_sync(&*CACHE, &meta.integrity)
.map_err(|why| Error::Cache(why, "reading value"))
.and_then(|raw| String::from_utf8(raw).map_err(Error::DecodingUtf8))
}
fn meta(&self, url: &str) -> Result<Option<Metadata>> {
cacache::metadata_sync(&*CACHE, url).map_err(|why| Error::Cache(why, "reading metadata"))
}
fn clear(&self) -> Result<()> {
cacache::clear_sync(&*CACHE).map_err(|why| Error::Cache(why, "clearing"))
}
fn list(&self) -> Result<Vec<Metadata>> {
cacache::list_sync(&*CACHE)
.map(|res| res.map_err(|why| Error::Cache(why, "listing")))
.try_collect()
}
}

114
src/cache/dummy.rs vendored Normal file
View file

@ -0,0 +1,114 @@
use std::{collections::BTreeMap, sync::RwLock};
use cacache::Metadata;
use ssri::{Hash, Integrity};
use super::Cache;
use crate::{
error::{Error, Result},
request::Headers,
};
/// A single in-memory cache entry: the stored text plus its [`Metadata`].
struct Entry {
    // Metadata mirroring what cacache would store alongside the value.
    meta: Metadata,
    // The cached body, kept as text (the real cache stores raw bytes).
    text: String,
}
/// In-memory cache used by tests, mirroring the behavior of the
/// cacache-backed implementation without touching the filesystem.
pub struct DummyCache {
    /// The real, cacache-based implementation takes only immutable references
    /// and the API is adapted to handle that. Thus we'll have to do our
    /// own interior mutability here.
    content: RwLock<BTreeMap<Hash, Entry>>,
}
impl Cache for DummyCache {
    /// Create an empty in-memory cache.
    fn init() -> Result<Self> {
        let content = RwLock::new(BTreeMap::new());
        Ok(DummyCache { content })
    }

    /// Return the text stored for the entry that `meta` refers to.
    ///
    /// Panics if the metadata points at a missing entry, since that would
    /// indicate an internal inconsistency of this cache.
    fn read(&self, meta: &Metadata) -> Result<String> {
        let (algorithm, digest) = meta.integrity.to_hex();
        let key = Hash { algorithm, digest };
        let guard = self.content.read().expect("Reading cache failed");
        match guard.get(&key) {
            Some(entry) => Ok(entry.text.clone()),
            None => panic!("BUG: Metadata exists, but entry does not!"),
        }
    }

    /// Insert (or replace) the entry keyed by `url`, storing `text` as the
    /// value and `headers` inside the assembled metadata.
    fn write(&self, headers: &Headers, url: &str, text: &str) -> Result<()> {
        let entry = Entry {
            meta: assemble_meta(headers, url, text)?,
            text: text.to_owned(),
        };
        self.content
            .write()
            .expect("Writing cache failed")
            .insert(hash_from_key(url), entry);
        Ok(())
    }

    /// Look up the [`Metadata`] for the entry keyed by `url`, if present.
    fn meta(&self, url: &str) -> Result<Option<Metadata>> {
        let key = hash_from_key(url);
        let guard = self.content.read().expect("Reading cache failed");
        Ok(guard.get(&key).map(|entry| clone_metadata(&entry.meta)))
    }

    /// Drop every entry from the cache.
    fn clear(&self) -> Result<()> {
        let mut guard = self.content.write().expect("Writing cache failed");
        guard.clear();
        Ok(())
    }

    /// Collect the [`Metadata`] of every entry currently stored.
    fn list(&self) -> Result<Vec<Metadata>> {
        let guard = self.content.read().expect("Reading cache failed");
        let list = guard
            .values()
            .map(|entry| clone_metadata(&entry.meta))
            .collect();
        Ok(list)
    }
}
/// Derive the map key (a [`Hash`]) for a cache entry from its string key
/// (the url), going through the entry's [`Integrity`].
fn hash_from_key(key: &str) -> Hash {
    hash_from_integrity(&Integrity::from(key))
}
/// Convert an [`Integrity`] into the [`Hash`] used to index the map.
fn hash_from_integrity(integrity: &Integrity) -> Hash {
    let hex = integrity.to_hex();
    Hash {
        algorithm: hex.0,
        digest: hex.1,
    }
}
/// Field-by-field copy of a [`Metadata`] value.
///
/// NOTE(review): presumably hand-written because `cacache::Metadata` does
/// not implement `Clone` — confirm against the cacache version in use.
fn clone_metadata(meta: &Metadata) -> Metadata {
    Metadata {
        key: meta.key.clone(),
        integrity: meta.integrity.clone(),
        time: meta.time,
        size: meta.size,
        metadata: meta.metadata.clone(),
    }
}
fn assemble_meta(headers: &Headers, url: &str, text: &str) -> Result<Metadata> {
let time = chrono::Utc::now();
Ok(Metadata {
key: url.to_owned(),
integrity: Integrity::from(url),
time: time.timestamp_millis() as u128,
size: text.len(),
metadata: serde_json::to_value(headers)
.map_err(|why| Error::Serializing(why, "converting headers to json"))?,
})
}

193
src/cache/mod.rs vendored
View file

@ -28,7 +28,7 @@
//! - `fetch` functions are generalized over web requests and cache loading.
//! - `get` functions only operate on requests.
//! - `load`, `update` functions only operate on the cache.
use cacache::Metadata;
use ::cacache::Metadata;
use chrono::{Duration, TimeZone};
use lazy_static::lazy_static;
use reqwest::{StatusCode, Url};
@ -38,10 +38,18 @@ use tracing::{info, warn};
mod fetchable;
#[cfg(test)]
mod tests;
mod wrapper;
#[cfg(not(test))]
mod cacache;
#[cfg(not(test))]
use self::cacache::Cacache as DefaultCache;
#[cfg(test)]
mod dummy;
#[cfg(test)]
use self::dummy::DummyCache as DefaultCache;
pub use fetchable::Fetchable;
pub use wrapper::clear_cache as clear;
use crate::{
error::{Error, Result, ResultExt},
@ -51,6 +59,10 @@ use crate::{
/// Returned by most functions in this module.
type TextAndHeaders = (String, Headers);
lazy_static! {
pub static ref CACHE: DefaultCache = DefaultCache::init().expect("Initialized cache");
}
/// Possible results from a cache load.
#[derive(Debug, PartialEq)]
enum CacheResult<T> {
@ -62,77 +74,113 @@ enum CacheResult<T> {
Hit(T),
}
/// Wrapper around [`fetch`] for responses that contain json.
pub fn fetch_json<S, T>(url: S, local_ttl: Duration) -> Result<T>
/// Cache trait
///
/// Generalized over the default Cacache and a DummyCache used for tests.
pub trait Cache
where
S: AsRef<str>,
T: DeserializeOwned,
Self: Sized,
{
fetch(url, local_ttl, |text, _| {
// TODO: Check content header?
serde_json::from_str(&text).map_err(|why| Error::Deserializing(why, "fetching json"))
})
}
/// Initialize the cache.
fn init() -> Result<Self>;
/// Generic method for fetching remote url-based resources that may be cached.
pub fn fetch<Map, S, T>(url: S, local_ttl: Duration, map: Map) -> Result<T>
where
S: AsRef<str>,
Map: FnOnce(String, Headers) -> Result<T>,
{
// Normalize the url at this point since we're using it
// as the cache key
let url = Url::parse(url.as_ref()).map_err(|_| Error::InternalUrl)?;
let url = url.as_ref();
info!("Fetching {:?}", url);
// Try getting the value from cache, if that fails, query the web
let (text, headers) = match try_load_cache(url, local_ttl) {
Ok(CacheResult::Hit(text_and_headers)) => {
info!("Hit cache on {:?}", url);
text_and_headers
}
Ok(CacheResult::Miss) => {
info!("Missed cache on {:?}", url);
get_and_update_cache(url, None, None)?
}
Ok(CacheResult::Stale(old_headers, meta)) => {
info!("Stale cache on {:?}", url);
// The cache is stale but may still be valid
// Request the resource with set IF_NONE_MATCH tag and update
// the caches metadata or value
match get_and_update_cache(url, old_headers.etag, Some(meta)) {
Ok(tah) => tah,
Err(why) => {
warn!("{}", why);
// Fetching and updating failed for some reason, retry
// without the IF_NONE_MATCH tag and fail if unsuccessful
get_and_update_cache(url, None, None)?
/// Read from the cache.
fn read(&self, meta: &Metadata) -> Result<String>;
/// Write to the cache.
///
/// The `url` is used as key and the `text` as value for the entry.
/// The `headers` are attached as additional metadata.
fn write(&self, headers: &Headers, url: &str, text: &str) -> Result<()>;
/// Get the [`Metadata`] for the cache entry.
fn meta(&self, url: &str) -> Result<Option<Metadata>>;
/// Clear all entries from the cache.
fn clear(&self) -> Result<()>;
/// List all cache entries.
fn list(&self) -> Result<Vec<Metadata>>;
/// Wrapper around [`fetch`] for responses that contain json.
fn fetch_json<S, T>(&self, url: S, local_ttl: Duration) -> Result<T>
where
S: AsRef<str>,
T: DeserializeOwned,
{
self.fetch(url, local_ttl, |text, _| {
// TODO: Check content header?
serde_json::from_str(&text).map_err(|why| Error::Deserializing(why, "fetching json"))
})
}
/// Generic method for fetching remote url-based resources that may be cached.
///
/// This is the preferred way to access the cache, as the requested value
/// will be fetched from the inter-webs if the cache misses.
fn fetch<Map, S, T>(&self, url: S, local_ttl: Duration, map: Map) -> Result<T>
where
S: AsRef<str>,
Map: FnOnce(String, Headers) -> Result<T>,
{
// Normalize the url at this point since we're using it
// as the cache key
let url = Url::parse(url.as_ref()).map_err(|_| Error::InternalUrl)?;
let url = url.as_ref();
info!("Fetching {:?}", url);
// Try getting the value from cache, if that fails, query the web
let (text, headers) = match try_load_cache(self, url, local_ttl) {
Ok(CacheResult::Hit(text_and_headers)) => {
info!("Hit cache on {:?}", url);
text_and_headers
}
Ok(CacheResult::Miss) => {
info!("Missed cache on {:?}", url);
get_and_update_cache(self, url, None, None)?
}
Ok(CacheResult::Stale(old_headers, meta)) => {
info!("Stale cache on {:?}", url);
// The cache is stale but may still be valid
// Request the resource with set IF_NONE_MATCH tag and update
// the caches metadata or value
match get_and_update_cache(self, url, old_headers.etag, Some(meta)) {
Ok(tah) => tah,
Err(why) => {
warn!("{}", why);
// Fetching and updating failed for some reason, retry
// without the IF_NONE_MATCH tag and fail if unsuccessful
get_and_update_cache(self, url, None, None)?
}
}
}
}
Err(why) => {
// Fetching from the cache failed for some reason, just
// request the resource and update the cache
warn!("{}", why);
get_and_update_cache(url, None, None)?
}
};
// Apply the map and return the result
map(text, headers)
Err(why) => {
// Fetching from the cache failed for some reason, just
// request the resource and update the cache
warn!("{}", why);
get_and_update_cache(self, url, None, None)?
}
};
// Apply the map and return the result
map(text, headers)
}
}
/// Try loading the cache content.
///
/// This can fail due to errors, but also exits with a [`CacheResult`].
fn try_load_cache(url: &str, local_ttl: Duration) -> Result<CacheResult<TextAndHeaders>> {
fn try_load_cache<C: Cache>(
cache: &C,
url: &str,
local_ttl: Duration,
) -> Result<CacheResult<TextAndHeaders>> {
// Try reading the cache's metadata
match wrapper::read_cache_meta(url)? {
match cache.meta(url)? {
Some(meta) => {
// Metadata exists
if is_fresh(&meta, &local_ttl) {
// Fresh, try to fetch from cache
let raw = wrapper::read_cache(&meta)?;
to_text_and_headers(raw, &meta.metadata).map(CacheResult::Hit)
let text = cache.read(&meta)?;
to_text_and_headers(text, &meta.metadata).map(CacheResult::Hit)
} else {
// Local check failed, but the value may still be valid
let old_headers = headers_from_metadata(&meta)?;
@ -152,7 +200,8 @@ fn try_load_cache(url: &str, local_ttl: Duration) -> Result<CacheResult<TextAndH
///
/// If an optional `etag` is provided, add the If-None-Match header, and thus
/// only get an update if the new ETAG differs from the given `etag`.
fn get_and_update_cache(
fn get_and_update_cache<C: Cache>(
cache: &C,
url: &str,
etag: Option<String>,
meta: Option<Metadata>,
@ -167,11 +216,11 @@ fn get_and_update_cache(
Some(meta) if resp.status == StatusCode::NOT_MODIFIED => {
// If we received code 304 NOT MODIFIED (after adding the If-None-Match)
// our cache is actually fresh and it's timestamp should be updated
touch_and_load_cache(url, &meta, resp.headers)
touch_and_load_cache(cache, url, &meta, resp.headers)
}
_ if resp.status.is_success() => {
// Request returned successfully, now update the cache with that
update_cache_from_response(resp)
update_cache_from_response(cache, resp)
}
_ => {
// Some error occured, just error out
@ -184,19 +233,24 @@ fn get_and_update_cache(
/// Extract body and headers from response and update the cache.
///
/// Only relevant headers will be kept.
fn update_cache_from_response(resp: Response) -> Result<TextAndHeaders> {
fn update_cache_from_response<C: Cache>(cache: &C, resp: Response) -> Result<TextAndHeaders> {
let url = resp.url.to_owned();
wrapper::write_cache(&resp.headers, &url, &resp.body)?;
cache.write(&resp.headers, &url, &resp.body)?;
Ok((resp.body, resp.headers))
}
/// Reset the cache's TTL, load and return it.
fn touch_and_load_cache(url: &str, meta: &Metadata, headers: Headers) -> Result<TextAndHeaders> {
let raw = wrapper::read_cache(meta)?;
fn touch_and_load_cache<C: Cache>(
cache: &C,
url: &str,
meta: &Metadata,
headers: Headers,
) -> Result<TextAndHeaders> {
let raw = cache.read(meta)?;
let (text, _) = to_text_and_headers(raw, &meta.metadata)?;
// TODO: Update the timestamp in a smarter way.
// Do not fail on errors; this doesn't matter here
wrapper::write_cache(&headers, url, &text).log_warn();
cache.write(&headers, url, &text).log_warn();
Ok((text, headers))
}
@ -215,10 +269,9 @@ fn is_fresh(meta: &Metadata, local_ttl: &Duration) -> bool {
}
/// Helper to convert raw text and serialized json to [`TextAndHeaders`].
fn to_text_and_headers(raw: Vec<u8>, meta: &serde_json::Value) -> Result<TextAndHeaders> {
let utf8 = String::from_utf8(raw).map_err(Error::DecodingUtf8)?;
fn to_text_and_headers(text: String, meta: &serde_json::Value) -> Result<TextAndHeaders> {
let headers: Headers = serde_json::from_value(meta.clone()).map_err(|why| {
Error::Deserializing(why, "reading headers from cache. Try clearing the cache.")
})?;
Ok((utf8, headers))
Ok((text, headers))
}

37
src/cache/tests.rs vendored
View file

@ -9,25 +9,24 @@ lazy_static! {
static ref TTL: Duration = Duration::minutes(1);
}
fn print_cache_list(header: &'static str) {
fn print_cache_list(header: &'static str) -> Result<()> {
println!("\n+--- Cache {} ---", header);
wrapper::list_cache()
.filter_map(|res| res.ok())
.for_each(|meta| {
let age_ms = meta.time;
let cache_age = chrono::Utc.timestamp((age_ms / 1000) as i64, (age_ms % 1000) as u32);
eprintln!(
"| - {}\n| SIZE: {}\n| AGE: {}",
meta.key, meta.size, cache_age
)
});
CACHE.list()?.iter().for_each(|meta| {
let age_ms = meta.time;
let cache_age = chrono::Utc.timestamp((age_ms / 1000) as i64, (age_ms % 1000) as u32);
eprintln!(
"| - {}\n| SIZE: {}\n| AGE: {}",
meta.key, meta.size, cache_age
)
});
println!("+{}", "-".repeat(header.len() + 14));
Ok(())
}
#[test]
fn test_cache_is_empty() {
let read = try_load_cache("test cache entry", Duration::max_value()).unwrap();
print_cache_list("Cache");
let read = try_load_cache(&*CACHE, "test cache entry", Duration::max_value()).unwrap();
print_cache_list("Cache").unwrap();
assert_eq!(read, CacheResult::Miss);
}
@ -35,15 +34,15 @@ fn test_cache_is_empty() {
fn basic_caching() {
let url = "http://invalid.local/test";
// Cache is empty
let val = try_load_cache(url, Duration::max_value()).unwrap();
print_cache_list("After first read");
let val = try_load_cache(&*CACHE, url, Duration::max_value()).unwrap();
print_cache_list("After first read").unwrap();
assert_eq!(val, CacheResult::Miss);
// Populate the cache with the first request
let val = fetch(url, *TTL, |txt, _| Ok(txt)).unwrap();
let val = CACHE.fetch(url, *TTL, |txt, _| Ok(txt)).unwrap();
assert_eq!(val, "It works",);
// The cache should now be hit
let val = try_load_cache(url, Duration::max_value()).unwrap();
print_cache_list("After second read");
let val = try_load_cache(&*CACHE, url, Duration::max_value()).unwrap();
print_cache_list("After second read").unwrap();
assert_eq!(
val,
CacheResult::Hit((
@ -58,6 +57,6 @@ fn basic_caching() {
);
// Let's fake a stale entry
thread::sleep(std::time::Duration::from_secs(1));
let val = try_load_cache(url, Duration::zero()).unwrap();
let val = try_load_cache(&*CACHE, url, Duration::zero()).unwrap();
assert!(matches!(val, CacheResult::Stale(_, _)));
}

66
src/cache/wrapper.rs vendored
View file

@ -1,66 +0,0 @@
//! Wrapper for [`cacache`] and [`reqwest`] methods
//!
//! To make testing easier.
use cacache::Metadata;
use lazy_static::lazy_static;
use tracing::info;
use std::{io::Write, path::Path};
use super::Headers;
use crate::error::{Error, Result};
/// Serialize `headers` and write them together with `text` to the cache,
/// keyed by `url`.
pub fn write_cache(headers: &Headers, url: &str, text: &str) -> Result<()> {
    let header_serialized = serde_json::to_value(headers.clone())
        .map_err(|why| Error::Serializing(why, "writing headers to cache"))?;
    // Attach the serialized headers as metadata so they are stored
    // alongside the cached body.
    let mut writer = cacache::WriteOpts::new()
        .metadata(header_serialized)
        .open_sync(cache(), url)
        .map_err(|why| Error::Cache(why, "opening for write"))?;
    writer
        .write_all(text.as_bytes())
        .map_err(|why| Error::Io(why, "writing value"))?;
    // Nothing is persisted until the writer is committed.
    writer
        .commit()
        .map_err(|why| Error::Cache(why, "commiting write"))?;
    info!("Updated cache for {:?}", url);
    Ok(())
}
/// Read the raw cached bytes addressed by the integrity hash in `meta`.
pub fn read_cache(meta: &Metadata) -> Result<Vec<u8>> {
    cacache::read_hash_sync(cache(), &meta.integrity)
        .map_err(|why| Error::Cache(why, "reading value"))
}
/// Fetch the [`Metadata`] for the cache entry keyed by `url`, if any.
pub fn read_cache_meta(url: &str) -> Result<Option<Metadata>> {
    cacache::metadata_sync(cache(), url).map_err(|why| Error::Cache(why, "reading metadata"))
}
/// Remove every entry from the cache.
pub fn clear_cache() -> Result<()> {
    cacache::clear_sync(cache()).map_err(|why| Error::Cache(why, "clearing"))
}
/// Iterate over the [`Metadata`] of all cache entries (test helper only).
#[cfg(test)]
pub fn list_cache() -> impl Iterator<Item = cacache::Result<Metadata>> {
    cacache::list_sync(cache())
}
/// Path to the persistent cache directory (production builds).
#[cfg(not(test))]
fn cache() -> &'static Path {
    use std::path::PathBuf;
    lazy_static! {
        /// Path to the cache.
        static ref CACHE: PathBuf = crate::DIR.cache_dir().into();
    }
    &*CACHE
}
/// Path to a throwaway cache directory, created once per test run so
/// tests never touch the real user cache.
#[cfg(test)]
fn cache() -> &'static Path {
    lazy_static! {
        static ref CACHE: temp_dir::TempDir =
            temp_dir::TempDir::new().expect("Failed to create test cache dir");
    }
    CACHE.path()
}

View file

@ -10,7 +10,7 @@ mod de;
mod ser;
use crate::{
cache::{fetch_json, Fetchable},
cache::{Cache, Fetchable, CACHE},
config::{
args::{CloseCommand, Command, GeoCommand},
CONF,
@ -63,7 +63,7 @@ pub struct Day {
impl Meta {
pub fn fetch(id: CanteenId) -> Result<Self> {
let url = format!("{}/canteens/{}", OPEN_MENSA_API, id);
fetch_json(url, *TTL_CANTEENS)
CACHE.fetch_json(url, *TTL_CANTEENS)
}
}

View file

@ -5,7 +5,7 @@ use lazy_static::lazy_static;
use serde::Deserialize;
use crate::{
cache::fetch_json,
cache::{Cache, CACHE},
config::{
args::{CloseCommand, Command},
CONF,
@ -56,5 +56,5 @@ pub fn infer() -> Result<(f32, f32)> {
/// Fetch geoip for current ip.
fn fetch_geoip() -> Result<LatLong> {
let url = "https://api.geoip.rs";
fetch_json(url, *TTL_GEOIP)
CACHE.fetch_json(url, *TTL_GEOIP)
}

View file

@ -63,6 +63,7 @@
//! - `$HOME/Library/Application Support/mensa/config.toml` on **macOS**,
//! - `{FOLDERID_RoamingAppData}\mensa\config.toml` on **Windows**
use cache::Cache;
use chrono::Duration;
use directories_next::ProjectDirs;
use lazy_static::lazy_static;
@ -139,6 +140,7 @@ mod tag;
// mod tests;
use crate::{
cache::CACHE,
canteen::Canteen,
config::{args::Command, CONF},
error::{Error, Result, ResultExt},
@ -169,7 +171,7 @@ fn real_main() -> Result<()> {
tracing_subscriber::fmt::init();
// Clear cache if requested
if CONF.args.clear_cache {
cache::clear()?;
CACHE.clear()?;
}
// Match over the user requested command
match CONF.cmd() {

View file

@ -9,7 +9,7 @@ use serde::de::DeserializeOwned;
use std::marker::PhantomData;
use crate::{
cache,
cache::{Cache, CACHE},
error::{Error, Result},
};
@ -75,7 +75,7 @@ where
fn next(&mut self) -> Option<Self::Item> {
// This will yield until no next_page is available
let curr_page = self.next_page.take()?;
let res = cache::fetch(curr_page, self.ttl, |text, headers| {
let res = CACHE.fetch(curr_page, self.ttl, |text, headers| {
let val = serde_json::from_str::<Vec<_>>(&text)
.map_err(|why| Error::Deserializing(why, "fetching json in pagination iterator"))?;
Ok((val, headers.this_page, headers.next_page, headers.last_page))