use crate::error::MetadataError;
use dtt::datetime::DateTime;
use regex::Regex;
use serde_json::Value as JsonValue;
use std::collections::HashMap;
use toml::Value as TomlValue;
/// A flat collection of front-matter fields, keyed by field name.
///
/// Both keys and values are stored as owned strings.
#[derive(Clone, Debug, Default)]
pub struct Metadata {
    /// Backing map from field name to its string value.
    inner: HashMap<String, String>,
}

impl Metadata {
    /// Wraps an existing map of fields in a `Metadata` value.
    pub fn new(data: HashMap<String, String>) -> Self {
        Self { inner: data }
    }

    /// Looks up the value stored under `key`, if there is one.
    pub fn get(&self, key: &str) -> Option<&String> {
        let Self { inner } = self;
        inner.get(key)
    }

    /// Stores `value` under `key`.
    ///
    /// Returns the value previously stored under that key, or `None` when
    /// the key was not present.
    pub fn insert(&mut self, key: String, value: String) -> Option<String> {
        let fields = &mut self.inner;
        fields.insert(key, value)
    }

    /// Reports whether a value is stored under `key`.
    pub fn contains_key(&self, key: &str) -> bool {
        let Self { inner } = self;
        inner.contains_key(key)
    }

    /// Consumes the wrapper and hands back the underlying map.
    pub fn into_inner(self) -> HashMap<String, String> {
        let Self { inner } = self;
        inner
    }
}
pub fn extract_metadata(
content: &str,
) -> Result<Metadata, MetadataError> {
extract_yaml_metadata(content)
.or_else(|| extract_toml_metadata(content))
.or_else(|| extract_json_metadata(content))
.ok_or_else(|| MetadataError::ExtractionError {
message: "No valid front matter found.".to_string(),
})
}
/// Attempts to read YAML front matter delimited by `---` lines.
///
/// Returns `None` when the delimiters are not found at the start of
/// `content` (leading whitespace is allowed) or when the enclosed text is
/// not valid YAML.
fn extract_yaml_metadata(content: &str) -> Option<Metadata> {
    let delimiter = Regex::new(r"(?s)^\s*---\s*\n(.*?)\n\s*---\s*").ok()?;
    let found = delimiter.captures(content)?;
    let body = found.get(1)?.as_str().trim();

    match serde_yml::from_str::<serde_yml::Value>(body) {
        Ok(document) => {
            let fields: HashMap<String, String> = flatten_yaml(&document);
            Some(Metadata::new(fields))
        }
        Err(_) => None,
    }
}
/// Flattens a parsed YAML document into a single-level map.
///
/// The walk starts with an empty key prefix and is delegated to
/// `flatten_yaml_recursive`.
fn flatten_yaml(value: &serde_yml::Value) -> HashMap<String, String> {
    let mut flattened: HashMap<String, String> = HashMap::new();
    let root_prefix = String::new();
    flatten_yaml_recursive(value, root_prefix, &mut flattened);
    flattened
}
/// Renders a scalar YAML node as text.
///
/// Booleans and numbers are formatted with their `Display` output; anything
/// else falls back to `as_str()`. Returns `None` for nodes with no string
/// form (for example nulls, sequences and mappings).
fn yaml_scalar_to_string(value: &serde_yml::Value) -> Option<String> {
    match value {
        serde_yml::Value::Bool(flag) => Some(flag.to_string()),
        serde_yml::Value::Number(number) => Some(number.to_string()),
        other => other.as_str().map(str::to_owned),
    }
}

/// Recursively walks `value`, writing flattened entries into `map`.
///
/// * Mappings extend `prefix` with each key, joined by `.`, and recurse into
///   the corresponding value.
/// * Sequences are stored under `prefix` as `[a, b, c]`, built from their
///   scalar items; items that are not scalars are skipped.
/// * Any other node is stored under `prefix` as its scalar text, or as an
///   empty string when it has none.
///
/// Numeric and boolean scalars keep their textual form rather than being
/// collapsed to an empty string, and non-string keys are rendered the same
/// way.
fn flatten_yaml_recursive(
    value: &serde_yml::Value,
    prefix: String,
    map: &mut HashMap<String, String>,
) {
    match value {
        serde_yml::Value::Mapping(m) => {
            for (k, v) in m {
                let key = yaml_scalar_to_string(k).unwrap_or_default();
                let new_prefix = if prefix.is_empty() {
                    key
                } else {
                    format!("{}.{}", prefix, key)
                };
                flatten_yaml_recursive(v, new_prefix, map);
            }
        }
        serde_yml::Value::Sequence(seq) => {
            let inline_list = seq
                .iter()
                .filter_map(yaml_scalar_to_string)
                .collect::<Vec<String>>()
                .join(", ");
            map.insert(prefix, format!("[{}]", inline_list));
        }
        _ => {
            map.insert(
                prefix,
                yaml_scalar_to_string(value).unwrap_or_default(),
            );
        }
    }
}
/// Attempts to read TOML front matter delimited by `+++` markers.
///
/// Returns `None` when the markers are not found at the start of `content`
/// (leading whitespace is allowed) or when the enclosed text is not valid
/// TOML.
fn extract_toml_metadata(content: &str) -> Option<Metadata> {
    let delimiter = Regex::new(r"(?s)^\s*\+\+\+\s*(.*?)\s*\+\+\+").ok()?;
    let found = delimiter.captures(content)?;
    let body = found.get(1)?.as_str().trim();

    let document: TomlValue = match toml::from_str(body) {
        Ok(parsed) => parsed,
        Err(_) => return None,
    };

    let mut fields: HashMap<String, String> = HashMap::new();
    flatten_toml(&document, &mut fields, String::new());
    Some(Metadata::new(fields))
}
/// Recursively walks a TOML value, writing flattened entries into `map`.
///
/// * Tables extend `prefix` with each key, joined by `.`, and recurse.
/// * Arrays are stored under `prefix` as `[a, b, c]`; string items are
///   written without quotes, other items use their `to_string()` form.
/// * Strings are stored verbatim; datetimes and all remaining values are
///   stored using their `to_string()` output.
fn flatten_toml(
    value: &TomlValue,
    map: &mut HashMap<String, String>,
    prefix: String,
) {
    // Tables never produce an entry of their own; they only contribute to
    // the key path of their children.
    if let TomlValue::Table(entries) = value {
        for (name, child) in entries {
            let mut path = prefix.clone();
            if !path.is_empty() {
                path.push('.');
            }
            path.push_str(name);
            flatten_toml(child, map, path);
        }
        return;
    }

    let rendered = match value {
        TomlValue::Array(items) => {
            let mut parts: Vec<String> = Vec::with_capacity(items.len());
            for item in items {
                let text = match item {
                    TomlValue::String(s) => s.clone(),
                    other => other.to_string(),
                };
                parts.push(text);
            }
            format!("[{}]", parts.join(", "))
        }
        TomlValue::String(text) => text.clone(),
        TomlValue::Datetime(stamp) => stamp.to_string(),
        other => other.to_string(),
    };
    map.insert(prefix, rendered);
}
/// Attempts to read a JSON object from the start of `content`.
///
/// The leading JSON value is parsed with serde_json's streaming
/// deserializer, which stops at the end of the first complete value and
/// leaves the remaining document text untouched. This keeps nested objects
/// and `}` characters inside string values from truncating the front matter.
///
/// Only top-level entries whose value is a JSON string are kept; other
/// entries are skipped.
///
/// Returns `None` when `content` does not start (after leading whitespace)
/// with `{`, when the leading value is not valid JSON, or when it is not an
/// object.
fn extract_json_metadata(content: &str) -> Option<Metadata> {
    let candidate = content.trim_start();
    if !candidate.starts_with('{') {
        return None;
    }

    // Parse exactly one value; anything after it is page content.
    let mut values =
        serde_json::Deserializer::from_str(candidate).into_iter::<JsonValue>();
    let json_value = values.next()?.ok()?;
    let json_object = json_value.as_object()?;

    let metadata: HashMap<String, String> = json_object
        .iter()
        .filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string())))
        .collect();
    Some(Metadata::new(metadata))
}
/// Produces a processed copy of `metadata`; the input is left untouched.
///
/// Processing runs in three steps:
/// 1. if a `date` field is present, it is replaced by its standardized form;
/// 2. the required fields are checked;
/// 3. derived fields are generated.
///
/// # Errors
///
/// Propagates the error from date standardization or from the
/// required-field check.
pub fn process_metadata(
    metadata: &Metadata,
) -> Result<Metadata, MetadataError> {
    let mut result = metadata.clone();

    // `result` starts as an exact copy, so reading the date from the input
    // yields the same value.
    if let Some(raw_date) = metadata.get("date") {
        let normalized = standardize_date(raw_date)?;
        result.insert(String::from("date"), normalized);
    }

    ensure_required_fields(&result)?;
    generate_derived_fields(&mut result);

    Ok(result)
}
/// Converts a date string into `YYYY-MM-DD` form.
///
/// Surrounding whitespace is ignored. A 10-character value containing `/` is
/// treated as `DD/MM/YYYY` and rearranged before parsing. The value is then
/// parsed with `DateTime::parse`, falling back to the `[year]-[month]-[day]`
/// and `[month]/[day]/[year]` custom formats.
///
/// # Errors
///
/// Returns `MetadataError::DateParseError` when the value is empty, shorter
/// than 8 characters, a malformed `DD/MM/YYYY` value, or rejected by every
/// parser.
fn standardize_date(date: &str) -> Result<String, MetadataError> {
    // Trim once so the length checks and the parsers all see the same text
    // that the emptiness check does.
    let date = date.trim();

    if date.is_empty() {
        return Err(MetadataError::DateParseError(
            "Date string is empty.".to_string(),
        ));
    }
    if date.len() < 8 {
        return Err(MetadataError::DateParseError(
            "Date string is too short.".to_string(),
        ));
    }

    let date = if date.contains('/') && date.len() == 10 {
        let parts: Vec<&str> = date.split('/').collect();
        match parts.as_slice() {
            // DD/MM/YYYY -> YYYY-MM-DD
            [day, month, year]
                if day.len() == 2 && month.len() == 2 && year.len() == 4 =>
            {
                format!("{}-{}-{}", year, month, day)
            }
            _ => {
                return Err(MetadataError::DateParseError(
                    "Invalid DD/MM/YYYY date format.".to_string(),
                ));
            }
        }
    } else {
        date.to_string()
    };

    let parsed_date = DateTime::parse(&date)
        .or_else(|_| {
            DateTime::parse_custom_format(&date, "[year]-[month]-[day]")
        })
        .or_else(|_| {
            DateTime::parse_custom_format(&date, "[month]/[day]/[year]")
        })
        .map_err(|e| {
            MetadataError::DateParseError(format!(
                "Failed to parse date: {}",
                e
            ))
        })?;

    Ok(format!(
        "{:04}-{:02}-{:02}",
        parsed_date.year(),
        parsed_date.month() as u8,
        parsed_date.day()
    ))
}
/// Checks that `metadata` contains both `title` and `date`.
///
/// # Errors
///
/// Returns `MetadataError::MissingFieldError` naming the first absent field,
/// checked in the order `title`, then `date`.
fn ensure_required_fields(
    metadata: &Metadata,
) -> Result<(), MetadataError> {
    const REQUIRED: [&str; 2] = ["title", "date"];

    let first_missing = REQUIRED
        .iter()
        .find(|&&name| !metadata.contains_key(name));

    match first_missing {
        Some(name) => {
            Err(MetadataError::MissingFieldError((*name).to_string()))
        }
        None => Ok(()),
    }
}
/// Adds fields that can be computed from existing ones.
///
/// Currently this only fills in `slug` from `title`. An existing `slug` is
/// never overwritten, and nothing is added when there is no `title`.
fn generate_derived_fields(metadata: &mut Metadata) {
    if metadata.contains_key("slug") {
        return;
    }

    let derived = metadata.get("title").map(|title| generate_slug(title));
    if let Some(slug) = derived {
        metadata.insert(String::from("slug"), slug);
    }
}
fn generate_slug(title: &str) -> String {
title.to_lowercase().replace(' ', "-")
}
/// Unit tests for metadata extraction, date handling and derived fields.
#[cfg(test)]
mod tests {
    use super::*;
    use dtt::dtt_parse;

    /// Accepted date inputs are normalized to `YYYY-MM-DD`.
    #[test]
    fn test_standardize_date() {
        let test_cases = vec![
            ("2023-05-20T15:30:00Z", "2023-05-20"),
            ("2023-05-20", "2023-05-20"),
            ("20/05/2023", "2023-05-20"),
        ];
        for (input, expected) in test_cases {
            let result = standardize_date(input);
            assert!(result.is_ok(), "Failed for input: {}", input);
            assert_eq!(result.unwrap(), expected);
        }
    }

    /// Empty, non-date and two-digit-year inputs are rejected.
    #[test]
    fn test_standardize_date_errors() {
        assert!(standardize_date("").is_err());
        assert!(standardize_date("invalid").is_err());
        assert!(standardize_date("20/05/23").is_err());
    }

    /// The format string used for output dates zero-pads each component.
    #[test]
    fn test_date_format() {
        let dt = dtt_parse!("2023-01-01T12:00:00+00:00").unwrap();
        let formatted = format!(
            "{:04}-{:02}-{:02}",
            dt.year(),
            dt.month() as u8,
            dt.day()
        );
        assert_eq!(formatted, "2023-01-01");
    }

    /// Slugs are lower-cased with each space replaced by a hyphen.
    #[test]
    fn test_generate_slug() {
        assert_eq!(generate_slug("Hello World"), "hello-world");
        assert_eq!(generate_slug("Test 123"), "test-123");
        assert_eq!(generate_slug(" Spaces "), "--spaces--");
    }

    /// Processing standardizes the date and derives a slug from the title.
    #[test]
    fn test_process_metadata() {
        let mut metadata = Metadata::new(HashMap::new());
        metadata.insert("title".to_string(), "Test Title".to_string());
        metadata.insert(
            "date".to_string(),
            "2023-05-20T15:30:00Z".to_string(),
        );
        let processed = process_metadata(&metadata).unwrap();
        assert_eq!(processed.get("title").unwrap(), "Test Title");
        assert_eq!(processed.get("date").unwrap(), "2023-05-20");
        assert_eq!(processed.get("slug").unwrap(), "test-title");
    }

    /// Each supported front-matter syntax yields the expected title.
    #[test]
    fn test_extract_metadata() {
        let yaml_content = r#"---
title: YAML Test
date: 2023-05-20
---
Content here"#;
        let toml_content = r#"+++
title = "TOML Test"
date = "2023-05-20"
+++
Content here"#;
        let json_content = r#"{
"title": "JSON Test",
"date": "2023-05-20"
}
Content here"#;
        let yaml_metadata = extract_metadata(yaml_content).unwrap();
        assert_eq!(yaml_metadata.get("title").unwrap(), "YAML Test");
        let toml_metadata = extract_metadata(toml_content).unwrap();
        assert_eq!(toml_metadata.get("title").unwrap(), "TOML Test");
        let json_metadata = extract_metadata(json_content).unwrap();
        assert_eq!(json_metadata.get("title").unwrap(), "JSON Test");
    }

    /// Content without any front matter produces an error.
    #[test]
    fn test_extract_metadata_failure() {
        let invalid_content = "This content has no metadata";
        assert!(extract_metadata(invalid_content).is_err());
    }

    /// The required-field check passes with both fields and fails without
    /// `date`.
    #[test]
    fn test_ensure_required_fields() {
        let mut metadata = Metadata::new(HashMap::new());
        metadata.insert("title".to_string(), "Test".to_string());
        metadata.insert("date".to_string(), "2023-05-20".to_string());
        assert!(ensure_required_fields(&metadata).is_ok());
        let mut incomplete_metadata = Metadata::new(HashMap::new());
        incomplete_metadata
            .insert("title".to_string(), "Test".to_string());
        assert!(ensure_required_fields(&incomplete_metadata).is_err());
    }

    /// A missing slug is derived from the title.
    #[test]
    fn test_generate_derived_fields() {
        let mut metadata = Metadata::new(HashMap::new());
        metadata.insert("title".to_string(), "Test Title".to_string());
        generate_derived_fields(&mut metadata);
        assert_eq!(metadata.get("slug").unwrap(), "test-title");
    }

    /// Exercises the basic accessors of `Metadata`.
    #[test]
    fn test_metadata_methods() {
        let mut metadata = Metadata::new(HashMap::new());
        metadata.insert("key".to_string(), "value".to_string());
        assert_eq!(metadata.get("key"), Some(&"value".to_string()));
        assert!(metadata.contains_key("key"));
        assert!(!metadata.contains_key("nonexistent"));
        let old_value =
            metadata.insert("key".to_string(), "new_value".to_string());
        assert_eq!(old_value, Some("value".to_string()));
        assert_eq!(metadata.get("key"), Some(&"new_value".to_string()));
        let inner = metadata.into_inner();
        assert_eq!(inner.get("key"), Some(&"new_value".to_string()));
    }

    /// An unparseable date makes processing fail.
    #[test]
    fn test_process_metadata_with_invalid_date() {
        let mut metadata = Metadata::new(HashMap::new());
        metadata.insert("title".to_string(), "Test Title".to_string());
        metadata.insert("date".to_string(), "invalid_date".to_string());
        assert!(process_metadata(&metadata).is_err());
    }

    /// Nested YAML mappings flatten to dotted keys and sequences to a
    /// bracketed list.
    #[test]
    fn test_extract_yaml_metadata_with_complex_structure() {
        // `name` and `email` must be indented under `author`; without the
        // indentation YAML treats them as top-level keys and `author.name`
        // would not exist.
        let yaml_content = r#"---
title: Complex YAML Test
date: 2023-05-20
author:
  name: John Doe
  email: [email protected]
tags:
- rust
- metadata
- testing
---
Content here"#;
        let metadata = extract_metadata(yaml_content).unwrap();
        assert_eq!(metadata.get("title").unwrap(), "Complex YAML Test");
        assert_eq!(metadata.get("date").unwrap(), "2023-05-20");
        assert_eq!(metadata.get("author.name").unwrap(), "John Doe");
        assert_eq!(
            metadata.get("author.email").unwrap(),
            "[email protected]"
        );
        assert_eq!(
            metadata.get("tags").unwrap(),
            "[rust, metadata, testing]"
        );
    }

    /// Keys inside a TOML table flatten to dotted keys, including the array
    /// declared after the `[author]` header.
    #[test]
    fn test_extract_toml_metadata_with_complex_structure() {
        let toml_content = r#"+++
title = "Complex TOML Test"
date = 2023-05-20
[author]
name = "John Doe"
email = "[email protected]"
tags = ["rust", "metadata", "testing"]
+++
Content here"#;
        let metadata = extract_metadata(toml_content).unwrap();
        assert_eq!(
            metadata.get("title").expect("Missing 'title' key"),
            "Complex TOML Test"
        );
        assert_eq!(
            metadata.get("date").expect("Missing 'date' key"),
            "2023-05-20"
        );
        assert_eq!(
            metadata
                .get("author.name")
                .expect("Missing 'author.name' key"),
            "John Doe"
        );
        assert_eq!(
            metadata
                .get("author.email")
                .expect("Missing 'author.email' key"),
            "[email protected]"
        );
        assert_eq!(
            metadata
                .get("author.tags")
                .expect("Missing 'author.tags' key"),
            "[rust, metadata, testing]"
        );
    }

    /// Punctuation and non-ASCII letters are kept; only spaces are replaced.
    #[test]
    fn test_generate_slug_with_special_characters() {
        assert_eq!(
            generate_slug("Hello, World! 123"),
            "hello,-world!-123"
        );
        assert_eq!(generate_slug("Test: Ästhetik"), "test:-ästhetik");
        assert_eq!(
            generate_slug(" Multiple Spaces "),
            "--multiple---spaces--"
        );
    }
}