Files
morethantext/src/document/field.rs

768 lines
24 KiB
Rust
Raw Normal View History

2026-01-06 16:19:15 -05:00
use chrono::prelude::*;
use isolang::Language;
2026-01-06 16:19:15 -05:00
use std::{
2026-02-28 15:47:19 -05:00
cmp::Ordering,
collections::HashMap,
2026-01-06 16:19:15 -05:00
ops::{Add, AddAssign},
2026-03-15 14:05:49 -04:00
str::Split,
2026-01-06 16:19:15 -05:00
time::Duration,
};
use uuid::Uuid;
2026-02-28 15:47:19 -05:00
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq)]
2026-01-06 16:19:15 -05:00
/// A single dynamically typed value.
///
/// Each variant wraps one supported payload type; `None` carries no payload.
pub enum Field {
/// A boolean value.
Boolean(bool),
/// A UTC timestamp.
DateTime(DateTime<Utc>),
/// A span of time (`std::time::Duration`).
Duration(Duration),
/// A signed 128-bit integer.
Integer(i128),
/// No value; also what `Add` produces for operand pairs it does not support.
None,
/// An owned string.
StaticString(String),
/// A UUID.
Uuid(Uuid),
}
impl Field {
pub fn get_type(&self) -> FieldType {
self.into()
}
}
impl Add for Field {
type Output = Self;
fn add(self, other: Self) -> Self {
match self {
Field::DateTime(value1) => match other {
Field::Duration(value2) => { value1 + value2 }.into(),
_ => Field::None,
},
Field::Duration(value1) => match other {
Field::Duration(value2) => { value1 + value2 }.into(),
_ => Field::None,
},
Field::Integer(value1) => match other {
Field::Integer(value2) => { value1 + value2 }.into(),
_ => Field::None,
},
_ => Self::None,
}
}
}
impl AddAssign for Field {
fn add_assign(&mut self, other: Self) {
*self = self.clone().add(other);
}
}
impl From<bool> for Field {
fn from(value: bool) -> Self {
Self::Boolean(value)
}
}
impl From<DateTime<Utc>> for Field {
fn from(value: DateTime<Utc>) -> Self {
Self::DateTime(value)
}
}
impl From<Duration> for Field {
fn from(value: Duration) -> Self {
Self::Duration(value)
}
}
impl From<String> for Field {
fn from(value: String) -> Self {
Self::StaticString(value)
}
}
impl From<&str> for Field {
fn from(value: &str) -> Self {
Self::from(value.to_string())
}
}
impl From<Uuid> for Field {
fn from(value: Uuid) -> Self {
Self::Uuid(value)
}
}
impl From<i128> for Field {
fn from(value: i128) -> Self {
Self::Integer(value)
}
}
impl From<isize> for Field {
fn from(value: isize) -> Self {
let data: i128 = value.try_into().unwrap();
Self::from(data)
}
}
impl From<i32> for Field {
fn from(value: i32) -> Self {
let data: i128 = value.into();
Self::from(data)
}
}
2026-02-28 15:47:19 -05:00
impl PartialOrd for Field {
fn partial_cmp(&self, other: &Field) -> Option<Ordering> {
match (self, other) {
(Self::Boolean(d1), Self::Boolean(d2)) => d1.partial_cmp(d2),
(Self::DateTime(d1), Self::DateTime(d2)) => d1.partial_cmp(d2),
(Self::Duration(d1), Self::Duration(d2)) => d1.partial_cmp(d2),
(Self::Integer(d1), Self::Integer(d2)) => d1.partial_cmp(d2),
(Self::StaticString(d1), Self::StaticString(d2)) => d1.partial_cmp(d2),
(Self::Uuid(d1), Self::Uuid(d2)) => d1.partial_cmp(d2),
(_, _) => None,
}
}
}
2026-01-06 16:19:15 -05:00
/// Unit tests for `Field`: construction through `From`, addition, and ordering.
#[cfg(test)]
mod fields {
    use super::*;
    use rand::random;

    #[test]
    fn can_create_static_string() {
        let data = Uuid::new_v4().to_string();
        let result: Field = data.clone().into();
        match &result {
            Field::StaticString(output) => assert_eq!(output, &data),
            _ => unreachable!("got {:?}: should have been static string", result),
        }
        assert_eq!(result.get_type(), FieldType::StaticString);
    }

    #[test]
    fn can_create_from_str() {
        for data in ["one", "two"] {
            let result: Field = data.into();
            match &result {
                Field::StaticString(output) => assert_eq!(output, data),
                _ => unreachable!("got {:?}: should have been static string", result),
            }
            assert_eq!(result.get_type(), FieldType::StaticString);
        }
    }

    #[test]
    fn create_from_uuid() {
        let data = Uuid::new_v4();
        let result: Field = data.into();
        match &result {
            Field::Uuid(output) => assert_eq!(*output, data),
            _ => unreachable!("got {:?}: should have been uuid", result),
        }
        assert_eq!(result.get_type(), FieldType::Uuid);
    }

    #[test]
    fn create_from_datatime() {
        let data = Utc::now();
        let result: Field = data.into();
        match &result {
            Field::DateTime(output) => assert_eq!(*output, data),
            _ => unreachable!("got {:?}: should have been datetime", result),
        }
        assert_eq!(result.get_type(), FieldType::DateTime);
    }

    #[test]
    fn does_adding_return_none_for_things_that_cannot_add() {
        let value1: Field = Uuid::new_v4().into();
        let value2: Field = Uuid::new_v4().into();
        assert_eq!(value1 + value2, Field::None);
    }

    #[test]
    fn can_integers_be_added() {
        let value1: i128 = random::<u8>().into();
        let value2: i128 = random::<u8>().into();
        let expected: Field = (value1 + value2).into();
        let value1: Field = value1.into();
        let value2: Field = value2.into();
        assert_eq!(value1 + value2, expected);
    }

    #[test]
    fn can_integer_add_mismatch_returns_none() {
        let value1: Field = 5.into();
        let value2: Field = "nope".into();
        assert_eq!(value1 + value2, Field::None);
    }

    #[test]
    fn can_durations_be_added() {
        let data1: u64 = random::<u8>().into();
        let data2: u64 = random::<u8>().into();
        let value1: Field = Duration::from_secs(data1).into();
        let value2: Field = Duration::from_secs(data2).into();
        let expected: Field = Duration::from_secs(data1 + data2).into();
        assert_eq!(value1 + value2, expected);
    }

    #[test]
    fn does_duration_mismatch_return_none() {
        let value1: Field = Duration::from_secs(20).into();
        let value2: Field = "nope".into();
        assert_eq!(value1 + value2, Field::None);
    }

    #[test]
    fn can_durations_be_added_to_datetimes() {
        let timestamp = Utc::now();
        let data: u64 = random::<u8>().into();
        let duration = Duration::from_secs(data);
        let expected: Field = (timestamp + duration).into();
        let value1: Field = timestamp.into();
        let value2: Field = duration.into();
        assert_eq!(value1 + value2, expected);
    }

    #[test]
    fn does_datetime_mismatch_return_none() {
        let value1: Field = Utc::now().into();
        let value2: Field = "nope".into();
        assert_eq!(value1 + value2, Field::None);
    }

    #[test]
    fn do_comparason_functions_work() {
        let fields = [Field::Integer(0), Field::Integer(1), Field::Integer(2)];
        assert!(fields[1] == fields[1], "equal did not work");
        assert!(fields[1] < fields[2], "less than did not work");
        assert!(fields[1] <= fields[2], "less than or equal did not work for a smaller value");
        assert!(fields[1] <= fields[1], "less than or equal did not work for an equal value");
        assert!(fields[1] > fields[0], "greater than did not work");
        assert!(fields[1] >= fields[0], "greater than or equal did not work for a larger value");
        assert!(fields[1] >= fields[1], "greater than or equal did not work for an equal value");
    }

    #[test]
    fn does_mismatched_comparason_fields_return_false() {
        let fields = [Field::Integer(0), Field::Uuid(Uuid::nil())];
        // Check both operand orders: every operator must report false when
        // the variants differ.
        for (lhs, rhs) in [(&fields[0], &fields[1]), (&fields[1], &fields[0])] {
            assert!(!(lhs == rhs), "equal should be false for {:?} and {:?}", lhs, rhs);
            assert!(!(lhs < rhs), "less than should be false for {:?} and {:?}", lhs, rhs);
            assert!(
                !(lhs <= rhs),
                "less than or equal should be false for {:?} and {:?}",
                lhs,
                rhs
            );
            assert!(!(lhs > rhs), "greater than should be false for {:?} and {:?}", lhs, rhs);
            assert!(
                !(lhs >= rhs),
                "greater than or equal should be false for {:?} and {:?}",
                lhs,
                rhs
            );
        }
    }
}
/// The kind of value a `Field` can hold, without the value itself.
///
/// The variants mirror those of `Field` one-to-one. The type is a plain
/// fieldless enum, so it is cheap to copy and usable as a hash-map key.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum FieldType {
    /// Kind of `Field::Boolean`.
    Boolean,
    /// Kind of `Field::DateTime`.
    DateTime,
    /// Kind of `Field::Duration`.
    Duration,
    /// Kind of `Field::Integer`.
    Integer,
    /// Kind of `Field::None`.
    None,
    /// Kind of `Field::StaticString`.
    StaticString,
    /// Kind of `Field::Uuid`.
    Uuid,
}
impl FieldType {
    /// Builds the default `Field` for this type.
    ///
    /// Defaults are `false`, the current UTC time, a zero-second duration,
    /// `0`, `Field::None`, the empty string, and a freshly generated v4 UUID
    /// respectively. The `DateTime` and `Uuid` defaults therefore differ from
    /// call to call.
    pub fn get_default(&self) -> Field {
        match self {
            Self::Boolean => Field::Boolean(false),
            Self::DateTime => Field::DateTime(Utc::now()),
            Self::Duration => Field::Duration(Duration::from_secs(0)),
            Self::Integer => Field::Integer(0),
            Self::None => Field::None,
            Self::StaticString => Field::StaticString(String::new()),
            Self::Uuid => Field::Uuid(Uuid::new_v4()),
        }
    }
}
impl From<&Field> for FieldType {
fn from(value: &Field) -> Self {
match value {
Field::Boolean(_) => Self::Boolean,
Field::DateTime(_) => Self::DateTime,
Field::Duration(_) => Self::Duration,
Field::Integer(_) => Self::Integer,
Field::None => Self::None,
Field::StaticString(_) => Self::StaticString,
Field::Uuid(_) => Self::Uuid,
}
}
}
/// Unit tests for the defaults produced by `FieldType::get_default`.
#[cfg(test)]
mod fieldtypes {
    use super::*;

    #[test]
    fn can_get_defaults_for_uuid() {
        let ftype = FieldType::Uuid;
        let mut ids: Vec<Uuid> = Vec::new();
        for _ in 0..5 {
            let result = ftype.get_default();
            match &result {
                Field::Uuid(data) => {
                    // Every generated default must be new.
                    assert!(
                        !ids.contains(data),
                        "found duplicate id {:?} in {:?}",
                        data,
                        ids
                    );
                    ids.push(*data);
                }
                _ => unreachable!("got {:?}: should have been uuid", result),
            }
        }
    }

    #[test]
    fn can_get_defaults_for_static_string() {
        let result = FieldType::StaticString.get_default();
        match &result {
            Field::StaticString(data) => assert_eq!(data, ""),
            _ => unreachable!("got {:?}: should have been static string", result),
        }
    }
}
2026-02-04 09:45:28 -05:00
/// Records who produced a piece of paragraph text.
#[derive(Clone, Debug)]
enum DataOrigin {
/// Text attributed to a person; `ParagraphData::by_human` reports `true` for it.
Human,
/// Text attributed to a machine; `ParagraphData::by_human` reports `false` for it.
Computer,
}
#[derive(Clone, Debug)]
struct ParagraphData {
string: String,
origin: DataOrigin,
2026-03-14 12:33:40 -04:00
initial: bool,
}
impl ParagraphData {
2026-03-14 12:33:40 -04:00
fn initial(string: String) -> Self {
Self {
string: string,
origin: DataOrigin::Human,
initial: true,
}
}
fn new(string: String, origin: DataOrigin) -> Self {
Self {
string: string,
origin: origin,
2026-03-14 12:33:40 -04:00
initial: false,
}
}
fn get_text(&self) -> &String {
&self.string
}
fn by_human(&self) -> bool {
match self.origin {
DataOrigin::Human => true,
DataOrigin::Computer => false,
}
}
2026-03-14 12:33:40 -04:00
fn is_initial(&self) -> bool {
self.initial
}
}
/// Unit tests for the `ParagraphData` constructors and accessors.
#[cfg(test)]
mod paragraph_data {
    use super::*;

    #[test]
    fn can_determine_initial_information() {
        let text = Uuid::new_v4().to_string();
        let entry = ParagraphData::initial(text.clone());
        assert_eq!(entry.get_text(), &text);
        assert!(entry.by_human(), "{:?} should have returned true", entry);
        assert!(entry.is_initial(), "{:?} should have returned true", entry);
    }

    #[test]
    fn can_be_made_by_humans() {
        let text = Uuid::new_v4().to_string();
        let entry = ParagraphData::new(text.clone(), DataOrigin::Human);
        assert_eq!(entry.get_text(), &text);
        assert!(entry.by_human(), "{:?} should have returned true", entry);
        assert!(!entry.is_initial(), "{:?} should have returned false", entry);
    }

    #[test]
    fn can_be_made_by_computers() {
        let text = Uuid::new_v4().to_string();
        let entry = ParagraphData::new(text.clone(), DataOrigin::Computer);
        assert_eq!(entry.get_text(), &text);
        assert!(!entry.by_human(), "{:?} should have returned false", entry);
        assert!(!entry.is_initial(), "{:?} should have returned false", entry);
    }
}
2026-03-15 14:05:49 -04:00
/// One paragraph of text, stored once per language.
#[derive(Clone, Debug)]
struct Paragraph {
// Text entries keyed by language; `Paragraph::new` inserts the first one.
data: HashMap<Language, ParagraphData>,
}
2026-02-04 09:45:28 -05:00
impl Paragraph {
2026-03-14 12:33:40 -04:00
fn new(lang: Language, string: String) -> Self {
let mut data = HashMap::new();
2026-03-14 12:33:40 -04:00
data.insert(lang, ParagraphData::initial(string));
Self { data: data }
2026-02-04 09:45:28 -05:00
}
2026-02-06 12:06:51 -05:00
fn add_translation(&mut self, string: String, lang: Language) {
match self.data.get(&lang) {
Some(_) => {}
None => {
self.data
.insert(lang, ParagraphData::new(string, DataOrigin::Computer));
}
};
}
fn improve_translation(&mut self, string: String, lang: Language) {
self.data
.insert(lang, ParagraphData::new(string, DataOrigin::Human));
}
fn get(&self, lang: &Language) -> Option<&String> {
match self.data.get(lang) {
Some(data) => Some(data.get_text()),
None => None,
}
}
2026-03-14 12:33:40 -04:00
fn get_initial(&self) -> (&Language, &String) {
for (lang, data) in self.data.iter() {
if data.is_initial() {
return (lang, data.get_text());
}
}
unreachable!("paragraph should initialize with data");
}
fn by_humans(&self) -> HashMap<Language, String> {
let mut output = HashMap::new();
for (lang, data) in self.data.iter() {
if data.by_human() {
output.insert(lang.clone(), data.get_text().clone());
}
}
output
}
}
/// Unit tests for `Paragraph`: storage per language, translations, and origin tracking.
#[cfg(test)]
mod paragraphs {
    use super::*;

    /// Looks up a language by its ISO 639-1 code; the codes used here are known to exist.
    fn language(code: &str) -> Language {
        Language::from_639_1(code).unwrap()
    }

    #[test]
    fn does_paragraph_store_language_information() {
        let data = Uuid::new_v4().to_string();
        for lang in [language("en"), language("ja")] {
            let result = Paragraph::new(lang, data.clone());
            assert_eq!(result.get(&lang).unwrap(), &data);
        }
    }

    #[test]
    fn are_multiple_languages_stored() {
        let text = ["test", "テスト"];
        let languages = [language("en"), language("ja")];
        let mut paragraph = Paragraph::new(languages[0], text[0].to_string());
        paragraph.add_translation(text[1].to_string(), languages[1]);
        for (lang, expected) in languages.iter().zip(text) {
            assert_eq!(paragraph.get(lang).unwrap(), expected);
        }
    }

    #[test]
    fn does_add_translation_get_ignored_if_it_already_exists() {
        let text = "something";
        let lang = language("en");
        let mut paragraph = Paragraph::new(lang, text.to_string());
        paragraph.add_translation("other".to_string(), lang);
        assert_eq!(paragraph.get(&lang).unwrap(), text);
    }

    #[test]
    fn does_improve_translation_replace_existing() {
        let text = "new";
        let lang = language("en");
        let mut paragraph = Paragraph::new(lang, "old".to_string());
        paragraph.improve_translation(text.to_string(), lang);
        assert_eq!(paragraph.get(&lang).unwrap(), text);
    }

    #[test]
    fn can_determine_human_text() {
        let text = "something";
        let lang = language("en");
        let paragraph = Paragraph::new(lang, text.to_string());
        let result = paragraph.by_humans();
        assert_eq!(result.len(), 1, "got wrong numnber of texts");
        assert_eq!(result.get(&lang).unwrap(), text);
    }

    #[test]
    fn add_translation_does_not_count_as_human_text() {
        let text = "test";
        let lang = language("en");
        let mut paragraph = Paragraph::new(lang, text.to_string());
        paragraph.add_translation("テスト".to_string(), language("ja"));
        let result = paragraph.by_humans();
        assert_eq!(result.len(), 1, "got wrong numnber of texts");
        assert_eq!(result.get(&lang).unwrap(), text);
    }

    #[test]
    fn impove_translation_does_get_added_as_human() {
        let text = ["test", "テスト"];
        let languages = [language("en"), language("ja")];
        let mut paragraph = Paragraph::new(languages[0], text[0].to_string());
        paragraph.improve_translation(text[1].to_string(), languages[1]);
        let result = paragraph.by_humans();
        assert_eq!(result.len(), 2, "got wrong numnber of texts");
        for (lang, expected) in languages.iter().zip(text) {
            assert_eq!(result.get(lang).unwrap(), expected);
        }
    }

    #[test]
    fn can_get_original_text() {
        let text = Uuid::nil().to_string();
        let lang = language("en");
        let mut paragraph = Paragraph::new(lang, text.clone());
        paragraph.add_translation(Uuid::new_v4().to_string(), language("ja"));
        paragraph.improve_translation(Uuid::new_v4().to_string(), language("de"));
        let (rlang, rtext) = paragraph.get_initial();
        assert_eq!(rlang, &lang);
        assert_eq!(rtext, &text);
    }
}
2026-03-15 14:05:49 -04:00
#[derive(Clone, Debug)]
2026-03-14 12:33:40 -04:00
struct UniversalString {
2026-03-15 14:05:49 -04:00
paragraphs: HashMap<Uuid, Paragraph>,
revisions: Vec<Vec<Uuid>>,
2026-03-14 12:33:40 -04:00
}
impl UniversalString {
fn new(lang: Language, text: String) -> Self {
let mut output = Self {
paragraphs: HashMap::new(),
revisions: Vec::new(),
};
output.update(lang, text);
2026-03-15 14:05:49 -04:00
output
}
fn get(&self, lang: &Language) -> Option<String> {
let latest = self.revisions.len() - 1;
self.get_revision(latest, lang)
2026-03-14 12:33:40 -04:00
}
fn get_revision(&self, rev_num: usize, lang: &Language) -> Option<String> {
2026-03-15 14:05:49 -04:00
let mut output = "".to_string();
for id in self.revisions[rev_num].iter() {
2026-03-15 14:05:49 -04:00
let paragraph = self.paragraphs.get(id).unwrap();
let text = paragraph.get(lang).unwrap();
output += text;
output += "\u{2029}";
}
Some(output)
}
fn revision_count(&self) -> usize {
self.revisions.len() - 1
}
fn update(&mut self, lang: Language, text: String) {
let mut version = Vec::new();
for paragraph in text.as_str().split("\u{2029}") {
2026-03-15 14:05:49 -04:00
if paragraph != "" {
let mut id = Uuid::nil();
for (key, value) in self.paragraphs.iter() {
if &paragraph == value.get(&lang).unwrap() {
id = key.clone();
break;
}
}
if id == Uuid::nil() {
id = Uuid::new_v4();
while self.paragraphs.contains_key(&id) {
id = Uuid::new_v4();
}
self.paragraphs.insert(
id.clone(),
Paragraph::new(lang.clone(), paragraph.to_string()),
);
}
version.push(id);
}
}
self.revisions.push(version);
2026-02-06 12:06:51 -05:00
}
2026-02-04 09:45:28 -05:00
}
/// Unit tests for `UniversalString`: revisions, paragraph reuse, and rendering.
#[cfg(test)]
mod universal_strings {
    use super::*;
    use rand::random_range;

    const ENGLISH_DATA: [&str; 5] = ["one", "two", "three", "four", "five"];
    const JAPANESE_DATA: [&str; 5] = ["", "", "", "", ""];

    /// Namespace for the fixtures shared by the tests below.
    struct TestData;

    impl TestData {
        /// The English language together with the English sample paragraphs.
        fn english() -> (Language, Vec<String>) {
            let paragraphs = ENGLISH_DATA.iter().map(|text| text.to_string()).collect();
            (Language::from_639_1("en").unwrap(), paragraphs)
        }

        /// The Japanese language together with the Japanese sample paragraphs.
        fn japanese() -> (Language, Vec<String>) {
            let paragraphs = JAPANESE_DATA.iter().map(|text| text.to_string()).collect();
            (Language::from_639_1("ja").unwrap(), paragraphs)
        }

        /// Joins paragraphs into one input string, ending each with U+2029.
        fn to_input(data: Vec<String>) -> String {
            data.iter()
                .map(|paragraph| format!("{}\u{2029}", paragraph))
                .collect()
        }
    }

    #[test]
    fn are_initial_strings_empty() {
        for (code, text) in [("en", "test"), ("ja", "テスト")] {
            let lang = Language::from_639_1(code).unwrap();
            let ustr = UniversalString::new(lang, text.to_string());
            let expected = format!("{}\u{2029}", text);
            assert_eq!(ustr.get(&lang).unwrap(), expected);
            assert_eq!(ustr.revision_count(), 0);
            assert_eq!(ustr.paragraphs.len(), 1);
        }
    }

    #[test]
    fn accepts_strings_with_multiple_paragraphs() {
        let (lang, data) = TestData::english();
        let input = TestData::to_input(data.clone());
        let ustr = UniversalString::new(lang, input.clone());
        assert_eq!(ustr.get(&lang).unwrap(), input);
        assert_eq!(ustr.revision_count(), 0);
        assert_eq!(ustr.paragraphs.len(), data.len(), "{:?}", ustr);
    }

    #[test]
    fn can_insert_text_into_string() {
        let (lang, mut data) = TestData::english();
        let initial = TestData::to_input(data.clone());
        let mut ustr = UniversalString::new(lang, initial.clone());
        let position = random_range(..data.len());
        data.insert(position, "something".to_string());
        let expected = TestData::to_input(data.clone());
        ustr.update(lang, expected.clone());
        assert_eq!(ustr.get(&lang).unwrap(), expected);
        assert_eq!(ustr.revision_count(), 1);
        assert_eq!(ustr.paragraphs.len(), data.len(), "{:?}", ustr);
        assert_eq!(ustr.get_revision(0, &lang).unwrap(), initial);
    }

    #[test]
    fn can_a_paragraph_be_replaced() {
        let (lang, mut data) = TestData::english();
        let initial = TestData::to_input(data.clone());
        let mut ustr = UniversalString::new(lang, initial.clone());
        let position = random_range(..data.len());
        data[position] = "replaced".to_string();
        let expected = TestData::to_input(data.clone());
        ustr.update(lang, expected.clone());
        assert_eq!(ustr.get(&lang).unwrap(), expected);
        assert_eq!(ustr.revision_count(), 1);
        // The replaced paragraph is kept for the old revision, so one extra is stored.
        assert_eq!(ustr.paragraphs.len(), (data.len() + 1), "{:?}", ustr);
        assert_eq!(ustr.get_revision(0, &lang).unwrap(), initial);
    }

    #[test]
    fn does_not_store_duplicate_data() {
        let lang = Language::from_639_1("en").unwrap();
        let data = vec!["same".to_string(); 3];
        let initial = TestData::to_input(data);
        let ustr = UniversalString::new(lang, initial.clone());
        assert_eq!(ustr.get(&lang).unwrap(), initial);
        assert_eq!(ustr.paragraphs.len(), 1, "{:?}", ustr);
    }

    #[test]
    fn can_text_be_removed() {
        let (lang, mut data) = TestData::english();
        let expected_paragraphs = data.len();
        let initial = TestData::to_input(data.clone());
        let mut ustr = UniversalString::new(lang, initial.clone());
        let position = random_range(..data.len());
        data.remove(position);
        let expected = TestData::to_input(data.clone());
        ustr.update(lang, expected.clone());
        assert_eq!(ustr.get(&lang).unwrap(), expected);
        assert_eq!(ustr.revision_count(), 1);
        assert_eq!(ustr.paragraphs.len(), expected_paragraphs, "{:?}", ustr);
        assert_eq!(ustr.get_revision(0, &lang).unwrap(), initial);
    }
}