From 7d8de156c7c843ca00ed3cb4ffab3652ad626d9f Mon Sep 17 00:00:00 2001 From: Jeff Baskin Date: Sun, 15 Mar 2026 14:05:49 -0400 Subject: [PATCH] Can add text to a universal string. --- src/document/field.rs | 137 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 117 insertions(+), 20 deletions(-) diff --git a/src/document/field.rs b/src/document/field.rs index ee97984..5d5b054 100644 --- a/src/document/field.rs +++ b/src/document/field.rs @@ -4,6 +4,7 @@ use std::{ cmp::Ordering, collections::HashMap, ops::{Add, AddAssign}, + str::Split, time::Duration, }; use uuid::Uuid; @@ -417,6 +418,7 @@ mod paragraph_data { } } +#[derive(Clone, Debug)] struct Paragraph { data: HashMap, } @@ -575,44 +577,120 @@ mod paragraphs { } } +#[derive(Clone, Debug)] struct UniversalString { - data: Paragraph, + paragraphs: HashMap<Uuid, Paragraph>, + revisions: Vec<Vec<Uuid>>, } impl UniversalString { + fn split_string(text: String) -> Vec<String> { + let holder = text.as_str(); + let mut output = Vec::new(); + for item in holder.split("\u{2029}") { + output.push(item.to_string()); + } + output + } + fn new(lang: Language, text: String) -> Self { + let input = Self::split_string(text); + let mut paragraphs = HashMap::new(); + let mut revision = Vec::new(); + for paragraph in input { + let mut id = Uuid::new_v4(); + while paragraphs.contains_key(&id) { + id = Uuid::new_v4(); + } + if paragraph != "" { + revision.push(id.clone()); + paragraphs.insert(id, Paragraph::new(lang.clone(), paragraph.to_string())); + } + } Self { - data: Paragraph::new(lang, text), + paragraphs: paragraphs, + revisions: vec![revision], } } - fn get(&self, lang: &Language) -> Option<&String> { - self.data.get(lang) + fn get(&self, lang: &Language) -> Option<String> { + let latest = self.revisions.iter().last().unwrap(); + let mut output = "".to_string(); + for id in latest.iter() { + let paragraph = self.paragraphs.get(id).unwrap(); + let text = paragraph.get(lang).unwrap(); + output += text; + output += "\u{2029}"; + } + Some(output) + } + + fn 
revision_count(&self) -> usize { + self.revisions.len() - 1 + } + + fn update(&mut self, lang: Language, text: String) { + let input = Self::split_string(text); + let mut version = Vec::new(); + for paragraph in input { + if paragraph != "" { + let mut id = Uuid::nil(); + for (key, value) in self.paragraphs.iter() { + if &paragraph == value.get(&lang).unwrap() { + println!("--- Got here ---"); + id = key.clone(); + break; + } + } + if id == Uuid::nil() { + id = Uuid::new_v4(); + while self.paragraphs.contains_key(&id) { + id = Uuid::new_v4(); + } + self.paragraphs.insert( + id.clone(), + Paragraph::new(lang.clone(), paragraph.to_string()), + ); + } + version.push(id); + } + } + self.revisions.push(version); } } #[cfg(test)] mod universal_strings { use super::*; + use rand::random_range; - const INITIAL_DATA: [&str; 5] = ["one", "two", "three", "four", "five"]; + const ENGLISH_DATA: [&str; 5] = ["one", "two", "three", "four", "five"]; + const JAPANESE_DATA: [&str; 5] = ["一", "二", "三", "四", "五"]; - struct TestData { - data: Vec<String>, - } + struct TestData; impl TestData { - fn new(paragraphs: Vec<&str>) -> Self { - let mut holder = Vec::new(); - for item in paragraphs.iter() { - holder.push(item.to_string()); + fn english() -> (Language, Vec<String>) { + let lang = Language::from_639_1("en").unwrap(); + let mut data = Vec::new(); + for text in ENGLISH_DATA.iter() { + data.push(text.to_string()); } - Self { data: holder } + (lang, data) } - fn get_text(&self) -> String { + fn japanese() -> (Language, Vec<String>) { + let lang = Language::from_639_1("ja").unwrap(); + let mut data = Vec::new(); + for text in JAPANESE_DATA.iter() { + data.push(text.to_string()); + } + (lang, data) + } + + fn to_input(data: Vec<String>) -> String { let mut output = "".to_string(); - for paragraph in self.data.iter() { + for paragraph in data.iter() { output += paragraph; output += "\u{2029}"; } @@ -629,15 +707,34 @@ mod universal_strings { ]; for i in 0..text.len() { let ustr = UniversalString::new(languages[i].clone(), 
text[i].to_string()); - assert_eq!(ustr.get(&languages[i]).unwrap(), text[i]); + let expected = text[i].to_string() + "\u{2029}"; + assert_eq!(ustr.get(&languages[i]).unwrap(), expected); + assert_eq!(ustr.revision_count(), 0); + assert_eq!(ustr.paragraphs.len(), 1); } } #[test] fn accepts_strings_with_multiple_paragraphs() { - let lang = Language::from_639_1("en").unwrap(); - let input = TestData::new(INITIAL_DATA.to_vec()); - let ustr = UniversalString::new(lang.clone(), input.get_text()); - assert_eq!(ustr.get(&lang).unwrap(), &input.get_text()); + let (lang, data) = TestData::english(); + let input = TestData::to_input(data.clone()); + let ustr = UniversalString::new(lang.clone(), input.clone()); + assert_eq!(ustr.get(&lang).unwrap(), input); + assert_eq!(ustr.revision_count(), 0); + assert_eq!(ustr.paragraphs.len(), data.len(), "{:?}", ustr); + } + + #[test] + fn can_insert_text_into_string() { + let (lang, mut data) = TestData::english(); + let initial = TestData::to_input(data.clone()); + let mut ustr = UniversalString::new(lang.clone(), initial.clone()); + let position = random_range(..data.len()); + data.insert(position, "something".to_string()); + let expected = TestData::to_input(data.clone()); + ustr.update(lang.clone(), expected.clone()); + assert_eq!(ustr.get(&lang).unwrap(), expected); + assert_eq!(ustr.revision_count(), 1); + assert_eq!(ustr.paragraphs.len(), data.len(), "{:?}", ustr); } }