1mod error;
2mod span;
3pub mod tokens;
4
5pub use error::LexerError;
6pub use span::Span;
7pub use tokens::IntoToken;
8
9pub fn preprocess(card_name: &str, oracle_text: &str) -> String {
11 let card_name = card_name.to_lowercase();
12 let result = oracle_text.to_lowercase();
13
14 lazy_static::lazy_static!(
16 static ref unicode_regex: regex::Regex = regex::Regex::new("\\\\u(\\d{4})")
17 .expect("Failed to compile unicode character point regex");
18 );
19 let replacement = |cap: ®ex::Captures| -> String {
20 let (_, [point]) = cap.extract();
21 let point = u32::from_str_radix(point, 16).expect("Regex matched a non valid u32!");
22 let ch = char::from_u32(point).expect("Regex matched a non valid unicode point!");
23 ch.to_string()
24 };
25 let result = unicode_regex.replace_all(&result, &replacement);
26
27 let result = result.to_ascii_lowercase();
29
30 let result = remove_comments(&result);
32 let result = replace_name(&card_name, &result);
33 let result = result.replace("\\n", "\n");
34
35 result
36}
37
38fn remove_comments(input: &str) -> String {
40 let mut chars = input.chars();
41 let mut result = String::with_capacity(input.len());
42
43 while let Some(char) = chars.next() {
44 match char {
45 '(' => remove_parens(&mut chars),
46 c => result.push(c),
47 }
48 }
49
50 result
51}
52
/// Consumes characters up to and including the `)` that closes an
/// already-consumed `(`, skipping nested parentheses recursively.
///
/// If the iterator is exhausted before a closing `)` appears, this returns
/// instead of spinning forever (the previous `_ => {}` arm also matched
/// `None`, so an unterminated paren caused an infinite loop).
fn remove_parens<I: Iterator<Item = char>>(chars: &mut I) {
    loop {
        match chars.next() {
            // Matching close paren: this level is done.
            Some(')') => break,
            // Nested open paren: skip its contents first.
            Some('(') => remove_parens(chars),
            // Input ended without a close paren — bail out.
            None => break,
            Some(_) => {}
        }
    }
}
64
/// Replaces occurrences of the card's own name in its oracle text with `~`.
///
/// Both the full name and the name stripped of its epithet (the part before
/// the first comma, e.g. "urza, lord high artificer" -> "urza") are
/// replaced, since oracle text often refers to the card by its short name.
fn replace_name(card_name: &str, lowercase_oracle_text: &str) -> String {
    let card_name_lowercase = card_name.to_ascii_lowercase();

    // Replace the full name first so the epithet-less pass below only has
    // to catch short-form references.
    let result = lowercase_oracle_text.replace(&card_name_lowercase, "~");

    // `str::split` always yields at least one item, so the previous
    // `Option` match had an unreachable `None` arm — take the prefix
    // directly instead.
    let card_name_without_epithet = card_name_lowercase
        .split(',')
        .next()
        .expect("str::split always yields at least one item");
    result.replace(card_name_without_epithet, "~")
}
78
79pub fn lex(input: &str) -> Result<Vec<tokens::Token>, error::LexerError> {
81 lazy_static::lazy_static!(
82 static ref raw_token_regex: regex::Regex = {
83 const MATCHABLE_NON_WORDS: &[&'static str] = &[
85 "\\.", ",", "'", "{", "}", "~", "\\/", ":", "+", "\\-", "—", "•", "\n",
86 ];
87 let matchable_non_words: String = MATCHABLE_NON_WORDS.iter().cloned().collect();
88 let raw_token_pattern = format!("(\\b\\w+\\b)|([{}])", matchable_non_words);
89 regex::Regex::new(&raw_token_pattern).expect("Failed to compile regex!")
90 };
91 );
92
93 let mut raw_tokens: std::collections::VecDeque<_> = raw_token_regex.find_iter(input).collect();
94
95 let mut result = Vec::new();
96
97 'outer: while !raw_tokens.is_empty() {
98 for token_count in (0..raw_tokens.len()).rev() {
100 let start = raw_tokens[0].start();
101 let end = raw_tokens[token_count].end();
102 let span = span::Span {
103 start,
104 length: end - start,
105 text: &input[start..end],
106 };
107 if let Some(token) = tokens::Token::try_from_span(span) {
109 raw_tokens.drain(0..token_count + 1);
110 result.push(token);
111 continue 'outer;
112 }
113 }
114 break;
116 }
117
118 if raw_tokens.is_empty() {
119 Ok(result)
120 } else {
121 let start = raw_tokens[0].start();
122 let end = raw_tokens[raw_tokens.len() - 1].end();
123 Err(error::LexerError::NoTokenMatch {
124 start: start,
125 end: end,
126 tokens: input[start..end].to_string(),
127 })
128 }
129}