use crate::units::{NonMetric, NonMetricQuantity}; enum Expect { Number, Unit, } #[derive(Debug, PartialEq)] pub enum ParseError { NotValidNumber(String), UnexpectedUnit(String), UnknownUnit(String), ExpectedUnit, } pub fn parse(input: &str) -> Result, ParseError> { let mut quantities = Vec::new(); let mut state = Expect::Number; let mut amount = None; for token in tokenize(input) { match (&state, token) { (Expect::Number, Token::Number(number)) => { let number = parse_number(number)?; amount = Some(number); state = Expect::Unit; } (Expect::Number, Token::Unit(unit)) => { return Err(ParseError::UnexpectedUnit(unit)); } (Expect::Unit, Token::Number(_)) => { unreachable!("token stream can't contain two numbers in a row"); } (Expect::Unit, Token::Unit(unit)) => { let unit = parse_unit(unit)?; let quantity = NonMetricQuantity { amount: amount.take().expect("must have read a number to be in this state"), unit: unit, }; quantities.push(quantity); state = Expect::Number; } } } match state { Expect::Number => {} Expect::Unit => { return Err(ParseError::ExpectedUnit); } } Ok(quantities) } fn parse_number(input: String) -> Result { let no_whitespace: String = input.chars().filter(|c| !c.is_whitespace()).collect(); no_whitespace.parse().or_else(|_| Err(ParseError::NotValidNumber(input))) } fn parse_unit(input: String) -> Result { match input.as_str() { // Length "inch" => Ok(NonMetric::Inch), "inches" => Ok(NonMetric::Inch), "in" => Ok(NonMetric::Inch), "\"" => Ok(NonMetric::Inch), "″" => Ok(NonMetric::Inch), "foot" => Ok(NonMetric::Foot), "feet" => Ok(NonMetric::Foot), "ft" => Ok(NonMetric::Foot), "'" => Ok(NonMetric::Foot), "′" => Ok(NonMetric::Foot), "yard" => Ok(NonMetric::Yard), "yards" => Ok(NonMetric::Yard), "yd" => Ok(NonMetric::Yard), "mile" => Ok(NonMetric::Mile), "miles" => Ok(NonMetric::Mile), "mi" => Ok(NonMetric::Mile), "m" => Ok(NonMetric::Mile), // Weight "ounce" => Ok(NonMetric::Ounce), "ounces" => Ok(NonMetric::Ounce), "oz" => Ok(NonMetric::Ounce), "pound" => Ok(NonMetric::Pound), "pounds" => Ok(NonMetric::Pound), "lb" => Ok(NonMetric::Pound), "lbs" => Ok(NonMetric::Pound), "#" => Ok(NonMetric::Pound), "stone" => Ok(NonMetric::Stone), "stones" => Ok(NonMetric::Stone), "st" => Ok(NonMetric::Stone), // Temperature "°F" => Ok(NonMetric::Fahrenheit), "F" => Ok(NonMetric::Fahrenheit), // Area "inch²" => Ok(NonMetric::SquareInch), "inches²" => Ok(NonMetric::SquareInch), "in²" => Ok(NonMetric::SquareInch), "foot²" => Ok(NonMetric::SquareFoot), "feet²" => Ok(NonMetric::SquareFoot), "ft²" => Ok(NonMetric::SquareFoot), _ => Err(ParseError::UnknownUnit(input)), } } #[derive(Debug, PartialEq)] enum Token { Number(String), Unit(String), } enum TokState { Neutral, Number, Unit, } fn tokenize(input: &str) -> Vec { let mut tokens = Vec::new(); let mut token = String::new(); let mut state = TokState::Neutral; for c in input.chars() { match state { TokState::Neutral => { if c.is_ascii_digit() || c == '-' { token.push(c); state = TokState::Number; } else if !c.is_whitespace() { token.push(c); state = TokState::Unit; } } TokState::Number => { if c.is_ascii_digit() || c.is_whitespace() || c == '.' { token.push(c); } else { tokens.push(Token::Number(token.trim().to_string())); state = TokState::Unit; token = String::new(); token.push(c); } } TokState::Unit => { if c.is_ascii_digit() || c == '-' { tokens.push(Token::Unit(token)); state = TokState::Number; token = String::new(); token.push(c); } else if !c.is_whitespace() { token.push(c); } else { tokens.push(Token::Unit(token)); state = TokState::Neutral; token = String::new(); } } } } match state { TokState::Neutral => { assert!(token.len() == 0); } TokState::Number => { tokens.push(Token::Number(token.trim().to_string())); } TokState::Unit => { tokens.push(Token::Unit(token)); } } tokens } #[cfg(test)] mod test { use super::*; #[test] fn parsing() { assert_eq!(parse(""), Ok(vec![])); assert_eq!(parse("5 ft"), Ok(vec![ NonMetricQuantity { amount: 5.0, unit: NonMetric::Foot }, ])); assert_eq!(parse("5 ft 8 in"), Ok(vec![ NonMetricQuantity { amount: 5.0, unit: NonMetric::Foot }, NonMetricQuantity { amount: 8.0, unit: NonMetric::Inch }, ])); assert_eq!(parse("20 000 lbs"), Ok(vec![ NonMetricQuantity { amount: 20_000.0, unit: NonMetric::Pound }, ])); assert_eq!(parse("12.0."), Err(ParseError::NotValidNumber("12.0.".to_string()))); assert_eq!(parse("ft"), Err(ParseError::UnexpectedUnit("ft".to_string()))); assert_eq!(parse("5 tf"), Err(ParseError::UnknownUnit("tf".to_string()))); assert_eq!(parse("12"), Err(ParseError::ExpectedUnit)); } #[test] fn numbers() { assert_eq!(parse_number("".to_string()), Err(ParseError::NotValidNumber("".to_string()))); assert_eq!(parse_number("1".to_string()), Ok(1.0)); assert_eq!(parse_number("1.0".to_string()), Ok(1.0)); assert_eq!(parse_number("0.1".to_string()), Ok(0.1)); assert_eq!(parse_number("0.1.".to_string()), Err(ParseError::NotValidNumber("0.1.".to_string()))); assert_eq!(parse_number("-10".to_string()), Ok(-10.0)); assert_eq!(parse_number("10\t00\u{1680}000".to_string()), Ok(10_00_000.0)); } #[test] fn units() { // Length assert_eq!(parse_unit("inch".to_string()), Ok(NonMetric::Inch)); assert_eq!(parse_unit("inches".to_string()), Ok(NonMetric::Inch)); assert_eq!(parse_unit("in".to_string()), Ok(NonMetric::Inch)); assert_eq!(parse_unit("\"".to_string()), Ok(NonMetric::Inch)); assert_eq!(parse_unit("″".to_string()), Ok(NonMetric::Inch)); assert_eq!(parse_unit("foot".to_string()), Ok(NonMetric::Foot)); assert_eq!(parse_unit("feet".to_string()), Ok(NonMetric::Foot)); assert_eq!(parse_unit("ft".to_string()), Ok(NonMetric::Foot)); assert_eq!(parse_unit("'".to_string()), Ok(NonMetric::Foot)); assert_eq!(parse_unit("′".to_string()), Ok(NonMetric::Foot)); assert_eq!(parse_unit("yard".to_string()), Ok(NonMetric::Yard)); assert_eq!(parse_unit("yards".to_string()), Ok(NonMetric::Yard)); assert_eq!(parse_unit("yd".to_string()), Ok(NonMetric::Yard)); assert_eq!(parse_unit("mile".to_string()), Ok(NonMetric::Mile)); assert_eq!(parse_unit("miles".to_string()), Ok(NonMetric::Mile)); assert_eq!(parse_unit("mi".to_string()), Ok(NonMetric::Mile)); assert_eq!(parse_unit("m".to_string()), Ok(NonMetric::Mile)); // Weight assert_eq!(parse_unit("ounce".to_string()), Ok(NonMetric::Ounce)); assert_eq!(parse_unit("ounces".to_string()), Ok(NonMetric::Ounce)); assert_eq!(parse_unit("oz".to_string()), Ok(NonMetric::Ounce)); assert_eq!(parse_unit("pound".to_string()), Ok(NonMetric::Pound)); assert_eq!(parse_unit("pounds".to_string()), Ok(NonMetric::Pound)); assert_eq!(parse_unit("lb".to_string()), Ok(NonMetric::Pound)); assert_eq!(parse_unit("lbs".to_string()), Ok(NonMetric::Pound)); assert_eq!(parse_unit("#".to_string()), Ok(NonMetric::Pound)); assert_eq!(parse_unit("stone".to_string()), Ok(NonMetric::Stone)); assert_eq!(parse_unit("stones".to_string()), Ok(NonMetric::Stone)); assert_eq!(parse_unit("st".to_string()), Ok(NonMetric::Stone)); // Temperature assert_eq!(parse_unit("°F".to_string()), Ok(NonMetric::Fahrenheit)); assert_eq!(parse_unit("F".to_string()), Ok(NonMetric::Fahrenheit)); // Area assert_eq!(parse_unit("inch²".to_string()), Ok(NonMetric::SquareInch)); assert_eq!(parse_unit("inches²".to_string()), Ok(NonMetric::SquareInch)); assert_eq!(parse_unit("in²".to_string()), Ok(NonMetric::SquareInch)); assert_eq!(parse_unit("foot²".to_string()), Ok(NonMetric::SquareFoot)); assert_eq!(parse_unit("feet²".to_string()), Ok(NonMetric::SquareFoot)); assert_eq!(parse_unit("ft²".to_string()), Ok(NonMetric::SquareFoot)); // Unknown unit assert_eq!(parse_unit("hutenosa".to_string()), Err(ParseError::UnknownUnit("hutenosa".to_string()))); } #[test] fn tokens() { assert_eq!(tokenize(""), vec![]); assert_eq!(tokenize("10"), vec![Token::Number("10".to_string())]); assert_eq!(tokenize(" 10 "), vec![Token::Number("10".to_string())]); assert_eq!(tokenize("10 000"), vec![Token::Number("10 000".to_string())]); assert_eq!(tokenize("10\t000"), vec![Token::Number("10\t000".to_string())]); assert_eq!(tokenize("10\u{1680}000"), vec![Token::Number("10\u{1680}000".to_string())]); assert_eq!(tokenize("10.0.1"), vec![Token::Number("10.0.1".to_string())]); assert_eq!(tokenize("ft"), vec![Token::Unit("ft".to_string())]); assert_eq!( tokenize("10 ft"), vec![ Token::Number("10".to_string()), Token::Unit("ft".to_string()), ] ); assert_eq!( tokenize("ft in"), vec![ Token::Unit("ft".to_string()), Token::Unit("in".to_string()), ] ); assert_eq!( tokenize("5 ft 7 in"), vec![ Token::Number("5".to_string()), Token::Unit("ft".to_string()), Token::Number("7".to_string()), Token::Unit("in".to_string()), ] ); assert_eq!( tokenize("5\"7'"), vec![ Token::Number("5".to_string()), Token::Unit("\"".to_string()), Token::Number("7".to_string()), Token::Unit("'".to_string()), ] ); assert_eq!( tokenize(" 2.2lbs "), vec![ Token::Number("2.2".to_string()), Token::Unit("lbs".to_string()), ] ); } }