332 lines
9.9 KiB
Rust
332 lines
9.9 KiB
Rust
use crate::error::{self, Error, ErrorType};
|
|
use crate::context::{CtxString, Context};
|
|
|
|
use std::fmt::{self, Display};
|
|
|
|
/// Result type used by the tokenizer internals; the error side is the
/// private `ParserError`, which has an `Eof` variant next to real parse errors.
type Result<T> = std::result::Result<T, ParserError>;
|
|
|
|
|
|
#[derive(Debug, PartialEq)]
|
|
pub struct Parser<'a> {
|
|
pub input: &'a str,
|
|
end_of_file: bool,
|
|
end_of_entry: bool,
|
|
new_line: bool,
|
|
multiline_level: isize,
|
|
context: Context,
|
|
}
|
|
|
|
#[derive(Debug, PartialEq)]
|
|
pub enum Token {
|
|
Value(CtxString),
|
|
Control(CtxString),
|
|
Blank,
|
|
EndOfLine,
|
|
}
|
|
|
|
#[derive(Debug, PartialEq)]
|
|
pub struct TokenStream<'a> {
|
|
pub pa: Parser<'a>,
|
|
}
|
|
|
|
#[derive(Debug, PartialEq)]
|
|
enum ParserError {
|
|
Eof,
|
|
ParseError(Error<ErrorType>)
|
|
}
|
|
|
|
/// Kinds of syntax errors the tokenizer can report.
#[derive(Debug, PartialEq)]
pub enum ParseError {
    /// A character was found where it is not allowed.
    UnexpectedCharacter(char),
    /// The input ended in the middle of a construct.
    UnexpectedEndOfFile,
}

impl Display for ParseError {
    /// Writes a short, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            ParseError::UnexpectedCharacter(found) => {
                write!(f, "Unexpected character '{}'", found)
            }
            ParseError::UnexpectedEndOfFile => f.write_str("Unexpected end of file"),
        }
    }
}
|
|
|
|
impl From<Error<ErrorType>> for ParserError {
|
|
fn from(parse_error: Error<ErrorType>) -> Self {
|
|
ParserError::ParseError(parse_error)
|
|
}
|
|
}
|
|
|
|
impl<'a> Parser<'a> {
|
|
const BLANK: [char; 3] = [' ', '\t', '\r'];
|
|
const SEPARATORS: [char; 7] = [';', '"', ' ', '\t', '\n', '(', ')'];
|
|
|
|
pub fn from_str(input: &'a str) -> Self {
|
|
Parser {
|
|
input,
|
|
end_of_file: false,
|
|
end_of_entry: true,
|
|
new_line: true,
|
|
multiline_level: 0,
|
|
context: Context::default()
|
|
}
|
|
}
|
|
|
|
fn peek_char(&mut self) -> Result<char> {
|
|
self.input.chars().next().ok_or(ParserError::Eof)
|
|
}
|
|
|
|
fn read_char(&mut self) -> Result<char> {
|
|
let ch = self.peek_char()?;
|
|
self.input = &self.input[ch.len_utf8()..];
|
|
|
|
if self.new_line {
|
|
self.context.column = 1;
|
|
self.context.line += 1;
|
|
self.new_line = false;
|
|
} else {
|
|
self.context.column += 1;
|
|
}
|
|
|
|
self.context.index += 1;
|
|
|
|
if ch == '\n' {
|
|
self.new_line = true;
|
|
}
|
|
|
|
Ok(ch)
|
|
}
|
|
|
|
fn read_blank(&mut self) -> Result<Option<Token>> {
|
|
while Self::BLANK.contains(&self.peek_char()?) {
|
|
self.read_char()?;
|
|
}
|
|
if self.end_of_entry {
|
|
self.end_of_entry = false;
|
|
Ok(Some(Token::Blank))
|
|
} else {
|
|
Ok(None)
|
|
}
|
|
}
|
|
|
|
fn read_comment(&mut self) -> Result<Option<Token>> {
|
|
let ch = self.read_char()?;
|
|
if ch == ';' {
|
|
loop {
|
|
match self.peek_char() {
|
|
Ok('\n') => break,
|
|
Ok(_) => { self.read_char()?; },
|
|
Err(ParserError::Eof) => break,
|
|
Err(err) => return Err(err),
|
|
}
|
|
}
|
|
Ok(None)
|
|
} else {
|
|
Err(Error {
|
|
message: Some(format!("Expected ';', got '{}'", ch)),
|
|
context: self.context.clone(),
|
|
error_type: ErrorType::ParseError(ParseError::UnexpectedCharacter(ch))
|
|
}.into())
|
|
}
|
|
}
|
|
|
|
fn read_character_string(&mut self) -> Result<Option<Token>> {
|
|
let mut res = String::new();
|
|
let mut context = None;
|
|
|
|
let quoted = self.peek_char()? == '"';
|
|
if quoted {
|
|
res.push(self.read_char()?);
|
|
context = Some(self.context.clone());
|
|
}
|
|
|
|
loop {
|
|
match self.peek_char() {
|
|
Ok('"') if quoted => {
|
|
res.push(self.read_char()?);
|
|
break
|
|
},
|
|
Ok(ch) if !quoted && Self::SEPARATORS.contains(&ch) => break,
|
|
Ok(_) => {
|
|
let ch = self.read_char()?;
|
|
res.push(ch);
|
|
|
|
if ch == '\\' {
|
|
res.push(self.read_char()?)
|
|
}
|
|
},
|
|
Err(ParserError::Eof) => break,
|
|
Err(err) => return Err(err)
|
|
};
|
|
|
|
if context == None {
|
|
context = Some(self.context.clone())
|
|
}
|
|
}
|
|
// TODO: Check if quoted string ended
|
|
// TODO: Stop parsing quoted string if line break occurs
|
|
|
|
if res.len() == 0 {
|
|
// NOTE: Need double check but probably useless code (and error), otherwise create appropriate error
|
|
// Err(ErrorType::ExpectedCharacterString.into())
|
|
unreachable!()
|
|
} else {
|
|
self.end_of_entry = false;
|
|
Ok(Some(Token::Value(CtxString {
|
|
value: res,
|
|
context: context.unwrap()
|
|
})))
|
|
}
|
|
}
|
|
|
|
fn read_opening_braket(&mut self) -> Result<Option<Token>> {
|
|
self.read_char()?;
|
|
self.multiline_level += 1;
|
|
Ok(None)
|
|
}
|
|
|
|
fn read_closing_braket(&mut self) -> Result<Option<Token>> {
|
|
self.read_char()?;
|
|
self.multiline_level -= 1;
|
|
if self.multiline_level < 0 {
|
|
Err(Error {
|
|
message: Some(format!("Braket count mismatch, unexpected closing braket")),
|
|
context: self.context.clone(),
|
|
error_type: ErrorType::ParseError(ParseError::UnexpectedCharacter(')'))
|
|
}.into())
|
|
} else {
|
|
Ok(None)
|
|
}
|
|
}
|
|
|
|
fn read_line_break(&mut self) -> Result<Option<Token>> {
|
|
self.read_char()?;
|
|
|
|
if self.multiline_level == 0 && !self.end_of_entry {
|
|
self.end_of_entry = true;
|
|
Ok(Some(Token::EndOfLine))
|
|
} else {
|
|
Ok(None)
|
|
}
|
|
}
|
|
|
|
fn read_control_entry(&mut self) -> Result<Option<Token>> {
|
|
// TODO: Decide if check sould be in parent or not, currently incoherent with some other functions
|
|
let ch = self.read_char()?;
|
|
if ch == '$' {
|
|
match self.read_character_string()? {
|
|
Some(Token::Value(s)) => Ok(Some(Token::Control(s))),
|
|
_ => unreachable!()
|
|
}
|
|
} else {
|
|
Err(Error {
|
|
message: Some(format!("Expected ';', got '{}'", ch)),
|
|
context: self.context.clone(),
|
|
error_type: ErrorType::ParseError(ParseError::UnexpectedCharacter(ch))
|
|
}.into())
|
|
}
|
|
}
|
|
|
|
fn forward(&mut self) -> Result<Option<Token>> {
|
|
match self.peek_char() {
|
|
Ok(';') => self.read_comment(),
|
|
Ok('(') => self.read_opening_braket(),
|
|
Ok(')') => self.read_closing_braket(),
|
|
Ok('\n') => self.read_line_break(),
|
|
Ok('$') => self.read_control_entry(),
|
|
Ok(ch) if ch.is_whitespace() => self.read_blank(),
|
|
Ok(_) => self.read_character_string(),
|
|
Err(ParserError::Eof) => {
|
|
// TODO: Check if multi line is false. If not err Unexpected end of file
|
|
if !self.end_of_entry {
|
|
self.end_of_entry = true;
|
|
Ok(Some(Token::EndOfLine))
|
|
} else {
|
|
Err(ParserError::Eof)
|
|
}
|
|
},
|
|
Err(err) => Err(err)
|
|
}
|
|
}
|
|
|
|
pub fn tokens(self) -> TokenStream<'a> {
|
|
TokenStream {
|
|
pa: self
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'a> Iterator for TokenStream<'a> {
|
|
type Item = error::Result<Token>;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
match self.pa.forward() {
|
|
Ok(None) => self.next(),
|
|
Err(ParserError::Eof) => None,
|
|
Err(ParserError::ParseError(err)) => Some(Err(err)),
|
|
Ok(Some(item)) => Some(Ok(item))
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::context::CtxData;

    /// Leading blanks are consumed and nothing else.
    #[test]
    fn test_read_blank() {
        // (input, input left after the call)
        let cases = [
            ("something", "something"),
            (" something", "something"),
            ("\t \t\t something", "something"),
            (" \r\n", "\n"),
        ];

        for &(input, remaining) in cases.iter() {
            let mut pa = Parser::from_str(input);
            pa.read_blank().unwrap();
            assert_eq!(pa.input, remaining);
        }
    }

    /// A comment is consumed up to the line break or the end of the input.
    #[test]
    fn test_read_comment() {
        // (input, input left after the call)
        let cases = [
            ("; my comment\nan other line", "\nan other line"),
            ("; eof", ""),
        ];

        for &(input, remaining) in cases.iter() {
            let mut pa = Parser::from_str(input);
            pa.read_comment().unwrap();
            assert_eq!(pa.input, remaining);
        }
    }

    /// Character strings stop at separators unless quoted or escaped.
    #[test]
    fn test_read_character_string() {
        // (input, value of the token that is read)
        let cases = [
            ("my-string", "my-string"),
            ("my-other-string\n", "my-other-string"),
            ("99;my comment", "99"),
            (
                "\"quoted; \\\"string\\\"\" other-string",
                "\"quoted; \\\"string\\\"\"",
            ),
            ("test\\ test", "test\\ test"),
        ];

        for &(input, value) in cases.iter() {
            let mut pa = Parser::from_str(input);
            let token = pa.read_character_string().unwrap().unwrap();
            assert_eq!(token, Token::Value(CtxData::new(value.into(), 1, 1, 1)));
        }
    }

    /// A small multi-line input produces the expected tokens and positions.
    #[test]
    fn test_read_simple_token_stream() {
        let input =
            "domain.tld 300 IN A (\n 198.51.100.1; some comment\n)\n TXT \"some value\"";
        let expected = [
            Token::Value(CtxData::new("domain.tld".into(), 1, 1, 1)),
            Token::Value(CtxData::new("300".into(), 1, 12, 12)),
            Token::Value(CtxData::new("IN".into(), 1, 16, 16)),
            Token::Value(CtxData::new("A".into(), 1, 19, 19)),
            Token::Value(CtxData::new("198.51.100.1".into(), 2, 2, 24)),
            Token::EndOfLine,
            Token::Blank,
            Token::Value(CtxData::new("TXT".into(), 4, 2, 54)),
            Token::Value(CtxData::new("\"some value\"".into(), 4, 6, 58)),
            Token::EndOfLine,
        ];

        let tokens = Parser::from_str(input)
            .tokens()
            .map(|token| token.unwrap())
            .collect::<Vec<_>>();
        assert_eq!(tokens, expected);
    }
}
|