// bind-conf/dns_zone/src/parser.rs
//
// 332 lines
// 9.9 KiB
// Rust

use crate::error::{self, Error, ErrorType};
use crate::context::{CtxString, Context};
use std::fmt::{self, Display};
/// Result type used by the parser internals.
///
/// The error side is the private `ParserError`, which has a dedicated `Eof`
/// variant next to the variant wrapping a real `Error<ErrorType>`.
type Result<T> = std::result::Result<T, ParserError>;
/// Character-level tokenizer state over a borrowed input string.
#[derive(Debug, PartialEq)]
pub struct Parser<'a> {
/// Input that has not been consumed yet; `read_char` advances this slice
/// past every character it returns.
pub input: &'a str,
/// Initialised to `false` by `from_str`; no code visible in this file reads
/// or updates it afterwards.
end_of_file: bool,
/// `true` while no token of the current entry has been produced (initial
/// state and right after a `Token::EndOfLine`); cleared when a `Blank`,
/// `Value` or `Control` token is produced.
end_of_entry: bool,
/// `true` when the next character read starts a new line (initially, and
/// after a '\n' has been read).
new_line: bool,
/// Parenthesis nesting depth: incremented on '(' and decremented on ')'.
/// Line breaks only terminate an entry while this is 0.
multiline_level: isize,
/// Position (line, column, index) maintained by `read_char`; cloned into
/// the tokens and errors that are produced.
context: Context,
}
/// Tokens produced by the parser.
#[derive(Debug, PartialEq)]
pub enum Token {
/// A quoted or unquoted character string together with its context.
/// Quoted strings keep their surrounding double quotes.
Value(CtxString),
/// The character string that directly follows a '$'; the '$' itself is
/// not part of the stored value.
Control(CtxString),
/// Produced by `read_blank` when it runs while no token of the current
/// entry has been produced yet.
Blank,
/// Marks the end of an entry: a line break outside parentheses, or the end
/// of the input while an entry is still open.
EndOfLine,
}
/// Iterator over the tokens of a `Parser`; created by `Parser::tokens`.
#[derive(Debug, PartialEq)]
pub struct TokenStream<'a> {
/// The parser driven by this stream.
pub pa: Parser<'a>,
}
/// Internal error type of the parser methods.
#[derive(Debug, PartialEq)]
enum ParserError {
/// No more input is available. `TokenStream::next` turns this into the end
/// of the iteration instead of reporting it as an error.
Eof,
/// A real parse error, forwarded to the caller of the token stream.
ParseError(Error<ErrorType>)
}
/// Kinds of syntax errors reported by the parser.
#[derive(Debug, PartialEq)]
pub enum ParseError {
    /// A character was found where it is not allowed.
    UnexpectedCharacter(char),
    /// The input ended before the current construct was complete.
    UnexpectedEndOfFile,
}

impl Display for ParseError {
    /// Writes a short human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::UnexpectedCharacter(found) => write!(f, "Unexpected character '{}'", found),
            Self::UnexpectedEndOfFile => f.write_str("Unexpected end of file"),
        }
    }
}
impl From<Error<ErrorType>> for ParserError {
fn from(parse_error: Error<ErrorType>) -> Self {
ParserError::ParseError(parse_error)
}
}
impl<'a> Parser<'a> {
/// Characters skipped by `read_blank` as insignificant blanks.
const BLANK: [char; 3] = [' ', '\t', '\r'];
/// Characters that terminate an unquoted character string.
///
/// Every `BLANK` character is listed here, including '\r': otherwise the last
/// unquoted value of a CRLF-terminated line would keep a trailing carriage
/// return.
const SEPARATORS: [char; 8] = [';', '"', ' ', '\t', '\r', '\n', '(', ')'];
pub fn from_str(input: &'a str) -> Self {
Parser {
input,
end_of_file: false,
end_of_entry: true,
new_line: true,
multiline_level: 0,
context: Context::default()
}
}
/// Returns the next character without consuming it, or `ParserError::Eof`
/// when the input is exhausted.
fn peek_char(&mut self) -> Result<char> {
    match self.input.chars().next() {
        Some(next) => Ok(next),
        None => Err(ParserError::Eof),
    }
}
/// Consumes and returns the next character, keeping the position context
/// up to date.
///
/// Returns `ParserError::Eof` (without changing any state) when the input is
/// exhausted.
fn read_char(&mut self) -> Result<char> {
    let current = self.peek_char()?;
    let width = current.len_utf8();
    self.input = &self.input[width..];

    // The previous flag value tells whether `current` opens a new line; the
    // flag itself is rewritten for the character that will follow.
    let starts_line = self.new_line;
    self.new_line = current == '\n';

    if starts_line {
        self.context.column = 1;
        self.context.line += 1;
    } else {
        self.context.column += 1;
    }
    self.context.index += 1;

    Ok(current)
}
fn read_blank(&mut self) -> Result<Option<Token>> {
while Self::BLANK.contains(&self.peek_char()?) {
self.read_char()?;
}
if self.end_of_entry {
self.end_of_entry = false;
Ok(Some(Token::Blank))
} else {
Ok(None)
}
}
fn read_comment(&mut self) -> Result<Option<Token>> {
let ch = self.read_char()?;
if ch == ';' {
loop {
match self.peek_char() {
Ok('\n') => break,
Ok(_) => { self.read_char()?; },
Err(ParserError::Eof) => break,
Err(err) => return Err(err),
}
}
Ok(None)
} else {
Err(Error {
message: Some(format!("Expected ';', got '{}'", ch)),
context: self.context.clone(),
error_type: ErrorType::ParseError(ParseError::UnexpectedCharacter(ch))
}.into())
}
}
fn read_character_string(&mut self) -> Result<Option<Token>> {
let mut res = String::new();
let mut context = None;
let quoted = self.peek_char()? == '"';
if quoted {
res.push(self.read_char()?);
context = Some(self.context.clone());
}
loop {
match self.peek_char() {
Ok('"') if quoted => {
res.push(self.read_char()?);
break
},
Ok(ch) if !quoted && Self::SEPARATORS.contains(&ch) => break,
Ok(_) => {
let ch = self.read_char()?;
res.push(ch);
if ch == '\\' {
res.push(self.read_char()?)
}
},
Err(ParserError::Eof) => break,
Err(err) => return Err(err)
};
if context == None {
context = Some(self.context.clone())
}
}
// TODO: Check if quoted string ended
// TODO: Stop parsing quoted string if line break occurs
if res.len() == 0 {
// NOTE: Need double check but probably useless code (and error), otherwise create appropriate error
// Err(ErrorType::ExpectedCharacterString.into())
unreachable!()
} else {
self.end_of_entry = false;
Ok(Some(Token::Value(CtxString {
value: res,
context: context.unwrap()
})))
}
}
/// Consumes one character (the '(' peeked by `forward`) and enters one more
/// level of parentheses. Never produces a token.
fn read_opening_braket(&mut self) -> Result<Option<Token>> {
    self.read_char().map(|_| {
        self.multiline_level += 1;
        None
    })
}
fn read_closing_braket(&mut self) -> Result<Option<Token>> {
self.read_char()?;
self.multiline_level -= 1;
if self.multiline_level < 0 {
Err(Error {
message: Some(format!("Braket count mismatch, unexpected closing braket")),
context: self.context.clone(),
error_type: ErrorType::ParseError(ParseError::UnexpectedCharacter(')'))
}.into())
} else {
Ok(None)
}
}
/// Consumes one character (the '\n' peeked by `forward`).
///
/// Produces `Token::EndOfLine` only when the line break closes an entry,
/// i.e. outside parentheses and after at least one token of the entry has
/// been produced; otherwise the line break is silently skipped.
fn read_line_break(&mut self) -> Result<Option<Token>> {
    self.read_char()?;
    let closes_entry = self.multiline_level == 0 && !self.end_of_entry;
    if !closes_entry {
        return Ok(None);
    }
    self.end_of_entry = true;
    Ok(Some(Token::EndOfLine))
}
fn read_control_entry(&mut self) -> Result<Option<Token>> {
// TODO: Decide if check sould be in parent or not, currently incoherent with some other functions
let ch = self.read_char()?;
if ch == '$' {
match self.read_character_string()? {
Some(Token::Value(s)) => Ok(Some(Token::Control(s))),
_ => unreachable!()
}
} else {
Err(Error {
message: Some(format!("Expected ';', got '{}'", ch)),
context: self.context.clone(),
error_type: ErrorType::ParseError(ParseError::UnexpectedCharacter(ch))
}.into())
}
}
fn forward(&mut self) -> Result<Option<Token>> {
match self.peek_char() {
Ok(';') => self.read_comment(),
Ok('(') => self.read_opening_braket(),
Ok(')') => self.read_closing_braket(),
Ok('\n') => self.read_line_break(),
Ok('$') => self.read_control_entry(),
Ok(ch) if ch.is_whitespace() => self.read_blank(),
Ok(_) => self.read_character_string(),
Err(ParserError::Eof) => {
// TODO: Check if multi line is false. If not err Unexpected end of file
if !self.end_of_entry {
self.end_of_entry = true;
Ok(Some(Token::EndOfLine))
} else {
Err(ParserError::Eof)
}
},
Err(err) => Err(err)
}
}
/// Consumes the parser and returns an iterator over its tokens.
pub fn tokens(self) -> TokenStream<'a> {
    let pa = self;
    TokenStream { pa }
}
}
/// Yields the tokens of the wrapped parser until the input is exhausted.
///
/// Parse errors are yielded as `Err` items; the end of the input ends the
/// iteration.
impl<'a> Iterator for TokenStream<'a> {
    type Item = error::Result<Token>;

    fn next(&mut self) -> Option<Self::Item> {
        // Steps that produce no token (comments, parentheses, skipped blanks
        // and line breaks) are retried in a loop rather than by recursion, so
        // the stack depth does not grow with the number of consecutive
        // skipped items in the input.
        loop {
            match self.pa.forward() {
                Ok(Some(item)) => return Some(Ok(item)),
                Ok(None) => continue,
                Err(ParserError::Eof) => return None,
                Err(ParserError::ParseError(err)) => return Some(Err(err)),
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::context::CtxData;

    /// `read_blank` stops in front of the first character that is not a blank.
    #[test]
    fn test_read_blank() {
        // (input, input left after the call)
        let cases = [
            ("something", "something"),
            (" something", "something"),
            ("\t \t\t something", "something"),
            (" \r\n", "\n"),
        ];
        for &(input, remaining) in cases.iter() {
            let mut pa = Parser::from_str(input);
            pa.read_blank().unwrap();
            assert_eq!(pa.input, remaining);
        }
    }

    /// `read_comment` stops in front of the line break or at the end of input.
    #[test]
    fn test_read_comment() {
        // (input, input left after the call)
        let cases = [
            ("; my comment\nan other line", "\nan other line"),
            ("; eof", ""),
        ];
        for &(input, remaining) in cases.iter() {
            let mut pa = Parser::from_str(input);
            pa.read_comment().unwrap();
            assert_eq!(pa.input, remaining);
        }
    }

    /// `read_character_string` handles plain, quoted and escaped strings.
    #[test]
    fn test_read_character_string() {
        // (input, value of the token read)
        let cases = [
            ("my-string", "my-string"),
            ("my-other-string\n", "my-other-string"),
            ("99;my comment", "99"),
            (
                "\"quoted; \\\"string\\\"\" other-string",
                "\"quoted; \\\"string\\\"\"",
            ),
            ("test\\ test", "test\\ test"),
        ];
        for &(input, value) in cases.iter() {
            let mut pa = Parser::from_str(input);
            let token = pa.read_character_string().unwrap().unwrap();
            assert_eq!(token, Token::Value(CtxData::new(value.into(), 1, 1, 1)));
        }
    }

    /// A two-entry input with a multi-line record, a comment and a quoted
    /// string yields the expected tokens and positions.
    #[test]
    fn test_read_simple_token_stream() {
        let input = "domain.tld 300 IN A (\n 198.51.100.1; some comment\n)\n TXT \"some value\"";
        let expected = [
            Token::Value(CtxData::new("domain.tld".into(), 1, 1, 1)),
            Token::Value(CtxData::new("300".into(), 1, 12, 12)),
            Token::Value(CtxData::new("IN".into(), 1, 16, 16)),
            Token::Value(CtxData::new("A".into(), 1, 19, 19)),
            Token::Value(CtxData::new("198.51.100.1".into(), 2, 2, 24)),
            Token::EndOfLine,
            Token::Blank,
            Token::Value(CtxData::new("TXT".into(), 4, 2, 54)),
            Token::Value(CtxData::new("\"some value\"".into(), 4, 6, 58)),
            Token::EndOfLine,
        ];
        let tokens = Parser::from_str(input)
            .tokens()
            .map(|item| item.unwrap())
            .collect::<Vec<_>>();
        assert_eq!(tokens, expected);
    }
}