"""Parse a small regular-expression dialect into an AST.

Supported syntax: literal characters, ``.`` (any character), ``*``
(zero or more), ``|`` (alternation, at most one per group) and
parentheses for grouping.
"""

from typing import Optional


class RegExp:
    """Base class for AST nodes.

    A node is fully described by its concrete type and the positional
    arguments it was built with; ``repr`` and ``==`` are derived from them.
    """

    def __init__(self, *args):
        self.args = args

    def __repr__(self):
        args = ", ".join(map(repr, self.args))
        return f"{self.__class__.__name__}({args})"

    def __eq__(self, other):
        # Exact type match: Normal('a') must not equal another node kind
        # that happens to carry the same arguments.
        return type(self) is type(other) and self.args == other.args


class Any(RegExp):
    """The ``.`` wildcard; built without arguments."""


class Normal(RegExp):
    """A single literal character: ``Normal('a')``."""


class Or(RegExp):
    """Alternation between two sub-expressions: ``Or(left, right)``."""


class Str(RegExp):
    """A sequence of two or more nodes, passed as one list: ``Str([a, b])``."""


class ZeroOrMore(RegExp):
    """Kleene star applied to one sub-expression: ``ZeroOrMore(node)``."""


# Your task is to build an AST using those nodes.
# See sample tests or test output for examples of usage.


def parse_regexp(pattern: str) -> Optional[RegExp]:
    """Parse ``pattern`` and return its AST, or ``None`` if it is invalid.

    A pattern is invalid when the parser raises ``ValueError`` or when
    input is left over after a complete expression (e.g. a stray ``)``).
    The empty pattern also yields ``None``.
    """
    parser = RegexParser(pattern)
    try:
        tree = parser.parse_regex()
    except ValueError:
        return None
    # Leftover input means something like an unmatched ')' stopped the parse.
    return tree if parser.end() else None


class RegexParser:
    """Recursive-descent parser over a pattern string.

    Grammar, lowest to highest precedence::

        regex         := alternation
        alternation   := concatenation [ '|' concatenation ]
        concatenation := repetition*
        repetition    := atom [ '*' ]
        atom          := '(' regex ')' | '.' | <any other character>

    All syntax errors are reported as ``ValueError``.
    """

    def __init__(self, pattern: str):
        self.pattern = pattern
        self.pos = 0  # index of the next unread character

    # ----------------------------
    # helpers
    # ----------------------------

    def peek(self) -> Optional[str]:
        """Return the next character without consuming it, or ``None`` at end."""
        if self.end():
            return None
        return self.pattern[self.pos]

    def consume(self) -> Optional[str]:
        """Return the next character and advance; ``None`` (no advance) at end."""
        c = self.peek()
        if c is not None:
            self.pos += 1
        return c

    def end(self) -> bool:
        """Return ``True`` once every character has been consumed."""
        return self.pos >= len(self.pattern)

    # ----------------------------
    # regex grammar
    # ----------------------------

    def parse_regex(self) -> Optional[RegExp]:
        """Parse a full (sub-)expression; ``None`` if it is empty."""
        return self.parse_alternation()

    def parse_alternation(self) -> Optional[RegExp]:
        """Parse ``concatenation [ '|' concatenation ]``.

        Raises:
            ValueError: on a second ``|`` in the same group, or when either
                side of the ``|`` is empty.
        """
        left = self.parse_concatenation()
        if self.peek() != "|":
            return left

        self.consume()
        right = self.parse_concatenation()

        if self.peek() == "|":
            raise ValueError("Only one '|' allowed per group")
        # An empty branch would otherwise end up as None inside the Or node.
        if left is None or right is None:
            raise ValueError("'|' requires an expression on both sides")

        return Or(left, right)

    def parse_concatenation(self) -> Optional[RegExp]:
        """Parse a run of repetitions up to ``|``, ``)`` or end of input.

        Returns ``None`` for an empty run, the node itself for a single
        item, and ``Str([...])`` for two or more.
        """
        nodes = []
        while not self.end() and self.peek() not in "|)":
            nodes.append(self.parse_repetition())

        if not nodes:
            return None
        if len(nodes) == 1:
            return nodes[0]
        return Str(nodes)

    def parse_repetition(self) -> Optional[RegExp]:
        """Parse an atom optionally followed by a single ``*``.

        Raises:
            ValueError: if two ``*`` appear back to back (``a**``). A starred
                group may itself be starred, so ``(a*)*`` is accepted.
        """
        node = self.parse_atom()

        if self.peek() == "*":
            self.consume()
            # Look at the next token rather than at the node type so that a
            # parenthesised ZeroOrMore is not mistaken for a doubled star.
            if self.peek() == "*":
                raise ValueError("Consecutive '*' not allowed")
            node = ZeroOrMore(node)

        return node

    def parse_atom(self) -> Optional[RegExp]:
        """Parse a group, ``.`` or a literal character; ``None`` at end of input.

        Raises:
            ValueError: on an unmatched ``(``, an empty group ``()``, or a
                ``*`` with nothing to apply to.
        """
        c = self.peek()

        if c is None:
            return None

        if c == "(":
            self.consume()
            node = self.parse_regex()

            if self.peek() != ")":
                raise ValueError("Unmatched '('")
            self.consume()

            # "()" has no content; returning None here would leak into
            # Str / ZeroOrMore nodes built by the callers.
            if node is None:
                raise ValueError("Empty group")
            return node

        if c == "*":
            raise ValueError("'*' cannot start an expression")

        self.consume()
        return Any() if c == "." else Normal(c)


if __name__ == "__main__":
    # NOTE(review): the reviewed excerpt ended inside this list; only the
    # entry that was fully visible is kept, and whatever consumed
    # test_cases was not visible.
    test_cases = [
        "(a|b)*",
    ]