diff --git a/src/codewars/RegExParser.py b/src/codewars/RegExParser.py new file mode 100644 index 0000000..d3920ba --- /dev/null +++ b/src/codewars/RegExParser.py @@ -0,0 +1,171 @@ +class RegExp: + def __init__(self, *args): + self.args = args + def __repr__(self): + args = ", ".join(map(repr, self.args)) + return f"{self.__class__.__name__}({args})" + def __eq__(self, other): + return type(self) is type(other) and self.args == other.args +class Any(RegExp): pass +class Normal(RegExp): pass +class Or(RegExp): pass +class Str(RegExp): pass +class ZeroOrMore(RegExp): pass + +# Your task is to build an AST using those nodes. +# See sample tests or test output for examples of usage. + +def parse_regexp(indata): + return RegExParser(indata).compile() + + +class RegExParser: + compiled_pattern: RegExp + operators = "()*|." + + def __init__(self, pattern: str): + self.pattern = pattern + + def compile(self) -> RegExp | None: + # validate + if not self.is_valid(): + return None + + sequences = RegExParser.parse_sequences(self.pattern) + + return RegExParser.parse_regex(sequences) + + def is_valid(self): + if not self.pattern: + return False + + sequences = RegExParser.parse_sequences(self.pattern) + + if not sequences: + return False + + if any([not string or string.count("|") > 1 for string in sequences]): + return False + + if self.pattern.find("*") == 0: + return False + + if "**" in self.pattern: + return False + + return True + + @staticmethod + def parse_sequences(pattern): + left_paren = pattern.find('(') + right_paren = pattern.rfind(')') + + if left_paren == -1 and right_paren == -1: + return [pattern] + + if left_paren == -1 or right_paren == -1: + return None + + if left_paren > right_paren: + return None + + left_side = pattern[:left_paren] + middle = pattern[left_paren + 1: right_paren] + right_side = pattern[right_paren + 1:] + + result = [] + if left_side: + result.append(left_side) + + if middle: + result.append(RegExParser.parse_sequences(middle)) + + if right_side: + result.append(right_side) + + return result + + @staticmethod + def parse_regex(sequences) -> RegExp | None: + if isinstance(sequences, str): + if sequences.find("*") == 0: + sequences = sequences[1:] + + print(sequences) + if sequences == "": + return None + + if sequences.count("|") == 1: + or_groups = sequences.split("|") + left_exp = RegExParser.get_type(or_groups[0]) + right_exp = RegExParser.get_type(or_groups[1]) + + return Or(left_exp, right_exp) + else: + return RegExParser.get_type(sequences) + else: + result: list[RegExp] = [] + + for sequence_idx in range(len(sequences)): + current_sequence = sequences[sequence_idx] + + if sequence_idx < len(sequences) - 1: + next_sequence = sequences[sequence_idx + 1] + if isinstance(next_sequence, str) and next_sequence.startswith("*"): + regex = ZeroOrMore(RegExParser.parse_regex(current_sequence)) + if regex: + result.append(regex) + else: + regex = RegExParser.parse_regex(current_sequence) + if regex: + result.append(regex) + else: + regex = RegExParser.parse_regex(current_sequence) + if regex: + result.append(regex) + + return Str(result) if len(result) > 1 else result[0] + + + @staticmethod + def get_type(sequence) -> RegExp: + regex: RegExp + + if len(sequence) > 1: + string = [] + + for char_idx in range(len(sequence) - 1): + if sequence[char_idx] == "*": + continue + + next_char = sequence[char_idx + 1] + if next_char == "*": + string.append(ZeroOrMore(RegExParser.get_type(sequence[char_idx]))) + else: + string.append(RegExParser.get_type(sequence[char_idx])) + + last_char = sequence[len(sequence) - 1] + if last_char != "*": + string.append(RegExParser.get_type(last_char)) + + regex = Str(string) if len(string) > 1 else string[0] + else: + regex = Any() if sequence == "." else Normal(sequence) + + return regex + +if __name__ == "__main__": + test_cases = [ + "", + "(", + "(hi!", + ")(", + "a|t|y", + "a**", + "]K\nBYg