//============= Copyright (c) Ludic GmbH, All rights reserved. ==============
//
// Purpose: Part of the My Behaviour Tree Code
//
//=============================================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using UnityEngine;

namespace MyBT {
    public enum SyntaxLabel {
        Undefined,

        // math operators
        Plus,
        Minus,

        SingleLineCommentStart,
        MultiLineCommentStart,
        MultiLineCommentEnd,
        Equal,
        EOL,
        QuoteToken,
        Colon,
        Semicolon,
        Comma,
        Questionmark,
        Whitespace,

        // generic functions and variable tokens
        Variable,
        Function,

        // behaviour tree tokens
        TreeNameKeyword,
        RunTreeNameKeyword,
        CompositeNameKeyword,
        ActionNameKeyword,
        DecoratorNameKeyword,

        // Literal Values
        TrueKeyword,
        FalseKeyword,
        NumberSymbol,
        CharSymbol,
        StringSymbol,

        OpenNormalBracket,
        CloseNormalBracket,
        OpenCurlyBracket,
        CloseCurlyBracket,
        OpenSquareBracket,
        CloseSquareBracket,
    }

    public enum PatternLabel {
        CharPattern,
        StringPattern,
        NumberCharPattern,
        NumberPattern,
        WhitespacePattern,
        NewlinePattern
    }

    public class Tokenizer {
        // TaskController taskController;

        private static readonly Dictionary<string, SyntaxLabel> keywords = new Dictionary<string, SyntaxLabel>() {
            {"True", SyntaxLabel.TrueKeyword },
            {"False", SyntaxLabel.FalseKeyword },
            {"Tree", SyntaxLabel.TreeNameKeyword },
            {"Composite", SyntaxLabel.CompositeNameKeyword },
            {"Action", SyntaxLabel.ActionNameKeyword },
            {"Decorator", SyntaxLabel.DecoratorNameKeyword },
            {"RunTree", SyntaxLabel.RunTreeNameKeyword }
        };

        private static readonly Dictionary<PatternLabel, Regex> patterns = new Dictionary<PatternLabel, Regex>() {
            {PatternLabel.CharPattern, new Regex("[\\$a-zA-Z\\._]") },
            {PatternLabel.StringPattern, new Regex("[^\"]") },
            {PatternLabel.NumberPattern, new Regex("[-0-9\\.]") },
            {PatternLabel.NumberCharPattern, new Regex("[-0-9a-zA-Z\\._]") },
            {PatternLabel.WhitespacePattern, new Regex("[ \t]") },
            {PatternLabel.NewlinePattern, new Regex("\r\n|\n") }
        };

        public static readonly Dictionary<SyntaxLabel, Regex> Definitions = new Dictionary<SyntaxLabel, Regex>() {
            { SyntaxLabel.Colon, new Regex(":") },
            { SyntaxLabel.Semicolon, new Regex(";") },
            { SyntaxLabel.Minus, new Regex("\\-") },
            { SyntaxLabel.Plus, new Regex("\\+") },
            { SyntaxLabel.Comma, new Regex(",") },
            { SyntaxLabel.EOL, new Regex("\\r\\n|\\n") },
            { SyntaxLabel.Whitespace, new Regex("\\s") },
            { SyntaxLabel.Questionmark, new Regex("\\?") },
            { SyntaxLabel.SingleLineCommentStart, new Regex("\\/\\/") },
            { SyntaxLabel.MultiLineCommentStart, new Regex("\\/\\*") },
            { SyntaxLabel.MultiLineCommentEnd, new Regex("\\*\\/") },
            { SyntaxLabel.Equal, new Regex("==") },
            { SyntaxLabel.CharSymbol, new Regex("\'") },
            { SyntaxLabel.StringSymbol, new Regex("\"") },
            { SyntaxLabel.NumberSymbol, new Regex("[0-9-]") },
            { SyntaxLabel.OpenCurlyBracket, new Regex("\\{") },
            { SyntaxLabel.CloseCurlyBracket, new Regex("\\}") },
            { SyntaxLabel.OpenNormalBracket, new Regex("\\(") },
            { SyntaxLabel.CloseNormalBracket, new Regex("\\)") },
            { SyntaxLabel.OpenSquareBracket, new Regex("\\[") },
            { SyntaxLabel.CloseSquareBracket, new Regex("\\]") }
        };

        static readonly SyntaxLabel[] LiteralTokens = {
            SyntaxLabel.EOL,
            SyntaxLabel.Whitespace,
            SyntaxLabel.Questionmark,
            SyntaxLabel.Colon,
            SyntaxLabel.Semicolon,
            SyntaxLabel.Comma,
            SyntaxLabel.OpenNormalBracket,
            SyntaxLabel.CloseNormalBracket,
            SyntaxLabel.OpenSquareBracket,
            SyntaxLabel.CloseSquareBracket,
            SyntaxLabel.OpenCurlyBracket,
            SyntaxLabel.CloseCurlyBracket,
            SyntaxLabel.Plus,
            SyntaxLabel.Minus
        };
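
        // The tokenizer is table driven: `Definitions` maps punctuation and operator
        // tokens to single regular expressions, `patterns` holds the character classes
        // used to scan multi-character tokens (identifiers, numbers, char/string
        // literals, whitespace), and `keywords` promotes plain identifiers such as
        // "Tree", "Action" or "RunTree" to their dedicated SyntaxLabel values.
        // `LiteralTokens` lists the one-character tokens that are matched last, after
        // all longer forms have been tried.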

        private static bool IsAllGenerated(List<TokenList> tokens) {
            bool generated = true;
            if ((tokens != null) && (tokens.Count > 0)) {
                // Debug.Log("NodeGenerator.IsAllGenerated "+tokens.Count);
                foreach (TokenList tl in tokens) {
                    // Debug.Log("- TokenList: "+tl.Count+": '"+string.Join(", ",tl.Select(t=>t.type))+"'");
                    generated &= (tl != null) && (tl.Count > 0);
                    // foreach (Token t in tl) {
                    //     Debug.Log("- Token: "+t.type);
                    // }
                }
            }
            else {
                generated &= false;
            }
            return generated;
        }

        // private static bool IsGenerated (TokenList tokens) {
        //     return (tokens != null) && (tokens.Count > 0);
        // }

        public static void DestroyAll(ref List<TokenList> tokens) {
            if (tokens != null) {
                foreach (TokenList tok in tokens) {
                    foreach (Token t in tok) {
                        if (t != null) {
                            t.Destroy();
                            UnityEngine.Object.DestroyImmediate(t);
                        }
                    }
                }
                tokens = null;
            }
        }

        // private static void Reset (ref TokenList tokens) {
        //     if (tokens != null) {
        //         foreach (Token t in tokens) {
        //             if (t != null) {
        //                 t.Reset();
        //                 UnityEngine.Object.DestroyImmediate(t);
        //             }
        //         }
        //         tokens = null;
        //     }
        // }

        private static bool PatternMatch(Regex expression, int cursor, string code, int size = 1) =>
            code.Length >= cursor + size && expression.IsMatch(code.Substring(cursor, size));

        private static bool SyntaxMatch(SyntaxLabel kind, int cursor, string code, int size = 1) =>
            PatternMatch(Definitions[kind], cursor, code, size);

        public static bool Tokenize(out List<TokenList> tokenList, List<UnityEngine.TextAsset> taskScripts) {
            bool allSucceeded = true;
            tokenList = new List<TokenList>();
            for (int i = 0; i < taskScripts.Count; i++) { // (UnityEngine.TextAsset taskScript in taskScripts) {
                TokenList t;
                bool succeeded = Tokenizer.TokenizeTextAsset(out t, taskScripts[i]);
                tokenList.Add(t);
                allSucceeded &= succeeded;
                // if (!succeeded) {
                //     // if (debugLog)
                //     Debug.LogError($"TaskController.Generate: Tokenizer Error: "+(taskScripts[i]!=null?taskScripts[i].name:"TextAsset is null"));
                // }
            }
            if (!allSucceeded) {
                Debug.LogError($"TaskController.Generate: Tokenizer Error Script Count: {taskScripts.Count}");
                for (int i = 0; i < taskScripts.Count; i++) {
                    if (taskScripts[i] != null) {
                        Debug.LogError($"TaskController.Generate: Index: {i} Name: {taskScripts[i].name} Size: {taskScripts[i].text.Length}");
                    }
                    else {
                        Debug.LogError($"TaskController.Generate: Index: {i} is Null");
                    }
                }
            }
            return IsAllGenerated(tokenList) && allSucceeded;
        }
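
        // TokenizeTextAsset scans the source text with a single cursor. On each pass it
        // tries the longer forms first (// and /* comments, "==", char, string and number
        // literals, identifiers/keywords) and only then falls back to the one-character
        // LiteralTokens. Every match is recorded through MakeToken, which also keeps the
        // line and column counters up to date; EOL tokens advance the line counter.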
        public static bool TokenizeTextAsset(out TokenList tokens, UnityEngine.TextAsset source) {
            // taskController.tokens = new TokenList();
            int cursor = 0;
            int line = 1, col = 0;
            tokens = new TokenList();

            string code = "";
            if (source != null) {
                code = source.text;
                // code.Replace("\r\n", "\n").Replace("\n\n", "\n");
            }
            else {
                // textasset is null
                return false;
            }

            for (cursor = 0; cursor < code.Length;) {
                int savecursor = cursor;

                if (SyntaxMatch(SyntaxLabel.SingleLineCommentStart, cursor, code, 2)) {
                    int oldcursor = cursor;
                    cursor += 2;
                    // advance to the end of the line; stop at the end of the text so a
                    // comment on the last line cannot loop forever
                    while (cursor < code.Length && !SyntaxMatch(SyntaxLabel.EOL, cursor, code)) {
                        cursor++;
                    }
                    MakeToken(SyntaxLabel.SingleLineCommentStart, oldcursor, cursor - oldcursor, ref tokens, ref line, ref col, source);
                    continue;
                }

                if (SyntaxMatch(SyntaxLabel.MultiLineCommentStart, cursor, code, 2)) {
                    int oldcursor = cursor;
                    cursor += 2;
                    // emit one comment token per line until the closing */ (or the end of the text)
                    while (cursor < code.Length && !SyntaxMatch(SyntaxLabel.MultiLineCommentEnd, cursor, code, 2)) {
                        bool isEol = SyntaxMatch(SyntaxLabel.EOL, cursor, code);
                        cursor++;
                        if (isEol) {
                            // Debug.Log("Tokenizer.Tokenize: new comment "+line+" "+col);
                            MakeToken(SyntaxLabel.MultiLineCommentStart, oldcursor, cursor - oldcursor, ref tokens, ref line, ref col, source);
                            oldcursor = cursor;
                            line++;
                            col = 0;
                        }
                    }
                    cursor += 2;
                    MakeToken(SyntaxLabel.MultiLineCommentStart, oldcursor, cursor - oldcursor, ref tokens, ref line, ref col, source);
                    continue;
                }

                // this should not occur on its own
                if (SyntaxMatch(SyntaxLabel.MultiLineCommentEnd, cursor, code, 2)) {
                    MakeToken(SyntaxLabel.MultiLineCommentEnd, cursor, 2, ref tokens, ref line, ref col, source);
                    cursor += 2;
                    continue;
                }

                if (SyntaxMatch(SyntaxLabel.Equal, cursor, code, 2)) {
                    MakeToken(SyntaxLabel.Equal, cursor, 2, ref tokens, ref line, ref col, source);
                    cursor += 2;
                    continue;
                }

                if (SyntaxMatch(SyntaxLabel.CharSymbol, cursor, code)) {
                    int oldcursor = cursor;
                    do {
                        cursor++;
                    } while (PatternMatch(patterns[PatternLabel.NumberPattern], cursor, code));
                    if (SyntaxMatch(SyntaxLabel.CharSymbol, cursor, code)) {
                        MakeToken(SyntaxLabel.CharSymbol, oldcursor, cursor - oldcursor + 1, ref tokens, ref line, ref col, source);
                    }
                    else {
                        // throw new Exception($"Char {code.Substring(oldcursor, cursor - oldcursor + 1)} could not be tokenized");
                        Debug.LogError($"Char {code.Substring(oldcursor, cursor - oldcursor + 1)} could not be tokenized");
                    }
                    cursor++;
                    continue;
                }

                if (SyntaxMatch(SyntaxLabel.StringSymbol, cursor, code)) {
                    int oldcursor = cursor;
                    do {
                        cursor++;
                        if (PatternMatch(patterns[PatternLabel.NewlinePattern], cursor, code)) {
                            // throw new Exception($"String {code.Substring(oldcursor, cursor - oldcursor + 1)} contains newline {line}:{col}");
                            Debug.LogError($"String {code.Substring(oldcursor, cursor - oldcursor + 1)} contains newline {line}:{col}");
                        }
                    } while (PatternMatch(patterns[PatternLabel.StringPattern], cursor, code));
                    if (SyntaxMatch(SyntaxLabel.StringSymbol, cursor, code)) {
                        MakeToken(SyntaxLabel.StringSymbol, oldcursor, cursor - oldcursor + 1, ref tokens, ref line, ref col, source);
                    }
                    else {
                        // throw new Exception($"String {code.Substring(oldcursor, cursor - oldcursor + 1)} could not be tokenized {line}:{col}");
                        Debug.LogError($"String {code.Substring(oldcursor, cursor - oldcursor + 1)} could not be tokenized {line}:{col}");
                    }
                    cursor++;
                    continue;
                }

                if (SyntaxMatch(SyntaxLabel.NumberSymbol, cursor, code)) {
                    int oldcursor = cursor;
                    do {
                        cursor++;
                    } while (PatternMatch(patterns[PatternLabel.NumberPattern], cursor, code));
                    MakeToken(SyntaxLabel.NumberSymbol, oldcursor, cursor - oldcursor, ref tokens, ref line, ref col, source);
                    continue;
                }

                if (PatternMatch(patterns[PatternLabel.CharPattern], cursor, code)) {
                    int oldcursor = cursor;
                    do {
                        cursor++;
                    } while (PatternMatch(patterns[PatternLabel.NumberCharPattern], cursor, code));
                    int len = cursor - oldcursor;
                    string tokenString = code.Substring(oldcursor, len);
                    if (keywords.ContainsKey(tokenString)) {
                        MakeToken(keywords[tokenString], oldcursor, len, ref tokens, ref line, ref col, source);
                    }
                    else {
                        // look ahead across whitespace: an identifier followed by '(' is a function
                        while (PatternMatch(patterns[PatternLabel.WhitespacePattern], cursor, code)) {
                            cursor++;
                        }
                        // guard against reading past the end of the text when the identifier is the last token
                        char nextchar = (cursor < code.Length) ? code[cursor] : '\0';
                        if (nextchar == '(') {
                            MakeToken(SyntaxLabel.Function, oldcursor, len, ref tokens, ref line, ref col, source);
                        }
                        else {
                            MakeToken(SyntaxLabel.Variable, oldcursor, len, ref tokens, ref line, ref col, source);
                        }
                    }
                    continue;
                }

                if (LiteralTokens.Any(n => SyntaxMatch(n, cursor, code))) {
                    MakeToken(LiteralTokens.First(n => SyntaxMatch(n, cursor, code)), cursor, 1, ref tokens, ref line, ref col, source);
                    cursor++;
                    continue;
                }
                // else {
                //     throw new Exception($"CountNotTokenizeCharException char: {taskController.code[cursor]} could not be tokenized {cursor}");
                // }

                if (savecursor == cursor) {
                    // throw new Exception($"TokenizeScriptError char: {source.text[cursor]} unknown error on {cursor} line:{line}/col:{col}");
                    Debug.LogError($"TokenizeScriptError char: {source.text[cursor]} unknown error on {cursor} line:{line}/col:{col}");
                }
                cursor++;
            }
            return true;
        }

        // public static string AsCode(TokenList tokens) {
        //     return string.Join("", tokens.Select(n => n.GetValue()));
        // }
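
        // MakeToken records a single match: it advances the column counter by the token
        // length (and the line counter for EOL tokens) and stores the match as a Token
        // ScriptableObject with a TokenLocation pointing back into the source TextAsset.
        // Token, TokenList and TokenLocation are assumed to be defined elsewhere in this package.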
        private static void MakeToken(SyntaxLabel label, int pos, int len, ref TokenList tokens, ref int line, ref int col, UnityEngine.TextAsset source) {
            col += len;
            if (label == SyntaxLabel.EOL) {
                line++;
                col = 0;
            }
            Token t = ScriptableObject.CreateInstance<Token>();
            TokenLocation l = ScriptableObject.CreateInstance<TokenLocation>();
            l.Init(source, pos, len, line, col);
            t.Init(label, l);
            tokens.Add(t);
        }
    }
}
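
// Usage sketch: a minimal, hedged example of how the tokenizer might be driven from a
// MonoBehaviour. It relies only on the public API above (Tokenizer.Tokenize and
// Tokenizer.DestroyAll) plus the TokenList type; the class and field names below
// (TokenizerExample, taskScripts) are illustrative and not part of MyBT.
//
// using System.Collections.Generic;
// using UnityEngine;
// using MyBT;
//
// public class TokenizerExample : MonoBehaviour {
//     // behaviour tree scripts to tokenize, assigned in the inspector
//     public List<TextAsset> taskScripts = new List<TextAsset>();
//
//     List<TokenList> tokenLists;
//
//     void Start() {
//         // Tokenize returns false if any script failed or produced no tokens
//         if (Tokenizer.Tokenize(out tokenLists, taskScripts)) {
//             Debug.Log($"Tokenized {tokenLists.Count} scripts");
//         }
//     }
//
//     void OnDestroy() {
//         // tokens are ScriptableObjects, so release them explicitly
//         Tokenizer.DestroyAll(ref tokenLists);
//     }
// }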