// UP-Viagg-io/Viagg-io/Assets/Packages/MyBT/BT/Compiler/Tokenizer/Tokenizer.cs
//
// 400 lines
// 16 KiB
// C#

//============= Copyright (c) Ludic GmbH, All rights reserved. ==============
//
// Purpose: Part of the My Behaviour Tree Code
//
//=============================================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using UnityEngine;
namespace MyBT
{
/// <summary>
/// Categories of tokens produced by <see cref="Tokenizer"/>.
/// </summary>
// NOTE(review): the numeric value of each member follows its declaration order. If these
// values are persisted anywhere, inserting or reordering members would change them - confirm
// before editing the list.
public enum SyntaxLabel {
// default value (0); no tokenizer rule in this file produces it
Undefined,
// math operators
Plus,
Minus,
// comments: the tokenizer labels the comment text itself with the *Start labels
SingleLineCommentStart,
MultiLineCommentStart,
// produced for a "*/" that is found outside of a multi line comment
MultiLineCommentEnd,
// the two character operator "=="
Equal,
// layout and punctuation
EOL,
// not produced by any tokenizer rule in this file
QuoteToken,
Colon,
Semicolon,
Comma,
Questionmark,
Whitespace,
// generic functions and variable tokens
// (an identifier followed by '(' becomes Function, any other non-keyword identifier Variable)
Variable,
Function,
// behaviour tree tokens (see Tokenizer.keywords for the matching source words)
TreeNameKeyword,
RunTreeNameKeyword,
CompositeNameKeyword,
ActionNameKeyword,
DecoratorNameKeyword,
// Literal Values
TrueKeyword,
FalseKeyword,
// a run of digits, '-' and '.' characters
NumberSymbol,
// a single quoted literal, quotes included
CharSymbol,
// a double quoted literal, quotes included
StringSymbol,
// brackets
OpenNormalBracket,
CloseNormalBracket,
OpenCurlyBracket,
CloseCurlyBracket,
OpenSquareBracket,
CloseSquareBracket,
}
/// <summary>
/// Keys of the single character classes stored in Tokenizer.patterns.
/// </summary>
public enum PatternLabel {
// first character of an identifier
CharPattern,
// any character that may appear inside a double quoted string
StringPattern,
// characters that continue an identifier after its first character
NumberCharPattern,
// characters that continue a number (also used for the body of a char literal)
NumberPattern,
// space or tab
WhitespacePattern,
// line break
NewlinePattern
}
public class Tokenizer {
// TaskController taskController;
// Reserved words of the behaviour tree language and the token label each one maps to.
// The lookup is case sensitive (default string comparer).
private static readonly Dictionary<string, SyntaxLabel> keywords = new Dictionary<string, SyntaxLabel> {
    ["True"] = SyntaxLabel.TrueKeyword,
    ["False"] = SyntaxLabel.FalseKeyword,
    ["Tree"] = SyntaxLabel.TreeNameKeyword,
    ["Composite"] = SyntaxLabel.CompositeNameKeyword,
    ["Action"] = SyntaxLabel.ActionNameKeyword,
    ["Decorator"] = SyntaxLabel.DecoratorNameKeyword,
    ["RunTree"] = SyntaxLabel.RunTreeNameKeyword
};
// Character classes that the tokenizer tests against one character of the input at a time.
private static readonly Dictionary<PatternLabel, Regex> patterns = new Dictionary<PatternLabel, Regex> {
    // first character of an identifier: '$', a letter, '.' or '_'
    [PatternLabel.CharPattern] = new Regex("[\\$a-zA-Z\\._]"),
    // anything but a double quote
    [PatternLabel.StringPattern] = new Regex("[^\"]"),
    // digits, '-' and '.'
    [PatternLabel.NumberPattern] = new Regex("[-0-9\\.]"),
    // following characters of an identifier.
    // NOTE(review): as written this class appears to accept a literal '[' (and '-') as well,
    // so "name[" would be read as one identifier - possibly a typo, confirm before changing.
    [PatternLabel.NumberCharPattern] = new Regex("[-[0-9a-zA-Z\\._]"),
    // space or tab
    [PatternLabel.WhitespacePattern] = new Regex("[ \t]"),
    // Windows or Unix line break
    [PatternLabel.NewlinePattern] = new Regex("\r\n|\n")
};
/// <summary>
/// Regular expression that recognises the start (or the whole) of each directly matchable
/// token kind. Labels without an entry here (keywords, Variable, Function, ...) are
/// recognised by other means in the tokenizer.
/// </summary>
public static readonly Dictionary<SyntaxLabel, Regex> Definitions = new Dictionary<SyntaxLabel, Regex> {
    [SyntaxLabel.Colon] = new Regex(":"),
    [SyntaxLabel.Semicolon] = new Regex(";"),
    [SyntaxLabel.Minus] = new Regex("\\-"),
    [SyntaxLabel.Plus] = new Regex("\\+"),
    [SyntaxLabel.Comma] = new Regex(","),
    [SyntaxLabel.EOL] = new Regex("\\r\\n|\\n"),
    // also matches line breaks, so EOL has to be tested before Whitespace
    [SyntaxLabel.Whitespace] = new Regex("\\s"),
    [SyntaxLabel.Questionmark] = new Regex("\\?"),
    [SyntaxLabel.SingleLineCommentStart] = new Regex("\\/\\/"),
    [SyntaxLabel.MultiLineCommentStart] = new Regex("\\/\\*"),
    [SyntaxLabel.MultiLineCommentEnd] = new Regex("\\*\\/"),
    [SyntaxLabel.Equal] = new Regex("=="),
    [SyntaxLabel.CharSymbol] = new Regex("\'"),
    [SyntaxLabel.StringSymbol] = new Regex("\""),
    // a digit or '-' starts a number
    [SyntaxLabel.NumberSymbol] = new Regex("[0-9-]"),
    [SyntaxLabel.OpenCurlyBracket] = new Regex("\\{"),
    [SyntaxLabel.CloseCurlyBracket] = new Regex("\\}"),
    [SyntaxLabel.OpenNormalBracket] = new Regex("\\("),
    [SyntaxLabel.CloseNormalBracket] = new Regex("\\)"),
    [SyntaxLabel.OpenSquareBracket] = new Regex("\\["),
    [SyntaxLabel.CloseSquareBracket] = new Regex("\\]")
};
// Single character tokens, tried in this order by TokenizeTextAsset; the first label whose
// definition matches the current character wins. EOL has to stay ahead of Whitespace because
// the Whitespace definition ("\\s") matches a line feed as well.
// NOTE(review): Minus looks unreachable from TokenizeTextAsset, because the NumberSymbol
// definition ("[0-9-]") also matches '-' and is tested earlier - confirm.
static readonly SyntaxLabel[] LiteralTokens = {
SyntaxLabel.EOL,
SyntaxLabel.Whitespace,
SyntaxLabel.Questionmark,
SyntaxLabel.Colon,
SyntaxLabel.Semicolon,
SyntaxLabel.Comma,
SyntaxLabel.OpenNormalBracket,
SyntaxLabel.CloseNormalBracket,
SyntaxLabel.OpenSquareBracket,
SyntaxLabel.CloseSquareBracket,
SyntaxLabel.OpenCurlyBracket,
SyntaxLabel.CloseCurlyBracket,
SyntaxLabel.Plus,
SyntaxLabel.Minus
};
/// <summary>
/// Tells whether every script produced a non-empty token list.
/// </summary>
/// <param name="tokens">One token list per tokenized script.</param>
/// <returns>
/// True only when <paramref name="tokens"/> is non-null, holds at least one list, and every
/// list in it is non-null and contains at least one token.
/// </returns>
private static bool IsAllGenerated (List<TokenList> tokens) {
    // nothing was tokenized at all
    if ((tokens == null) || (tokens.Count == 0)) {
        return false;
    }
    foreach (TokenList scriptTokens in tokens) {
        // a missing or empty list means that script did not generate
        if ((scriptTokens == null) || (scriptTokens.Count <= 0)) {
            return false;
        }
    }
    return true;
}
// private static bool IsGenerated (TokenList tokens) {
// return (tokens != null) && (tokens.Count > 0);
// }
/// <summary>
/// Destroys every token in every list and clears the caller's reference.
/// </summary>
/// <param name="tokens">
/// Token lists to tear down. Set to null on return. A null reference is accepted and left
/// untouched; null lists and null tokens inside are skipped.
/// </param>
public static void DestroyAll (ref List<TokenList> tokens) {
    if (tokens == null) {
        return;
    }
    foreach (TokenList tok in tokens) {
        // IsAllGenerated treats null entries as possible, so cleanup must not
        // throw on them and leave the remaining lists undestroyed.
        if (tok == null) {
            continue;
        }
        foreach (Token t in tok) {
            if (t != null) {
                t.Destroy();
                UnityEngine.Object.DestroyImmediate(t);
            }
        }
    }
    tokens = null;
}
// private static void Reset (ref TokenList tokens) {
// if (tokens != null) {
// foreach (Token t in tokens) {
// if (t != null) {
// t.Reset();
// UnityEngine.Object.DestroyImmediate(t);
// }
// }
// tokens = null;
// }
// }
/// <summary>
/// Tests a fixed-size window of <paramref name="code"/> against a regular expression.
/// </summary>
/// <param name="expression">Expression to test with.</param>
/// <param name="cursor">Index of the first character of the window.</param>
/// <param name="code">Text being scanned.</param>
/// <param name="size">Number of characters in the window.</param>
/// <returns>
/// False when the window would run past the end of <paramref name="code"/>; otherwise
/// whether the expression matches anywhere inside the window.
/// </returns>
private static bool PatternMatch(Regex expression, int cursor, string code, int size = 1) {
    // the window must fit completely inside the text
    if (cursor + size > code.Length) {
        return false;
    }
    string window = code.Substring(cursor, size);
    return expression.IsMatch(window);
}
/// <summary>
/// Tests a window of <paramref name="code"/> against the definition registered for a token kind.
/// </summary>
/// <param name="kind">Token kind; it must have an entry in <see cref="Definitions"/>.</param>
/// <param name="cursor">Index of the first character of the window.</param>
/// <param name="code">Text being scanned.</param>
/// <param name="size">Number of characters in the window.</param>
/// <returns>The result of <see cref="PatternMatch"/> for that definition.</returns>
private static bool SynaxMatch(SyntaxLabel kind, int cursor, string code, int size = 1) {
    Regex definition = Definitions[kind];
    return PatternMatch(definition, cursor, code, size);
}
/// <summary>
/// Tokenizes every script and collects one token list per script, in input order.
/// </summary>
/// <param name="tokenList">
/// Receives the token lists. Always assigned; empty when <paramref name="taskScripts"/> is null.
/// </param>
/// <param name="taskScripts">Scripts to tokenize. Null entries make the call fail.</param>
/// <returns>
/// True when every script was tokenized and every resulting list holds at least one token.
/// </returns>
public static bool Tokenize (out List<TokenList> tokenList, List<UnityEngine.TextAsset> taskScripts) {
    tokenList = new List<TokenList>();
    // A missing script list is reported as a failure instead of a NullReferenceException.
    if (taskScripts == null) {
        Debug.LogError("TaskController.Generate: Tokenizer Error Script list is Null");
        return false;
    }
    bool allSucceeded = true;
    for (int i = 0; i < taskScripts.Count; i++) {
        TokenList scriptTokens;
        bool succeeded = Tokenizer.TokenizeTextAsset(out scriptTokens, taskScripts[i]);
        // the list is added even on failure so that indices line up with taskScripts
        tokenList.Add(scriptTokens);
        allSucceeded &= succeeded;
    }
    if (!allSucceeded) {
        // dump the whole input so the failing script can be identified
        Debug.LogError($"TaskController.Generate: Tokenizer Error Script Count: {taskScripts.Count}");
        for (int i = 0; i < taskScripts.Count; i++) {
            if (taskScripts[i] != null) {
                Debug.LogError($"TaskController.Generate: Index: {i} Name: {taskScripts[i].name} Size: {taskScripts[i].text.Length}");
            }
            else {
                Debug.LogError($"TaskController.Generate: Index: {i} is Null");
            }
        }
    }
    return IsAllGenerated(tokenList) && allSucceeded;
}
/// <summary>
/// Splits the text of <paramref name="source"/> into tokens.
/// </summary>
/// <param name="tokens">Receives the tokens in source order. Always assigned.</param>
/// <param name="source">Script to tokenize.</param>
/// <returns>
/// False when <paramref name="source"/> is null, true otherwise. Lexical problems
/// (unterminated literals or comments, unknown characters) are reported through
/// Debug.LogError and scanning continues; they do not change the return value.
/// </returns>
public static bool TokenizeTextAsset(out TokenList tokens, UnityEngine.TextAsset source) {
    int line = 1, col = 0;
    tokens = new TokenList();
    if (source == null) {
        // textasset is null
        return false;
    }
    string code = source.text;
    int cursor = 0;
    // Every branch below consumes at least one character, so the loop always terminates.
    while (cursor < code.Length) {
        // ---- "//" comment: one token covering the text up to (not including) the line break
        if (SynaxMatch(SyntaxLabel.SingleLineCommentStart, cursor, code, 2)) {
            int start = cursor;
            cursor += 2;
            // Test the character right after "//" too (an empty comment must not swallow the
            // next line) and stop at the end of the input when there is no final line break.
            while (cursor < code.Length && !SynaxMatch(SyntaxLabel.EOL, cursor, code)) {
                cursor++;
            }
            MakeToken(SyntaxLabel.SingleLineCommentStart, start, cursor - start, ref tokens, ref line, ref col, source);
            continue;
        }
        // ---- "/* ... */" comment: one token per source line of the comment
        if (SynaxMatch(SyntaxLabel.MultiLineCommentStart, cursor, code, 2)) {
            int start = cursor;
            int openLine = line, openCol = col;
            cursor += 2;
            // The terminator is tested before every step, so "/**/" closes immediately.
            bool terminated = SynaxMatch(SyntaxLabel.MultiLineCommentEnd, cursor, code, 2);
            while (!terminated && cursor < code.Length) {
                bool isEol = SynaxMatch(SyntaxLabel.EOL, cursor, code);
                cursor++;
                if (isEol) {
                    // close the token of the finished line and start a new one
                    MakeToken(SyntaxLabel.MultiLineCommentStart, start, cursor - start, ref tokens, ref line, ref col, source);
                    start = cursor;
                    line++;
                    col = 0;
                }
                terminated = SynaxMatch(SyntaxLabel.MultiLineCommentEnd, cursor, code, 2);
            }
            if (terminated) {
                cursor += 2;
            } else {
                // ran into the end of the input; cursor equals code.Length here
                Debug.LogError($"Multi line comment is not terminated {openLine}:{openCol}");
            }
            if (cursor > start) {
                MakeToken(SyntaxLabel.MultiLineCommentStart, start, cursor - start, ref tokens, ref line, ref col, source);
            }
            continue;
        }
        // this should not occur on its own
        if (SynaxMatch(SyntaxLabel.MultiLineCommentEnd, cursor, code, 2)) {
            MakeToken(SyntaxLabel.MultiLineCommentEnd, cursor, 2, ref tokens, ref line, ref col, source);
            cursor += 2;
            continue;
        }
        if (SynaxMatch(SyntaxLabel.Equal, cursor, code, 2)) {
            MakeToken(SyntaxLabel.Equal, cursor, 2, ref tokens, ref line, ref col, source);
            cursor += 2;
            continue;
        }
        // ---- single quoted literal
        // NOTE(review): the body is scanned with NumberPattern, so only digits, '-' and '.'
        // are accepted between the quotes - confirm that this is intended.
        if (SynaxMatch(SyntaxLabel.CharSymbol, cursor, code)) {
            int start = cursor;
            do { cursor++; }
            while (PatternMatch(patterns[PatternLabel.NumberPattern], cursor, code));
            if (SynaxMatch(SyntaxLabel.CharSymbol, cursor, code)) {
                MakeToken(SyntaxLabel.CharSymbol, start, cursor - start + 1, ref tokens, ref line, ref col, source);
            } else {
                // the excerpt is clamped because cursor may already sit at the end of the input
                Debug.LogError($"Char {ClampedSubstring(code, start, cursor - start + 1)} could not be tokenized");
            }
            // steps over the closing quote, or over the offending character on error
            cursor++;
            continue;
        }
        // ---- double quoted literal
        if (SynaxMatch(SyntaxLabel.StringSymbol, cursor, code)) {
            int start = cursor;
            do {
                cursor++;
                if (PatternMatch(patterns[PatternLabel.NewlinePattern], cursor, code)) {
                    // reported, but the line break stays part of the string
                    Debug.LogError($"String {ClampedSubstring(code, start, cursor - start + 1)} contains newline {line}:{col}");
                }
            }
            while (PatternMatch(patterns[PatternLabel.StringPattern], cursor, code));
            if (SynaxMatch(SyntaxLabel.StringSymbol, cursor, code)) {
                MakeToken(SyntaxLabel.StringSymbol, start, cursor - start + 1, ref tokens, ref line, ref col, source);
            } else {
                // unterminated string: cursor sits at the end of the input, so clamp the excerpt
                Debug.LogError($"String {ClampedSubstring(code, start, cursor - start + 1)} could not be tokenized {line}:{col}");
            }
            cursor++;
            continue;
        }
        // ---- number: a digit or '-' followed by digits, '-' and '.'
        if (SynaxMatch(SyntaxLabel.NumberSymbol, cursor, code)) {
            int start = cursor;
            do { cursor++; }
            while (PatternMatch(patterns[PatternLabel.NumberPattern], cursor, code));
            MakeToken(SyntaxLabel.NumberSymbol, start, cursor - start, ref tokens, ref line, ref col, source);
            continue;
        }
        // ---- identifier: keyword, function name or variable name
        if (PatternMatch(patterns[PatternLabel.CharPattern], cursor, code)) {
            int start = cursor;
            do { cursor++; }
            while (PatternMatch(patterns[PatternLabel.NumberCharPattern], cursor, code));
            int len = cursor - start;
            string word = code.Substring(start, len);
            SyntaxLabel keywordLabel;
            if (keywords.TryGetValue(word, out keywordLabel)) {
                MakeToken(keywordLabel, start, len, ref tokens, ref line, ref col, source);
            } else {
                // Spaces and tabs behind the name are consumed here and produce no tokens
                // (kept as in the original so that the token stream stays the same).
                while (PatternMatch(patterns[PatternLabel.WhitespacePattern], cursor, code)) {
                    cursor++;
                }
                // An identifier at the very end of the input has no next character to inspect.
                bool isCall = cursor < code.Length && code[cursor] == '(';
                MakeToken(isCall ? SyntaxLabel.Function : SyntaxLabel.Variable, start, len, ref tokens, ref line, ref col, source);
            }
            continue;
        }
        // ---- single character tokens, first match in LiteralTokens order wins
        bool matchedLiteral = false;
        foreach (SyntaxLabel literal in LiteralTokens) {
            if (SynaxMatch(literal, cursor, code)) {
                MakeToken(literal, cursor, 1, ref tokens, ref line, ref col, source);
                cursor++;
                matchedLiteral = true;
                break;
            }
        }
        if (matchedLiteral) {
            continue;
        }
        // ---- nothing matched: report the character and skip it
        Debug.LogError($"TokenizeScriptError char: {code[cursor]} unknown error on {cursor} line:{line}/col:{col}");
        cursor++;
    }
    return true;
}
// Returns up to `length` characters of `text` starting at `start`, shortened when the
// requested range would run past the end of the text. `start` must be a valid index.
private static string ClampedSubstring(string text, int start, int length) {
    return text.Substring(start, Math.Min(length, text.Length - start));
}
// public static string AsCode(TokenList tokens) {
// return string.Join("", tokens.Select(n => n.GetValue()));
// }
/// <summary>
/// Creates a token for a span of the source, appends it to <paramref name="tokens"/> and
/// advances the line/column bookkeeping.
/// </summary>
/// <param name="label">Kind of the new token.</param>
/// <param name="pos">Index of the first character of the span.</param>
/// <param name="len">Length of the span in characters.</param>
/// <param name="tokens">List the token is appended to.</param>
/// <param name="line">Current line; incremented for an EOL token.</param>
/// <param name="col">Current column; advanced by <paramref name="len"/>, or reset to 0 for an EOL token.</param>
/// <param name="source">Asset the span belongs to.</param>
private static void MakeToken(SyntaxLabel label, int pos, int len, ref TokenList tokens, ref int line, ref int col, UnityEngine.TextAsset source) {
    // The counters are updated before the location is recorded, so the stored line/column
    // describe the position after the token.
    if (label == SyntaxLabel.EOL) {
        line += 1;
        col = 0;
    } else {
        col = col + len;
    }
    Token token = ScriptableObject.CreateInstance<Token>();
    TokenLocation location = ScriptableObject.CreateInstance<TokenLocation>();
    location.Init(source, pos, len, line, col);
    token.Init(label, location);
    tokens.Add(token);
}
}
}