import {Token} from "./token";
import {TermOperator} from './term-operator';

/*---------------------------------------------------------------------------------------------------------------
 RuleLanguageTokenizer is a 1:1 TypeScript implementation of _fk-rules-executor_ Java class "TermTokenizer".
 Remark: the class structure, and all field and method names are the same in both implementations, to make it easy
 to keep both implementations in sync.
----------------------------------------------------------------------------------------------------------------*/
/**
 * Splits a rule-language expression into {@link Token}s.
 *
 * Delimiters (comments, quoted text, operator symbols, parentheses and commas) are located with a single
 * regular expression; the text between two delimiters becomes a token of its own.
 */
export class RuleLanguageTokenizer {

  /** Characters with a special meaning in a regular expression; they are escaped in literal tokens. */
  private static readonly META_CHARACTERS: string = "$()*+.?[\\]^{|}";
  /** Global regular expression matching any delimiter; built once when the class is initialized. */
  private static readonly PATTERN: RegExp = new RegExp(RuleLanguageTokenizer.buildRegexPattern(), "g");

  /**
   * Builds the source of the delimiter regular expression as an alternation (`a|b|c`).
   *
   * The order of the alternatives matters: at a given position the regex engine takes the first alternative
   * that matches, so comments and quoted text come first and longer operators must precede their prefixes.
   *
   * @returns the regex source (without flags)
   */
  static buildRegexPattern(): string {
    const tokenDelimiters: string[] = [];
    // handle comments ('//' up to, but not including, the end of the line)
    tokenDelimiters.push("//[^\n]*");
    // assert that text wrapped in single quotes is not parsed for literals
    tokenDelimiters.push("'[^']*'");
    // assert that text wrapped in double quotes is not parsed for literals
    tokenDelimiters.push("\"[^\"]*\"");
    // add each operator (precondition: TermOperator enum is well-ordered. E.g. '>=' is before '>')
    for (const op of TermOperator.getSymbols()) {
      tokenDelimiters.push(op);
    }
    // add function-related characters
    tokenDelimiters.push("(");
    tokenDelimiters.push(")");
    tokenDelimiters.push(",");
    // The three regex fragments above pass through mapRegexToken unchanged: they contain no word character
    // and do not start with a metacharacter.
    return tokenDelimiters.map(it => RuleLanguageTokenizer.mapRegexToken(it)).join('|');
  }

  /**
   * Creates a (non-global) regular expression with one capturing group that matches any of the given values.
   *
   * Note: for an empty `values` array the result is `()`, which matches the empty string.
   *
   * @param values the alternatives to match; each one is converted with the same rules as the delimiters
   * @returns the compiled regular expression
   */
  static toRegExp(values: string[]): RegExp {
    const regexTokens: string[] = values.map(it => RuleLanguageTokenizer.mapRegexToken(it));
    return new RegExp(`(${regexTokens.join('|')})`);
  }

  /**
   * Converts one token into a regex alternative.
   *
   * - A token containing a word character is wrapped in word boundaries (`\b…\b`), so that e.g. a keyword
   *   is not found inside a longer identifier.
   * - A token starting with a regex metacharacter is treated as a literal: every metacharacter in it is
   *   escaped (not only the first one, so that symbols such as `||` or `**` yield a valid pattern).
   * - Anything else (including the empty string) is returned unchanged.
   *
   * @param it the token to convert
   * @returns the regex source for this token
   */
  private static mapRegexToken(it: string): string {
    if (/\w/.test(it)) {
      return `\\b${it}\\b`;
    }
    // Guard against the empty string: ''.charAt(0) is '' and includes('') is always true, which would
    // otherwise produce a lone backslash and corrupt the surrounding pattern.
    if (it.length > 0 && RuleLanguageTokenizer.META_CHARACTERS.includes(it.charAt(0))) {
      let escaped = '';
      for (const ch of it) {
        escaped += RuleLanguageTokenizer.META_CHARACTERS.includes(ch) ? `\\${ch}` : ch;
      }
      return escaped;
    }
    return it;
  }

  /**
   * Splits the expression into tokens.
   *
   * Every delimiter match becomes a token, and so does any non-blank text between two delimiters, before
   * the first or after the last one. Surrounding whitespace is not part of a token.
   *
   * @param expression the text to tokenize
   * @returns the tokens in order of appearance
   */
  tokenize(expression: string): Token[] {
    const tokens: Token[] = [];
    let pos = 0; // end of the previous delimiter
    for (const m of expression.matchAll(RuleLanguageTokenizer.PATTERN)) {
      // matchAll always sets index at runtime; the fallback only satisfies typings that declare it optional
      const matchStart = m.index ?? pos;
      const matchEnd = matchStart + m[0].length;
      if (pos !== matchStart) {
        // There is something between the current and the previous delimiter
        this.addToTokens(tokens, expression, pos, matchStart);
      }
      this.addToTokens(tokens, expression, matchStart, matchEnd); // add the delimiter itself
      pos = matchEnd;
    }
    if (pos !== expression.length) {
      // Some characters remain after the last delimiter
      this.addToTokens(tokens, expression, pos, expression.length);
    }
    return tokens;
  }

  /**
   * Appends the token found in `expression` between `start` (inclusive) and `end` (exclusive) to `tokens`.
   * Leading and trailing whitespace is skipped first; if nothing remains, no token is added.
   *
   * @param tokens the list to append to
   * @param expression the complete expression being tokenized
   * @param start start offset of the candidate range (inclusive)
   * @param end end offset of the candidate range (exclusive)
   */
  private addToTokens(tokens: Token[], expression: string, start: number, end: number): void {
    const nonWhitespace = /\S/;
    let from = start;
    let to = end;
    while (from < to && !nonWhitespace.test(expression.charAt(from))) {
      from++;
    }
    while (to > from && !nonWhitespace.test(expression.charAt(to - 1))) {
      to--;
    }
    if (from < to) {
      const strToken = expression.substring(from, to);
      // The token carries the whole expression plus the trimmed offsets
      tokens.push(new Token(Token.getKind(strToken), expression, from, to));
    }
  }

}
