##################################################
# Filename:     libExpr.rb
# Author:       David Ljung Madison Stellar <DaveSource.com>
# See License:  http://MarginalHacks.com/License/
# Info:         http://marginalhacks.com/Hacks/libExpr.rb/
#
# Expression handler
#
# Converts string expressions with variables to RPN or Prefix or AST (abstract syntax tree) for quick evaluation
#
# Uses a modified Shunting-Yard Algorithm (that can handle Unary/Ternary operations)
#
# Performance-wise (not counting initial conversion, which is linear time) runs at
# 50% of the speed of actual ruby eval of expressions.
#
# Here's a table of the formats and their capabilities:
#
# Name    Short-Circuit (||,&&,?:)    Ternary Ops (?:)
# ----    -------------               -----------
# RPN     No                          No
# Prefix  Yes                         No
# AST     Yes                         Yes
#
# RPN (Reverse Polish Notation aka "postfix"):
#   Fastest (slightly), simplest to view and debug, but missing short-circuit/ternary
# Prefix
#   Converts to RPN then quick conversion to Prefix.  Supports short-circuit (for &&,||)
# AST
#   Can do ternary ops.  Also fast, though harder to debug/understand since it's a tree
#
# Useful when:
# 1) Eval is not safe (no need to sanitize inputs here)
# 2) 'Variable' evaluation is different from ruby variables
# 3) Expressions are evaluated many times with different variable values
# 4) 'Variables' may not actually be normal variable notation.  I.e.:  [`PARAMS@SOME_VAL("DEFAULT")]
#
# Short-circuit notes:
#
# Since there aren't function calls (although you could arguably implement them with the variables)
# then the short-circuiting mostly will only effect performance (by doing unnecessary calculations),
# but there are also cases such as:
#   (a == 0) || (num/a)
# Which will incorrectly cause a ZeroDivisionError if you don't have short-circuit turned on.
#
# Requires:  (see testbench for example implementation)
#   Verif::assert(<cond>, "<error string>")
#
#
# Example:
#
# require 'libExpr'
# someExpr = Expr.new("5*32-6")
# answer = someExpr.eval()   # result is 154
#
# # or if you don't need to save the expr object:
# answer = Expr.new("5*32-6").eval()
#
#
# Example with variables:
#
# vars['foo'] = 42
# vars['bar'] = 3
# require 'libExpr'
# someExpr = Expr.new("5*foo-6/bar")
# someExpr.eval() { |varname| vars[varname] }
# # Returns  5*42-6/3 = 208
#
require 'set'

#########################
# Operators
#
# Including fake operators for '(' and ')'
#########################
class ExprCloseParen
	def to_s
		')'
	end
	def ==(other)
		other.is_a?(ExprCloseParen)
	end
end

class ExprOpenParen
	def to_s
		'('
	end
	def ==(other)
		other.is_a?(ExprOpenParen)
	end
end

class ExprOperator
	attr_reader :str, :precedence, :arity

	# Ruby precedence for operators
	# https://www.techotopia.com/index.php/Ruby_Operator_Precedence
	# https://ruby-doc.org/core-2.7.0/doc/syntax/precedence_rdoc.html
	@@rubyPrecedence = {
		' !' => 3,	# Unary !, need to handle differently
		' -' => 3,	# Unary minus, diff from binary minus below. (note the space)
		' +' => 3,	# Unary plus, diff from binary plus below.
		'*'  => 5,
		'/'  => 5,
		'%'  => 5,
		'+'  => 6,
		'-'  => 6,
		'<<' => 7,
		'>>' => 7,
		'&'  => 8,
		'^'  => 9,
		'|'  => 9,
		'<'  => 10,
		'<=' => 10,
		'>'  => 10,
		'>=' => 10,
		'==' => 11,
		'!=' => 11,
		'&&' => 14,
		'||' => 15,
		'?'  => 16,	# Half of the ?: ternary op
		':'  => 17,	# Half of the ?: ternary op
    '->' => 18,	# This is an MDI operator,  "a->b" which is basically "!a || b"
	}

	# C/C++ precedence.
	# Currently we use Ruby precedence (above), but this would be easy to change or even make as an option
	# Lower numbers are higher precedence
	# https://en.cppreference.com/w/cpp/language/operator_precedence
	@@CPrecedence = {
		' !' => 3,	# Unary !, need to handle differently
		' -' => 3,	# Unary minus, diff from binary minus below. (note the space)
		' +' => 3,	# Unary plus, diff from binary plus below.
		'*'  => 5,
		'/'  => 5,
		'%'  => 5,
		'+'  => 6,
		'-'  => 6,
		'<<' => 7,
		'>>' => 7,
		'<'  => 9,
		'<=' => 9,
		'>'  => 9,
		'>=' => 9,
		'==' => 10,
		'!=' => 10,
		'&'  => 11,
		'^'  => 12,
		'|'  => 13,
		'&&' => 14,
		'||' => 15,
		'?'  => 16,	# Half of the ?: ternary op
		':'  => 17,	# Half of the ?: ternary op
    '->' => 18,	# This is an MDI operator,  "a->b" which is basically "!a || b"
	}

	@@precedence = @@rubyPrecedence

	def self.operators
		@@precedence.keys
	end

	def initialize(str)
		@str = str
		@precedence = @@precedence[@str]
		unary = @str[0]==' ' ? true : false
		ternary = @str[0]=='?' ? true : false
		@arity = 2
		@arity = 1 if unary
		Verif::assert(@precedence, "Unknown operator [#{@str}]")

		# Right associative:  Unary ops, ternary ops (assignment and exponentiation which we don't do)
		# everything else is left associative.  We mark unary ops with a space prior.
		@lassoc = unary || ternary ? false : true
	end

	# Compare precedence.  The C++ precedence table has the numbers backwards,
	# so lower numbers are higher precedence.
	# But here we use a>b to mean "a has higher precedence than b"
	def <(other)
		@precedence > other.precedence
	end
	def >(other)
		@precedence < other.precedence
	end
	def >=(other)
		@precedence <= other.precedence
	end
	def <=(other)
		@precedence >= other.precedence
	end
	def ==(other)
		return @str == other if other.is_a?(String)
		return @str == other.str if other.is_a?(ExprOperator)
		false
	end

	# Does an operator come 'after' another operator in precedence if we read left to right?
	def after(other)
		return false unless other
		return false unless other.is_a?(ExprOperator)
		# We go after if the other op has a higher precedence
		# If we are left-assoc, then we also go after if it's the same precedence
		return @lassoc ? (other >= self) : (other > self)
	end

	def to_s
		@str
	end

	# Execute the operator on a stack (postfix notation)
	# Note that they pop in reverse order.
	# This was originally written for RPN/postfix, so the order is backwards,
	# though it is now used by Prefix/AST.
	# Should probably convert to a hash of Procs with yield for getting operands
	# (though postfix would still be backwards unless we check arity first)
	def do(stack)
		operand = stack.pop
		Verif::assert(operand!=nil, "Missing operand for RPN operation [#{str}]")
		got = case @str
		when ' !'
			puts "Unary op: #{@str}#{operand}" if Expr::debug
			operand == Expr.false ? Expr.true : Expr.false
		#'-'  => ExprOperator.new('-'),	# Unary Minus.  See issue above
		when ' +'	# Unary plus, not addition
			puts "Unary op: #{@str}#{operand}" if Expr::debug
			operand
		when ' -'	# Unary minus, not subtraction
			puts "Unary op: #{@str}#{operand}" if Expr::debug
			-operand
		else
			op1, op2 = stack.pop, operand
			Verif::assert(op1!=nil, "Missing operand1 for RPN operation [#{str}]")
			puts "Binary op: #{op1} #{@str} #{op2}" if Expr::debug
			case @str
			when '*'
				op1 * op2
			when '/'
				op1 / op2
			when '%'
				op1 % op2
			when '+'
				op1 + op2
			when '-'
				op1 - op2
			when '<<'
				# We can end up with massive numbers, and then try to shift by those numbers,
				# and we'll run out of memory.  But once we run out of memory, ruby falls apart.
				# So we'll just pre-emptively raise it before we try to do the shift.
				# https://stackoverflow.com/questions/70254103/ruby-can-we-survive-nomemoryerror
				raise NoMemoryError if op2 > 100000000000 || op2 < -100000000000
				op1 << op2
			when '>>'
				raise NoMemoryError if op2 > 100000000000 || op2 < -100000000000
				op1 >> op2
			# Logical ops on numbers
			when '<'
				op1 < op2 ? Expr.true : Expr.false
			when '<='
				op1 <= op2 ? Expr.true : Expr.false
			when '>'
				op1 > op2 ? Expr.true : Expr.false
			when '>='
				op1 >= op2 ? Expr.true : Expr.false
			when '=='
				op1 == op2 ? Expr.true : Expr.false
			when '!='
				op1 != op2 ? Expr.true : Expr.false
			# Binary ops on numbers
			when '&'
				op1 & op2
			when '^'
				op1 ^ op2
			when '|'
				op1 | op2
			# Logical ops on logical values
			when '&&'
				(op1!=Expr.false && op2!=Expr.false) ? Expr.true : Expr.false
			when '||'
				(op1!=Expr.false || op2!=Expr.false) ? Expr.true : Expr.false
			when '->'
				(op1==Expr.false || op2!=Expr.false) ? Expr.true : Expr.false
			when '?'
				cond = stack.pop
				cond ? op1 : op2
			else
				Verif::assert("Unknown RPN stack action: [#{str}]")
			end
		end
		#puts "ExprOperator(#{str})  #{b} #{str} #{a} -> #{got}"
		got
	end
end

##################################################
# AST operation node for AST parsing
##################################################
class ASTOp
	attr_reader :op, :operands
	def initialize(op, *operands)
		@op = op
		@operands = operands
	end

	def to_s
		strs = ['(']
		operands.each_with_index { |operand,index|
			strs << operand.to_s
			strs << @op.to_s if index==0
			strs << ':' if index==1 && @op == '?'
		}
		strs << ')'
		strs.join(' ')
	end

	def inspect(lvl=1)
		pad = " "*(lvl*2)
		"AST: [#{@op.str}] operands [#{@operands.size}]:\n#{pad}" + @operands.map {|n|
			n.is_a?(ASTOp) ? n.inspect(lvl+1) : n.inspect
		}.join("\n#{pad}")
	end
end

##################################################
# Expr class
#
# Converts infix expressions such as:
#    (3 +1*-5 / (32 - (varA || varB < 12 ? 9 : 11)))
# Into RPN and/or Prefix and/or AST trees, and
# then can evaluate them accordingly (including
# deferring of handling variables back to the caller)
##################################################
class Expr
	attr_reader :expr, :ast, :rpn, :prefix

	@@true = 1
	@@false = 0

	# Use true and false for logical ops
	def self.useTrueFalse; @@true,@@false = true,false; end

	@@debug = false
	def self.debug(val=nil)
		@@debug = val unless val==nil
		@@debug
	end

	# Use 1 and 0 for logical ops
	def self.use10;; @@true,@@false = 1,0; end

	def self.true;  @@true;  end
	def self.false; @@false; end

	# Create a hash of all operators (including parens)
	@@openParen = ExprOpenParen.new()
	@@closeParen = ExprCloseParen.new()
	@@operators = {
		'('  => @@openParen,
		')'  => @@closeParen,
	}
	ExprOperator.operators.each { |op| @@operators[op] = ExprOperator.new(op) }

	# Create an expression object, with an optionally specified regexp that matches labels
	# Can also specify an "origExpr" if we have sanitized/cleaned the expression prior to
	# calling this class, but we want errors to still use the origExpr.
	def initialize(expr, label = nil, origExpr = nil)
		@label = label || '`?[a-zA-Z0-9_\.\@\"]+'
		@expr = origExpr || expr	# This is for printing/output.
		@useExpr = expr

		@tokens = nil
		@vars = Set.new	# List of vars used in the expression
		@ast = nil
		@rpn = nil
		@prefix = nil
	end

	def vars
		# The list of vars is created when we tokenize
		tokenize
		@vars.dup
	end

	#########################
	# Tokens
	#########################
	def convertToken(tok, lastToken)
		# Is it a number?
		return tok.to_num if tok.match(/^0x[0-9a-fA-F]+$/)
		return tok.gsub('_','').to_num if tok.match(/^0b[01_]+$/)
		return tok.to_i if tok.match(/^\d+/)
		return @@false if tok == 'false'
		return @@true if tok == 'true'

		# Maybe it's an operation
		# Unary operators always follow either openParen or another operator.
		# And it's the only operator that does that - so that's how we'll tell
		unary = (lastToken == nil || lastToken.is_a?(ExprOperator) || lastToken == @@openParen)
		unary = false if tok=='('	# Because '(' kind of looks like an "operator" in @@operators
		op = unary ? @@operators[' '+tok] : @@operators[tok]
		return op if op
		Verif::assert(op, "Operator [#{tok}] is not a unary operator") if @@operators[tok]

		# Must be just a label/variable
		@vars << tok
		tok
	end

	def tokenize
		return @tokens if @tokens
		# End scan with a '\S' otherwise we'll ignore any non-space characters that don't match, and we probably want an error.
		# I moved '%' after @label because I have some cases where we are using '%LABELS'
		tokens = @useExpr.scan(/\(|\)|\*|\/|\->|\+|\-|<<|>>|<=|>=|<|>|==|!=|&&|\|\||&|\^|\||\?|\:|\!|#{@label}|%|0x[0-9a-fA-F]+|0b[01_]+|\d+|\S/)
		lastToken = nil
		@tokens = tokens.map { |tok|
			got = convertToken(tok, lastToken)
			lastToken = got
			got
		}
	end

	#########################
	# We get variable values through yield:
	#
	# Something like:  expr.eval { |name|  return myVariables[name] }
	#########################
	def getVariable(name, block)
		Verif::assert(block, "Can't evaluate expression with variables unless a block is supplied to resolve vars")
		got = block.call(name)
		Verif::assert(got.is_a?(Numeric) || got.is_a?(FalseClass) || got.is_a?(TrueClass), "Variable evaluation [#{name}] gave non-numeric result? #{got} is #{got.class}")
		return got
	end

	#########################
	# AST Code
	#########################
	# Convert infix to postfix
	# Using a modified Shunting-Yard Algorithm
	#   https://en.wikipedia.org/wiki/Shunting-yard_algorithm
	# Can handle unary ops (actually it's a trick in the tokenizer that does it)
	def addASTOp(ast, op)
		# Pull the operands from the AST stack, pop them off and
		# then reverse them because they are on the stack in reverse order.
		operands = Array.new(op.arity) { ast.pop }.reverse

		puts "New AST node: #{op} and #{operands.map(&:to_s)}" if @@debug

		# Special case, when we see ':' then fold it back into the '?'
		if op == ':'
			ternary = operands.shift
			Verif::assert(ternary.op == '?', "Expression saw ':' that wasn't following a '?'\n  #{@expr}")
			ternary.operands.push(operands.shift)
			ast << ternary
			return
		end

		ast << ASTOp.new(op, *operands)
	end

	def astize
		return @ast if @ast
		tokenize
		operatorStack = []
		ast = []
		@tokens.unshift(@@openParen)
		@tokens.push(@@closeParen)
		@tokens.each_with_index { |token, index|
			if token == @@openParen
				# Add '(' to operatorStack
				operatorStack << token
			elsif token == @@closeParen
				# Pull off operatorStack until matching ')'
				while true
					Verif::assert(operatorStack.size>0, "Expression had mismatched parens: #{@tokens[0..index].join('')}")
					token = operatorStack.pop
					break if token == @@openParen
					addASTOp(ast, token)
				end
				# If we want to handle 'function(..)' calls, we would check
				# if operatorStack.last is a function call and move that to the AST
			elsif token.is_a?(ExprOperator)
				# Not operand or '(' or ')'
				# Must be an operator
				# Move from end of operatorStack -> ast any operator that should happen prior
				# (with higher precedence or same precedence and we are left-associative)
				Verif::assert(token.is_a?(ExprOperator), "Unknown operator: #{token}")
				while token.after(operatorStack.last)
					addASTOp(ast, operatorStack.pop)
				end
				operatorStack << token
			else
				# Add operators directly to ast
				ast << token
			end
		}

		Verif::assert(operatorStack.size==0, "Converting expr to AST failed, operatorStack not empty (expr ended with operator and no operand?)\n#{@expr}")
		Verif::assert(ast.size==1, "Conversion to AST somehow had more than one node?\n  #{@expr}")
		@ast = ast[0]
	end

	def evalASTOp(astOp, block)
		return astOp if astOp.is_a?(Numeric) || astOp.is_a?(FalseClass) || astOp.is_a?(TrueClass)

		return getVariable(astOp, block) unless astOp.is_a?(ASTOp)

		op = astOp.op
		operands = astOp.operands

		# Handle short-circuiting expressions here, so we don't even eval that part of the tree.
		if op == '?'
			cond = evalASTOp(operands[0], block)
			return cond==@@true ? evalASTOp(operands[1], block) : evalASTOp(operands[2], block)
		end
		return (evalASTOp(operands[0], block)!=@@false || evalASTOp(operands[1], block)!=@@false) ? @@true : @@false if op == '||'
		return (evalASTOp(operands[0], block)!=@@false && evalASTOp(operands[1], block)!=@@false) ? @@true : @@false if op == '&&'

		operandVals = []
		operandVals = operands.map { |operand| evalASTOp(operand, block) }
		return op.do(operandVals)
	end

	def evalAST(&block)
		astize
		## Show the expression as AST sees it (with lots of parens)
		puts @ast if @@debug
		## Show the whole AST tree
		#puts @ast.inspect if @@debug
		evalASTOp(@ast, block)
	end

	#########################
	# RPN Code
	#########################
	# Convert infix to postfix
	# Using a modified Shunting-Yard Algorithm
	#   https://en.wikipedia.org/wiki/Shunting-yard_algorithm
	# Can handle unary ops (actually it's a trick in the tokenizer that does it)
	def rpnize
		return @rpn if @rpn
		tokenize
		stack = []
		rpn = []
		@tokens.unshift(@@openParen)
		@tokens.push(@@closeParen)
		@tokens.each_with_index { |token, index|
			if token == @@openParen
				# Add '(' to stack
				stack << token
			elsif token == @@closeParen
				# Pull off stack until matching ')'
				while true
					Verif::assert(stack.size>0, "Expression had mismatched parens: #{@tokens[0..index].join('')}")
					got = stack.pop
					break if got == @@openParen
					rpn << got
				end
				# If we want to handle 'function(..)' calls, we would check
				# if stack.last is a function call and move that to the rpn list
			elsif token.is_a?(ExprOperator)
				# Not operand or '(' or ')'
				# Must be an operator
				# Move from end of stack -> rpn any operator that should happen prior
				# (with higher precedence or same precedence and we are left-associative)
				Verif::assert(token.is_a?(ExprOperator), "Unknown operator: #{token}")
				while token.after(stack.last)
					rpn << stack.pop
				end
				stack << token
			else
				# Add operands directly to rpn
				rpn << token
			end
		}

		Verif::assert(stack.size==0, "Converting expr to RPN failed, stack not empty (expr ended with operator and no operand?)\n#{@expr}")
		@rpn = rpn
	end

	# Evals the rpn
	# If the expression has any vars, then supply it with a block that will take a variable and return the value
	def evalRPN(&block)
		rpnize
		puts "RPN: #{to_s(true)}" if @@debug
		stack = []
		@rpn.each { |token|
			if token.is_a?(ExprOperator)
				ans = token.do(stack)
				stack << ans
			elsif token.is_a?(Numeric) || token.is_a?(FalseClass) || token.is_a?(TrueClass)
				stack << token
			else
				# It's a variable
				stack << getVariable(token, block)
			end
		}
		Verif::assert(stack.size==1, "RPN Expression is unbalanced (final stack is [#{stack}])\n  #{to_s}")
		stack[0]
	end

	#########################
	# Prefix Code
	# (Converts to RPN then to Prefix)
	#########################
	# Convert RPN to Prefix stack
	def prefixize
		return @prefix if @prefix
		rpnize
		@prefix = []
		@rpn.each { |token|
			unless token.is_a?(ExprOperator)
				@prefix << token
			else
				# It's an operator, figure out arity and grab this many pieces from the end of the stack,
				# then push it on as a whole list (so this ends up being one stack operand)
				Verif::assert(@prefix.size >= token.arity, "Not enough operands on stack for #{token} - was RPN stack corrupted??")
				add = []
				token.arity.times { add.unshift(@prefix.pop) }
				add.unshift(token)
				@prefix << add
			end
		}
		@prefix.flatten!
	end


	def evalPreOp(notShort, stack, block)
		token = stack.shift

		# Number
		return token if token.is_a?(Numeric) || token.is_a?(FalseClass) || token.is_a?(TrueClass)

		if token.is_a?(ExprOperator)
			# It's an Operator
			operands = []
			shortCircuitAnswer = nil

			# If it's a short circuit op, deal with operands one step at a time.
			#
			# Operands are reversed because we call 'ExprOperator.do' which was written for postfix
			token.arity.times { |num|
				operand = evalPreOp(notShort, stack, block)

				# Check for short circuit conditions
				if num==0
					shortCircuitAnswer = @@true if token == '||' && operand == @@true
					shortCircuitAnswer = @@false if token == '&&' && operand == @@false
					notShort = false if shortCircuitAnswer != nil
				end

				operands << operand
			}
			#puts "Prefix eval: #{token} on #{operands.reverse}" if @@debug
			return shortCircuitAnswer unless notShort
			got = token.do(operands)
			return got
		end

		# Assume it's a variable
		return getVariable(token, block)
	end

	# Converts to prefix and evaluates as prefix instead of postfix.
	# This is useful (albeit a tad slower) because it does short circuit
	# of operators ||, && and ?:
	def evalPrefix(&block)
		prefixize
		stack = @prefix.dup
		evalPreOp(true, stack, block)
	end

	#########################
	# Eval
	#
	# We'll default to AST, since it's the most complete (does ?: ops and short circuiting)
	#########################
	def eval(&block)
		evalAST(&block)
	end

	#########################
	# To string
	# Can output stacks as well
	#########################
	def to_s(full = false)
		return @expr.dup unless full
		strs = ["Expr: #{@expr}"]
		if @rpn
			strs = ["RPN: "]
			@rpn.each { |rpn|
				strs << ((rpn.is_a?(ExprOperator)) ? "[#{rpn}]" : rpn)
			}
		end
		if @prefix
			strs.push("\nPre: ")
			@prefix.each { |pre|
				strs << ((pre.is_a?(ExprOperator)) ? "[#{pre}]" : pre)
			}
		end
		if @ast
			strs.push("\nAST:\n#{@ast.inspect}")
		end
		return strs.join(' ')
	end

	#########################
	# Compare expresssions
	# Currently we just compare expr, though to be clever we could compare RPN and
	# that would help us ignore whitespace/parens
	#########################
	def ==(other)
		to_s == other.to_s
	end
end

