;; -*- Mode: Irken -*-

;;; where to store properties?
;;; Some properties belong to the node itself.  For example, a RECURSIVE
;;;   flag can apply to a particular call, but not all of them.
;;; Other properties belong to a variable or a function - for example a
;;;   function can also be RECURSIVE.  But the property of escaping belongs
;;;   to the function/variable, not to a particular node - although you could
;;;   fake it by attaching it to the definition of the function/variable.
;;;
;;; The cost of accessing these flags could be fairly high, since
;;;   we've chosen to use a red-black tree to store them, and that
;;;   involves symbol (and thus string) comparisons. In an old-style
;;;   lisp, we could throw things onto the property list of each
;;;   symbol.  We don't have or want that, but maybe it might make
;;;   sense to reproduce it in some way?  Rather than using 'symbols'
;;;   to hold names, we make a new symbol-like object to which we can
;;;   attach data?

(define (analyze exp context)
  ;; clear the variable table
  (set! context.vars (tree:empty))
  ;; rebuild it
  (build-vars exp context)
  (find-recursion exp context)
  (find-refs exp context)
  (escape-analysis exp context))

;; XXX consider combining these passes

(define (find-recursion exp context)

  (define (walk exp fenv)
    (match exp.t with
      (node:call)
      -> (let ((rator (nth exp.subs 0)))
	   (match rator.t with
	     (node:varref name)
	     -> (let ((var (vars-get-var context name)))
		  (cond ((member-eq? name fenv)
			 (set! var.flags (bit-set var.flags VFLAG-RECURSIVE))
			 (node-set-flag! exp NFLAG-RECURSIVE)))
		  (set! var.calls (+ 1 var.calls)))
	     _ -> #u
	     ))
      (node:function name _)
      -> (begin
	   (PUSH fenv name)
	   (vars-set-flag! context name VFLAG-FUNCTION))
      _ -> #u)
    (for-each (lambda (x) (walk x fenv)) exp.subs))
  
  (walk exp '()))


(define (find-refs node context)

  (define (add-ref name)
    (let ((var (vars-get-var context name)))
      (set! var.refs (+ 1 var.refs))))

  (define (add-set name)
    (let ((var (vars-get-var context name)))
      (set! var.sets (+ 1 var.sets))))

  (define (walk node)
    (match node.t with
      (node:varref name) -> (add-ref name)
      (node:varset name) -> (add-set name)
      _ -> #u)
    (for-each walk node.subs))

  (walk node))

(define (symbol-add-suffix sym suffix)
  (string->symbol (format (symbol->string sym) suffix)))

(define inline-threshold 13)

(define (do-inlining root context)

  (let ((inline-counter (make-counter 0))
	(rename-counter (make-counter 0)))

    (define (set-multiplier name calls)
      ;; XXX NYI
      #u)

    (define (inline node fenv)

      (let/cc return

	  (match node.t with
	    (node:fix names)
	    -> (for-range
		   i (length names)
		   (set! fenv (alist:entry (nth names i)
					   (nth node.subs i)
					   fenv)))

	    (node:call)
	    -> (match node.subs with
		 () -> (impossible)
		 ({t=(node:varref name) ...} . rands)
		 -> (let ((fun (alist/get fenv name "inline"))
			  (var (vars-get-var context name))
			  (escapes (bit-get var.flags VFLAG-ESCAPES))
			  (recursive (bit-get var.flags VFLAG-RECURSIVE))
			  (calls var.calls))
		      (print-string (format "testing " (sym name) " calls " (int calls) " escapes " (bool escapes) " recursive " (bool recursive) "\n"))
		      (cond ((and (not (eq? (string-ref (symbol->string name) 0) #\^))
				  (> calls 0)
				  (and (or (<= fun.size inline-threshold)
					   (and (= calls 1) (not escapes)))
				       (not recursive)))
			     (if (> calls 1)
				 (set-multiplier name calls))
			     ;; XXX re-walk ('sneaky')
			     ;;(return (inline (inline-application fun rands) fenv))
			     (let ((r (inline-application fun rands)))
			       (print-string "inlined, looks like this:")
			       (pp-node r 4) (newline)
			       (return r)))
			    (else #u)))
		 ;;({t=(node:function name formals) ...})
		 ;; _ -> node))
		 _ -> #u)
	    _ -> #u)
    
	(set! node.subs (map (lambda (x) (inline x fenv)) node.subs))
	node
	))

    (define (instantiate fun)
      (let ((new-vars '())
	    (suffix (format "_i" (int (inline-counter.inc))))
	    (body (nth fun.subs 0)))

	(define (append-suffix sym)
	  (string->symbol (format (symbol->string sym) suffix)))

	(define (rename node lenv)

	  (define (get-new-names names)
	    (let ((names0 (map append-suffix names)))
	      (set! new-vars (append names0 new-vars))
	      (set! lenv (append names lenv))
	      names0))

	  ;; start with a copy of this node.
	  (set! node (node-copy node))
	  (match node.t with
	    (node:let names) -> (set! node.t (node:let (get-new-names names)))
	    (node:fix names) -> (set! node.t (node:fix (get-new-names names)))
	    ;; XXX should we rename the function too?
	    (node:function name formals) -> (set! node.t (node:function name (get-new-names formals)))
	    (node:varref name) -> (if (member-eq? name lenv)
				      (set! node.t (node:varref (append-suffix name))))
	    (node:varset name) -> (if (member-eq? name lenv)
				      (set! node.t (node:varset (append-suffix name))))
	    _ -> #u)
	  (set! node.subs (map (lambda (x) (rename x lenv)) node.subs))
	  node)

	(rename body '())
	))

    (define (safe-nvget-inline rands)
      (match rands with
	({t=(node:primapp '%nvget' _) subs=({t=(node:varref name) ...} . _)  ...} . _)
	-> (let ((var (vars-get-var context name)))
	     (= 0 var.sets))
	_ -> #f))

    (define (inline-application fun rands)
      (let ((simple '())
	    (complex '())
	    (n (length rands))
	    (body (instantiate fun))) ;; alpha converted copy of the function
	(match fun.t with
	  (node:function name formals)
	  -> (cond ((not (= n (length formals))) (error1 "inline: bad arity" fun))
		   (else (for-range
			     i n
			     (let ((formal (nth formals i))
				   (fvar (vars-get-var context formal))
				   (rand (nth rands i)))
			       (match rand.t with
				 (node:literal _) -> (PUSH simple i)
				 (node:varref arg)
				 -> (let ((avar (vars-get-var context arg)))
				      (if (or (> avar.sets 0) (> fvar.sets 0))
					  (PUSH complex i)
					  (PUSH simple i)))
				 _ -> (if (and (= 1 fvar.refs) (safe-nvget-inline rands))
					  (PUSH simple i)
					  (PUSH complex i)))))
			 (print-string "name=") (printn name)
			 (print-string "simple=") (printn simple)
			 (print-string "complex=") (printn complex)
			 (let ((substs
				(if (not (null? simple))
				    (map (lambda (i) (:pair (nth formals i) (nth rands i))) simple)
				    '())))
			   (if (eq? complex (list:nil))
			       ;; simple - substitute arguments directly
			       (substitute body substs)
			       ;; complex - bind args into (let ...), then inline body
			       ;; generate new names for complex args
			       (let ((names '())
				     (inits '())
				     (nc (length complex)))
				 (for-each
				  (lambda (i)
				    (let ((name (symbol-add-suffix
						 (nth formals i)
						 (format "_i" (int (rename-counter.inc))))))
				      (PUSH names name)
				      (PUSH inits (nth rands i))
				      (PUSH substs (:pair (nth formals i) (node/varref name)))
				      ))
				  complex)
				 (let ((body (substitute body substs)))
				   (print-string "substituted body:")
				   (pp-node body 4) (newline)
				   (node/let (reverse names) (reverse inits) body)))
			       ))))
	  _ -> (error "inline-application")
	  )))

    (define (substitute body substs)

      (define (lookup name)
	(let loop ((substs substs))
	  (match substs with
	    () -> (maybe:no)
	    ((:pair from to) . tl)
	    -> (if (eq? name from)
		   (maybe:yes to)
		   (loop tl)))))

      (define (walk node)
	(let ((node0
	       (match node.t with
		 (node:varref name)
		 -> (match (lookup name) with
		      (maybe:yes val) -> val
		      (maybe:no) -> node)
		 (node:varset name)
		 -> (match (lookup name) with
		      (maybe:yes val) -> (node/varset name val)
		      (maybe:no) -> node)
		 _ -> node)))
	  (set! node0.subs (map walk node0.subs))
	  node0))
    
      (walk body))

    ;; body of do-inlining
    (inline root (alist:nil))
    ))

(define (escape-analysis root context)

  (let ((escaping-funs '()))

    ;; for each variable, we need to know if it might potentially
    ;;  escape.  a variable 'escapes' when it is referenced while free
    ;;  inside a function that escapes (i.e., any function that is
    ;;  varref'd outside of the operator position).
  
    (define (fun-escapes name node)
      (vars-set-flag! context name VFLAG-ESCAPES)
      (PUSH escaping-funs node))

    (define (find-escaping-functions node parent)
      (match node.t with
	(node:function name _)
	-> (match parent.t with
	     (node:fix _) -> #u
	     ;; any function defined outside a fix (i.e., a lambda) is by
	     ;;   definition an escaping function - because we always reduce
	     ;;   ((lambda ...) ...) to (let ...)
	     _ -> (fun-escapes name node))
	(node:varref name)
	-> (if (vars-get-flag context name VFLAG-FUNCTION)
	       (match parent.t with
		 (node:call)
		 ;; any function referenced in a non-rator position
		 -> (if (not (eq? (first parent.subs) node))
			(fun-escapes name node))
		 _ -> (fun-escapes name node)))
	_ -> #u)
      (for-each (lambda (x) (find-escaping-functions x node)) node.subs))

    (define (maybe-var-escapes name lenv)
      (if (not (member-eq? name lenv))
	  ;; reference to a free variable. flag it as escaping.
	  (vars-set-flag! context name VFLAG-ESCAPES)))

    ;; XXX make sure we still need to know if particular variables escape.

    ;; within each escaping function, we search for escaping variables.
    (define (find-escaping-variables node lenv)
      (match node.t with
	;; the three binding constructs extend the environment...
	(node:function _ formals) -> (set! lenv (append formals lenv))
	(node:fix names) -> (set! lenv (append names lenv))
	(node:let names) -> (set! lenv (append names lenv))
	;; ... and here we search the environment.
	(node:varref name) -> (maybe-var-escapes name lenv)
	(node:varset name) -> (maybe-var-escapes name lenv)
	_ -> #u)
      (for-each (lambda (x) (find-escaping-variables x lenv)) node.subs))

    ;; first we identify escaping functions
    (find-escaping-functions root (node/literal (literal:int 0)))
    (for-each
     (lambda (fun)
       (find-escaping-variables fun '()))
     escaping-funs)
    ))


;; ok, what all is done inside analyze.py?
;;  find aliases
;;  optimize nvcase
;;  transforms
;;  recursion
;;  find applications / tree-walk
;;  escape analysis
;;  call graph
;;  inlining
;;  again: transform, find-applications
;;  pruning
;;  again: escape analysis
;;  find leaves

;; what transforms are done?
;; primapp: only &vcase
;; conditional: optimized #t/#f case
;; let_splat:
;;   coalesce
;;   (let (x <init>) x) => <init>
;;   (let (x (let ((a _) (b _) ...) ...)))
;; varref: aliases
;; fix: coalesce
;; sequence: coalesce
;; pvcase: this is the only difficult one
;;   change %vcase prims into a single node
;; 
;; It would be nice if we could let the macro system
;;   handle the transforms, but we can't: the transforms
;;   are applied again after inlining.  Maybe if we applied
;;   inlining *before* turning into nodes?  That'd be a pretty
;;   big design change though.

;; what's needed for inlining?
;; replace
;; instantiate
;; substitute
;; count fun calls
;; count assignments
;; inline multiplier

(define (replace-nodes p node)
  (define (replace n)
    (let ((n0 (p n))
	  (new-subs (map replace n0.subs)))
      (set! n0.subs new-subs)
    n0))
  (replace node))

;; simple cascading optimizations - these only work from the
;; outside-in, not the inside-out, so we make repeated passes
;; until we don't get any more.

(define simpleopt-hits 0)

(define (do-simple-optimizations node)
  (set! simpleopt-hits 0)
  (let loop ((r (simpleopt node)))
    (print-string (format "simpleopt: " (int simpleopt-hits) "\n"))
    (cond ((> simpleopt-hits 0)
	   (set! simpleopt-hits 0)
	   (loop (simpleopt r)))
	  (else r))))

(define (simpleopt node)
  
  ;; early exit means we've rewritten the node, and will recur without
  ;;   the default processing at the end.
  (let/cc return

    ;; assume we'll hit an optimization, undo it down below if not.
    (set! simpleopt-hits (+ 1 simpleopt-hits))
    (match node.t with

      (node:fix ())
      ;; empty fix
      -> (return (simpleopt (first node.subs)))

      (node:fix names0)
      -> (match (unpack-fix node.subs) with
	   ;; body of fix is another fix...
	   (:fixsubs {t=(node:fix names1) subs=subs1 ...} inits0)
	   -> (match (unpack-fix subs1) with
		(:fixsubs body1 inits1)
		-> (return
		    (simpleopt
		     (node/fix (append names0 names1)
			       (append inits0 inits1)
			       body1))))
	   _ -> #u)

      (node:let ())
      ;; empty let
      -> (return (simpleopt (first node.subs)))

      (node:let names0)
      -> (match (unpack-fix node.subs) with
	   ;; body of let is another let...
	   (:fixsubs {t=(node:let names1) subs=subs1 ...} inits0)
	   -> (match (unpack-fix subs1) with
		(:fixsubs body1 inits1)
		-> (return
		    (simpleopt
		     (node/let (append names0 names1)
			       (append inits0 inits1)
			       body1))))
	   (:fixsubs body0 inits0)
	   ;; search for any let in the inits
	   -> (let ((n (length inits0)))
		(for-range
		    i n
		    (match (nth inits0 i) with
		      {t=(node:let names1) subs=subs1 ...}
		      -> (match (unpack-fix subs1) with
			   (:fixsubs body1 inits1)
			   -> (return
			       (simpleopt
				;; (let (a b (c (let (d e) <body1>)) f g) <body0>)
				;;   => (let (a b d e (c <body1>) f g) <body0>)
				;; Note: this looks like it makes <d> and <e> visible where they shouldn't be,
				;;   but access to those variables in <body0> will have already been caught
				;;   during earlier phases of the compiler.
				(node/let (append (append (slice names0 0 i) names1) (slice names0 i n))
					  (append (append (slice inits0 0 i) inits1) (list:cons body1 (slice inits0 (+ i 1) n)))
					  body0))))
		      _ -> #u))
		#u))

      (node:if)
      -> (match node.subs with
	   ;; (if #t a b) => a
	   ({t=(node:literal (literal:bool b)) ...} then else)
	   -> (if b (return (simpleopt then)) (return (simpleopt else)))
	   _ -> #u)
	
      (node:sequence)
      -> (if (= 1 (length node.subs))
	     ;; single-item sequence
	     (return (simpleopt (nth node.subs 0)))
	     ;; sequence within sequence
	     (let ((subs0 node.subs)
		   (n (length subs0)))
	       (for-range
		   i n
		   (match (nth subs0 i) with
		     {t=(node:sequence) subs=subs1 ...}
		     -> (return
			 (simpleopt
			  (node/sequence
			   (append (append (slice subs0 0 i) subs1) (slice subs0 (+ 1 i) n)))))
		     _ -> #u))
	       #u))

      _ -> #u)
    (set! simpleopt-hits (- simpleopt-hits 1))
    ;; if we get here, there was no early exit, so just recurse
    ;;  onto the sub-expressions
    (let ((new-subs (map simpleopt node.subs)))
      (set! node.subs new-subs)
      node
      )))

(define removed-count 0)

(define (do-trim node context)
  (set! removed-count 0)
  (analyze node context)
  (let loop ((r (trim-unused node context)))
    (print-string (format "trim-unused: " (int removed-count) "\n"))
    (cond ((> removed-count 0)
	   (set! removed-count 0)
	   (analyze r context)
	   (loop (trim-unused r context)))
	  (else r))))

(define (trim-unused exp context)
  
  (define (walk exp)
    (let/cc return
      (set! removed-count (+ removed-count 1))
      (match exp.t with
	(node:fix names)
	-> (let ((n (length names))
		 (inits exp.subs)
		 (remove '()))
	     (for-range
		 i n
		 (let ((name (nth names i))
		       (var (vars-get-var context name)))
		   (if (= 0 var.refs)
		       (PUSH remove i))))
	     (if (null? remove)
		 #u
		 (let ((new-names '())
		       (new-inits '())
		       (trimmed (map (lambda (i) (nth names i)) remove)))
		   (print-string (format "trimming: " (join symbol->string ", " trimmed) "\n"))
		   (for-range
		       i n
		       (cond ((not (member-eq? i remove))
			      (PUSH new-names (nth names i))
			      (PUSH new-inits (nth inits i)))))
		   (return (node/fix (reverse new-names)
				     (reverse new-inits)
				     (walk (last inits))))
		   )))
	_ -> #u)
      (set! removed-count (- removed-count 1))
      (set! exp.subs (map walk exp.subs))
      exp))

  (walk exp))