わーい\(^O^)/Whitespaceインタプリタできたよー

id:yuyarinにそそのかされて、whitespaceインタプリタschemeで書いてみた。4時間くらい奮闘して、とりあえずサンプルは全部動くようになった。まだ未実装の動作もあるけれど。ねむい。

概要としては、字句解析→構文解析Schemeのコードに変換→eval というかんじ。

#!/usr/bin/env gosh
;; -*- coding: utf-8 mode: scheme -*-

(use util.match)
(use text.tree)

(define (main args)
  (run-file (cadr args))
  0)

(define (run compiled-code)
  (eval compiled-code (interaction-environment)))

(define (run-file file)
  (run (compile (parse (tokenize-file file)))))

(define (tokenize-file file)
  (with-input-from-file file
    (lambda ()
      (let loop((ret ()))
	(let1 c (read-char)
	  (if (eof-object? c)
	      (reverse ret)
	      (let/cc cc
		(let1 c (case c
			  ((#\space) 'A)
			  ((#\tab) 'B)
			  ((#\newline #\return) 'C)
			  (else (cc (loop ret))))
		  (loop (cons c ret))))))))))

(define (parse tokens)
  (let loop((ret ()) (tokens tokens))
    (if (null? tokens)
	(reverse ret)
	(match tokens
	  (('A 'A . xs1)
	   (receive (num rest)
	       (parse-num xs1)
	     (loop (cons `(ws-push ,num) ret) rest)))
	  (('A 'C 'A . xs2)
	   (loop (cons '(ws-dup) ret) xs2))
	  (('A 'B 'A . xs3)
	   (receive (num rest)
	       (parse-num xs3)
	     (loop (cons `(ws-copy ,num) ret) rest)))
	  (('A 'B 'C . xs4)
	   (receive (num rest)
	       (parse-num xs4)
	     (loop (cons `(ws-slide ,num) ret) rest)))
	  (('A 'C 'B . xs5)
	   (loop (cons '(ws-swap) ret) xs5))
	  (('A 'C 'C . xs6)
	   (loop (cons '(ws-discard) ret) xs6))
	  (('B 'A 'A 'A . xs) ;;  = (Infix Plus):(parse xs)
	   (loop (cons '(ws-plus) ret) xs))
	  (('B 'A 'A 'B . xs) ;;  = (Infix Minus):(parse xs)
	   (loop (cons '(ws-minus) ret) xs))
	  (('B 'A 'A 'C . xs) ;;  = (Infix Times):(parse xs)
	   (loop (cons '(ws-mult) ret) xs))
	  (('B 'A 'B 'A . xs) ;;  = (Infix Divide):(parse xs)
	   (loop (cons '(ws-div) ret) xs))
	  (('B 'A 'B 'B . xs) ;;  = (Infix Modulo):(parse xs)
	   (loop (cons '(ws-mod) ret) xs))
	  (('B 'B 'A . xs) ;;  = Store:(parse xs)
	   (loop (cons '(ws-store) ret) xs))
	  (('B 'B 'B . xs) ;;  = Retrieve:(parse xs)
	   (loop (cons '(ws-load) ret) xs))
	  (('C 'A 'A . xs) ;;  = let (string,rest) = parseString xs in (Label string):(parse rest)
	   (receive (str rest)
	       (parse-string xs)
	     (loop (cons `(ws-label ,str) ret) rest)))
	  (('C 'A 'B . xs) ;;  = let (string,rest) = parseString xs in (Call string):(parse rest)
	   (receive (str rest)
	       (parse-string xs)
	     (loop (cons `(ws-call ,str) ret) rest)))
	  (('C 'A 'C . xs) ;;  = let (string,rest) = parseString xs in (Jump string):(parse rest)
	   (receive (str rest)
	       (parse-string xs)
	     (loop (cons `(ws-jump ,str) ret) rest)))
	  (('C 'B 'A . xs) ;;  = let (string,rest) = parseString xs in (If Zero string):(parse rest)
	   (receive (str rest)
	       (parse-string xs)
	     (loop (cons `(ws-jumpifzero ,str) ret) rest)))
	  (('C 'B 'B . xs) ;;  = let (string,rest) = parseString xs in (If Negative string):(parse rest)
	   (receive (str rest)
	       (parse-string xs)
	     (loop (cons `(ws-jumpifnegative ,str) ret) rest)))
	  (('C 'B 'C . xs) ;;  = Return:(parse xs)
	   (loop (cons '(ws-return) ret) xs))
	  (('C 'C 'C . xs) ;;  = End:(parse xs)
	   (loop (cons '(ws-end) ret) xs))
	  (('B 'C 'A 'A . xs) ;;  = OutputChar:(parse xs)
	   (loop (cons '(ws-outputchar) ret) xs))
	  (('B 'C 'A 'B . xs) ;;  = OutputNum:(parse xs)
	   (loop (cons '(ws-outputnum) ret) xs))
	  (('B 'C 'B 'A . xs) ;;  = ReadChar:(parse xs)
	   (loop (cons '(ws-readchar) ret) xs))
	  (('B 'C 'B 'B . xs) ;;  = ReadNum:(parse xs)
	   (loop (cons '(ws-readnum) ret) xs))
	  (else (error "INVALID INPUT")))))
  )

(define (parse-literal maker tokens)
  (let loop((tokens tokens) (acc ()))
    (match tokens
      (('C . rest) ;; terminator
       (values (maker (reverse acc))
	       rest))
      ((x . rest) 
       (loop rest
	     (cons x acc)))
      (else
       (error "INVALID INPUT: malformed number")))))

(define (parse-num tokens)
  (parse-literal make-number tokens))

(define (make-number tokens)
    (* (case (car tokens)
	 ((A) +1)
	 ((B) -1)
	 (else (error "INVALID INPUT: invalid sign char")))
       (let loop((ret 0) (tokens (cdr tokens)))
	 (if (null? tokens)
	     ret
	     (loop (case (car tokens)
		     ((A) (* 2 ret))
		     ((B) (+ 1 (* 2 ret)))
		     (else (error "INVALID INPUT: invalid digit char")))
		   (cdr tokens))))))

;; actually not string, but symbol
(define (parse-string tokens)
  (parse-literal make-string tokens))

(define (make-string tokens)
  (string->symbol (tree->string tokens)))


(define (compile raw-insns)
  (define (compile-subroutine raw-insns)
    (let loop((main-insns ()) (raw-insns raw-insns))
      (cond
       ((null? raw-insns)
	(values `(lambda ()
		   (let/cc jump-cc 
		     ,@(reverse main-insns)))
		raw-insns))
       ((eq? 'ws-label (caar raw-insns))
	(values `(lambda ()
		   (let/cc jump-cc 
		     ,@(reverse (cons `(,(cadar raw-insns)) main-insns))))
		raw-insns))
       (else
	(loop (cons (car raw-insns) main-insns)
		(cdr raw-insns))))))
  (define (compile-rest raw-insns)
    (if (null? raw-insns)
	()
	(match (car raw-insns)
	  (('ws-label label)
	   (receive (subroutine rest)
	       (compile-subroutine (cdr raw-insns))
	     (cons `(,label ,subroutine)
		   (compile-rest rest)))))))
  (receive (main rest-insns)
      (compile-subroutine raw-insns)
    `(let/cc end-cc
       (letrec ((main ,main)
		,@(compile-rest rest-insns))
	 (main)))))

;;;;
;;;; Whitespace API
;;;;

;;; stack operation
(define *stack* ())
(define (ws-pop)
  (pop! *stack*))
(define (ws-push num)
  (set! *stack* (cons num *stack*)))
(define (ws-dup)
  (set! *stack* (cons (car *stack*) *stack*)))
(define (ws-copy num)
  (ws-push (ref num *stack*)))
(define (ws-discard)
  (pop! *stack*))
(define (ws-swap)
  (let* ((a1 (ws-pop))
	 (a2 (ws-pop)))
    (set! *stack*
	  (cons a2 (cons a1 *stack*)))))
(define (ws-slide num)
  (set! *stack* (cons (car *stack*)
		      (drop num (cdr *stack*)))))

(define (drop n ls)
  (let loop((n n) (ls ls))
    (if (or (null? ls) (= n 0))
	ls
	(loop (- n 1) (cdr ls)))))

;;; arithmetic operation
(define (ws-plus)
  (let1 ret (+ (ws-pop) (ws-pop))
    (ws-push ret)))
(define (ws-minus)
  (let* ((a1 (ws-pop))
	 (a2 (ws-pop)))
    (let1 ret (- a2 a1)
      (ws-push ret))))
(define (ws-mult)
  (let1 ret (* (ws-pop) (ws-pop))
    (ws-push ret)))
(define (ws-div)
  (let* ((a1 (ws-pop))
	 (a2 (ws-pop)))
    (let1 ret (/ a2 a1)
      (ws-push ret))))

(define (ws-outputnum)
  (display (ws-pop))
  (flush))
(define (ws-outputchar)
  (display (integer->char (ws-pop)))
  (flush))

;;; heap
(define *heap* (make-vector 10000 0))
(define (ws-load)
  (ws-push (vector-ref *heap* (ws-pop))))
(define (ws-store)
  (vector-set! *heap* (cadr *stack*) (car *stack*))
  (ws-pop)(ws-pop))

;;; flow control
(define-macro (ws-jump label)
  `(begin
     (,label)
     (jump-cc ())))
(define-macro (ws-jumpifzero label)
  `(if (= 0 (ws-pop))
       (begin (,label)
	      (jump-cc ()))
       ()))
(define-macro (ws-end) '(end-cc ()))
(define-macro (ws-return) '(jump-cc ()))
(define-macro (ws-call label) `(begin (,label)))

;;; I/O
(define (ws-readchar)
  (let1 c (read-char)
    (vector-set! *heap* (ws-pop) (char->integer c))))
(define (ws-readnum)
  (let1 ret (read)
    (if (not (number? ret))
	(error "INVALID INPUT: cannot parse as number.")
	(vector-set! *heap* (ws-pop) ret))))