Racket - 用 token-char 定义一个字符
Racket - define one character with token-char
我正在为一门课程做一个项目，我们的任务是为 Python 和 Racket 中的数字、符号、注释、算术运算符、括号和 EOF 编写扫描器。我正在编写 Racket 版本，我写了下面这一行，用来把一个或多个字符定义为一个符号：
[(any-char) (token-CHAR (string->character lexeme))]
我有以下行将一个或多个数字定义为数字:
[(:+ digit) (token-NUM (string->number lexeme))]
我是 Racket 的新手,这是我的第三个程序,所以我不确定如何处理这个问题,所以非常感谢任何建议。我已经搜索了 Racket 文档,但找不到我要找的东西。
谢谢!
下面是一个最小的入门示例——带有大量注释。
#lang racket
;;; IMPORT
;; Import the lexer tools
(require parser-tools/yacc
parser-tools/lex
(prefix-in : parser-tools/lex-sre) ; names from lex-sre are prefixed with :
; to avoid name collisions
syntax/readerr)
;;; REGULAR EXPRESSIONS
;; Names for regular expressions matching letters and digits.
;; Note that :or are prefixed with a : due to (prefix-in : ...) above
;; Named character classes for use inside lexer patterns.
;; :or and :/ come from parser-tools/lex-sre and carry the : prefix
;; because of the (prefix-in : ...) clause in the require form.
(define-lex-abbrevs
  ;; An ASCII letter, lower or upper case.
  [letter (:or (:/ #\a #\z) (:/ #\A #\Z))]
  ;; A decimal digit; (:/ lo hi) matches any character in the inclusive range.
  [digit (:/ #\0 #\9)])
;;; TOKENS
;; Value-carrying tokens: each generated constructor (for example
;; token-NUMBER) takes the value to attach to the token.
;; The example lexer only produces NUMBER; IDENTIFIER and STRING are
;; declared for later use.
(define-tokens value-tokens
  (NUMBER
   IDENTIFIER
   STRING))

;; Empty tokens: they carry no value, so the token name alone identifies them.
(define-empty-tokens op-tokens
  (newline
   := = < > + - * / ^
   EOF))
;;; LEXER
;; Here the lexer (aka the scanner) is defined.
;; The construct lexer-src-pos evaluates to a function which scans an input port
;; returning one position-token at a time.
;; A position token contains besides the actual token also source location information
;; (i.e. you can see where in the file the token was read)
(define lex
  ;; Scanner built with lexer-src-pos: each call reads one token from the
  ;; given input port and returns it wrapped in a position-token.
  (lexer-src-pos
   ;; End of input: produce the symbol EOF.
   [(eof)
    'EOF]
   ;; Tabs and spaces are skipped by recursing for the next token.
   ;; return-without-pos hands back the recursive call's position-token
   ;; as is, instead of wrapping it in source positions a second time.
   ;; #\newline is deliberately NOT listed here: when two rules match the
   ;; same text the earlier rule wins, so listing it would make the
   ;; newline rule below unreachable.
   [(:or #\tab #\space)
    (return-without-pos (lex input-port))]
   ;; A newline is significant: produce the empty newline token
   ;; ((token-newline) returns 'newline).
   [#\newline
    (token-newline)]
   ;; Operators: the matched text itself names the token, e.g. "+" -> '+.
   [(:or ":=" "+" "-" "*" "/" "^" "<" ">" "=")
    (string->symbol lexeme)]
   ;; One or more digits: a NUMBER token whose value is the number
   ;; (retrievable with token-value).
   [(:+ digit)
    (token-NUMBER (string->number lexeme))]))
;;; TEST
;; Scan "123+456" one token at a time. Every call to lex reads from the same
;; port, so each call continues where the previous one stopped; the trailing
;; comments show the position-token each call returns.
(define input (open-input-string "123+456"))
(lex input) ; (position-token (token 'NUMBER 123) (position 1 #f #f) (position 4 #f #f))
(lex input) ; (position-token '+ (position 4 #f #f) (position 5 #f #f))
(lex input) ; (position-token (token 'NUMBER 456) (position 5 #f #f) (position 8 #f #f))
(lex input) ; (position-token 'EOF (position 8 #f #f) (position 8 #f #f))
;; Let's make it a little easier to play with the lexer.
;; string->tokens : string -> list of position-tokens
;; Opens the string as an input port and scans it to the end.
(define (string->tokens s)
  (let ([source (open-input-string s)])
    (port->tokens source)))
;; port->tokens : input-port -> list of position-tokens
;; Calls lex repeatedly on the port and returns the tokens in the order
;; they were read. Scanning stops at the EOF token, which is not included.
(define (port->tokens in)
  (let loop ([collected '()])
    (let ([next (lex in)])
      (cond
        [(eq? (position-token-token next) 'EOF) (reverse collected)]
        [else (loop (cons next collected))]))))
;; Scan "123*45/3" and keep only the bare tokens, dropping the positions.
(for/list ([pt (in-list (string->tokens "123*45/3"))])
  (position-token-token pt))
; Output:
; (list (token 'NUMBER 123)
;       '*
;       (token 'NUMBER 45)
;       '/
;       (token 'NUMBER 3))
我正在为一门课程做一个项目，我们的任务是为 Python 和 Racket 中的数字、符号、注释、算术运算符、括号和 EOF 编写扫描器。我正在编写 Racket 版本，我写了下面这一行，用来把一个或多个字符定义为一个符号：
[(any-char) (token-CHAR (string->character lexeme))]
我有以下行将一个或多个数字定义为数字:
[(:+ digit) (token-NUM (string->number lexeme))]
我是 Racket 的新手,这是我的第三个程序,所以我不确定如何处理这个问题,所以非常感谢任何建议。我已经搜索了 Racket 文档,但找不到我要找的东西。
谢谢!
下面是一个最小的入门示例——带有大量注释。
#lang racket
;;; IMPORT
;; Import the lexer tools
(require parser-tools/yacc
parser-tools/lex
(prefix-in : parser-tools/lex-sre) ; names from lex-sre are prefixed with :
; to avoid name collisions
syntax/readerr)
;;; REGULAR EXPRESSIONS
;; Names for regular expressions matching letters and digits.
;; Note that :or are prefixed with a : due to (prefix-in : ...) above
;; Named character classes for use inside lexer patterns.
;; :or and :/ come from parser-tools/lex-sre and carry the : prefix
;; because of the (prefix-in : ...) clause in the require form.
(define-lex-abbrevs
  ;; An ASCII letter, lower or upper case.
  [letter (:or (:/ #\a #\z) (:/ #\A #\Z))]
  ;; A decimal digit; (:/ lo hi) matches any character in the inclusive range.
  [digit (:/ #\0 #\9)])
;;; TOKENS
;; Value-carrying tokens: each generated constructor (for example
;; token-NUMBER) takes the value to attach to the token.
;; The example lexer only produces NUMBER; IDENTIFIER and STRING are
;; declared for later use.
(define-tokens value-tokens
  (NUMBER
   IDENTIFIER
   STRING))

;; Empty tokens: they carry no value, so the token name alone identifies them.
(define-empty-tokens op-tokens
  (newline
   := = < > + - * / ^
   EOF))
;;; LEXER
;; Here the lexer (aka the scanner) is defined.
;; The construct lexer-src-pos evaluates to a function which scans an input port
;; returning one position-token at a time.
;; A position token contains besides the actual token also source location information
;; (i.e. you can see where in the file the token was read)
(define lex
  ;; Scanner built with lexer-src-pos: each call reads one token from the
  ;; given input port and returns it wrapped in a position-token.
  (lexer-src-pos
   ;; End of input: produce the symbol EOF.
   [(eof)
    'EOF]
   ;; Tabs and spaces are skipped by recursing for the next token.
   ;; return-without-pos hands back the recursive call's position-token
   ;; as is, instead of wrapping it in source positions a second time.
   ;; #\newline is deliberately NOT listed here: when two rules match the
   ;; same text the earlier rule wins, so listing it would make the
   ;; newline rule below unreachable.
   [(:or #\tab #\space)
    (return-without-pos (lex input-port))]
   ;; A newline is significant: produce the empty newline token
   ;; ((token-newline) returns 'newline).
   [#\newline
    (token-newline)]
   ;; Operators: the matched text itself names the token, e.g. "+" -> '+.
   [(:or ":=" "+" "-" "*" "/" "^" "<" ">" "=")
    (string->symbol lexeme)]
   ;; One or more digits: a NUMBER token whose value is the number
   ;; (retrievable with token-value).
   [(:+ digit)
    (token-NUMBER (string->number lexeme))]))
;;; TEST
;; Scan "123+456" one token at a time. Every call to lex reads from the same
;; port, so each call continues where the previous one stopped; the trailing
;; comments show the position-token each call returns.
(define input (open-input-string "123+456"))
(lex input) ; (position-token (token 'NUMBER 123) (position 1 #f #f) (position 4 #f #f))
(lex input) ; (position-token '+ (position 4 #f #f) (position 5 #f #f))
(lex input) ; (position-token (token 'NUMBER 456) (position 5 #f #f) (position 8 #f #f))
(lex input) ; (position-token 'EOF (position 8 #f #f) (position 8 #f #f))
;; Let's make it a little easier to play with the lexer.
;; string->tokens : string -> list of position-tokens
;; Opens the string as an input port and scans it to the end.
(define (string->tokens s)
  (let ([source (open-input-string s)])
    (port->tokens source)))
;; port->tokens : input-port -> list of position-tokens
;; Calls lex repeatedly on the port and returns the tokens in the order
;; they were read. Scanning stops at the EOF token, which is not included.
(define (port->tokens in)
  (let loop ([collected '()])
    (let ([next (lex in)])
      (cond
        [(eq? (position-token-token next) 'EOF) (reverse collected)]
        [else (loop (cons next collected))]))))
;; Scan "123*45/3" and keep only the bare tokens, dropping the positions.
(for/list ([pt (in-list (string->tokens "123*45/3"))])
  (position-token-token pt))
; Output:
; (list (token 'NUMBER 123)
;       '*
;       (token 'NUMBER 45)
;       '/
;       (token 'NUMBER 3))