(* Raise the interactive top level's print depth to 100 so that nested
   values (such as the token lists produced below) are displayed more
   fully instead of being elided.
   NOTE(review): the `Compiler.` prefix is the path used by older SML/NJ
   releases; newer releases appear to expose this setting as
   Control.Print.printDepth - confirm against the target compiler. *)
Compiler.Control.Print.printDepth:= 100;
(* Tokens produced by the lexer. Constructor order is unchanged. *)
datatype token
  = ID of string   (* letter followed by letters/digits, e.g. "x1" *)
  | NUM of int     (* run of decimal digits, already converted to an int *)
  | Plus           (* "+" *)
  | Minus          (* "-" *)
  | Mult           (* "*" *)
  | Div            (* "/" *)
  | Neg            (* "~" *)
  | LParen         (* "(" *)
  | RParen         (* ")" *)
  | EOF;           (* marks the end of the input *)
(* Interface of the lexer: both entry points yield a token list. *)
signature PCFLEXER =
sig
  (* lex filename: tokenize the contents of the named file. *)
  val lex : string -> token list

  (* lexstr s: tokenize the characters of the string s itself. *)
  val lexstr : string -> token list
end
(* Lexer for arithmetic expressions built from identifiers, non-negative
   integer literals, the operators + - * / ~ and parentheses.
   Only lex and lexstr are visible through PCFLEXER; the remaining
   functions are internal helpers. *)
structure PCFlexer: PCFLEXER =
struct
  open TextIO;

  (* Return true if c is a letter or digit. *)
  fun alphanum c = Char.isAlpha c orelse Char.isDigit c

  (* getid input id
     Extends the identifier text id with the leading alphanumeric
     characters of input.
     Precondition: the first character of the identifier has already
     been consumed by the caller and is passed in as id.
     Parameter 1: remaining input (character list).
     Parameter 2: identifier text collected so far.
     Returns: (complete identifier, input left over after it). *)
  fun getid nil id = (id, nil)
    | getid (s as c::rest) id =
        if alphanum c then getid rest (id ^ str c)
        else (id, s)

  (* getnum input num
     Extends the decimal value num with the leading digits of input.
     Precondition: the first digit has already been consumed by the
     caller and its value is passed in as num.
     Parameter 1: remaining input (character list).
     Parameter 2: value of the digits collected so far.
     Returns: (complete integer, input left over after it).
     NOTE(review): with a fixed-precision int, a very long digit run
     presumably raises Overflow - confirm if large literals matter. *)
  fun getnum nil num = (num, nil)
    | getnum (s as c::rest) num =
        if Char.isDigit c then
          getnum rest (num * 10 + (ord c - ord #"0"))
        else (num, s)

  (* gettokens input
     Return the list of tokens found in the character list input,
     always terminated by EOF. White space is skipped. Any character
     that cannot start a token is reported with print and skipped. *)
  fun gettokens nil = [EOF]
    | gettokens (#"+"::rest) = Plus :: gettokens rest
    | gettokens (#"-"::rest) = Minus :: gettokens rest
    | gettokens (#"*"::rest) = Mult :: gettokens rest
    | gettokens (#"/"::rest) = Div :: gettokens rest
    | gettokens (#"~"::rest) = Neg :: gettokens rest
    | gettokens (#"("::rest) = LParen :: gettokens rest
    | gettokens (#")"::rest) = RParen :: gettokens rest
    | gettokens (c::rest) =
        if Char.isSpace c then
          (* Recurse to skip white space *)
          gettokens rest
        else if Char.isAlpha c then
          (* Identifier: a letter followed by letters/digits.
             No keywords are recognized; every word becomes an ID. *)
          let
            val (id, remainder) = getid rest (str c)
          in
            ID id :: gettokens remainder
          end
        else if Char.isDigit c then
          (* Number: seed getnum with the value of the first digit *)
          let
            val (num, remainder) = getnum rest (ord c - ord #"0")
          in
            NUM num :: gettokens remainder
          end
        else
          (print ("Skipping illegal character " ^ str c ^ ".");
           gettokens rest)

  (* Returns the list of tokens found in a string. *)
  fun lexstr s = gettokens (explode s)

  (* Returns the list of tokens found in a file.
     Parameter: filename
     Exceptions raised by openIn or while reading propagate to the
     caller; the stream is closed before a read failure is re-raised. *)
  fun lex file =
    let
      val strm = openIn file
      (* inputAll reads up to end-of-stream. Plain `input` returns only
         the next available chunk, which silently truncated files
         larger than one buffer. *)
      val filecontents =
        explode (inputAll strm)
        handle e => (closeIn strm; raise e)
    in
      (closeIn strm; gettokens filecontents)
    end
end;