(* Programmieraufgabe P-47 (lex.ml) *) (* Leonhard Fellermayr *) open Syntax open String exception IllegalSymbol of char;; exception Really_Weird_Case;; (* ************************************************************************ first : Returns the first character of the given string. Return-Type is string. Returns empty string if there is no first character. ************************************************************************ *) let first s = if (String.length s) == 0 then "" else String.sub s 0 1 (* ************************************************************************ rest : Returns the given string without its first character. ************************************************************************ *) let rest s = if (String.length s) == 0 then "" else String.sub s 1 ((String.length s)-1) (* ************************************************************************ notempty : Returns true if string is NOT empty. Returns false otherwise. ************************************************************************ *) let notempty s = if (String.length s) > 0 then true else false;; (* ************************************************************************ char_of_string : Convert string to char ************************************************************************ *) let char_of_string s = String.get s 0;; (* ************************************************************************ inASCII : Checks whether string contains only ASCII characters from lo to hi. ************************************************************************ *) let rec inASCII s lo hi = if (String.length s) = 0 then false else let fc = char_of_string (first s) in if (fc >= lo) && (fc <= hi) then if (rest s) = "" then true else inASCII (rest s) lo hi else false;; (* ************************************************************************ Token Assignments ************************************************************************ *) let tok_assigns = [("(",TokL); (")",TokR); ("+",TokOp Plus); ("-",TokOp Minus); ("*",TokOp Mal)];; (* ************************************************************************ Token Functions (accessing tok_assigns) ************************************************************************ *) let rec get_special_token ele lst = match lst with ((x,t)::xs) -> if (x = ele) then t else get_special_token ele xs | [] -> raise Really_Weird_Case;; let rec is_special_token ele lst = match lst with ((x,t)::xs) -> if (x = ele) then true else is_special_token ele xs | [] -> false;; (* ************************************************************************ Main Func with a useful buffering mechanism ************************************************************************ *) let rec ew s buf = let fst = first s and rst = rest s in (* (1) fst is a special token -> flush buf, add token, go on *) if (is_special_token fst tok_assigns) then (ew buf "") @ (get_special_token fst tok_assigns) :: (ew (rst) "") (* (2) fst is part of TokNum or TokVar -> append fst to buf, go on *) else if (inASCII fst '0' '9') || ((inASCII fst 'a' 'z') && not (inASCII (first buf) '0' '9')) then ew (rst) (buf ^ fst) (* (4) no other case matched -> we can safely flush buf here *) else if (notempty buf) then if (inASCII buf '0' '9') then (* buf contains 0..9 only -> TokNum *) let buf_i = int_of_string (buf) in (TokNum buf_i) :: (ew s "") else (* else -> TokVar *) (TokVar buf) :: (ew s "") (* (5) always remove leading space and go on with clean buffer *) else if (fst = " ") then ew (rst) "" (* (6) fst is empty -> we are done *) else if (fst = "") then [] (* (7) no other case matched -> let's raise the exception *) else raise (IllegalSymbol (char_of_string fst));; (* ************************************************************************ the user func ************************************************************************ *) let lex s = ew s "";;