From f45d070856c4d919f4dca9944bfbcc3843a1c65a Mon Sep 17 00:00:00 2001
From: Simon Forman
Date: Fri, 23 Sep 2022 17:31:16 -0700
Subject: [PATCH] I think I got the tokenizer right.

---
 implementations/Ocaml/helloworld/bin/main.ml | 74 ++++++++++++++++++++++++++--
 1 file changed, 71 insertions(+), 3 deletions(-)

diff --git a/implementations/Ocaml/helloworld/bin/main.ml b/implementations/Ocaml/helloworld/bin/main.ml
index 0da27c8..3473643 100644
--- a/implementations/Ocaml/helloworld/bin/main.ml
+++ b/implementations/Ocaml/helloworld/bin/main.ml
@@ -19,8 +19,76 @@ let rec joy_to_string jt =
   | JoyTrue -> "true"
   | JoyFalse -> "false"
   | JoyInt i -> string_of_int i
-  | JoyList el -> "[" ^ expression_to_joy el ^ "]"
+  | JoyList el -> "[" ^ expression_to_string el ^ "]"
 
-and expression_to_joy el = String.concat " " (List.map joy_to_string el)
+and expression_to_string el = String.concat " " (List.map joy_to_string el)
 
-let () = print_endline (joy_to_string dummy)
+
+
+type token =
+  | Left_bracket
+  | Right_bracket
+  | Token of string
+
+let delimiter ch = String.contains "[] " ch
+
+(* string -> int -> int -> token * int *)
+let rec tokenize1 str start last =
+  if last >= String.length str || delimiter (String.get str last)
+  then (Token (String.sub str start (last - start)), last)
+  else tokenize1 str start (last + 1)
+
+let rec tokenize0 str start acc =
+  if start >= String.length str
+  then acc
+  else
+    let ch = String.get str start in
+    match ch with
+    | '[' -> Left_bracket :: (tokenize0 str (start + 1) acc)
+    | ']' -> Right_bracket :: (tokenize0 str (start + 1) acc)
+    | ' ' -> tokenize0 str (start + 1) acc
+    | _ -> let (token, n) = tokenize1 str start (start + 1) in
+      token :: (tokenize0 str n acc)
+
+let tokenize str = tokenize0 str 0 []
+
+
+let token_to_string token =
+  match token with
+  | Left_bracket -> "["
+  | Right_bracket -> "]"
+  | Token str -> str
+
+
+(*
+let char_tok ch acc =
+  match ch with
+  | '[' -> Left_bracket :: acc
+  | ']' -> Right_bracket :: acc
+  | ' ' -> acc
+  | x -> (Token x) :: acc
+
+let tokenize str =
+  String.fold_right char_tok str []
+
+let text_to_expression str =
+  let tokens = tokenize str in
+  tokens
+
+let token_to_string token =
+  match token with
+  | Left_bracket -> "["
+  | Right_bracket -> "]"
+  | Token x -> Char.escaped x
+
+let s = String.concat "" (List.map token_to_string (text_to_expression "1 [2]3" ))
+*)
+
+(* let () = print_endline (joy_to_string dummy) *)
+
+let s = String.concat " " (List.map token_to_string (tokenize "1 [2]3" ))
+
+
+let () =
+  print_endline s ;
+  print_endline (joy_to_string dummy)
\ No newline at end of file
-- 
2.11.0