combo icon indicating copy to clipboard operation
combo copied to clipboard

A simple parser combinator library for Ocaml

  • Combo Combo is a simple parser combinator library for OCaml providing common parsers and combinators. It is highly inspired by [[http://www.cs.uu.nl/research/techreps/repo/CS-2008/2008-044.pdf][this]] paper with a few changes to integrate better with OCaml and ideas taken from other libraries. It is named combo because it is based on COMBining COMBinators in OCaml. ** Example Here is a simple calculator described in the paper: #+BEGIN_SRC ocaml open Combo open Fun

    let positive = int_of_string <$> (inplode <$> many1 digit) let int = opt id (( * ) (-1) <$ char '-') <*> positive

    let op (f, s) = spaces *> (f <$ word s) <* spaces let anyop l = choice (List.map op l) let addops = anyop [(+), "+"; (-), "-"] let mulops = anyop [( * ), "*"] let rec expr s = List.fold_right chainl1 [addops; mulops] (int <|> packs "(" expr ")") s

    let () = let s = read_line () in match expr (explode s) with None -> print_endline "ERROR: bad expression." | Some (n, _) -> print_int n #+END_SRC More examples can be found in the example directory (note: the JSON example needs this [[https://github.com/ocaml-ppx/ppx_deriving][ppx]]). ** Installation Combo is not yet in opam, so it needs ~dune~ to be installed. Assuming you are on a Unix-like operating system, you can do: #+BEGIN_SRC shell git clone https://github.com/4y8/combo.git cd combo/src dune build ./combo.a dune install #+END_SRC ** Documentation (extracted from the mli file) *** Utils ~explode s~ turns the string ~s~ into a list of characters. #+BEGIN_SRC ocaml val explode : string -> char list #+END_SRC ~inplode l~ turns the list of characters ~l~ into a string. #+BEGIN_SRC ocaml val inplode : char list -> string #+END_SRC ~parser~ is the type of parsers. #+BEGIN_SRC ocaml type ('a, 'b) parser = 'a list -> ('b * 'a list) option #+END_SRC *** Basic combinators ~return a~ is a basic combinator that always succeeds returning the value ~a~. #+BEGIN_SRC ocaml val return : 'a -> ('s, 'a) parser #+END_SRC ~fail~ is a basic combinator which always fails. #+BEGIN_SRC ocaml val fail: ('s, 'a) parser #+END_SRC ~p <*> q~ is the sequence combinator applying the result of parser ~p~ to the parser ~q~. #+BEGIN_SRC ocaml val ( <*> ) : ('s, 'b -> 'a) parser -> ('s, 'b) parser -> ('s, 'a) parser #+END_SRC ~p <**> q~ is the sequence combinator applying the result of parser ~q~ to the parser ~p~, it is the same as ~<*>~ but in the other way. #+BEGIN_SRC ocaml val ( <**> ) : ('s, 'b) parser -> ('s, 'b -> 'a) parser -> ('s, 'a) parser #+END_SRC ~<??>~ is the reverse sequencing operator but which doesn't modify the first result if the second one failed. #+BEGIN_SRC ocaml val ( <??> ) : ('s, 'a) parser -> ('s, 'a -> 'a) parser -> ('s, 'a) parser #+END_SRC Sequence monad.
#+BEGIN_SRC ocaml val ( >>= ) : ('s, 'a) parser -> ('a -> ('s, 'b) parser) -> ('s, 'b) parser #+END_SRC ~p <|> q~ is the choice combinator trying the parser ~p~, if it works, returns the result, else returns the result of the parser ~q~. #+BEGIN_SRC ocaml val ( <|> ) : ('s, 'a) parser -> ('s, 'a) parser -> ('s, 'a) parser #+END_SRC ~f <$> p~ is the map combinator applying the function ~f~ to the witness returned by the parser ~p~, if it succeeds. #+BEGIN_SRC ocaml val ( <$> ) : ('b -> 'a) -> ('s, 'b) parser -> ('s, 'a) parser #+END_SRC ~p <&> f~ is the flipped map combinator applying the function ~f~ to the witness returned by the parser ~p~, if it succeeds. #+BEGIN_SRC ocaml val ( <&> ) : ('b -> 'a) -> ('s, 'b) parser -> ('s, 'a) parser #+END_SRC ~f <$ p~ is the map combinator ignoring the value returned by the parser ~p~. #+BEGIN_SRC ocaml val ( <$ ) : 'a -> ('s, 'b) parser -> ('s, 'a) parser #+END_SRC ~p $> f~ is the reverse map combinator ignoring the value returned by the parser ~p~. #+BEGIN_SRC ocaml val ( $> ) : ('s, 'a) parser -> 'b -> ('s, 'b) parser #+END_SRC ~p *> q~ is the sequence combinator but ignores the value returned by the parser ~p~, it's the missing bracket. #+BEGIN_SRC ocaml val ( *> ) : ('s, 'a) parser -> ('s, 'b) parser -> ('s, 'b) parser #+END_SRC ~p <* q~ is the sequence combinator but ignores the value returned by the parser ~q~, it's the missing bracket. #+BEGIN_SRC ocaml val ( <* ) : ('s, 'a) parser -> ('s, 'b) parser -> ('s, 'a) parser #+END_SRC ~p <?> err~ is the error combinator raising the error ~err~ if the parser ~p~ failed. #+BEGIN_SRC ocaml val ( <?> ) : ('s, 'a) parser -> exn -> ('s, 'a) parser #+END_SRC ~choice l~ is a combinator that turns the list of parsers ~l~ into a single one which will match one of them. #+BEGIN_SRC ocaml val choice : ('s, 'a) parser list -> ('s, 'a) parser #+END_SRC ~seq l~ is a combinator that turns a list of parsers ~l~ into a single one which will match all of them and return the result in a list.
#+BEGIN_SRC ocaml val seq : ('s, 'a) parser list -> ('s, 'a list) parser #+END_SRC ~between open p close~ parses the parser ~open~, then ~p~ and ~close~ and returns the value of ~p~. #+BEGIN_SRC ocaml val between : ('s, 'a) parser -> ('s, 'b) parser -> ('s, 'c) parser -> ('s, 'b) parser #+END_SRC ~sepBy sep p~ is a parser that parses 0 or more times the parser ~p~ separated by the parser ~sep~. #+BEGIN_SRC ocaml val sepBy : ('s, 'a) parser -> ('s, 'b) parser -> ('s, 'b list) parser #+END_SRC ~sepBy1 sep p~ is a parser that parses 1 or more times the parser ~p~ separated by the parser ~sep~. #+BEGIN_SRC ocaml val sepBy1 : ('s, 'a) parser -> ('s, 'b) parser -> ('s, 'b list) parser #+END_SRC ~endBy sep p~ is a parser that parses 0 or more times the parser ~p~ separated and ended by the parser ~sep~. #+BEGIN_SRC ocaml val endBy : ('s, 'a) parser -> ('s, 'b) parser -> ('s, 'b list) parser #+END_SRC ~endBy1 sep p~ is a parser that parses 1 or more times the parser ~p~ separated and ended by the parser ~sep~. #+BEGIN_SRC ocaml val endBy1 : ('s, 'a) parser -> ('s, 'b) parser -> ('s, 'b list) parser #+END_SRC ~sepEndBy sep p~ is a parser that parses 0 or more times the parser ~p~ separated and optionally ended by the parser ~sep~. #+BEGIN_SRC ocaml val sepEndBy : ('s, 'a) parser -> ('s, 'b) parser -> ('s, 'b list) parser #+END_SRC ~sepEndBy1 sep p~ is a parser that parses 1 or more times the parser ~p~ separated and optionally ended by the parser ~sep~. #+BEGIN_SRC ocaml val sepEndBy1 : ('s, 'a) parser -> ('s, 'b) parser -> ('s, 'b list) parser #+END_SRC *** Lazy Combinators Lazy combinators are really useful for some recursive combinators that may cause a stack overflow otherwise.

~p <*>| q~ is the lazy sequence combinator applying the result of parser ~p~ to the parser ~q~, but only evaluating the parser ~q~ if ~p~ worked. #+BEGIN_SRC ocaml val ( <*>| ) : ('s, 'b -> 'a) parser -> ('s, 'b) parser lazy_t -> ('s, 'a) parser #+END_SRC ~p <|>| q~ is the lazy choice combinator trying the parser ~p~, if it works, returns the result, else evaluates the parser ~q~ and returns its result. #+BEGIN_SRC ocaml val ( <|>| ) : ('s, 'a) parser -> ('s, 'a) parser lazy_t -> ('s, 'a) parser #+END_SRC ~p *>| q~ is the lazy sequence combinator but ignores the value returned by the parser ~p~, it's the missing bracket. The parser ~q~ is evaluated only if ~p~ succeeded. #+BEGIN_SRC ocaml val ( *>| ) : ('s, 'a) parser -> ('s, 'b) parser lazy_t -> ('s, 'b) parser #+END_SRC ~p <*| q~ is the lazy sequence combinator but ignores the value returned by the parser ~q~, it's the missing bracket. The parser ~q~ is evaluated only if ~p~ succeeded. #+BEGIN_SRC ocaml val ( <*| ) : ('s, 'a) parser -> ('s, 'b) parser lazy_t -> ('s, 'a) parser #+END_SRC *** Basic parsers ~satisfy p~ is a parser that matches an element satisfying the predicate ~p~. #+BEGIN_SRC ocaml val satisfy: ('a -> bool) -> ('a, 'a) parser #+END_SRC ~any~ is a parser that matches anything. #+BEGIN_SRC ocaml val any : ('a, 'a) parser #+END_SRC ~opt default p~ is a parser that runs the parser ~p~ and if it succeeds returns the result, else, it returns the ~default~ value given. #+BEGIN_SRC ocaml val opt : 'a -> ('s, 'a) parser -> ('s, 'a) parser #+END_SRC ~many p~ is a parser that runs the parser ~p~ 0 or more times and returns all the obtained results in a list. #+BEGIN_SRC ocaml val many : ('s, 'a) parser -> ('s, 'a list) parser #+END_SRC ~many1 p~ is a parser that runs the parser ~p~ 1 or more times and returns all the obtained results in a list.
#+BEGIN_SRC ocaml val many1 : ('s, 'a) parser -> ('s, 'a list) parser #+END_SRC ~chainl1 op p~ is a parser that parses the operand ~p~, as left-associative, separated by the separator ~op~, one or more times. #+BEGIN_SRC ocaml val chainl1 : ('s, 'a -> 'a -> 'a) parser -> ('s, 'a) parser -> ('s, 'a) parser #+END_SRC ~chainl op p default~ is a parser that parses the operand ~p~, as left-associative, separated by the separator ~op~, if it failed, returns the value ~default~. #+BEGIN_SRC ocaml val chainl : ('s, 'a -> 'a -> 'a) parser -> ('s, 'a) parser -> 'a -> ('s, 'a) parser #+END_SRC ~chainr1 op p~ is a parser that parses the operand ~p~, as right-associative, separated by the separator ~op~, one or more times. #+BEGIN_SRC ocaml val chainr1 : ('s, 'a -> 'a -> 'a) parser -> ('s, 'a) parser -> ('s, 'a) parser #+END_SRC ~chainr op p default~ is a parser that parses the operand ~p~, as right-associative, separated by the separator ~op~, if it failed, returns the value ~default~. #+BEGIN_SRC ocaml val chainr : ('s, 'a -> 'a -> 'a) parser -> ('s, 'a) parser -> 'a -> ('s, 'a) parser #+END_SRC ~sym s~ is a parser that matches the symbol ~s~. #+BEGIN_SRC ocaml val sym : 'a -> ('a, 'a) parser #+END_SRC ~syms s~ is a parser that matches the list of symbol ~s~. #+BEGIN_SRC ocaml val syms : 'a list -> ('a, 'a list) parser #+END_SRC ~char c~ is a parser that matches the character ~c~. #+BEGIN_SRC ocaml val char : char -> (char, char) parser #+END_SRC ~word w~ is a parser that matches the string ~w~. #+BEGIN_SRC ocaml val word : string -> (char, char list) parser #+END_SRC ~range l r~ is a parser that matches a character between the characters ~l~ and ~r~ included. 
#+BEGIN_SRC ocaml val range : char -> char -> (char, char) parser #+END_SRC ~lower~ is a parser that matches a lowercase character. #+BEGIN_SRC ocaml val lower : (char, char) parser #+END_SRC ~upper~ is a parser that matches an uppercase character. #+BEGIN_SRC ocaml val upper : (char, char) parser #+END_SRC ~letter~ is a parser that matches an alphabet character. #+BEGIN_SRC ocaml val letter : (char, char) parser #+END_SRC ~digit~ is a parser that matches a digit. #+BEGIN_SRC ocaml val digit : (char, char) parser #+END_SRC ~alphaNum~ is a parser that matches a letter or a digit. #+BEGIN_SRC ocaml val alphaNum : (char, char) parser #+END_SRC ~octDigit~ is a parser that matches an octal digit. #+BEGIN_SRC ocaml val octDigit : (char, char) parser #+END_SRC ~hexDigit~ is a parser that matches a hexadecimal digit. #+BEGIN_SRC ocaml val hexDigit : (char, char) parser #+END_SRC ~space~ is a parser that matches a space. #+BEGIN_SRC ocaml val space : (char, char) parser #+END_SRC ~spaces~ is a parser that matches 0 or more spaces. #+BEGIN_SRC ocaml val spaces : (char, char list) parser #+END_SRC ~newline~ is a parser that matches a newline character. #+BEGIN_SRC ocaml val newline : (char, char) parser #+END_SRC ~tab~ is a parser that matches a tab character. #+BEGIN_SRC ocaml val tab : (char, char) parser #+END_SRC ~pack l p r~ is a parser that matches the parser ~p~ between the symbols ~l~ and ~r~. #+BEGIN_SRC ocaml val pack : 's list -> ('s, 'a) parser -> 's list -> ('s, 'a) parser #+END_SRC ~packs l p r~ is a parser that matches the parser ~p~ between the strings ~l~ and ~r~. #+BEGIN_SRC ocaml val packs : string -> (char, 'a) parser -> string -> (char, 'a) parser #+END_SRC ~oneOf l~ is a parser that matches a symbol from the list ~l~. #+BEGIN_SRC ocaml val oneOf : 'a list -> ('a, 'a) parser #+END_SRC