试图了解如何在 OCaml 中使用哈希表实现 Tries

Trying to understand how to implement Tries with hash tables in OCaml

我正在尝试了解如何在 Ocaml 中使用哈希表实现 Tries。这是 https://www.fun-mooc.fr/.

中 MOOC "Introduction to Functional Programming in OCaml" 的练习 W06 04

如果有人能帮助我理解如何使用命令式哈希表实现递归尝试,我将不胜感激。

课程结束了,我只是想了解一下

这就是我正在尝试的(给出了模块 GenericTrie 和模块 Trie 的模板,我们必须实例化 Hashtbl.Make 仿函数并实现空、查找和插入):

module type GenericTrie = sig
  type 'a char_table
  type 'a trie = Trie of 'a option * 'a trie char_table
  val empty : unit -> 'a trie
  val insert : 'a trie -> string -> 'a -> 'a trie
  val lookup : 'a trie -> string -> 'a option
end

module CharHashedType = struct
  type t = char
  let equal a b = a = b
  let hash a = Hashtbl.hash a
end

module CharHashtbl  = Hashtbl.Make(CharHashedType)

module Trie : GenericTrie with type 'a char_table = 'a CharHashtbl.t =
  struct
    type 'a char_table = 'a CharHashtbl.t
    type 'a trie = Trie of 'a option * 'a trie char_table

    let empty () =
      Trie (None, CharHashtbl.create 10)
    ;;

    let lookup trie w =
      let len = String.length w in
      let rec lookup' trie' idx =
        if idx >= len then let Trie (x, _) = trie' in x
        else
          let Trie (_, table) = trie' in
          if CharHashtbl.mem table w.[idx] then
            lookup' (CharHashtbl.find table w.[idx]) (idx + 1)
          else raise Not_found
      in
      lookup' trie 0
    ;;

    let insert trie w v =

    ;;
  end

根据我给出的答案 ,唯一的区别是 Map 你有 Hashtbl

所以,据我所知,你的特里树与 string'a option 相关联,这意味着你可以存储结果字符串或布尔值或其他任何东西,但我们不关心那。

首先,我会写

type 'a trie = {value : 'a option;
                children : 'a trie char_table}

因为如果你只需要一个构造函数,我觉得使用构造函数很奇怪,记录对以后会有帮助:

module Trie : GenericTrie with type 'a char_table = 'a CharHashtbl.t = struct
  type 'a char_table = 'a CharHashtbl.t
  type 'a trie = {value : 'a option;
                  children : 'a trie char_table}

  let empty () =
    {value = None; children = CharHashtbl.create 10}
  ;;


  let lookup trie w =
    let len = String.length w in
    let rec lookup' {value; children} idx =
      if idx >= len then value
      else
        let char = w.[idx] in
        let child = CharHashtbl.find children char in
        lookup' child (idx + 1)
    in
    lookup' trie 0
  ;;

我简化了lookup(注意写if Module.mem ... then Module.find ... else raise Not_found严格等同于Module.find ...

那么,insert呢?该算法非常简单:

  • 如果我到达字符串的最后一个字母,我会将值关联到它
  • 如果没有,要么有一个 child 关联到当前字母,我递归地通过这个分支,要么没有,我需要创建这个分支。

在 OCaml 中,给出:

  let insert trie w v =
    let len = String.length w in
    let rec aux trie idx =
      if idx >= len then {trie with value = Some v}
      else
        let char = w.[idx] in
        let child = try CharHashtbl.find trie.children char
          with Not_found -> empty () in
        let child' = aux child (idx + 1) in
        CharHashtbl.add trie.children char child';
        trie
    in aux trie 0
  ;;

作为旁注,我想指出一个事实,即混合持久数据类型和可变数据类型真的很奇怪。在这种情况下,我的偏好是:

module type GenericTrie = sig
  type 'a char_table
  type 'a trie = {mutable value : 'a option;
                  children : 'a trie char_table}
  val empty : unit -> 'a trie
  val insert : 'a trie -> string -> 'a -> unit
  val lookup : 'a trie -> string -> 'a option
end;;

module CharHashedType = struct
  type t = char
  let equal a b = a = b
  let hash a = Hashtbl.hash a
end;;

module CharHashtbl  = Hashtbl.Make(CharHashedType)

module Trie : GenericTrie with type 'a char_table = 'a CharHashtbl.t = struct
  type 'a char_table = 'a CharHashtbl.t
  type 'a trie = {mutable value : 'a option;
                  children : 'a trie char_table}

  let empty () =
    {value = None; children = CharHashtbl.create 10}
  ;;

  let lookup trie w =
    let len = String.length w in
    let rec lookup' {value; children} idx =
      if idx >= len then value
      else
        let char = w.[idx] in
        let child = CharHashtbl.find children char in
        lookup' child (idx + 1)
    in
    lookup' trie 0
  ;;

  let insert trie w v =
    let len = String.length w in
    let rec aux trie idx =
      if idx >= len then trie.value <- Some v
      else
        let char = w.[idx] in
        let child = try CharHashtbl.find trie.children char
          with Not_found ->
            let child = empty () in
            CharHashtbl.add trie.children char child;
            child
        in
        aux child (idx + 1)
    in aux trie 0
  ;;

在这两种情况下,让我们用这个函数打印它:

let (t : string Trie.trie) = Trie.empty ();;

let rec pp fmt trie =
  let open Trie in
  let {value; children} = trie in
  Format.fprintf fmt "@[<v 1>";
  (match value with
    | None -> Format.fprintf fmt "None"
    | Some s -> Format.fprintf fmt "Some %s" s
  );
  CharHashtbl.iter (fun char trie ->
    Format.fprintf fmt "@ %c@ %a" char pp trie
  ) children;
  Format.fprintf fmt "@]"

let () = 
  Trie.insert t "foo" "foo";
  Trie.insert t "fol" "fol";
  Trie.insert t "far" "far";
  Format.printf "%a@." pp t

我会有这样的输出:

None
 f
 None
  o
  None
   o
   Some foo
   l
   Some fol
  a
  None
   r
   Some far