基于 Tomas 的回答,让我们定义两个模块:
module Kurt =
    /// A generator: a wrapped function from an int to a value of type 'a.
    type Gen<'a> = Gen of (int -> 'a)

    /// Wraps a plain value in a generator that ignores its int argument.
    let unit x = Gen (fun _ -> x)

    /// Runs the given generator with `n`, feeds its result to `k`, and then
    /// runs the generator returned by `k` with the same `n`.
    let bind k (Gen m) =
        let run n =
            // `k (m n)` is an ordinary (non-tail) call; running the resulting
            // generator is the last thing this function does.
            match k (m n) with
            | Gen next -> next n
        Gen run

    /// Builder that enables `gen { ... }` syntax on top of `unit` and `bind`.
    type GenBuilder() =
        member this.Return(v) = unit v
        member this.Bind(v, f) = bind f v

    /// Builder instance used for `gen { ... }` expressions.
    let gen = GenBuilder()
module Tomas =
    /// A generator in continuation-passing style: a wrapped function that
    /// takes an int and a continuation, and hands its result to the continuation.
    type Gen<'a> = Gen of (int -> ('a -> unit) -> unit)

    /// Wraps a plain value in a generator that ignores its int argument and
    /// passes the value straight to the continuation.
    let unit x = Gen (fun _ f -> f x)

    /// Runs the given generator with `n`; its result `r` is passed to `k`,
    /// and the generator returned by `k` is run with the same `n` and the
    /// caller's continuation.
    let bind k (Gen m) =
        let run n cont =
            let step r =
                match k r with
                | Gen next -> next n cont
            m n step
        Gen run

    /// Builder that enables `gen { ... }` syntax on top of `unit` and `bind`.
    type GenBuilder() =
        member this.Return v = unit v
        member this.Bind(v, f) = bind f v

    /// Builder instance used for `gen { ... }` expressions.
    let gen = GenBuilder()
为了简化一点,让我们将你原来的 sequence 函数重写为:
/// Combines a list of generators into a single generator that produces the
/// list of their results, in the same order as the input list.
let rec sequence gens =
    match gens with
    | [] -> gen { return [] }
    | head :: rest ->
        gen {
            let! value = head
            let! values = sequence rest
            return value :: values
        }
现在,无论 sequence 是基于 Kurt.gen 还是 Tomas.gen 定义的,sequence [for i in 1 .. 100000 -> unit i] 本身都能运行完成。问题并不在于使用你的定义时 sequence 会导致堆栈溢出,而在于调用 sequence 所返回的函数在被调用时才导致堆栈溢出。
要了解为什么会这样,让我们用底层的单子(monadic)操作把 sequence 的定义展开:
/// `sequence` written directly in terms of `bind` and `unit`, i.e. with the
/// computation expression desugared by hand.
let rec sequence gens =
    match gens with
    | [] -> unit []
    | head :: rest ->
        head
        |> bind (fun value ->
            sequence rest
            |> bind (fun values -> unit (value :: values)))
将 Kurt.unit 和 Kurt.bind 的定义内联进来并尽可能化简,我们得到:
/// `sequence` with Kurt.unit and Kurt.bind inlined and simplified.
let rec sequence gens =
    match gens with
    | [] -> Kurt.Gen (fun _ -> [])
    | (Kurt.Gen head) :: rest ->
        Kurt.Gen (fun n ->
            let (Kurt.Gen tail) = sequence rest
            // `tail n` is not a tail call: its result still has to be consed
            // onto `head n` after it returns.
            head n :: tail n)
现在应该清楚为什么调用 let (Kurt.Gen f) = sequence [for i in 1 .. 1000000 -> unit i] in f 0 会导致堆栈溢出了:f 需要对 sequence 进行非尾递归调用,并对其返回的函数求值,因此每一次递归调用都会占用一个堆栈帧。
将Tomas.unit 和Tomas.bind 内联到sequence 的定义中,我们得到以下简化版本:
/// `sequence` with Tomas.unit and Tomas.bind inlined and simplified.
let rec sequence gens =
    match gens with
    | [] -> Tomas.Gen (fun _ cont -> cont [])
    | (Tomas.Gen head) :: rest ->
        Tomas.Gen (fun n cont ->
            // Continuation for the head generator: build the generator for the
            // rest of the list, run it, and cons the head's result onto its output.
            let onHead r =
                let (Tomas.Gen tail) = sequence rest
                tail n (fun rs -> cont (r :: rs))
            head n onHead)
对这个版本进行推理比较棘手。你可以通过实验验证,对于某些任意大的输入它不会导致堆栈溢出(正如 Tomas 在他的回答中所展示的那样),也可以逐步推演求值过程来说服自己相信这一点。但是,堆栈消耗取决于传入列表中的各个 Gen 实例;如果输入的 Gen 本身不是尾递归的,仍然可能导致堆栈溢出:
// OK: each generator built by Tomas.unit calls its continuation as the last
// thing it does.
let (Tomas.Gen f) = sequence [for i in 1 .. 1000000 -> Tomas.unit i]
f 0 (fun list -> printfn "%i" list.Length)

// Not OK: each generator here does more work (the printfn) after calling its
// continuation, so the continuation call is not in tail position and the
// stack may overflow.
// Tomas.unit / Tomas.Gen are fully qualified so the example does not depend on
// which module happens to be opened; the inner continuation is named `k` so it
// no longer shadows the outer `f`.
let (Tomas.Gen f) = sequence [for i in 1 .. 1000000 -> Tomas.Gen (fun _ k -> k i; printfn "%i" i)]
f 0 (fun list -> printfn "%i" list.Length)