F# 或 Ocaml 优化
F# or Ocaml optimization
我试图理解为什么以下 Python 代码 运行 比 F# 和 Ocaml 版本更快:
from sys import stdin, stdout
def read():
return stdin.readline().rstrip()
def readints():
return [int(x) for x in read().split()]
_, _, q = readints()
A = readints()
B = readints()
for _ in range(q):
arr = [0]*4001
l1, r1, l2, r2 = readints()
for i in range(l1-1, r1):
arr[A[i]] ^= 1
for i in range(l2-1, r2):
arr[B[i]] ^= 1
result = sum(arr)
stdout.write(str(result)+"\n")
这是我花了几个小时学习该语言后得出的 Ocaml 版本:
let open Printf in
let parse_line () =
read_line()
|> String.trim
|> Str.split (Str.regexp_string " ")
|> Array.of_list
|> Array.map int_of_string in
let nmq = parse_line() in
let n = nmq.(0) in
let m = nmq.(1) in
let q = nmq.(2) in
let a = parse_line() in
let b = parse_line() in
for i = 1 to q do
let arr = Array.create 4001 0 in
let llrr = parse_line() in
let l1 = llrr.(0) in
let r1 = llrr.(1) in
let l2 = llrr.(2) in
let r2 = llrr.(3) in
for i = l1-1 to r1-1 do
arr.(a.(i)) <- arr.(a.(i)) lxor 1
done;
for i = l2-1 to r2-1 do
arr.(b.(i)) <- arr.(b.(i)) lxor 1
done;
printf "%d\n" (Array.fold_left (+) 0 arr)
done;
这是相同的,但在 F#
open System
open System.IO
let out = new StreamWriter(Console.OpenStandardOutput())
let readints () = [ for x in stdin.ReadLine().TrimEnd().Split() -> int(x) ]
let mutable [ _; _; q ] = readints()
let a = readints()
let b = readints()
for _ in 1..q do
let arr = Array.create 4001 0
let [ l1; r1; l2; r2 ] = readints()
for i in l1-1 .. r1-1 do
arr.[a.[i]] <- arr.[a.[i]] ^^^ 1
for i in l2-1 .. r2-1 do
arr.[b.[i]] <- arr.[b.[i]] ^^^ 1
out.WriteLine(Array.sum(arr))
out.Close()
示例输入:
5 5 2
76 56 34 52 12
10 91 86 10 91
1 5 2 5
2 2 2 5
输出:
7
3
关于如何提高 Ocaml 或 F# 程序的性能有什么想法吗?谢谢!
更新
实施@FyodorSoikin 和@JL0PD,F# 代码现在通过了 5 个测试用例中的 2 个:
open System
open System.IO
let out = new StreamWriter(Console.OpenStandardOutput())
let readints () = [| for x in stdin.ReadLine().TrimEnd().Split() -> int(x) |]
let mutable q = readints().[2]
let a = readints()
let b = readints()
for t=1 to q do
let arr = Array.zeroCreate 4001
let llrr = readints()
let l1 = llrr.[0]
let r1 = llrr.[1]
let l2 = llrr.[2]
let r2 = llrr.[3]
for i = l1-1 to r1-1 do
arr.[a.[i]] <- arr.[a.[i]] ^^^ 1
for i = l2-1 to r2-1 do
arr.[b.[i]] <- arr.[b.[i]] ^^^ 1
out.WriteLine(Array.sum(arr))
out.Close()
这是一个更地道的 OCaml 代码,尽管您没有说明任务是什么,因此实现尽可能地遵循您的代码
open Printf
let parse_line () =
read_line () |> String.trim |>
String.split_on_char ' ' |>
List.map int_of_string |>
Array.of_list
let main () =
match parse_line () with
| [|n;m;q|] ->
let a = parse_line () in
let b = parse_line () in
let arr = Array.make 4001 0 in
for i = 1 to q do
match parse_line () with
| [|l1; r1; l2; r2 |] ->
Array.fill arr 0 (Array.length arr) 0;
for i = l1-1 to r1-1 do
arr.(a.(i)) <- arr.(a.(i)) lxor 1
done;
for i = l2-1 to r2-1 do
arr.(b.(i)) <- arr.(b.(i)) lxor 1
done;
printf "%d\n" (Array.fold_left (+) 0 arr)
| _ -> failwith "wrong input: expects four numbers"
done
| _ -> failwith "wrong input on line 1: expects three numbers"
let () = main ()
它使用更高效的 parse_line
函数,不依赖正则表达式进行字符串拆分,但我认为这并不重要,因为它不经常被调用(假设 q
很小)。另一个小优化是我将数组创建移出循环并用零重新填充它而不是在每次迭代时创建一个新的(也是一个小优化,考虑到 OCaml 的速度,我不希望它产生大的影响'
GC 正在工作)。
更好的实现将完全避开中间 arr
值。
此外,我真的不认为 Python 实现在绝对数量上比 OCaml 或 F# 快,我认为它没有通过测试,因为它比其他 OCaml 实现慢。
P.S。找到实际的 task 后,我可以看到 q
可能很大, parse_line
将成为瓶颈,这里有很大的改进空间。
此外,您的方法的最弱点是您正在进行 4k 次迭代并创建一个 4k 条目的数组,这两者都是完全不必要的并且对性能至关重要。您只需要计算主题的数量,因此为每个可能的主题编号分配一个 4k 条目的数组并不是最优的。您可以为此使用哈希表。
终于有一个版本通过了 F# 中 5 个测试用例中的 5 个。我不相信这个问题今天可以在 Python 中解决,因为平台当前的限制。
open System
open System.Collections
// Required manually implementation since the old .NET didn't support BitOperations.PopCount
let cardinality (bitArray: BitArray) =
let arr: int32[] = Array.create ((bitArray.Length >>> 5) + 1) 0
bitArray.CopyTo(arr, 0)
let mutable count = 0
let n = arr.Length-1
arr.[n] <- arr.[n] &&& (~~~(-1 <<< (bitArray.Count % 32)))
for i = 0 to n do
let mutable c = arr.[i]
c <- c - ((c >>> 1) &&& 0x55555555)
c <- (c &&& 0x33333333) + ((c >>> 2) &&& 0x33333333)
c <- ((c + (c >>> 4) &&& 0xF0F0F0F) * 0x1010101) >>> 24
count <- count + c
count
let readints () = [| for x in stdin.ReadLine().TrimEnd().Split() -> int(x) |]
let [| n; m; q |] = readints()
let a = readints()
let b = readints()
let abits = Array.init (n+5) (fun _ -> BitArray(4005))
let bbits = Array.init (m+5) (fun _ -> BitArray(4005))
for i = 1 to n do
let k = a.[i-1]
abits.[i] <- BitArray(abits.[i-1])
abits.[i].[k] <- not abits.[i].[k]
for i = 1 to m do
let k = b.[i-1]
bbits.[i] <- BitArray(bbits.[i-1])
bbits.[i].[k] <- not bbits.[i].[k]
for i = 1 to q do
let [| l1; r1; l2; r2 |] = readints()
let result = BitArray(abits.[l1-1]).Xor(BitArray(abits.[r1]))
result.Xor(BitArray(bbits.[l2-1]).Xor(BitArray(bbits.[r2])))
printfn "%d" (cardinality result)
我试图理解为什么以下 Python 代码 运行 比 F# 和 Ocaml 版本更快:
from sys import stdin, stdout
def read():
return stdin.readline().rstrip()
def readints():
return [int(x) for x in read().split()]
_, _, q = readints()
A = readints()
B = readints()
for _ in range(q):
arr = [0]*4001
l1, r1, l2, r2 = readints()
for i in range(l1-1, r1):
arr[A[i]] ^= 1
for i in range(l2-1, r2):
arr[B[i]] ^= 1
result = sum(arr)
stdout.write(str(result)+"\n")
这是我花了几个小时学习该语言后得出的 Ocaml 版本:
let open Printf in
let parse_line () =
read_line()
|> String.trim
|> Str.split (Str.regexp_string " ")
|> Array.of_list
|> Array.map int_of_string in
let nmq = parse_line() in
let n = nmq.(0) in
let m = nmq.(1) in
let q = nmq.(2) in
let a = parse_line() in
let b = parse_line() in
for i = 1 to q do
let arr = Array.create 4001 0 in
let llrr = parse_line() in
let l1 = llrr.(0) in
let r1 = llrr.(1) in
let l2 = llrr.(2) in
let r2 = llrr.(3) in
for i = l1-1 to r1-1 do
arr.(a.(i)) <- arr.(a.(i)) lxor 1
done;
for i = l2-1 to r2-1 do
arr.(b.(i)) <- arr.(b.(i)) lxor 1
done;
printf "%d\n" (Array.fold_left (+) 0 arr)
done;
这是相同的,但在 F#
open System
open System.IO
let out = new StreamWriter(Console.OpenStandardOutput())
let readints () = [ for x in stdin.ReadLine().TrimEnd().Split() -> int(x) ]
let mutable [ _; _; q ] = readints()
let a = readints()
let b = readints()
for _ in 1..q do
let arr = Array.create 4001 0
let [ l1; r1; l2; r2 ] = readints()
for i in l1-1 .. r1-1 do
arr.[a.[i]] <- arr.[a.[i]] ^^^ 1
for i in l2-1 .. r2-1 do
arr.[b.[i]] <- arr.[b.[i]] ^^^ 1
out.WriteLine(Array.sum(arr))
out.Close()
示例输入:
5 5 2
76 56 34 52 12
10 91 86 10 91
1 5 2 5
2 2 2 5
输出:
7
3
关于如何提高 Ocaml 或 F# 程序的性能有什么想法吗?谢谢!
更新
实施@FyodorSoikin 和@JL0PD,F# 代码现在通过了 5 个测试用例中的 2 个:
open System
open System.IO
let out = new StreamWriter(Console.OpenStandardOutput())
let readints () = [| for x in stdin.ReadLine().TrimEnd().Split() -> int(x) |]
let mutable q = readints().[2]
let a = readints()
let b = readints()
for t=1 to q do
let arr = Array.zeroCreate 4001
let llrr = readints()
let l1 = llrr.[0]
let r1 = llrr.[1]
let l2 = llrr.[2]
let r2 = llrr.[3]
for i = l1-1 to r1-1 do
arr.[a.[i]] <- arr.[a.[i]] ^^^ 1
for i = l2-1 to r2-1 do
arr.[b.[i]] <- arr.[b.[i]] ^^^ 1
out.WriteLine(Array.sum(arr))
out.Close()
这是一个更地道的 OCaml 代码,尽管您没有说明任务是什么,因此实现尽可能地遵循您的代码
open Printf
let parse_line () =
read_line () |> String.trim |>
String.split_on_char ' ' |>
List.map int_of_string |>
Array.of_list
let main () =
match parse_line () with
| [|n;m;q|] ->
let a = parse_line () in
let b = parse_line () in
let arr = Array.make 4001 0 in
for i = 1 to q do
match parse_line () with
| [|l1; r1; l2; r2 |] ->
Array.fill arr 0 (Array.length arr) 0;
for i = l1-1 to r1-1 do
arr.(a.(i)) <- arr.(a.(i)) lxor 1
done;
for i = l2-1 to r2-1 do
arr.(b.(i)) <- arr.(b.(i)) lxor 1
done;
printf "%d\n" (Array.fold_left (+) 0 arr)
| _ -> failwith "wrong input: expects four numbers"
done
| _ -> failwith "wrong input on line 1: expects three numbers"
let () = main ()
它使用更高效的 parse_line
函数,不依赖正则表达式进行字符串拆分,但我认为这并不重要,因为它不经常被调用(假设 q
很小)。另一个小优化是我将数组创建移出循环并用零重新填充它而不是在每次迭代时创建一个新的(也是一个小优化,考虑到 OCaml 的速度,我不希望它产生大的影响'
GC 正在工作)。
更好的实现将完全避开中间 arr
值。
此外,我真的不认为 Python 实现在绝对数量上比 OCaml 或 F# 快,我认为它没有通过测试,因为它比其他 OCaml 实现慢。
P.S。找到实际的 task 后,我可以看到 q
可能很大, parse_line
将成为瓶颈,这里有很大的改进空间。
此外,您的方法的最弱点是您正在进行 4k 次迭代并创建一个 4k 条目的数组,这两者都是完全不必要的并且对性能至关重要。您只需要计算主题的数量,因此为每个可能的主题编号分配一个 4k 条目的数组并不是最优的。您可以为此使用哈希表。
终于有一个版本通过了 F# 中 5 个测试用例中的 5 个。我不相信这个问题今天可以在 Python 中解决,因为平台当前的限制。
open System
open System.Collections
// Required manually implementation since the old .NET didn't support BitOperations.PopCount
let cardinality (bitArray: BitArray) =
let arr: int32[] = Array.create ((bitArray.Length >>> 5) + 1) 0
bitArray.CopyTo(arr, 0)
let mutable count = 0
let n = arr.Length-1
arr.[n] <- arr.[n] &&& (~~~(-1 <<< (bitArray.Count % 32)))
for i = 0 to n do
let mutable c = arr.[i]
c <- c - ((c >>> 1) &&& 0x55555555)
c <- (c &&& 0x33333333) + ((c >>> 2) &&& 0x33333333)
c <- ((c + (c >>> 4) &&& 0xF0F0F0F) * 0x1010101) >>> 24
count <- count + c
count
let readints () = [| for x in stdin.ReadLine().TrimEnd().Split() -> int(x) |]
let [| n; m; q |] = readints()
let a = readints()
let b = readints()
let abits = Array.init (n+5) (fun _ -> BitArray(4005))
let bbits = Array.init (m+5) (fun _ -> BitArray(4005))
for i = 1 to n do
let k = a.[i-1]
abits.[i] <- BitArray(abits.[i-1])
abits.[i].[k] <- not abits.[i].[k]
for i = 1 to m do
let k = b.[i-1]
bbits.[i] <- BitArray(bbits.[i-1])
bbits.[i].[k] <- not bbits.[i].[k]
for i = 1 to q do
let [| l1; r1; l2; r2 |] = readints()
let result = BitArray(abits.[l1-1]).Xor(BitArray(abits.[r1]))
result.Xor(BitArray(bbits.[l2-1]).Xor(BitArray(bbits.[r2])))
printfn "%d" (cardinality result)