performance - F# much slower than OCaml for handling complex keys like int*int*int in data structures -


I have converted an OCaml program to F#, and overall the performance is the same as in OCaml.

However, at one point I had to try replacing exceptions with option values.

The program works a lot with lists, maps, etc. that have int*int*int keys (= tuples of 3 ints).

I have a performance leak I do not understand. Can someone explain how I can have 90% of the total execution time inside a function called

System.Tuple`3.System.Collections.IStructuralEquatable.Equals(object, class System.Collections.IEqualityComparer)

and how can I avoid it?

Well, as people have noted it is pretty hard to diagnose a problem with no code, but I'll do my best :-)

Your question made me run a test I had been planning on running for a while: testing the performance of reference types vs value types as keys in associative containers. My hypothesis, based on a vague feeling about the .NET runtime, was that for smallish key sizes (your 3 ints) a value type would win out. I appeared to have been wrong ([edit] further testing proved it correct! [/edit]).

Let's see some code (as per usual with a micro performance test, take it with a grain of salt):

I used 5 different containers of various flavours to store the ints (F# is nice enough to create equality and comparers for struct types and records):

// Key types used to benchmark associative-container lookups.
// The scraped source collapsed these onto one line; reconstructed with
// conventional F# formatting. All four expose three components _1/_2/_3
// so they can be created interchangeably by the (a, b, c) factory lambdas.

// Non-generic struct key: three ints, exposed as read-only properties.
type keystruct(_1': int, _2': int, _3': int) =
    struct
        member this._1 = _1'
        member this._2 = _2'
        member this._3 = _3'
    end

// Generic struct key: same shape, but the component type is a type parameter
// (used to measure the cost of generic vs specialized equality/hashing).
type keygenericstruct<'a>(_1': 'a, _2': 'a, _3': 'a) =
    struct
        member this._1 = _1'
        member this._2 = _2'
        member this._3 = _3'
    end

// Non-generic record key (reference type); F# auto-generates structural
// equality, comparison and GetHashCode for records.
type keyrecord = { _1 : int; _2 : int; _3 : int }

// Generic record key (reference type).
type keygenericrecord<'a> = { _1 : 'a; _2 : 'a; _3 : 'a }

Plus I used the original tuple (int*int*int).

i created following test harness:

// Benchmark harness: builds a container of 100*100*100 (a,b,c) keys mapped
// to float values, then times (1) container creation and (2) iterationcount
// random lookups, printing a checksum plus both elapsed times.
// The scrape dropped several tokens (`for` keywords, loop/lambda variables,
// the `a` argument to createkey) and lowercased .NET member names (F# is
// case-sensitive); reconstructed here. NOTE(review): exact loop bounds of the
// original are not recoverable from the scrape — confirm against the source.
let inline runtest<'a when 'a : equality> iterationcount createassociativemap (createkey: _ -> _ -> _ -> 'a) =
    // Collect before timing so prior garbage doesn't pollute the measurement.
    System.GC.Collect ()
    System.GC.WaitForFullGCComplete () |> ignore

    // Every (a, b, c) triple in 0..99 — one million keys.
    let data = [|
        for a in 0 .. 99 do
            for b in 0 .. 99 do
                for c in 0 .. 99 do
                    yield a, b, c |]

    // Fisher-Yates shuffle with a fixed seed so every run sees the same order.
    let r = System.Random (0)
    for i = 0 to data.Length - 1 do
        let j = r.Next (i, data.Length)
        let t = data.[i]
        data.[i] <- data.[j]
        data.[j] <- t

    // Pair each key with a deterministic value in (-0.5, 0.5].
    let keyvalues =
        data
        |> Array.mapi (fun i k -> k, 0.5 - (float i) / (float data.Length))
        |> Array.toSeq

    // Time container creation.
    let sw = System.Diagnostics.Stopwatch.StartNew ()
    let mapper = createassociativemap createkey keyvalues
    let creationtime = sw.ElapsedMilliseconds

    // Time random lookups; the checksum guards against dead-code elimination
    // and verifies all container flavours return the same values.
    let sw = System.Diagnostics.Stopwatch.StartNew ()
    let mutable checksum = 0.
    for _ = 1 to iterationcount do
        let a, b, c = r.Next 100, r.Next 100, r.Next 100
        let key = createkey a b c
        checksum <- checksum + (mapper key)
    let accesstime = sw.ElapsedMilliseconds

    printfn "checksum %f elapsed %d/%d (%s)" checksum creationtime accesstime (typeof<'a>.Name)

// One million lookups per trial.
let runntrials<'a when 'a : equality> = runtest<'a> 1000000

and ran it over various types of associative containers:

// Container factories. Each takes a key-constructor `create` and a sequence
// of ((a,b,c), value) pairs, builds a container, and returns a lookup
// function `key -> float`. The scrape collapsed these to one line and
// dropped the `a` parameter inside createcustomarray; reconstructed.

// Mutable .NET Dictionary (hash-based).
let createdictionary create keyvalues =
    let d = System.Collections.Generic.Dictionary<_,_> ()
    keyvalues
    |> Seq.map (fun ((_1, _2, _3), value) -> create _1 _2 _3, value)
    |> Seq.iter (fun (key, value) -> d.[key] <- value)
    (fun key -> d.[key])

// F#'s read-only `dict` wrapper (also hash-based).
let createdict create keyvalues =
    let d =
        keyvalues
        |> Seq.map (fun ((_1, _2, _3), value) -> create _1 _2 _3, value)
        |> dict
    (fun key -> d.[key])

// F#'s immutable Map (comparison-based balanced tree).
let createmap create keyvalues =
    let d =
        keyvalues
        |> Seq.map (fun ((_1, _2, _3), value) -> create _1 _2 _3, value)
        |> Map.ofSeq
    (fun key -> d.[key])

// Baseline: a flat float array indexed by a*maxb*maxc + b*maxc + c —
// no hashing or comparison at all. Only works for the tuple key shape.
let createcustomarray create keyvalues =
    let maxa = 1 + (keyvalues |> Seq.map (fun ((a, _, _), _) -> a) |> Seq.max)
    let maxb = 1 + (keyvalues |> Seq.map (fun ((_, b, _), _) -> b) |> Seq.max)
    let maxc = 1 + (keyvalues |> Seq.map (fun ((_, _, c), _) -> c) |> Seq.max)

    // Row-major linearization of the 3-component key.
    let createindex a b c = a * maxb * maxc + b * maxc + c

    let values : array<float> = Array.create (maxa * maxb * maxc) 0.
    keyvalues
    |> Seq.iter (fun ((a, b, c), d) -> values.[createindex a b c] <- d)

    (fun (a, b, c) -> values.[a * maxb * maxc + b * maxc + c])

// Bind each container factory to the million-lookup harness.
let rundictionary<'a when 'a : equality> = runntrials<'a> createdictionary
let rundict<'a when 'a : equality> = runntrials<'a> createdict
let runmap<'a when 'a : comparison> = runntrials<'a> createmap
let runcustomarray = runntrials<_> createcustomarray

and ran the tests as follows:

// Run every key flavour against every container. The scrape dropped the
// first lambda parameter from each factory (`fun b c ->` should be
// `fun a b c ->` — the harness calls `createkey a b c`); reconstructed.
// Record-construction syntax `{ keyrecord._1 = ... }` disambiguates between
// the two record types that share field names.
printfn "using .net's system.collections.generic.dictionary"
rundictionary (fun a b c -> { keyrecord._1 = a; _2 = b; _3 = c })
rundictionary (fun a b c -> { keygenericrecord._1 = a; _2 = b; _3 = c })
rundictionary (fun a b c -> keystruct (a, b, c))
rundictionary (fun a b c -> keygenericstruct (a, b, c))
rundictionary (fun a b c -> (a, b, c))

printfn "using f# 'dict'"
rundict (fun a b c -> { keyrecord._1 = a; _2 = b; _3 = c })
rundict (fun a b c -> { keygenericrecord._1 = a; _2 = b; _3 = c })
rundict (fun a b c -> keystruct (a, b, c))
rundict (fun a b c -> keygenericstruct (a, b, c))
rundict (fun a b c -> (a, b, c))

printfn "using f# 'map'"
runmap (fun a b c -> { keyrecord._1 = a; _2 = b; _3 = c })
runmap (fun a b c -> { keygenericrecord._1 = a; _2 = b; _3 = c })
runmap (fun a b c -> keystruct (a, b, c))
runmap (fun a b c -> keygenericstruct (a, b, c))
runmap (fun a b c -> (a, b, c))

printfn "using custom array"
runcustomarray (fun a b c -> (a, b, c))

and got the following results (the checksum is there to try to ensure I'm not doing something stupid); "elapsed n/m" means "elapsed {container creation time}/{container access time}":

using .net's system.collections.generic.dictionary checksum -55.339450 elapsed 874/562 (keyrecord) checksum -55.339450 elapsed 1251/898 (keygenericrecord`1) checksum -55.339450 elapsed 569/1024 (keystruct) checksum -55.339450 elapsed 740/1427 (keygenericstruct`1) checksum -55.339450 elapsed 2497/2218 (tuple`3) using f# 'dict' checksum -55.339450 elapsed 979/628 (keyrecord) checksum -55.339450 elapsed 1614/1206 (keygenericrecord`1) checksum -55.339450 elapsed 3237/5625 (keystruct) checksum -55.339450 elapsed 3290/5626 (keygenericstruct`1) checksum -55.339450 elapsed 2448/1914 (tuple`3) using f# 'map' checksum -55.339450 elapsed 8453/2638 (keyrecord) checksum -55.339450 elapsed 31301/25441 (keygenericrecord`1) checksum -55.339450 elapsed 30956/26931 (keystruct) checksum -55.339450 elapsed 53699/49274 (keygenericstruct`1) checksum -55.339450 elapsed 32203/25274 (tuple`3) using custom array checksum -55.339450 elapsed 484/160 (tuple`3) 

Multiple runs showed some small variation in the numbers, but the major trends did hold. What we are testing is whether boxing occurs (in map and dict it does), the GetHashCode() implementation (the generic version is slower than the typed version, and tuple is the worst of all), and in the case of map, CompareTo.

Now where does that leave your question? Potentially, if the time is being spent in Equals of the tuple, changing to a record type might help!

But maybe not :-) (because if it is a hash container, a good GetHashCode shouldn't be causing many clashes, and map uses CompareTo).

Anyway, in real code you will have different garbage-collector effects, etc., etc. As I said, take it with a grain of salt...


I did some further testing, wrapping and starting each test multiple times in tasks, running each individually and in parallel over and over (starting more tasks than I have cores), and taking the average time to complete.

The reason for doing this was to take garbage-collector time into account.

When I did this, my original hypothesis — that the non-generic struct key would win — did hold.

If anyone is interested I can post that code as well, but since I think I'm the only one reading this, it's more like personal notes :-)


Comments

Popular posts from this blog

jquery - How can I dynamically add a browser tab? -

node.js - Getting the socket id,user id pair of a logged in user(s) -

keyboard - C++ GetAsyncKeyState alternative -