weave icon indicating copy to clipboard operation
weave copied to clipboard

UX: auto-transform captured seq to ptr UncheckedArray behind the scenes

Open mratsim opened this issue 3 years ago • 3 comments

having to cast[ptr UncheckedArray[T]] any captured seq[T] before use is a lot of boilerplate and quite annoying.

Example from Discord:

import std/[
  random,
  strutils,
  sequtils,
  sugar]
import chroma
import weave
import std/[times, monotimes]

randomize()
proc createColor() : string = 
  rand(0xFFFFFF).toHex(6)

proc main_serial(height, width: int) =
  var distance = newSeq[float64](height * width)

  # We create a power of 2 dataset so that indexing it is cheap and we measure processing speed.
  const pow2size = 8 # 2⁸ = 256
  let dataset = collect(for color in 0 ..< (1 shl pow2size): createColor())

  let start = getMonoTime()
  for upper in 0 ..< height:
    for lower in 0 ..< width:
      if upper == lower: 
        continue
      
      let colorUpper = parseHex(dataset[upper and (pow2size-1)]) # datasetAddr[upper mod pow2size]
      let colorLower = parseHex(dataset[lower and (pow2size-1)]) # datasetAddr[lower mod pow2size]
      distance[upper*width + lower] = distance(colorUpper, colorLower)
  let stop = getMonoTime()

  # echo distance.foldl(a+b)/distance.len.float
  echo "Time spent serial (ms): ", inMilliseconds(stop-start)

proc main_parallel(height, width: int) =
  var distance = newSeq[float64](height * width)

  # We create a power of 2 dataset so that indexing it is cheap and we measure processing speed.
  const pow2size = 8 # 2⁸ = 256
  let dataset = collect(for color in 0 ..< (1 shl pow2size): createColor())

  init(Weave)

  # Annoying ... to access seqs without causing GC issues
  let distanceAddr = cast[ptr UncheckedArray[float64]](distance[0].addr)
  let datasetAddr = cast[ptr UncheckedArray[string]](dataset[0].unsafeaddr)

  let start = getMonoTime()
  parallelFor upper in 0 ..< height:
    captures: {width, distanceAddr, datasetAddr}
    parallelFor lower in 0 ..< width:
      captures: {upper, width, distanceAddr, datasetAddr}
      if upper != lower: 
        let colorUpper = parseHex(datasetAddr[upper and (pow2size-1)]) # datasetAddr[upper mod pow2size]
        let colorLower = parseHex(datasetAddr[lower and (pow2size-1)]) # datasetAddr[lower mod pow2size]
        distanceAddr[upper*width + lower] = distance(colorUpper, colorLower)
  syncRoot(Weave)
  let stop = getMonoTime()

  # echo distance.foldl(a+b)/distance.len.float
  echo "Time spent parallel (ms): ", inMilliseconds(stop-start)

let height = 9600
let width = 1200

main_serial(height, width)
main_parallel(height, width)

mratsim avatar Jan 14 '22 10:01 mratsim

To me this doesn't seem as boilerplate at all (actually making it explicit is definitely a good thing!).

It seems to be rather a performance & safety problem (GC).

dumblob avatar Jan 14 '22 12:01 dumblob

To me this doesn't seem as boilerplate at all (actually making it explicit is definitely a good thing!).

It seems to be rather a performance & safety problem (GC).

I don't think this is really an issue. It's essentially a "bug" that one cannot just pass a regular seq (I put that into quotation marks, as I don't fully understand where the problems come from). Hiding the required transformation from the user seems nice. Especially, because it is a fully mechanical transformation.

However, there is a practical issue implementing this as far as I can tell: parallelFor and friends currently are untyped macros. That means we have no way to tell, whether something actually is a seq. So:

  • either one turns them into typed macros, but I'm rather certain we don't want to go down that path
  • or we add some annotation to tell the macro "hey, this is a seq", like:
  parallelFor upper in 0 ..< height:
    captures: {width, @distance, @dataset}
    # ...

which seems reasonable, given that I don't see why someone would hand some identifier that is supposed to be turned into a sequence in the context of captures.

Vindaar avatar Jan 14 '22 13:01 Vindaar

either one turns them into typed macros, but I'm rather certain we don't want to go down that path

We don't want to go that path but we can always have the untyped macro quote do the typed macro like in Arraymancer https://github.com/mratsim/Arraymancer/blob/20e0dc3/src/arraymancer/tensor/accessors_macros_read.nim#L56

The @ would be nice also to indicate transformation of a var T into ptr T as well though.

mratsim avatar Jan 16 '22 22:01 mratsim