regexp2 icon indicating copy to clipboard operation
regexp2 copied to clipboard

runtime error: index out of range [<number>] with length <samenumber>

Open mstoykov opened this issue 3 years ago • 4 comments

One more that was fuzzed during the night ;)

package main

import (
        "fmt"
        "runtime/debug"

        "github.com/dlclark/regexp2"
)

// testCases holds the inputs that trigger the panics reported in this issue.
// For each entry, r is the raw bytes of the regular expression and s is the
// raw bytes of the string it is matched against (see test).
var testCases = []struct {
        r, s []byte
}{
        {
                // r decodes to `0(?>()+?0)00000000w`; s is nineteen '0' characters.
                r: []byte{0x30, 0x28, 0x3f, 0x3e, 0x28, 0x29, 0x2b, 0x3f, 0x30, 0x29, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x77},
                s: []byte{0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30},
        },
        {
                // r decodes to `(?>(?>)+?>)0000000000`; s decodes to `00000000000000>000000`.
                r: []byte{0x28, 0x3f, 0x3e, 0x28, 0x3f, 0x3e, 0x29, 0x2b, 0x3f, 0x3e, 0x29, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30},
                s: []byte{0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x3e, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30},
        },
}

// test compiles r as a regexp2 pattern with the ECMAScript option and runs it
// against s. It reports true only when a panic was recovered while doing so;
// a compile error or a run that completes without panicking reports false.
func test(r, s []byte) (b bool) {
        defer func() {
                recovered := recover()
                if recovered == nil {
                        return
                }
                // Show the panic value and the stack, then flag the panic to the
                // caller through the named result.
                fmt.Println(recovered)
                debug.PrintStack()
                b = true
        }()

        pattern, input := string(r), string(s)
        re, compileErr := regexp2.Compile(pattern, regexp2.ECMAScript)
        if compileErr == nil {
                // The match and its error are deliberately discarded: only a
                // panic during matching is of interest here.
                _, _ = re.FindStringMatch(input)
        }
        return false
}

// main runs every entry of testCases through test. A header announcing the
// case is printed before each run; if test reports that no panic occurred, a
// follow-up "DOES NOT panic" line is printed for that case.
func main() {
        for _, tc := range testCases {
                pattern, input := string(tc.r), string(tc.s)

                fmt.Printf("Test case regex='%#v', string='%#v' panics\nstring values '%s', '%s'\n",
                        tc.r, tc.s, pattern, input,
                )
                fmt.Println("#############################################################################")

                if test(tc.r, tc.s) {
                        // The panic details were already printed inside test.
                        continue
                }
                fmt.Printf("Test case regex='%#v', string='%#v' DOES NOT panic\nstring values '%s', '%s'\n",
                        tc.r, tc.s, pattern, input,
                )
        }
}

panics with

Test case regex='[]byte{0x30, 0x28, 0x3f, 0x3e, 0x28, 0x29, 0x2b, 0x3f, 0x30, 0x29, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x77}', string='[]byte{0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}' panics
string values '0(?>()+?0)00000000w', '0000000000000000000'
#############################################################################
runtime error: index out of range [72] with length 72
goroutine 1 [running]:
runtime/debug.Stack(0x36, 0x0, 0x0)
        runtime/debug/stack.go:24 +0x9d
runtime/debug.PrintStack()
        runtime/debug/stack.go:16 +0x22
main.test.func1(0xc00015be38)
        command-line-arguments/test.go:27 +0x97
panic(0x4f0b40, 0xc0001320e0)
        runtime/panic.go:969 +0x166
github.com/dlclark/regexp2.(*runner).backtrack(0xc000162000)
        github.com/dlclark/regexp2/runner.go:1033 +0x246
github.com/dlclark/regexp2.(*runner).execute(0xc000162000, 0x0, 0x0)
        github.com/dlclark/regexp2/runner.go:904 +0x9b
github.com/dlclark/regexp2.(*runner).scan(0xc000162000, 0xc0001340a0, 0x13, 0x14, 0x0, 0x0, 0x7fffffffffffffff, 0x13, 0x14, 0x4490be)
        github.com/dlclark/regexp2/runner.go:144 +0x1c3
github.com/dlclark/regexp2.(*Regexp).run(0xc000160080, 0xc00015bd00, 0xffffffffffffffff, 0xc0001340a0, 0x13, 0x14, 0x0, 0x0, 0x0)
        github.com/dlclark/regexp2/runner.go:91 +0xf0
github.com/dlclark/regexp2.(*Regexp).FindStringMatch(...)
        github.com/dlclark/regexp2/regexp.go:159
main.test(0x5b9710, 0x13, 0x13, 0x5b9730, 0x13, 0x13, 0x0)
        command-line-arguments/test.go:36 +0x168
main.main()
        command-line-arguments/test.go:46 +0x355
Test case regex='[]byte{0x28, 0x3f, 0x3e, 0x28, 0x3f, 0x3e, 0x29, 0x2b, 0x3f, 0x3e, 0x29, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}', string='[]byte{0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x3e, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}' panics
string values '(?>(?>)+?>)0000000000', '00000000000000>000000'
#############################################################################
runtime error: index out of range [32] with length 32
goroutine 1 [running]:
runtime/debug.Stack(0x36, 0x0, 0x0)
        runtime/debug/stack.go:24 +0x9d
runtime/debug.PrintStack()
        runtime/debug/stack.go:16 +0x22
main.test.func1(0xc00015be38)
        command-line-arguments/test.go:27 +0x97
panic(0x4f0b40, 0xc000132160)
        runtime/panic.go:969 +0x166
github.com/dlclark/regexp2.(*runner).popcrawl(...)
        github.com/dlclark/regexp2/runner.go:938
github.com/dlclark/regexp2.(*runner).uncapture(...)
        github.com/dlclark/regexp2/runner.go:1467
github.com/dlclark/regexp2.(*runner).execute(0xc000162100, 0x0, 0x0)
        github.com/dlclark/regexp2/runner.go:507 +0x408c
github.com/dlclark/regexp2.(*runner).scan(0xc000162100, 0xc000100120, 0x15, 0x18, 0x0, 0x0, 0x7fffffffffffffff, 0x15, 0x18, 0x4490be)
        github.com/dlclark/regexp2/runner.go:144 +0x1c3
github.com/dlclark/regexp2.(*Regexp).run(0xc000160180, 0xc00015bd00, 0xffffffffffffffff, 0xc000100120, 0x15, 0x18, 0x0, 0x0, 0x0)
        github.com/dlclark/regexp2/runner.go:91 +0xf0
github.com/dlclark/regexp2.(*Regexp).FindStringMatch(...)
        github.com/dlclark/regexp2/regexp.go:159
main.test(0x5b9750, 0x15, 0x15, 0x5b9770, 0x15, 0x15, 0x0)
        command-line-arguments/test.go:36 +0x168
main.main()
        command-line-arguments/test.go:46 +0x355

mstoykov avatar Oct 14 '20 06:10 mstoykov

Oddly enough I think this one is related to #34. The ()+? construct generates a state machine that ends up with a corrupt stack. I suspect the Lazybranchmark operation isn't handling the scenario when we have an empty group with a lazy repeat.

Sometimes the out-of-sync stack leads to bad match groups being created (in the case of #34), sometimes it causes bad jumps (these two examples), and sometimes it just fails silently and leaves extra items on the stack.

We'll see if Microsoft can figure out the right change...otherwise I'll take a look at it.

dlclark avatar Oct 14 '20 21:10 dlclark

It's still happening in production code with this pattern: ((?:[\w*\s])+?(?:\s|[*]))([a-zA-Z_]\w*)(\s*\([^;]*?\))([^;{]*)(\{). Any update on this?

gandarez avatar Jan 12 '24 23:01 gandarez

@gandarez What exact input and options cause the pattern to panic? It'd help to have a simple test reproducing the problem.

dlclark avatar Jan 13 '24 03:01 dlclark

It's supposed to happen with this pattern, because you mentioned this piece: ()+?. I can't show the code that is causing it because wakatime-cli uses a 3rd-party library called chroma that relies on regexp2.

gandarez avatar Jan 13 '24 13:01 gandarez