regexp2
regexp2 copied to clipboard
runtime error: index out of range [<number>] with length <samenumber>
One more that was fuzzed during the night ;)
package main
import (
"fmt"
"runtime/debug"
"github.com/dlclark/regexp2"
)
// testCases lists the fuzzed inputs for the reproducer. In each entry, r is
// the regular expression source and s is the subject string it is run against.
var testCases = []struct {
	r, s []byte
}{
	{
		r: []byte("0(?>()+?0)00000000w"),
		s: []byte("0000000000000000000"),
	},
	{
		r: []byte("(?>(?>)+?>)0000000000"),
		s: []byte("00000000000000>000000"),
	},
}
// test compiles r as an ECMAScript-mode regexp2 pattern and runs it against s.
//
// It reports true only when a panic was recovered while doing so; in that case
// the panic value and the current stack are printed first. A pattern that does
// not compile, or a match attempt that finishes without panicking, yields false.
func test(r, s []byte) (b bool) {
	defer func() {
		recovered := recover()
		if recovered == nil {
			return
		}
		fmt.Println(recovered)
		debug.PrintStack()
		// The named result lets the deferred function change the return value.
		b = true
	}()

	pattern, input := string(r), string(s)
	re, err := regexp2.Compile(pattern, regexp2.ECMAScript)
	if err != nil {
		return false
	}
	// The match result and error are deliberately ignored: only a panic matters.
	_, _ = re.FindStringMatch(input)
	return false
}
// main runs every entry of testCases through test. For each entry it first
// prints the "panics" header and a separator line, then, if test reports that
// no panic was recovered, prints a follow-up "DOES NOT panic" message.
func main() {
	for i := range testCases {
		c := testCases[i]
		fmt.Printf("Test case regex='%#v', string='%#v' panics\nstring values '%s', '%s'\n",
			c.r, c.s, string(c.r), string(c.s),
		)
		fmt.Println("#############################################################################")
		if test(c.r, c.s) {
			// test has already printed the panic value and stack trace.
			continue
		}
		fmt.Printf("Test case regex='%#v', string='%#v' DOES NOT panic\nstring values '%s', '%s'\n",
			c.r, c.s, string(c.r), string(c.s),
		)
	}
}
panics with
Test case regex='[]byte{0x30, 0x28, 0x3f, 0x3e, 0x28, 0x29, 0x2b, 0x3f, 0x30, 0x29, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x77}', string='[]byte{0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}' panics
string values '0(?>()+?0)00000000w', '0000000000000000000'
#############################################################################
runtime error: index out of range [72] with length 72
goroutine 1 [running]:
runtime/debug.Stack(0x36, 0x0, 0x0)
runtime/debug/stack.go:24 +0x9d
runtime/debug.PrintStack()
runtime/debug/stack.go:16 +0x22
main.test.func1(0xc00015be38)
command-line-arguments/test.go:27 +0x97
panic(0x4f0b40, 0xc0001320e0)
runtime/panic.go:969 +0x166
github.com/dlclark/regexp2.(*runner).backtrack(0xc000162000)
github.com/dlclark/regexp2/runner.go:1033 +0x246
github.com/dlclark/regexp2.(*runner).execute(0xc000162000, 0x0, 0x0)
github.com/dlclark/regexp2/runner.go:904 +0x9b
github.com/dlclark/regexp2.(*runner).scan(0xc000162000, 0xc0001340a0, 0x13, 0x14, 0x0, 0x0, 0x7fffffffffffffff, 0x13, 0x14, 0x4490be)
github.com/dlclark/regexp2/runner.go:144 +0x1c3
github.com/dlclark/regexp2.(*Regexp).run(0xc000160080, 0xc00015bd00, 0xffffffffffffffff, 0xc0001340a0, 0x13, 0x14, 0x0, 0x0, 0x0)
github.com/dlclark/regexp2/runner.go:91 +0xf0
github.com/dlclark/regexp2.(*Regexp).FindStringMatch(...)
github.com/dlclark/regexp2/regexp.go:159
main.test(0x5b9710, 0x13, 0x13, 0x5b9730, 0x13, 0x13, 0x0)
command-line-arguments/test.go:36 +0x168
main.main()
command-line-arguments/test.go:46 +0x355
Test case regex='[]byte{0x28, 0x3f, 0x3e, 0x28, 0x3f, 0x3e, 0x29, 0x2b, 0x3f, 0x3e, 0x29, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}', string='[]byte{0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x3e, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30}' panics
string values '(?>(?>)+?>)0000000000', '00000000000000>000000'
#############################################################################
runtime error: index out of range [32] with length 32
goroutine 1 [running]:
runtime/debug.Stack(0x36, 0x0, 0x0)
runtime/debug/stack.go:24 +0x9d
runtime/debug.PrintStack()
runtime/debug/stack.go:16 +0x22
main.test.func1(0xc00015be38)
command-line-arguments/test.go:27 +0x97
panic(0x4f0b40, 0xc000132160)
runtime/panic.go:969 +0x166
github.com/dlclark/regexp2.(*runner).popcrawl(...)
github.com/dlclark/regexp2/runner.go:938
github.com/dlclark/regexp2.(*runner).uncapture(...)
github.com/dlclark/regexp2/runner.go:1467
github.com/dlclark/regexp2.(*runner).execute(0xc000162100, 0x0, 0x0)
github.com/dlclark/regexp2/runner.go:507 +0x408c
github.com/dlclark/regexp2.(*runner).scan(0xc000162100, 0xc000100120, 0x15, 0x18, 0x0, 0x0, 0x7fffffffffffffff, 0x15, 0x18, 0x4490be)
github.com/dlclark/regexp2/runner.go:144 +0x1c3
github.com/dlclark/regexp2.(*Regexp).run(0xc000160180, 0xc00015bd00, 0xffffffffffffffff, 0xc000100120, 0x15, 0x18, 0x0, 0x0, 0x0)
github.com/dlclark/regexp2/runner.go:91 +0xf0
github.com/dlclark/regexp2.(*Regexp).FindStringMatch(...)
github.com/dlclark/regexp2/regexp.go:159
main.test(0x5b9750, 0x15, 0x15, 0x5b9770, 0x15, 0x15, 0x0)
command-line-arguments/test.go:36 +0x168
main.main()
command-line-arguments/test.go:46 +0x355
Oddly enough, I think this one is related to #34. The `()+?` construct generates a state machine that ends up with a corrupt stack. I suspect the `Lazybranchmark`
operation isn't handling the scenario where we have an empty group with a lazy repeat.
Sometimes the out-of-sync stack leads to bad match groups being created (in the case of #34), sometimes it causes bad jumps (these two examples), and sometimes it just fails silently and leaves extra items on the stack.
We'll see if Microsoft can figure out the right change...otherwise I'll take a look at it.
It's still happening in production code with this pattern: `((?:[\w*\s])+?(?:\s|[*]))([a-zA-Z_]\w*)(\s*\([^;]*?\))([^;{]*)(\{)`. Any update on this?
@gandarez What exact input and options cause the pattern to panic? It'd help to have a simple test reproducing the problem.
It is supposed to happen with this pattern, because you mentioned this piece: `()+?`. I can't show the code that is causing it because wakatime-cli uses a 3rd party library called chroma that relies on regexp2.