rod icon indicating copy to clipboard operation
rod copied to clipboard

How about adding a one-time hijack router?

Open kvii opened this issue 3 years ago • 8 comments

Rod Version: v0.109.1

Suppose a website has a form with a submit button, and clicking that button sends an XHR request.

It's tedious to have to write a lot of boilerplate code just to hijack a single XHR request. So how about adding a one-time hijack router, similar to page.MustWaitRequestIdle?

Here is a simple demo that does that.

// HijackOnce builds a one-time hijack router on top of page, lets fn register
// handlers on it, and starts the router in a background goroutine.
//
// The returned function blocks until either a registered handler has finished
// (it then returns nil) or the page's context is done (it then returns the
// context's error). In both cases the router is stopped before returning.
func HijackOnce(page *rod.Page, fn func(r *router)) func() error {
	pageCtx := page.GetContext()
	once := newRouter(page.HijackRequests())

	// Let the caller register its handlers before the router starts running.
	fn(once)
	go once.Run()

	wait := func() error {
		defer once.Stop()

		select {
		case <-once.Done():
			return nil
		case <-pageCtx.Done():
			return pageCtx.Err()
		}
	}
	return wait
}

// newRouter wraps r in a one-time hijack router whose Done channel is closed
// after the first wrapped handler finishes.
func newRouter(r *rod.HijackRouter) *router {
	return &router{
		router: r,
		ch:     make(chan struct{}),
		// Capacity 1: exactly one sender can ever succeed, see finish.
		closed: make(chan struct{}, 1),
	}
}

// router is a thin wrapper around rod.HijackRouter that signals, through ch,
// when one of the handlers registered via Add has completed.
type router struct {
	router *rod.HijackRouter
	// ch is closed once the first handler has finished.
	ch chan struct{}
	// closed is a one-slot token used to make sure ch is closed only once.
	closed chan struct{}
}

// Add registers handler for requests matching pattern and resourceType on the
// underlying router. When the handler returns (or panics), the router is
// marked as done.
//
// Add may be called several times, and a pattern may match more than one
// request, so the wrapped handler can run more than once. Closing ch
// directly on every run would panic with "close of closed channel"; finish
// guards against that.
func (r *router) Add(pattern string, resourceType proto.NetworkResourceType, handler func(*rod.Hijack)) error {
	fn := func(h *rod.Hijack) {
		defer r.finish()
		handler(h)
	}
	return r.router.Add(pattern, resourceType, fn)
}

// finish closes ch exactly once. Only the first caller manages to put a token
// into the one-slot closed channel; every later (or concurrent) caller falls
// through to the default branch and does nothing.
func (r *router) finish() {
	select {
	case r.closed <- struct{}{}:
		close(r.ch)
	default:
	}
}

// Done returns a channel that is closed after the first handler has finished.
func (r *router) Done() <-chan struct{} {
	return r.ch
}

// Run runs the underlying hijack router.
func (r *router) Run() {
	r.router.Run()
}

// Stop stops the underlying hijack router and returns its error.
func (r *router) Stop() error {
	return r.router.Stop()
}

Then you can write some code like this.

// Capture the body of the XHR response triggered by the submit button.
var body string
wait := HijackOnce(page, func(r *router) {
	r.Add("submit", proto.NetworkResourceTypeXHR, func(h *rod.Hijack) {
		h.MustLoadResponse()
		body = h.Response.Body()
	})
})

// Trigger the request only after the hijack is in place, then block until
// the handler above has run.
page.MustElement("#submit").MustClick()
if err := wait(); err != nil {
	// The page context ended before the request was hijacked, so body was
	// never filled in. Panic to stay in line with the Must* calls above.
	panic(err)
}

kvii avatar Aug 19 '22 02:08 kvii

How about naming it func (p *Page) HijackOnce(pattern string, h *rod.HijackContext) error?

ysmood avatar Aug 19 '22 02:08 ysmood

How about naming it func (p *Page) HijackOnce(pattern string, h *rod.HijackContext) error?

It is just a simple demo, not the final implementation. I thought it would be a separate struct like HijackRouter.

kvii avatar Aug 19 '22 02:08 kvii

How about naming it func (p *Page) HijackOnce(pattern string, h *rod.HijackContext) error?

No. There is a parameter, rod.NetworkRequestPattern.InterceptionStage, that determines the stage at which to begin intercepting requests. That field can't be set if the argument list only contains "pattern".

kvii avatar Aug 22 '22 09:08 kvii

How about retaining only page.NewHijack, rod.NewHijackRequest, and rod.NewHijackResponse? Then I can implement HijackOnce outside the project, and no bugs are introduced into rod itself.

kvii avatar Sep 08 '22 06:09 kvii

How about adding this HijackOnce without refactoring the old code?

We can work on this issue together to solve all existing problems: #395

ysmood avatar Sep 08 '22 07:09 ysmood

I have seen #607, where you said:

How about adding this HijackOnce without refactoring the old code?

In fact, I have implemented it by myself outside the rod project.

This is the code I actually use in business.

package rodx

import (
	"encoding/base64"

	"github.com/go-rod/rod"
	"github.com/go-rod/rod/lib/proto"
)

// NewHijackOnce creates a HijackOnce bound to page. A pattern must be set
// with SetPattern before Start is called.
func NewHijackOnce(page *rod.Page) *HijackOnce {
	h := new(HijackOnce)
	h.page = page
	h.disable = &proto.FetchDisable{}
	return h
}

// HijackHandler is the callback accepted by HijackOnce.Start. It receives the
// proto.FetchRequestPaused event of the hijacked request; the error it
// returns is what the wait function returned by Start reports.
type HijackHandler = func(e *proto.FetchRequestPaused) error

// HijackOnce is a one-time hijack.
// Create it with NewHijackOnce, configure it with SetPattern, then call Start
// and finally Stop.
type HijackOnce struct {
	// page is the page passed to NewHijackOnce.
	page    *rod.Page
	// enable is the Fetch.enable request built by SetPattern; it stays nil
	// until SetPattern is called, and Start panics while it is nil.
	enable  *proto.FetchEnable
	// disable is the Fetch.disable request sent by Stop.
	disable *proto.FetchDisable
	// cancel is set by Start and invoked by Stop; nil before Start is called.
	cancel  func()
}

// SetPattern sets the single request pattern to hijack, replacing any
// pattern set by an earlier call.
func (h *HijackOnce) SetPattern(pattern *proto.FetchRequestPattern) {
	patterns := []*proto.FetchRequestPattern{pattern}
	h.enable = &proto.FetchEnable{Patterns: patterns}
}

// Start enables request interception with the pattern given to SetPattern and
// subscribes to the first proto.FetchRequestPaused event.
//
// handler is called with that event; it may be nil, in which case the event
// is only waited for. The returned function blocks until the event has been
// handled and then returns the handler's error. If enabling interception
// fails, the returned function returns that error immediately.
//
// Start panics if SetPattern has not been called. Call Stop when done.
func (h *HijackOnce) Start(handler HijackHandler) func() error {
	if h.enable == nil {
		panic("hijack pattern not set")
	}

	p, cancel := h.page.WithCancel()
	h.cancel = cancel

	if err := h.enable.Call(p); err != nil {
		// Nothing will ever listen on p, so release its context right away
		// instead of relying on the caller to call Stop after a failed Start.
		// NOTE(review): assumes the cancel func tolerates being called again
		// by Stop, as context cancel funcs do — confirm for rod's WithCancel.
		cancel()
		return func() error { return err }
	}

	// Kept separate from the enable error above so each error has one owner.
	var handlerErr error
	wait := p.EachEvent(func(e *proto.FetchRequestPaused) bool {
		if handler != nil {
			handlerErr = handler(e)
		}
		// Returning true after the first event is what makes this hijack
		// one-time: the subscription ends once a single request was handled.
		return true
	})

	return func() error {
		wait()
		return handlerErr
	}
}

// Stop cancels the context created by Start, if any, and then disables
// request interception on the page. It returns the error of the disable call.
func (h *HijackOnce) Stop() error {
	if cancel := h.cancel; cancel != nil {
		cancel()
	}

	err := h.disable.Call(h.page)
	return err
}

// FetchGetResponseBody fetches the response body of the paused request e and
// returns it as raw bytes, decoding it first when it is base64 encoded.
// On any error the returned slice is nil.
func FetchGetResponseBody(page *rod.Page, e *proto.FetchRequestPaused) ([]byte, error) {
	res, err := proto.FetchGetResponseBody{RequestID: e.RequestID}.Call(page)
	if err != nil {
		return nil, err
	}

	if res.Base64Encoded {
		decoded, decodeErr := base64.StdEncoding.DecodeString(res.Body)
		if decodeErr != nil {
			// Drop any partially decoded data; callers only get a full body.
			return nil, decodeErr
		}
		return decoded, nil
	}

	return []byte(res.Body), nil
}

// FetchContinueRequest resumes the paused request e without modifying it.
func FetchContinueRequest(page *rod.Page, e *proto.FetchRequestPaused) error {
	return proto.FetchContinueRequest{RequestID: e.RequestID}.Call(page)
}

And here is an example:

// configure hijack
once := rodx.NewHijackOnce(page)
once.SetPattern(&proto.FetchRequestPattern{
	URLPattern:   "http://xx.com/yy",
	ResourceType: proto.NetworkResourceTypeXHR,
	RequestStage: proto.FetchRequestStageResponse,
})

var body []byte
wait := once.Start(func(e *proto.FetchRequestPaused) error {
	// Keep the body error instead of discarding it, so that a failed read
	// is reported through wait() rather than showing up as an empty body.
	var bodyErr error
	body, bodyErr = rodx.FetchGetResponseBody(page, e)

	// Always resume the paused request, even if reading the body failed.
	if err := rodx.FetchContinueRequest(page, e); err != nil {
		return err
	}
	return bodyErr
})
defer once.Stop()

// trigger the request
err := btn.Click(proto.InputMouseButtonLeft)
if err != nil {
	return errors.New("click failed")
}

err = wait()
if err != nil || len(body) == 0 {
	return errors.New("hijack failed")
}
//...

kvii avatar Sep 19 '22 08:09 kvii

Another draft that only gets resource content with one pattern.


// HijackResource intercepts, at the response stage, the first request that
// matches pattern and resourceType, and gives access to its response body.
//
// It returns an error if interception cannot be enabled. Otherwise it returns
// a function that blocks until a matching request is paused, fetches the
// response body, disables interception again and returns the outcome as a
// HijackResult.
//
// Usage:
//
//	wait, _ := p.HijackResource("/download/foo.pdf", proto.NetworkResourceTypeFetch)
//	p.MustElement("button.foo.download").MustClick()
//	_ = wait().Save("foo.pdf", 0644)
func (p *Page) HijackResource(pattern string, resourceType proto.NetworkResourceType) (func() HijackResult, error) {
	enable := proto.FetchEnable{
		Patterns: []*proto.FetchRequestPattern{{
			URLPattern:   pattern,
			ResourceType: resourceType,
			RequestStage: proto.FetchRequestStageResponse,
		}},
	}
	if err := enable.Call(p); err != nil {
		return nil, err
	}

	// Subscribe now, so the event is captured even if the request is
	// triggered before the returned function is called.
	paused := new(proto.FetchRequestPaused)
	waitPaused := p.WaitEvent(paused)

	collect := func() HijackResult {
		// Interception is switched off only after the body has been fetched.
		defer proto.FetchDisable{}.Call(p)
		waitPaused()

		res, err := proto.FetchGetResponseBody{RequestID: paused.RequestID}.Call(p)
		return HijackResult{body: res, err: err}
	}
	return collect, nil
}

// HijackResult holds the outcome of fetching a hijacked response body.
// Its accessor methods return err first, if set, before touching body.
type HijackResult struct {
	// body is the result of the proto.FetchGetResponseBody call.
	body *proto.FetchGetResponseBodyResult
	// err is the error returned by that call, if any.
	err  error
}

// Byte returns the response body as raw bytes, decoding it from base64 when
// the body is flagged as base64 encoded. If fetching the body failed, that
// error is returned instead.
func (h HijackResult) Byte() ([]byte, error) {
	switch {
	case h.err != nil:
		return nil, h.err
	case h.body.Base64Encoded:
		return base64.StdEncoding.DecodeString(h.body.Body)
	default:
		return []byte(h.body.Body), nil
	}
}

// String returns the response body as a string, decoding it from base64 when
// the body is flagged as base64 encoded. If fetching or decoding the body
// failed, it returns an empty string and the error.
func (h HijackResult) String() (string, error) {
	if h.err != nil {
		return "", h.err
	}

	// A plain body can be handed back as is.
	if !h.body.Base64Encoded {
		return h.body.Body, nil
	}

	decoded, err := base64.StdEncoding.DecodeString(h.body.Body)
	if err != nil {
		return "", err
	}
	return string(decoded), nil
}

// Save writes the response body to the file name using permissions perm.
// It returns the error from obtaining the body, if any, without touching the
// file; otherwise it returns the result of the write.
func (h HijackResult) Save(name string, perm fs.FileMode) error {
	data, err := h.Byte()
	if err == nil {
		err = os.WriteFile(name, data, perm)
	}
	return err
}

kvii avatar Apr 21 '24 06:04 kvii

LGTM

ysmood avatar Apr 21 '24 06:04 ysmood