AngleSharp.Js icon indicating copy to clipboard operation
AngleSharp.Js copied to clipboard

Can't get the Location modified by JS.

Open sgf opened this issue 1 year ago • 9 comments

Bug Report

[Description of the bug]

The script is either not executed, or it is executed but I can't get the result. The script modifies the location, but I can't see any change from C#.

Steps to Reproduce

  1. [First Step]
<html>

<head>
    <script>
        // Obfuscated redirect script, expanded and formatted for debugging.
        // It builds two URL strings out of many small fragments (plain globals,
        // tiny functions and immediately-invoked function expressions) and then
        // assigns them to `location` and to `window['href']`.
        // The bare string statements such as 'return tI' or 'AGjj' are no-op
        // expression statements; they have no effect on the result.

        // Returns the name of the function that called getName(): first via the
        // caller's `name` property, otherwise by stripping whitespace from the
        // caller's source text and capturing what sits between "function" and "(".
        // Returns '' when no name can be extracted.
        // Relies on the non-standard `Function.prototype.caller`; it is not called
        // anywhere in this expanded script.
        function getName() {
            var caller = getName.caller; if (caller.name) { return caller.name }
            var str = caller.toString().replace(/[\s]*/g, "");
            var name = str.match(/^function([^\(]+?)\(/);
            if (name && name[1]) { return name[1]; } else { return ''; }
        }
        // --- URL fragments (implicit globals) ---
        cD = '?';
        function tI() { 'return tI'; return '5' } // yields '5'
        t8 = '_';
        IsoT = function () { 'return IsoT'; return 'ign'; }; // yields 'ign'
        Y8 = function () { 'Y8'; var _Y = function () { return '4' }; return _Y(); }; // yields '4'
        AGjj = function () {
            'AGjj';
            var _A = function () { return 'thr' }; return _A(); // yields 'thr'
        };
        _Fi30O = 'href'; // property name used for the final window[...] assignment
        _bBbb3 = 'replace'; // not referenced again in this script
        Oh = 'e';
        ua2d = function () { 'ua2d'; var _u = function () { return '-1-' }; return _u(); }; // yields '-1-'
        H0 = '/';
        function s2() { 's2'; function _s() { return '1' }; return _s(); } // yields '1'
        _AiXGg = window; // alias of window, target of the final assignment
        UM = '1';
        _SOq5H = 'assign'; // not referenced again in this script
        _ai4mU = location; // alias of location; not referenced again in this script
        lIk = function (lIk_) { var _l = function (lIk_) { 'return lIk'; return lIk_; }; return _l(lIk_); }; // identity: returns its argument
        So = function () { 'So'; var _S = function () { return 'd' }; return _S(); }; // yields 'd'
        VP = function () { 'return VP'; return '='; }; // yields '='
		// Breakpoint added by the reporter to inspect the values in browser dev tools.
		debugger
		// tmp1 concatenates to "/thread-13587-1-1.html?_dsign=b460e65b".
		 tmp1= H0 +
            AGjj() +
            Oh +
            (function () { 'return BgQP'; return (function () { return 'ad-'; })(); })() + // 'ad-'
            UM +
            (function () { 'return nh'; return (function () { return '3'; })(); })() + // '3'
            tI() +
            (function (f9Z_) { return (function (f9Z_) { return f9Z_; })(f9Z_); })('87') + // '87'
            ua2d() +
            s2() +
            (function (RTD_) { return (function (RTD_) { return RTD_; })(RTD_); })('.h') + // '.h'
            (function () { 'return w9'; return 't' })() + // 't'
            (function (s0o_) { return (function (s0o_) { return s0o_; })(s0o_); })('ml') + // 'ml'
            cD +
            t8 +
            So() +
            (function () { 'return WK'; return 's' })() + // 's'
            IsoT() +
            VP() +
            (function () { 'return gd'; return (function () { return 'b'; })(); })() + // 'b'
            Y8() +
            (function (CPh_) { return (function (CPh_) { return CPh_; })(CPh_); })('60') + // '60'
            lIk('e6') +
            (function (S7H_) { return (function (S7H_) { return S7H_; })(S7H_); })('5b'); // '5b'
		
        location=tmp1;// Key statement: equivalent to location = "/thread-13587-1-1.html?_dsign=b460e65b";
		// tmp2 concatenates to "/thread-13587".
		 tmp2= H0 + AGjj() + Oh +
            (function () { 'return BgQP'; return (function () { return 'ad-'; })(); })() + // 'ad-'
            UM +
            (function () { 'return nh'; return (function () { return '3'; })(); })() + // '3'
            tI() +
            (function (f9Z_) { return (function (f9Z_) { return f9Z_; })(f9Z_); })('87'); // '87'
        _AiXGg[_Fi30O] =tmp2;// Equivalent to window["href"] = "/thread-13587";
    </script>
</head>

<body>
</body>
</html>
  1. [Second Step]

 // Build the AngleSharp configuration used for the repro: default configuration
 // plus the default loader, JavaScript support (WithJs) and a console logger
 // backed by MyConsoleLogger (defined elsewhere, not shown in this snippet).
 var configWithJs = Configuration.Default
        // Disabled attempt: removing the JsNavigationHandler from the configuration.
        //.Without<JsNavigationHandler>()
        .WithDefaultLoader().WithJs()
.WithConsoleLogger(ctx => new MyConsoleLogger(ctx));
    // Disabled attempts below: a custom HttpClientHandler with toggled
    // AllowAutoRedirect, and registering custom requesters.
    //var handler = new HttpClientHandler();
    //-handler.AllowAutoRedirect = true;
    //+handler.AllowAutoRedirect = false;
    //configWithJs.WithRequesters()
    //configWithJs.WithRequester < ()
    //This is our sample source, we will trigger the load event
    // The embedded script is the minified form of the site's redirect script. It assigns
    // location['href'] = "/thread-13587-1-1.html?_dsign=b460e65b" and then
    // window.href = "/thread-13587". Some fragments ('h', 'tm', 'b46') are produced by
    // getName(), which reads the name of the calling function through getName.caller.
    var source = @"<html><head><script type=""text/javascript"">B0=function(){'B0';var _B=function(){return '/'}; return _B();};mpiS='e65';function cTh8(){'return cTh8';return '1-1'}_R84h2 = 'replace';GZ='t';function Y9r(Y9r_){function tm(){return getName();};return tm();return 'Y9r'}function getName(){var caller=getName.caller;if(caller.name){return caller.name} var str=caller.toString().replace(/[\s]*/g,"""");var name=str.match(/^function([^\(]+?)\(/);if(name && name[1]){return name[1];} else {return '';}}function YNsE(YNsE_){function _Y(YNsE_){function b46(){return getName();}function YNsE_(){}return b46();return YNsE_}; return _Y(YNsE_);}function F5(){'return F5';return '?'}function zY(){'zY';function _z(){return '.'}; return _z();}gC=function(){'return gC';return '_';};I0=function(){'I0';var _I=function(){return 'l'}; return _I();};_cqkf1 = window;_UK8tv = 'assign';EZ='h';Hz2A='dsi';function YB(YB_){function h(){return getName();};return h();return 'YB'}cnJ=function(cnJ_){var _c=function(cnJ_){'return cnJ';return cnJ_;}; return _c(cnJ_);};_Ieo6L = 'href';_Fd5t2 = location;Yw='-';go=function(){'go';var _g=function(){return '7'}; return _g();};_Fd5t2[_Ieo6L]=B0()+GZ+YB('ZV')+cnJ('re')+(function(sFT_){'return sFT';return sFT_})('ad')+'-1'+(function(){'return kW0j';return (function(){return '358';})();})()+go()+Yw+cTh8()+zY()+EZ+Y9r('DZB')+I0()+F5()+gC()+Hz2A+(function(wnn_){'return wnn';return wnn_})('gn')+(function(){'return Hi';return '='})()+YNsE('F3T4')+(function(){'return B3';return '0'})()+mpiS+(function(QB7_){'return QB7';return QB7_})('b');_cqkf1.href=B0()+GZ+YB('ZV')+cnJ('re')+(function(sFT_){'return sFT';return sFT_})('ad')+'-1'+(function(){'return kW0j';return (function(){return '358';})();})()+go();</script></head><body></body></html>";

    // Create a browsing context from the JS-enabled configuration.
    var contextWithJs = BrowsingContext.New(configWithJs);

    // Open a document from the in-memory HTML string, using
    // http://localhost:8080 as its address, then wait until it is available.
    await contextWithJs.OpenAsync(req =>
    {
        req.Content(source);
        req.Address(@"http://localhost:8080");

    }).WaitUntilAvailable();
    // Read back the active document, its URL and its location to check whether
    // the script's location assignment had any visible effect.
    var threadDoc = contextWithJs.Active;
    var threadDocUrl = contextWithJs.Active.Url;
    // NOTE(review): the member is presumably `Location` (capital L) on AngleSharp's
    // IDocument — confirm that `location` compiles as written.
    var threadDoclocation= contextWithJs.Active.location;

  1. [and so on...]

Expected behavior: [What you expected to happen]

threadDoclocation should be: http://localhost:8080/thread-13587-1-1.html?_dsign=b460e65b

Actual behavior: [What actually happened]

Environment details: [OS, .NET Runtime, ...]

.net 8.0

Possible Solution

[Optionally, share your idea to fix the issue]

sgf avatar Sep 22 '24 00:09 sgf

  • What version of AngleSharp
  • What version of AngleSharp.Js
  • Why is the website not streamed directly (you might have encoding issues otherwise)
  • For any reproducible we'd like to have a clean code / MWE - here is see a lot of different things in the code (+a minified JS source); can you change this?

Thanks for your support.

FlorianRappl avatar Sep 22 '24 10:09 FlorianRappl

  • What version of AngleSharp
  • What version of AngleSharp.Js

The latest version of each.

  • Why is the website not streamed directly (you might have encoding issues otherwise)

The website only returns the HTML I gave it. The script in this HTML is randomly generated. This is obviously an anti-crawler method.

  • For any reproducible we'd like to have a clean code / MWE - here is see a lot of different things in the code (+a minified JS source); can you change this?

The first paragraph (Html) is what I expanded and formatted as much as possible to analyze the function of this script. And I added intermediate variables to the script in order to debug this script at the browser breakpoint as much as possible to observe its true purpose.

sgf avatar Sep 22 '24 15:09 sgf

The situation in the browser is: I visited the target website http://localhost:8080/thread-13587-1-1.html path, but the target website returned the above html to me, and then the page was redirected to: http://localhost:8080/thread-13587-1-1.html?_dsign=b460e65b If I don't allow it to jump, then I can't get the real web page content. As for the real web page content, I don't want to enable JS, because if JS is enabled, the key content will be deleted by JS.

However, the page that should redirect (thread-13587-1-1.html, i.e. the HTML above) does not seem to execute normally in AngleSharp.Js: it does not redirect. Through analysis, I noticed that the script mainly performs two assignments: location = "thread-13587-1-1.html?_dsign=b460e65b" and window['href'] = "thread-13587". The most critical one is: location = "thread-13587-1-1.html?_dsign=b460e65b".

I hope:

  1. If the script can be executed normally, and I can get the new URL from C# and prevent the web page from jumping, I will take over the jump through C# myself (open with OpenAsync).
  2. If 1 cannot be done, then I hope that at least the jump can be normal, and JS is disabled on the page after the jump.
  3. If 1&2 is not possible, please at least tell me if there is any way to make the script run normally (cannot jump, I can accept that), and at the same time, at least I need to be able to intercept the assignment operation of Location=NewUrl to obtain the new URL.

sgf avatar Sep 22 '24 15:09 sgf

If you want to check the function of the script, you can save the above Html to an HTML file and open this page directly with a browser. F12 tool, please turn on <Keep Log>, otherwise, due to the jump, the browser network access log will not show the path before the jump.

You should be able to see my code, I tried multiple things before asking the question, but to no avail.

sgf avatar Sep 22 '24 15:09 sgf

I noticed that the original response contains only the script. The html, head, and body tags were added by AngleSharp's IDocument object.

sgf avatar Sep 22 '24 16:09 sgf

I still don't get the issue and how this is a bug. Right now it seems to me that this script is modifying the location and thus the location / page changes. That's how it should be. I don't see any bug right now. Please explain / modify the MWE to show your scenario.

FlorianRappl avatar Sep 22 '24 21:09 FlorianRappl

I still don't get the issue and how this is a bug. Right now it seems to me that this script is modifying the location and thus the location / page changes. That's how it should be. I don't see any bug right now. Please explain / modify the MWE to show your scenario.

The problem is that the location / page I get in C# does not change at all.

sgf avatar Sep 23 '24 02:09 sgf

I still don't get the issue and how this is a bug. Right now it seems to me that this script is modifying the location and thus the location / page changes. That's how it should be. I don't see any bug right now. Please explain / modify the MWE to show your scenario.

Here is an MWE: https://github.com/sgf/AngleSharpJsMWE

sgf avatar Sep 23 '24 03:09 sgf

Describe the bug

at AngleSharp.Js.DomPrototypeInstance.<>c__DisplayClass13_0.<SetProperty>b__1(JsValue obj, JsValue[] values) at Jint.Runtime.Interop.ClrFunction.CallSlow(JsValue thisObject, JsValue[] arguments) at D:\MySrc\jint\Jint\Runtime\Interop\ClrFunction.cs : 52 line

To Reproduce

<script type="text/javascript">
    // Minimal repro: a bare assignment to the global `location` (no `window.` prefix).
    // This is the form reported to throw in AngleSharp.Js.
    // The string uses plain ASCII double quotes; typographic quotes are not
    // valid JavaScript string delimiters and would make the script fail to parse.
    location = "xxx.html?q=123456";
</script>

But if the JS code is changed to: window.location = "xxx.html?q=123456";

The exception will disappear.

screenshot:

image

image

sgf avatar Sep 24 '24 07:09 sgf