Different cookie and redirect behavior between Python (requests) and Rust (reqwest)
I'm trying to use the reqwest library to obtain the login cookies for an SSO service, but it is not working properly. I implemented the same flow in Python with requests, and that version works:
import requests
def print_cookies(response, description):
    """Print the URL, status code and cookies of ``response``.

    Every line is prefixed with ``description`` so that the output of the
    individual requests can be told apart; a dashed separator line is
    printed at the end.
    """
    cookies = response.cookies.get_dict()
    print(f"{description} URL: {response.url}")
    print(f"{description} Code: {response.status_code}")
    print(f"{description} Cookies: {cookies}")
    print("---------------------------------------")
# A single session is used for the whole flow.
session = requests.Session()

# Browser-like headers; they are passed to the login POST only.
login_headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'Accept-Encoding': 'gzip, deflate, br, zstd',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Cookie': '_7da9a=http://10.0.3.144:8080',
    'Host': 'sso.buaa.edu.cn',
    'Origin': 'https://sso.buaa.edu.cn',
    'Referer': 'https://sso.buaa.edu.cn/login?service=https%3A%2F%2Fspoc.buaa.edu.cn%2Fspoc%2FmoocMainIndex%2FspocWelcome',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36 Edg/126.0.0.0',
}

# Form fields of the login request.
payload = {
    'username': 'username',
    'password': 'password',
    'submit': '%E7%99%BB%E5%BD%95',
    'type': 'username_password',
    'execution': 'c28e880d=',  # This is a very long string, dynamically fetched on a web page.
    '_eventId': 'submit'
}

login_url = 'https://sso.buaa.edu.cn/login'

# Disable automatic redirects to capture the redirection URL
login_response = session.post(login_url, headers=login_headers, data=payload, allow_redirects=False)
print_cookies(login_response, "Login Response")

# Manually follow the redirect
redirect_url = login_response.headers.get('Location')
if redirect_url:
    # Note: the login headers are NOT passed to the follow-up requests.
    redirect_response = session.get(redirect_url, allow_redirects=False)
    print_cookies(redirect_response, "Redirect Response")

    # Check if there's another redirect
    redirect_url = redirect_response.headers.get('Location')
    if redirect_url:
        redirect_response = session.get(redirect_url, allow_redirects=True)
        print_cookies(redirect_response, "Final Response")
To better track what happens during each request, I disabled automatic redirection and followed the redirects manually (three requests in total: the login POST and two redirect GETs).
The output is as follows:
Login Response URL: https://sso.buaa.edu.cn/login
Login Response Code: 302
Login Response Cookies: {'CASTGC': 'TGT-326977'}
---------------------------------------
Redirect Response URL: https://spoc.buaa.edu.cn/spoc/moocMainIndex/spocWelcome?ticket=ST-542018
Redirect Response Code: 302
Redirect Response Cookies: {'JSESSIONID': 'C4A37C1F0', 'INCO': '258c6d9'}
---------------------------------------
Final Response URL: https://spoc.buaa.edu.cn/spoc/moocMainIndex/spocWelcome;jsessionid=C4A37C1F
Final Response Code: 200
Final Response Cookies: {'name': 'value', 'JSESSIONID': 'C4A37C1F048'}
---------------------------------------
I've hidden some of the sensitive information, but you can see that two redirects occurred, the last request has a status code of 200, and I got the three cookies I need: CASTGC, INCO, and JSESSIONID. These cookies are valid and can be used for subsequent authenticated requests.
In Rust, however, the situation is different.
use reqwest::{Client, Response};
use reqwest::header::HeaderMap;
use thiserror::Error;
use scraper::{Html, Selector};
use std::sync::Arc;
/// URL that the login form is POSTed to by `get_login_cookie`.
const SSOURL: &str = "https://sso.buaa.edu.cn/login";
/// Inserts a fixed set of browser-like request headers into `$header`,
/// together with any extra `name, value` pairs supplied by the caller.
///
/// Usage: `header!(map, "Name-1", value1; "Name-2", value2)` — the extra
/// pairs are separated by `;`.
///
/// The expansion is a sequence of `$header.insert(..)` statements, so
/// `$header` is written out (and evaluated) once per inserted header.
///
/// # Panics
/// Panics if one of the extra values is rejected by
/// `HeaderValue::from_str` (the result is unwrapped).
macro_rules! header {
( $header:expr, $( $k:expr, $v:expr );* ) => {
// Fixed headers inserted before the caller-supplied pairs.
$header.insert("Accept", reqwest::header::HeaderValue::from_static("text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"));
$header.insert("Accept-Encoding", reqwest::header::HeaderValue::from_static("gzip, deflate, br, zstd"));
$header.insert("Accept-Language", reqwest::header::HeaderValue::from_static("zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6"));
$header.insert("Cache-Control", reqwest::header::HeaderValue::from_static("max-age=0"));
$header.insert("Connection", reqwest::header::HeaderValue::from_static("keep-alive"));
// Caller-supplied pairs; the values are validated at run time.
$(
$header.insert($k, reqwest::header::HeaderValue::from_str($v).unwrap());
)*
// Fixed headers inserted after the caller-supplied pairs.
$header.insert("Upgrade-Insecure-Requests", reqwest::header::HeaderValue::from_static("1"));
$header.insert("User-Agent", reqwest::header::HeaderValue::from_static("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36 Edg/126.0.0.0"));
}
}
/// Error type returned by `get_login_cookie`.
#[derive(Error, Debug)]
pub enum SSOError {
/// The cookies have expired.
/// NOTE(review): not constructed anywhere in the code shown here.
#[error("Error caused by expired cookies")]
ExpiredCookie,
/// The login page could not be fetched successfully, or it did not
/// contain an `execution` input with a `value` attribute.
#[error("Error caused by failed to get execution value from sso login page")]
FailedGetExecution,
/// A `reqwest` operation failed; created automatically by `?` through
/// the `#[from]` conversion.
#[error("Error caused by request fail in reqwest crate")]
FailedRequest(#[from] reqwest::Error),
}
pub async fn get_login_cookie(usr: &str, pw: &str) -> Result<String, SSOError> {
let url = "https://sso.buaa.edu.cn/login?service=https%3A%2F%2Fspoc.buaa.edu.cn%2Fspoc%2FmoocMainIndex%2FspocWelcome";
let cookie_jar = Arc::new(reqwest::cookie::Jar::default());
let client = Client::builder()
.gzip(true)
.cookie_store(true)
.cookie_provider(Arc::clone(&cookie_jar))
.redirect(reqwest::redirect::Policy::none())
.build()?;
let mut headers = HeaderMap::new();
header!(
headers,
"Content-Type", "application/x-www-form-urlencoded";
"Cookie", "_7da9a=http://10.0.3.144:8080";
"Host", "sso.buaa.edu.cn";
"Origin", "https://sso.buaa.edu.cn";
"Referer", url
);
// Only used to get the "execution" value
let login_page = client.get(url).send().await.unwrap();
let execution = if login_page.status().is_success() {
let body = login_page.text().await.unwrap();
let document = Html::parse_document(&body);
let input_selector = Selector::parse("input[name='execution']").unwrap();
let execution_value = document
.select(&input_selector)
.next()
.and_then(|input| input.value().attr("value"));
if let Some(value) = execution_value {
String::from(value)
} else {
return Err(SSOError::FailedGetExecution)
}
} else {
return Err(SSOError::FailedGetExecution)
};
let form = [
("username", usr),
("password", pw),
("submit", "%E7%99%BB%E5%BD%95"),
("type", "username_password"),
("execution", &execution),
("_eventId", "submit")
];
// The request starts here.
let res = client.post(SSOURL)
.headers(headers.clone())
.form(&form)
.send()
.await.unwrap();
print_response(&res, "Initial Response");
if let Some(redirect_url) = res.headers().get("Location") {
let redirect_res = client.get(redirect_url.to_str().unwrap())
.headers(headers.clone())
.send()
.await?;
print_response(&redirect_res, "Redirect Response");
if let Some(second_redirect_url) = redirect_res.headers().get("Location") {
let final_res = client.get(second_redirect_url.to_str().unwrap())
.headers(headers)
.send()
.await?;
print_response(&final_res, "Final Response");
}
}
// This is intended to return the cookie string
Ok(String::from("cookie"))
}
/// Writes the status, URL and every `set-cookie` header of `response` to
/// stdout, prefixing each line with `description`.
fn print_response(response: &Response, description: &str) {
    let status = response.status();
    let url = response.url();
    println!("{} Status: {:?}", description, status);
    println!("{} URL: {}", description, url);

    response
        .headers()
        .get_all("set-cookie")
        .iter()
        .for_each(|cookie| println!("{} Set-Cookie Headers: {:#?}", description, cookie));
}
Since reqwest doesn't seem to expose the cookies set by intermediate responses when it follows redirects automatically, I disabled automatic redirection here as well and issued the three requests manually.
This produced the following output:
Initial Response Status: 302
Initial Response URL: https://sso.buaa.edu.cn/login
Initial Response Set-Cookie Headers: "CASTGC=TGT-327439; Path=/; Secure; HttpOnly"
Redirect Response Status: 302
Redirect Response URL: https://spoc.buaa.edu.cn/spoc/moocMainIndex/spocWelcome?ticket=ST-542698
Redirect Response Set-Cookie Headers: "JSESSIONID=AE52E67190D; Path=/spoc; Secure; HttpOnly"
Redirect Response Set-Cookie Headers: "INCO=7fe726c; Path=/"
Final Response Status: 302
Final Response URL: https://spoc.buaa.edu.cn/spoc/moocMainIndex/spocWelcome
Final Response Set-Cookie Headers: "INCO=137c06ba96; Path=/"
You can see that the third request ends up at a different URL and with a different status code than in the Python version. The Rust version seems to be stuck in a redirection loop: no matter how many times I manually follow the redirect, the status code is always 302.
The returned cookie values are also different: the three requests returned two different INCO values. These three cookies are invalid and cannot be used for authenticated requests.
Cargo.toml:
reqwest = { version = "0.12.5", features = ["cookies", "gzip", "json"] }
scraper = "0.19.0"
serde = { version = "1.0.203", features = ["derive"] }
serde_json = "1.0.120"
thiserror = "1.0.61"
tokio = { version = "1.38.0", features = ["full"] }
rust version: cargo 1.80.0-nightly (431db31d0 2024-05-28) Windows x86 msvc
I'm not sure whether the two programs are functionally equivalent. If they are not, what is the difference, and why do they behave differently?
Thank you!