I am trying to authenticate against login.microsoftonline.com to reach a resource hidden behind SSO. I need to obtain the SAML response and post it to the SSO endpoint so that I am authenticated. The only problem is that I can't get past the Microsoft login page: it requires a flow token, and requests never sees that token because the page loads it with JavaScript. How can I authenticate?
I would like to say the answer is easy, but it takes some work, and I'm not sure how this works in different environments. Here is what I did for mine.
class Microsoft:
# TODO: Add MFA option
def __init__(self, username: str, password: str):
self.sess = requests.Session()
self.username = username
self.password = password
self.base = "https://login.microsoftonline.com/"
self.tenant = "redacted/"
self.url = ""
# used to return values of input fields rendered
def get_html_name_value(self, html: str, name: str) -> str:
return bs(html, "lxml").find("input", {"name": name}).get("value")
# get redirected to obtain flow token with appropriate SAML
def _get_tokens(self):
session = HTMLSession()
# update this
resp = session.get(
"https://sso.redacted.com/cas/clientredirect?client_name=SAML2Client45&"
+ "service=https://full/path/to/service_behind_sso"
)
self.url = resp.url
resp.html.render()
html = resp.html.html
data = {
"flowToken": bs(resp.html.html, "lxml")
.find(id="i0327")
.get("value"),
"ctx": self.get_html_name_value(html, "ctx"),
"canary": self.get_html_name_value(html, "canary"),
"hpgrequestid": self.get_html_name_value(html, "hpgrequestid"),
}
return session, data
def _get_saml_tokens(self):
sess, payload = self._get_tokens()
payload["login"] = self.username
payload["loginfmt"] = self.username
payload["passwd"] = self.password
headers = {
"Host": "login.microsoftonline.com",
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:88.0) Gecko/20100101"
+ " Firefox/88.0",
"Referer": self.url,
"Origin": "https://login.microsoftonline.com",
}
html = sess.post(
self.base + self.tenant + "/login",
data=payload,
headers=headers,
).text
data = {
"SAMLResponse": self.get_html_name_value(html, "SAMLResponse"),
"RelayState": self.get_html_name_value(html, "RelayState"),
}
return sess, data
def _login(self):
sess, payload = self._get_saml_tokens()
return sess.post(
"https://sso.redacted.com/cas/login?client_name=SAML2Client45",
data=payload,
)
From here you can extract the cookies and put them in a normal requests session, or keep operating in the HTMLSession().
You can do something similar with token auth for other resources, but I haven't needed to access them yet and haven't written the code.
The key would be to change the initial request in step 2 so that step 3 receives a token, and then post the correct auth there. Hopefully everyone can adapt this as needed.