I came across this post; the answer to that question shows how to handle the POST request for hotels.com.
Here is the code, written by αԋɱҽԃ αмєяιcαη:
import trio
import httpx
import pandas as pd
async def main():
    """Fetch one page of hotels.com guest reviews and print them as a DataFrame.

    Sends the ``reviewsQuery`` GraphQL operation for hotel ``344560`` to
    ``https://fr.hotels.com/kes/graphql``, flattens the ``items`` of every
    review group in the response into a single list, and prints the
    resulting ``pandas.DataFrame``.

    Raises:
        httpx.HTTPStatusError: if the server answers with a 4xx/5xx status.
    """
    # timeout=None disables httpx's default timeouts for this client.
    async with httpx.AsyncClient(timeout=None) as client:
        data = {
            "operationName": "reviewsQuery",
            "query": "query reviewsQuery($hotelId: String!, $reviewType: String, $reviewOrder: String, $tripTypeFilter: String, $paginationURL: String) {\n reviews(\n hotelId: $hotelId\n reviewType: $reviewType\n reviewOrder: $reviewOrder\n tripTypeFilter: $tripTypeFilter\n paginationURL: $paginationURL\n ) {\n body {\n reviewContent {\n filters {\n type\n name\n count\n url\n __typename\n }\n overall {\n selectedFilterType\n rating\n badgeText\n total\n scores {\n score\n count\n url\n __typename\n }\n ratingAspects {\n cleanliness\n service\n comfort\n condition\n neighbourhood\n __typename\n }\n whatGuestsSay {\n type\n text\n __typename\n }\n topRated {\n category\n explanation\n __typename\n }\n __typename\n }\n sort {\n url\n options {\n value\n label\n __typename\n }\n __typename\n }\n reviews {\n hermes {\n groups {\n separatorText\n items {\n itineraryId\n brand\n googleTranslateEnabled\n reviewDbDate\n ...GuestReviewsFragment\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n pagination {\n currentPage\n nextURL\n totalPages\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n}\n\nfragment GuestReviewsFragment on ReviewsItem {\n genuineMsg\n tripType\n tripTypeText\n reviewDate\n reviewSubmitDate\n rating\n reviewer {\n name\n locality\n locale\n __typename\n }\n badge\n summary\n description\n __typename\n}\n",
            "variables": {
                "hotelId": "344560",
                "reviewOrder": "date_newest_first",
                "reviewType": "brand",
                "tripTypeFilter": "all"
            }
        }
        r = await client.post('https://fr.hotels.com/kes/graphql', json=data)
        # Surface HTTP errors directly instead of letting an error body fail
        # later as a confusing JSON-decoding or KeyError problem.
        r.raise_for_status()
        # NOTE(review): only a single request is made, so only one page of
        # reviews is returned. The query accepts $paginationURL and selects
        # pagination.nextURL; presumably feeding nextURL back in as
        # "paginationURL" pages through the rest -- confirm against the API.
        groups = r.json()['data']['reviews']['body']['reviewContent']['reviews']['hermes']['groups']
        allin = [item for group in groups for item in group['items']]
        df = pd.DataFrame(allin)
        print(df)


if __name__ == "__main__":
    trio.run(main)
I have two questions related to this:
How does the above code work? It does not extract all the reviews (it returns only 50), so I am trying to understand how to read the POST query here.
How can I modify the code for expedia.com? I tried the above method for expedia.com, but it did not work. Here is my attempt:
import trio
import httpx
import pandas as pd
async def main():
    """POST the hotels.com ``reviewsQuery`` to fr.expedia.com and print the reviews.

    The payload is the same ``reviewsQuery`` operation used for hotels.com
    (hotel ``344560``); every ``items`` entry of every returned review group
    is collected into one ``pandas.DataFrame``, which is printed.

    NOTE(review): the surrounding post reports that this request fails with
    ``ConnectError: [Errno -2] Name or service not known``.
    """
    payload = {
        "operationName": "reviewsQuery",
        "query": "query reviewsQuery($hotelId: String!, $reviewType: String, $reviewOrder: String, $tripTypeFilter: String, $paginationURL: String) {\n reviews(\n hotelId: $hotelId\n reviewType: $reviewType\n reviewOrder: $reviewOrder\n tripTypeFilter: $tripTypeFilter\n paginationURL: $paginationURL\n ) {\n body {\n reviewContent {\n filters {\n type\n name\n count\n url\n __typename\n }\n overall {\n selectedFilterType\n rating\n badgeText\n total\n scores {\n score\n count\n url\n __typename\n }\n ratingAspects {\n cleanliness\n service\n comfort\n condition\n neighbourhood\n __typename\n }\n whatGuestsSay {\n type\n text\n __typename\n }\n topRated {\n category\n explanation\n __typename\n }\n __typename\n }\n sort {\n url\n options {\n value\n label\n __typename\n }\n __typename\n }\n reviews {\n hermes {\n groups {\n separatorText\n items {\n itineraryId\n brand\n googleTranslateEnabled\n reviewDbDate\n ...GuestReviewsFragment\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n pagination {\n currentPage\n nextURL\n totalPages\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n}\n\nfragment GuestReviewsFragment on ReviewsItem {\n genuineMsg\n tripType\n tripTypeText\n reviewDate\n reviewSubmitDate\n rating\n reviewer {\n name\n locality\n locale\n __typename\n }\n badge\n summary\n description\n __typename\n}\n",
        "variables": {
            "hotelId": "344560",
            "reviewOrder": "date_newest_first",
            "reviewType": "brand",
            "tripTypeFilter": "all",
        },
    }
    async with httpx.AsyncClient(timeout=None) as client:
        response = await client.post('https://fr.expedia.com/kes/graphql', json=payload)
        body = response.json()
        review_groups = body['data']['reviews']['body']['reviewContent']['reviews']['hermes']['groups']
        # Flatten the per-group item lists into one list of review records.
        rows = [entry for group in review_groups for entry in group['items']]
        frame = pd.DataFrame(rows)
        print(frame)


if __name__ == "__main__":
    trio.run(main)
Here is the error:
gaierror Traceback (most recent call last)
/usr/local/lib/python3.10/dist-packages/httpcore/_exceptions.py in map_exceptions(map) 9 try: ---> 10 yield 11 except Exception as exc: # noqa: PIE786
31 frames
gaierror: [Errno -2] Name or service not known
The above exception was the direct cause of the following exception:
ConnectError Traceback (most recent call last)
ConnectError: [Errno -2] Name or service not known
The above exception was the direct cause of the following exception:
ConnectError Traceback (most recent call last)
[... skipping hidden 1 frame]
/usr/local/lib/python3.10/dist-packages/httpx/_transports/default.py in map_httpcore_exceptions() 75 76 message = str(exc) ---> 77 raise mapped_exc(message) from exc 78 79
ConnectError: [Errno -2] Name or service not known
Here is another version I tried; this one raised requests.exceptions.HTTPError: 429 Client Error: Too Many Requests for url: https://www.expedia.com/graphql
import requests
import pandas as pd
def main():
    """POST a ``PropertyFilteredReviewsQuery`` to expedia.com and print the reviews.

    Sends a one-element GraphQL batch for property ``24625`` to
    ``https://www.expedia.com/graphql``, raises on a non-success HTTP status,
    then loads the reviews found in the first batch result into a
    ``pandas.DataFrame`` and prints it.

    NOTE(review): the surrounding post reports that this request is rejected
    with ``429 Too Many Requests``; the query text below is also abbreviated
    with a literal "... (rest of the GraphQL query) ..." placeholder.
    """
    request_headers = {
        "Content-Type": "application/json",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0",
    }

    context = {
        "siteId": 1,
        "locale": "en_US",
        "eapid": 0,
        "currency": "USD",
        "device": {"type": "DESKTOP"},
        "identity": {
            "duaid": "-1",
            "expUserId": "832921361",
            "tuid": "-1",
            "authState": "ANONYMOUS",
        },
        "privacyTrackingState": "CAN_TRACK",
        "debugContext": {"abacusOverrides": [], "alterMode": "RELEASED"},
    }

    search_criteria = {
        "primary": {
            "dateRange": None,
            "rooms": [{"adults": 2}],
            "destination": {"regionId": "178305"},
        },
        "secondary": {
            "booleans": [
                {"id": "includeRecentReviews", "value": True},
                {"id": "includeRatingsOnlyReviews", "value": True},
                {"id": "overrideEmbargoForIndividualReviews", "value": True},
            ],
            "counts": [
                {"id": "startIndex", "value": 0},
                {"id": "size", "value": 10},
            ],
            "selections": [
                {"id": "sortBy", "value": "NEWEST_TO_OLDEST_BY_LANGUAGE"},
                {"id": "searchTerm", "value": ""},
            ],
        },
    }

    # The endpoint is sent a JSON array (a batch) holding a single operation.
    batch = [
        {
            "operationName": "PropertyFilteredReviewsQuery",
            "variables": {
                "context": context,
                "propertyId": "24625",
                "searchCriteria": search_criteria,
            },
            "query": "query PropertyFilteredReviewsQuery($context: ContextInput!, $propertyId: String!, $searchCriteria: PropertySearchCriteriaInput!) {\n propertyReviewSummaries(\n context: $context\n propertyIds: [$propertyId]\n searchCriteria: $searchCriteria\n ) {\n ...__PropertyReviewSummaryFragment\n __typename\n }\n propertyInfo(context: $context, propertyId: $propertyId) {\n id\n reviewInfo(searchCriteria: $searchCriteria) {\n ...__PropertyReviewsListFragment\n sortAndFilter {\n ...TravelerTypeFragment\n ...SortTypeFragment\n ...SearchTextFragment\n __typename\n }\n __typename\n }\n __typename\n }\n}\n\n... (rest of the GraphQL query) ...",
        }
    ]

    reply = requests.post("https://www.expedia.com/graphql", json=batch, headers=request_headers)
    reply.raise_for_status()

    first_result = reply.json()[0]
    rows = list(first_result['data']['propertyInfo']['reviewInfo']['reviews']['content']['reviews'])
    frame = pd.DataFrame(rows)
    print(frame)


if __name__ == "__main__":
    main()
This works for me, but I did not look into how the Client-Info header and the duaid are generated.
import requests
import pandas as pd
def main():
    """Fetch reviews for one Expedia property and print title/superlative/text.

    Sends a one-element GraphQL batch (``PropertyFilteredReviewsQuery`` for
    property ``24625``, ``startIndex`` 0, ``size`` 10) to
    ``https://www.expedia.com/graphql`` with a browser-like ``User-Agent``
    and a ``Client-Info`` header, then prints a ``pandas.DataFrame`` with one
    row per returned review.

    Raises:
        requests.exceptions.HTTPError: if the server answers with a 4xx/5xx
            status (via ``raise_for_status``).
    """
    headers = {
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
        'Client-Info': "blossom-flex-ui,9382ef788e9311fcea3ce7a7b749cd68c4059a45,us-west-2",  # not sure where this comes from just took it from browser dev tools
    }
    data = [
        {
            "operationName": "PropertyFilteredReviewsQuery",
            "variables": {
                "context": {
                    "siteId": 1,
                    "locale": "en_US",
                    "eapid": 0,
                    "currency": "USD",
                    "device": {
                        "type": "DESKTOP"
                    },
                    "identity": {
                        "duaid": "1003be2b-6834-4cf8-bb66-66a49107b76c",  # not sure where this comes from just took it from browser dev tools
                        "expUserId": "-1",
                        "tuid": "-1",
                        "authState": "ANONYMOUS"
                    },
                    "privacyTrackingState": "CAN_TRACK",
                    "debugContext": {
                        "abacusOverrides": [],
                        "alterMode": "RELEASED"
                    }
                },
                "propertyId": "24625",
                "searchCriteria": {
                    "primary": {
                        "dateRange": None,
                        "rooms": [
                            {
                                "adults": 2
                            }
                        ],
                        "destination": {
                            "regionId": "178305"
                        }
                    },
                    "secondary": {
                        "booleans": [
                            {
                                "id": "includeRecentReviews",
                                "value": True
                            },
                            {
                                "id": "includeRatingsOnlyReviews",
                                "value": True
                            },
                            {
                                "id": "overrideEmbargoForIndividualReviews",
                                "value": True
                            }
                        ],
                        "counts": [
                            {
                                "id": "startIndex",
                                "value": 0
                            },
                            {
                                "id": "size",
                                "value": 10
                            }
                        ],
                        "selections": [
                            {
                                "id": "sortBy",
                                "value": "NEWEST_TO_OLDEST_BY_LANGUAGE"
                            },
                            {
                                "id": "searchTerm",
                                "value": ""
                            }
                        ]
                    }
                }
            },
            # One GraphQL document, written as adjacent string literals (one
            # per operation/fragment) that Python joins into a single string.
            "query": (
                "query PropertyFilteredReviewsQuery($context: ContextInput!, $propertyId: String!, $searchCriteria: PropertySearchCriteriaInput!) {\n propertyReviewSummaries(\n context: $context\n propertyIds: [$propertyId]\n searchCriteria: $searchCriteria\n ) {\n ...__PropertyReviewSummaryFragment\n __typename\n }\n propertyInfo(context: $context, propertyId: $propertyId) {\n id\n reviewInfo(searchCriteria: $searchCriteria) {\n ...__PropertyReviewsListFragment\n sortAndFilter {\n ...TravelerTypeFragment\n ...SortTypeFragment\n ...SearchTextFragment\n __typename\n }\n __typename\n }\n __typename\n }\n}\n\n"
                "fragment __PropertyReviewSummaryFragment on PropertyReviewSummary {\n accessibilityLabel\n overallScoreWithDescriptionA11y {\n ...LodgingEnrichedMessageFragment\n __typename\n }\n propertyReviewCountDetails {\n fullDescription\n __typename\n }\n ...ReviewDisclaimerFragment\n reviewSummaryDetails {\n label\n ratingPercentage\n formattedRatingOutOfMax\n __typename\n }\n totalCount {\n raw\n __typename\n }\n __typename\n}\n\n"
                "fragment ReviewDisclaimerFragment on PropertyReviewSummary {\n reviewDisclaimer\n reviewDisclaimerHeading\n strategy\n reviewDisclaimerValues {\n text\n __typename\n }\n reviewDisclaimerLabel\n reviewDisclaimerAnalytics {\n referrerId\n linkName\n __typename\n }\n reviewDisclaimerUrl {\n value\n accessibilityLabel\n link {\n url\n __typename\n }\n __typename\n }\n reviewDisclaimerAccessibilityLabel\n __typename\n}\n\n"
                "fragment LodgingEnrichedMessageFragment on LodgingEnrichedMessage {\n __typename\n subText\n value\n theme\n state\n accessibilityLabel\n icon {\n id\n size\n theme\n __typename\n }\n mark {\n id\n __typename\n }\n egdsMark {\n url {\n value\n __typename\n }\n __typename\n }\n}\n\n"
                "fragment __PropertyReviewsListFragment on PropertyReviews {\n summary {\n paginateAction {\n text\n analytics {\n referrerId\n linkName\n __typename\n }\n __typename\n }\n __typename\n }\n reviews {\n contentDirectFeedbackPromptId\n ...ReviewParentFragment\n managementResponses {\n ...ReviewChildFragment\n __typename\n }\n reviewInteractionSections {\n primaryDisplayString\n reviewInteractionType\n __typename\n }\n __typename\n }\n ...NoResultsMessageFragment\n __typename\n}\n\n"
                "fragment ReviewParentFragment on PropertyReview {\n id\n superlative\n locale\n title\n brandType\n reviewScoreWithDescription {\n label\n value\n __typename\n }\n text\n seeMoreAnalytics {\n linkName\n referrerId\n __typename\n }\n submissionTime {\n longDateFormat\n __typename\n }\n impressionAnalytics {\n event\n referrerId\n __typename\n }\n themes {\n ...ReviewThemeFragment\n __typename\n }\n reviewFooter {\n ...PropertyReviewFooterSectionFragment\n __typename\n }\n ...FeedbackIndicatorFragment\n ...AuthorFragment\n ...PhotosFragment\n ...TravelersFragment\n ...ReviewTranslationInfoFragment\n ...PropertyReviewSourceFragment\n ...PropertyReviewRegionFragment\n __typename\n}\n\n"
                "fragment AuthorFragment on PropertyReview {\n reviewAuthorAttribution {\n text\n __typename\n }\n __typename\n}\n\n"
                "fragment PhotosFragment on PropertyReview {\n id\n photoSection {\n imageClickAnalytics {\n referrerId\n linkName\n __typename\n }\n exitAnalytics {\n referrerId\n linkName\n __typename\n }\n navClickAnalytics {\n referrerId\n linkName\n __typename\n }\n __typename\n }\n photos {\n description\n url\n __typename\n }\n __typename\n}\n\n"
                "fragment TravelersFragment on PropertyReview {\n travelers\n __typename\n}\n\n"
                "fragment ReviewThemeFragment on ReviewThemes {\n icon {\n id\n __typename\n }\n label\n __typename\n}\n\n"
                "fragment FeedbackIndicatorFragment on PropertyReview {\n reviewInteractionSections {\n primaryDisplayString\n accessibilityLabel\n reviewInteractionType\n feedbackAnalytics {\n linkName\n referrerId\n __typename\n }\n __typename\n }\n __typename\n}\n\n"
                "fragment ReviewTranslationInfoFragment on PropertyReview {\n translationInfo {\n loadingTranslationText\n targetLocale\n translatedBy {\n description\n __typename\n }\n translationCallToActionLabel\n seeOriginalText\n __typename\n }\n __typename\n}\n\n"
                "fragment PropertyReviewSourceFragment on PropertyReview {\n propertyReviewSource {\n accessibilityLabel\n graphic {\n description\n id\n size\n token\n url {\n value\n __typename\n }\n __typename\n }\n text {\n value\n __typename\n }\n __typename\n }\n __typename\n}\n\n"
                "fragment PropertyReviewRegionFragment on PropertyReview {\n reviewRegion {\n id\n __typename\n }\n __typename\n}\n\n"
                "fragment PropertyReviewFooterSectionFragment on PropertyReviewFooterSection {\n messages {\n seoStructuredData {\n itemscope\n itemprop\n itemtype\n content\n __typename\n }\n text {\n ... on EGDSPlainText {\n text\n __typename\n }\n ... on EGDSGraphicText {\n text\n graphic {\n ... on Mark {\n description\n id\n size\n url {\n ... on HttpURI {\n relativePath\n value\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n}\n\n"
                "fragment ReviewChildFragment on ManagementResponse {\n id\n header {\n text\n __typename\n }\n response\n __typename\n}\n\n"
                "fragment NoResultsMessageFragment on PropertyReviews {\n noResultsMessage {\n __typename\n ...MessagingCardFragment\n ...EmptyStateFragment\n }\n __typename\n}\n\n"
                "fragment MessagingCardFragment on UIMessagingCard {\n graphic {\n __typename\n ... on Icon {\n id\n description\n __typename\n }\n }\n primary\n secondaries\n __typename\n}\n\n"
                "fragment EmptyStateFragment on UIEmptyState {\n heading\n body\n __typename\n}\n\n"
                "fragment TravelerTypeFragment on SortAndFilterViewModel {\n sortAndFilter {\n name\n label\n options {\n label\n isSelected\n optionValue\n description\n clickAnalytics {\n linkName\n referrerId\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n}\n\n"
                "fragment SortTypeFragment on SortAndFilterViewModel {\n sortAndFilter {\n name\n label\n clickAnalytics {\n linkName\n referrerId\n __typename\n }\n options {\n label\n isSelected\n optionValue\n description\n clickAnalytics {\n linkName\n referrerId\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n}\n\n"
                "fragment SearchTextFragment on SortAndFilterViewModel {\n sortAndFilter {\n name\n label\n graphic {\n ... on Icon {\n description\n id\n token\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n}\n"
            )
        }
    ]
    response = requests.post("https://www.expedia.com/graphql", json=data, headers=headers)
    response.raise_for_status()
    # The request is a one-element batch, so the reviews live in result [0].
    reviews = response.json()[0]['data']['propertyInfo']['reviewInfo']['reviews']
    rows = [
        {
            'title': x['title'],
            'superlative': x['superlative'],
            'text': x['text'],
        }
        for x in reviews
    ]
    # Build the frame once: concatenating inside the loop re-copies all
    # previous rows on every iteration and leaves every row with index 0.
    # Passing columns keeps the header stable even when no reviews come back.
    allin = pd.DataFrame(rows, columns=['title', 'superlative', 'text'])
    print(allin)


if __name__ == "__main__":
    main()
I noticed that the duaid
comes from the endpoint https://www.expedia.com/api/ucs/shortlist//fetch/?clientId=flex
The request to that endpoint requires a client-token, which is in a tag when the page loads. I haven't checked whether it can be obtained using requests, since it is presumably generated by JavaScript. But this should be a start in the right direction: you need the right duaid and Client-Info token to automate making these calls.