Search code examples
pythonjsoncsvweb-scrapingpython-re

Regular expression operations to separate store locations with in Walmart locations


I am currently trying to implement a re to my code for Mcdonald locations across Canada. The goal is to add a column to my csv that states if the locations has walmart. All the address in "address1" that are in walmart have a tag that says (walmart). I am hoping to separate it,if anyone can help with that would be great! If there is a way to do it in excel that would be just as good.

import csv
import json
import requests
import re 

url = "https://www.mcdonalds.com/googleapps/GoogleRestaurantLocAction.do?method=searchLocation&latitude=43.6936965&longitude=-79.2969938&radius=1000000&maxResults=1700&country=ca&language=en-ca&showClosed=&hours24Text=Open%2024%20hr"

payload = {}
files = {}
headers = {
    "authority": "www.mcdonalds.com",
    "sec-ch-ua": '" Not;A Brand";v="99", "Google Chrome";v="91", "Chromium";v="91"',
    "accept": "*/*",
    "x-requested-with": "XMLHttpRequest",
    "sec-ch-ua-mobile": "?0",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.106 Safari/537.36",
    "sec-fetch-site": "same-origin",
    "sec-fetch-mode": "cors",
    "sec-fetch-dest": "empty",
    "referer": "https://www.mcdonalds.com/ca/en-ca/restaurant-locator.html",
    "accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
    "cookie": "bm_sz=C04645E7F7A956C5F9D9C5A20DEAEC97~YAAQ1Cv2SEtfMBN6AQAAItxfEwwTVV2V2Tr7UWpPt1Ps7gl84FzQlmbWIm4kBBh5dxlK3w8RenwiEiKtvERE6dLmrwPwJUuy+14gU/LeEZvP+uxzyBr04oQXdcSEQuiOgdkAGasqnBrTw1mp5E5iehnRpvHBDdSqh8wRSgJV0eG4f8YwSz66BfntCBALtQNCAFK2; _abck=F05779F2345218EA4989FF467D897C5A~0~YAAQ1Cv2SExfMBN6AQAAItxfEwaIwCrBeP25JBhBb7TX+HmnLQgrj1TkosrB+oHSv9ctrxRukqEDUaHPL1KkjpqjY1XY1yyulQ0ZRhsEfhY968YVsTOqfiosAu3kykd3pJG/bQ37XHwWs5qXpIdhMXRwJwXmkYtl3ETG8kXK2iZ22Q31COaSjNVACLaa7s9tCk9ItgLvUj5x9Nldjnd8AdXR0pXicrQY1IaruJyNqwMcJv42AUHW7iH4Ex9ZOSYsgEjLMNd44mS525X/gSNUTSOzoqoWsnH4MU59vfgLTwc2hVncAv67LBViTLxbWw4eVAvz7Z5phQfCmvoIy0PD8gy5iwPDMaD3GASrK9xScDPAPUI2wquxmSJ+f2cQaxZQKhvJCeH9cz14OZfx8ksA2ss53E0l0kDvgmnw~-1~-1~-1; ak_bmsc=BA4817D8DEE20E92C1E6251C54FC124348F62BD48F5F00005F91C9608B679D5F~plUkbYfsvYr5dCayJ9dMGEJ3QDgkmkv2mLpE7pCY9vW0xrdawvmyxfSnupw/4F7C48Akdn8PKsBniqz+7F+RZb8v4AkvH3c0RuvnynqJoni+kJcDYtPOxdMvdtGdTlZGIkSQNfpcxHNQDVlzojdSBX0vyBh/8seKQv10U67M7m787olYzg9jnsUwk3/VHBrnMDogiWJT8rNV7saSXunN0pAgucZWo/XhCpTJL+tI9urt0=; MCDCountry_code=US; bm_mi=BEE06312635FD442995BC0237BAFDA7C~f/RxgMW/JJSUc/wB9ZRg9fPD/76+wq/TaoWEZR1/ttrAiVTO256xhDTsVYc/kdHIjWkxvfO4XDcBjqe4hQ4qXt8Anpfi09vna/zcC7l6OVWpWeRSoZNztl7h5VF407L3XG+9CpzjSHNcaqAPRk5d0J5gLMtL/KmR8XBkAC0Syim7ST97nxNrPfLdlkSPMGm4Oy86xvY5PH5Nu47zS/gwhanBFg69tAdrQdaZewE2eGuzoJPsZit3UsihTzhXc4LY92hfSdh3/kZRId+NE8Jp0w==; bm_sv=7CACE3495320A7C0A6CF8F41DFE0EB36~F9KzvznVNk/fE4+ijLD5H/szY7O161rWlemmShElumIW7HN49Gq2d9Sd2tqBjCa9sJOX4zoehAkc8WvsID5Idon/hDlDeLJZuqnEmff4PN4a9yst3R170rBCm1egzGvCBmB1jq9aCwQm5VgIJgloPOdpiIPfD3kDxFbKhqMuS5U=; JSESSIONID=64PZkBXhhpvNjM4NganzSZ0r1npIIaM7Fo84EsxN.eap7node7; _abck=F05779F2345218EA4989FF467D897C5A~-1~YAAQ1Cv2SExyMBN6AQAA5Et0EwZueCejZbKz1VDGCq2sB43Yx4dq0SiiGeUS6gVpXRIdw3rA3OdpNGHq7tVzQ+IvPpEKwLML9736x1qB5SQxV3jai89y2B2QF6K8nKtyrDAes0qbeTyIrHu0Rh1HLs7CjNxiLi0wswbCZfSsPI6fJZiEt+Itre3lfmua/HkhIRwpVTKqlVN5eQ8XIX+s1jJbINx/jUmMTW+jB5k4A5NARGChYH7rJQGYIT/oyZYpSbS3Yweqa4FRgGMW4gYZBN39+t2xSfewADLdpihfOnoZtakw9VhcvAKaf4mEzjB7WEfNJIZSjSE8DzvbJNIF41MGuAhhrnEBwBE8uVCZsA+2qjVPSADVp2Nn8JanJXCbucnLFOLsmPz3oVtGzentht1cHog4+eYOUlmw~0~-1~-1; bm_sv=7CACE3495320A7C0A6CF8F41DFE0EB36~F9KzvznVNk/fE4+ijLD5H/szY7O161rWlemmShElumIW7HN49Gq2d9Sd2tqBjCa9sJOX4zoehAkc8WvsID5Idon/hDlDeLJZuqnEmff4PN5ZCTzA250oKEeVeXaa6j4gEGJ9RRtrTXQdYXzzSx6fM9aLwif+We2vtIc1yLQgTt4=",
    "dnt": "1",
}

response = requests.request(
    "GET", url, headers=headers, data=payload, files=files
)
stores = json.loads(response.text)

with open("McDonaldworkshop
          \.csv", mode="w") as CSVFile:
    writer = csv.writer(
        CSVFile, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL
    )

    writer.writerow(
        [
            "address",
            "postcode",
            "telephone",
        ]
    )

    for store in stores["features"]:
        address =  store["properties"]["addressLine1"]
        post_code = store["properties"]["postcode"]
        telephone = store["properties"].get("telephone", "N/A")

        writer.writerow([address,post_code, telephone])

Solution

  • Try:

    import csv
    import json
    import requests
    
    url = "https://www.mcdonalds.com/googleapps/GoogleRestaurantLocAction.do?method=searchLocation&latitude=43.6936965&longitude=-79.2969938&radius=1000000&maxResults=1700&country=ca&language=en-ca&showClosed=&hours24Text=Open%2024%20hr"
    
    payload = {}
    files = {}
    headers = {
        "authority": "www.mcdonalds.com",
        "sec-ch-ua": '" Not;A Brand";v="99", "Google Chrome";v="91", "Chromium";v="91"',
        "accept": "*/*",
        "x-requested-with": "XMLHttpRequest",
        "sec-ch-ua-mobile": "?0",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.106 Safari/537.36",
        "sec-fetch-site": "same-origin",
        "sec-fetch-mode": "cors",
        "sec-fetch-dest": "empty",
        "referer": "https://www.mcdonalds.com/ca/en-ca/restaurant-locator.html",
        "accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
        "cookie": "bm_sz=C04645E7F7A956C5F9D9C5A20DEAEC97~YAAQ1Cv2SEtfMBN6AQAAItxfEwwTVV2V2Tr7UWpPt1Ps7gl84FzQlmbWIm4kBBh5dxlK3w8RenwiEiKtvERE6dLmrwPwJUuy+14gU/LeEZvP+uxzyBr04oQXdcSEQuiOgdkAGasqnBrTw1mp5E5iehnRpvHBDdSqh8wRSgJV0eG4f8YwSz66BfntCBALtQNCAFK2; _abck=F05779F2345218EA4989FF467D897C5A~0~YAAQ1Cv2SExfMBN6AQAAItxfEwaIwCrBeP25JBhBb7TX+HmnLQgrj1TkosrB+oHSv9ctrxRukqEDUaHPL1KkjpqjY1XY1yyulQ0ZRhsEfhY968YVsTOqfiosAu3kykd3pJG/bQ37XHwWs5qXpIdhMXRwJwXmkYtl3ETG8kXK2iZ22Q31COaSjNVACLaa7s9tCk9ItgLvUj5x9Nldjnd8AdXR0pXicrQY1IaruJyNqwMcJv42AUHW7iH4Ex9ZOSYsgEjLMNd44mS525X/gSNUTSOzoqoWsnH4MU59vfgLTwc2hVncAv67LBViTLxbWw4eVAvz7Z5phQfCmvoIy0PD8gy5iwPDMaD3GASrK9xScDPAPUI2wquxmSJ+f2cQaxZQKhvJCeH9cz14OZfx8ksA2ss53E0l0kDvgmnw~-1~-1~-1; ak_bmsc=BA4817D8DEE20E92C1E6251C54FC124348F62BD48F5F00005F91C9608B679D5F~plUkbYfsvYr5dCayJ9dMGEJ3QDgkmkv2mLpE7pCY9vW0xrdawvmyxfSnupw/4F7C48Akdn8PKsBniqz+7F+RZb8v4AkvH3c0RuvnynqJoni+kJcDYtPOxdMvdtGdTlZGIkSQNfpcxHNQDVlzojdSBX0vyBh/8seKQv10U67M7m787olYzg9jnsUwk3/VHBrnMDogiWJT8rNV7saSXunN0pAgucZWo/XhCpTJL+tI9urt0=; MCDCountry_code=US; bm_mi=BEE06312635FD442995BC0237BAFDA7C~f/RxgMW/JJSUc/wB9ZRg9fPD/76+wq/TaoWEZR1/ttrAiVTO256xhDTsVYc/kdHIjWkxvfO4XDcBjqe4hQ4qXt8Anpfi09vna/zcC7l6OVWpWeRSoZNztl7h5VF407L3XG+9CpzjSHNcaqAPRk5d0J5gLMtL/KmR8XBkAC0Syim7ST97nxNrPfLdlkSPMGm4Oy86xvY5PH5Nu47zS/gwhanBFg69tAdrQdaZewE2eGuzoJPsZit3UsihTzhXc4LY92hfSdh3/kZRId+NE8Jp0w==; bm_sv=7CACE3495320A7C0A6CF8F41DFE0EB36~F9KzvznVNk/fE4+ijLD5H/szY7O161rWlemmShElumIW7HN49Gq2d9Sd2tqBjCa9sJOX4zoehAkc8WvsID5Idon/hDlDeLJZuqnEmff4PN4a9yst3R170rBCm1egzGvCBmB1jq9aCwQm5VgIJgloPOdpiIPfD3kDxFbKhqMuS5U=; JSESSIONID=64PZkBXhhpvNjM4NganzSZ0r1npIIaM7Fo84EsxN.eap7node7; _abck=F05779F2345218EA4989FF467D897C5A~-1~YAAQ1Cv2SExyMBN6AQAA5Et0EwZueCejZbKz1VDGCq2sB43Yx4dq0SiiGeUS6gVpXRIdw3rA3OdpNGHq7tVzQ+IvPpEKwLML9736x1qB5SQxV3jai89y2B2QF6K8nKtyrDAes0qbeTyIrHu0Rh1HLs7CjNxiLi0wswbCZfSsPI6fJZiEt+Itre3lfmua/HkhIRwpVTKqlVN5eQ8XIX+s1jJbINx/jUmMTW+jB5k4A5NARGChYH7rJQGYIT/oyZYpSbS3Yweqa4FRgGMW4gYZBN39+t2xSfewADLdpihfOnoZtakw9VhcvAKaf4mEzjB7WEfNJIZSjSE8DzvbJNIF41MGuAhhrnEBwBE8uVCZsA+2qjVPSADVp2Nn8JanJXCbucnLFOLsmPz3oVtGzentht1cHog4+eYOUlmw~0~-1~-1; bm_sv=7CACE3495320A7C0A6CF8F41DFE0EB36~F9KzvznVNk/fE4+ijLD5H/szY7O161rWlemmShElumIW7HN49Gq2d9Sd2tqBjCa9sJOX4zoehAkc8WvsID5Idon/hDlDeLJZuqnEmff4PN5ZCTzA250oKEeVeXaa6j4gEGJ9RRtrTXQdYXzzSx6fM9aLwif+We2vtIc1yLQgTt4=",
        "dnt": "1",
    }
    
    response = requests.request(
        "GET", url, headers=headers, data=payload, files=files
    )
    stores = json.loads(response.text)
    
    with open("data.csv", mode="w") as CSVFile:
        writer = csv.writer(
            CSVFile, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL
        )
    
        writer.writerow(["address", "postcode", "telephone", "has walmart"])
    
        for store in stores["features"]:
            has_walmart = "(Wal-Mart)" in store["properties"]["addressLine1"]
            address = store["properties"]["addressLine1"].replace("(Wal-Mart)", "")
            post_code = store["properties"]["postcode"]
            telephone = store["properties"].get("telephone", "N/A")
    
            writer.writerow(
                [address, post_code, telephone, "X" if has_walmart else ""]
            )
    

    This will create new column has walmart where it has "X" if the address contains (Wal-Mart). Also, it replaces (Wal-Mart) with empty string in address:

    enter image description here