Search code examples
python html web-scraping

Trying to scrape data from a table from a website


I'm trying to pull some data from a table and store it in a CSV file.

I'm using the following (all 64-bit):

  • Firefox version 135.0.1
  • GeckoDriver 0.36.0
  • Python version is 3.11.0

I'm trying to scrape the data from the table at [URL]

When I look at the HTML, it looks like the table element is <table id="table-container-table">, but my script can't seem to find the table. I'm not sure if I'm looking at the wrong element or if I'm doing something else wrong. Here's a snippet of my code:

# Dump the current page source to disk so it can be inspected while debugging
with open('page_source.html', 'w', encoding='utf-8') as dump_file:
    dump_file.write(driver.page_source)
print("Page source saved to 'page_source.html'.")

# Jump to the bottom of the page, then pause briefly before looking for the table
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(2)

# Locator for the <table> element, matched by its id attribute
table_locator = (By.ID, 'table-container-table')

# Poll for the table for up to 20 seconds; on any failure report it,
# close the browser and stop the script
try:
    wait = WebDriverWait(driver, 20)
    table = wait.until(EC.presence_of_element_located(table_locator))
    print("Table found!")
except Exception as err:
    print(f"Table not found: {err}")
    driver.quit()
    exit()

# Every <tr> inside the table (header row included)
rows = table.find_elements(By.TAG_NAME, 'tr')

# Build one dict per row that has exactly four <td> cells
# (SKU, Item Description, Platform, Value); other rows are skipped
data = []
for row in rows:
    cells = row.find_elements(By.TAG_NAME, 'td')
    if len(cells) != 4:
        continue
    sku, item_description, platform, value = [cell.text for cell in cells]
    data.append({
        'SKU': sku,
        'Item Description': item_description,
        'Platform': platform,
        'Value': value
    })

I get this error message when I run my code:

Page source saved to 'page_source.html'.
Table not found: Message:
Stacktrace:
RemoteError@chrome://remote/content/shared/RemoteError.sys.mjs:8:8
WebDriverError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:197:5
NoSuchElementError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:527:5
dom.find/</<@chrome://remote/content/shared/DOM.sys.mjs:136:16

Edit:

Here is a copy of the inner HTML of the table-container:

<div id="table-container-table_wrapper" class="dataTables_wrapper form-inline dt-bootstrap no-footer">
    <div class="row">
        <div class="col-sm-12">
            <div id="table-container-table_filter" class="dataTables_filter">
                <label>Search:<input type="search" class="form-control input-lg" placeholder="" aria-controls="table-container-table" /></label>
            </div>
        </div>
    </div>
    <div class="row"><div class="databreak"></div></div>
    <div class="row">
        <div class="col-sm-12">
            <table class="table table-striped table-condensed dataTable no-footer" id="table-container-table" role="grid" aria-describedby="table-container-table_info">
                <thead>
                    <tr role="row">
                        <th class="sorting_asc" tabindex="0" aria-controls="table-container-table" rowspan="1" colspan="1" style="width: 106px;" aria-sort="ascending" aria-label="SKU: activate to sort column descending">SKU</th>
                        <th class="sorting" tabindex="0" aria-controls="table-container-table" rowspan="1" colspan="1" style="width: 566px;" aria-label="Item Description: activate to sort column ascending">Item Description</th>
                        <th class="sorting" tabindex="0" aria-controls="table-container-table" rowspan="1" colspan="1" style="width: 248px;" aria-label="Platform: activate to sort column ascending">Platform</th>
                        <th class="sorting" tabindex="0" aria-controls="table-container-table" rowspan="1" colspan="1" style="width: 133px;" aria-label="Values: activate to sort column ascending">Values</th>
                    </tr>
                </thead>
                <tbody>
                    <tr role="row" class="odd">
                        <td class="sorting_1">702645</td>
                        <td>RAINBOW 6 SEIGE PS4</td>
                        <td>Playstation 4</td>
                        <td>$5.00</td>
                    </tr>
                    <tr role="row" class="even">
                        <td class="sorting_1">709782</td>
                        <td>NEED FOR SPEED:RIVALS PS4</td>
                        <td>Playstation 4</td>
                        <td>$8.00</td>
                    </tr>
                    <tr role="row" class="odd">
                        <td class="sorting_1">709809</td>
                        <td>SYS-PS4 500GB Console</td>
                        <td>Playstation 4</td>
                        <td>$50.00</td>
                    </tr>
                    <tr role="row" class="even">
                        <td class="sorting_1">709823</td>
                        <td>ASSASSINS CREED IV PS4</td>
                        <td>Playstation 4</td>
                        <td>$6.00</td>
                    </tr>
                    <tr role="row" class="odd">
                        <td class="sorting_1">709833</td>
                        <td>ASSASSINS CREED IV XB1</td>
                        <td>Xbox One</td>
                        <td>$3.00</td>
                    </tr>
                    <tr role="row" class="even">
                        <td class="sorting_1">709843</td>
                        <td>JUST DANCE 2014 XB1</td>
                        <td>Xbox One</td>
                        <td>$3.00</td>
                    </tr>
                    <tr role="row" class="odd">
                        <td class="sorting_1">709847</td>
                        <td>JUST DANCE 2014 PS4</td>
                        <td>Playstation 4</td>
                        <td>$3.00</td>
                    </tr>
                    <tr role="row" class="even">
                        <td class="sorting_1">709871</td>
                        <td>FORZA MOTORSPORT 5 XB1</td>
                        <td>Xbox One</td>
                        <td>$3.00</td>
                    </tr>
                    <tr role="row" class="odd">
                        <td class="sorting_1">709878</td>
                        <td>COD:GHOSTS XB1</td>
                        <td>Xbox One</td>
                        <td>$3.00</td>
                    </tr>
                    <tr role="row" class="even">
                        <td class="sorting_1">709880</td>
                        <td>COD: GHOSTS PS4</td>
                        <td>Playstation 4</td>
                        <td>$4.00</td>
                    </tr>
                </tbody>
            </table>
        </div>
    </div>
    <div class="row"><div class="databreak"></div></div>
    <div class="row">
        <div class="col-sm-5"><div class="dataTables_info" id="table-container-table_info" role="status" aria-live="polite">Showing 1 to 10 of 5,378 entries</div></div>
        <div class="col-sm-7">
            <div class="dataTables_paginate paging_simple_numbers" id="table-container-table_paginate">
                <ul class="pagination">
                    <li class="paginate_button previous disabled" aria-controls="table-container-table" tabindex="0" id="table-container-table_previous"><a href="#">Previous</a></li>
                    <li class="paginate_button active" aria-controls="table-container-table" tabindex="0"><a href="#">1</a></li>
                    <li class="paginate_button" aria-controls="table-container-table" tabindex="0"><a href="#">2</a></li>
                    <li class="paginate_button" aria-controls="table-container-table" tabindex="0"><a href="#">3</a></li>
                    <li class="paginate_button" aria-controls="table-container-table" tabindex="0"><a href="#">4</a></li>
                    <li class="paginate_button" aria-controls="table-container-table" tabindex="0"><a href="#">5</a></li>
                    <li class="paginate_button disabled" aria-controls="table-container-table" tabindex="0" id="table-container-table_ellipsis"><a href="#">…</a></li>
                    <li class="paginate_button" aria-controls="table-container-table" tabindex="0"><a href="#">538</a></li>
                    <li class="paginate_button next" aria-controls="table-container-table" tabindex="0" id="table-container-table_next"><a href="#">Next</a></li>
                </ul>
            </div>
        </div>
    </div>
</div>

Here is a copy of the outer HTML:

<div id="table-container">
    <div id="table-container-table_wrapper" class="dataTables_wrapper form-inline dt-bootstrap no-footer">
        <div class="row">
            <div class="col-sm-12">
                <div id="table-container-table_filter" class="dataTables_filter">
                    <label>Search:<input type="search" class="form-control input-lg" placeholder="" aria-controls="table-container-table" /></label>
                </div>
            </div>
        </div>
        <div class="row"><div class="databreak"></div></div>
        <div class="row">
            <div class="col-sm-12">
                <table class="table table-striped table-condensed dataTable no-footer" id="table-container-table" role="grid" aria-describedby="table-container-table_info">
                    <thead>
                        <tr role="row">
                            <th class="sorting_asc" tabindex="0" aria-controls="table-container-table" rowspan="1" colspan="1" style="width: 106px;" aria-sort="ascending" aria-label="SKU: activate to sort column descending">SKU</th>
                            <th class="sorting" tabindex="0" aria-controls="table-container-table" rowspan="1" colspan="1" style="width: 566px;" aria-label="Item Description: activate to sort column ascending">Item Description</th>
                            <th class="sorting" tabindex="0" aria-controls="table-container-table" rowspan="1" colspan="1" style="width: 248px;" aria-label="Platform: activate to sort column ascending">Platform</th>
                            <th class="sorting" tabindex="0" aria-controls="table-container-table" rowspan="1" colspan="1" style="width: 133px;" aria-label="Values: activate to sort column ascending">Values</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr role="row" class="odd">
                            <td class="sorting_1">702645</td>
                            <td>RAINBOW 6 SEIGE PS4</td>
                            <td>Playstation 4</td>
                            <td>$5.00</td>
                        </tr>
                        <tr role="row" class="even">
                            <td class="sorting_1">709782</td>
                            <td>NEED FOR SPEED:RIVALS PS4</td>
                            <td>Playstation 4</td>
                            <td>$8.00</td>
                        </tr>
                        <tr role="row" class="odd">
                            <td class="sorting_1">709809</td>
                            <td>SYS-PS4 500GB Console</td>
                            <td>Playstation 4</td>
                            <td>$50.00</td>
                        </tr>
                        <tr role="row" class="even">
                            <td class="sorting_1">709823</td>
                            <td>ASSASSINS CREED IV PS4</td>
                            <td>Playstation 4</td>
                            <td>$6.00</td>
                        </tr>
                        <tr role="row" class="odd">
                            <td class="sorting_1">709833</td>
                            <td>ASSASSINS CREED IV XB1</td>
                            <td>Xbox One</td>
                            <td>$3.00</td>
                        </tr>
                        <tr role="row" class="even">
                            <td class="sorting_1">709843</td>
                            <td>JUST DANCE 2014 XB1</td>
                            <td>Xbox One</td>
                            <td>$3.00</td>
                        </tr>
                        <tr role="row" class="odd">
                            <td class="sorting_1">709847</td>
                            <td>JUST DANCE 2014 PS4</td>
                            <td>Playstation 4</td>
                            <td>$3.00</td>
                        </tr>
                        <tr role="row" class="even">
                            <td class="sorting_1">709871</td>
                            <td>FORZA MOTORSPORT 5 XB1</td>
                            <td>Xbox One</td>
                            <td>$3.00</td>
                        </tr>
                        <tr role="row" class="odd">
                            <td class="sorting_1">709878</td>
                            <td>COD:GHOSTS XB1</td>
                            <td>Xbox One</td>
                            <td>$3.00</td>
                        </tr>
                        <tr role="row" class="even">
                            <td class="sorting_1">709880</td>
                            <td>COD: GHOSTS PS4</td>
                            <td>Playstation 4</td>
                            <td>$4.00</td>
                        </tr>
                    </tbody>
                </table>
            </div>
        </div>
        <div class="row"><div class="databreak"></div></div>
        <div class="row">
            <div class="col-sm-5"><div class="dataTables_info" id="table-container-table_info" role="status" aria-live="polite">Showing 1 to 10 of 5,378 entries</div></div>
            <div class="col-sm-7">
                <div class="dataTables_paginate paging_simple_numbers" id="table-container-table_paginate">
                    <ul class="pagination">
                        <li class="paginate_button previous disabled" aria-controls="table-container-table" tabindex="0" id="table-container-table_previous"><a href="#">Previous</a></li>
                        <li class="paginate_button active" aria-controls="table-container-table" tabindex="0"><a href="#">1</a></li>
                        <li class="paginate_button" aria-controls="table-container-table" tabindex="0"><a href="#">2</a></li>
                        <li class="paginate_button" aria-controls="table-container-table" tabindex="0"><a href="#">3</a></li>
                        <li class="paginate_button" aria-controls="table-container-table" tabindex="0"><a href="#">4</a></li>
                        <li class="paginate_button" aria-controls="table-container-table" tabindex="0"><a href="#">5</a></li>
                        <li class="paginate_button disabled" aria-controls="table-container-table" tabindex="0" id="table-container-table_ellipsis"><a href="#">…</a></li>
                        <li class="paginate_button" aria-controls="table-container-table" tabindex="0"><a href="#">538</a></li>
                        <li class="paginate_button next" aria-controls="table-container-table" tabindex="0" id="table-container-table_next"><a href="#">Next</a></li>
                    </ul>
                </div>
            </div>
        </div>
    </div>
</div>

Solution

  • The page at https://www.gamestop.ca/tradevalues detects the scraping operation and blocks the network call that fetches the table data. The table therefore never appears in the page, which is why your script fails with "Table not found".

    They have a separate /Microsites URL that serves the same table, and scraping that page works:

    import time
    from selenium import webdriver
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC

    # Standalone microsite page that serves the trade-value table; the main
    # /tradevalues page blocks the request that loads the table data.
    TRADE_VALUES_URL = 'https://www.gamestop.ca/Views/Locale/Content/Microsites/trade-value-lookup/index.html'

    driver = webdriver.Chrome()
    try:
        driver.get(TRADE_VALUES_URL)

        # Save the page source for debugging.
        # NOTE: this snapshot is taken before the wait below, so it may not
        # yet contain the table if the table is rendered after page load.
        with open('page_source.html', 'w', encoding='utf-8') as f:
            f.write(driver.page_source)
        print("Page source saved to 'page_source.html'.")

        try:
            # Poll for up to 40 seconds until the table element is in the DOM.
            wait = WebDriverWait(driver, 40)
            table = wait.until(EC.presence_of_element_located((By.ID, 'table-container-table')))
        except TimeoutException as e:
            # Only a wait timeout means "table not found"; any other error
            # propagates with its traceback instead of being hidden.
            print(f"Table not found: {e}")
        else:
            print("Table found!")

            # All <tr> elements in the table, header row included.
            rows = table.find_elements(By.TAG_NAME, 'tr')

            # Keep only rows with exactly four <td> cells (SKU, Item
            # Description, Platform, Value); the header row uses <th> cells
            # and is skipped. Only the rows currently rendered are read, so
            # this collects the first page of the paginated table.
            data = []
            for row in rows:
                cells = row.find_elements(By.TAG_NAME, 'td')
                if len(cells) != 4:
                    continue
                sku, item_description, platform, value = [cell.text for cell in cells]
                data.append({
                    'SKU': sku,
                    'Item Description': item_description,
                    'Platform': platform,
                    'Value': value
                })

            for item in data:
                print(item)
    finally:
        # Always close the browser, even if scraping raised part-way through.
        driver.quit()
    
    

    Logs

    Page source saved to 'page_source.html'.
    Table found!
    {'SKU': '702645', 'Item Description': 'RAINBOW 6 SEIGE PS4', 'Platform': 'Playstation 4', 'Value': '$5.00'}
    {'SKU': '709782', 'Item Description': 'NEED FOR SPEED:RIVALS PS4', 'Platform': 'Playstation 4', 'Value': '$8.00'}
    {'SKU': '709809', 'Item Description': 'SYS-PS4 500GB Console', 'Platform': 'Playstation 4', 'Value': '$50.00'}
    {'SKU': '709823', 'Item Description': 'ASSASSINS CREED IV PS4', 'Platform': 'Playstation 4', 'Value': '$6.00'}
    {'SKU': '709833', 'Item Description': 'ASSASSINS CREED IV XB1', 'Platform': 'Xbox One', 'Value': '$3.00'}
    {'SKU': '709843', 'Item Description': 'JUST DANCE 2014 XB1', 'Platform': 'Xbox One', 'Value': '$3.00'}
    {'SKU': '709847', 'Item Description': 'JUST DANCE 2014 PS4', 'Platform': 'Playstation 4', 'Value': '$3.00'}
    {'SKU': '709871', 'Item Description': 'FORZA MOTORSPORT 5 XB1', 'Platform': 'Xbox One', 'Value': '$3.00'}
    {'SKU': '709878', 'Item Description': 'COD:GHOSTS XB1', 'Platform': 'Xbox One', 'Value': '$3.00'}
    {'SKU': '709880', 'Item Description': 'COD: GHOSTS PS4', 'Platform': 'Playstation 4', 'Value': '$4.00'}