Scrapy Python script gives raise TypeError("Cannot mix str and non-str arguments")

Hi, I am new to programming and am running into this seemingly extremely common problem, but honestly none of the answers I have seen helped me in my case.

My code is:

import json
import scrapy

class MoreKeysSpider(scrapy.Spider):
    name = 'getoffers'

    def __init__(self):
        with open(r'C:\Users\magnu\brickset-scraper\postscrape\postscrape\prod.json', encoding='utf-8') as data_file:
   = json.load(data_file)

    def start_requests(self):
        for item in
            request = scrapy.Request(item['url'], callback=self.parse)
            request.meta['item'] = item
            yield request

    def parse(self, response):
        item = response.meta['item']
        item['details'] = []

            "Name" : response.css('span[itemprop=name]::text').extract_first(),
            "Release" : response.xpath('//*[@id="info"]/div[2]/div[1]/div[1]/div[2]/text()').extract_first(),
            "Website" : response.xpath('//*[@id="info"]/div[2]/div[1]/div[2]/div[2]/a/@href').extract_first(),
            "Entwickler" : response.xpath('//*[@id="info"]/div[2]/div[1]/div[3]/div[2]/text()').extract_first(),
            "Publisher" : response.xpath('//*[@id="info"]/div[2]/div[1]/div[4]/div[2]/text()').extract_first(),
            "Tags" : response.xpath('//*[@id="info"]/div[2]/div[2]/div[3]/div[2]/descendant').getall(),
            "Systemanforderungenmin" : response.xpath('//*[@id="config"]/ul[1]/descendant').getall(),
            "Systemanforderungenmax" : response.xpath('//*[@id="config"]/ul[2]/descendant').getall(),
        yield item

        item['offer'] = []
        for div in response.css('#offers_table'):
            for offer_row in div.css('div.offers-table-row'):
                url = response.urljoin(offer_row.css(' a::attr(href)')).get(),
                url_str = ''.join(map(str, url))     #coverts list to str
                    "Shop": offer_row.css('div[itemprop ~= seller] div.offers-merchant::attr(title)').extract_first(),
                    "Typ": offer_row.css('div.offers-edition-region::text').extract_first(),
                    "Edition": offer_row.css("div[data-toggle=tooltip]::attr(data-content)"),
                    "Link": response.follow(url_str, self.parse_topics),
                yield item

As a response I get

    DEBUG: Scraped from <200>
{'url': '', 'details': [{'Name': '\n\t\t\t\t\tCrusader Kings 2\n\t\t\t\t', 'Release': '\n                                                    14. Februar 2012\n                            ', 'Website': '', 'Entwickler': '\n                                                    Paradox Development Studio\n
       ', 'Publisher': '\n                                                    Paradox Interactive\n
           ', 'Tags': [], 'Systemanforderungenmin': [], 'Systemanforderungenmax': []}]}
2021-03-22 21:47:22 [scrapy.core.scraper] ERROR: Spider error processing <GET> (referer: None)
Traceback (most recent call last):
  File "c:\users\magnu\appdata\local\programs\python\python39\lib\site-packages\scrapy\utils\", line 120, in iter_errback
    yield next(it)
  File "c:\users\magnu\appdata\local\programs\python\python39\lib\site-packages\scrapy\utils\", line 353, in __next__
    return next(
  File "c:\users\magnu\appdata\local\programs\python\python39\lib\site-packages\scrapy\utils\", line 353, in __next__
    return next(
  File "c:\users\magnu\appdata\local\programs\python\python39\lib\site-packages\scrapy\core\", line 62, in _evaluate_iterable
    for r in iterable:
  File "c:\users\magnu\appdata\local\programs\python\python39\lib\site-packages\scrapy\spidermiddlewares\", line 29, in process_spider_output
    for x in result:
  File "c:\users\magnu\appdata\local\programs\python\python39\lib\site-packages\scrapy\core\", line 62, in _evaluate_iterable
    for r in iterable:
  File "c:\users\magnu\appdata\local\programs\python\python39\lib\site-packages\scrapy\spidermiddlewares\", line 340, in <genexpr>
    return (_set_referer(r) for r in result or ())
  File "c:\users\magnu\appdata\local\programs\python\python39\lib\site-packages\scrapy\core\", line 62, in _evaluate_iterable
    for r in iterable:
  File "c:\users\magnu\appdata\local\programs\python\python39\lib\site-packages\scrapy\spidermiddlewares\", line 37, in <genexpr>
    return (r for r in result or () if _filter(r))
  File "c:\users\magnu\appdata\local\programs\python\python39\lib\site-packages\scrapy\core\", line 62, in _evaluate_iterable
    for r in iterable:
  File "c:\users\magnu\appdata\local\programs\python\python39\lib\site-packages\scrapy\spidermiddlewares\", 
line 58, in <genexpr>
    return (r for r in result or () if _filter(r))
  File "c:\users\magnu\appdata\local\programs\python\python39\lib\site-packages\scrapy\core\", line 62, in _evaluate_iterable
    for r in iterable:
  File "C:\Users\magnu\brickset-scraper\postscrape\postscrape\spiders\", line 40, in parse
    url = response.urljoin(offer_row.css(' a::attr(href)')).get(),
  File "c:\users\magnu\appdata\local\programs\python\python39\lib\site-packages\scrapy\http\response\", line 
102, in urljoin
    return urljoin(get_base_url(self), url)
  File "c:\users\magnu\appdata\local\programs\python\python39\lib\urllib\", line 524, in urljoin
    base, url, _coerce_result = _coerce_args(base, url)
  File "c:\users\magnu\appdata\local\programs\python\python39\lib\urllib\", line 122, in _coerce_args       
    raise TypeError("Cannot mix str and non-str arguments")
TypeError: Cannot mix str and non-str arguments

So the first part seemingly works and I am pretty sure the mistake is somewhere in the second item, but I can't seem to find it:

item['offer'] = []
        for div in response.css('#offers_table'):
            for offer_row in div.css('div.offers-table-row'):
                url = response.urljoin(offer_row.css(' a::attr(href)')).get(),
                url_str = ''.join(map(str, url))     #coverts list to str
                    "Shop": offer_row.css('div[itemprop ~= seller] div.offers-merchant::attr(title)').extract_first(),
                    "Typ": offer_row.css('div.offers-edition-region::text').extract_first(),
                    "Edition": offer_row.css("div[data-toggle=tooltip]::attr(data-content)"),
                    "Link": response.follow(url_str, self.parse_topics),
                yield item


  • Had kind of a circular route to get to this one, but I think the debugging process would be instructive.

    It's tougher to diagnose this without the JSON file the program is loading, but it looks like your problem is on this line: url = response.urljoin(offer_row.css(' a::attr(href)')).get(),

    From How Can I Fix "TypeError: Cannot mix str and non-str arguments"?

    According to the Scrapy documentation, the .css(selector) method that you're using returns a SelectorList instance. If you want the actual (unicode) string version of the url, call the extract() method.

    So I tried:

    url = response.urljoin(offer_row.css(' a::attr(href)').extract()).get(),

    But I still get the same error. Strange!

    To diagnose, I dropped a breakpoint() into the spider here:

            for div in response.css('#offers_table'):
                for offer_row in div.css('div.offers-table-row'):
                    url = response.urljoin(offer_row.css(' a::attr(href)').extract()).get(),

    Running the spider again, I can test pieces of the next line:

    (Pdb) offer_row.css(' a::attr(href)').extract()
    ['', '']

    Ah, so extract() is giving back a list of strings rather than a single string. There must be two elements matching. However, they are identical, so we don't care which one we get. Looking at the Scrapy docs, we see there's also an extract_first() function.

    url = response.urljoin(offer_row.css(' a::attr(href)').extract_first()).get(),

    Although, looking at the Scrapy docs, you probably want to use get() instead of extract_first().

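    Which is when I finally noticed that your only mistake was putting the get() outside the wrong set of parentheses: it has to be called on the selector, inside the urljoin() call, so that urljoin() receives a string.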

    url = response.urljoin(offer_row.css(' a::attr(href)').get())