I am currently running a Python script that detects changes in a website's JSON object, which consists of products.
When I run it manually on my Ubuntu server it works fine (it sends the tweet), but when it is run from crontab it gives me this error:
UnicodeEncodeError: 'ascii' codec can't encode character u'\xa0' in position 20: ordinal not in range(128)
Here is the code. Basically, I compare the previously saved version of the JSON file with the current one; if there is a difference, the script sends a tweet.
#!/usr/bin/env python3
import twitter
import requests
from jsondiff import diff
from classes.logger import Logger
from classes.proxies import Proxy
import webbrowser
import time
import json
# Module-level shortcut to the `log` attribute of a fresh project Logger
# instance (presumably a logging callable — confirm against classes.logger).
# NOTE(review): `log` is not referenced anywhere else in the visible code.
log = Logger().log
class Cactus:
    """Watch a Big Cartel product feed and tweet when it changes.

    Each call to :meth:`scrape` loads the previously saved JSON snapshot,
    downloads the current product feed through a proxy, overwrites the
    snapshot, and posts to Twitter when the two versions differ.
    """

    # Proxy provider from the project's helper module; instantiated once when
    # the class body is executed and shared by all instances.
    proxy = Proxy()

    # Seconds to wait on any single HTTP call. Without a timeout, `requests`
    # blocks indefinitely on a stalled connection, which would leave an
    # unattended (cron) run hanging forever.
    REQUEST_TIMEOUT = 30

    # Snapshot of the last feed seen. NOTE: this is a relative path, so it is
    # resolved against the process's current working directory.
    SNAPSHOT_FILE = 'cactus.txt'

    def __init__(self):
        """Store the feed/storefront URLs and build the Twitter API client."""
        self.url = 'http://api.bigcartel.com/cactusplantfleamarket/products.json'
        self.front = 'http://www.cactusplantfleamarket.bigcartel.com'
        self.api = twitter.Api(consumer_key='xxx',
                               consumer_secret='xxx',
                               access_token_key='xxx',
                               access_token_secret='xxx')

    @staticmethod
    def _timestamp():
        """Return the current UTC time formatted like ``07 Mar 14:05:09``."""
        return time.strftime("%d %b %H:%M:%S", time.gmtime())

    @staticmethod
    def _safe_print(text):
        """Print *text* without ever raising ``UnicodeEncodeError``.

        When stdout uses a narrow encoding (e.g. ASCII under cron, where no
        UTF-8 locale is set), characters such as ``\\xa0`` cannot be encoded.
        Diagnostic output must not abort the real work, so those characters
        are written as backslash escapes instead.
        """
        try:
            print(text)
        except UnicodeEncodeError:
            print(str(text).encode('ascii', 'backslashreplace').decode('ascii'))

    def scrape(self):
        """Fetch the feed, refresh the snapshot, and tweet on any difference.

        If the new feed is empty, a single generic "Website Updated" tweet is
        posted; otherwise every product in the new feed is tweeted via
        :meth:`tweet`. A failure while tweeting one product is printed and
        does not stop the remaining products.

        Raises:
            OSError: the snapshot file cannot be read or written.
            ValueError: the snapshot or the HTTP response is not valid JSON.
            requests.RequestException: the feed request fails, times out, or
                returns an HTTP error status.
        """
        # Load the baseline first: the snapshot file is overwritten below.
        with open(self.SNAPSHOT_FILE) as snapshot:
            old = json.load(snapshot)

        current_proxy = self.proxy.getProxy()[self.proxy.countProxy()]
        # The context manager closes the session's pooled connections.
        with requests.Session() as session:
            response = session.get(self.url, proxies=current_proxy,
                                   timeout=self.REQUEST_TIMEOUT)
            # Do not let an HTTP error payload become the new baseline; that
            # would trigger spurious tweets on this run and the next one.
            response.raise_for_status()
            resp = response.json()

        with open(self.SNAPSHOT_FILE, 'w') as outfile:
            json.dump(resp, outfile)

        if diff(resp, old) != {}:
            if not resp:
                self.api.PostUpdate('Website Updated at ' + self._timestamp())
            else:
                # presumably `resp` is a list of product dicts — verify
                # against the feed's actual schema.
                for product in resp:
                    try:
                        self.tweet(product)
                    except Exception as e:
                        # Deliberately best-effort: one bad product must not
                        # prevent the others from being tweeted.
                        self._safe_print(e)

    def tweet(self, item):
        """Post a tweet announcing one product.

        Args:
            item: product mapping; its ``'name'`` and ``'url'`` entries are
                read, and ``'url'`` is appended to the storefront base URL.

        Raises:
            KeyError: *item* lacks ``'name'`` or ``'url'``.
        """
        name = item['name']
        # Progress output only. It previously used a bare print(), which
        # raised UnicodeEncodeError on non-ASCII names when stdout was ASCII
        # and therefore prevented the tweet from being sent at all.
        self._safe_print(name)
        curr_time = self._timestamp()
        url = self.front + item['url']
        # Shortening is best-effort: if the shortener is unreachable or its
        # reply carries no 'id', tweet the full URL rather than dropping the
        # announcement.
        try:
            link = self.goo_shorten_url(url)['id']
        except (KeyError, TypeError, ValueError, requests.RequestException):
            link = url
        self.api.PostUpdate('CPFM: ' + name + ' - ' + curr_time + ' ' + link)

    def goo_shorten_url(self, url):
        """Ask the Google URL Shortener API to shorten *url*.

        Args:
            url: the long URL to shorten.

        Returns:
            The decoded JSON body of the API response; callers read its
            ``'id'`` entry as the short URL.

        Raises:
            requests.RequestException: the request fails or times out.
            ValueError: the response body is not valid JSON.
        """
        API_KEY = 'xxxx'
        post_url = 'https://www.googleapis.com/urlshortener/v1/url?key={}'.format(API_KEY)
        payload = {'longUrl': url}
        headers = {'content-type': 'application/json'}
        r = requests.post(post_url, data=json.dumps(payload), headers=headers,
                          timeout=self.REQUEST_TIMEOUT)
        return r.json()
We did some sleuthing in chat and figured out that the script was accidentally being run with Python 2 instead of Python 3. The clue was the `u` prefix on `u'\xa0'` in the error message: Python 3 does not print that prefix for strings.