I'm in quite a pickle here.
So I'm using SQLAlchemy and Pyramid for a web app. One of the functions of this app is parsing input from a form, which is passed to a Ruby parser over an XML-RPC bridge.
The issue arises when I try to use my renderer to return the JSON of a newly parsed object.
Here's the error, followed by details:
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 93: ordinal not in range(128)
Setup
DB setup
Collation: utf8_general_ci
Model
class Citation(Base):
    """ORM model mapped to the ``citations`` table.

    No columns are declared here: ``autoload`` tells SQLAlchemy to reflect
    them from the database table.
    """

    __tablename__ = 'citations'
    __table_args__ = {'autoload': True}

    # Many-to-many link to Author through the ``author_of`` association table.
    authors = relationship("Author", secondary=author_of, backref='citations')

    # Self-referential many-to-many through ``similar_to``: this citation is
    # ``citation_id1`` and each possible match is ``citation_id2``.
    # NOTE(review): ``citation_id`` must already be resolvable when this class
    # body executes -- confirm where it is defined.
    possible_matches = relationship(
        "Citation",
        secondary=similar_to,
        primaryjoin=citation_id == similar_to.c.citation_id1,
        secondaryjoin=citation_id == similar_to.c.citation_id2,
    )

    def __init__(self, citation_dict=None):
        """Populate the instance from a mapping of attribute names to values.

        :param citation_dict: optional mapping whose items are copied straight
            into the instance ``__dict__``. When omitted (``None``) the
            instance is left empty instead of raising ``TypeError``.
        """
        if citation_dict is not None:
            self.__dict__.update(citation_dict)

    def __repr__(self):
        """Return a short debugging representation: id, title and year."""
        # ``%s`` rather than ``%d``: ``citation_id`` may still be None, and
        # ``%d`` would raise TypeError in that case. Integer ids render the
        # same either way.
        return "<Citation %s: %s (%s)>" % (
            self.citation_id, self.title, self.year)

    @property
    def json(self):
        """Return a plain dict describing this citation.

        The dict contains:

        * ``authors`` -- the ``json`` attribute of every related author;
        * one key per name in ``attrs`` below, ``None`` when the instance
          has no such attribute;
        * ``auth_string`` -- the authors' ``toString()`` results joined by a
          single space.
        """
        attrs = (
            'pubtype', 'abstract', 'keywords', 'doi', 'url', 'address',
            'booktitle', 'chapter', 'crossref', 'edition', 'editor',
            'translator', 'howpublished', 'institution', 'journal',
            'bibtex_key', 'month', 'note', 'number', 'organization',
            'pages', 'publisher', 'location', 'school', 'series', 'title',
            'type', 'volume', 'year', 'raw', 'verified', 'last_modified',
            'entryTime', 'citation_id',
        )
        struct = {'authors': [a.json for a in self.authors]}
        for attr in attrs:
            struct[attr] = getattr(self, attr, None)
        struct["auth_string"] = " ".join(a.toString() for a in self.authors)
        return struct
View
@view_config(route_name='citation_add', request_method='POST', renderer='pubs_json')
def citation_add(request):
    """Parse the POSTed request body and return the first citation's JSON data.

    The raw request body is handed to ``parser.parse``; the ``json``
    attribute of the first element of its result is returned so that the
    ``pubs_json`` renderer can serialize it.
    """
    parsed = parser.parse(request.body)
    first_citation = parsed[0]
    return first_citation.json
Renderer
# -*- coding: utf-8 -*-
import customjson
import os
from pyramid.asset import abspath_from_asset_spec
class PubsJSONRenderer:
    """Renderer that serializes a view's return value with ``customjson``."""

    def __init__(self, info):
        """Accept the renderer ``info`` object; nothing from it is stored.

        ``info`` has the following attributes: ``name`` (the renderer
        name), ``package`` (the package that was 'current' when the
        renderer was registered), ``type`` (the renderer type name),
        ``registry`` (the current application registry) and ``settings``
        (the deployment settings dictionary).
        """

    def __call__(self, value, system):
        """Render ``value`` and return the result of ``customjson.dumps``.

        ``value`` is the return value of a view. ``system`` is a dictionary
        of system values (e.g. view, context and request). If it holds a
        request that has no ``response_content_type`` attribute yet, that
        attribute is set to ``'application/json'`` before rendering.
        """
        request = system.get('request')
        needs_content_type = (
            request is not None
            and not hasattr(request, 'response_content_type')
        )
        if needs_content_type:
            request.response_content_type = 'application/json'
        return customjson.dumps(value)
customjson.py
from json import JSONEncoder
from decimal import Decimal
class ExtJsonEncoder(JSONEncoder):
    '''
    Extends ``json.JSONEncoder`` by allowing it to encode any arbitrary
    generator, iterator, closure or functor, as well as ``Decimal`` values.
    '''

    def default(self, c):
        '''
        Return a JSON-serializable stand-in for ``c``.

        Called by the base encoder only for objects it cannot serialize
        itself. The checks are applied in this order:

        * anything exposing ``__iter__`` is expanded into a list;
        * any callable is invoked with no arguments and its result is used;
        * a ``Decimal`` is converted to ``float``;
        * everything else is delegated to ``JSONEncoder.default``, which
          raises ``TypeError``.
        '''
        # Handles generators and iterators (and other iterables, e.g. sets)
        if hasattr(c, '__iter__'):
            return list(c)
        # Handles closures and functors
        if callable(c):
            return c()
        # Handles precise decimals with loss of precision to float.
        # Hack, but it works
        if isinstance(c, Decimal):
            return float(c)
        return JSONEncoder.default(self, c)
def dumps(*args):
    '''
    Encode the given arguments with a freshly configured ``ExtJsonEncoder``.

    The encoder is created with ``sort_keys=False``, ``ensure_ascii=False``
    and ``skipkeys=True``; ``args`` are forwarded unchanged to its
    ``encode()`` method and the encoded result is returned.
    '''
    encoder = ExtJsonEncoder(
        skipkeys=True,
        ensure_ascii=False,
        sort_keys=False,
    )
    return encoder.encode(*args)
The stack trace
Traceback (most recent call last):
File "/var/site/siteenv/lib/python2.7/site-packages/pyramid/router.py", line 242, in __call__
response = self.invoke_subrequest(request, use_tweens=True)
File "/var/site/siteenv/lib/python2.7/site-packages/pyramid/router.py", line 217, in invoke_subrequest
response = handle_request(request)
File "/var/site/siteenv/lib/python2.7/site-packages/pyramid_debugtoolbar/toolbar.py", line 160, in toolbar_tween
return handler(request)
File "/var/site/siteenv/lib/python2.7/site-packages/pyramid/tweens.py", line 21, in excview_tween
response = handler(request)
File "/var/site/siteenv/lib/python2.7/site-packages/pyramid_tm/__init__.py", line 82, in tm_tween
reraise(*exc_info)
File "/var/site/siteenv/lib/python2.7/site-packages/pyramid_tm/__init__.py", line 63, in tm_tween
response = handler(request)
File "/var/site/siteenv/lib/python2.7/site-packages/pyramid/router.py", line 163, in handle_request
response = view_callable(context, request)
File "/var/site/siteenv/lib/python2.7/site-packages/pyramid/config/views.py", line 329, in attr_view
return view(context, request)
File "/var/site/siteenv/lib/python2.7/site-packages/pyramid/config/views.py", line 305, in predicate_wrapper
return view(context, request)
File "/var/site/siteenv/lib/python2.7/site-packages/pyramid/config/views.py", line 377, in rendered_view
context)
File "/var/site/sitvenv/lib/python2.7/site-packages/pyramid/renderers.py", line 418, in render_view
return self.render_to_response(response, system, request=request)
File "/var/site/siteenv/lib/python2.7/site-packages/pyramid/renderers.py", line 441, in render_to_response
result = self.render(value, system_values, request=request)
File "/var/site/siteenv/lib/python2.7/site-packages/pyramid/renderers.py", line 437, in render
result = renderer(value, system_values)
File "/var/site/renderers.py", line 30, in __call__
return customjson.dumps(value)
File "/var/site/customjson.py", line 38, in dumps
skipkeys=True).encode(*args)
File "/usr/lib/python2.7/json/encoder.py", line 203, in encode
return ''.join(chunks)
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 93: ordinal not in range(128)
Return from the parser
We feed in
Allen, C. 1995 "It isn't what you think: a new idea about intentional causation." Noûs 29,1:115-126
We get back a dict object like this from the parser:
{'title': '\\\\"It isn\\'t what you think: a new idea about intentional causation.\\\\"', 'journal': 'No\\\\xc3\\\\xbbs', 'author': 'Allen, C.', 'number': 1, 'volume': 29, 'date': '1995', 'type': 'article', 'pages': u'115\\u2013126'}
Tried
Because the app is operating in a virtual environment, I felt all right hopping over to page.py and changing the default encoding from ascii to utf-8.
I've tried encoding and decoding, and adding charset=utf8&use_unicode=1 to my SQLAlchemy URL, to no avail.
My suspicion is that the problem lies with the ensure_ascii=False option in the customjson.py file. In fact, the documentation for the Python 2.7 JSON encoder says the following:
if ensure_ascii is False, some chunks written to fp may be unicode instances. This usually happens because the input contains unicode strings or the encoding parameter is used. Unless fp.write() explicitly understands unicode (as in codecs.getwriter()) this is likely to cause an error.
Setting ensure_ascii=True seems to work around the error. Given that the default encoding for the json encoder is already utf-8, I'm not sure setting it manually will help. I need those unicode characters, so I'm not really sure how to approach the problem.
There was a call to JSON.stringify on the client side that was escaping the troublesome characters. Removing it led to the Python working as desired.