My simple Python script, which creates a dataset and then adds a single PDF file to that dataset as a resource, is failing with "{file} is not json serializable".
# coding=utf-8
# import base64
import ckanapi
import requests
import csv
import json
import pprint
import socket
import netifaces as ni
# UPDATE THESE AND ONLY THESE.
# Script as posted in the question: creates a CKAN dataset, then tries to
# attach a PDF to it as a resource. The final request is the one that fails.
api_token = '***'
# Hostname of this machine; not referenced again in the visible script.
the_hostname = socket.gethostname()
# First IPv4 address bound to the 'eth0' interface.
the_ipaddress = ni.ifaddresses('eth0')[ni.AF_INET][0]['addr']
site_url = 'http://' + the_ipaddress + ':5000'
# Action API endpoints for creating the dataset (package) and the resource.
endpoint_p = '{}/api/3/action/package_create'.format(site_url)
endpoint_r = '{}/api/3/action/resource_create'.format(site_url)
headers = {'Authorization': api_token}
# Fields sent to package_create.
payload_p = {
"name": "test01",
"private": "true",
"state": "active",
"owner_org": "b15a6f45-e2ed-4587-8c5e-a92dbc9f157d",
"maintainer" : "Forms Management",
"maintainer_email" : "[email protected]",
"author" : "Test Author",
"author_email" : "[email protected]"
}
# Fields sent to resource_create. "package_id" is a placeholder that is
# overwritten below with the id of the newly created dataset.
# NOTE: "upload" holds an open file object (opened in text mode 'r'), which
# is what later makes this dict impossible to encode as JSON.
payload_r = {
"package_id": "null",
"name": "English - test01 - Test Description",
"url": "upload",
"upload": open('/var/www/upload/2nd/unzipped/002-33-5098E/33-5098E.pdf', 'r'),
"description": "This is a test resource attached to dataset test01",
"notes": "This is a longer block of text that is for the resource test01e which is attached to the dataset test01"
}
# Same path as the "upload" entry above; not referenced by the rest of the
# visible script.
filepaths = {
"thepath": "/var/www/upload/2nd/unzipped/002-33-5098E/33-5098E.pdf"
}
# Create the dataset and read its id out of the JSON response.
req_p = requests.post(endpoint_p, json=payload_p, headers=headers)
theLastResponse = req_p.json()
theLastPackageCreated = theLastResponse['result']['id']
payload_r["package_id"] = theLastPackageCreated
# json= asks requests to JSON-encode payload_r; the file object under
# "upload" cannot be JSON-encoded, hence the "not JSON serializable" error.
req_r = requests.post(endpoint_r, json = payload_r, headers = headers) # resource_create()
This throws an error "{file} is not json serializable". The file is a PDF, which is a binary file, but I'm not sure if some type of encoding is required (note the commented out "base64" module... I didn't want to go down that road without asking if that's the right approach.)
The CKAN API documentation here: https://docs.ckan.org/en/2.9/api/#ckan.logic.action.create.resource_create
says that the "upload" should be a "(FieldStorage (optional) needs multipart/form-data) – (optional)" but all the example scripts I've seen to upload files to CKAN show the code only and exactly what I've done here, no extra pre-processing of the file being uploaded or what not, so I'm not sure exactly what might be the issue... please help if you can!
I copied your code and ran a modified version against a local dev copy of CKAN and had it work without issue AFTER my mods which are included below.
Most notably:
1) Pass the payload with the `data` parameter instead of `json`, as the request must be sent as multipart/form-data.
2) Send the file itself with the `files` parameter. Docs: https://docs.ckan.org/en/2.9/maintaining/filestore.html#filestore-api
IMO this is less of a CKAN issue and more a matter of understanding the chosen library (i.e. requests). There are many ways to do this with different tools.
I also had to update the payloads to align with my schema but assuming that's correct for yours this should work.
# coding=utf-8
# import base64
import ckanapi
import requests
import csv
import json
import pprint
import socket
import netifaces as ni
# UPDATE THESE AND ONLY THESE.
# Creates a CKAN dataset, then uploads a PDF to it as a resource using a
# multipart/form-data request.
api_token = '***'
the_hostname = socket.gethostname()
# First IPv4 address bound to the 'eth0' interface.
the_ipaddress = ni.ifaddresses('eth0')[ni.AF_INET][0]['addr']
site_url = 'http://' + the_ipaddress + ':5000'
# Action API endpoints for creating the dataset (package) and the resource.
endpoint_p = '{}/api/3/action/package_create'.format(site_url)
endpoint_r = '{}/api/3/action/resource_create'.format(site_url)
headers = {'Authorization': api_token}

# Fields sent to package_create (plain JSON is fine here: no file involved).
payload_p = {
    "name": "test01",
    "private": "true",
    "state": "active",
    "owner_org": "b15a6f45-e2ed-4587-8c5e-a92dbc9f157d",
    "maintainer": "Forms Management",
    "maintainer_email": "[email protected]",
    "author": "Test Author",
    "author_email": "[email protected]",
}

# Form fields sent to resource_create. "package_id" is a placeholder that is
# replaced below with the id of the dataset that was just created.
payload_r = {
    "package_id": "null",
}

# Location of the file to upload.
filepaths = {
    "thepath": "/var/www/upload/2nd/unzipped/002-33-5098E/33-5098E.pdf",
}

# Create the dataset. Fail with the HTTP error if CKAN rejects the request,
# rather than with a KeyError when looking up 'result' below.
req_p = requests.post(endpoint_p, json=payload_p, headers=headers)
req_p.raise_for_status()
theLastResponse = req_p.json()
theLastPackageCreated = theLastResponse['result']['id']
payload_r["package_id"] = theLastPackageCreated

# Upload the file. The form fields go in data= and the file in files=, so
# requests builds a multipart/form-data body. The PDF is opened in binary
# mode with open() (the file() builtin does not exist on Python 3), and the
# with-block closes the handle once the request has been sent.
with open(filepaths["thepath"], 'rb') as upload_file:
    req_r = requests.post(  # resource_create()
        endpoint_r,
        data=payload_r,
        headers=headers,
        files=[('upload', upload_file)],
    )