Search code examples
pythonjsonprawreddit

python reddit praw psraw got decode json Value Error


I'm trying to get subreddit's content and comments and write them into txt files. One file will be each post's comments and another one will list each post's related information. However, I got these errors after the 7250 results and there are 36k+ results I need to get.

I'm also using praw 4.6, because after updated to 5.0, psraw cannot work though.

Error messages:

Traceback (most recent call last):
  File "C:/Users/PycharmProjects/untitled/subreddit psraw.py", line 12, in <module>
    for submission in psraw.submission_search(reddit, subreddit='R', limit=40000):
  File "C:\Python27\lib\site-packages\psraw\base.py", line 71, in endpoint_func
    data = requests.get(url).json()['data']
  File "C:\Python27\lib\site-packages\requests\models.py", line 894, in json
    return complexjson.loads(self.text, **kwargs)
  File "C:\Python27\lib\json\__init__.py", line 339, in loads
return _default_decoder.decode(s)
  File "C:\Python27\lib\json\decoder.py", line 364, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "C:\Python27\lib\json\decoder.py", line 382, in raw_decode
raise ValueError("No JSON object could be decoded")
ValueError: No JSON object could be decoded

My code:

import praw, datetime, os, psraw

reddit = praw.Reddit('bot1')

subreddit = reddit.subreddit('R')

count = 0
try:
  for submission in psraw.submission_search(reddit, subreddit='R', limit=40000):
  count_coment = 0

  #get comments
    for comment in submission.comments:
        subid = submission.id
        comid = comment.id
        comauthor = comment.author
        com_body = comment.body.encode('utf-8').replace("\n", " ")
        comscore = comment.score
        com_date = datetime.datetime.utcfromtimestamp(comment.created_utc)
        string_com = '"{0}", "{1}", "{2}", "{3}", "{4}"\n'
        formatted_string_com = string_com.format(comid, comauthor, com_body, com_date, comscore)
        indexFile_comment = open('C:/Users/PycharmProjects/untitled/reddit_output_diabetes/' + subid + '.txt', 'a+')
        indexFile_comment.write(formatted_string_com)
        count_coment += 1
    print 'comment count: ', count_coment

    #get index

    date = datetime.datetime.utcfromtimestamp(submission.created_utc)
    _id = submission.id
    title = submission.title.encode('utf-8')
    text = submission.selftext.encode('utf-8').replace("\n", " ")
    author = submission.author
    score = submission.score
    string = '"{0}", "{1}", "{2}", "{3}", "{4}", "{5}"\n' 

    formatted_string = string.format(_id, title, text, author, date, score)
    count += 1
    indexFile = open('C:/Users/PycharmProjects/untitled/reddit_output/' + 'index.txt', 'a+')
    indexFile.write(formatted_string)

    print ("Successfuly writing in file")
    print count
    indexFile.close()
  print count
except ValueError:
    pass

Solution

  • It should be:

    try:
    
    .......put code here...
    
    except ValueError:
       pass
       continue