I have a JSON file containing 500 tweets from the Twitter API. They are stored as .json, and I need to load them into Python.
I tried multiple ways of loading/reading the file, but nothing worked. Could there be a problem with the file itself?
The error I get is:
Expecting value: line 1 column 1 (char 0)
This is one of many ways I tried:
import json  # needed for json.loads below

# Path to the exported tweet dump, relative to the current working directory.
file = 'resources/data/raw/500_random_tweets.json'
try:
    # Read the whole file as UTF-8 text; the context manager closes the handle.
    with open(file, 'r', encoding="utf8") as myfile:
        data = myfile.read()
    # Parse the text as a single strict JSON document. json.loads raises
    # json.JSONDecodeError (a ValueError subclass) when the text is not valid
    # JSON, e.g. "Expecting value: line 1 column 1 (char 0)".
    tweets = json.loads(data)
except Exception as e:
    # Best-effort diagnostic: print the failure message instead of crashing.
    print(e)
This is what the JSON looks like:
/* 1 */
{
"_id" : ObjectId("5abffac15374a8000feef65c"),
"created_at" : ISODate("2018-03-31T21:16:49.123Z"),
"id" : NumberLong(980192203329990656),
"id_str" : "980192203329990656",
"text" : "RT @skychainglobal: Don't miss the #last #chance to buy Skychain tokens with a 25% discount! Just 53,877 SKCH tokens left!\n#skychain #skych…",
"source" : "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>",
"truncated" : false,
"in_reply_to_status_id" : null,
"in_reply_to_status_id_str" : null,
"in_reply_to_user_id" : null,
"in_reply_to_user_id_str" : null,
"in_reply_to_screen_name" : null,
"user" : {
"id" : NumberLong(926157367787638784),
"id_str" : "926157367787638784",
"name" : "Dangerouslady #TraXionICO",
"screen_name" : "I_Demonangel",
"location" : "0x5c58e15bE26f8886895350B9d1Bf1fAD53c5C83B",
"url" : null,
"description" : "0x5c58e15bE26f8886895350B9d1Bf1fAD53c5C83B",
"translator_type" : "none",
"protected" : false,
"verified" : false,
"followers_count" : 3049,
"friends_count" : 4959,
"listed_count" : 3,
"favourites_count" : 1781,
"statuses_count" : 3257,
"created_at" : "Thu Nov 02 18:41:38 +0000 2017",
"utc_offset" : null,
"time_zone" : null,
"geo_enabled" : false,
"lang" : "en",
"contributors_enabled" : false,
"is_translator" : false,
"profile_background_color" : "F5F8FA",
"profile_background_image_url" : "",
"profile_background_image_url_https" : "",
"profile_background_tile" : false,
"profile_link_color" : "1DA1F2",
"profile_sidebar_border_color" : "C0DEED",
"profile_sidebar_fill_color" : "DDEEF6",
"profile_text_color" : "333333",
"profile_use_background_image" : true,
"profile_image_url" : "http://pbs.twimg.com/profile_images/973129911836782594/DXw4iXIf_normal.jpg",
"profile_image_url_https" : "https://pbs.twimg.com/profile_images/973129911836782594/DXw4iXIf_normal.jpg",
"profile_banner_url" : "https://pbs.twimg.com/profile_banners/926157367787638784/1520847227",
"default_profile" : true,
"default_profile_image" : false,
"following" : null,
"follow_request_sent" : null,
"notifications" : null
},
"geo" : null,
"coordinates" : null,
"place" : null,
"contributors" : null,
"retweeted_status" : {
"created_at" : "Sat Mar 31 12:06:59 +0000 2018",
"id" : NumberLong(980053845496815616),
"id_str" : "980053845496815616",
"text" : "Don't miss the #last #chance to buy Skychain tokens with a 25% discount! Just 53,877 SKCH tokens left!\n#skychain…",
"display_text_range" : [
0,
140
],
"source" : "<a href=\"http://twitter.com\" rel=\"nofollow\">Twitter Web Client</a>",
"truncated" : true,
"in_reply_to_status_id" : null,
"in_reply_to_status_id_str" : null,
"in_reply_to_user_id" : null,
"in_reply_to_user_id_str" : null,
"in_reply_to_screen_name" : null,
"user" : {
"id" : NumberLong(935520196865019905),
"id_str" : "935520196865019905",
"name" : "Skychain",
"screen_name" : "skychainglobal",
"location" : "Москва, Россия",
"url" : "https://skychain.global/",
"description" : "Skychain is an open infrastructure, blockchain project\naimed to host, train and use\nmedical neural networks.",
"translator_type" : "none",
"protected" : false,
"verified" : false,
"followers_count" : 4848,
"friends_count" : 0,
"listed_count" : 114,
"favourites_count" : 34,
"statuses_count" : 125,
"created_at" : "Tue Nov 28 14:46:10 +0000 2017",
"utc_offset" : -25200,
"time_zone" : "Pacific Time (US & Canada)",
"geo_enabled" : false,
"lang" : "ru",
"contributors_enabled" : false,
"is_translator" : false,
"profile_background_color" : "000000",
"profile_background_image_url" : "http://abs.twimg.com/images/themes/theme1/bg.png",
"profile_background_image_url_https" : "https://abs.twimg.com/images/themes/theme1/bg.png",
"profile_background_tile" : false,
"profile_link_color" : "1B95E0",
"profile_sidebar_border_color" : "000000",
"profile_sidebar_fill_color" : "000000",
"profile_text_color" : "000000",
"profile_use_background_image" : false,
"profile_image_url" : "http://pbs.twimg.com/profile_images/945202909242896384/OUsbpDAO_normal.jpg",
"profile_image_url_https" : "https://pbs.twimg.com/profile_images/945202909242896384/OUsbpDAO_normal.jpg",
"profile_banner_url" : "https://pbs.twimg.com/profile_banners/935520196865019905/1514189033",
"default_profile" : false,
"default_profile_image" : false,
"following" : null,
"follow_request_sent" : null,
"notifications" : null
},
"geo" : null,
"coordinates" : null,
"place" : null,
"contributors" : null,
"is_quote_status" : false,
"extended_tweet" : {
"full_text" : "Don't miss the #last #chance to buy Skychain tokens with a 25% discount! Just 53,877 SKCH tokens left!\n#skychain #skychainglobal #ico #medicine #blockchain ...",
"display_text_range" : [
0,
155
],
"entities" : {
"hashtags" : [
{
"text" : "last",
"indices" : [
15,
20
]
},
{
"text" : "chance",
"indices" : [
21,
28
]
},
{
"text" : "skychain",
"indices" : [
103,
112
]
},
{
"text" : "skychainglobal",
"indices" : [
113,
128
]
},
{
"text" : "ico",
"indices" : [
129,
133
]
},
{
"text" : "medicine",
"indices" : [
134,
143
]
},
{
"text" : "blockchain",
"indices" : [
144,
155
]
}
],
"urls" : [],
"user_mentions" : [],
"symbols" : [],
"media" : [
{
"id" : NumberLong(980053810260467712),
"id_str" : "980053810260467712",
"indices" : [
156,
179
],
"media_url" : "http://pbs.twimg.com/media/DZnZv3VX4AAlflW.png",
"media_url_https" : "https://pbs.twimg.com/media/DZnZv3VX4AAlflW.png",
"url" : "...",
"display_url" : "pic.twitter.com/68Ma6BljIX",
"expanded_url" : "https://twitter.com/skychainglobal/status/980053845496815616/photo/1",
"type" : "photo",
"sizes" : {
"medium" : {
"w" : 256,
"h" : 256,
"resize" : "fit"
},
"small" : {
"w" : 256,
"h" : 256,
"resize" : "fit"
},
"large" : {
"w" : 256,
"h" : 256,
"resize" : "fit"
},
"thumb" : {
"w" : 150,
"h" : 150,
"resize" : "crop"
}
}
}
]
},
"extended_entities" : {
"media" : [
{
"id" : NumberLong(980053810260467712),
"id_str" : "980053810260467712",
"indices" : [
156,
179
],
"media_url" : "http://pbs.twimg.com/media/DZnZv3VX4AAlflW.png",
"media_url_https" : "https://pbs.twimg.com/media/DZnZv3VX4AAlflW.png",
"url" : "...",
"display_url" : "pic.twitter.com/68Ma6BljIX",
"expanded_url" : "https://twitter.com/skychainglobal/status/980053845496815616/photo/1",
"type" : "photo",
"sizes" : {
"medium" : {
"w" : 256,
"h" : 256,
"resize" : "fit"
},
"small" : {
"w" : 256,
"h" : 256,
"resize" : "fit"
},
"large" : {
"w" : 256,
"h" : 256,
"resize" : "fit"
},
"thumb" : {
"w" : 150,
"h" : 150,
"resize" : "crop"
}
}
}
]
}
},
"quote_count" : 7,
"reply_count" : 4,
"retweet_count" : 450,
"favorite_count" : 391,
"entities" : {
"hashtags" : [
{
"text" : "last",
"indices" : [
15,
20
]
},
{
"text" : "chance",
"indices" : [
21,
28
]
},
{
"text" : "skychain",
"indices" : [
103,
112
]
}
],
"urls" : [
{
"url" : "...",
"expanded_url" : "https://twitter.com/i/web/status/980053845496815616",
"display_url" : "twitter.com/i/web/status/9…",
"indices" : [
114,
137
]
}
],
"user_mentions" : [],
"symbols" : []
},
"favorited" : false,
"retweeted" : false,
"possibly_sensitive" : false,
"filter_level" : "low",
"lang" : "en"
},
"is_quote_status" : false,
"quote_count" : 0,
"reply_count" : 0,
"retweet_count" : 0,
"favorite_count" : 0,
"entities" : {
"hashtags" : [
{
"text" : "last",
"indices" : [
35,
40
]
},
{
"text" : "chance",
"indices" : [
41,
48
]
},
{
"text" : "skychain",
"indices" : [
123,
132
]
}
],
"urls" : [],
"user_mentions" : [
{
"screen_name" : "skychainglobal",
"name" : "Skychain",
"id" : NumberLong(935520196865019905),
"id_str" : "935520196865019905",
"indices" : [
3,
18
]
}
],
"symbols" : []
},
"favorited" : false,
"retweeted" : false,
"filter_level" : "low",
"lang" : "en",
"timestamp_ms" : "1522531007021",
"search_id" : "ai_blockchain"
}
/* 2 */
{
"_id" : ObjectId("5abffac15374a8000feef65d"),
"created_at" : ISODate("2018-03-31T21:16:49.571Z"),
"id" : NumberLong(980192209034100736),
"id_str" : "980192209034100736",
"text" : "RT @Waltonchain: We are glad to see our Waltonchain Windows Wallet is well received by the community upon launch. It's an effortless one-cl…",
"source" : "<a href=\"https://t.me/alt_time\" rel=\"nofollow\"> HJ's Coin Crawler 3</a>",
"truncated" : false,
"in_reply_to_status_id" : null,
"in_reply_to_status_id_str" : null,
"in_reply_to_user_id" : null,
"in_reply_to_user_id_str" : null,
"in_reply_to_screen_name" : null,
"user" : {
"id" : NumberLong(959659082956156928),
"id_str" : "959659082956156928",
"name" : "How to Coin",
"screen_name" : "how_to_coin",
...
This file is not JSON. Not only does it start with a comment, it contains several types - ObjectId, NumberLong, ISODate - which are not supported in JSON either.
Instead, it appears to be BSON, which is the serialisation format used by MongoDB. You will need to install the BSON client from the MongoDB library to read it.