elephant-bird
elephant-bird copied to clipboard
PIG LOAD JSON FILE using Elephant-bird gives ERROR
Trying to load the json file which is having null values in it by using elephant-bird JsonLoader.
sample.json
{"created_at":"Mon Aug 22 10:48:23 +0000 2016","id":767674772662607873,"id_str":"767674772662607873","text":"KPIT Image Result for https:\/\/t.co\/Nas2ZnF1zZ... https:\/\/t.co\/9TnelwtIvm","source":"\u003ca href=\"http:\/\/twitter.com\" rel=\"nofollow\"\u003eTwitter Web Client\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":123,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[],"urls":[{"url":"https:\/\/t.co\/Nas2ZnF1zZ","expanded_url":"http:\/\/miltonious.com\/","display_url":"miltonious.com","indices":[24,47]}],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","timestamp_ms":"1471862903167"}
script:
REGISTER piggybank.jar
REGISTER json-simple-1.1.1.jar
REGISTER elephant-bird-pig-4.3.jar
REGISTER elephant-bird-core-4.1.jar
REGISTER elephant-bird-hadoop-compat-4.3.jar
json = LOAD 'sample.json' USING JsonLoader('created_at:chararray, id:chararray, id_str:chararray, text:chararray, source:chararray, in_reply_to_status_id:chararray, in_reply_to_status_id_str:chararray, in_reply_to_user_id:chararray, in_reply_to_user_id_str:chararray, in_reply_to_screen_name:chararray, geo:chararray, coordinates:chararray, place:chararray, contributors:chararray, is_quote_status:bytearray, retweet_count:long, favorite_count:chararray, entities:map[], favorited:bytearray, retweeted:bytearray, possibly_sensitive:bytearray, lang:chararray');
describe json;
dump json;
When I dump json,I am getting the following output and the worning
(Mon Aug 22 10:48:23 +0000 2016,767674772662607873,767674772662607873,google Image Result for Twitter Web Client,false,1234,12345,3214,43215,,,,,,,,,,,,,,)
WARN org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigHadoopLogger - org.apache.pig.builtin.JsonLoader(UDF_WARNING_1): Bad record, returning null for {complete json}
By warning i guess it is because of getting NULL values. So how can we load a Json which is having null values in it.
And I have tried in another way i.e
json = LOAD 'sample.json' USING com.twitter.elephantbird.pig.load.JsonLoader('created_at:chararray, id:chararray, id_str:chararray, text:chararray, source:chararray, in_reply_to_status_id:chararray, in_reply_to_status_id_str:chararray, in_reply_to_user_id:chararray, in_reply_to_user_id_str:chararray, in_reply_to_screen_name:chararray, geo:chararray, coordinates:chararray, place:chararray, contributors:chararray, is_quote_status:bytearray, retweet_count:long, favorite_count:chararray, entities:map[], favorited:bytearray, retweeted:bytearray, possibly_sensitive:bytearray, lang:chararray');
describe json;
Output
Schema for json unknown.
Please suggest me.
Thanks.