...
Code Block |
---|
{ "description": "2.5 million tweets related to Super Storm Sandy", "extractType": "File", "file": { "XmlPrimaryKey": "", "XmlRootLevelValues": [], "domain": "XXXXXXX", "password": "XXXXXX", "username": "XXXXXX" }, "isApproved": true, "isPublic": true, "mediaType": "Social", "structuredAnalysis": { "rejectDocCriteria": "$SCRIPT( if (null == _doc.metadata.json[0].link || null == _doc.metadata.json[0].object) return 'reject'; )", "metadataFieldList": "", "docGeo": { "lat": "$SCRIPT( try {return _doc.metadata.json[0].geo.coordinates[0];} catch (err) {return '';})", "lon": "$SCRIPT( try {return _doc.metadata.json[0].geo.coordinates[1];} catch (err) {return '';})" }, "associations": [ { "assoc_type": "Event", "creationCriteriaScript": "$SCRIPT( return (null != _doc.metadata.json[0].object.actor); )", "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)", "entity2_index": "$SCRIPT( return _doc.metadata.json[0].object.actor.preferredUsername + '/twitterhandle';)", "verb": "retweets", "verb_category": "retweets" }, { "assoc_type": "Event", "creationCriteriaScript": "$SCRIPT( return (null != _doc.metadata.json[0].object.actor) && (null != _doc.metadata.json[0].object.actor.location); )", "entity1_index": "$SCRIPT( return _doc.metadata.json[0].object.actor.preferredUsername + '/twitterhandle';)", "entity2_index": "$SCRIPT( return _doc.metadata.json[0].object.actor.location.displayName+ '/location';)", "verb": "twitter_location", "verb_category": "twitter_location" }, { "assoc_type": "Event", "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)", "entity2_index": "$SCRIPT( return _iterator.text + '/hashtag'; )", 
"iterateOver": "json.twitter_entities.hashtags", "verb": "tweets_about", "verb_category": "tweets_about" }, { "assoc_type": "Event", "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)", "entity2_index": "$SCRIPT( return _iterator.screen_name + '/twitterhandle'; )", "iterateOver": "json.twitter_entities.user_mentions", "verb": "tweets_to", "verb_category": "tweets_to" }, { "assoc_type": "Event", "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)", "entity2_index": "$SCRIPT( return _iterator.expanded_url + '/url'; )", "iterateOver": "json.gnip.urls", "verb": "tweets_link", "verb_category": "tweets_link" } ], "description": "$metadata.json.body", "entities": [ { "actual_name": "$metadata.json.actor.displayName", "dimension": "Who", "disambiguated_name": "$metadata.json.actor.preferredUsername", "linkdata": "$metadata.json.actor.link", "type": "TwitterHandle" }, { "iterateOver": "json.twitter_entities.user_mentions", "actual_name": "$SCRIPT(return _iterator.name;)", "dimension": "Who", "disambiguated_name": "$SCRIPT(return _iterator.screen_name;)", "linkdata": "$SCRIPT(return 'http://www.twitter.com/' + _iterator.screen_name;)", "type": "TwitterHandle" }, { "actual_name": "$metadata.json.object.actor.displayName", "dimension": "Who", "disambiguated_name": "$metadata.json.object.actor.preferredUsername", "linkdata": "$metadata.json.object.actor.link", "type": "TwitterHandle" }, { "dimension": "Where", "disambiguated_name": "$metadata.json.actor.location.displayName", "geotag": { "city": "$SCRIPT( return getAddressVal( _doc.metadata.json[0].actor.location.displayName, 0 ) )", "stateProvince": "$SCRIPT( return getRegion(getAddressVal( _doc.metadata.json[0].actor.location.displayName, 1 )) )", "country_code" : "US", "alternatives": [ { "stateProvince": "$SCRIPT( return getRegion(getAddressVal( _doc.metadata.json[0].actor.location.displayName, 1 )) )", "country_code" : "US"
} ] }, "type": "Location" }, { "dimension": "Where", "disambiguated_name": "$metadata.json.object.actor.location.displayName", "type": "Location" }, { "disambiguated_name": "$SCRIPT(return _iterator.text;)", "iterateOver": "json.twitter_entities.hashtags", "type": "HashTag" }, { "actual_name": "$SCRIPT(return _iterator.url)", "disambiguated_name": "$SCRIPT(return _iterator.expanded_url;)", "iterateOver": "json.gnip.urls", "type": "URL" } ], "fullText": "$metadata.json.body", "script": "function getAddressVal( addressStr, number) { try { var addressArray = addressStr.split(','); if (addressArray != null && addressArray.length > 0) { return addressArray[number]; } else { return ''; } } catch (err) { return ''; } } function getRegion( code ) { if (code.toLowerCase()=='ny') {return 'New York';} else if (code.toLowerCase()=='nj') {return 'New Jersey';} else if (code.toLowerCase()=='ct') {return 'Connecticut';} else if (code.toLowerCase()=='md') {return 'Maryland';} else if (code.toLowerCase()=='va') {return 'Virginia';} else if (code.toLowerCase()=='pa') {return 'Pennsylvania';} else {return 'New York';} }", "scriptEngine": "javascript", "title": "$metadata.json.body", "url": "$metadata.json.link", "publishedDate": "$SCRIPT(return _doc.metadata.json[0].postedTime.replace(/.[0-9]{3}Z/,'Z');)" }, "tags": [ "twitter", "gnip" ], "title": "Super Storm Sandy - Twitter", "url": "smb://FILESHARE:139/XXXXXXXXXXX/sandy_demo/", "useExtractor": "none", "useTextExtractor": "none" } |
Sample output
TODO