...
Code Block |
---|
//TODO (INF-1865): need to distinguish between "tweets_to" and "retweets"... //TODO (INF-1865): looks like body is HTML encoded //TODO (INF-1865): aggregate sentiment vs user? //TODO (INF-1865): distinguish between tweets and mentions { "description": "A large set of tweets related to Super Storm Sandy", "extractTypeisApproved": "File"true, "extractorOptionsisPublic": false, { "mediaType": "Social", "app.alchemyapi-metadata.batchSizetags": 100,[ "app.alchemyapi-metadata.numKeywords": 5twitter", "gnip" ], "title": "app.alchemyapi-metadata.strict": "true" Super Storm Sandy - Twitter: SANDY_SUBSTRING", "processingPipeline": [ { }, "file": { "XmlPrimaryKey": "link", "XmlSourceName": "", "XmlRootLevelValues": [], "domain": "XXX", "password": "XXX", "username": "XXX", }, "isApproved": true, "isPublic": false, "mediaType": "Social", "structuredAnalysis": { "rejectDocCriteria": "$SCRIPT( if (null == _doc.metadata.json[0].link || null == _doc.metadata.json[0].object) return 'reject'; )", "metadataFieldList": "", "docGeo" : { "lat": "$SCRIPT( try {return _doc.metadata.json[0].geo.coordinates[0];} catch (err) {return '';})", "lon": "$SCRIPT( try {return _doc.metadata.json[0].geo.coordinates[1];} catch (err) {return '';})" }, "associations": [ "url": "smb://HOST:139/SHARE/PATH/TO/" } }, { "globals": { { "scripts": [ "assoc_type": "Event", "function getAddressVal( addressStr, number) "creationCriteriaScript": "$SCRIPT( return (null != _doc.metadata.json[0].object.actor); )", "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)", "entity2_index": "$SCRIPT( return _doc.metadata.json[0].object.actor.preferredUsername + '/twitterhandle';)", "verb": "retweets", "verb_category": "retweets" },{ try { var addressArray = addressStr.split(/ *, */); if (addressArray != null && addressArray.length > 0) { if (addressArray[number].toLowerCase()=='ny') { return 'new york'; } else if (addressArray[number].toLowerCase()=='long island' || addressArray[number].toLowerCase()=='li') { return 'medford'; } else { return addressArray[number]; } } else { return ''; } } catch (err) { return ''; } } function getRegion( code ) { if (code.toLowerCase()=='ny') {return 'New York';} else if (code.toLowerCase()=='nj') {return 'New Jersey';} else if (code.toLowerCase()=='ct') {return 'Connecticut';} else if (code.toLowerCase()=='md') {return 'Maryland';} else if (code.toLowerCase()=='va') {return 'Virginia';} else if (code.toLowerCase()=='pa') {return 'Pennsylvania';} else if (code.toLowerCase()=='nj') {return 'New Jersey';} else {return 'New York';} }" { ] "assoc_type": "Event", } }, "creationCriteriaScript": "$SCRIPT( return (null != _doc.metadata.json[0].object.actor) && (null != _doc.metadata.json[0].object.actor.location); )", { "docMetadata": { "entity1_indextitle": "$SCRIPT( return _doc.metadata.json[0].object.actor.preferredUsername + '/twitterhandle';)",$metadata.json.body", "entity2_indexdescription": "$SCRIPT( return _doc.metadata.json[0].object.actor.location.displayName+ '/location';)",$metadata.json.body", "verbfullText": "twitter_location$metadata.json.body", "verb_categorypublishedDate": "twitter_location"$SCRIPT(return _doc.metadata.json[0].postedTime.replace(/.[0-9]{3}Z/,'Z');)", }, "geotag": { { "assoc_type"lat": "Event",$SCRIPT( try "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)",{return _doc.metadata.json[0].geo.coordinates[0];} catch (err) {return '';})", "entity2_indexlon": "$SCRIPT( try {return _iterator.text + '/hashtag'; )",doc.metadata.json[0].geo.coordinates[1];} catch (err) {return '';})" "iterateOver": "json.twitter_entities.hashtags", } "verb": "tweets_about" } }, { "verb_category": "tweets_about" "featureEngine": { }, "engineName": "AlchemyAPI-metadata", { "assoc_typeengineConfig": "Event",{ "entity1_index": "$SCRIPT( return _doc"app.alchemyapi-metadata.json[0].actor.preferredUsername + '/twitterhandle';)",batchSize": 100, "entity2_index": "$SCRIPT( return _iterator.screen_name + '/twitterhandle'; )", "app.alchemyapi-metadata.numKeywords": 5, "iterateOverapp.alchemyapi-metadata.strict": "json.twitter_entities.user_mentions",true" "verb": "tweets_to", } "verb_category": "tweets_to" } }, }, { { "entities": [ "assoc_type": "Event", { "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)", "actual_name": "$metadata.json.actor.displayName", "entity2_indexdimension": "$SCRIPT( return _iterator.expanded_url + '/url'; )","Who", "iterateOverdisambiguated_name": "$metadata.json.gnipactor.urlspreferredUsername", "verblinkdata": "tweets_$metadata.json.actor.link", "verb_category": "tweets_link" "type": "TwitterHandle" } }, ], "description": "$metadata.json.body", { "entities": [ { "actual_nameiterateOver": "$metadata.json.actor.displayNametwitter_entities.user_mentions", "dimension": "Who", "disambiguatedactual_name": "$metadata.json.actor.preferredUsername$SCRIPT(return _iterator.name;)", "linkdata": "$metadata.json.actor.link", "type": "TwitterHandle" }, "dimension": "Who", { "iterateOver": "json.twitter_entities.user_mentions", "actualdisambiguated_name": "$SCRIPT(return _iterator.screen_name;)", "dimension": "Who", "disambiguated_name": "$SCRIPT(return _iterator.screen_name;)", "linkdata": "$SCRIPT(return 'http://www.twitter.com/' + _iterator.screen_name;)", "type": "TwitterHandle" }, }, { "actual_name": "$metadata.json.object.actor.displayName", "dimension": "Who", "disambiguated_name": "$metadata.json.object.actor.preferredUsername", "linkdata": "$metadata.json.object.actor.link", "type": "TwitterHandle" }, }, { "dimension": "Where", "disambiguated_name": "$metadata.json.actor.location.displayName", "geotag": { "city": "$SCRIPT( return getAddressVal( _doc.metadata.json[0].actor.location.displayName, 0 ) )", "stateProvince": "$SCRIPT( return getRegion "stateProvince": "$SCRIPT( return getRegion(getAddressVal( _doc.metadata.json[0].actor.location.displayName, 1 )) )", "countryCode" : "US", "alternatives": [ { "stateProvince": "$SCRIPT( return getRegion(getAddressVal( _doc.metadata.json[0].actor.location.displayName, 1 )) )", "countryCode" : "US" "countryCode": "US" } } ] }, "type": "Location" ] }, { "dimension": "Where", }, "disambiguated_name": "$metadata.json.object.actor.location.displayName", "type": "Location" }, { "disambiguated_name": "$SCRIPT(return _iterator.text;)", { "iterateOver": "json.twitter_entities.hashtags", "typedimension": "HashTagWhere", }, { "actualdisambiguated_name": "$SCRIPT(return _iterator.url)", $metadata.json.object.actor.location.displayName", "disambiguated_name": "$SCRIPT(return _iterator.expanded_url;)", "iterateOvertype": "json.gnip.urls", "Location" }, "type": "URL" } { ], "fullText": "$metadata.json.body", "scriptdisambiguated_name": "function getAddressVal$SCRIPT( addressStrreturn _iterator.text;)", number) { try { var addressArray = addressStr.split(/ *, */); if (addressArray != null && addressArray.length > 0) { if (addressArray[number].toLowerCase()=='ny') { return 'new york'; } else if (addressArray[number].toLowerCase()=='long island' || addressArray[number].toLowerCase()=='li') { return 'medford'; } else { return addressArray[number]; } } else { return ''; } } catch (err) { return ''; } } function getRegion( code ) { if (code.toLowerCase()=='ny') {return 'New York';} else if (code.toLowerCase()=='nj') {return 'New Jersey';} else if (code.toLowerCase()=='ct') {return 'Connecticut';} else if (code.toLowerCase()=='md') {return 'Maryland';} else if (code.toLowerCase()=='va') {return 'Virginia';} else if (code.toLowerCase()=='pa') {return 'Pennsylvania';} else if (code.toLowerCase()=='nj') {return 'New Jersey';} else {return 'New York';} }", "iterateOver": "json.twitter_entities.hashtags", "type": "HashTag" }, { "actual_name": "$SCRIPT(return _iterator.url)", "scriptEngine": "javascript", "titledisambiguated_name": "$metadata.json.body$SCRIPT(return _iterator.expanded_url;)", "url": "$metadata.json.link", "publishedDate "iterateOver": "$SCRIPT(return _doc.metadata.json[0].postedTime.replace(/.[0-9]{3}Z/,'Z');)" }, "tags": [gnip.urls", "twitter", "type": "gnipURL" ], "title": "Super Storm Sandy} - Twitter: SANDY_SUBSTRING", "url": "smb://HOST:139/SHARE/PATH/TO/", ] "useExtractor": "AlchemyAPI-metadata" }, "useTextExtractor": "none" } |
Sample output
Code Block | ||
---|---|---|
| ||
{ { "associations": [ { { "assoc_type": "Event", "entity1assoc_type": "focalcrmEvent", "entity1_indexcreationCriteriaScript": "focalcrm/twitterhandle", $SCRIPT( return (null != _doc.metadata.json[0].object.actor); )", "entity2": "crm", "entity2entity1_index": "crm/hashtag",$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)", "verbentity2_index": "tweets_about$SCRIPT( return _doc.metadata.json[0].object.actor.preferredUsername + '/twitterhandle';)", "verb_category": "tweets_about" }"verb": "retweets", { "assocverb_typecategory": "Eventretweets", "entity1": "focalcrm" }, "entity1_index": "focalcrm/twitterhandle", { "entity2": "http://www.crmbuyer.com/rsstory/76578.html", "entity2assoc_indextype": "http://www.crmbuyer.com/rsstory/76578.html/url", Event", "verb": "tweets_link", "verb_categorycreationCriteriaScript": "tweets_link" }$SCRIPT( return (null != _doc.metadata.json[0].object.actor) && (null != _doc.metadata.json[0].object.actor.location); )", ], "communityId": ["506dc16dfbf042893dd6b8f2"], "createdentity1_index": "May$SCRIPT( 16, 2013 12:28:09 PM UTCreturn _doc.metadata.json[0].object.actor.preferredUsername + '/twitterhandle';)", "description": "Amex Teams With Halo 4 on Master Chief Incentives http://t.co/IvwmjJyV #crm", "entitiesentity2_index": [ "$SCRIPT( return _doc.metadata.json[0].object.actor.location.displayName+ '/location';)", { "actual_nameverb": "CRM Buddytwitter_location", "dimension": "Who", "disambiguatedverb_namecategory": "FocalCRMtwitter_location", "doccount": 0, }, "frequency": 1, { "index": "focalcrm/twitterhandle", "linkdataassoc_type": ["http://www.twitter.com/FocalCRM"],"Event", "relevance "entity1_index": 0,"$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)", "totalfrequency": -1, "typeentity2_index": "TwitterHandle"$SCRIPT( return _iterator.text + }'/hashtag'; )", { "actual_nameiterateOver": "crmjson.twitter_entities.hashtags", "dimension": "What", "disambiguated_name"verb": "crmtweets_about", "doccount": 0, "frequency"verb_category": 1,"tweets_about" "index": "crm/hashtag", }, "relevance": 0, { "totalfrequency": -1, "assoc_type": "HashTag" }, Event", { "actualentity1_nameindex": "http://t.co/IvwmjJyV$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)", "dimension": "What", "entity2_index": "$SCRIPT( "disambiguated_name": "http://www.crmbuyer.com/rsstory/76578.htmlreturn _iterator.screen_name + '/twitterhandle'; )", "doccount": 0, "iterateOver": "json.twitter_entities.user_mentions", "frequency": 1, "indexverb": "http://www.crmbuyer.com/rsstory/76578.html/urltweets_to", "relevance": 0, "totalfrequency": -1, "verb_category": "tweets_to" "type": "URL" }, { { "actual_name": "Amex Teams", "dimensionassoc_type": "WhatEvent", "disambiguated_name": "Amex Teams", "doccount"entity1_index": -1, "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)", "frequency": 1, "entity2_index": "amex teams/keyword", $SCRIPT( return _iterator.expanded_url + '/url'; )", "relevance": 0.758636, "sentimentiterateOver": 0.160753,"json.gnip.urls", "totalfrequencyverb": -1"tweets_link", "type "verb_category": "Keywordtweets_link" }, } { ] "actual_name": "Halo", }, "dimension": "What", { "disambiguated_namestorageSettings": "Halo", { "doccountrejectDocCriteria": -1, "frequency": 1,"$SCRIPT( if (null == _doc.metadata.json[0].link || null == _doc.metadata.json[0].object) return 'reject'; )" } "index": "halo/keyword", } "relevance": 0.461833,] } |
Sample output
Code Block | ||
---|---|---|
| ||
{ "associations": [ "sentiment": 0.168822, { "totalfrequencyassoc_type": -1"Event", "typeentity1": "Keywordfocalcrm", }, {"entity1_index": "focalcrm/twitterhandle", "actual_nameentity2": "Master Chief Incentivescrm", "dimensionentity2_index": "Whatcrm/hashtag", "disambiguated_nameverb": "Master Chief Incentivestweets_about", "doccountverb_category": -1,"tweets_about" }, "frequency": 1, { "indexassoc_type": "master chief incentives/keywordEvent", "relevanceentity1": 0.981457"focalcrm", "sentimententity1_index": 0.168876"focalcrm/twitterhandle", "totalfrequencyentity2": -1, "type": "Keyword" }, {"http://www.crmbuyer.com/rsstory/76578.html", "actualentity2_nameindex": "http://www.crmbuyer.com/rsstory/t76578.cohtml/IvwmjJyVurl", "dimensionverb": "Whattweets_link", "disambiguatedverb_namecategory": "http://t.co/IvwmjJyV",tweets_link" } "doccount": -1 ], "communityId": ["506dc16dfbf042893dd6b8f2"], "created": "frequency": 1,May 16, 2013 12:28:09 PM UTC", "description": "Amex Teams With Halo 4 on Master Chief "index":Incentives "http://t.co/ivwmjjyv/keywordIvwmjJyV #crm", "entities": [ "relevance": 0.212007, { "sentimentactual_name": 0.126168"CRM Buddy", "totalfrequencydimension": -1"Who", "typedisambiguated_name": "KeywordFocalCRM", }, "doccount": 0, { "frequency": 1, "actual_name "index": "crmfocalcrm/twitterhandle", "dimensionlinkdata": "What"["http://www.twitter.com/FocalCRM"], "disambiguated_namerelevance": "crm"0, "doccounttotalfrequency": -1, "frequencytype": 1, "TwitterHandle" }, { "indexactual_name": "crm/keyword", "relevancedimension": 0.404086"What", "sentimentdisambiguated_name": 0.103838"crm", "totalfrequencydoccount": -10, "typefrequency": "Keyword"1, } ],"index": "crm/hashtag", "mediaType": ["Social"], "metadatarelevance": {"json": [{ 0, "actortotalfrequency": {-1, "displayNametype": "HashTag"CRM Buddy", }, "followersCount": "245", { "friendsCountactual_name": "0http://t.co/IvwmjJyV", "iddimension": "id:twitter.com:835627776What", "imagedisambiguated_name": "http://a0www.twimgcrmbuyer.com/profile_images/2630355549/8cad59efaddd57283dbb159332336744_normal.jpegrsstory/76578.html", "languagesdoccount": ["en"] 0, "frequency": 1, "linkindex": "http://www.twittercrmbuyer.com/FocalCRM/rsstory/76578.html/url", "linksrelevance": [{"rel": "me"}]0, "listedCounttotalfrequency": "6"-1, "objectTypetype": "person",URL" }, "postedTime": "2012-09-20T13:59:56.000Z", { "preferredUsernameactual_name": "FocalCRMAmex Teams", "statusesCountdimension": "3688What", "summarydisambiguated_name": "Amex Teams", "verifieddoccount": "false"-1, }, "frequency": 1, "bodyindex": "Amex Teams With Halo 4 on Master Chief Incentives http://t.co/IvwmjJyV #crm","amex teams/keyword", "relevance": 0.758636, "generatorsentiment": {0.160753, "displayNametotalfrequency": "dlvr.it"-1, "linktype": "http://dlvr.itKeyword" }, "gnip": { "kloutactual_scorename": "48Halo", "languagedimension": {"value": "en"}What", "matchingdisambiguated_rulesname": [{"value": "halo 4"}]"Halo", "urlsdoccount": [{ -1, "frequency": 1, "expanded_urlindex": "http://www.crmbuyer.com/rsstory/76578.htmlhalo/keyword", "urlrelevance": "http://t.co/IvwmjJyV"0.461833, }] "sentiment": 0.168822, }, "totalfrequency": -1, "id": "tag:search.twitter.com,2005:266601489475186688", "linktype": "http://twitter.com/FocalCRM/statuses/266601489475186688"Keyword" }, "object": { "idactual_name": "object:search.twitter.com,2005:266601489475186688Master Chief Incentives", "linkdimension": "http://twitter.com/FocalCRM/statuses/266601489475186688What", "objectTypedisambiguated_name": "noteMaster Chief Incentives", "postedTimedoccount": "2012-11-08T18:02:02.000Z"-1, "summaryfrequency": "Amex Teams With Halo 4 on Master Chief Incentives http://t.co/IvwmjJyV #crm" 1, "index": "master chief incentives/keyword", }, "objectTyperelevance": "activity",0.981457, "postedTimesentiment": "2012-11-08T18:02:02.000Z",0.168876, "providertotalfrequency": {-1, "displayNametype": "Twitter",Keyword" }, { "link "actual_name": "http://www.twitter.comt.co/IvwmjJyV", "objectTypedimension": "serviceWhat", }, "retweetCount"disambiguated_name": "0http://t.co/IvwmjJyV", "twitter_entitiesdoccount": {-1, "hashtagsfrequency": [{1, "index": "http://t.co/ivwmjjyv/keyword", "indices": [ "relevance": 0.212007, "71sentiment": 0.126168, "totalfrequency": -1, "75type": "Keyword" }, ], { "textactual_name": "crm", }]"dimension": "What", "urlsdisambiguated_name": [{ "crm", "display_urldoccount": "dlvr.it/2S6sjV", -1, "expanded_urlfrequency": "http://dlvr.it/2S6sjV", 1, "indicesindex": ["crm/keyword", "relevance": 0.404086, "50", "sentiment": 0.103838, "70totalfrequency": -1, "type": "Keyword" ], } ], "urlmediaType": "http://t.co/IvwmjJyV"["Social"], "metadata": {"json": [{ "actor": { "displayName": "CRM }]Buddy", "user_mentionsfollowersCount": []"245", },"friendsCount": "0", "verbid": "post"id:twitter.com:835627776", }]}, "modified": "Nov 8, 2012 06:02:44 PM UTC", "publishedDate": "Nov 8, 2012 06:02:02 PM UTC", "source": ["gnip test"], "sourceKey": [".mnt.fileshare.datasift.gnip."], "sourceUrl": "file:/mnt/fileshare/datasift/gnip/gnip.json", "tags": ["image": "http://a0.twimg.com/profile_images/2630355549/8cad59efaddd57283dbb159332336744_normal.jpeg", "languages": ["en"], "link": "http://www.twitter.com/FocalCRM", "links": [{"rel": "me"}], "listedCount": "6", "objectType": "person", "postedTime": "2012-09-20T13:59:56.000Z", "preferredUsername": "FocalCRM", "statusesCount": "3688", "summary": "", "verified": "false" }, "body": "Amex Teams With Halo 4 on Master Chief Incentives http://t.co/IvwmjJyV #crm", "generator": { "displayName": "dlvr.it", "link": "http://dlvr.it" }, "gnip": { "klout_score": "48", "language": {"value": "en"}, "matching_rules": [{"value": "halo 4"}], "urls": [{ "expanded_url": "http://www.crmbuyer.com/rsstory/76578.html", "url": "http://t.co/IvwmjJyV" }] }, "id": "tag:search.twitter.com,2005:266601489475186688", "link": "http://twitter.com/FocalCRM/statuses/266601489475186688", "object": { "id": "object:search.twitter.com,2005:266601489475186688", "link": "http://twitter.com/FocalCRM/statuses/266601489475186688", "objectType": "note", "postedTime": "2012-11-08T18:02:02.000Z", "summary": "Amex Teams With Halo 4 on Master Chief Incentives http://t.co/IvwmjJyV #crm" }, "objectType": "activity", "postedTime": "2012-11-08T18:02:02.000Z", "provider": { "displayName": "Twitter", "link": "http://www.twitter.com", "objectType": "service" }, "retweetCount": "0", "twitter_entities": { "hashtags": [{ "indices": [ "71", "75" ], "text": "crm" }], "urls": [{ "display_url": "dlvr.it/2S6sjV", "expanded_url": "http://dlvr.it/2S6sjV", "indices": [ "50", "70" ], "url": "http://t.co/IvwmjJyV" }], "user_mentions": [] }, "verb": "post" }]}, "modified": "Nov 8, 2012 06:02:44 PM UTC", "publishedDate": "Nov 8, 2012 06:02:02 PM UTC", "source": ["gnip test"], "sourceKey": [".mnt.fileshare.datasift.gnip."], "sourceUrl": "file:/mnt/fileshare/datasift/gnip/gnip.json", "tags": [ "twitter", "gnip" ], "title": "Amex Teams With Halo 4 on Master Chief Incentives http://t.co/IvwmjJyV #crm", "url": "http://twitter.com/FocalCRM/statuses/266601489475186688" } |
Source
Code Block |
---|
//TODO (INF-1865): need to distinguish between "tweets_to" and "retweets"... //TODO (INF-1865): looks like body is HTML encoded //TODO (INF-1865): aggregate sentiment vs user? //TODO (INF-1865): distinguish between tweets and mentions { "description": "A large set of tweets related to Super Storm Sandy", "extractType": "File", "extractorOptions": { "app.alchemyapi-metadata.batchSize": 100, "app.alchemyapi-metadata.numKeywords": 5, "app.alchemyapi-metadata.strict": "true" }, "file": { "XmlPrimaryKey": "link", "XmlSourceName": "", "XmlRootLevelValues": [], "domain": "XXX", "password": "XXX", "username": "XXX" }, "isApproved": true, "isPublic": false, "mediaType": "Social", "structuredAnalysis": { "rejectDocCriteria": "$SCRIPT( if (null == _doc.metadata.json[0].link || null == _doc.metadata.json[0].object) return 'reject'; )", "metadataFieldList": "", "docGeo" : { "lat": "$SCRIPT( try {return _doc.metadata.json[0].geo.coordinates[0];} catch (err) {return '';})", "lon": "$SCRIPT( try {return _doc.metadata.json[0].geo.coordinates[1];} catch (err) {return '';})" }, "associations": [ { "assoc_type": "Event", "creationCriteriaScript": "$SCRIPT( return (null != _doc.metadata.json[0].object.actor); )", "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)", "entity2_index": "$SCRIPT( return _doc.metadata.json[0].object.actor.preferredUsername + '/twitterhandle';)", "verb": "retweets", "verb_category": "retweets" }, { "assoc_type": "Event", "creationCriteriaScript": "$SCRIPT( return (null != _doc.metadata.json[0].object.actor) && (null != _doc.metadata.json[0].object.actor.location); )", "entity1_index": "$SCRIPT( return _doc.metadata.json[0].object.actor.preferredUsername + '/twitterhandle';)", "entity2_index": "$SCRIPT( return _doc.metadata.json[0].object.actor.location.displayName+ '/location';)", "verb": "twitter_location", "verb_category": "twitter_location" }, { "assoc_type": "Event", "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)", "entity2_index": "$SCRIPT( return _iterator.text + '/hashtag'; )", "iterateOver": "json.twitter_entities.hashtags", "verb": "tweets_about", "verb_category": "tweets_about" }, { "assoc_type": "Event", "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)", "entity2_index": "$SCRIPT( return _iterator.screen_name + '/twitterhandle'; )", "iterateOver": "json.twitter_entities.user_mentions", "verb": "tweets_to", "verb_category": "tweets_to" }, { "assoc_type": "Event", "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)", "entity2_index": "$SCRIPT( return _iterator.expanded_url + '/url'; )", "iterateOver": "json.gnip.urls", "verb": "tweets_link", "verb_category": "tweets_link" } ], "description": "$metadata.json.body", "entities": [ { "actual_name": "$metadata.json.actor.displayName", "dimension": "Who", "disambiguated_name": "$metadata.json.actor.preferredUsername", "linkdata": "$metadata.json.actor.link", "type": "TwitterHandle" }, { "iterateOver": "json.twitter_entities.user_mentions", "actual_name": "$SCRIPT(return _iterator.name;)", "dimension": "Who", "disambiguated_name": "$SCRIPT(return _iterator.screen_name;)", "linkdata": "$SCRIPT(return 'http://www.twitter.com/' + _iterator.screen_name;)", "type": "TwitterHandle" }, { "actual_name": "$metadata.json.object.actor.displayName", "dimension": "Who", "disambiguated_name": "$metadata.json.object.actor.preferredUsername", "linkdata": "$metadata.json.object.actor.link", "type": "TwitterHandle" }, { "dimension": "Where", "disambiguated_name": "$metadata.json.actor.location.displayName", "geotag": { "city": "$SCRIPT( return getAddressVal( _doc.metadata.json[0].actor.location.displayName, 0 ) )", "stateProvince": "$SCRIPT( return getRegion(getAddressVal( _doc.metadata.json[0].actor.location.displayName, 1 )) )", "countryCode" : "US", "alternatives": [ { "stateProvince": "$SCRIPT( return getRegion(getAddressVal( _doc.metadata.json[0].actor.location.displayName, 1 )) )", "countryCode" : "US" } ] }, "type": "Location" }, { "dimension": "Where", "disambiguated_name": "$metadata.json.object.actor.location.displayName", "type": "Location" }, { "disambiguated_name": "$SCRIPT(return _iterator.text;)", "iterateOver": "json.twitter_entities.hashtags", "type": "HashTag" }, { "actual_name": "$SCRIPT(return _iterator.url)", "disambiguated_name": "$SCRIPT(return _iterator.expanded_url;)", "iterateOver": "json.gnip.urls", "type": "URL" } ], "fullText": "$metadata.json.body", "script": "function getAddressVal( addressStr, number) { try { var addressArray = addressStr.split(/ *, */); if (addressArray != null && addressArray.length > 0) { if (addressArray[number].toLowerCase()=='ny') { return 'new york'; } else if (addressArray[number].toLowerCase()=='long island' || addressArray[number].toLowerCase()=='li') { return 'medford'; } else { return addressArray[number]; } } else { return ''; } } catch (err) { return ''; } } function getRegion( code ) { if (code.toLowerCase()=='ny') {return 'New York';} else if (code.toLowerCase()=='nj') {return 'New Jersey';} else if (code.toLowerCase()=='ct') {return 'Connecticut';} else if (code.toLowerCase()=='md') {return 'Maryland';} else if (code.toLowerCase()=='va') {return 'Virginia';} else if (code.toLowerCase()=='pa') {return 'Pennsylvania';} else if (code.toLowerCase()=='nj') {return 'New Jersey';} else {return 'New York';} }", "scriptEngine": "javascript", "title": "$metadata.json.body", "url": "$metadata.json.link", "publishedDate": "$SCRIPT(return _doc.metadata.json[0].postedTime.replace(/.[0-9]{3}Z/,'Z');)" }, "tags": [ "twitter", "gnip" ], "title": "Amex Teams With Halo 4 on Master Chief Incentives http://t.co/IvwmjJyV #crmSuper Storm Sandy - Twitter: SANDY_SUBSTRING", "url": "smb://HOST:139/SHARE/PATH/TO/", "useExtractor": "AlchemyAPI-metadata", "urluseTextExtractor": "http://twitter.com/FocalCRM/statuses/266601489475186688none" } |