Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

Sample document

...

code
Code Block
language

Source

TODO: geo (use docGeo.alternatives, retain only those metadata fields that are wanted)

javascript
titleTwitter document
{
    "id": "tag:search.twitter.com,2005:266601489475186688",
    "objectType": "activity",
    "actor": {
        "objectType": "person",
        "id": "id:twitter.com:835627776",
        "link": "http://www.twitter.com/FocalCRM",
        "displayName": "CRM Buddy",
        "postedTime": "2012-09-20T13:59:56.000Z",
        "image": "http://a0.twimg.com/profile_images/2630355549/8cad59efaddd57283dbb159332336744_normal.jpeg",
        "summary": "",
        "links": [
            {
                "href": null,
                "rel": "me"
            }
        ],
        "friendsCount": 0,
        "followersCount": 245,
        "listedCount": 6,
        "statusesCount": 3688,
        "twitterTimeZone": null,
        "verified": false,
        "utcOffset": null,
        "preferredUsername": "FocalCRM",
        "languages": [
            "en"
        ]
    },
    "verb": "post",
    "postedTime": "2012-11-08T18:02:02.000Z",
    "generator": {
        "displayName": "dlvr.it",
        "link": "http://dlvr.it"
    },
    "provider": {
        "objectType": "service",
        "displayName": "Twitter",
        "link": "http://www.twitter.com"
    },
    "link": "http://twitter.com/FocalCRM/statuses/266601489475186688",
    "body": "Amex Teams With Halo 4 on Master Chief Incentives http://t.co/IvwmjJyV #crm",
    "object": {
        "objectType": "note",
        "id": "object:search.twitter.com,2005:266601489475186688",
        "summary": "Amex Teams With Halo 4 on Master Chief Incentives http://t.co/IvwmjJyV #crm",
        "link": "http://twitter.com/FocalCRM/statuses/266601489475186688",
        "postedTime": "2012-11-08T18:02:02.000Z"
    },
    "twitter_entities": {
        "urls": [
            {
                "display_url": "dlvr.it/2S6sjV",
                "indices": [
                    50,
                    70
                ],
                "expanded_url": "http://dlvr.it/2S6sjV",
                "url": "http://t.co/IvwmjJyV"
            }
        ],
        "user_mentions": [],
        "hashtags": [
            {
                "text": "crm",
                "indices": [
                    71,
                    75
                ]
            }
        ]
    },
    "retweetCount": 0,
    "gnip": {
        "language": {
            "value": "en"
        },
        "matching_rules": [
            {
                "value": "halo 4",
                "tag": null
            }
        ],
        "klout_score": 48,
        "urls": [
            {
                "url": "http://t.co/IvwmjJyV",
                "expanded_url": "http://www.crmbuyer.com/rsstory/76578.html"
            }
        ]
    }
}


Source

Code Block
//TODO (INF-1865): need to distinguish between "tweets_to" and "retweets"...
//TODO (INF-1865): looks like body is HTML encoded 
//TODO (INF-1865): aggregate sentiment vs user?
//TODO (INF-1865): distinguish between tweets and mentions
{
    "description": "A large set of tweets related to Super Storm Sandy",
    "isApproved": true,
    "isPublic": false,
    "mediaType": "Social",
    "tags": [
        "twitter",
        "gnip"
    ],
    "title": "Super Storm Sandy - Twitter: SANDY_SUBSTRING",
    "processingPipeline": [
        {
            "file": {
                "XmlPrimaryKey": "link",
                "XmlSourceName": "",
                "XmlRootLevelValues": [],
                "domain": "XXX",
                "password": "XXX",
                "username": "XXX",
                "url": "smb://HOST:139/SHARE/PATH/TO/"
            }
        },
        {
            "globals": {
                "scripts": [
                    "function getAddressVal( addressStr, number) { try { var addressArray = addressStr.split(/ *, */); if (addressArray != null && addressArray.length > 0) { if (addressArray[number].toLowerCase()=='ny') { return 'new york'; } else if (addressArray[number].toLowerCase()=='long island' || addressArray[number].toLowerCase()=='li') { return 'medford'; } else { return addressArray[number]; } } else { return ''; } } catch (err) { return ''; } } function getRegion( code ) { if (code.toLowerCase()=='ny') {return 'New York';} else if (code.toLowerCase()=='nj') {return 'New Jersey';} else if (code.toLowerCase()=='ct') {return 'Connecticut';} else if (code.toLowerCase()=='md') {return 'Maryland';} else if (code.toLowerCase()=='va') {return 'Virginia';} else if (code.toLowerCase()=='pa') {return 'Pennsylvania';} else if (code.toLowerCase()=='nj') {return 'New Jersey';} else {return 'New York';} }"
                ]
            }
        },
        {
            "docMetadata": {
                "title": "$metadata.json.body",
                "description": "$metadata.json.body",
                "fullText": "$metadata.json.body",
                "publishedDate": "$SCRIPT(return _doc.metadata.json[0].postedTime.replace(/.[0-9]{3}Z/,'Z');)",
                "geotag": {
                    "lat": "$SCRIPT( try {return _doc.metadata.json[0].geo.coordinates[0];} catch (err) {return '';})",
                    "lon": "$SCRIPT( try {return _doc.metadata.json[0].geo.coordinates[1];} catch (err) {return '';})"
                }
            }
        },
        {
            "featureEngine": {
                "engineName": "AlchemyAPI-metadata",
                "engineConfig": {
                    "app.alchemyapi-metadata.batchSize": 100,
                    "app.alchemyapi-metadata.numKeywords": 5,
                    "app.alchemyapi-metadata.strict": "true"
                }
            }
        },
        {
            "entities": [
                {
                    "actual_name": "$metadata.json.actor.displayName",
                    "dimension": "Who",
                    "disambiguated_name": "$metadata.json.actor.preferredUsername",
                    "linkdata": "$metadata.json.actor.link",
                    "type": "TwitterHandle"
                },
                {
                    "iterateOver": "json.twitter_entities.user_mentions",
                    "actual_name": "$SCRIPT(return _iterator.name;)",
                    "dimension": "Who",
                    "disambiguated_name": "$SCRIPT(return _iterator.screen_name;)",
                    "linkdata": "$SCRIPT(return 'http://www.twitter.com/' + _iterator.screen_name;)",
                    "type": "TwitterHandle"
                },
                {
                    "actual_name": "$metadata.json.object.actor.displayName",
                    "dimension": "Who",
                    "disambiguated_name": "$metadata.json.object.actor.preferredUsername",
                    "linkdata": "$metadata.json.object.actor.link",
                    "type": "TwitterHandle"
                },
                {
                    "dimension": "Where",
                    "disambiguated_name": "$metadata.json.actor.location.displayName",
                    "geotag": {
                        "city": "$SCRIPT( return getAddressVal( _doc.metadata.json[0].actor.location.displayName, 0 ) )",
                        "stateProvince": "$SCRIPT( return getRegion(getAddressVal( _doc.metadata.json[0].actor.location.displayName, 1 )) )",
                        "countryCode": "US",
                        "alternatives": [
                            {
                                "stateProvince": "$SCRIPT( return getRegion(getAddressVal( _doc.metadata.json[0].actor.location.displayName, 1 )) )",
                                "countryCode": "US"
                            }
                        ]
                    },
                    "type": "Location"
                },
                {
                    "dimension": "Where",
                    "disambiguated_name": "$metadata.json.object.actor.location.displayName",
                    "type": "Location"
                },
                {
                    "disambiguated_name": "$SCRIPT(return _iterator.text;)",
                    "iterateOver": "json.twitter_entities.hashtags",
                    "type": "HashTag"
                },
                {
                    "actual_name": "$SCRIPT(return _iterator.url)",
                    "disambiguated_name": "$SCRIPT(return _iterator.expanded_url;)",
                    "iterateOver": "json.gnip.urls",
                    "type": "URL"
                }
            ]
        },
        {
            "associations": [
                {
                    "assoc_type": "Event",
                    "creationCriteriaScript": "$SCRIPT( return (null != _doc.metadata.json[0].object.actor); )",
                    "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)",
                    "entity2_index": "$SCRIPT( return _doc.metadata.json[0].object.actor.preferredUsername + '/twitterhandle';)",
                    "verb": "retweets",
                    "verb_category": "retweets"
                },
                {
                    "assoc_type": "Event",
                    "creationCriteriaScript": "$SCRIPT( return (null != _doc.metadata.json[0].object.actor) && (null != _doc.metadata.json[0].object.actor.location); )",
                    "entity1_index": "$SCRIPT( return _doc.metadata.json[0].object.actor.preferredUsername + '/twitterhandle';)",
                    "entity2_index": "$SCRIPT( return _doc.metadata.json[0].object.actor.location.displayName+ '/location';)",
                    "verb": "twitter_location",
                    "verb_category": "twitter_location"
                },
                {
                    "assoc_type": "Event",
                    "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)",
                    "entity2_index": "$SCRIPT( return _iterator.text + '/hashtag'; )",
                    "iterateOver": "json.twitter_entities.hashtags",
                    "verb": "tweets_about",
                    "verb_category": "tweets_about"
                },
                {
                    "assoc_type": "Event",
                    "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)",
                    "entity2_index": "$SCRIPT( return _iterator.screen_name + '/twitterhandle'; )",
                    "iterateOver": "json.twitter_entities.user_mentions",
                    "verb": "tweets_to",
                    "verb_category": "tweets_to"
                },
                {
                    "assoc_type": "Event",
                    "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)",
                    "entity2_index": "$SCRIPT( return _iterator.expanded_url + '/url'; )",
                    "iterateOver": "json.gnip.urls",
                    "verb": "tweets_link",
                    "verb_category": "tweets_link"
                }
            ]
        },
        {
            "storageSettings": {
                "rejectDocCriteria": "$SCRIPT( if (null == _doc.metadata.json[0].link || null == _doc.metadata.json[0].object) return 'reject'; )"
            }
        }
    ]
}

Sample output

Code Block
languagejavascript
{
    "associations": [
        {
            "assoc_type": "Event",
            "entity1": "focalcrm",
            "entity1_index": "focalcrm/twitterhandle",
            "entity2": "crm",
            "entity2_index": "crm/hashtag",
            "verb": "tweets_about",
            "verb_category": "tweets_about"
        },
        {
            "assoc_type": "Event",
            "entity1": "focalcrm",
            "entity1_index": "focalcrm/twitterhandle",
            "entity2": "http://www.crmbuyer.com/rsstory/76578.html",
            "entity2_index": "http://www.crmbuyer.com/rsstory/76578.html/url",
            "verb": "tweets_link",
            "verb_category": "tweets_link"
        }
    ],
    "communityId": ["506dc16dfbf042893dd6b8f2"],
    "created": "May 16, 2013 12:28:09 PM UTC",
    "description": "Amex Teams With Halo 4 on Master Chief Incentives http://t.co/IvwmjJyV #crm",
    "entities": [
        {
            "actual_name": "CRM Buddy",
            "dimension": "Who",
            "disambiguated_name": "FocalCRM",
            "doccount": 0,
            "frequency": 1,
            "index": "focalcrm/twitterhandle",
            "linkdata": ["http://www.twitter.com/FocalCRM"],
            "relevance": 0,
            "totalfrequency": -1,
            "type": "TwitterHandle"
        },
        {
            "actual_name": "crm",
            "dimension": "What",
            "disambiguated_name": "crm",
            "doccount": 0,
            "frequency": 1,
            "index": "crm/hashtag",
            "relevance": 0,
            "totalfrequency": -1,
            "type": "HashTag"
        },
        {
            "actual_name": "http://t.co/IvwmjJyV",
            "dimension": "What",
            "disambiguated_name": "http://www.crmbuyer.com/rsstory/76578.html",
            "doccount": 0,
            "frequency": 1,
            "index": "http://www.crmbuyer.com/rsstory/76578.html/url",
            "relevance": 0,
            "totalfrequency": -1,
            "type": "URL"
        },
        {
            "actual_name": "Amex Teams",
            "dimension": "What",
            "disambiguated_name": "Amex Teams",
            "doccount": -1,
            "frequency": 1,
            "index": "amex teams/keyword",
            "relevance": 0.758636,
            "sentiment": 0.160753,
            "totalfrequency": -1,
            "type": "Keyword"
        },
        {
            "actual_name": "Halo",
            "dimension": "What",
            "disambiguated_name": "Halo",
            "doccount": -1,
            "frequency": 1,
            "index": "halo/keyword",
            "relevance": 0.461833,
            "sentiment": 0.168822,
            "totalfrequency": -1,
            "type": "Keyword"
        },
        {
            "actual_name": "Master Chief Incentives",
            "dimension": "What",
            "disambiguated_name": "Master Chief Incentives",
            "doccount": -1,
            "frequency": 1,
            "index": "master chief incentives/keyword",
            "relevance": 0.981457,
            "sentiment": 0.168876,
            "totalfrequency": -1,
            "type": "Keyword"
        },
        {
            "actual_name": "http://t.co/IvwmjJyV",
            "dimension": "What",
            "disambiguated_name": "http://t.co/IvwmjJyV",
            "doccount": -1,
            "frequency": 1,
            "index": "http://t.co/ivwmjjyv/keyword",
            "relevance": 0.212007,
            "sentiment": 0.126168,
            "totalfrequency": -1,
            "type": "Keyword"
        },
        {
            "actual_name": "crm",
            "dimension": "What",
            "disambiguated_name": "crm",
            "doccount": -1,
            "frequency": 1,
            "index": "crm/keyword",
            "relevance": 0.404086,
            "sentiment": 0.103838,
            "totalfrequency": -1,
            "type": "Keyword"
        }
    ],
    "mediaType": ["Social"],
    "metadata": {"json": [{
        "actor": {
            "displayName": "CRM Buddy",
            "followersCount": "245",
            "friendsCount": "0",
            "id": "id:twitter.com:835627776",
            "image": "http://a0.twimg.com/profile_images/2630355549/8cad59efaddd57283dbb159332336744_normal.jpeg",
            "languages": ["en"],
            "link": "http://www.twitter.com/FocalCRM",
            "links": [{"rel": "me"}],
            "listedCount": "6",
            "objectType": "person",
            "postedTime": "2012-09-20T13:59:56.000Z",
            "preferredUsername": "FocalCRM",
            "statusesCount": "3688",
            "summary": "",
            "verified": "false"
        },
        "body": "Amex Teams With Halo 4 on Master Chief Incentives http://t.co/IvwmjJyV #crm",
        "generator": {
            "displayName": "dlvr.it",
            "link": "http://dlvr.it"
        },
        "gnip": {
            "klout_score": "48",
            "language": {"value": "en"},
            "matching_rules": [{"value": "halo 4"}],
            "urls": [{
                "expanded_url": "http://www.crmbuyer.com/rsstory/76578.html",
                "url": "http://t.co/IvwmjJyV"
            }]
        },
        "id": "tag:search.twitter.com,2005:266601489475186688",
        "link": "http://twitter.com/FocalCRM/statuses/266601489475186688",
        "object": {
            "id": "object:search.twitter.com,2005:266601489475186688",
            "link": "http://twitter.com/FocalCRM/statuses/266601489475186688",
            "objectType": "note",
            "postedTime": "2012-11-08T18:02:02.000Z",
            "summary": "Amex Teams With Halo 4 on Master Chief Incentives http://t.co/IvwmjJyV #crm"
        },
        "objectType": "activity",
        "postedTime": "2012-11-08T18:02:02.000Z",
        "provider": {
            "displayName": "Twitter",
            "link": "http://www.twitter.com",
            "objectType": "service"
        },
        "retweetCount": "0",
        "twitter_entities": {
            "hashtags": [{
                "indices": [
                    "71",
                    "75"
                ],
                "text": "crm"
            }],
            "urls": [{
                "display_url": "dlvr.it/2S6sjV",
                "expanded_url": "http://dlvr.it/2S6sjV",
                "indices": [
                    "50",
                    "70"
                ],
                "url": "http://t.co/IvwmjJyV"
            }],
            "user_mentions": []
        },
        "verb": "post"
    }]},
    "modified": "Nov 8, 2012 06:02:44 PM UTC",
    "publishedDate": "Nov 8, 2012 06:02:02 PM UTC",
    "source": ["gnip test"],
    "sourceKey": [".mnt.fileshare.datasift.gnip."],
    "sourceUrl": "file:/mnt/fileshare/datasift/gnip/gnip.json",
    "tags": [
        "twitter",
        "gnip"
    ],
    "title": "Amex Teams With Halo 4 on Master Chief Incentives http://t.co/IvwmjJyV #crm",
    "url": "http://twitter.com/FocalCRM/statuses/266601489475186688"
}

Source

Code Block
//TODO (INF-1865): need to distinguish between "tweets_to" and "retweets"...
//TODO (INF-1865): looks like body is HTML encoded 
//TODO (INF-1865): aggregate sentiment vs user?
//TODO (INF-1865): distinguish between tweets and mentions
{
	"description": "A large set of tweets related to Super Storm Sandy",
	"extractType": "File",
    "extractorOptions": {
        "app.alchemyapi-metadata.batchSize": 100,
        "app.alchemyapi-metadata.numKeywords": 5,        
        "app.alchemyapi-metadata.strict": "true"        
    },
	"file": {
		"XmlPrimaryKey": "link",
		"XmlSourceName": "",
		"XmlRootLevelValues": [],
		"domain": "XXX",
		"password": "XXX",
		"username": "XXX"
	},
	"isApproved": true,
	"isPublic": false,
	"mediaType": "Social",
	"structuredAnalysis": {
		"rejectDocCriteria": "$SCRIPT( if (null == _doc.metadata.json[0].link || null == _doc.metadata.json[0].object) return 'reject'; )",
		"metadataFieldList": "",
		"docGeo" : {
			"lat": "$SCRIPT( try {return _doc.metadata.json[0].geo.coordinates[0];} catch (err) {return '';})",
			"lon": "$SCRIPT( try {return _doc.metadata.json[0].geo.coordinates[1];} catch (err) {return '';})"
		},
		"associations": [
             {
            	 "assoc_type": "Event",
            	 "creationCriteriaScript": "$SCRIPT( return (null != _doc.metadata.json[0].object.actor); )",
            	 "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)",
            	 "entity2_index": "$SCRIPT( return _doc.metadata.json[0].object.actor.preferredUsername + '/twitterhandle';)",
            	 "verb": "retweets",
            	 "verb_category": "retweets"
             },
             {
            	 "assoc_type": "Event",
            	 "creationCriteriaScript": "$SCRIPT( return (null != _doc.metadata.json[0].object.actor) && (null != _doc.metadata.json[0].object.actor.location); )",
            	 "entity1_index": "$SCRIPT( return _doc.metadata.json[0].object.actor.preferredUsername + '/twitterhandle';)",
            	 "entity2_index": "$SCRIPT( return _doc.metadata.json[0].object.actor.location.displayName+ '/location';)",
            	 "verb": "twitter_location",
            	 "verb_category": "twitter_location"
             },
             {
            	 "assoc_type": "Event",
            	 "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)",
            	 "entity2_index": "$SCRIPT( return _iterator.text + '/hashtag'; )",
            	 "iterateOver": "json.twitter_entities.hashtags",
            	 "verb": "tweets_about",
            	 "verb_category": "tweets_about"
             },
             {
            	 "assoc_type": "Event",
            	 "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)",
            	 "entity2_index": "$SCRIPT( return _iterator.screen_name + '/twitterhandle'; )",
            	 "iterateOver": "json.twitter_entities.user_mentions",
            	 "verb": "tweets_to",
            	 "verb_category": "tweets_to"
             },
             {
            	 "assoc_type": "Event",
            	 "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)",
            	 "entity2_index": "$SCRIPT( return _iterator.expanded_url + '/url'; )",
            	 "iterateOver": "json.gnip.urls",
            	 "verb": "tweets_link",
            	 "verb_category": "tweets_link"
             }
         ],
         "description": "$metadata.json.body",
         "entities": [
		      {
		    	  "actual_name": "$metadata.json.actor.displayName",
		    	  "dimension": "Who",
		    	  "disambiguated_name": "$metadata.json.actor.preferredUsername",
		    	  "linkdata": "$metadata.json.actor.link",
		    	  "type": "TwitterHandle"
		      },
		      {
		    	  "iterateOver": "json.twitter_entities.user_mentions",
		    	  "actual_name": "$SCRIPT(return _iterator.name;)",
		    	  "dimension": "Who",
		    	  "disambiguated_name": "$SCRIPT(return _iterator.screen_name;)",
		    	  "linkdata": "$SCRIPT(return 'http://www.twitter.com/' + _iterator.screen_name;)",
		    	  "type": "TwitterHandle"
		      },
		      {
		    	  "actual_name": "$metadata.json.object.actor.displayName",
		    	  "dimension": "Who",
		    	  "disambiguated_name": "$metadata.json.object.actor.preferredUsername",
		    	  "linkdata": "$metadata.json.object.actor.link",
		    	  "type": "TwitterHandle"
		      },
		      {
		    	  "dimension": "Where",
		    	  "disambiguated_name": "$metadata.json.actor.location.displayName",
		    	  "geotag": {
		    		  "city": "$SCRIPT( return getAddressVal( _doc.metadata.json[0].actor.location.displayName, 0 ) )",
		    		  "stateProvince": "$SCRIPT( return getRegion(getAddressVal( _doc.metadata.json[0].actor.location.displayName, 1 )) )",
		    		  "countryCode" : "US",
		    		  "alternatives": [
		                   {
		                	   "stateProvince": "$SCRIPT( return getRegion(getAddressVal( _doc.metadata.json[0].actor.location.displayName, 1 )) )",
		                	   "countryCode" : "US"
		                   }
	                   ]
		    	  },
		    	  "type": "Location"
		      },
		      {
		    	  "dimension": "Where",
		    	  "disambiguated_name": "$metadata.json.object.actor.location.displayName",
		    	  "type": "Location"
		      },
		      {
		    	  "disambiguated_name": "$SCRIPT(return _iterator.text;)",
		    	  "iterateOver": "json.twitter_entities.hashtags",
		    	  "type": "HashTag"
		      },
		      {
		    	  "actual_name": "$SCRIPT(return _iterator.url)",
		    	  "disambiguated_name": "$SCRIPT(return _iterator.expanded_url;)",
		    	  "iterateOver": "json.gnip.urls",
		    	  "type": "URL"
		      }
		      ],
      "fullText": "$metadata.json.body",
      "script": "function getAddressVal( addressStr, number) { try { var addressArray = addressStr.split(/ *, */); if (addressArray != null && addressArray.length > 0) { if (addressArray[number].toLowerCase()=='ny') { return 'new york'; } else if (addressArray[number].toLowerCase()=='long island' || addressArray[number].toLowerCase()=='li') { return 'medford'; } else { return addressArray[number]; } } else { return ''; } } catch (err) { return ''; } } function getRegion( code ) { if (code.toLowerCase()=='ny') {return 'New York';} else if (code.toLowerCase()=='nj') {return 'New Jersey';} else if (code.toLowerCase()=='ct') {return 'Connecticut';} else if (code.toLowerCase()=='md') {return 'Maryland';} else if (code.toLowerCase()=='va') {return 'Virginia';} else if (code.toLowerCase()=='pa') {return 'Pennsylvania';} else if (code.toLowerCase()=='nj') {return 'New Jersey';} else {return 'New York';} }",
      "scriptEngine": "javascript",
      "title": "$metadata.json.body",
      "url": "$metadata.json.link",
      "publishedDate": "$SCRIPT(return _doc.metadata.json[0].postedTime.replace(/.[0-9]{3}Z/,'Z');)"
	},
	"tags": [
         "twitter",
         "gnip"
     ],
     "title": "Super Storm Sandy - Twitter: SANDY_SUBSTRING",
     "url": "smb://HOST:139/SHARE/PATH/TO/",
     "useExtractor": "AlchemyAPI-metadata",
     "useTextExtractor": "none"
}

Sample output

...