Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
//TODO (INF-1865): need to distinguish between "tweets_to" and "retweets"...
//TODO (INF-1865): looks like body is HTML encoded 
//TODO (INF-1865): aggregate sentiment vs user?
//TODO (INF-1865): distinguish between tweets and mentions
{
    "description": "A large set of tweets related to Super Storm Sandy",
    "isApproved": true,
    "isPublic": false,
    "mediaType": "Social",
    "tags": [
        "twitter",
        "gnip"
    ],
    "title": "Super Storm Sandy - Twitter: SANDY_SUBSTRING",
    "processingPipeline": [
        {
            "file": {
                "XmlPrimaryKey": "link",
                "XmlSourceName": "",
                "XmlRootLevelValues": [],
                "domain": "XXX",
                "password": "XXX",
                "username": "XXX",
                "url": "smb://HOST:139/SHARE/PATH/TO/"
            }
        },
        {
            "globals": {
                "scripts": [
                    "function getAddressVal( addressStr, number) { try { var addressArray = addressStr.split(/ *, */); if (addressArray != null && addressArray.length > 0) { if (addressArray[number].toLowerCase()=='ny') { return 'new york'; } else if (addressArray[number].toLowerCase()=='long island' || addressArray[number].toLowerCase()=='li') { return 'medford'; } else { return addressArray[number]; } } else { return ''; } } catch (err) { return ''; } } function getRegion( code ) { if (code.toLowerCase()=='ny') {return 'New York';} else if (code.toLowerCase()=='nj') {return 'New Jersey';} else if (code.toLowerCase()=='ct') {return 'Connecticut';} else if (code.toLowerCase()=='md') {return 'Maryland';} else if (code.toLowerCase()=='va') {return 'Virginia';} else if (code.toLowerCase()=='pa') {return 'Pennsylvania';} else if (code.toLowerCase()=='nj') {return 'New Jersey';} else {return 'New York';} }"
                ]
            }
        },
        {
            "docMetadata": {
                "title": "$metadata.json.body",
                "description": "$metadata.json.body",
                "fullText": "$metadata.json.body",
                "publishedDate": "$SCRIPT(return _doc.metadata.json[0].postedTime.replace(/.[0-9]{3}Z/,'Z');)",
                "geotag": {
                    "lat": "$SCRIPT( try {return _doc.metadata.json[0].geo.coordinates[0];} catch (err) {return '';})",
                    "lon": "$SCRIPT( try {return _doc.metadata.json[0].geo.coordinates[1];} catch (err) {return '';})"
                }
            }
        },
        {
            "featureEngine": {
                "engineName": "AlchemyAPI-metadata",
                "engineConfig": {
                    "app.alchemyapi-metadata.batchSize": 100,
                    "app.alchemyapi-metadata.numKeywords": 5,
                    "app.alchemyapi-metadata.strict": "true"
                }
            }
        },
        {
            "entities": [
                {
                    "actual_name": "$metadata.json.actor.displayName",
                    "dimension": "Who",
                    "disambiguated_name": "$metadata.json.actor.preferredUsername",
                    "linkdata": "$metadata.json.actor.link",
                    "type": "TwitterHandle"
                },
                {
                    "iterateOver": "json.twitter_entities.user_mentions",
                    "actual_name": "$SCRIPT(return _iterator.name;)",
                    "dimension": "Who",
                    "disambiguated_name": "$SCRIPT(return _iterator.screen_name;)",
                    "linkdata": "$SCRIPT(return 'http://www.twitter.com/' + _iterator.screen_name;)",
                    "type": "TwitterHandle"
                },
                {
                    "actual_name": "$metadata.json.object.actor.displayName",
                    "dimension": "Who",
                    "disambiguated_name": "$metadata.json.object.actor.preferredUsername",
                    "linkdata": "$metadata.json.object.actor.link",
                    "type": "TwitterHandle"
                },
                {
                    "dimension": "Where",
                    "disambiguated_name": "$metadata.json.actor.location.displayName",
                    "geotag": {
                        "city": "$SCRIPT( return getAddressVal( _doc.metadata.json[0].actor.location.displayName, 0 ) )",
                        "stateProvince": "$SCRIPT( return getRegion(getAddressVal( _doc.metadata.json[0].actor.location.displayName, 1 )) )",
                        "countryCode" : "US",
                        "alternatives": [
                            {
                                "stateProvince": "$SCRIPT( return getRegion(getAddressVal( _doc.metadata.json[0].actor.location.displayName, 1 )) )",
                                "countryCode": "US"
                            }
                        ]
                    },
                    "type": "Location"
                },
                {
                    "dimension": "Where",
                    "disambiguated_name": "$metadata.json.object.actor.location.displayName",
                    "type": "Location"
                },
                {
                    "disambiguated_name": "$SCRIPT(return _iterator.text;)",
                    "iterateOver": "json.twitter_entities.hashtags",
                    "type": "HashTag"
                },
                {
                    "actual_name": "$SCRIPT(return _iterator.url)",
                    "disambiguated_name": "$SCRIPT(return _iterator.expanded_url;)",
                    "iterateOver": "json.gnip.urls",
                    "type": "URL"
                }
            ]
        },
        {
            "associations": [
                {
                    "assoc_type": "Event",
                    "creationCriteriaScript": "$SCRIPT( return (null != _doc.metadata.json[0].object.actor); )",
                    "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)",
                    "entity2_index": "$SCRIPT( return _doc.metadata.json[0].object.actor.preferredUsername + '/twitterhandle';)",
                    "verb": "retweets",
                    "verb_category": "retweets"
                },
                {
                    "assoc_type": "Event",
                    "creationCriteriaScript": "$SCRIPT( return (null != _doc.metadata.json[0].object.actor) && (null != _doc.metadata.json[0].object.actor.location); )",
                    "entity1_index": "$SCRIPT( return _doc.metadata.json[0].object.actor.preferredUsername + '/twitterhandle';)",
                    "entity2_index": "$SCRIPT( return _doc.metadata.json[0].object.actor.location.displayName+ '/location';)",
                    "verb": "twitter_location",
                    "verb_category": "twitter_location"
                },
                {
                    "assoc_type": "Event",
                    "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)",
                    "entity2_index": "$SCRIPT( return _iterator.text + '/hashtag'; )",
                    "iterateOver": "json.twitter_entities.hashtags",
                    "verb": "tweets_about",
                    "verb_category": "tweets_about"
                },
                {
                    "assoc_type": "Event",
                    "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)",
                    "entity2_index": "$SCRIPT( return _iterator.screen_name + '/twitterhandle'; )",
                    "iterateOver": "json.twitter_entities.user_mentions",
                    "verb": "tweets_to",
                    "verb_category": "tweets_to"
                },
                {
                    "assoc_type": "Event",
                    "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)",
                    "entity2_index": "$SCRIPT( return _iterator.expanded_url + '/url'; )",
                    "iterateOver": "json.gnip.urls",
                    "verb": "tweets_link",
                    "verb_category": "tweets_link"
                }
            ]
        },
        {
            "storageSettings": {
                "rejectDocCriteria": "$SCRIPT( if (null == _doc.metadata.json[0].link || null == _doc.metadata.json[0].object) return 'reject'; )"
            }
        }
    ]
}

Sample output

Code Block
languagejavascript
{
    "associations": [
        {
            "assoc_type": "Event",
            "entity1": "focalcrm",
            "entity1_index": "focalcrm/twitterhandle",
            "entity2": "crm",
            "entity2_index": "crm/hashtag",
            "verb": "tweets_about",
            "verb_category": "tweets_about"
        },
        {
            "assoc_type": "Event",
            "entity1": "focalcrm",
            "entity1_index": "focalcrm/twitterhandle",
            "entity2": "http://www.crmbuyer.com/rsstory/76578.html",
            "entity2_index": "http://www.crmbuyer.com/rsstory/76578.html/url",
            "verb": "tweets_link",
            "verb_category": "tweets_link"
        }
    ],
    "communityId": ["506dc16dfbf042893dd6b8f2"],
    "created": "May 16, 2013 12:28:09 PM UTC",
    "description": "Amex Teams With Halo 4 on Master Chief Incentives http://t.co/IvwmjJyV #crm",
    "entities": [
        {
            "actual_name": "CRM Buddy",
            "dimension": "Who",
            "disambiguated_name": "FocalCRM",
            "doccount": 0,
            "frequency": 1,
            "index": "focalcrm/twitterhandle",
            "linkdata": ["http://www.twitter.com/FocalCRM"],
            "relevance": 0,
            "totalfrequency": -1,
            "type": "TwitterHandle"
        },
        {
            "actual_name": "crm",
            "dimension": "What",
            "disambiguated_name": "crm",
            "doccount": 0,
            "frequency": 1,
            "index": "crm/hashtag",
            "relevance": 0,
            "totalfrequency": -1,
            "type": "HashTag"
        },
        {
            "actual_name": "http://t.co/IvwmjJyV",
            "dimension": "What",
            "disambiguated_name": "http://www.crmbuyer.com/rsstory/76578.html",
            "doccount": 0,
            "frequency": 1,
            "index": "http://www.crmbuyer.com/rsstory/76578.html/url",
            "relevance": 0,
            "totalfrequency": -1,
            "type": "URL"
        },
        {
            "actual_name": "Amex Teams",
            "dimension": "What",
            "disambiguated_name": "Amex Teams",
            "doccount": -1,
            "frequency": 1,
            "index": "amex teams/keyword",
            "relevance": 0.758636,
            "sentiment": 0.160753,
            "totalfrequency": -1,
            "type": "Keyword"
        },
        {
            "actual_name": "Halo",
            "dimension": "What",
            "disambiguated_name": "Halo",
            "doccount": -1,
            "frequency": 1,
            "index": "halo/keyword",
            "relevance": 0.461833,
            "sentiment": 0.168822,
            "totalfrequency": -1,
            "type": "Keyword"
        },
        {
            "actual_name": "Master Chief Incentives",
            "dimension": "What",
            "disambiguated_name": "Master Chief Incentives",
            "doccount": -1,
            "frequency": 1,
            "index": "master chief incentives/keyword",
            "relevance": 0.981457,
            "sentiment": 0.168876,
            "totalfrequency": -1,
            "type": "Keyword"
        },
        {
            "actual_name": "http://t.co/IvwmjJyV",
            "dimension": "What",
            "disambiguated_name": "http://t.co/IvwmjJyV",
            "doccount": -1,
            "frequency": 1,
            "index": "http://t.co/ivwmjjyv/keyword",
            "relevance": 0.212007,
            "sentiment": 0.126168,
            "totalfrequency": -1,
            "type": "Keyword"
        },
        {
            "actual_name": "crm",
            "dimension": "What",
            "disambiguated_name": "crm",
            "doccount": -1,
            "frequency": 1,
            "index": "crm/keyword",
            "relevance": 0.404086,
            "sentiment": 0.103838,
            "totalfrequency": -1,
            "type": "Keyword"
        }
    ],
    "mediaType": ["Social"],
    "metadata": {"json": [{
        "actor": {
            "displayName": "CRM Buddy",
            "followersCount": "245",
            "friendsCount": "0",
            "id": "id:twitter.com:835627776",
            "image": "http://a0.twimg.com/profile_images/2630355549/8cad59efaddd57283dbb159332336744_normal.jpeg",
            "languages": ["en"],
            "link": "http://www.twitter.com/FocalCRM",
            "links": [{"rel": "me"}],
            "listedCount": "6",
            "objectType": "person",
            "postedTime": "2012-09-20T13:59:56.000Z",
            "preferredUsername": "FocalCRM",
            "statusesCount": "3688",
            "summary": "",
            "verified": "false"
        },
        "body": "Amex Teams With Halo 4 on Master Chief Incentives http://t.co/IvwmjJyV #crm",
        "generator": {
            "displayName": "dlvr.it",
            "link": "http://dlvr.it"
        },
        "gnip": {
            "klout_score": "48",
            "language": {"value": "en"},
            "matching_rules": [{"value": "halo 4"}],
            "urls": [{
                "expanded_url": "http://www.crmbuyer.com/rsstory/76578.html",
                "url": "http://t.co/IvwmjJyV"
            }]
        },
        "id": "tag:search.twitter.com,2005:266601489475186688",
        "link": "http://twitter.com/FocalCRM/statuses/266601489475186688",
        "object": {
            "id": "object:search.twitter.com,2005:266601489475186688",
            "link": "http://twitter.com/FocalCRM/statuses/266601489475186688",
            "objectType": "note",
            "postedTime": "2012-11-08T18:02:02.000Z",
            "summary": "Amex Teams With Halo 4 on Master Chief Incentives http://t.co/IvwmjJyV #crm"
        },
        "objectType": "activity",
        "postedTime": "2012-11-08T18:02:02.000Z",
        "provider": {
            "displayName": "Twitter",
            "link": "http://www.twitter.com",
            "objectType": "service"
        },
        "retweetCount": "0",
        "twitter_entities": {
            "hashtags": [{
                "indices": [
                    "71",
                    "75"
                ],
                "text": "crm"
            }],
            "urls": [{
                "display_url": "dlvr.it/2S6sjV",
                "expanded_url": "http://dlvr.it/2S6sjV",
                "indices": [
                    "50",
                    "70"
                ],
                "url": "http://t.co/IvwmjJyV"
            }],
            "user_mentions": []
        },
        "verb": "post"
    }]},
    "modified": "Nov 8, 2012 06:02:44 PM UTC",
    "publishedDate": "Nov 8, 2012 06:02:02 PM UTC",
    "source": ["gnip test"],
    "sourceKey": [".mnt.fileshare.datasift.gnip."],
    "sourceUrl": "file:/mnt/fileshare/datasift/gnip/gnip.json",
    "tags": [
        "twitter",
        "gnip"
    ],
    "title": "Amex Teams With Halo 4 on Master Chief Incentives http://t.co/IvwmjJyV #crm",
    "url": "http://twitter.com/FocalCRM/statuses/266601489475186688"
}

Source

Code Block
//TODO (INF-1865): need to distinguish between "tweets_to" and "retweets"...
//TODO (INF-1865): looks like body is HTML encoded 
//TODO (INF-1865): aggregate sentiment vs user?
//TODO (INF-1865): distinguish between tweets and mentions
{
	"description": "A large set of tweets related to Super Storm Sandy",
	"extractType": "File",
    "extractorOptions": {
        "app.alchemyapi-metadata.batchSize": 100,
        "app.alchemyapi-metadata.numKeywords": 5,        
        "app.alchemyapi-metadata.strict": "true"        
    },
	"file": {
		"XmlPrimaryKey": "link",
		"XmlSourceName": "",
		"XmlRootLevelValues": [],
		"domain": "XXX",
		"password": "XXX",
		"username": "XXX"
	},
	"isApproved": true,
	"isPublic": false,
	"mediaType": "Social",
	"structuredAnalysis": {
		"rejectDocCriteria": "$SCRIPT( if (null == _doc.metadata.json[0].link || null == _doc.metadata.json[0].object) return 'reject'; )",
		"metadataFieldList": "",
		"docGeo" : {
			"lat": "$SCRIPT( try {return _doc.metadata.json[0].geo.coordinates[0];} catch (err) {return '';})",
			"lon": "$SCRIPT( try {return _doc.metadata.json[0].geo.coordinates[1];} catch (err) {return '';})"
		},
		"associations": [
             {
            	 "assoc_type": "Event",
            	 "creationCriteriaScript": "$SCRIPT( return (null != _doc.metadata.json[0].object.actor); )",
            	 "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)",
            	 "entity2_index": "$SCRIPT( return _doc.metadata.json[0].object.actor.preferredUsername + '/twitterhandle';)",
            	 "verb": "retweets",
            	 "verb_category": "retweets"
             },
             {
            	 "assoc_type": "Event",
            	 "creationCriteriaScript": "$SCRIPT( return (null != _doc.metadata.json[0].object.actor) && (null != _doc.metadata.json[0].object.actor.location); )",
            	 "entity1_index": "$SCRIPT( return _doc.metadata.json[0].object.actor.preferredUsername + '/twitterhandle';)",
            	 "entity2_index": "$SCRIPT( return _doc.metadata.json[0].object.actor.location.displayName+ '/location';)",
            	 "verb": "twitter_location",
            	 "verb_category": "twitter_location"
             },
             {
            	 "assoc_type": "Event",
            	 "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)",
            	 "entity2_index": "$SCRIPT( return _iterator.text + '/hashtag'; )",
            	 "iterateOver": "json.twitter_entities.hashtags",
            	 "verb": "tweets_about",
            	 "verb_category": "tweets_about"
             },
             {
            	 "assoc_type": "Event",
            	 "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)",
            	 "entity2_index": "$SCRIPT( return _iterator.screen_name + '/twitterhandle'; )",
            	 "iterateOver": "json.twitter_entities.user_mentions",
            	 "verb": "tweets_to",
            	 "verb_category": "tweets_to"
             },
             {
            	 "assoc_type": "Event",
            	 "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)",
            	 "entity2_index": "$SCRIPT( return _iterator.expanded_url + '/url'; )",
            	 "iterateOver": "json.gnip.urls",
            	 "verb": "tweets_link",
            	 "verb_category": "tweets_link"
             }
         ],
         "description": "$metadata.json.body",
         "entities": [
		      {
		    	  "actual_name": "$metadata.json.actor.displayName",
		    	  "dimension": "Who",
		    	  "disambiguated_name": "$metadata.json.actor.preferredUsername",
		    	  "linkdata": "$metadata.json.actor.link",
		    	  "type": "TwitterHandle"
		      },
		      {
		    	  "iterateOver": "json.twitter_entities.user_mentions",
		    	  "actual_name": "$SCRIPT(return _iterator.name;)",
		    	  "dimension": "Who",
		    	  "disambiguated_name": "$SCRIPT(return _iterator.screen_name;)",
		    	  "linkdata": "$SCRIPT(return 'http://www.twitter.com/' + _iterator.screen_name;)",
		    	  "type": "TwitterHandle"
		      },
		      {
		    	  "actual_name": "$metadata.json.object.actor.displayName",
		    	  "dimension": "Who",
		    	  "disambiguated_name": "$metadata.json.object.actor.preferredUsername",
		    	  "linkdata": "$metadata.json.object.actor.link",
		    	  "type": "TwitterHandle"
		      },
		      {
		    	  "dimension": "Where",
		    	  "disambiguated_name": "$metadata.json.actor.location.displayName",
		    	  "geotag": {
		    		  "city": "$SCRIPT( return getAddressVal( _doc.metadata.json[0].actor.location.displayName, 0 ) )",
		    		  "stateProvince": "$SCRIPT( return getRegion(getAddressVal( _doc.metadata.json[0].actor.location.displayName, 1 )) )",
		    		  "countryCode" : "US",
		    		  "alternatives": [
		                   {
		                	   "stateProvince": "$SCRIPT( return getRegion(getAddressVal( _doc.metadata.json[0].actor.location.displayName, 1 )) )",
		                	   "countryCode" : "US"
		                   }
	                   ]
		    	  },
		    	  "type": "Location"
		      },
		      {
		    	  "dimension": "Where",
		    	  "disambiguated_name": "$metadata.json.object.actor.location.displayName",
		    	  "type": "Location"
		      },
		      {
		    	  "disambiguated_name": "$SCRIPT(return _iterator.text;)",
		    	  "iterateOver": "json.twitter_entities.hashtags",
		    	  "type": "HashTag"
		      },
		      {
		    	  "actual_name": "$SCRIPT(return _iterator.url)",
		    	  "disambiguated_name": "$SCRIPT(return _iterator.expanded_url;)",
		    	  "iterateOver": "json.gnip.urls",
		    	  "type": "URL"
		      }
		      ],
      "fullText": "$metadata.json.body",
      "script": "function getAddressVal( addressStr, number) { try { var addressArray = addressStr.split(/ *, */); if (addressArray != null && addressArray.length > 0) { if (addressArray[number].toLowerCase()=='ny') { return 'new york'; } else if (addressArray[number].toLowerCase()=='long island' || addressArray[number].toLowerCase()=='li') { return 'medford'; } else { return addressArray[number]; } } else { return ''; } } catch (err) { return ''; } } function getRegion( code ) { if (code.toLowerCase()=='ny') {return 'New York';} else if (code.toLowerCase()=='nj') {return 'New Jersey';} else if (code.toLowerCase()=='ct') {return 'Connecticut';} else if (code.toLowerCase()=='md') {return 'Maryland';} else if (code.toLowerCase()=='va') {return 'Virginia';} else if (code.toLowerCase()=='pa') {return 'Pennsylvania';} else if (code.toLowerCase()=='nj') {return 'New Jersey';} else {return 'New York';} }",
      "scriptEngine": "javascript",
      "title": "$metadata.json.body",
      "url": "$metadata.json.link",
      "publishedDate": "$SCRIPT(return _doc.metadata.json[0].postedTime.replace(/.[0-9]{3}Z/,'Z');)"
	},
	"tags": [
         "twitter",
         "gnip"
     ],
     "title": "Super Storm Sandy - Twitter: SANDY_SUBSTRING",
     "url": "smb://HOST:139/SHARE/PATH/TO/",
     "useExtractor": "AlchemyAPI-metadata",
     "useTextExtractor": "none"
}