Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
{
    "description": "2.5 million tweets related to Super Storm Sandy",
    "extractType": "File",
    "file": {
        "XmlPrimaryKey": "",
        "XmlRootLevelValues": [],
        "domain": "XXXXXXX",
        "password": "XXXXXX",
        "username": "XXXXXX"
    },
    "isApproved": true,
    "isPublic": true,
    "mediaType": "Social",
    "structuredAnalysis": {
        "rejectDocCriteria": "$SCRIPT( if (null == _doc.metadata.json[0].link || null == _doc.metadata.json[0].object) return 'reject'; )",
        "metadataFieldList": "",
        "docGeo": {
            "lat": "$SCRIPT( try {return _doc.metadata.json[0].geo.coordinates[0];} catch (err) {return '';})",
            "lon": "$SCRIPT( try {return _doc.metadata.json[0].geo.coordinates[1];} catch (err) {return '';})"
        },
        "associations": [
            {
                "assoc_type": "Event",
                "creationCriteriaScript": "$SCRIPT( return (null != _doc.metadata.json[0].object.actor); )",
                "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)",
                "entity2_index": "$SCRIPT( return _doc.metadata.json[0].object.actor.preferredUsername + '/twitterhandle';)",
                "verb": "retweets",
                "verb_category": "retweets"
            },
            {
                "assoc_type": "Event",
                "creationCriteriaScript": "$SCRIPT( return (null != _doc.metadata.json[0].object.actor) && (null != _doc.metadata.json[0].object.actor.location); )",
                "entity1_index": "$SCRIPT( return _doc.metadata.json[0].object.actor.preferredUsername + '/twitterhandle';)",
                "entity2_index": "$SCRIPT( return _doc.metadata.json[0].object.actor.location.displayName+ '/location';)",
                "verb": "twitter_location",
                "verb_category": "twitter_location"
            },
            {
                "assoc_type": "Event",
                "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)",
                "entity2_index": "$SCRIPT( return _iterator.text + '/hashtag'; )",
                "iterateOver": "json.twitter_entities.hashtags",
                "verb": "tweets_about",
                "verb_category": "tweets_about"
            },
            {
                "assoc_type": "Event",
                "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)",
                "entity2_index": "$SCRIPT( return _iterator.screen_name + '/twitterhandle'; )",
                "iterateOver": "json.twitter_entities.user_mentions",
                "verb": "tweets_to",
                "verb_category": "tweets_to"
            },
            {
                "assoc_type": "Event",
                "entity1_index": "$SCRIPT( return _doc.metadata.json[0].actor.preferredUsername + '/twitterhandle';)",
                "entity2_index": "$SCRIPT( return _iterator.expanded_url + '/url'; )",
                "iterateOver": "json.gnip.urls",
                "verb": "tweets_link",
                "verb_category": "tweets_link"
            }
        ],
        "description": "$metadata.json.body",
        "entities": [
            {
                "actual_name": "$metadata.json.actor.displayName",
                "dimension": "Who",
                "disambiguated_name": "$metadata.json.actor.preferredUsername",
                "linkdata": "$metadata.json.actor.link",
                "type": "TwitterHandle"
            },
            {
                "iterateOver": "json.twitter_entities.user_mentions",
                "actual_name": "$SCRIPT(return _iterator.name;)",
                "dimension": "Who",
                "disambiguated_name": "$SCRIPT(return _iterator.screen_name;)",
                "linkdata": "$SCRIPT(return 'http://www.twitter.com/' + _iterator.screen_name;)",
                "type": "TwitterHandle"
            },
            {
                "actual_name": "$metadata.json.object.actor.displayName",
                "dimension": "Who",
                "disambiguated_name": "$metadata.json.object.actor.preferredUsername",
                "linkdata": "$metadata.json.object.actor.link",
                "type": "TwitterHandle"
            },
            {
                "dimension": "Where",
                "disambiguated_name": "$metadata.json.actor.location.displayName",
                "geotag": {
                    "city": "$SCRIPT( return getAddressVal( _doc.metadata.json[0].actor.location.displayName, 0 ) )",
                    "stateProvince": "$SCRIPT( return getRegion(getAddressVal( _doc.metadata.json[0].actor.location.displayName, 1 )) )",
                    "country_code": "US",
                    "alternatives": [
                        {
                            "stateProvince": "$SCRIPT( return getRegion(getAddressVal( _doc.metadata.json[0].actor.location.displayName, 1 )) )",
                            "country_code": "US"
                        }
                    ]
                },
                "type": "Location"
            },
            {
                "dimension": "Where",
                "disambiguated_name": "$metadata.json.object.actor.location.displayName",
                "type": "Location"
            },
            {
                "disambiguated_name": "$SCRIPT(return _iterator.text;)",
                "iterateOver": "json.twitter_entities.hashtags",
                "type": "HashTag"
            },
            {
                "actual_name": "$SCRIPT(return _iterator.url)",
                "disambiguated_name": "$SCRIPT(return _iterator.expanded_url;)",
                "iterateOver": "json.gnip.urls",
                "type": "URL"
            }
        ],
        "fullText": "$metadata.json.body",
        "script": "function getAddressVal( addressStr, number) { try { var addressArray = addressStr.split(','); if (addressArray != null && addressArray.length > 0) { return addressArray[number]; } else { return ''; } } catch (err) { return ''; } } function getRegion( code ) { if (code.toLowerCase()=='ny') {return 'New York';} else if (code.toLowerCase()=='nj') {return 'New Jersey';} else if (code.toLowerCase()=='ct') {return 'Connecticut';} else if (code.toLowerCase()=='md') {return 'Maryland';} else if (code.toLowerCase()=='va') {return 'Virginia';} else if (code.toLowerCase()=='pa') {return 'Pennsylvania';} else {return 'New York';} }",
        "scriptEngine": "javascript",
        "title": "$metadata.json.body",
        "url": "$metadata.json.link",
        "publishedDate": "$SCRIPT(return _doc.metadata.json[0].postedTime.replace(/.[0-9]{3}Z/,'Z');)"
    },
    "tags": [
        "twitter",
        "gnip"
    ],
    "title": "Super Storm Sandy - Twitter",
    "url": "smb://XXXXXXXXXXX/sandy_demo/test/",
    "useExtractor": "none",
    "useTextExtractor": "none"
}

Sample output

TODO