Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
},        {
            "docMetadata": {
                "title": "$metadata.json.body",
                "description": "$metadata.json.body",
                "fullText": "$metadata.json.body",
                "publishedDate": "$SCRIPT(return _doc.metadata.json[0].postedTime.replace(/.[0-9]{3}Z/,'Z');)",
                "geotag": {
                    "lat": "$SCRIPT( try {return _doc.metadata.json[0].geo.coordinates[0];} catch (err) {return '';})",
                    "lon": "$SCRIPT( try {return _doc.metadata.json[0].geo.coordinates[1];} catch (err) {return '';})"
                }
            }
        },

We can see in this example that the script is used to parse the data from the Twitter feed in order to set the metadata values.

"Office" Documents Example

In this example, the subject line of an email correspondence can be extracted by Document metadata and set as the title of the resulting document.

Code Block
 },        {
],      "fullText": "$metadata.json.body",       "scriptdocMetadata": "function
{
getAddressVal( addressStr, number) { try { var addressArray =  addressStr.split(/ *, */); if (addressArray != null && 
addressArray.length > 0) { if 
(addressArray[number].toLowerCase()=='ny') { return 'new york'; } else 
if (addressArray[number].toLowerCase()=='long island' || 
addressArray[number].toLowerCase()=='li') { return 'medford'; } else { 
return addressArray[number]; } } else { return ''; } } catch (err) { 
return ''; } } function getRegion( code ) { if 
(code.toLowerCase()=='ny') {return 'New York';} else if 
(code.toLowerCase()=='nj') {return 'New Jersey';} else if 
(code.toLowerCase()=='ct') {return 'Connecticut';} else if 
(code.toLowerCase()=='md') {return 'Maryland';} else if 
(code.toLowerCase()=='va') {return 'Virginia';} else if 
(code.toLowerCase()=='pa') {return 'Pennsylvania';} else if 
(code.toLowerCase()=='nj') {return 'New Jersey';} else {return 'New 
York';} }",
      "scriptEngine": "javascript",
      "title": "$metadata.json.body",
      "url": "$metadata.json.link", "title": "$SCRIPT( return _doc.metadata._FILE_METADATA_[0].metadata.subject[0];)"
            }
        },

 

In the sample output we can see the "title" that was set using the docMetadata script.

Code Block
  {
    "_id": "5048efb0e4b01fd6455420ee",
    "title": "RE: Testing Preschedule workspace",
    "url": "smb://modus:139/enron/testing/semperger-c/deleted_items/37QTKE~3",
    "created": "Sep 6, 2012 06:42:01 PM UTC",
    "modified": "Jul 24, 2012 01:13:02 AM UTC",
    "publishedDate": "Jul 9, 2001 06:33:32 PM UTC",
    "source": [
        "Enron Emails (TextRank)"
    ],
    "sourceKey": [
        "modus.139.enron.testing.."
    ],
    "mediaType": [
        "publishedDateEmail": "$SCRIPT(return _doc.metadata.json[0].postedTime.replace(/.[0-9]{3}Z/,'Z');)"
    },
Info

Note: When iterating over entities or metadata (for either entity or association building), the "$" sign is relative to the iterator (e.g. the metadata object being looped over), not the document. However, when iterating over metadata fields that are strings, the above document-level referencing is still valid; alternatively, "$value"/"${value}" can be used to reference the value itself.

"Office" Documents Example

In this example, the subject line of an email correspondence is extracted using document metadata and set as the title of the resulting document.

Code Block
 },        {
            "docMetadata": {
       
    ],
    "description": "I am trying to pull it up now, it's taking a long time\r\n\r\n \r\nFrom: \tSmith, Will \r\nSent:\tMonday, July 09, 2001 11:28 AM\r\nTo:\tSemperger, Cara\r\nSubject:\tRE: Testing Preschedule workspace\r\n\r\nYes, but Vish made the changes in Table Edit. : - )\r\n\r\nWill\r\n\r\n \r\nFrom: \tSemperger, Cara \r\nSent:\tMonday, July 09, 2001 1:20 PM\r\nTo:\tSmith, Will\r\nSubject:\tRE: Testing Preschedule workspace\r\n\r\nSo, this table edit that Brett is asking me to test is really from ",

 

Anchor
Location
Location
Setting Metadata Values for Location

In the example, $SCRIPT is used to set the values for the geotag elements city, country, and stateProvince.  It references functions and variables imported by globals.

Code Block
  },        {
"title": "$SCRIPT( return _doc.metadata._FILE_METADATA_[0].metadata.subject[0];)"            "docMetadata": {
                "title": "$metadata.subject",
}         },

 

In the sample output we can see the title that was set using the docMetadata script.

Code Block
{       "_iddescription": "5048efb0e4b01fd6455420ee$metadata.summary",
       "title": "RE: Testing Preschedule workspace",     "urlpublishedDate": "smb://modus:139/enron/testing/semperger-c/deleted_items/37QTKE~3",$metadata.incidentdate",
     "created": "Sep 6, 2012 06:42:01 PM UTC",     "modifiedgeotag": "Jul{
24, 2012 01:13:02 AM UTC",     "publishedDate": "Jul 9, 2001 06:33:32 PM UTC",     "sourcecity": "$SCRIPT( return _doc.metadata.location[0].citystateprovince.city; )",
        "Enron Emails (TextRank)"     ],     "sourceKeycountry": ["$SCRIPT(         "modus.139.enron.testing.."return _doc.metadata.location[0].country; )",
     ],     "mediaType": [         "EmailstateProvince": "$SCRIPT( return _doc.metadata.location[0].citystateprovince.stateprovince; )"
],     "description": "I  am  trying to pull it up now, it's}
taking a long time\r\n\r\n \r\nFrom:  \tSmith, Will \r\nSent:\tMonday, July 09, 2001 11:28}
 AM\r\nTo:\tSemperger, Cara\r\nSubject:\tRE: Testing Preschedule 
workspace\r\n\r\nYes, but Vish made the changes in Table Edit. : - 
)\r\n\r\nWill\r\n\r\n \r\nFrom: \tSemperger, Cara \r\nSent:\tMonday, 
July 09, 2001 1:20 PM\r\nTo:\tSmith, Will\r\nSubject:\tRE: Testing 
Preschedule workspace\r\n\r\nSo, this table edit that Brett is asking me
 to test is really from ",
    "entities": [
        {
            "disambiguated_name": "on- june 18-paloverde-day",
            "index": "on- june 18-paloverde-day/keyword",
            "actual_name": "on- june 18-paloverde-day",
            "type": "Keyword",
            "relevance": 0.10585404743253149,
            "frequency": 1,
            "totalfrequency": 12,
            "doccount": 12,
            "dimension": "What"
        },
        {
            "disambiguated_name": "mulitple times additional data",
            "index": "mulitple times additional data/keyword",
            "actual_name": "mulitple times additional data",
            "type": "Keyword",
            "relevance": 0.18088061045762382,
            "frequency": 1,
            "totalfrequency": 12,
            "doccount": 12,
            "dimension": "What"
        },

 

...

You can use document metadata to set location values using geotag.  In the example source, the docMetadata block has been configured to use JavaScript to set the city, country, and stateProvince.  In this example, the JavaScript functions and variables were already defined using globals.

 

Code Block
  },        {
            "docMetadata": {
                "title": "$metadata.subject",
                "description": "$metadata.summary",
                "publishedDate": "$metadata.incidentdate",
                "geotag": {
                    "city": "$SCRIPT( return _doc.metadata.location[0].citystateprovince.city; )",
                    "country": "$SCRIPT( return _doc.metadata.location[0].country; )",
                    "stateProvince": "$SCRIPT( return _doc.metadata.location[0].citystateprovince.stateprovince; )"
              },


Globals:

Code Block
  {
            "globals": {
                "scripts": [
                    "function getLocationEntity() { var s = (_iterator.citystateprovince.city != null) ? _iterator.citystateprovince.city : ''; s+= (s.length > 0) ? ',' : ''; s+= (_iterator.citystateprovince.stateprovince != null) ? _iterator.citystateprovince.stateprovince : ''; s+= (s.length > 0) ? ',' : ''; s+= (_iterator.country != null) ? _iterator.country : ''; return s; } function getVictim() { var indicator = (_iterator.indicator != 'Unknown') ? _iterator.indicator : ''; var victimType = (_iterator.victimtype != 'Unknown') ? _iterator.victimtype : ''; var child = (_iterator.child == 'Yes') ? 'Child' : 'Adult'; var combatant = (_iterator.combatant == 'Yes') ? 'Combatant' : ''; var targeted = (_iterator.targetedcharacteristic != 'None' && _iterator.targetedcharacteristic != 'Unknown') ? _iterator.targetedcharacteristic : ''; var defining = (_iterator.definingcharacteristic != 'None' &&_iterator.definingcharacteristic != 'Unknown') ? _iterator.definingcharacteristic : ''; var s = indicator; if (victimType.length > 0) { if (s.length > 0) { s += ', '; } s += victimType; } if (s.length > 0) { s += ', '; } s += child; if (combatant.length > 0) { if (s.length > 0) { s += ', '; } s += combatant; } if (targeted.length > 0) { if (s.length > 0) { s += ', '; } s += targeted; } if (defining.length > 0) { if (s.length > 0) { s += ', '; } s += defining; } if (s.length > 0) { s += ' from '; } s += _iterator.nationality; return s; } function getVictimCount() { var count = parseInt(_iterator.deadcount, 10) + parseInt(_iterator.woundedcount, 10); return count; } function getEventType() { var s = _value; if (_doc.metadata.assassination[0] == 'Yes') s += ', Assassination'; if (_doc.metadata.suicide[0] == 'Yes') s += ', Suicide'; if (_doc.metadata.ied[0] == 'Yes') s += ', IED'; return s; } function getEventTypeFull() { var s = _doc.metadata.eventtype[0]; if (_doc.metadata.assassination[0] == 'Yes') s += ', Assassination'; if (_doc.metadata.suicide[0] == 'Yes') s += ', Suicide'; 
if (_doc.metadata.ied[0] == 'Yes') s += ', IED'; return s;} function isOrganizationSpecified() {  if (_doc.metadata.organization != null && _doc.metadata.organization[0].toString().toLowerCase() == 'no group') { return false; } else { return true; } }function getOrganizationName() {  if (_doc.metadata.organization != null && _doc.metadata.organization[0].toString().toLowerCase() != 'no group') { return _doc.metadata.organization[0]; } }"
        }         ]
   }         },

 

Output:

The output of the example source returns the location information pertaining to the source data.

...