Input format sample
Code Block | ||
---|---|---|
| ||
Device,Date,SrcIP,dstIP,Alert,Country
SCANNER_1,2012-01-01T13:43:00,10.0.0.1,66.66.66.66,DUMMY_ALERT_TYPE_1,United States
SCANNER_2,2012-02-01T14:21:00,10.0.0.2,66.66.66.66,DUMMY_ALERT_TYPE_2,United Kingdom
SCANNER_3,2012-03-01T15:17:00,10.0.0.1,99.66.99.66,DUMMY_ALERT_TYPE_3,Netherlands |
Source #1a - fileshare, manual parsing
Code Block | ||
---|---|---|
| ||
{ "description": "For cyber demo", "isPublic": false, "mediaType": "Log", "searchCycle_secs": 3600, "tags": [ "cyber", "structured" ], "title": "Cyber Logs Test", "processingPipeline": [ { "file": { "XmlRootLevelValues": [], "domain": "DOMAIN", "password": "PASSWORD", "type": "csv", "username": "USER", "url": "smb://FILESHARE:139/cyber_logs/" } }, { "globals": { "scripts": [ "function decode(x)\n{\n var info = {}; \n var rec = x.split(','); \n info.device = rec[0];\n info.date = rec[1];\n info.srcIP = rec[2];\n info.dstIP = rec[3];\n info.alert = rec[4];\n info.country = rec[5];\n return info;\n}" ] } }, { "harvest": { "searchCycle_secs": 3600 } }, { "docMetadata": { "title": "$metadata.info.alert @ $metadata.info.date [$metadata.info.device]: $metadata.info.dstIP -> $metadata.info.srcIP", "publishedDate": "$SCRIPT( return _doc.metadata.info[0].date; )" } }, { "contentMetadata": [ { "fieldName": "info", "script": "var info = decode(text); info;", "scriptlang": "javascript" } ] }, { "text": [ { "fieldName": "fullText", "script": ",", "scriptlang": "regex", "flags": "md", "replacement": " , " }, { "fieldName": "description", "script": ",", "scriptlang": "regex", "flags": "md", "replacement": " , " } ] }, { "entities": [ { "dimension": "What", "disambiguated_name": "$metadata.info.srcIP", "type": "PrivateIP" }, { "dimension": "What", "disambiguated_name": "$metadata.info.dstIP", "geotag": { "country": "$SCRIPT( return _doc.metadata.info[0].country; )" }, "ontology_type": "country", "type": "PublicIP" }, { "actual_name": "$metadata.info.country", "dimension": "Where", "disambiguated_name": "$SCRIPT( return _doc.metadata.info[0].country; )", "geotag": { "country": "$SCRIPT( return _doc.metadata.info[0].country; )" }, "ontology_type": "country", "type": "Country" }, { "dimension": "What", "disambiguated_name": "$metadata.info.device", "type": "Sensor" }, { "dimension": "What", "disambiguated_name": "$metadata.info.alert", "type": "AlertType" } ] }, { "associations": [ { 
"entity1": "$metadata.info.dstIP", "entity2": "$metadata.info.srcIP", "geo_index": "$SCRIPT( return _doc.metadata.info[0].country + '/country'; )", "time_start": "$SCRIPT( return _doc.metadata.info[0].date; )", "verb": "$SCRIPT( return _doc.metadata.info[0].alert; )", "verb_category": "$SCRIPT( return _doc.metadata.info[0].alert; )" } ] }, { "searchIndex": { "metadataFieldList": "" } } ] } |
Source #1b - fileshare, automated parsing - headers manually specified
Code Block | ||
---|---|---|
| ||
{ "description": "For cyber demo", "isPublic": false, "mediaType": "Log", "searchCycle_secs": 3600, "tags": [ "cyber", "structured" ], "title": "Cyber Logs Test", "processingPipeline": [ { "file": { "XmlRootLevelValues": [ "device", "date", "srcIP", "dstIP", "alert", "country" ], "XmlIgnoreValues": [ "device,date,srcIP" ], "domain": "DOMAIN", "password": "PASSWORD", "type": "csv", "username": "USER", "url": "smb://FILESHARE:139/cyber_logs/" } }, { "harvest": { "searchCycle_secs": 3600 } }, { "docMetadata": { "title": "$metadata.csv.alert @ $metadata.csv.date [$metadata.csv.device]: $metadata.csv.dstIP -> $metadata.csv.srcIP", "publishedDate": "$SCRIPT( return _doc.metadata.csv[0].date; )" } }, { "entities": [ { "dimension": "What", "disambiguated_name": "$metadata.csv.srcIP", "type": "PrivateIP" }, { "dimension": "What", "disambiguated_name": "$metadata.csv.dstIP", "geotag": { "country": "$SCRIPT( return _doc.metadata.csv[0].country; )" }, "ontology_type": "country", "type": "PublicIP" }, { "actual_name": "$metadata.csv.country", "dimension": "Where", "disambiguated_name": "$SCRIPT( return _doc.metadata.csv[0].country; )", "geotag": { "country": "$SCRIPT( return _doc.metadata.csv[0].country; )" }, "ontology_type": "country", "type": "Country" }, { "dimension": "What", "disambiguated_name": "$metadata.csv.device", "type": "Sensor" }, { "dimension": "What", "disambiguated_name": "$metadata.csv.alert", "type": "AlertType" } ] }, { "associations": [ { "entity1": "$metadata.csv.dstIP", "entity2": "$metadata.csv.srcIP", "geo_index": "$SCRIPT( return _doc.metadata.csv[0].country + '/country'; )", "time_start": "$SCRIPT( return _doc.metadata.csv[0].date; )", "verb": "$SCRIPT( return _doc.metadata.csv[0].alert; )", "verb_category": "$SCRIPT( return _doc.metadata.csv[0].alert; )" } ] }, { "searchIndex": { "metadataFieldList": "" } } ] } |
Source #1c - fileshare, automated parsing - headers automatically specified
For the purpose of this example, imagine that the first line starts with a "#", eg:
Code Block | ||
---|---|---|
| ||
#Device,Date,SrcIP,dstIP,Alert,Country
SCANNER_1,2012-01-01T13:43:00,10.0.0.1,66.66.66.66,DUMMY_ALERT_TYPE_1,United States
... |
Then:
Code Block | ||
---|---|---|
| ||
{
"description": "For cyber demo",
"isPublic": false,
"mediaType": "Log",
"searchCycle_secs": 3600,
"tags": [
"cyber",
"structured"
],
"title": "Cyber Logs Test",
"processingPipeline": [
{
"file": {
"XmlIgnoreValues": [
"#"
],
"domain": "DOMAIN",
"password": "PASSWORD",
"type": "csv",
"username": "USER",
"url": "smb://FILESHARE:139/cyber_logs/"
}
},
{
"harvest": {
"searchCycle_secs": 3600
}
},
{
"docMetadata": {
"title": "$metadata.csv.alert @ $metadata.csv.date [$metadata.csv.device]: $metadata.csv.dstIP -> $metadata.csv.srcIP",
"publishedDate": "$SCRIPT( return _doc.metadata.csv[0].date; )"
}
},
{
"entities": [
{
"dimension": "What",
"disambiguated_name": "$metadata.csv.srcIP",
"type": "PrivateIP"
},
{
"dimension": "What",
"disambiguated_name": "$metadata.csv.dstIP",
"geotag": {
"country": "$SCRIPT( return _doc.metadata.csv[0].country; )"
},
"ontology_type": "country",
"type": "PublicIP"
},
{
"actual_name": "$metadata.csv.country",
"dimension": "Where",
"disambiguated_name": "$SCRIPT( return _doc.metadata.csv[0].country; )",
"geotag": {
"country": "$SCRIPT( return _doc.metadata.csv[0].country; )"
},
"ontology_type": "country",
"type": "Country"
},
{
"dimension": "What",
"disambiguated_name": "$metadata.csv.device",
"type": "Sensor"
},
{
"dimension": "What",
"disambiguated_name": "$metadata.csv.alert",
"type": "AlertType"
}
]
},
{
"associations": [
{
"entity1": "$metadata.csv.dstIP",
"entity2": "$metadata.csv.srcIP",
"geo_index": "$SCRIPT( return _doc.metadata.csv[0].country + '/country'; )",
"time_start": "$SCRIPT( return _doc.metadata.csv[0].date; )",
"verb": "$SCRIPT( return _doc.metadata.csv[0].alert; )",
"verb_category": "$SCRIPT( return _doc.metadata.csv[0].alert; )"
}
]
},
{
"searchIndex": {
"metadataFieldList": ""
}
}
]
} |
Note that if the header did not start with a "#" (e.g. first line "Device,Date,SrcIP,dstIP,Alert,Country"), then the "XmlIgnoreValues" field would need to be "\"Device\"" to correctly parse the headers (assuming the configured quote character is ").
Source #2a - web (including uploaded fileshares), manual parsing
It is slightly more complicated to parse CSV files over the Web, but still quite possible, using the searchConfig capability. Note that one neat trick is to upload a share to Infinit.e, and then use an API key to access the REST interface. Users can allocate themselves an API key from the People Manager.
Info |
---|
Note that when accessing Web documents you must use "rss.extraUrls" and specify minimally "url" and "title" fields, and not the top-level "url" (otherwise the URL is treated as an RSS feed rather than a standalone web page) |
...
Code Block | ||
---|---|---|
| ||
{ "associations": [{ "assoc_type": "Event", "entity1": "66.66.66.66", "entity1_index": "66.66.66.66/publicip", "entity2": "10.0.0.1", "entity2_index": "10.0.0.1/privateip", "geo_index": "united states/country", "time_start": "2012-01-01T13:43:00", "verb": "DUMMY_ALERT_TYPE_1", "verb_category": "DUMMY_ALERT_TYPE_1" }], "communityId": ["506dc16dfbf042893dd6b8f2"], "created": "Jun 4, 2013 12:54:34 AM UTC", "entities": [ { "actual_name": "10.0.0.1", "dimension": "What", "disambiguated_name": "10.0.0.1", "doccount": 0, "frequency": 1, "index": "10.0.0.1/privateip", "relevance": 0, "totalfrequency": -1, "type": "PrivateIP" }, { "actual_name": "66.66.66.66", "dimension": "What", "disambiguated_name": "66.66.66.66", "doccount": 0, "frequency": 1, "index": "66.66.66.66/publicip", "relevance": 0, "totalfrequency": -1, "type": "PublicIP" }, { "actual_name": "United States", "dimension": "Where", "disambiguated_name": "United States", "doccount": 0, "frequency": 1, "index": "united states/country", "ontology_type": "country", "relevance": 0, "totalfrequency": -1, "type": "Country" }, { "actual_name": "SCANNER_1", "dimension": "What", "disambiguated_name": "SCANNER_1", "doccount": 0, "frequency": 1, "index": "scanner_1/sensor", "relevance": 0, "totalfrequency": -1, "type": "Sensor" }, { "actual_name": "DUMMY_ALERT_TYPE_1", "dimension": "What", "disambiguated_name": "DUMMY_ALERT_TYPE_1", "doccount": 0, "frequency": 1, "index": "dummy_alert_type_1/alerttype", "relevance": 0, "totalfrequency": -1, "type": "AlertType" } ], "fullText": "SCANNER_1 , 2012-01-01T13:43:00 , 10.0.0.1 , 66.66.66.66 , DUMMY_ALERT_TYPE_1 , United States", "mediaType": ["Log"], "metadata": {"info": [{ "alert": "DUMMY_ALERT_TYPE_1 ", "country": "United States", "date": "2012-01-01T13:43:00", "device": "SCANNER_1 ", "dstIP": "66.66.66.66", "srcIP": " 10.0.0.1" }]}, "modified": "Jun 4, 2013 12:54:34 AM UTC", "publishedDate": "January 1, 2012 13:43:00 PM UTC", "source": ["Cyber Logs Test"], "sourceKey": 
["INFINITE_ENDPOINT.api.share.get.51ad28a440b4a4f0f757824c.25.26"], "tags": [ "cyber", "structured" ], "title": "DUMMY_ALERT_TYPE_1 @ 2012-01-01T13:43:00 [SCANNER_1 ]: 66.66.66.66 -> 10.0.0.1", "url": "http://INFINITE_ENDPOINT/api/share/get/51ad28a440b4a4f0f757824c#1" } |
Sources - old format
Old Format Source #1a - fileshare, manual parsing
Code Block | ||
---|---|---|
| ||
{ "description": "For cyber demo", "extractType": "File", "file": { "XmlRootLevelValues": [], "domain": "DOMAIN", "password": "PASSWORD", "type": "csv", "username": "USER" }, "isPublic": false, "mediaType": "Log", "searchCycle_secs": 3600, "searchIndexFilter": { "metadataFieldList": "" }, "structuredAnalysis": { "associations": [ { "entity1": "$metadata.info.dstIP", "entity2": "$metadata.info.srcIP", "geo_index": "$SCRIPT( return _doc.metadata.info[0].country + '/country'; )", "time_start": "$SCRIPT( return _doc.metadata.info[0].date; )", "verb": "$SCRIPT( return _doc.metadata.info[0].alert; )", "verb_category": "$SCRIPT( return _doc.metadata.info[0].alert; )" } ], "entities": [ { "dimension": "What", "disambiguated_name": "$metadata.info.srcIP", "type": "PrivateIP" }, { "dimension": "What", "disambiguated_name": "$metadata.info.dstIP", "geotag": { "country": "$SCRIPT( return _doc.metadata.info[0].country; )" }, "ontology_type": "country", "type": "PublicIP" }, { "actual_name": "$metadata.info.country", "dimension": "Where", "disambiguated_name": "$SCRIPT( return _doc.metadata.info[0].country; )", "geotag": { "country": "$SCRIPT( return _doc.metadata.info[0].country; )" }, "ontology_type": "country", "type": "Country" }, { "dimension": "What", "disambiguated_name": "$metadata.info.device", "type": "Sensor" }, { "dimension": "What", "disambiguated_name": "$metadata.info.alert", "type": "AlertType" } ], "publishedDate": "$SCRIPT( return _doc.metadata.info[0].date; )", "script": "", "scriptEngine": "javascript", "title": "$metadata.info.alert @ $metadata.info.date [$metadata.info.device]: $metadata.info.dstIP -> $metadata.info.srcIP" }, "tags": [ "cyber", "structured" ], "title": "Cyber Logs Test", "unstructuredAnalysis": { "meta": [ { "context": "First", "fieldName": "info", "script": "var info = decode(text); info;", "scriptlang": "javascript" } ], "script": "function decode(x)\n{\n var info = {}; \n var rec = x.split(','); \n info.device = rec[0];\n info.date = 
rec[1];\n info.srcIP = rec[2];\n info.dstIP = rec[3];\n info.alert = rec[4];\n info.country = rec[5];\n return info;\n}", "simpleTextCleanser": [ { "field": "fullText", "flags": "md", "replacement": " , ", "script": ",", "scriptlang": "regex" }, { "field": "description", "flags": "md", "replacement": " , ", "script": ",", "scriptlang": "regex" } ] }, "useExtractor":"none", "useTextExtractor":"none", "url": "smb://FILESHARE:139/cyber_logs/" } |
...
It is slightly more complicated to parse CSV files over the Web, but still quite possible, using the searchConfig capability. Note that one neat trick is to upload a share to Infinit.e, and then use an API key to access the REST interface. Users can allocate themselves an API key from the People Manager.
Info |
---|
Note that when accessing Web documents you must use "rss.extraUrls" and specify minimally "url" and "title" fields, and not the top-level "url" (otherwise the URL is treated as an RSS feed rather than a standalone web page) |
...