Input format sample
Code Block | ||
---|---|---|
| ||
Device,Date,SrcIP,dstIP,Alert,Country
SCANNER_1,2012-01-01T13:43:00,10.0.0.1,66.66.66.66,DUMMY_ALERT_TYPE_1,United States
SCANNER_2,2012-02-01T14:21:00,10.0.0.2,66.66.66.66,DUMMY_ALERT_TYPE_2,United Kingdom
SCANNER_3,2012-03-01T15:17:00,10.0.0.1,99.66.99.66,DUMMY_ALERT_TYPE_3,Netherlands |
Source #1a - fileshare, manual parsing
Code Block | ||
---|---|---|
| ||
{ "description": "For cyber demo", "extractType": "File", "file": { "XmlRootLevelValues": [], "domain": "DOMAIN", "password": "PASSWORD", "type": "csv", "username": "USER" }, "isPublic": false, "mediaType": "Log", "searchCycle_secs": 3600, "searchIndexFilter": { "metadataFieldList": "" }, "structuredAnalysis": { "associations": [ { "entity1": "$metadata.info.dstIP", "entity2": "$metadata.info.srcIP", "geo_index": "$SCRIPT( return _doc.metadata.info[0].country + '/country'; )", "time_start": "$SCRIPT( return _doc.metadata.info[0].date; )", "verb": "$SCRIPT( return _doc.metadata.info[0].alert; )", "verb_category": "$SCRIPT( return _doc.metadata.info[0].alert; )" } ], "entities": [ { "dimension": "What", "disambiguated_name": "$metadata.info.srcIP", "type": "PrivateIP" }, { "dimension": "What", "disambiguated_name": "$metadata.info.dstIP", "geotag": { "country": "$SCRIPT( return _doc.metadata.info[0].country; )" }, "ontology_type": "country", "type": "PublicIP" }, { "actual_name": "$metadata.info.country", "dimension": "Where", "disambiguated_name": "$SCRIPT( return _doc.metadata.info[0].country; )", "geotag": { "country": "$SCRIPT( return _doc.metadata.info[0].country; )" }, "ontology_type": "country", "type": "Country" }, { "dimension": "What", "disambiguated_name": "$metadata.info.device", "type": "Sensor" }, { "dimension": "What", "disambiguated_name": "$metadata.info.alert", "type": "AlertType" } ], "publishedDate": "$SCRIPT( return _doc.metadata.info[0].date; )", "script": "", "scriptEngine": "javascript", "title": "$metadata.info.alert @ $metadata.info.date [$metadata.info.device]: $metadata.info.dstIP -> $metadata.info.srcIP" }, "tags": [ "cyber", "structured" ], "title": "Cyber Logs Test", "unstructuredAnalysis": { "meta": [ { "context": "First", "fieldName": "info", "script": "var info = decode(text); info;", "scriptlang": "javascript" } ], "script": "function decode(x)\n{\n var info = {}; \n var rec = x.split(','); \n info.device = rec[0];\n info.date = rec[1];\n info.srcIP = rec[2];\n info.dstIP = rec[3];\n info.alert = rec[4];\n info.country = rec[5];\n return info;\n}", "simpleTextCleanser": [ { "field": "fullText", "flags": "md", "replacement": " , ", "script": ",", "scriptlang": "regex" }, { "field": "description", "flags": "md", "replacement": " , ", "script": ",", "scriptlang": "regex" } ] }, "useExtractor":"none", "useTextExtractor":"none", "url": "smb://FILESHARE:139/cyber_logs/" } |
Source #1b - fileshare, automated parsing
Code Block | ||
---|---|---|
| ||
{ "description": "For cyber demo", "extractType": "File", "file": { "XmlRootLevelValues": [ "device", "date", "srcIP", "dstIP", "alert", "country" ], "XmlIgnoreValues": [ "device,date,srcIP" ], "domain": "DOMAIN", "password": "PASSWORD", "type": "csv", "username": "USER" }, "isPublic": false, "mediaType": "Log", "searchCycle_secs": 3600, "searchIndexFilter": { "metadataFieldList": "" }, "structuredAnalysis": { "associations": [ { "entity1": "$metadata.csv.dstIP", "entity2": "$metadata.csv.srcIP", "geo_index": "$SCRIPT( return _doc.metadata.csv[0].country + '/country'; )", "time_start": "$SCRIPT( return _doc.metadata.csv[0].date; )", "verb": "$SCRIPT( return _doc.metadata.csv[0].alert; )", "verb_category": "$SCRIPT( return _doc.metadata.csv[0].alert; )" } ], "entities": [ { "dimension": "What", "disambiguated_name": "$metadata.csv.srcIP", "type": "PrivateIP" }, { "dimension": "What", "disambiguated_name": "$metadata.csv.dstIP", "geotag": { "country": "$SCRIPT( return _doc.metadata.csv[0].country; )" }, "ontology_type": "country", "type": "PublicIP" }, { "actual_name": "$metadata.csv.country", "dimension": "Where", "disambiguated_name": "$SCRIPT( return _doc.metadata.csv[0].country; )", "geotag": { "country": "$SCRIPT( return _doc.metadata.csv[0].country; )" }, "ontology_type": "country", "type": "Country" }, { "dimension": "What", "disambiguated_name": "$metadata.csv.device", "type": "Sensor" }, { "dimension": "What", "disambiguated_name": "$metadata.csv.alert", "type": "AlertType" } ], "publishedDate": "$SCRIPT( return _doc.metadata.csv[0].date; )", "script": "", "scriptEngine": "javascript", "title": "$metadata.csv.alert @ $metadata.csv.date [$metadata.csv.device]: $metadata.csv.dstIP -> $metadata.csv.srcIP" }, "tags": [ "cyber", "structured" ], "title": "Cyber Logs Test", "useExtractor":"none", "useTextExtractor":"none", "url": "smb://FILESHARE:139/cyber_logs/" } |
Source #2a - web (including uploaded fileshares), manual parsing
It is slightly more complicated to parse CSV files over the Web, but still quite possible, using the searchConfig capability. Note that one neat trick is to upload a share to Infinit.e, and then use an API key to access the REST interface. Users can allocate themselves an API key from the People Manager.
...
Note that the API key is not visible in any of the extracted documents (it is removed in the "searchConfig.script" code), and is also not visible in the source to anyone but the source owner and administrators (due to the "isPublic:false" field). As an alternative (from June 2013), a cookie can be used instead, e.g. "rss.httpFields": { "Cookie": "infinitecookie=api:API_KEY;" }.
Source #2b - web (including uploaded fileshares), manual parsing using Java
As above, except "unstructuredAnalysis.script" will look like:
Code Block |
---|
var parser = new Packages.au.com.bytecode.opencsv.CSVParser(); function decode(x) { var rec = parser.parseLine(x.toString()); var info = {}; info.device = '' + rec[0]; info.date = '' + rec[1]; info.srcIP = '' + rec[2]; info.dstIP = '' + rec[3]; info.alert = '' + rec[4]; info.country = '' + rec[5]; return info; } |
(Note that the "'' + <string-variable>" construct is necessary to convert from Java strings to javascript strings)
Output sample
(For source 1b, metadata.info is called metadata.csv)
...