Input format sample

Code Block

language	javascript

Device,Date,SrcIP,dstIP,Alert,Country
SCANNER_1,2012-01-01T13:43:00,10.0.0.1,66.66.66.66,DUMMY_ALERT_TYPE_1,United States
SCANNER_2,2012-02-01T14:21:00,10.0.0.2,66.66.66.66,DUMMY_ALERT_TYPE_2,United Kingdom
SCANNER_3,2012-03-01T15:17:00,10.0.0.1,99.66.99.66,DUMMY_ALERT_TYPE_3,Netherlands

Source #1a - fileshare, manual parsing

Code Block

language	javascript

{
    "description": "For cyber demo",
    "isPublic": false,
    "mediaType": "Log",
    "searchCycle_secs": 3600,
    "tags": [
        "cyber",
        "structured"
    ],
    "title": "Cyber Logs Test",
    "processingPipeline": [
        {
            "file": {
                "XmlRootLevelValues": [],
                "domain": "DOMAIN",
                "password": "PASSWORD",
                "type": "csv",
                "username": "USER",
                "url": "smb://FILESHARE:139/cyber_logs/"
            }
        },
        {
            "globals": {
                "scripts": [
                    "function decode(x)\n{\n    var info = {};   \n    var rec = x.split(',');   \n    info.device = rec[0];\n    info.date = rec[1];\n    info.srcIP = rec[2];\n    info.dstIP = rec[3];\n    info.alert = rec[4];\n    info.country = rec[5];\n    return info;\n}"
                ]
            }
        },
        {
            "harvest": {
                "searchCycle_secs": 3600
            }
        },
        {
            "docMetadata": {
                "title": "$metadata.info.alert @ $metadata.info.date [$metadata.info.device]: $metadata.info.dstIP -> $metadata.info.srcIP",
                "publishedDate": "$SCRIPT( return _doc.metadata.info[0].date; )"
            }
        },
        {
            "contentMetadata": [
                {
                    "fieldName": "info",
                    "script": "var info = decode(text); info;",
                    "scriptlang": "javascript"
                }
            ]
        },
        {
            "text": [
                {
                    "fieldName": "fullText",
                    "script": ",",
                    "scriptlang": "regex",
                    "flags": "md",
                    "replacement": " , "
                },
                {
                    "fieldName": "description",
                    "script": ",",
                    "scriptlang": "regex",
                    "flags": "md",
                    "replacement": " , "
                }
            ]
        },
        {
            "entities": [
                {
                    "dimension": "What",
                    "disambiguated_name": "$metadata.info.srcIP",
                    "type": "PrivateIP"
                },
                {
                    "dimension": "What",
                    "disambiguated_name": "$metadata.info.dstIP",
                    "geotag": {
                        "country": "$SCRIPT( return _doc.metadata.info[0].country; )"
                    },
                    "ontology_type": "country",
                    "type": "PublicIP"
                },
                {
                    "actual_name": "$metadata.info.country",
                    "dimension": "Where",
                    "disambiguated_name": "$SCRIPT( return _doc.metadata.info[0].country; )",
                    "geotag": {
                        "country": "$SCRIPT( return _doc.metadata.info[0].country; )"
                    },
                    "ontology_type": "country",
                    "type": "Country"
                },
                {
                    "dimension": "What",
                    "disambiguated_name": "$metadata.info.device",
                    "type": "Sensor"
                },
                {
                    "dimension": "What",
                    "disambiguated_name": "$metadata.info.alert",
                    "type": "AlertType"
                }
            ]
        },
        {
            "associations": [
                {
                    "entity1": "$metadata.info.dstIP",
                    "entity2": "$metadata.info.srcIP",
                    "geo_index": "$SCRIPT( return _doc.metadata.info[0].country + '/country'; )",
                    "time_start": "$SCRIPT( return _doc.metadata.info[0].date; )",
                    "verb": "$SCRIPT( return _doc.metadata.info[0].alert; )",
                    "verb_category": "$SCRIPT( return _doc.metadata.info[0].alert; )"
                }
            ]
        },
        {
            "searchIndex": {
                "metadataFieldList": ""
            }
        }
    ]
}

Source #1b - fileshare, automated parsing - headers manually specified

Code Block

language	javascript

{
    "description": "For cyber demo",
    "isPublic": false,
    "mediaType": "Log",
    "searchCycle_secs": 3600,
    "tags": [
        "cyber",
        "structured"
    ],
    "title": "Cyber Logs Test",
    "processingPipeline": [
        {
            "file": {
                "XmlRootLevelValues": [
                    "device",
                    "date",
                    "srcIP",
                    "dstIP",
                    "alert",
                    "country"
                ],
                "XmlIgnoreValues": [
                    "device,date,srcIP"
                ],
                "domain": "DOMAIN",
                "password": "PASSWORD",
                "type": "csv",
                "username": "USER",
                "url": "smb://FILESHARE:139/cyber_logs/"
            }
        },
        {
            "harvest": {
                "searchCycle_secs": 3600
            }
        },
        {
            "docMetadata": {
                "title": "$metadata.csv.alert @ $metadata.csv.date [$metadata.csv.device]: $metadata.csv.dstIP -> $metadata.csv.srcIP",
                "publishedDate": "$SCRIPT( return _doc.metadata.csv[0].date; )"
            }
        },
        {
            "entities": [
                {
                    "dimension": "What",
                    "disambiguated_name": "$metadata.csv.srcIP",
                    "type": "PrivateIP"
                },
                {
                    "dimension": "What",
                    "disambiguated_name": "$metadata.csv.dstIP",
                    "geotag": {
                        "country": "$SCRIPT( return _doc.metadata.csv[0].country; )"
                    },
                    "ontology_type": "country",
                    "type": "PublicIP"
                },
                {
                    "actual_name": "$metadata.csv.country",
                    "dimension": "Where",
                    "disambiguated_name": "$SCRIPT( return _doc.metadata.csv[0].country; )",
                    "geotag": {
                        "country": "$SCRIPT( return _doc.metadata.csv[0].country; )"
                    },
                    "ontology_type": "country",
                    "type": "Country"
                },
                {
                    "dimension": "What",
                    "disambiguated_name": "$metadata.csv.device",
                    "type": "Sensor"
                },
                {
                    "dimension": "What",
                    "disambiguated_name": "$metadata.csv.alert",
                    "type": "AlertType"
                }
            ]
        },
        {
            "associations": [
                {
                    "entity1": "$metadata.csv.dstIP",
                    "entity2": "$metadata.csv.srcIP",
                    "geo_index": "$SCRIPT( return _doc.metadata.csv[0].country + '/country'; )",
                    "time_start": "$SCRIPT( return _doc.metadata.csv[0].date; )",
                    "verb": "$SCRIPT( return _doc.metadata.csv[0].alert; )",
                    "verb_category": "$SCRIPT( return _doc.metadata.csv[0].alert; )"
                }
            ]
        },
        {
            "searchIndex": {
                "metadataFieldList": ""
            }
        }
    ]
}

Source #1c - fileshare, automated parsing - headers automatically specified

For the purpose of this example, imagine that the first line starts with a "#", eg:

Code Block

language	javascript

#Device,Date,SrcIP,dstIP,Alert,Country
SCANNER_1,2012-01-01T13:43:00,10.0.0.1,66.66.66.66,DUMMY_ALERT_TYPE_1,United States

...

Then:

Code Block

language	javascript

{
    "description": "For cyber demo",
    "isPublic": false,
    "mediaType": "Log",
    "searchCycle_secs": 3600,
    "tags": [
        "cyber",
        "structured"
    ],
    "title": "Cyber Logs Test",
    "processingPipeline": [
        {
            "file": {
                "XmlIgnoreValues": [
                    "#"
                ],
                "domain": "DOMAIN",
                "password": "PASSWORD",
                "type": "csv",
                "username": "USER",
                "url": "smb://FILESHARE:139/cyber_logs/"
            }
        },
        {
            "harvest": {
                "searchCycle_secs": 3600
            }
        },
        {
            "docMetadata": {
                "title": "$metadata.csv.alert @ $metadata.csv.date [$metadata.csv.device]: $metadata.csv.dstIP -> $metadata.csv.srcIP",
                "publishedDate": "$SCRIPT( return _doc.metadata.csv[0].date; )"
            }
        },
        {
            "entities": [
                {
                    "dimension": "What",
                    "disambiguated_name": "$metadata.csv.srcIP",
                    "type": "PrivateIP"
                },
                {
                    "dimension": "What",
                    "disambiguated_name": "$metadata.csv.dstIP",
                    "geotag": {
                        "country": "$SCRIPT( return _doc.metadata.csv[0].country; )"
                    },
                    "ontology_type": "country",
                    "type": "PublicIP"
                },
                {
                    "actual_name": "$metadata.csv.country",
                    "dimension": "Where",
                    "disambiguated_name": "$SCRIPT( return _doc.metadata.csv[0].country; )",
                    "geotag": {
                        "country": "$SCRIPT( return _doc.metadata.csv[0].country; )"
                    },
                    "ontology_type": "country",
                    "type": "Country"
                },
                {
                    "dimension": "What",
                    "disambiguated_name": "$metadata.csv.device",
                    "type": "Sensor"
                },
                {
                    "dimension": "What",
                    "disambiguated_name": "$metadata.csv.alert",
                    "type": "AlertType"
                }
            ]
        },
        {
            "associations": [
                {
                    "entity1": "$metadata.csv.dstIP",
                    "entity2": "$metadata.csv.srcIP",
                    "geo_index": "$SCRIPT( return _doc.metadata.csv[0].country + '/country'; )",
                    "time_start": "$SCRIPT( return _doc.metadata.csv[0].date; )",
                    "verb": "$SCRIPT( return _doc.metadata.csv[0].alert; )",
                    "verb_category": "$SCRIPT( return _doc.metadata.csv[0].alert; )"
                }
            ]
        },
        {
            "searchIndex": {
                "metadataFieldList": ""
            }
        }
    ]
}

Note that if the header was as originally specified (e.g. first line "Device,Date,SrcIP,dstIP,Alert,Country"), then the "XmlIgnoreValues" field would have needed to be "\"Device\"" to correctly parse the headers (assuming the quote character was ").

Source #2a - web (including uploaded fileshares), manual parsing

It is slightly more complicated to parse CSV files over the Web, but still quite possible, using the searchConfig capability. Note that one neat trick is to upload a share to Infinit.e, and then use an API key to access the REST interface. Users can allocate themselves an API key from the People Manager.

Info
Note that when accessing Web documents you must use "rss.extraUrls" and specify at least the "url" and "title" fields, rather than the top-level "url" (otherwise the URL is treated as an RSS feed rather than a standalone web page).

...

Code Block

language	javascript

{
    "associations": [{
        "assoc_type": "Event",
        "entity1": "66.66.66.66",
        "entity1_index": "66.66.66.66/publicip",
        "entity2": "10.0.0.1",
        "entity2_index": "10.0.0.1/privateip",
        "geo_index": "united states/country",
        "time_start": "2012-01-01T13:43:00",
        "verb": "DUMMY_ALERT_TYPE_1",
        "verb_category": "DUMMY_ALERT_TYPE_1"
    }],
    "communityId": ["506dc16dfbf042893dd6b8f2"],
    "created": "Jun 4, 2013 12:54:34 AM UTC",
    "entities": [
        {
            "actual_name": "10.0.0.1",
            "dimension": "What",
            "disambiguated_name": "10.0.0.1",
            "doccount": 0,
            "frequency": 1,
            "index": "10.0.0.1/privateip",
            "relevance": 0,
            "totalfrequency": -1,
            "type": "PrivateIP"
        },
        {
            "actual_name": "66.66.66.66",
            "dimension": "What",
            "disambiguated_name": "66.66.66.66",
            "doccount": 0,
            "frequency": 1,
            "index": "66.66.66.66/publicip",
            "relevance": 0,
            "totalfrequency": -1,
            "type": "PublicIP"
        },
        {
            "actual_name": "United States",
            "dimension": "Where",
            "disambiguated_name": "United States",
            "doccount": 0,
            "frequency": 1,
            "index": "united states/country",
            "ontology_type": "country",
            "relevance": 0,
            "totalfrequency": -1,
            "type": "Country"
        },
        {
            "actual_name": "SCANNER_1",
            "dimension": "What",
            "disambiguated_name": "SCANNER_1",
            "doccount": 0,
            "frequency": 1,
            "index": "scanner_1/sensor",
            "relevance": 0,
            "totalfrequency": -1,
            "type": "Sensor"
        },
        {
            "actual_name": "DUMMY_ALERT_TYPE_1",
            "dimension": "What",
            "disambiguated_name": "DUMMY_ALERT_TYPE_1",
            "doccount": 0,
            "frequency": 1,
            "index": "dummy_alert_type_1/alerttype",
            "relevance": 0,
            "totalfrequency": -1,
            "type": "AlertType"
        }
    ],
    "fullText": "SCANNER_1 , 2012-01-01T13:43:00 , 10.0.0.1 , 66.66.66.66 , DUMMY_ALERT_TYPE_1 , United States",
    "mediaType": ["Log"],
    "metadata": {"info": [{
        "alert": "DUMMY_ALERT_TYPE_1 ",
        "country": "United States",
        "date": "2012-01-01T13:43:00",
        "device": "SCANNER_1 ",
        "dstIP": "66.66.66.66",
        "srcIP": " 10.0.0.1"
    }]},
    "modified": "Jun 4, 2013 12:54:34 AM UTC",
    "publishedDate": "Jan 1, 2012 01:43:00 PM UTC",
    "source": ["Cyber Logs Test"],
    "sourceKey": ["INFINITE_ENDPOINT.api.share.get.51ad28a440b4a4f0f757824c.25.26"],
    "tags": [
        "cyber",
        "structured"
    ],
    "title": "DUMMY_ALERT_TYPE_1  @ 2012-01-01T13:43:00 [SCANNER_1 ]: 66.66.66.66 -> 10.0.0.1",
    "url": "http://INFINITE_ENDPOINT/api/share/get/51ad28a440b4a4f0f757824c#1"
}

Sources - old format

Old Format Source #1a - fileshare, manual parsing

Code Block

language	javascript

{
    "description": "For cyber demo",
    "extractType": "File",
    "file": {
        "XmlRootLevelValues": [],
        "domain": "DOMAIN",
        "password": "PASSWORD",
        "type": "csv",
        "username": "USER"
    },
    "isPublic": false,
    "mediaType": "Log",
    "searchCycle_secs": 3600,
    "searchIndexFilter": {
        "metadataFieldList": ""
    },
    "structuredAnalysis": {
        "associations": [
            {
                "entity1": "$metadata.info.dstIP",
                "entity2": "$metadata.info.srcIP",
                "geo_index": "$SCRIPT( return _doc.metadata.info[0].country + '/country'; )",
                "time_start": "$SCRIPT( return _doc.metadata.info[0].date; )",
                "verb": "$SCRIPT( return _doc.metadata.info[0].alert; )",
                "verb_category": "$SCRIPT( return _doc.metadata.info[0].alert; )"
            }
        ],
        "entities": [
            {
                "dimension": "What",
                "disambiguated_name": "$metadata.info.srcIP",
                "type": "PrivateIP"
            },
            {
                "dimension": "What",
                "disambiguated_name": "$metadata.info.dstIP",
                "geotag": {
                    "country": "$SCRIPT( return _doc.metadata.info[0].country; )"
                },
                "ontology_type": "country",
                "type": "PublicIP"
            },
            {
                "actual_name": "$metadata.info.country",
                "dimension": "Where",
                "disambiguated_name": "$SCRIPT( return _doc.metadata.info[0].country; )",
                "geotag": {
                    "country": "$SCRIPT( return _doc.metadata.info[0].country; )"
                },
                "ontology_type": "country",
                "type": "Country"
            },
            {
                "dimension": "What",
                "disambiguated_name": "$metadata.info.device",
                "type": "Sensor"
            },
            {
                "dimension": "What",
                "disambiguated_name": "$metadata.info.alert",
                "type": "AlertType"
            }
        ],
        "publishedDate": "$SCRIPT( return _doc.metadata.info[0].date; )",
        "script": "",
        "scriptEngine": "javascript",
        "title": "$metadata.info.alert @ $metadata.info.date [$metadata.info.device]: $metadata.info.dstIP -> $metadata.info.srcIP"
    },
    "tags": [
        "cyber",
        "structured"
    ],
    "title": "Cyber Logs Test",
    "unstructuredAnalysis": {
        "meta": [
            {
                "context": "First",
                "fieldName": "info",
                "script": "var info = decode(text); info;",
                "scriptlang": "javascript"
            }
        ],
        "script": "function decode(x)\n{\n    var info = {};   \n    var rec = x.split(',');   \n    info.device = rec[0];\n    info.date = rec[1];\n    info.srcIP = rec[2];\n    info.dstIP = rec[3];\n    info.alert = rec[4];\n    info.country = rec[5];\n    return info;\n}",
        "simpleTextCleanser": [
            {
                "field": "fullText",
                "flags": "md",
                "replacement": " , ",
                "script": ",",
                "scriptlang": "regex"
            },
            {
                "field": "description",
                "flags": "md",
                "replacement": " , ",
                "script": ",",
                "scriptlang": "regex"
            }
        ]
    },
    "useExtractor":"none",
    "useTextExtractor":"none",
    "url": "smb://FILESHARE:139/cyber_logs/"
}

...

It is slightly more complicated to parse CSV files over the Web, but still quite possible, using the searchConfig capability. Note that one neat trick is to upload a share to Infinit.e, and then use an API key to access the REST interface. Users can allocate themselves an API key from the People Manager.

Info
Note that when accessing Web documents you must use "rss.extraUrls" and specify at least the "url" and "title" fields, rather than the top-level "url" (otherwise the URL is treated as an RSS feed rather than a standalone web page).

...

Versions Compared

Old Version 13

New Version Current

Key

Input format sample

Source #1b - fileshare, automated parsing - headers manually specified

Source #1c - fileshare, automated parsing - headers automatically specified

Source #2a - web (including uploaded fileshares), manual parsing

Sources - old format

Old Format Source #1a - fileshare, manual parsing

Page Comparison

Versions Compared

Old Version 13

New Version Current

Key

Source #1b - fileshare, automated parsing - headers manually specified

Source #1c - fileshare, automated parsing - headers automatically specified

Source #2a - web (including uploaded fileshares), manual parsing

Sources - old format

Old Format Source #1a - fileshare, manual parsing