Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

Input format sample

Code Block
languagejavascript
Device,Date,SrcIP,dstIP,Alert,Country
SCANNER_1,2012-01-01T13:43:00,10.0.0.1,66.66.66.66,DUMMY_ALERT_TYPE_1,United States
SCANNER_2,2012-02-01T14:21:00,10.0.0.2,66.66.66.66,DUMMY_ALERT_TYPE_2,United Kingdom
SCANNER_3,2012-03-01T15:17:00,10.0.0.1,99.66.99.66,DUMMY_ALERT_TYPE_3,Netherlands

Source #1a - fileshare, manual parsing

Code Block
languagejavascript
{
    "description": "For cyber demo",
    "isPublic": false,
    "mediaType": "Log",
    "searchCycle_secs": 3600,
    "tags": [
        "cyber",
        "structured"
    ],
    "title": "Cyber Logs Test",
    "processingPipeline": [
        {
            "file": {
                "XmlRootLevelValues": [],
                "domain": "DOMAIN",
                "password": "PASSWORD",
                "type": "csv",
                "username": "USER",
                "url": "smb://FILESHARE:139/cyber_logs/"
            }
        },
        {
            "globals": {
                "scripts": [
                    "function decode(x)\n{\n    var info = {};   \n    var rec = x.split(',');   \n    info.device = rec[0];\n    info.date = rec[1];\n    info.srcIP = rec[2];\n    info.dstIP = rec[3];\n    info.alert = rec[4];\n    info.country = rec[5];\n    return info;\n}"
                ]
            }
        },
        {
            "harvest": {
                "searchCycle_secs": 3600
            }
        },
        {
            "docMetadata": {
                "title": "$metadata.info.alert @ $metadata.info.date [$metadata.info.device]: $metadata.info.dstIP -> $metadata.info.srcIP",
                "publishedDate": "$SCRIPT( return _doc.metadata.info[0].date; )"
            }
        },
        {
            "contentMetadata": [
                {
                    "fieldName": "info",
                    "script": "var info = decode(text); info;",
                    "scriptlang": "javascript"
                }
            ]
        },
        {
            "text": [
                {
                    "fieldName": "fullText",
                    "script": ",",
                    "scriptlang": "regex",
                    "flags": "md",
                    "replacement": " , "
                },
                {
                    "fieldName": "description",
                    "script": ",",
                    "scriptlang": "regex",
                    "flags": "md",
                    "replacement": " , "
                }
            ]
        },
        {
            "entities": [
                {
                    "dimension": "What",
                    "disambiguated_name": "$metadata.info.srcIP",
                    "type": "PrivateIP"
                },
                {
                    "dimension": "What",
                    "disambiguated_name": "$metadata.info.dstIP",
                    "geotag": {
                        "country": "$SCRIPT( return _doc.metadata.info[0].country; )"
                    },
                    "ontology_type": "country",
                    "type": "PublicIP"
                },
                {
                    "actual_name": "$metadata.info.country",
                    "dimension": "Where",
                    "disambiguated_name": "$SCRIPT( return _doc.metadata.info[0].country; )",
                    "geotag": {
                        "country": "$SCRIPT( return _doc.metadata.info[0].country; )"
                    },
                    "ontology_type": "country",
                    "type": "Country"
                },
                {
                    "dimension": "What",
                    "disambiguated_name": "$metadata.info.device",
                    "type": "Sensor"
                },
                {
                    "dimension": "What",
                    "disambiguated_name": "$metadata.info.alert",
                    "type": "AlertType"
                }
            ]
        },
        {
            "associations": [
                {
                    "entity1": "$metadata.info.dstIP",
                    "entity2": "$metadata.info.srcIP",
                    "geo_index": "$SCRIPT( return _doc.metadata.info[0].country + '/country'; )",
                    "time_start": "$SCRIPT( return _doc.metadata.info[0].date; )",
                    "verb": "$SCRIPT( return _doc.metadata.info[0].alert; )",
                    "verb_category": "$SCRIPT( return _doc.metadata.info[0].alert; )"
                }
            ]
        },
        {
            "searchIndex": {
                "metadataFieldList": ""
            }
        }
    ]
}

Source #1b - fileshare, automated parsing - headers manually specified

Code Block
languagejavascript
{
    "description": "For cyber demo",
    "isPublic": false,
    "mediaType": "Log",
    "searchCycle_secs": 3600,
    "tags": [
        "cyber",
        "structured"
    ],
    "title": "Cyber Logs Test",
    "processingPipeline": [
        {
            "file": {
                "XmlRootLevelValues": [
                    "device",
                    "date",
                    "srcIP",
                    "dstIP",
                    "alert",
                    "country"
                ],
                "XmlIgnoreValues": [
                    "device,date,srcIP"
                ],
                "domain": "DOMAIN",
                "password": "PASSWORD",
                "type": "csv",
                "username": "USER",
                "url": "smb://FILESHARE:139/cyber_logs/"
            }
        },
        {
            "harvest": {
                "searchCycle_secs": 3600
            }
        },
        {
            "docMetadata": {
                "title": "$metadata.csv.alert @ $metadata.csv.date [$metadata.csv.device]: $metadata.csv.dstIP -> $metadata.csv.srcIP",
                "publishedDate": "$SCRIPT( return _doc.metadata.csv[0].date; )"
            }
        },
        {
            "entities": [
                {
                    "dimension": "What",
                    "disambiguated_name": "$metadata.csv.srcIP",
                    "type": "PrivateIP"
                },
                {
                    "dimension": "What",
                    "disambiguated_name": "$metadata.csv.dstIP",
                    "geotag": {
                        "country": "$SCRIPT( return _doc.metadata.csv[0].country; )"
                    },
                    "ontology_type": "country",
                    "type": "PublicIP"
                },
                {
                    "actual_name": "$metadata.csv.country",
                    "dimension": "Where",
                    "disambiguated_name": "$SCRIPT( return _doc.metadata.csv[0].country; )",
                    "geotag": {
                        "country": "$SCRIPT( return _doc.metadata.csv[0].country; )"
                    },
                    "ontology_type": "country",
                    "type": "Country"
                },
                {
                    "dimension": "What",
                    "disambiguated_name": "$metadata.csv.device",
                    "type": "Sensor"
                },
                {
                    "dimension": "What",
                    "disambiguated_name": "$metadata.csv.alert",
                    "type": "AlertType"
                }
            ]
        },
        {
            "associations": [
                {
                    "entity1": "$metadata.csv.dstIP",
                    "entity2": "$metadata.csv.srcIP",
                    "geo_index": "$SCRIPT( return _doc.metadata.csv[0].country + '/country'; )",
                    "time_start": "$SCRIPT( return _doc.metadata.csv[0].date; )",
                    "verb": "$SCRIPT( return _doc.metadata.csv[0].alert; )",
                    "verb_category": "$SCRIPT( return _doc.metadata.csv[0].alert; )"
                }
            ]
        },
        {
            "searchIndex": {
                "metadataFieldList": ""
            }
        }
    ]
}

Source #1c - fileshare, automated parsing - headers automatically specified

For the purpose of this example, imagine that the first line starts with a "#", eg:

Code Block
languagejavascript
#Device,Date,SrcIP,dstIP,Alert,Country
SCANNER_1,2012-01-01T13:43:00,10.0.0.1,66.66.66.66,DUMMY_ALERT_TYPE_1,United States

...

Then:

Code Block
languagejavascript
{
    "description": "For cyber demo",
    "isPublic": false,
    "mediaType": "Log",
    "searchCycle_secs": 3600,
    "tags": [
        "cyber",
        "structured"
    ],
    "title": "Cyber Logs Test",
    "processingPipeline": [
        {
            "file": {
                "XmlIgnoreValues": [
                    "#"
                ],
                "domain": "DOMAIN",
                "password": "PASSWORD",
                "type": "csv",
                "username": "USER",
                "url": "smb://FILESHARE:139/cyber_logs/"
            }
        },
        {
            "harvest": {
                "searchCycle_secs": 3600
            }
        },
        {
            "docMetadata": {
                "title": "$metadata.csv.alert @ $metadata.csv.date [$metadata.csv.device]: $metadata.csv.dstIP -> $metadata.csv.srcIP",
                "publishedDate": "$SCRIPT( return _doc.metadata.csv[0].date; )"
            }
        },
        {
            "entities": [
                {
                    "dimension": "What",
                    "disambiguated_name": "$metadata.csv.srcIP",
                    "type": "PrivateIP"
                },
                {
                    "dimension": "What",
                    "disambiguated_name": "$metadata.csv.dstIP",
                    "geotag": {
                        "country": "$SCRIPT( return _doc.metadata.csv[0].country; )"
                    },
                    "ontology_type": "country",
                    "type": "PublicIP"
                },
                {
                    "actual_name": "$metadata.csv.country",
                    "dimension": "Where",
                    "disambiguated_name": "$SCRIPT( return _doc.metadata.csv[0].country; )",
                    "geotag": {
                        "country": "$SCRIPT( return _doc.metadata.csv[0].country; )"
                    },
                    "ontology_type": "country",
                    "type": "Country"
                },
                {
                    "dimension": "What",
                    "disambiguated_name": "$metadata.csv.device",
                    "type": "Sensor"
                },
                {
                    "dimension": "What",
                    "disambiguated_name": "$metadata.csv.alert",
                    "type": "AlertType"
                }
            ]
        },
        {
            "associations": [
                {
                    "entity1": "$metadata.csv.dstIP",
                    "entity2": "$metadata.csv.srcIP",
                    "geo_index": "$SCRIPT( return _doc.metadata.csv[0].country + '/country'; )",
                    "time_start": "$SCRIPT( return _doc.metadata.csv[0].date; )",
                    "verb": "$SCRIPT( return _doc.metadata.csv[0].alert; )",
                    "verb_category": "$SCRIPT( return _doc.metadata.csv[0].alert; )"
                }
            ]
        },
        {
            "searchIndex": {
                "metadataFieldList": ""
            }
        }
    ]
}

Note that if the header was as originally specified (e.g. first line "Device,Date,SrcIP,dstIP,Alert,Country"), then the "XmlIgnoreValues" field would have needed to be "\"Device\"" to correctly parse the headers (assuming the quote field was ").

Source #2a - web (including uploaded fileshares), manual parsing

It is slightly more complicated to parse CSV files over the Web, but still quite possible, using the searchConfig capability. Note that one neat trick is to upload a share to Infinit.e, and then use an API key to access the REST interface. Users can allocate themselves an API key from the People Manager.

Info

Note that when accessing Web documents, you must use "rss.extraUrls" (specifying at minimum the "url" and "title" fields) rather than the top-level "url"; otherwise the URL is treated as an RSS feed rather than a standalone web page.

...

Code Block
languagejavascript
{
    "associations": [{
        "assoc_type": "Event",
        "entity1": "66.66.66.66",
        "entity1_index": "66.66.66.66/publicip",
        "entity2": "10.0.0.1",
        "entity2_index": "10.0.0.1/privateip",
        "geo_index": "united states/country",
        "time_start": "2012-01-01T13:43:00",
        "verb": "DUMMY_ALERT_TYPE_1",
        "verb_category": "DUMMY_ALERT_TYPE_1"
    }],
    "communityId": ["506dc16dfbf042893dd6b8f2"],
    "created": "Jun 4, 2013 12:54:34 AM UTC",
    "entities": [
        {
            "actual_name": "10.0.0.1",
            "dimension": "What",
            "disambiguated_name": "10.0.0.1",
            "doccount": 0,
            "frequency": 1,
            "index": "10.0.0.1/privateip",
            "relevance": 0,
            "totalfrequency": -1,
            "type": "PrivateIP"
        },
        {
            "actual_name": "66.66.66.66",
            "dimension": "What",
            "disambiguated_name": "66.66.66.66",
            "doccount": 0,
            "frequency": 1,
            "index": "66.66.66.66/publicip",
            "relevance": 0,
            "totalfrequency": -1,
            "type": "PublicIP"
        },
        {
            "actual_name": "United States",
            "dimension": "Where",
            "disambiguated_name": "United States",
            "doccount": 0,
            "frequency": 1,
            "index": "united states/country",
            "ontology_type": "country",
            "relevance": 0,
            "totalfrequency": -1,
            "type": "Country"
        },
        {
            "actual_name": "SCANNER_1",
            "dimension": "What",
            "disambiguated_name": "SCANNER_1",
            "doccount": 0,
            "frequency": 1,
            "index": "scanner_1/sensor",
            "relevance": 0,
            "totalfrequency": -1,
            "type": "Sensor"
        },
        {
            "actual_name": "DUMMY_ALERT_TYPE_1",
            "dimension": "What",
            "disambiguated_name": "DUMMY_ALERT_TYPE_1",
            "doccount": 0,
            "frequency": 1,
            "index": "dummy_alert_type_1/alerttype",
            "relevance": 0,
            "totalfrequency": -1,
            "type": "AlertType"
        }
    ],
    "fullText": "SCANNER_1 , 2012-01-01T13:43:00 , 10.0.0.1 , 66.66.66.66 , DUMMY_ALERT_TYPE_1 , United States",
    "mediaType": ["Log"],
    "metadata": {"info": [{
        "alert": "DUMMY_ALERT_TYPE_1 ",
        "country": "United States",
        "date": "2012-01-01T13:43:00",
        "device": "SCANNER_1 ",
        "dstIP": "66.66.66.66",
        "srcIP": " 10.0.0.1"
    }]},
    "modified": "Jun 4, 2013 12:54:34 AM UTC",
    "publishedDate": "January 1, 2012 13:43:00 PM UTC",
    "source": ["Cyber Logs Test"],
    "sourceKey": ["INFINITE_ENDPOINT.api.share.get.51ad28a440b4a4f0f757824c.25.26"],
    "tags": [
        "cyber",
        "structured"
    ],
    "title": "DUMMY_ALERT_TYPE_1  @ 2012-01-01T13:43:00 [SCANNER_1 ]: 66.66.66.66 -> 10.0.0.1",
    "url": "http://INFINITE_ENDPOINT/api/share/get/51ad28a440b4a4f0f757824c#1"
}

Sources - old format

Old Format Source #1a - fileshare, manual parsing

Code Block
languagejavascript
{
    "description": "For cyber demo",
    "extractType": "File",
    "file": {
        "XmlRootLevelValues": [],
        "domain": "DOMAIN",
        "password": "PASSWORD",
        "type": "csv",
        "username": "USER"
    },
    "isPublic": false,
    "mediaType": "Log",
    "searchCycle_secs": 3600,
    "searchIndexFilter": {
        "metadataFieldList": ""
    },
    "structuredAnalysis": {
        "associations": [
            {
                "entity1": "$metadata.info.dstIP",
                "entity2": "$metadata.info.srcIP",
                "geo_index": "$SCRIPT( return _doc.metadata.info[0].country + '/country'; )",
                "time_start": "$SCRIPT( return _doc.metadata.info[0].date; )",
                "verb": "$SCRIPT( return _doc.metadata.info[0].alert; )",
                "verb_category": "$SCRIPT( return _doc.metadata.info[0].alert; )"
            }
        ],
        "entities": [
            {
                "dimension": "What",
                "disambiguated_name": "$metadata.info.srcIP",
                "type": "PrivateIP"
            },
            {
                "dimension": "What",
                "disambiguated_name": "$metadata.info.dstIP",
                "geotag": {
                    "country": "$SCRIPT( return _doc.metadata.info[0].country; )"
                },
                "ontology_type": "country",
                "type": "PublicIP"
            },
            {
                "actual_name": "$metadata.info.country",
                "dimension": "Where",
                "disambiguated_name": "$SCRIPT( return _doc.metadata.info[0].country; )",
                "geotag": {
                    "country": "$SCRIPT( return _doc.metadata.info[0].country; )"
                },
                "ontology_type": "country",
                "type": "Country"
            },
            {
                "dimension": "What",
                "disambiguated_name": "$metadata.info.device",
                "type": "Sensor"
            },
            {
                "dimension": "What",
                "disambiguated_name": "$metadata.info.alert",
                "type": "AlertType"
            }
        ],
        "publishedDate": "$SCRIPT( return _doc.metadata.info[0].date; )",
        "script": "",
        "scriptEngine": "javascript",
        "title": "$metadata.info.alert @ $metadata.info.date [$metadata.info.device]: $metadata.info.dstIP -> $metadata.info.srcIP"
    },
    "tags": [
        "cyber",
        "structured"
    ],
    "title": "Cyber Logs Test",
    "unstructuredAnalysis": {
        "meta": [
            {
                "context": "First",
                "fieldName": "info",
                "script": "var info = decode(text); info;",
                "scriptlang": "javascript"
            }
        ],
        "script": "function decode(x)\n{\n    var info = {};   \n    var rec = x.split(',');   \n    info.device = rec[0];\n    info.date = rec[1];\n    info.srcIP = rec[2];\n    info.dstIP = rec[3];\n    info.alert = rec[4];\n    info.country = rec[5];\n    return info;\n}",
        "simpleTextCleanser": [
            {
                "field": "fullText",
                "flags": "md",
                "replacement": " , ",
                "script": ",",
                "scriptlang": "regex"
            },
            {
                "field": "description",
                "flags": "md",
                "replacement": " , ",
                "script": ",",
                "scriptlang": "regex"
            }
        ]
    },
    "useExtractor":"none",
    "useTextExtractor":"none",
    "url": "smb://FILESHARE:139/cyber_logs/"
}

...

It is slightly more complicated to parse CSV files over the Web, but still quite possible, using the searchConfig capability. Note that one neat trick is to upload a share to Infinit.e, and then use an API key to access the REST interface. Users can allocate themselves an API key from the People Manager.

Info

Note that when accessing Web documents, you must use "rss.extraUrls" (specifying at minimum the "url" and "title" fields) rather than the top-level "url"; otherwise the URL is treated as an RSS feed rather than a standalone web page.

...