Feed Source

The following code samples show how to create a basic source that extracts data from an RSS feed.

Sample RSS Feed Source
{
    "description": "Article on Medical Issues",
    "harvestBadSource": false,
    "isApproved": true,
    "isPublic": true,
    "key": "http.www.mayoclinic.com.rss.blog.xml",
    "mediaType": "News",
    "modified": "Oct 19, 2010 11:31:59 AM",
    "tags": [
        "topic:healthcare",
        "industry:healthcare",
        "mayo clinic",
        "health"
    ],
    "title": "MayoClinic: General Topics",
    "processingPipeline": [
        {
            "feed": {
                "extraUrls": [
                    {
                        "url": "http://www.mayoclinic.com/rss/blog.xml"
                    }
                ]
            }
        },
        {
            "textEngine": {
                "engineName": "AlchemyAPI"
            }
        },
        {
            "featureEngine": {
                "engineName": "OpenCalais"
            }
        }
    ]
}

Sample Output from RSS Feed Source Above (NumberLong(...) values are MongoDB shell notation, not strict JSON)
{
    "_id" : "4e1c8afa7d56bb818ed10f76",
    "created" : "1310493434159",
    "description" : "Clarify the role of carbohydrates in the Dr. Bernstein diet and find a 
         healthy eating plan that works for you.",
    "entities" : [
    {
        "actual_name" : "certified diabetes",
        "dimension" : "What",
        "disambiguous_name" : "certified diabetes",
        "doccount" : NumberLong(38),
        "frequency" : 3,
        "gazateer_index" : "certified diabetes/medicalcondition",
        "relevance" : "0.711",
        "totalfrequency" : NumberLong(114),
        "type" : "MedicalCondition"
    },
    {
        "actual_name" : "Diabetes Unit",
        "dimension" : "Who",
        "disambiguous_name" : "Diabetes Unit",
        "doccount" : NumberLong(38),
        "frequency" : 1,
        "gazateer_index" : "diabetes unit/organization",
        "relevance" : "0.235",
        "totalfrequency" : NumberLong(38),
        "type" : "Organization"
    },
    {
        "actual_name" : "Mayo Clinic",
        "dimension" : "What",
        "disambiguous_name" : "Mayo Clinic",
        "doccount" : NumberLong(514),
        "frequency" : 2,
        "gazateer_index" : "mayo clinic/facility",
        "relevance" : "0.305",
        "totalfrequency" : NumberLong(1033),
        "type" : "Facility"
    },
    {
        "actual_name" : "Rochester",
        "dimension" : "Where",
        "disambiguous_name" : "Rochester,Minnesota,United States",
        "doccount" : NumberLong(345),
        "frequency" : 2,
        "gazateer_index" : "rochester,minnesota,united states/city",
        "geotag" : {
            "latitude" : "44.0217",
            "longitude" : "-92.4697",
            "loc" : [
                44.0217,
                -92.4697
            ]
        },
        "linkdata" : [
            "http://d.opencalais.com/er/geo/city/ralg-geo1/9d780656-b9a4-6789-bcaf-370cabc32490"
        ],
        "relevance" : "0.305",
        "totalfrequency" : NumberLong(404),
        "type" : "City"
    },
    {
        "actual_name" : "Minnesota",
        "dimension" : "Where",
        "disambiguous_name" : "Minnesota,United States",
        "doccount" : NumberLong(2103),
        "frequency" : 2,
        "gazateer_index" : "minnesota,united states/provinceorstate",
        "geotag" : {
            "latitude" : "46.0",
            "longitude" : "-94.0",
            "loc" : [
               46,
               -94
            ]
         },
         "linkdata" : [
             "http://d.opencalais.com/er/geo/provinceorstate/ralg-geo1/b99d1bcd-ec35-113e-54ad-4d6c44682ce1"
         ],
         "relevance" : "0.305",
         "totalfrequency" : NumberLong(3134),
         "type" : "ProvinceOrState"
    },
    {
         "actual_name" : "R.N.",
         "dimension" : "Who",
         "disambiguous_name" : "R.N.",
         "doccount" : NumberLong(108),
         "frequency" : 1,
         "gazateer_index" : "r.n./position",
         "relevance" : "0.138",
         "totalfrequency" : NumberLong(130),
         "type" : "Position"
    },
    {
         "actual_name" : "coordinator",
         "dimension" : "Who",
         "disambiguous_name" : "coordinator",
         "doccount" : NumberLong(220),
         "frequency" : 1,
         "gazateer_index" : "coordinator/position",
         "relevance" : "0.235",
         "totalfrequency" : NumberLong(238),
         "type" : "Position"
    },
    {
         "actual_name" : "Division of Endocrinology, Diabetes, Metabolism, & Nutrition",
         "dimension" : "Who",
         "disambiguous_name" : "Division of Endocrinology, Diabetes, Metabolism, & Nutrition",
         "doccount" : NumberLong(38),
         "frequency" : 2,
         "gazateer_index" : "division of endocrinology, diabetes, metabolism, & nutrition/organization",
         "relevance" : "0.305",
         "totalfrequency" : NumberLong(76),
         "type" : "Organization"
    },
    {
         "actual_name" : "self-management",
         "dimension" : "What",
         "disambiguous_name" : "self-management",
         "doccount" : NumberLong(72),
         "frequency" : 1,
         "gazateer_index" : "self-management/medicaltreatment",
         "relevance" : "0.076",
         "totalfrequency" : NumberLong(78),
         "type" : "MedicalTreatment"
    },
    {
         "actual_name" : "Peggy Moreland",
         "dimension" : "Who",
         "disambiguous_name" : "Peggy Moreland",
         "doccount" : NumberLong(38),
         "frequency" : 3,
         "gazateer_index" : "peggy moreland/person",
         "relevance" : "0.358",
         "totalfrequency" : NumberLong(114),
         "type" : "Person"
    },
    {
         "actual_name" : "University of Phoenix",
         "dimension" : "What",
         "disambiguous_name" : "University of Phoenix",
         "doccount" : NumberLong(64),
         "frequency" : 1,
         "gazateer_index" : "university of phoenix/facility",
         "relevance" : "0.102",
         "totalfrequency" : NumberLong(68),
         "type" : "Facility"
    },
    {
         "actual_name" : "registered nurse",
         "dimension" : "Who",
         "disambiguous_name" : "registered nurse",
         "doccount" : NumberLong(69),
         "frequency" : 1,
         "gazateer_index" : "registered nurse/position",
         "relevance" : "0.335",
         "totalfrequency" : NumberLong(70),
         "type" : "Position"
    },
    {
         "actual_name" : "American Diabetes Association",
         "dimension" : "Who",
         "disambiguous_name" : "American Diabetes Association",
         "doccount" : NumberLong(120),
         "frequency" : 1,
         "gazateer_index" : "american diabetes association/organization",
         "relevance" : "0.102",
         "totalfrequency" : NumberLong(226),
         "type" : "Organization"
    },
    {
         "actual_name" : "N.\n  Peggy Moreland",
         "dimension" : "Who",
         "disambiguous_name" : "N.\n  Peggy Moreland",
         "doccount" : NumberLong(38),
         "frequency" : 1,
         "gazateer_index" : "n.\n  peggy moreland/person",
         "relevance" : "0.146",
         "totalfrequency" : NumberLong(38),
         "type" : "Person"
    },
    {
         "actual_name" : "insulin pump",
         "dimension" : "What",
         "disambiguous_name" : "insulin pump",
         "doccount" : NumberLong(52),
         "frequency" : 1,
         "gazateer_index" : "insulin pump/medicaltreatment",
         "relevance" : "0.184",
         "totalfrequency" : NumberLong(69),
         "type" : "MedicalTreatment"
    },
    {
         "actual_name" : "M.S.N.\n  Peggy",
         "dimension" : "Who",
         "disambiguous_name" : "M.S.N.\n  Peggy",
         "doccount" : NumberLong(38),
         "frequency" : 2,
         "gazateer_index" : "m.s.n.\n  peggy/person",
         "relevance" : "0.482",
         "totalfrequency" : NumberLong(76),
         "type" : "Person"
    },
    {
         "actual_name" : "Nancy Klobassa Davidson",
         "dimension" : "Who",
         "disambiguous_name" : "Nancy Klobassa Davidson",
         "doccount" : NumberLong(38),
         "frequency" : 5,
         "gazateer_index" : "nancy klobassa davidson/person",
         "relevance" : "0.659",
         "totalfrequency" : NumberLong(190),
         "type" : "Person"
    },
    {
         "actual_name" : "University of Phoenix",
         "dimension" : "Who",
         "disambiguous_name" : "University of Phoenix",
         "doccount" : NumberLong(65),
         "frequency" : 1,
         "gazateer_index" : "university of phoenix/organization",
         "relevance" : "0.102",
         "totalfrequency" : NumberLong(69),
         "type" : "Organization"
    },
    {
         "actual_name" : "diabetes",
         "dimension" : "What",
         "disambiguous_name" : "diabetes",
         "doccount" : NumberLong(1702),
         "frequency" : 3,
         "gazateer_index" : "diabetes/medicalcondition",
         "relevance" : "0.733",
         "totalfrequency" : NumberLong(4284),
         "type" : "MedicalCondition"
    },
    {
         "actual_name" : "insulin therapy",
         "dimension" : "What",
         "disambiguous_name" : "insulin therapy",
         "doccount" : NumberLong(53),
         "frequency" : 1,
         "gazateer_index" : "insulin therapy/medicaltreatment",
         "relevance" : "0.235",
         "totalfrequency" : NumberLong(57),
         "type" : "MedicalTreatment"
    },
    {
         "actual_name" : "American Association of Diabetes Educators",
         "dimension" : "Who",
         "disambiguous_name" : "American Association of Diabetes Educators",
         "doccount" : NumberLong(38),
         "frequency" : 1,
         "gazateer_index" : "american association of diabetes educators/organization",
         "relevance" : "0.102",
         "totalfrequency" : NumberLong(38),
         "type" : "Organization"
    },
    {
         "actual_name" : "member",
         "dimension" : "Who",
         "disambiguous_name" : "member",
         "doccount" : NumberLong(795),
         "frequency" : 1,
         "gazateer_index" : "member/position",
         "relevance" : "0.102",
         "totalfrequency" : NumberLong(881),
         "type" : "Position"
    }
    ],
    "events" : [
    {
         "entity1" : "nancy klobassa davidson",
         "entity1_index" : "nancy klobassa davidson/person",
         "verb" : "be",
         "verb_category" : "generic relations",
         "entity2" : "coordinator of the diabetes unit's intensive insulin therapy
              program within the division of endocrinology",
         "event_type" : "Summary"
    },
    {
         "entity1" : "peggy moreland",
         "entity1_index" : "peggy moreland/person",
         "verb" : "work",
         "verb_category" : "generic relations",
         "event_type" : "Summary"
    },
    {
         "entity1" : "nancy klobassa davidson",
         "entity1_index" : "nancy klobassa davidson/person",
         "verb" : "current",
         "verb_category" : "career",
         "entity2" : "coordinator",
         "entity2_index" : "coordinator/position",
         "event_type" : "Fact"
    },
    {
         "entity1" : "m.s.n.\n  peggy",
         "entity1_index" : "m.s.n.\n  peggy/person",
         "verb" : "current",
         "verb_category" : "career",
         "entity2" : "r.n.",
         "entity2_index" : "r.n./position",
         "event_type" : "Fact"
    },
    {
         "entity1" : "peggy moreland",
         "entity1_index" : "peggy moreland/person",
         "verb" : "graduate",
         "verb_category" : "generic relations",
         "event_type" : "Summary"
    },
    {
         "entity1" : "n.\n  peggy moreland",
         "entity1_index" : "n.\n  peggy moreland/person",
         "verb" : "be",
         "verb_category" : "generic relations",
         "entity2" : "a certified diabetes educator",
         "event_type" : "Summary"
    },
    {
         "entity1" : "nancy klobassa davidson",
         "entity1_index" : "nancy klobassa davidson/person",
         "verb" : "be",
         "verb_category" : "generic relations",
         "entity2" : "a certified diabetes educator",
         "event_type" : "Summary"
    },
    {
         "entity1" : "peggy moreland",
         "entity1_index" : "peggy moreland/person",
         "verb" : "current",
         "verb_category" : "career",
         "entity2" : "r.n.",
         "entity2_index" : "r.n./position",
         "event_type" : "Fact"
    },
    {
         "entity1" : "nancy klobassa davidson",
         "entity1_index" : "nancy klobassa davidson/person",
         "verb" : "be",
         "verb_category" : "generic relations",
         "entity2" : "a registered nurse",
         "event_type" : "Summary"
    },
    {
         "entity1" : "nancy klobassa davidson",
         "entity1_index" : "nancy klobassa davidson/person",
         "verb" : "work",
         "verb_category" : "generic relations",
         "event_type" : "Summary"
    },
    {
         "entity1" : "nancy klobassa davidson",
         "entity1_index" : "nancy klobassa davidson/person",
         "verb" : "current",
         "verb_category" : "career",
         "entity2" : "registered nurse",
         "entity2_index" : "registered nurse/position",
         "event_type" : "Fact"
    },
    {
         "entity1" : "peggy moreland",
         "entity1_index" : "peggy moreland/person",
         "verb_category" : "person education",
         "entity2" : "master of science",
         "event_type" : "Summary"
    },
    {
         "entity1" : "peggy moreland",
         "entity1_index" : "peggy moreland/person",
         "verb" : "current",
         "verb_category" : "career",
         "entity2" : "member",
         "entity2_index" : "member/position",
         "event_type" : "Fact"
    }
    ],
    "groupids" : [ "4c927585d591d31d7b37097a" ],
    "index" : "doc_4c927585d591d31d7b37097a",
    "mediaType" : "News",
    "modified" : "1310493434159",
    "publishedDate" : "Fri Jul 08 01:00:00 EDT 2011",
    "source" : "MayoClinic: General Topics",
    "sourceKey" : "http.www.mayoclinic.com.rss.blog.xml",
    "tags" : [
        "topic:healthcare",
        "industry:healthcare",
        "mayo clinic",
        "health"
    ],
    "title" : "Dr. Bernstein diet and beyond",
    "url" : "http://www.mayoclinic.com/health/dr-bernstein-diet/MY01817/rss=5"
}

Annex - old format source

Sample RSS Feed Source (old format)
{
    "description" : "Article on Medical Issues",
    "extractType" : "Feed",
    "harvestBadSource" : false,
    "isApproved" : true,
    "isPublic" : true,
    "key" : "http.www.mayoclinic.com.rss.blog.xml",
    "mediaType" : "News",
    "modified" : "Oct 19, 2010 11:31:59 AM",
    "tags" : [
        "topic:healthcare",
        "industry:healthcare",
        "mayo clinic",
        "health"
    ],
    "title" : "MayoClinic: General Topics",
    "url" : "http://www.mayoclinic.com/rss/blog.xml"
	"useTextExtractor":"AlchemyAPI",
	"useExtractor":"OpenCalais"
}