Feed Source
The following code samples show how to create a basic source that extracts data from an RSS feed.
Sample RSS Feed Source
{ "description": "Article on Medical Issues", "harvestBadSource": false, "isApproved": true, "isPublic": true, "key": "http.www.mayoclinic.com.rss.blog.xml", "mediaType": "News", "modified": "Oct 19, 2010 11:31:59 AM", "tags": [ "topic:healthcare", "industry:healthcare", "mayo clinic", "health" ], "title": "MayoClinic: General Topics", "processingPipeline": [ { "feed": { "extraUrls": [ { "url": "http://www.mayoclinic.com/rss/blog.xml" } ] } }, { "textEngine": { "engineName": "AlchemyAPI" } }, { "featureEngine": { "engineName": "OpenCalais" } } ] }
.
Sample Output from RSS Feed Source Above
{ "_id" : "4e1c8afa7d56bb818ed10f76", "created" : "1310493434159", "description" : "Clarify the role of carbohydrates in the Dr. Bernstein diet and find a healthy eating plan that works for you.", "entities" : [ { "actual_name" : "certified diabetes", "dimension" : "What", "disambiguous_name" : "certified diabetes", "doccount" : NumberLong(38), "frequency" : 3, "gazateer_index" : "certified diabetes/medicalcondition", "relevance" : "0.711", "totalfrequency" : NumberLong(114), "type" : "MedicalCondition" }, { "actual_name" : "Diabetes Unit", "dimension" : "Who", "disambiguous_name" : "Diabetes Unit", "doccount" : NumberLong(38), "frequency" : 1, "gazateer_index" : "diabetes unit/organization", "relevance" : "0.235", "totalfrequency" : NumberLong(38), "type" : "Organization" }, { "actual_name" : "Mayo Clinic", "dimension" : "What", "disambiguous_name" : "Mayo Clinic", "doccount" : NumberLong(514), "frequency" : 2, "gazateer_index" : "mayo clinic/facility", "relevance" : "0.305", "totalfrequency" : NumberLong(1033), "type" : "Facility" }, { "actual_name" : "Rochester", "dimension" : "Where", "disambiguous_name" : "Rochester,Minnesota,United States", "doccount" : NumberLong(345), "frequency" : 2, "gazateer_index" : "rochester,minnesota,united states/city", "geotag" : { "latitude" : "44.0217", "longitude" : "-92.4697", "loc" : [ 44.0217, -92.4697 ] }, "linkdata" : [ "http://d.opencalais.com/er/geo/city/ralg-geo1/9d780656-b9a4-6789-bcaf-370cabc32490" ], "relevance" : "0.305", "totalfrequency" : NumberLong(404), "type" : "City" }, { "actual_name" : "Minnesota", "dimension" : "Where", "disambiguous_name" : "Minnesota,United States", "doccount" : NumberLong(2103), "frequency" : 2, "gazateer_index" : "minnesota,united states/provinceorstate", "geotag" : { "latitude" : "46.0", "longitude" : "-94.0", "loc" : [ 46, -94 ] }, "linkdata" : [ "http://d.opencalais.com/er/geo/provinceorstate/ralg-geo1/b99d1bcd-ec35-113e-54ad-4d6c44682ce1" ], "relevance" : "0.305", "totalfrequency" : 
NumberLong(3134), "type" : "ProvinceOrState" }, { "actual_name" : "R.N.", "dimension" : "Who", "disambiguous_name" : "R.N.", "doccount" : NumberLong(108), "frequency" : 1, "gazateer_index" : "r.n./position", "relevance" : "0.138", "totalfrequency" : NumberLong(130), "type" : "Position" }, { "actual_name" : "coordinator", "dimension" : "Who", "disambiguous_name" : "coordinator", "doccount" : NumberLong(220), "frequency" : 1, "gazateer_index" : "coordinator/position", "relevance" : "0.235", "totalfrequency" : NumberLong(238), "type" : "Position" }, { "actual_name" : "Division of Endocrinology, Diabetes, Metabolism, & Nutrition", "dimension" : "Who", "disambiguous_name" : "Division of Endocrinology, Diabetes, Metabolism, & Nutrition", "doccount" : NumberLong(38), "frequency" : 2, "gazateer_index" : "division of endocrinology, diabetes, metabolism, & nutrition/organization", "relevance" : "0.305", "totalfrequency" : NumberLong(76), "type" : "Organization" }, { "actual_name" : "self-management", "dimension" : "What", "disambiguous_name" : "self-management", "doccount" : NumberLong(72), "frequency" : 1, "gazateer_index" : "self-management/medicaltreatment", "relevance" : "0.076", "totalfrequency" : NumberLong(78), "type" : "MedicalTreatment" }, { "actual_name" : "Peggy Moreland", "dimension" : "Who", "disambiguous_name" : "Peggy Moreland", "doccount" : NumberLong(38), "frequency" : 3, "gazateer_index" : "peggy moreland/person", "relevance" : "0.358", "totalfrequency" : NumberLong(114), "type" : "Person" }, { "actual_name" : "University of Phoenix", "dimension" : "What", "disambiguous_name" : "University of Phoenix", "doccount" : NumberLong(64), "frequency" : 1, "gazateer_index" : "university of phoenix/facility", "relevance" : "0.102", "totalfrequency" : NumberLong(68), "type" : "Facility" }, { "actual_name" : "registered nurse", "dimension" : "Who", "disambiguous_name" : "registered nurse", "doccount" : NumberLong(69), "frequency" : 1, "gazateer_index" :
"registered nurse/position", "relevance" : "0.335", "totalfrequency" : NumberLong(70), "type" : "Position" }, { "actual_name" : "American Diabetes Association", "dimension" : "Who", "disambiguous_name" : "American Diabetes Association", "doccount" : NumberLong(120), "frequency" : 1, "gazateer_index" : "american diabetes association/organization", "relevance" : "0.102", "totalfrequency" : NumberLong(226), "type" : "Organization" }, { "actual_name" : "N.\n Peggy Moreland", "dimension" : "Who", "disambiguous_name" : "N.\n Peggy Moreland", "doccount" : NumberLong(38), "frequency" : 1, "gazateer_index" : "n.\n peggy moreland/person", "relevance" : "0.146", "totalfrequency" : NumberLong(38), "type" : "Person" }, { "actual_name" : "insulin pump", "dimension" : "What", "disambiguous_name" : "insulin pump", "doccount" : NumberLong(52), "frequency" : 1, "gazateer_index" : "insulin pump/medicaltreatment", "relevance" : "0.184", "totalfrequency" : NumberLong(69), "type" : "MedicalTreatment" }, { "actual_name" : "M.S.N.\n Peggy", "dimension" : "Who", "disambiguous_name" : "M.S.N.\n Peggy", "doccount" : NumberLong(38), "frequency" : 2, "gazateer_index" : "m.s.n.\n peggy/person", "relevance" : "0.482", "totalfrequency" : NumberLong(76), "type" : "Person" }, { "actual_name" : "Nancy Klobassa Davidson", "dimension" : "Who", "disambiguous_name" : "Nancy Klobassa Davidson", "doccount" : NumberLong(38), "frequency" : 5, "gazateer_index" : "nancy klobassa davidson/person", "relevance" : "0.659", "totalfrequency" : NumberLong(190), "type" : "Person" }, { "actual_name" : "University of Phoenix", "dimension" : "Who", "disambiguous_name" : "University of Phoenix", "doccount" : NumberLong(65), "frequency" : 1, "gazateer_index" : "university of phoenix/organization", "relevance" : "0.102", "totalfrequency" : NumberLong(69), "type" : "Organization" }, { "actual_name" : "diabetes", "dimension" : "What", "disambiguous_name" : "diabetes", "doccount" : NumberLong(1702), "frequency" : 3, "gazateer_index" :
"diabetes/medicalcondition", "relevance" : "0.733", "totalfrequency" : NumberLong(4284), "type" : "MedicalCondition" }, { "actual_name" : "insulin therapy", "dimension" : "What", "disambiguous_name" : "insulin therapy", "doccount" : NumberLong(53), "frequency" : 1, "gazateer_index" : "insulin therapy/medicaltreatment", "relevance" : "0.235", "totalfrequency" : NumberLong(57), "type" : "MedicalTreatment" }, { "actual_name" : "American Association of Diabetes Educators", "dimension" : "Who", "disambiguous_name" : "American Association of Diabetes Educators", "doccount" : NumberLong(38), "frequency" : 1, "gazateer_index" : "american association of diabetes educators/organization", "relevance" : "0.102", "totalfrequency" : NumberLong(38), "type" : "Organization" }, { "actual_name" : "member", "dimension" : "Who", "disambiguous_name" : "member", "doccount" : NumberLong(795), "frequency" : 1, "gazateer_index" : "member/position", "relevance" : "0.102", "totalfrequency" : NumberLong(881), "type" : "Position" } ], "events" : [ { "entity1" : "nancy klobassa davidson", "entity1_index" : "nancy klobassa davidson/person", "verb" : "be", "verb_category" : "generic relations", "entity2" : "coordinator of the diabetes unit's intensive insulin therapy program within the division of endocrinology", "event_type" : "Summary" }, { "entity1" : "peggy moreland", "entity1_index" : "peggy moreland/person", "verb" : "work", "verb_category" : "generic relations", "event_type" : "Summary" }, { "entity1" : "nancy klobassa davidson", "entity1_index" : "nancy klobassa davidson/person", "verb" : "current", "verb_category" : "career", "entity2" : "coordinator", "entity2_index" : "coordinator/position", "event_type" : "Fact" }, { "entity1" : "m.s.n.\n peggy", "entity1_index" : "m.s.n.\n peggy/person", "verb" : "current", "verb_category" : "career", "entity2" : "r.n.", "entity2_index" : "r.n./position", "event_type" : "Fact" }, { "entity1" : "peggy moreland", "entity1_index" :
"peggy moreland/person", "verb" : "graduate", "verb_category" : "generic relations", "event_type" : "Summary" }, { "entity1" : "n.\n peggy moreland", "entity1_index" : "n.\n peggy moreland/person", "verb" : "be", "verb_category" : "generic relations", "entity2" : "a certified diabetes educator", "event_type" : "Summary" }, { "entity1" : "nancy klobassa davidson", "entity1_index" : "nancy klobassa davidson/person", "verb" : "be", "verb_category" : "generic relations", "entity2" : "a certified diabetes educator", "event_type" : "Summary" }, { "entity1" : "peggy moreland", "entity1_index" : "peggy moreland/person", "verb" : "current", "verb_category" : "career", "entity2" : "r.n.", "entity2_index" : "r.n./position", "event_type" : "Fact" }, { "entity1" : "nancy klobassa davidson", "entity1_index" : "nancy klobassa davidson/person", "verb" : "be", "verb_category" : "generic relations", "entity2" : "a registered nurse", "event_type" : "Summary" }, { "entity1" : "nancy klobassa davidson", "entity1_index" : "nancy klobassa davidson/person", "verb" : "work", "verb_category" : "generic relations", "event_type" : "Summary" }, { "entity1" : "nancy klobassa davidson", "entity1_index" : "nancy klobassa davidson/person", "verb" : "current", "verb_category" : "career", "entity2" : "registered nurse", "entity2_index" : "registered nurse/position", "event_type" : "Fact" }, { "entity1" : "peggy moreland", "entity1_index" : "peggy moreland/person", "verb_category" : "person education", "entity2" : "master of science", "event_type" : "Summary" }, { "entity1" : "peggy moreland", "entity1_index" : "peggy moreland/person", "verb" : "current", "verb_category" : "career", "entity2" : "member", "entity2_index" : "member/position", "event_type" : "Fact" } ], "groupids" : [ "4c927585d591d31d7b37097a" ], "index" : "doc_4c927585d591d31d7b37097a", "mediaType" : "News", "modified" : "1310493434159", "publishedDate" : "Fri Jul 08 01:00:00 EDT 2011", "source" : "MayoClinic: General Topics", "sourceKey" :
"http.www.mayoclinic.com.rss.blog.xml", "tags" : [ "topic:healthcare", "industry:healthcare", "mayo clinic", "health" ], "title" : "Dr. Bernstein diet and beyond", "url" : "http://www.mayoclinic.com/health/dr-bernstein-diet/MY01817/rss=5" }
Annex - old format source
Sample RSS Feed Source
{ "description" : "Article on Medical Issues", "extractType" : "Feed", "harvestBadSource" : false, "isApproved" : true, "isPublic" : true, "key" : "http.www.mayoclinic.com.rss.blog.xml", "mediaType" : "News", "modified" : "Oct 19, 2010 11:31:59 AM", "tags" : [ "topic:healthcare", "industry:healthcare", "mayo clinic", "health" ], "title" : "MayoClinic: General Topics", "url" : "http://www.mayoclinic.com/rss/blog.xml", "useTextExtractor" : "AlchemyAPI", "useExtractor" : "OpenCalais" }