Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

http://www.w3schools.com/xml/simple.xml

Info

Note that when accessing Web documents you must use "rss.extraUrls" and specify minimally "url" and "title" fields, and not the top-level "url" (otherwise the URL is treated as an RSS feed rather than a standalone web page)

Code Block
language: html/xml
<?xml version="1.0" encoding="UTF-8"?>
<breakfast_menu>
   <food>
      <name>Belgian Waffles</name>
      <price>$5.95</price>
      <description>two of our famous Belgian Waffles with plenty of real maple syrup</description>
      <calories>650</calories>
   </food>
   <food>
      <name>Strawberry Belgian Waffles</name>
      <price>$7.95</price>
      <description>light Belgian waffles covered with strawberries and whipped cream</description>
      <calories>900</calories>
   </food>
   <food>
      <name>Berry-Berry Belgian Waffles</name>
      <price>$8.95</price>
      <description>light Belgian waffles covered with an assortment of fresh berries and whipped cream</description>
      <calories>900</calories>
   </food>
   <food>
      <name>French Toast</name>
      <price>$4.50</price>
      <description>thick slices made from our homemade sourdough bread</description>
      <calories>600</calories>
   </food>
   <food>
      <name>Homestyle Breakfast</name>
      <price>$6.95</price>
      <description>two eggs, bacon or sausage, toast, and our ever-popular hash browns</description>
      <calories>950</calories>
   </food>
</breakfast_menu>

...

Note the use of XPath to easily identify how to convert the top-level XML document into lots of little documents. The "script" in the "links" block ("rss.searchConfig.script" in the old source format) is then boilerplate and converts the XML into lots of small documents, with the "fullText" of each containing the JSON representation of the selected XML. This is then converted into metadata by the "contentMetadata" block ("unstructuredAnalysis" in the old source format). Normally a "docMetadata"/"entities"/"associations" block ("structuredAnalysis" in the old source format) would finally be used to set the per-document titles, descriptions, entities, etc.

Code Block
language: javascript
{
    "description": "wiy",
    "extractType": "Feed",
    "isPublic": true,
    "mediaType": "News",
    "tags": [
        "tag1"
    ],
    "title": "aaa xml test",
    "processingPipeline": [
        {
            "feed": {
                "extraUrls": [
                    {
                        "url": "http://www.w3schools.com/xml/simple.xml"
                    }
                ],
                "updateCycle_secs": 86400
            }
        },
        {
            "links": {
                "extraMeta": [
                    {
                        "context": "First",
                        "fieldName": "convert_to_json",
                        "flags": "o",
                        "script": "//breakfast_menu/food[*]",
                        "scriptlang": "xpath"
                    }
                ],
                "script": "function convert_to_docs(jsonarray, url)\n{\n    var docs = [];\n    for (var docIt in jsonarray) {\n        var predoc = jsonarray[docIt];\n        delete predoc.content;\n        var doc = {};\n        doc.url = _doc.url.replace(/[?].*/,\"\") + '#' + docIt;\n        doc.fullText = predoc;\n        doc.title = \"TBD\";\n        doc.description = \"TBD\";\n        docs.push(doc);\n    }\n    return docs;\n}\nvar docs = convert_to_docs(_doc.metadata['convert_to_json'], _doc.url);\ndocs;",
                "scriptflags": "d"
            }
        },
        {
            "contentMetadata": [
                {
                    "fieldName": "json",
                    "script": "var json = eval('('+text+')'); json;",
                    "scriptlang": "javascript"
                }
            ]
        }
    ]
}

Output

Code Block
language: javascript
{
    "communityId": ["4d38b72c054548f038a0414a"],
    "created": "Jun 5, 2013 09:12:15 PM UTC",
    "description": "TBD",
    "fullText": "{ \"calories\" : \"650\" , \"description\" : \"two of our famous Belgian Waffles with plenty of real maple syrup\" , \"price\" : \"$5.95\" , \"name\" : \"Belgian Waffles\"}",
    "mediaType": ["News"],
    "metadata": {"json": [{
        "calories": "650",
        "description": "two of our famous Belgian Waffles with plenty of real maple syrup",
        "name": "Belgian Waffles",
        "price": "$5.95"
    }]},
    "modified": "Jun 5, 2013 09:12:15 PM UTC",
    "publishedDate": "Jun 5, 2013 09:12:15 PM UTC",
    "source": ["aaa xml test"],
    "sourceKey": ["www.w3schools.com.xml.simple.xml"],
    "tags": ["tag1"],
    "title": "TBD",
    "url": "http://www.w3schools.com/xml/simple.xml#0"
}
{
    "communityId": ["4d38b72c054548f038a0414a"],
    "created": "Jun 5, 2013 09:12:15 PM UTC",
    "description": "TBD",
    "fullText": "{ \"calories\" : \"900\" , \"description\" : \"light Belgian waffles covered with strawberries and whipped cream\" , \"price\" : \"$7.95\" , \"name\" : \"Strawberry Belgian Waffles\"}",
    "mediaType": ["News"],
    "metadata": {"json": [{
        "calories": "900",
        "description": "light Belgian waffles covered with strawberries and whipped cream",
        "name": "Strawberry Belgian Waffles",
        "price": "$7.95"
    }]},
    "modified": "Jun 5, 2013 09:12:15 PM UTC",
    "publishedDate": "Jun 5, 2013 09:12:15 PM UTC",
    "source": ["aaa xml test"],
    "sourceKey": ["www.w3schools.com.xml.simple.xml"],
    "tags": ["tag1"],
    "title": "TBD",
    "url": "http://www.w3schools.com/xml/simple.xml#1"
}
{
    "communityId": ["4d38b72c054548f038a0414a"],
    "created": "Jun 5, 2013 09:12:15 PM UTC",
    "description": "TBD",
    "fullText": "{ \"calories\" : \"900\" , \"description\" : \"light Belgian waffles covered with an assortment of fresh berries and whipped cream\" , \"price\" : \"$8.95\" , \"name\" : \"Berry-Berry Belgian Waffles\"}",
    "mediaType": ["News"],
    "metadata": {"json": [{
        "calories": "900",
        "description": "light Belgian waffles covered with an assortment of fresh berries and whipped cream",
        "name": "Berry-Berry Belgian Waffles",
        "price": "$8.95"
    }]},
    "modified": "Jun 5, 2013 09:12:15 PM UTC",
    "publishedDate": "Jun 5, 2013 09:12:15 PM UTC",
    "source": ["aaa xml test"],
    "sourceKey": ["www.w3schools.com.xml.simple.xml"],
    "tags": ["tag1"],
    "title": "TBD",
    "url": "http://www.w3schools.com/xml/simple.xml#2"
}
{
    "communityId": ["4d38b72c054548f038a0414a"],
    "created": "Jun 5, 2013 09:12:15 PM UTC",
    "description": "TBD",
    "fullText": "{ \"calories\" : \"600\" , \"description\" : \"thick slices made from our homemade sourdough bread\" , \"price\" : \"$4.50\" , \"name\" : \"French Toast\"}",
    "mediaType": ["News"],
    "metadata": {"json": [{
        "calories": "600",
        "description": "thick slices made from our homemade sourdough bread",
        "name": "French Toast",
        "price": "$4.50"
    }]},
    "modified": "Jun 5, 2013 09:12:15 PM UTC",
    "publishedDate": "Jun 5, 2013 09:12:15 PM UTC",
    "source": ["aaa xml test"],
    "sourceKey": ["www.w3schools.com.xml.simple.xml"],
    "tags": ["tag1"],
    "title": "TBD",
    "url": "http://www.w3schools.com/xml/simple.xml#3"
}
{
    "communityId": ["4d38b72c054548f038a0414a"],
    "created": "Jun 5, 2013 09:12:15 PM UTC",
    "description": "TBD",
    "fullText": "{ \"calories\" : \"950\" , \"description\" : \"two eggs, bacon or sausage, toast, and our ever-popular hash browns\" , \"price\" : \"$6.95\" , \"name\" : \"Homestyle Breakfast\"}",
    "mediaType": ["News"],
    "metadata": {"json": [{
        "calories": "950",
        "description": "two eggs, bacon or sausage, toast, and our ever-popular hash browns",
        "name": "Homestyle Breakfast",
        "price": "$6.95"
    }]},
    "modified": "Jun 5, 2013 09:12:15 PM UTC",
    "publishedDate": "Jun 5, 2013 09:12:15 PM UTC",
    "source": ["aaa xml test"],
    "sourceKey": ["www.w3schools.com.xml.simple.xml"],
    "tags": ["tag1"],
    "title": "TBD",
    "url": "http://www.w3schools.com/xml/simple.xml#4"
}

 

Old format source

Code Block
language: javascript
{
    "description": "wiy",
    "extractType": "Feed",
    "isPublic": true,
    "mediaType": "News",
    "rss": {
        "searchConfig": {
            "extraMeta": [
                {
                    "context": "First",
                    "fieldName": "convert_to_json",
                    "flags": "o",
                    "script": "//breakfast_menu/food[*]",
                    "scriptlang": "xpath"
                }
            ],
            "script": "function convert_to_docs(jsonarray, url)\n{\n    var docs = [];\n    for (var docIt in jsonarray) {\n        var predoc = jsonarray[docIt];\n        delete predoc.content;\n        var doc = {};\n        doc.url = _doc.url.replace(/[?].*/,\"\") + '#' + docIt;\n        doc.fullText = predoc;\n        doc.title = \"TBD\";\n        doc.description = \"TBD\";\n        docs.push(doc);\n    }\n    return docs;\n}\nvar docs = convert_to_docs(_doc.metadata['convert_to_json'], _doc.url);\ndocs;",
            "scriptflags": "d"
        },
        "extraUrls": [
            {
                "url": "http://www.w3schools.com/xml/simple.xml"
            }
        ],
        "updateCycle_secs": 86400
    },
    "tags": [
        "tag1"
    ],
    "title": "aaa xml test",
    "unstructuredAnalysis": {
        "meta": [
            {
                "context": "First",
                "fieldName": "json",
                "script": "var json = eval('('+text+')'); json;",
                "scriptlang": "javascript"
            }
        ]
    },
    "useExtractor": "none",
    "useTextExtractor": "none"
}

 

...