Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

In this example, the individual pages of an E-Book are ingested into infinit.e and then split into individual documents using "splitter."  The original document is then deleted.

...

 

The global JavaScript function enables "splitter" to format the input into the appropriate array output.

Code Block
 },
         {
            "display": "A global space to group all the complex parsing and processing logic, can be called from anywhere",
            "globals": {
                "scriptlang": "javascript",
                "scripts": [
                    "/* Builds one output document per entry of jsonarray; url, title, publishedDate and description are derived from topDoc. Returns the array of new documents. */\nfunction convert_to_docs(jsonarray, topDoc)\n{\n    var docs = [];\n    for (var docIt in jsonarray)\n    {\n        var predoc = jsonarray[docIt];\n        /* 1-based page number; explicit radix so the key is always parsed as decimal */\n        var pageNum = (parseInt(docIt, 10) + 1).toString();\n        var doc = {};\n        /* drop any query string from the parent url, then append the page number as a fragment */\n        doc.url = topDoc.url.replace(/[?].*/,\"\") + '#' + pageNum;\n        /* 'g' flag: unescape every escaped slash in the page text, not only the first one */\n        doc.fullText = predoc.replace(/\\\\\\//g,\"/\");\n        doc.title = topDoc.title + \"; Page: \" + pageNum;\n        doc.publishedDate = topDoc.publishedDate;\n        doc.description = topDoc.url;\n        docs.push(doc);\n    }\n    return docs;\n}\n\n"
                ]
            }
        },
        {
            "contentMetadata": [
                {
                    "fieldName": "pages",
                    "index": false,
                    "script": "div",
                    "scriptlang": "stream",
                    "store": true
                }
            ],
            "display": "Uses the PDF's internal structure to break each page into an element in a pages metadata field in the first document"
        },
        {
            "display": "Take the individual pages created in the previous step, convert them into docs, and then delete the original",
            "splitter": {
                "deleteExisting": true,
                "numPages": 10,
                "numResultsPerPage": 1,
                "script": "var docs = convert_to_docs(_doc.metadata['pages'], _doc); docs;",
                "scriptflags": "d",
                "scriptlang": "javascript"
            }
        },

 

 

Panel

Footnotes:

Legacy documentation:

Legacy documentation:

...