Search code examples
marklogic, marklogic-dhf

Failing to pass a parameter from SJS to XQuery in DHF 6.0.0


I cannot pass a variable from an SJS script to XQuery inside a DHF 6.0.0 project: it keeps reporting that the variable is not defined. However, the same script runs successfully in Query Console (QConsole).

Here is the error from the DHF error log.

Below is the successful run from QConsole.

QConsole test success


'use strict';

// Value handed to the evaluated script under the external variable name "input".
var message = 'hello from SJS';

// Source text passed to xdmp.eval, written as one string literal with backslash line continuations.
// The script requires the XQuery logging library, calls declareUpdate(), and logs the bare identifier `input`.
var evalScript = '\
const logging = require("/custom-modules/custom/loggingStep/log.xqy");\
declareUpdate();\
logging.log(input);\
';
// Maps the external variable name ("input") to the value supplied to the evaluated script.
var params = { "input": message }

console.log("params:", params);

// Evaluate the script in a transaction separate from the caller's; the result is not used.
xdmp.eval( evalScript, params,
{
    isolation:'different-transaction'
})

Here is how to replicate the issue. I am using a customized logging function to demonstrate the problem. Let us assume that the work can only be done with XQuery, in update mode, in a different transaction.

  • Environment

  • Set up the Flow, Step Definition and Step with com.marklogic.ml-data-hub 6.0.0


gradlew -i hubCreateFlow -PflowName=simpleFlow -PwithInlineSteps=false

gradlew hubCreateStepDefinition -PstepDefName=loggingStep -PstepDefType=custom -Pformat=sjs -i

gradlew -i hubCreateStep -PstepName=loggingSimpleStep -PstepType=custom -PstepDefName=loggingStep -i


  • Add log.xqy


xquery version "1.0-ml";

(: Library module exposing a single logging helper. :)
module namespace logging = "http://standards.org/custom/logging";


(:~
 : Passes the given message, prefixed with "xquery logging:", to xdmp:log.
 :
 : @param $message the text to log
 : @return the literal string 'xquery function call complete'
 :)
declare function logging:log($message as xs:string){
    let $_ := xdmp:log(fn:concat("xquery logging:", $message))
    return 'xquery function call complete'
};

  • Modify the default created main.sjs to include call to that log.xqy


/**
* This scaffolded step module provides a template for implementing your own logic as a DHF step.
* All of the comments in this module are intended to explain how to implement a DHF step. You are free to delete
* any or all of the comments at any point.
*/

const mjsProxy = require("/data-hub/core/util/mjsProxy.sjs");
const flowApi = mjsProxy.requireMjsModule('/data-hub/public/flow/flow-api.mjs');

/**
* Performs the main step processing on the given content, returning zero or many content objects. DHF will run this function
* in query (read-only) mode, as the intent of a step is for it to return content objects that DHF will then handle persisting.
*
* The content argument is either a content object, as defined by
* https://github.com/marklogic/marklogic-data-hub/blob/master/specs/models/ContentObject.schema.json, or it is an array of content
* objects defined by that same schema. The argument will be an array if acceptsBatch=true in the step configuration.
*
* The function must return a single content object or an array of content objects, defined by the same ContentObject schema
* referenced above. A returned content object may be the content object that was passed into this function, or it may be a
* new content object.
*
* @param content either a single content object, or an array of content objects
* @param options an object consisting of combined options from the runtime options, the step configuration, the flow options,
*  and the step definition options
* @returns a content object, or an array of content objects
*/
function main(content, options) {
 // Only referenced by the commented-out examples below.
 const inputDocument = content.value;

 // DHF recommends wrapping documents in an envelope, particularly for curated documents based on an entity model.
 // If your input document is already an envelope, you can typically just call toObject() it and modify what you need.
 // Otherwise, the below code is a starting point for constructing the 3 parts of an envelope.
 // If your input document is XML and you need to modify it, it is recommended to generate an XQuery custom step instead.
 // const instance = inputDocument.toObject();
 const instance = {};
 const headers = {};
 const triples = [];

 // If you are creating a new entity instance, it is recommended to define the entity type name, and optionally the version,
 // as shown below. This will result in an envelope/instance/info block being added to your document, as well as your instance
 // data being located under envelope/instance/(entity type name).
 // instance['$type'] = 'myEntityTypeName';
 // instance['$version'] = '0.0.1';

 // If you would like to include the input document as an attachment in the envelope, you can do so via the below code.
 // instance['$attachments'] = [inputDocument];

 // makeEnvelope is a convenience function for building an envelope with the inputs that were defined above.
 // You may wish to specify the output format in your step configuration. But for a custom step, which is typically coded
 // based on an expected output format, it's usually simpler to define the output format in the code.
 const outputFormat = 'json';
 const envelope = flowApi.makeEnvelope(instance, headers, triples, outputFormat);

   // Value handed to the evaluated script under the external variable name "input".
   var message = 'hello from SJS';

   // Source text passed to xdmp.eval, written as one string literal with backslash line continuations.
   // The script requires the XQuery logging library, calls declareUpdate(), and logs the bare identifier `input`.
   // NOTE(review): when this runs as a DHF 6.0.0 step, the bare `input` reference is reported as undefined;
   // referencing it as `external.input` is the suggested workaround (the script appears to be evaluated as MJS) - confirm.
   var evalScript = '\
   const logging = require("/custom-modules/custom/loggingStep/log.xqy");\
   declareUpdate();\
   logging.log(input);\
   ';
   // Maps the external variable name ("input") to the value supplied to the evaluated script.
   var params = { "input": message }
   // var params = { "allPendingDrafts": allPendingDrafts }
   // // var params = {"allPendingDrafts": JSON.parse(allPendingDrafts)}

   console.log("params:", params);

   // Evaluate the script in a transaction separate from the step's own; the result is not used.
   xdmp.eval( evalScript, params,
   {
       isolation:'different-transaction'
   })

 // Return a new content object. You may also modify the incoming content object and return it instead.
 // You may also choose any URI that you want, and either modify the content.context object or construct a new context object
 // based on the ContentObject schema that is referenced above.
 return {
   uri: content.uri,
   value: envelope,
   context: content.context
 };
}

// Export main as this step module's entry point.
module.exports = {
 main
};


  • Modify the step to select some documents.

{
  "collections" : [ "loggingSimpleStep" ],
  "permissions" : "data-hub-common,read,data-hub-common,update",
  "batchSize" : 100,
  "sourceDatabase" : "ML2-Feed-content",
  "targetDatabase" : "ML2-Feed-content",
  "provenanceGranularityLevel" : "off",
  "name" : "loggingSimpleStep",
  "description" : "",
  "stepDefinitionType" : "custom",
  "stepDefinitionName" : "loggingStep",
  "selectedSource" : "query",
  "sourceQuery" : "cts.collectionQuery([\"pending\"])",
  "stepId" : "loggingSimpleStep-custom",
  "lastUpdated" : "2024-02-21T06:05:39.882208+11:00"
}

  • Update the Flow to include that newly created step.


{
  "name" : "simpleFlow",
  "description" : "This is the default flow containing all of the default steps",
  "batchSize" : 100,
  "threadCount" : 4,
  "steps" : {
    "1" : {
      "stepId" : "loggingSimpleStep-custom"
    }
  },
  "lastUpdated" : "2024-02-21T06:08:27.354086+11:00"
}

  • After deploying the module, run the flow.

    gradlew hubRunFlow -PflowName=simpleFlow

I wonder what is wrong with my DHF code? Is it a bug in my code or a bug in DHF 6.0.0?


Solution

  • In Data Hub 6, the core code was changed from SJS to MJS. As a result, the code you pass to xdmp.eval appears to be evaluated as MJS, even though it is called from an SJS file, because an MJS file is what calls your SJS code. If you prefix the passed parameter in the script with "external.", as shown below, it should resolve the issue.

    'use strict';
    
    // Value handed to the evaluated script under the external variable name "input".
    var message = 'hello from SJS';
    
    // Source text passed to xdmp.eval, written as one string literal with backslash line continuations.
    // Inside the script the passed parameter is read with the "external." prefix (external.input).
    var evalScript = '\
    const logging = require("/custom-modules/custom/loggingStep/log.xqy");\
    declareUpdate();\
    logging.log(external.input);\
    ';
    // Maps the external variable name ("input") to the value supplied to the evaluated script.
    var params = { "input": message }
    
    console.log("params:", params);
    
    // Evaluate the script in a transaction separate from the caller's; the result is not used.
    xdmp.eval( evalScript, params,
    {
        isolation:'different-transaction'
    })
    

    I believe that, where reasonable, xdmp.invokeFunction or xdmp.invoke are better choices than xdmp.eval. They avoid potential code injection issues and would avoid any confusion about whether MJS or SJS code is being evaluated.