Search code examples
express asynchronous filestream ibm-watson speech-to-text

Files larger than ~7 MB throw "Response not received." on IBM Watson Speech-To-Text async createJob call


I'm working on server-side speech-to-text transcription, and I was able to get a POST endpoint working for smaller files, but anything larger than ~7 MB throws an error that doesn't make much sense to me.

I know this code works for small files, and that the callback URL is registered correctly. I've tried wrapping the "readableTranscriptStream" creation in a setTimeout() call, just to make sure it wasn't an issue of the full file not being passed to the createJob call. I've also tried just passing req.file.buffer as an argument for the audio param. I also know it isn't an issue of the encoding of the files being incorrect, as I've used the same audio file, slowly increasing the length and size of the file until it threw this error, and it worked until I hit about 7.2 MB, or ~3 min of audio in a 16-bit encoded .wav file.

I've also tried this with fs.createReadStream('./local_test.wav') as the argument and gotten the same error back, though when I tried that, the _requestBodyLength field in the error was 10485760, and _requestBodyBuffers was an array of objects. I realize this 10485760 is the same as the maxBodyLength, but the docs for the API say that "You can pass a maximum of 1 GB and a minimum of 100 bytes of audio with a request", and the test audio was, again, ~7.2 MB.

       username: process.env.wastonUsername,
       password: process.env.watsonPassword,
       url: 'https://stream.watsonplatform.net/speech-to-text/api/'
     });

   const storage = multer.memoryStorage();
   const upload = multer({ storage: storage , limit: {fields: 1, fileSize: 209715200, files:1, parts:2}});

   upload.single('track')(req,res, (err) => {
       req.setTimeout(0);
       if (err) {
           console.log(err);
           return res.status(400).json({ message: err })
       }

       const registerCallbackParams = {
           callback_url: <my_callback_url>,
           user_secret: "Test"
       };

       const readableTranscriptStream = new Readable();
       readableTranscriptStream.push(req.file.buffer);
       readableTranscriptStream.push(null);

       const createJobParams = {
           audio: readableTranscriptStream,
           callback_url: <my_callback_url>,
           content_type: req.file.mimetype,
           events:"recognitions.completed_with_results",
           inactivity_timeout: -1
         };

       speechToText.createJob(createJobParams)
       .then(recognitionJob => {
           console.log(recognitionJob);
       })
       .catch(err => {
           console.log('error:', err);
       }); 
   }) 

The error I'm getting back is :

error:{  
   Error:   Response not received. Body of error is HTTP ClientRequest object 
at formatError (/app/node_modules/ibm-cloud-sdk-core/lib/requestwrapper.js:111:17   ) 
at /app/node_modules/ibm-cloud-sdk-core/lib/requestwrapper.js:259:19   at process._tickCallback (internal/process/next_tick.js:68:7   ) 
message:'Response not received. Body of error is HTTP ClientRequest object',
   body:Writable   {  
      _writableState:WritableState      {  
         objectMode:false,
         highWaterMark:16384,
         finalCalled:false,
         needDrain:false,
         ending:false,
         ended:false,
         finished:false,
         destroyed:false,
         decodeStrings:true,
         defaultEncoding:'utf8',
         length:0,
         writing:false,
         corked:0,
         sync:true,
         bufferProcessing:false,
         onwrite:[  
            Function:bound onwrite
         ],
         writecb:null,
         writelen:0,
         bufferedRequest:null,
         lastBufferedRequest:null,
         pendingcb:0,
         prefinished:false,
         errorEmitted:false,
         emitClose:true,
         bufferedRequestCount:0,
         corkedRequestsFree:[  
            Object
         ]
      },
      writable:true,
      _events:[  
         Object:null         prototype
      ]      {  
         response:[  
            Function:handleResponse
         ],
         error:[  
            Function:handleRequestError
         ]
      },
      _eventsCount:2,
      _maxListeners:undefined,
      _options:{  
         maxRedirects:21,
         maxBodyLength:10485760,
         protocol:'https:',
         path:'/speech-to-text/api/v1/recognitions?callback_url=<my_callback_url>&events=recognitions.completed_with_results&inactivity_timeout=-1',
         method:'post',
         headers:[  
            Object
         ],
         agent:[  
            Agent
         ],
         auth:undefined,
         hostname:'stream.watsonplatform.net',
         port:null,
         nativeProtocols:[  
            Object
         ],
         pathname:'/speech-to-text/api/v1/recognitions',
         search:'?callback_url=<my_callback_url>&events=recognitions.completed_with_results&inactivity_timeout=-1'
      },
      _ended:false,
      _ending:true,
      _redirectCount:0,
      _redirects:[  

      ],
      _requestBodyLength:0,
      _requestBodyBuffers:[  

      ],
      _onNativeResponse:[  
         Function
      ],
      _currentRequest:ClientRequest      {  
         _events:[  
            Object
         ],
         _eventsCount:6,
         _maxListeners:undefined,
         output:[  

         ],
         outputEncodings:[  

         ],
         outputCallbacks:[  

         ],
         outputSize:0,
         writable:true,
         _last:true,
         chunkedEncoding:false,
         shouldKeepAlive:false,
         useChunkedEncodingByDefault:true,
         sendDate:false,
         _removedConnection:false,
         _removedContLen:false,
         _removedTE:false,
         _contentLength:null,
         _hasBody:true,
         _trailer:'',
         finished:false,
         _headerSent:false,
         socket:null,
         connection:null,
         _header:null,
         _onPendingData:[  
            Function:noopPendingOutput
         ],
         agent:[  
            Agent
         ],
         socketPath:undefined,
         timeout:undefined,
         method:'POST',
         path:'/speech-to-text/api/v1/recognitions?callback_url=<my_callback_url>&events=recognitions.completed_with_results&inactivity_timeout=-1',
         _ended:false,
         res:null,
         aborted:1558070725953,
         timeoutCb:null,
         upgradeOrConnect:false,
         parser:null,
         maxHeadersCount:null,
         _redirectable:[  
            Circular
         ],
         [  
            Symbol(isCorked)
         ]:false,
         [  
            Symbol(outHeadersKey)
         ]:[  
            Object
         ]
      },
      _currentUrl:'https://stream.watsonplatform.net/speech-to-text/api/v1/recognitions?callback_url=<my_callback_url>&events=recognitions.completed_with_results&inactivity_timeout=-1'
   }
}

Solution

  • Try adding maxContentLength: Infinity as an option when instantiating SpeechToText

    // The dumped request options in the error report maxBodyLength:10485760,
    // i.e. the outgoing body was capped at 10485760 bytes. maxContentLength is
    // the option suggested here to lift that cap.
    // NOTE(review): presumably the SDK forwards this option to its underlying
    // HTTP client — confirm against the SDK version in use.
    const speechToText = new SpeechToTextV1({
      username: 'user',
      password: 'pass',
      version: '2019-01-01',
      maxContentLength: Infinity,
    });