I have an express server with a POST
endpoint that starts a crawler. When the crawler finishes it shuts down the whole server. Am I doing something wrong? How can I prevent it from happening?
The project looks something like this:
// server.js
const express = require('express')
const bodyParser = require('body-parser')
const startSearch = require('./crawler.js')
const app = express()
app.use(bodyParser.json())
app.post('/crawl', async (req, res) => {
const { foo, bar } = req.body
startSearch({ foo, bar })
res.end()
})
app.listen(PORT, () => console.log(`listening on port ${PORT}`))
// crawler.js
const Apify = require('apify')
const startSearch = ({ foo, bar }) => {
Apify.main(async () => {
const sources = [{
url: 'https://path_to_website.com',
userData: { foo, bar }
}]
const requestList = await Apify.openRequestList(null, sources)
const crawler = new Apify.PuppeteerCrawler({
requestList,
handlePageFunction: async ({ request, page }) => {
// do things using puppeteer
}
}
})
await crawler.run()
})
}
Just avoid using Apify.main()
. For details, see How to use Apify on Google Cloud Functions
(I thought I'm sending the answer, but it seems it was just a comment)