Search code examples
node.jspostgresqlexpressexpress-handlebarsexpress-router

Node.js : express route, query data from database and render in template.hbs view


I want to create a Node.js application that does web scraping of some sites, saves data on a PostgreSQL database and then shows visualizations (in D3.js) of this data on a web page. I thought about splitting the front-end part (creating and displaying visualizations) and the back-end part (doing web scraping and updating the db).

The skeleton of the two applications (there are two because I divide the tasks into two apps) is as follows.

Back-end app (scraper):

  • connection to the db
  • creation of tables if they do not exist
  • scraper of data
  • saving data on the db
  • disconnection from the db.

This back-end application must be started only a couple of times a year (to do this I can configure a CRON file if for example Unix is ​​used).

Front-end app (viz):

  • connection to the db
  • start a server that is waiting on port 3000 (I need it for visualizations)
  • every time the user refreshes the page (onLoad()), the app makes a query (SELECT) that takes data from the database. In this way the data are always updated.

This application is started by the programmer only once (ideally).

I created a folder structure of this type (I used npm init and Express):

project
 |_ scraper
     |_ helpers // contains some useful .js files
          |_ elaborateJson.js
          |_ saveOnDb.js
          |_ utilFunc.js
     |_ node_modules // modules installed using `npm install moduleName --save`
     |_ routes // contains the files that make scraping
          |_ downloaderHome.js
          |_ downloaderWork.js
     |_ services // contains a files concerning the db
             |_ postgreSQLlib.js
     |_ app.js
     |_ package.json
     |_ package-lock.json
 |_ viz
     |_ helpers // // contains some useful .js files
          |_ utilFunc.js
     |_ node_modules // modules installed using `npm install moduleName --save`
     |_ public // contains files for visualizations
          |_ index.handlebars
          |_ script.js
          |_ style.css
     |_ services // contains a file concerning the db
             |_ postgreSQLlib.js
     |_ app.js
     |_ package.json
     |_ package-lock.json

With this structure I already have two problems that I don't know how to solve:

  1. the postgreSQLlib.js file (and also utilFunc.js) is the same in both scraper and viz. How can I avoid this duplication of the code?

  2. I had to install some modules (for example express-handlebars and express) twice in the scraper and viz folders.

This is project/scraper/app.js:

const downloaderHome = require('./routes/downloaderHome.js');
const downloaderWork = require('./routes/downloaderWork.js');
const postgreSQLlib = require('./services/postgreSQLlib.js');
const saveOnDb = require('./helpers/saveOnDb.js');
const utilFunc = require('./helpers/utilFunc.js');
const express = require('express');
const exphbs = require('express-handlebars');

var app = express();

start();

async function start() {
    console.log('\n Connect to db');
    await postgreSQLlib.connect();

    console.log('\n Create tables if they do not exist');
    await postgreSQLlib.createHomeTable();
    await postgreSQLlib.createWorkTable();

    console.log('\n Check if table \'home\' is updated or not');
    if(!await utilFunc.isTableUpdated('home', 6418)) { // 6308
        console.log('\n   Download data for home');
        await downloaderHome.download();
        console.log('\n   Saving data for home on db');
        await saveOnDb.saveHome();
    }   

    console.log('\n Check if table \'work\' is updated or not');
    if(!await utilFunc.isTableUpdated('work', 6804)) {
        console.log('\n   Download data for work');
        await downloaderWork.download();
        console.log('\n   Saving data for work on db');
        await saveOnDb.saveWork();
    }

    console.log('\n Disconnect from db');
    await postgreSQLlib.disconnect();
}

This is project/viz/app.js:

const postgreSQLlib = require('./services/postgreSQLlib.js');
const utilFunc = require('./helpers/utilFunc.js');
const express = require('express');
const exphbs = require('express-handlebars');
const http = require('http');

var app = express();

var response;
var callback;

start();

async function start() {
    console.log('\n Connect to db');
    await postgreSQLlib.connect();

    // how do I check when page is refreshed?!
    http.get({
            hostname: 'localhost',
            port: 3000,
            path: '/',
            agent: false
        }, callback);

    callback = function(res) {
        response = res;
        console.log(response); // here response will return an object
        console.log('refresh callback');
    }

    console.log(response);
    console.log('refresh');

    ///////////////////////////////////////////////
    // How do I check the disconnection from the db? 
    // If I disconnect now, the visualizations are no longer work. 
    // So when do I get disconnected? 
    // Create problems leaving the connection to the active db?
    ///////////////////////////////////////////////
    //console.log('\n Disconnect from db');
    //await postgreSQLlib.disconnect();
}

The first application (project/scraper/app.js) works perfectly.

The second application (project/viz/app.js) no. I would like you to do this:

  • connection to the db [done. It works]
  • start a server that is waiting on port 3000 (I need it for visualizations) [how do I do it? Look down (*)]
  • every time the user refreshes the page (onLoad()), the app makes a query (SELECT) that takes data from the database [how do I do it?]

(*) I had thought of something like this:

async function start() {
    console.log('\n Connect to db');
    await postgreSQLlib.connect();

    console.log('\n Get data from db');
    var dataHome = await postgreSQLlib.getTableHome();
    var dataWork = await postgreSQLlib.getTableWork();

    //console.log('\n Connect to my server');
    pageLoad(dataHome, dataWork); 
}

function pageLoad(dataHome, dataWork) {
    var hbs = exphbs.create({
        helpers: {
            getDataHome: function() { 
                return JSON.stringify(dataHome); 
            },
            getDataWork: function() { 
                return JSON.stringify(dataWork); 
            }
        }
    });

    app.engine('handlebars', hbs.engine);
    app.set('view engine', 'handlebars');

    app.get('/', function(req, res, next) {
        res.render('index', { // index is html filename
            showTitle: true,
        });
    });
    console.log('Go to http://localhost:3000/ to see visualizations');
    app.listen(3000);
}

Where dataHome and dataWork are two objects that contain data downloaded from the db using a SELECT query. But in this way the data are scrapped only once and not every time the user refreshes the page.

Help would be greatly appreciated. Thank you!

EDIT

Could you be more precise? I tried to do that but it doesn't work:

project/viz/app.js:

const postgreSQLlib = require('../shared_libs/postgreSQLlib.js');
const express = require('express');

var app = express();

start();
async function start() {
    console.log('Connect to db');
    await postgreSQLlib.connect();

    app.get('/', fetchFreshData);
}

async function fetchFreshData(req, res) {
    // download data from db
    var dataHome = await postgreSQLlib.getTableHome();
    var dataWork = await postgreSQLlib.getTableWork();
    // fill this JSON using the results
    var viewData = {dataHome, dataWork};
    // pass data to view
    res.render('index', viewData);
}

project\viz\view\index.handlebars:

<!DOCTYPE html>
<html lang='en'>
    <head>
        <meta charset='utf-8'>
        <title>Map</title>
        <script src='https://d3js.org/d3.v5.js' charset='utf-8'></script>
        <link rel='stylesheet' type='text/css' href='/style.css' media='screen'/>
    </head>
    <body>
        <div id='example'></div>
    </body>
    <script src='/script.js'></script>
</html>

project\viz\view\script.js:

console.log('viewData:', viewData);

Where I'm wrong?

EDIT 2

Ok, I modify again the viz/app.js code:

const postgreSQLlib = require('../shared_libs/postgreSQLlib.js');
const express = require('express');
const exphbs = require('express-handlebars');

var app = express();

start();

async function start() {
    await postgreSQLlib.connect();

    var hbs = Handlebars.registerHelper('json', function(context) {
        return JSON.stringify(context);
    });
    app.engine('handlebars', hbs.engine);
    app.set('view engine', 'handlebars');
    app.get('/', fetchFreshData);
    console.log('Go to http://localhost:3000/ to see data');
    app.listen(3000);
}

async function fetchFreshData(req, res) {
    // download data from db
    var dataHome = await postgreSQLlib.getTableHome();
    var dataWork = await postgreSQLlib.getTableWork();
    // fill this JSON using the results
    var viewData = {}; 
    viewData.timestamp = Date.now();
    viewData.entries = dataHome;
    // pass data to view
    res.render('index', viewData);
}

When I run the app, there are no errors but if I connect to http://localhost:3000/, the browser tells me I cann't reach the site. I feel a bit stupid...

EDIT 3

If I understand correctly your code, there is a (distracting) error in your code. In returnOBJ() instead of res.render('index', viewData); it should be res.render('obj', viewData); (related to obj.hbs file). Right?

I change the index.hbs file in this way:

<html lang='en'>
    <head>
        <meta charset='utf-8'>
        <title>Index</title>
        <script src='https://d3js.org/d3.v5.js' charset='utf-8'></script>
        <link rel='stylesheet' type='text/css' href='/style.css' media='screen'/>
    </head>
    <body>
        <h1>INDEX<small>{{timestamp}}</small></h1>
    </body>
    <script> 
        // add global variables in the .hbs file 
        window.viewData_dataWork = {{ json entries }}
        console.log(window.viewData);
    </script>
    <script src='/script.js'></script>
</html>

But I get:

(node:207156) UnhandledPromiseRejectionWarning: Error: callback function required
    at Function.engine (C:\...\node_modules\express\lib\application.js:295:11)
    at start (C:\...\viz\app.js:20:6)
    at <anonymous>
    at process._tickCallback (internal/process/next_tick.js:182:7)
(node:207156) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). (rejection id: 1)
(node:207156) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.

I also don't understand this piece of code.

app.set('view engine', 'hbs');
app.engine('hbs', hbs.__express);
hbs.registerHelper('json', function(context) {
    return JSON.stringify(context);
});
app.engine('handlebars', hbs.engine);
app.set('view engine', 'handlebars');

Why you call app.set('view engine', ...) two times with different values?

EDIT 4

I have further simplified the code:

/viz/app.js:

const postgreSQLlib = require(__dirname + './../shared_libs/services/postgreSQLlib.js');
const express = require('express');
const hbs = require('hbs');

var app = express();

// Server initiator
async function start() {
    await postgreSQLlib.connect();

    // hbs
    app.set('views', '' + __dirname + '/views');
    app.set('view engine', 'hbs');
    app.engine('hbs', hbs.__express);
    hbs.registerHelper('json', function(context) {
        return JSON.stringify(context);
    });
    app.engine('handlebars', hbs.engine);
    app.set('view engine', 'handlebars');

    // router
    app.get('/', testMe);

    console.log('Go to http://localhost:3000/ to see data');
    app.listen(3000);
}

// Your section with fresh data has been populated properly
async function testMe(req, res) {
    console.log('testMe');
    // fill this JSON using the results
    var viewData = {}; 
    viewData.data = 'this string';
    // pass data to view
    res.render('test', viewData);
}

// start the server
start();

/viz/views/test.hbs:

<html>
    <head>
        <title>Server test</title>
    </head>
    <body>
        {{data}}
    </body>
</html>

Then in prompt command I go to project/viz and I type node app.js + enter. The process starts and waits: there are no errors. When I go to http://localhost:3000/ but I get Connection failed.

I'm becoming crazy.

EDIT 5

The problem was not the connect nor the functions that made the select so I simplified the code a bit. and now, it works almost!

Here is the code.

viz/app.js:

const postgreSQLlib = require(__dirname + './../shared_libs/services/postgreSQLlib.js');
const express = require('express'); 
var app = express()
const hbs = require('hbs');
const webapp_opts = {"port":3000};

Initialize();

//.: Setup & Start Server
async function Initialize(){
    await postgreSQLlib.connect();

    console.log("[~] starting ...")
    //:[HBS]:Setup
    app.set('view engine', 'hbs'); 
    app.engine('hbs', hbs.__express)
    app.set('views', "" + __dirname + "/views")
    //:[HBS]:Helpers
    hbs.registerHelper('json', function(context) {
        return JSON.stringify(context);
    })
    //:[EXPRESS]:Router.Paths
    app.get("/", IndexPathFunction);
    // app.get("/script.js", scriptFile); <-- for script.js file

    //:[EXPRESS]:Start 
    app.listen(webapp_opts.port,()=>{
        console.log("[i] ready & listening","\n    http://localhost:"+webapp_opts.port+"/")
    })
}

/*async function scriptFile(req, res) { <-- for script.js file
    console.log('\nscriptFile');
    var viewData = {}; 
    viewData.number = 50;
    console.log('viewData:', viewData);
    res.render('script.js', viewData);
}*/

//.: Router Function : "/"
async function IndexPathFunction(req,res){
    var viewData = {}; 
    viewData.timestamp = Date.now();
    viewData.exJson = [{color: 'red', year: '1955'}, {color: 'blue', year: '2000'}, {color: 'yellow', year: '2013'}]; 
    viewData.exString = 'example of string'; 
    console.log('viewData:', viewData);
    res.render('index', viewData);
}

viz/views/index.hbs:

<html lang='en'>
    <head>
        <meta charset='utf-8'>
        <title>Index</title>
        <script src='https://d3js.org/d3.v5.js' charset='utf-8'></script>
        <link rel='stylesheet' type='text/css' href='/style.css' media='screen'/>
    </head>
    <body>
        <h1>INDEX timestamp: <small>{{timestamp}}</small></h1>
    </body>
    <script> 
        viewData = {}; 
        console.log('viewData:', viewData);
        viewData.exJson = JSON.parse('{{ json exJson }}'.replace(/&quot;/g, '"').replace(/&lt;/, ''));
        viewData.timestamp = {{timestamp}}; // doesn't work
        viewData.exString = {{ exString }}; // doesn't work
        console.log('viewData.exJson:', viewData.exJson);
        console.log('viewData.timestamp:', viewData.timestamp);
        console.log('viewData.exString:', viewData.exString);
    </script>
    <!--<script src='/script.js'></script>-->
</html>

The problem is to get a data type that is not json. For example it gives me error when I try to print the timestamp and the exString. Why?

Also I would like to clean up the code a bit and put the javascript part inside a script.js file that is called by index.hbs using <script src='/script.js'></script>.

EDIT 6

I found this tutorial which was very useful for me.

I edited the index.hbs file by adding a css file, an image and a script (it only contains a console.log('here'); but the idea is to place in script.js the viewData variable).

project/viz/views/index.hbs:

<html lang='en'>
    <head>
        <meta charset='utf-8'>
        <title>Index</title>
        <script src='https://d3js.org/d3.v5.js' charset='utf-8'></script>
        <link href="/css/style.css" rel="stylesheet">
    </head>
    <body>
        <img src="/images/logo.png"/>
        <h1>timestamp: <small>{{timestamp}}</small></h1>
        <h2>Welcome in index.hbs</h2>
    </body>
    <script> 
        viewData = {}; 
        console.log('viewData:', viewData);
        viewData.exJson = JSON.parse('{{json exJson }}'.replace(/&quot;/g, '"').replace(/&lt;/, ''));
        viewData.timestamp = {{timestamp}};
        viewData.exString = '{{exString}}'; 
        console.log('viewData.exJson:', viewData.exJson);
        console.log('viewData.timestamp:', viewData.timestamp);
        console.log('viewData.exString:', viewData.exString);
    </script>
    <link href='/script/script.js' rel='script'>
</html>

My files structure is:

project
    |_ node_modules
    |_ scraper
    |_ shared_libs
    |_ viz  
        |_ app.js 
        |_ public
            |_ css
                |_ style.css
            |_ images
                |_ logo.png
            |_ script
                |_ script.js
        |_ views
            |_ index.hbs

Now I see the image and the css is used. But the script doesn't seem to work because it is not printed the string here.

I search on internet about how to pass variables from a script tag to an external js file but I don't seem to have found anything that suits me. I've read the handlebar APIs and they have not been useful.


Solution

  • Solution to shared (custom) code :

    With this structure I already have two problems that I don't know how to solve:

    1. the postgreSQLlib.js file (and also utilFunc.js) is the same in both scraper and viz. How can I avoid this duplication of the code?

    You are already using require, so move the files from :

    project/scraper/services/postgreSQLlib.js
    project/viz/services/postgreSQLlib.js
    

    to a newly created directory in the projects root directory

    project
     |_ shared_libs
     |_ scraper
     |_ viz
    

    (in this example I used shared_libs , you may name it as you wish)

    project/shared_libs/postgreSQLlib.js
    

    Then, from the code :

    const postgreSQLlib = require(__dirname+"/../shared_libs/postgreSQLlib.js");
    

    This way you wont need to maintain the code in 2 different files in 2 locations


    Solution to shared node_modules :

    The same applies for node_modules simply, merge both of your current (node_modules directories)

    project/scraper/node_modules
    project/viz/node_modules
    

    into a folder in the root of the project directory

    project/node_modules
    

    Then my recomendations :

    1. delete the old ones inside ( project/scraper & project/viz )
    2. from now on, use npm i <module> --save from the project directory

    Both project/scraper & project/viz will use project/node_modules without the need to duplicate entire libraries...


    Solution for project/viz/app.js

    If you want to fetch the data from DB on each GET request

    Then you have to include the fetch the data from DB logic inside the request :

    app.get('/',FetchFreshData)
    

    The function will contain the fetch and construct the viewData for the hbs to be rendered with the fresh data referenced in the .hbs markup

    function FetchFreshData(req,res){
     /* add your SELECT here */
     var viewData = {} //fill this JSON using the results
     res.render('index',viewData)
    }
    

    So logically, every time you perform a GET to the "/" route you will run the QUERY and receive the "visualization" with fresh data.


    EDIT : Expanding Answer

    First of all I would suggets you to understand handlebars (hbs) a bit more in depth.

    If you want to use the data in a script you will have to register a helper in the server-side that renders the data as required.

    Handlebars.registerHelper('json',function(context){
     return JSON.stringify(context)
    })
    

    I need to do an example for your situation so lets say that viewData is the following :

    function FetchFreshData(req,res){
         /* add your SELECT here */
         var viewData = {} //fill this JSON using the results
         viewData.timestamp = Date.now()
         viewData.entries = dataHome
         res.render('index',viewData)
        }
    

    Would give us (an example) :

    {
     "timestamp":"1525182734",
     "entries":[
       {"name":"Entry 1"},
       {"name":"Entry 2"},
       {"name":"Entry 3"}
     ]
    }
    

    Now I will focus on your template (.hbs) :

    <html lang='en'>
     <head>
      <meta charset='utf-8'>
      <title>Map</title>
      <script src='https://d3js.org/d3.v5.js' charset='utf-8'></script>
      <link rel='stylesheet' type='text/css' href='/style.css' media='screen'/>
     </head><body>
      <h1>MAP<small>{{timestamp}}</small></h1>
      <div id='example'>
      {{#if entries}}
       <ul>
        {{#each entries}}
         <li> this.name </li>
        {{/each}}
       </ul>
      {{else}} No content... {{/if}}
      </div>
     </body>
        <script> 
         //Add global variables in the .hbs file 
         window.viewData_entries = {{ json entries }}
        </script>
        <script src='/script.js'></script>
    </html>
    

    It will use the "keys" of the JSON Object (viewData) to access the data & render the view...


    EDIT 2 : Try something simple

    use the module : hbs and try to use my 2 simple examples that return a timestamp and an object, I have also fixed your code in some parts for you to carry on improving it to suite your needs even more.

        const postgreSQLlib = require('../shared_libs/postgreSQLlib.js');
        const express = require('express');
        const hbs = require('hbs')
        var app = express();
        //:Server Initiator
        async function start() {
            await postgreSQLlib.connect();
            //:HBS:
            app.set('views', "" + __dirname + "/views");
            app.set('view engine', 'hbs');
            app.engine('hbs', hbs.__express);
            hbs.registerHelper('json',function(context){return JSON.stringify(context)});
            //:Router:
            app.get('/', fetchFreshData);
            app.get('/timestamp', returnTimestamp);
            app.get('/obj', returnOBJ);
            console.log('Go to http://localhost:3000/ to see data');
            app.listen(3000);
        }
        //:Your section with fresh data has been populated properly
        async function fetchFreshData(req, res) {
            var viewData = {}; 
            viewData.timestamp = Date.now();
            viewData.dataWork = await postgreSQLlib.getTableWork();
            viewData.dataHome = await postgreSQLlib.getTableHome();
            // pass data to view
            res.render('index', viewData);
        }
         /*
            The index.hbs I posted earlier wont work anymore, since the   "entries" key doesnt exist now... Try to understand the template  engine with the following 2 examples below
         */
    
        //:Simple Timestamp EXAMPLE
        function returnTimestamp(req, res) {
            var viewData = {}; 
            viewData.timestamp = Date.now();
            // pass data to view
            res.render('timestamp_example', viewData);
        }
        /* This would be timestamp_example.hbs :
        <html><head><title>Server Timestamp</title></head><body>{{timestamp}}</body></html>
        */
        
        //:Simple JSON EXAMPLE
        function returnOBJ(req, res) {
            var viewData = {}; 
            viewData.OBJ = {"key":"value"};
            // pass data to view
            res.render('json_example', viewData);
        }
        /* This would be json_example.hbs :
        <html><head><title>Server Object</title></head><body>Page will alert "{{OBJ.key}}"<script>var OBJ = {{json OBJ}}; alert(OBJ.key);</script></body></html>
        */
        //start the server :
        start()
    

    Remember that you need to add the new views for the /timestamp and /obj paths, I have added a commented example below each of the server functions.

    ----------
    

    EDIT 3 : a simple demo with callbacks

     const express = require('express'); var app = express()
        const hbs = require('hbs')
        const webapp_opts = {"port":3000}
        //.: Setup & Start Server
        function Initialize(){
         console.log("[~] starting ...")
            //:[HBS]:Setup
         app.set('view engine', 'hbs'); app.engine('hbs', hbs.__express)
         app.set('views', "" + __dirname + "/views")
            //:[HBS]:Helpers
         hbs.registerHelper('json',function(context){return JSON.stringify(context)})
            //:[EXPRESS]:Router.Paths
         app.get("/",IndexPathFunction)
         app.get("/debug",(req,res)=>{
          console.log("[GET]:/debug");  res.send("ok")
         })
            //:[EXPRESS]:Start 
         app.listen(webapp_opts.port,()=>{
          console.log("[i] ready & listening","\n    http://localhost:"+webapp_opts.port+"/")
         })
        }
        //.: Router Function : "/"
        function IndexPathFunction(req,res){
         DBQuery((query_error,query_results)=>{
          if(query_error){console.log("[!] DBQuery @ path : '/'\n",query_error.stack)}
          else{
           console.log("[+] DBResults :",query_results)
           res.render("index",{data:query_results})
           /*or*/// var viewData = {}; viewData.data=query_results; res.render("index",viewData)
          }
         })
        }
        //:[DB]:Example Query using callback method
        function DBQuery(callback_function){
         console.log("[>] DBQuery")
         pool.connect((err,client)=>{
          if (err) throw err
          client.query('SELECT * FROM whatever',(err,res)=>{
            client.release()
            callback_function(err,res)
          })
         })
        }
        //------------------------------------
        /* We can Initialize() the webapp 
         once we know the DB is accesible : */
        const pg = require('pg')
        const db_opts = {
          "user":"dbuser","password":"secretpassword",
          "host":"database.server.com", "port":3211,
          "database":"mydb"
        })
        var pool = new pg.Pool(db_opts);
        pool.on('error',(err,client)=>{
          console.error('Unexpected error on idle client', err); process.exit(-1)
        })
        pool.connect((err,client,done)=>{
          if(err){console.log("[!] DB Connection Error",err)}
          else{console.log("[+] DB Connected"); client.release(); Initialize() }
        })
    

    Remember the key to understand what is going wrong is having a control of the flow and building from small parts to bigger parts. You should have a read around : https://node-postgres.com/ , specially this part if you want to use Express with async/await

    Anyways, this new edited section should provide you with some insights in making a minimal working version. ;)