Search code examples
javascript mediawiki mediawiki-api

Search a mediawiki


Is there an API, or some other way, to search any MediaWiki site with JavaScript and print the page that was found (or, if nothing was found, print a message saying so)?

I'd prefer something like this:

/**
 * Placeholder sketch of the desired function; the search logic itself is
 * not implemented in this snippet.
 *
 * @param wikipage Base URL of the wiki (the sample call below passes
 *     'https://en.wikipedia.org').
 * @param search Term to look up (the sample call below passes 'banana').
 */
function searchWiki(wikipage, search) {
    // the actual search implementation would go here
    // NOTE: `foundPage` is never defined in this snippet; it stands for the
    // URL the missing implementation is expected to produce.
    document.write(foundPage);
}

// sample invocation

searchWiki('https://en.wikipedia.org', 'banana');

// desired output: 'https://en.wikipedia.org/wiki/Banana'

Solution

  • Here is my implementation of such a function. It uses the MediaWiki API through JSONP and is quite flexible. I guess a jQuery solution is fine. I created a small fiddle.

    searchWiki(site, search, [callback], [options])

    /**
     * Search a MediaWiki site through its JSONP API (requires jQuery as `$`).
     *
     * @param {string} site - Host name of the wiki, e.g. 'en.wikipedia.org'.
     * @param {string} search - Search query.
     * @param {Function|Object} [callback] - Called as callback(titles, links).
     *     An options object may be passed in this position instead.
     * @param {Object} [opts] - Options: ssl, maxResults, apiBase, wikiBase,
     *     success (used when no callback is given) and error (called with an
     *     Error when the response carries no search result list).
     *
     * Without opts.maxResults the callback receives a single title and link,
     * or (null, null) when nothing matched; with it, it receives two arrays.
     */
    function searchWiki(site, search, callback, opts) {
        // typeof null is 'object', so a null callback must not be mistaken
        // for the options object (that left opts === null and crashed below).
        if(callback !== null && typeof callback === 'object') {
            opts = callback;
            callback = null;
        } else {
            opts = opts || {};
        }
        // Build the required URLs
        var siteUrl = (opts.ssl ? 'https' : 'http') + '://' + site;
        var apiUrl = siteUrl + (opts.apiBase || '/w/') + 'api.php';
        var queryUrl = apiUrl + '?action=query&list=search&srsearch=' + encodeURIComponent(search) + '&srlimit=' + (opts.maxResults || 1) + '&format=json';
        // Issue the JSONP request
        $.ajax(queryUrl + '&callback=?', {
            dataType: 'jsonp',
            // This prevents warnings about the unrecognized parameter "_"
            cache: true,
            success: function(data) {
                var hits = data && data.query && data.query.search;
                if(!hits) {
                    // The API answered, but without a result list (for
                    // example with an "error" object). Report it instead of
                    // failing with a TypeError on data.query.search.
                    var apiError = data && data.error;
                    var reason = (apiError && (apiError.info || apiError.code)) || 'response contains no search result list';
                    (opts.error || function(){})(new Error('MediaWiki search failed: ' + reason));
                    return;
                }
                // Get all returned pages
                var titles = [], links = [];
                for(var i = 0; i < hits.length; i++) {
                    var title = hits[i].title,
                        link = siteUrl + (opts.wikiBase || '/wiki/') + encodeURIComponent(title);
                    titles.push(title);
                    links.push(link);
                }
                if(!opts.maxResults) {
                    // Single result requested: unwrap the arrays
                    if(hits.length === 0) {
                        titles = links = null;
                    } else {
                        titles = titles[0];
                        links = links[0];
                    }
                }
                // Call the callback
                (callback || opts.success || function(){})(titles, links);
            }
        });
    }
    

    Example 1: Single wikipedia search

    // Look up one article on the English Wikipedia over HTTPS and show it
    // as a link inside the #search-msg element.
    searchWiki('en.wikipedia.org', 'banana fruit', {
        ssl: true,
        success: function(title, link) {
            // A non-null title means the search produced a hit;
            // link is then "https://en.wikipedia.org/wiki/Banana"
            if(title !== null) {
                var anchor = $('<a>').text(title).attr('href', link);
                $('#search-msg').append(anchor);
                return;
            }
            $('#search-msg').text('Not found');
        }
    });
    

    This example shows a link to a wikipedia page with the associated title.

    Example 2: Multiple results

    // Ask mediawiki.org for up to five matches and announce each of them.
    searchWiki('www.mediawiki.org', 'Release notes', {
        ssl: true,
        maxResults: 5,
        success: function(titles, links) {
            // titles and links are parallel arrays, so pair them by index
            titles.forEach(function(pageTitle, idx) {
                alert('MediaWiki ' + pageTitle + ' at ' + links[idx]);
            });
        }
    });
    

    This example displays up to five links to MediaWiki pages which match the query "Release notes".

    Options:

    • ssl: Use HTTPS instead of HTTP
    • maxResults: Return multiple (up to n) results; the callback then receives arrays of titles and links instead of a single title and link
    • apiBase: API directory on target site (defaults to /w/)
    • wikiBase: Wiki directory on target site (defaults to /wiki/)
    • success: Function to invoke after retrieving the result list

    You can either pass the callback as a function argument (before the options) or as the success option.


    Update: Here is the pure JS solution (no jQuery required). And there is another fiddle, this time without jQuery.

    /**
     * Search a MediaWiki site through its JSONP API without any library.
     *
     * A temporary global callback and a <script> element are created for the
     * request; both are removed again once the request has finished.
     *
     * @param {string} site - Host name of the wiki, e.g. 'en.wikipedia.org'.
     * @param {string} search - Search query.
     * @param {Function|Object} [callback] - Called as callback(titles, links).
     *     An options object may be passed in this position instead.
     * @param {Object} [opts] - Options: ssl, maxResults, apiBase, wikiBase,
     *     success (used when no callback is given) and error (called with an
     *     Error when the script fails to load or the response carries no
     *     search result list).
     *
     * Without opts.maxResults the callback receives a single title and link,
     * or (null, null) when nothing matched; with it, it receives two arrays.
     */
    function searchWiki(site, search, callback, opts) {
        // typeof null is 'object', so a null callback must not be mistaken
        // for the options object (that left opts === null and crashed below).
        if(callback !== null && typeof callback === 'object') {
            opts = callback;
            callback = null;
        } else {
            opts = opts || {};
        }
        // Build the required URLs
        var siteUrl = (opts.ssl ? 'https' : 'http') + '://' + site;
        var apiUrl = siteUrl + (opts.apiBase || '/w/') + 'api.php';
        var queryUrl = apiUrl + '?action=query&list=search&srsearch=' + encodeURIComponent(search) + '&srlimit=' + (opts.maxResults || 1) + '&format=json';
        var fnName = '_cb_' + Math.floor(Math.random() * 4294967296);
        var scriptTag = document.createElement('script');
        var settled = false;

        // Drop the temporary global and the script element so that repeated
        // searches do not pile up in window and in the document head.
        function finish() {
            settled = true;
            try {
                delete window[fnName];
            } catch(e) {
                // The host refused to delete the property; clear it instead.
                window[fnName] = undefined;
            }
            if(scriptTag.parentNode) {
                scriptTag.parentNode.removeChild(scriptTag);
            }
        }

        // Report a failure through the optional error callback.
        function fail(reason) {
            (opts.error || function(){})(new Error('MediaWiki search failed: ' + reason));
        }

        window[fnName] = function(data) {
            finish();
            var hits = data && data.query && data.query.search;
            if(!hits) {
                // The API answered, but without a result list (for example
                // with an "error" object).
                var apiError = data && data.error;
                fail((apiError && (apiError.info || apiError.code)) || 'response contains no search result list');
                return;
            }
            // Get all returned pages
            var titles = [], links = [];
            for(var i = 0; i < hits.length; i++) {
                var title = hits[i].title,
                    link = siteUrl + (opts.wikiBase || '/wiki/') + encodeURIComponent(title);
                titles.push(title);
                links.push(link);
            }
            if(!opts.maxResults) {
                // Single result requested: unwrap the arrays
                if(hits.length === 0) {
                    titles = links = null;
                } else {
                    titles = titles[0];
                    links = links[0];
                }
            }
            // Call the callback
            (callback || opts.success || function(){})(titles, links);
        };
        // The script element reports load failures through its error event.
        scriptTag.onerror = function() {
            if(settled) {
                return;
            }
            finish();
            fail('could not load ' + apiUrl);
        };
        // Issue the JSONP request
        scriptTag.setAttribute('src', queryUrl + '&callback=' + fnName);
        document.head.appendChild(scriptTag);
    }
    

    Update 2: Finally a solution for node.js. The API is still the same, but it provides some additional options:

    • error: An error callback (this was impossible in browser-based JS)
    • userAgent: A custom user agent string as suggested in the docs
    • port: Target port (defaults to 80/443)
    • encoding: Response encoding (defaults to utf8)

    I did not test this much, but the examples (see above) should still work.

    var http = require('http'),
        https = require('https');
    
    /**
     * Search a MediaWiki site through its HTTP API from Node.js.
     *
     * @param {string} site - Host name of the wiki, e.g. 'en.wikipedia.org'.
     * @param {string} search - Search query.
     * @param {Function|Object} [callback] - Called as callback(titles, links).
     *     An options object may be passed in this position instead.
     * @param {Object} [opts] - Options: ssl, maxResults, apiBase, wikiBase,
     *     success (used when no callback is given), error (called with the
     *     error on request failure, unparsable JSON, or a response without a
     *     search result list), userAgent, port and encoding.
     *
     * Without opts.maxResults the callback receives a single title and link,
     * or (null, null) when nothing matched; with it, it receives two arrays.
     */
    function searchWiki(site, search, callback, opts) {
        // typeof null is 'object', so a null callback must not be mistaken
        // for the options object (that left opts === null and crashed below).
        if(callback !== null && typeof callback === 'object') {
            opts = callback;
            callback = null;
        } else {
            opts = opts || {};
        }
        // Build the required paths
        var apiPath = (opts.apiBase || '/w/') + 'api.php';
        var queryPath = apiPath + '?action=query&list=search&srsearch=' + encodeURIComponent(search) + '&srlimit=' + (opts.maxResults || 1) + '&format=json';
        // Request options
        var httpOpts = {
            hostname: site,
            port: (opts.port ? opts.port : (opts.ssl ? 443 : 80)),
            method: 'GET',
            path: queryPath,
            agent: false
        };
        // Custom user agent
        if(opts.userAgent) {
            httpOpts.headers = {
                'User-Agent': opts.userAgent
            };
        }
        // Make the request
        var req = (opts.ssl ? https : http).request(httpOpts, function(res) {
            var msgBody = '';
            res.setEncoding(opts.encoding || 'utf8');

            res.on('data', function(chunk) {
                msgBody += chunk;
            });

            res.on('end', function() {
                // Parse response as JSON
                var data;
                try {
                    data = JSON.parse(msgBody);
                } catch(err) {
                    (opts.error || function(){})(err);
                    return;
                }
                var hits = data && data.query && data.query.search;
                if(!hits) {
                    // Valid JSON without a result list (for example an API
                    // "error" object). Route it to the error callback: a
                    // TypeError thrown in this handler would be uncaught.
                    var apiError = data && data.error;
                    var reason = (apiError && (apiError.info || apiError.code)) || 'response contains no search result list';
                    (opts.error || function(){})(new Error('MediaWiki search failed: ' + reason));
                    return;
                }
                // Get all returned pages
                var siteUrl = (opts.ssl ? 'https' : 'http') + '://' + site;
                var titles = [], links = [];
                for(var i = 0; i < hits.length; i++) {
                    var title = hits[i].title,
                        link = siteUrl + (opts.wikiBase || '/wiki/') + encodeURIComponent(title);
                    titles.push(title);
                    links.push(link);
                }
                if(!opts.maxResults) {
                    // Single result requested: unwrap the arrays
                    if(hits.length === 0) {
                        titles = links = null;
                    } else {
                        titles = titles[0];
                        links = links[0];
                    }
                }
                // Call the callback
                (callback || opts.success || function(){})(titles, links);
            });
        });
        req.on('error', function(err) {
            (opts.error || function(){})(err);
        });
        req.end();
    }