Search code examples
google-app-engine, python-2.7, cursor

How to use cursors for search in gae?


When I RTFM, I can't understand how to specify paginated searches using the technique described in the manual. Here's my code:

def find_documents(query_string, limit, cursor):
    """Search the index named by ``_INDEX_NAME``, sorted by ``date`` descending.

    Args:
        query_string: Query text handed to ``search.Query``.
        limit: Number of results to return for this call.
        cursor: Cursor passed through to ``search.QueryOptions``.

    Returns:
        The value returned by ``index.search``, or ``None`` when a
        ``search.Error`` was raised (the failure is logged).
    """
    try:
        # Descending sort on the 'date' field; today's date is the default value.
        date_desc = search.SortExpression(
            expression='date',
            direction=search.SortExpression.DESCENDING,
            default_value=datetime.now().date())

        # Sort up to 1000 matching results, then return `limit` of them
        # starting from `cursor`.
        query_options = search.QueryOptions(
            limit=limit,
            cursor=cursor,
            sort_options=search.SortOptions(expressions=[date_desc], limit=1000))

        search_query = search.Query(query_string=query_string,
                                    options=query_options)
        return search.Index(name=_INDEX_NAME).search(search_query)
    except search.Error:
        logging.exception('Search failed')
        return None


class MainAdvIndexedPage(SearchBaseHandler):
    """Request handler that runs a search and renders the result page."""

    def get(self):
        """Search for the ``query`` URL parameter and render ``indexed.html``."""
        region = Region.all().filter('name = ', 'Delhi').get()

        # Take the search text from the URL's query string, when there is one.
        raw_query = urlparse(self.request.uri).query
        query = parse_qs(raw_query)['query'][0] if raw_query else ''

        # Every request starts from a fresh Cursor, so the same first batch
        # of up to 50 results is fetched each time.
        results = find_documents(query, 50, search.Cursor())

        template_values = {
            'results': results,
            'next_cursor': results.cursor,
            'number_returned': len(results.results),
            'url': url,
            'user': users.get_current_user(),
            'url_linktext': url_linktext,
            'region': region,
            'city': '',
            'request': self.request,
            'form': SearchForm(),
            'query': query,
        }
        self.render_template('indexed.html', template_values)

The code above works and does a search but it doesn't page the result. I wonder about the following code in the manual:

# Cursor attached to the results of the search that was just run.
next_cursor = results.cursor

# String form of that cursor, read from its web_safe_string attribute.
next_cursor_urlsafe = next_cursor.web_safe_string
# save next_cursor_urlsafe
...
# restore next_cursor_urlsafe

# Rebuild a Cursor from the saved string and pass it to the next search call.
results = find_documents(query_string, 20,
                         search.Cursor(web_safe_string=next_cursor_urlsafe))

What is next_cursor used for? How do I save it, and what is the purpose of saving it? How do I get a cursor in the first place? Should the code look something like this instead, using memcache to save and restore the cursor?

class MainAdvIndexedPage(SearchBaseHandler):
    """Handles search requests for comments."""

    def get(self):
        """Handles a get request with a query."""
        regionname = 'Delhi'
        region = Region.all().filter('name = ', regionname).get()
        uri = urlparse(self.request.uri)
        query = ''
        if uri.query:
            query = parse_qs(uri.query)
            query = query['query'][0]
        # restore next_cursor_urlsafe
        # NOTE(review): the memcache key is the fixed string 'results_cursor',
        # so the stored value is not tied to a particular user or query.
        next_cursor_urlsafe = memcache.get('results_cursor')
        # NOTE(review): last_cursor is not assigned anywhere in this snippet;
        # next_cursor_urlsafe was presumably intended -- confirm.
        if last_cursor:
            # NOTE(review): query_string is not defined in this method; the
            # local holding the search text is `query`.
            results = find_documents(query_string, 50,
                         search.Cursor(web_safe_string=next_cursor_urlsafe))
    # NOTE(review): the next line is indented differently from the rest of the
    # method body and is not in an else branch, so as written it would replace
    # any results fetched with the restored cursor.
    results = find_documents(query, 50, search.Cursor())
        next_cursor = results.cursor    
        # NOTE(review): next_cursor_urlsafe is computed here but not used below.
        next_cursor_urlsafe = next_cursor.web_safe_string
        # save next_cursor_urlsafe
        # NOTE(review): this stores results.cursor itself, while the read above
        # passes the cached value as web_safe_string=...
        memcache.set('results_cursor', results.cursor)
        template_values = {
            'results': results,'next_cursor':next_cursor,
            'number_returned': len(results.results),
            'url': url, 'user' : users.get_current_user(), 
            'url_linktext': url_linktext, 'region' : region, 'city' : '', 'request' : self.request, 'form' : SearchForm(), 'query' : query
        }
        self.render_template('indexed.html', template_values)

Update

From what I see from the answer, I'm supposed to use an HTTP GET query string to save the cursor but I still don't know exactly how. Please tell me how.

Update 2

This is my new effort.

 def get(self):
    """Handles a get request with a query."""
    regionname = 'Delhi'
    region = Region.all().filter('name = ', regionname).get()

    # Read the "cursor" request parameter.
    cursor = self.request.get("cursor")

    uri = urlparse(self.request.uri)
    query = ''
    if uri.query:
        query = parse_qs(uri.query)
        query = query['query'][0]
    # NOTE(review): this logs a newly constructed search.Cursor(), not the
    # `cursor` value read from the request above.
    logging.info('search cursor: %s', search.Cursor())

    if cursor: 
        # NOTE(review): `cursor` is passed on exactly as read from the request;
        # it is not converted with search.Cursor(web_safe_string=cursor) first.
        results = find_documents(query, 50, cursor)
    else:
       results = find_documents(query, 50, search.Cursor())
    # Only expose a cursor string to the template when the search returned one.
    next_cursor = None
    if results and results.cursor:
        next_cursor = results.cursor.web_safe_string
    logging.info('next cursor: %s', str(next_cursor))
    # NOTE(review): results.results below is read without the `if results`
    # guard used above; find_documents returns None when the search fails.
    template_values = {
        'results': results,'cursor':next_cursor,
        'number_returned': len(results.results),
        'user' : users.get_current_user(), 
        'region' : region, 'city' : '', 'request' : self.request, 'form' : SearchForm(), 'query' : query
    }

I think that I've understood how it's supposed to work with the above, and it's outputting a cursor at the first hit so I can know how to get the cursor in the first place. This is clearly documented enough. But I get this error message: cursor must be a Cursor, got unicode


Solution

  • No, you should not use memcache for that, especially with a constant key like 'results_cursor' - that would mean that all users would get the same cursor, which would be bad.

    You are already passing the cursor to the template context (although you should be converting to the web_safe_string as you do in the second example). In the template, you should ensure that the cursor string is included in the GET parameters of your "next" button: then, back in the view, you should extract it from there and pass it into the find_documents call.

    Apart from the memcache issue, you're almost there with the second example, but you should obviously ensure that the second call to find_documents is inside an else block so it doesn't overwrite the cursor version.