Search code examples
ios · json · foundation · nsjsonserialization

Canonicalize JSON so equivalent objects have the same hash


I'm storing JSON objects in a database. Many—perhaps most—of these objects will be duplicates, so I would like to key them on something like a SHA hash to avoid creating unnecessary extra records.

The problem is, at the point where I want to write them to the database, I no longer have the JSON bytes—just the Foundation objects returned by NSJSONSerialization. Because NSDictionary doesn't make any guarantees about key order (and even if it did, I'm not sure that the server I'm getting the data from does), I can't be certain NSJSONSerialization will output each object's fields in the same order each time I call it. That means that the same object could have different digests, defeating my attempts to save space.

Is there an Objective-C JSON library that does always write the exact same JSON for equivalent objects, presumably by sorting the keys before writing them? I'm targeting iOS 7, but this is probably a Foundation-level concern.


Solution

  • Rather than trying to write my own JSON serializer, I decided to trick Apple's serializer into doing what I want by handing it a proxy object.

    Usage:

    // Serialize the sorted-key proxy instead of the object itself, so that dictionary keys are written in sorted order.
    // Assumes jsonObject and an NSError * named error are declared by the surrounding code.
    NSData * JSONData = [NSJSONSerialization dataWithJSONObject:[jsonObject objectWithSortedKeys] options:0 error:&error];
    

    Header:

    #import <Foundation/Foundation.h>
    
    /// Adds a sorted-key view to every object, intended for feeding object graphs to NSJSONSerialization.
    @interface NSObject (sortedKeys)
    
    /// Returns a proxy for the object in which all dictionary keys, including those of child objects at any level, will always be enumerated in sorted order.
    ///
    /// @return A proxy standing in for the receiver. Pass it wherever the receiver would otherwise be serialized,
    ///         e.g. as the object argument of +[NSJSONSerialization dataWithJSONObject:options:error:].
    - (id)objectWithSortedKeys;
    
    @end
    

    Code:

    #import "NSObject+sortedKeys.h"
    
    /// A CbxSortedKeyWrapper intercepts calls to methods like -allKeys, -objectEnumerator,
    /// -enumerateKeysAndObjectsUsingBlock:, etc. and makes them enumerate a sorted array of keys,
    /// thus ensuring that keys are enumerated in a stable order.
    ///
    /// It also replaces objects returned by any other methods (including, say, -objectForKey: or
    /// -objectAtIndex:) with wrapped versions of those objects, thereby ensuring that child objects
    /// are similarly sorted.
    ///
    /// There are a lot of flaws in this approach, but it works well enough for NSJSONSerialization.
    @interface CbxSortedKeyWrapper: NSProxy
    
    /// Creates a proxy around the given object.
    ///
    /// If the object responds to -allKeys, its keys are sorted once, with -compare:, when the proxy is created.
    ///
    /// @param object The object to wrap. May be nil.
    /// @return The proxy, or nil when object is nil.
    + (id)sortedKeyWrapperForObject:(id)object;
    
    @end
    
    /// Category implementation: every object can produce a sorted-key proxy of itself.
    @implementation NSObject (sortedKeys)
    
    /// Hands the receiver to CbxSortedKeyWrapper and returns whatever proxy it builds.
    - (id)objectWithSortedKeys {
        id sortedProxy = [CbxSortedKeyWrapper sortedKeyWrapperForObject:self];
        return sortedProxy;
    }
    
    @end
    
    /// Proxy around a Foundation object that enumerates dictionary keys in sorted order and hands
    /// back wrapped versions of every object it returns, so that nested dictionaries are sorted too.
    @implementation CbxSortedKeyWrapper {
        /// The wrapped object: a copy when the original can be copied, otherwise the original itself.
        id _representedObject;
        /// Keys of _representedObject sorted with -compare:, or nil when it does not respond to -allKeys.
        NSArray * _keys;
        /// Wrapped elements served to fast enumeration of non-keyed collections; built on first use.
        NSArray * _wrappedElements;
    }
    
    
    /// Creates a proxy around the given object.
    ///
    /// @param object The object to wrap. May be nil.
    /// @return The proxy, or nil when object is nil.
    + (id)sortedKeyWrapperForObject:(id)object {
        if(!object) {
            return nil;
        }
    
        CbxSortedKeyWrapper * wrapper = [self alloc];
    
        // Not everything that passes through here can be copied: -objectEnumerator below wraps an
        // NSEnumerator, which does not adopt NSCopying, and sending -copy to such an object raises.
        // Snapshot what can be copied and wrap everything else as-is.
        if([object respondsToSelector:@selector(copyWithZone:)]) {
            wrapper->_representedObject = [object copy];
        }
        else {
            wrapper->_representedObject = object;
        }
    
        if([wrapper->_representedObject respondsToSelector:@selector(allKeys)]) {
            wrapper->_keys = [[wrapper->_representedObject allKeys] sortedArrayUsingSelector:@selector(compare:)];
        }
    
        return wrapper;
    }
    
    /// Reports the represented object's signature so that unimplemented selectors can be forwarded to it.
    - (NSMethodSignature*)methodSignatureForSelector:(SEL)aSelector {
        return [_representedObject methodSignatureForSelector:aSelector];
    }
    
    /// Forwards any method not overridden below to the represented object, and replaces an object
    /// return value with a sorted-key wrapper around it.
    - (void)forwardInvocation:(NSInvocation*)invocation {
        [invocation invokeWithTarget:_representedObject];
    
        BOOL returnsObject = invocation.methodSignature.methodReturnType[0] == '@';
    
        if(returnsObject) {
            __unsafe_unretained id out = nil;
            [invocation getReturnValue:&out];
    
            id wrapper = [CbxSortedKeyWrapper sortedKeyWrapperForObject:out];
            if(wrapper) {
                // -setReturnValue: copies the pointer without taking ownership, so a wrapper held only
                // by a local could be deallocated before the caller sees it. Hand it to the
                // autorelease pool so it outlives this method.
                __unsafe_unretained id result = (__bridge id)CFAutorelease(CFBridgingRetain(wrapper));
                [invocation setReturnValue:&result];
            }
            // A nil return value needs no wrapping; the invocation already holds nil.
        }
    }
    
    /// Enumerates the sorted keys. Returns nil when the represented object has no keys.
    - (NSEnumerator *)keyEnumerator {
        return [_keys objectEnumerator];
    }
    
    /// Enumerates values in sorted-key order for keyed objects; otherwise wraps the represented
    /// object's own enumerator so that the objects it vends are wrapped in turn.
    - (NSEnumerator *)objectEnumerator {
        if(_keys) {
            return [[self allValues] objectEnumerator];
        }
        else {
            return [CbxSortedKeyWrapper sortedKeyWrapperForObject:[_representedObject objectEnumerator]];
        }
    }
    
    /// The keys in sorted order, or nil when the represented object has no keys.
    - (NSArray *)allKeys {
        return _keys;
    }
    
    /// The values in sorted-key order, returned as a wrapped array so each value is wrapped on access.
    - (NSArray *)allValues {
        return [CbxSortedKeyWrapper sortedKeyWrapperForObject:[_representedObject objectsForKeys:_keys notFoundMarker:[NSNull null]]];
    }
    
    /// Calls block once per key, in sorted order, passing the wrapped value for that key.
    - (void)enumerateKeysAndObjectsUsingBlock:(void (^)(id key, id obj, BOOL *stop))block {
        [_keys enumerateObjectsUsingBlock:^(id key, NSUInteger idx, BOOL *stop) {
            id obj = [CbxSortedKeyWrapper sortedKeyWrapperForObject:_representedObject[key]];
            block(key, obj, stop);
        }];
    }
    
    /// Same as -enumerateKeysAndObjectsUsingBlock:, with opts applied to the enumeration of the sorted key array.
    - (void)enumerateKeysAndObjectsWithOptions:(NSEnumerationOptions)opts usingBlock:(void (^)(id key, id obj, BOOL *stop))block {
        [_keys enumerateObjectsWithOptions:opts usingBlock:^(id key, NSUInteger idx, BOOL *stop) {
            id obj = [CbxSortedKeyWrapper sortedKeyWrapperForObject:_representedObject[key]];
            block(key, obj, stop);
        }];
    }
    
    /// Enumerates the represented object's elements, passing each one to block in wrapped form.
    - (void)enumerateObjectsUsingBlock:(void (^)(id obj, NSUInteger idx, BOOL *stop))block {
        [_representedObject enumerateObjectsUsingBlock:^(id obj, NSUInteger idx, BOOL * stop) {
            block([CbxSortedKeyWrapper sortedKeyWrapperForObject:obj], idx, stop);
        }];
    }
    
    /// Enumerates the represented object's elements with opts, passing each one to block in wrapped form.
    - (void)enumerateObjectsWithOptions:(NSEnumerationOptions)opts usingBlock:(void (^)(id obj, NSUInteger idx, BOOL *stop))block {
        [_representedObject enumerateObjectsWithOptions:opts usingBlock:^(id obj, NSUInteger idx, BOOL * stop) {
            block([CbxSortedKeyWrapper sortedKeyWrapperForObject:obj], idx, stop);
        }];
    }
    
    /// Enumerates the elements at indexSet with opts, passing each one to block in wrapped form.
    - (void)enumerateObjectsAtIndexes:(NSIndexSet *)indexSet options:(NSEnumerationOptions)opts usingBlock:(void (^)(id obj, NSUInteger idx, BOOL *stop))block {
        [_representedObject enumerateObjectsAtIndexes:indexSet options:opts usingBlock:^(id obj, NSUInteger idx, BOOL * stop) {
            block([CbxSortedKeyWrapper sortedKeyWrapperForObject:obj], idx, stop);
        }];
    }
    
    /// Fast enumeration (for...in). Keyed objects yield their keys in sorted order; other
    /// enumerable objects yield their elements in wrapped form; anything else yields nothing.
    - (NSUInteger)countByEnumeratingWithState:(NSFastEnumerationState *)state objects:(__unsafe_unretained id *)stackbuf count:(NSUInteger)len {
        if(_keys) {
            // The callee may publish its items through state->itemsPtr rather than stackbuf, so the
            // buffer must not be read or rewritten here; pass the call straight through.
            return [_keys countByEnumeratingWithState:state objects:stackbuf count:len];
        }
    
        if(!_wrappedElements) {
            if(![_representedObject respondsToSelector:@selector(countByEnumeratingWithState:objects:count:)]) {
                return 0;
            }
    
            // The enumeration buffer is __unsafe_unretained, so the wrappers need an owner that
            // lives as long as this proxy. Build them once and keep them in an ivar.
            NSMutableArray * wrapped = [NSMutableArray array];
            for(id element in _representedObject) {
                [wrapped addObject:[CbxSortedKeyWrapper sortedKeyWrapperForObject:element]];
            }
            _wrappedElements = wrapped;
        }
    
        return [_wrappedElements countByEnumeratingWithState:state objects:stackbuf count:len];
    }
    
    @end