Search code examples
zig

Simple log analysis with Zig


Motivated by https://benhoyt.com/writings/count-words/ , I have played a bit with rewriting an internal log analysis script in several languages (I will not go as far as in the article!).

After Go (by myself) and Rust (with some help from SO), I am currently stuck with Zig. I have more or less understood https://github.com/benhoyt/countwords/blob/master/simple.zig, but I am still having a hard time translating my original script along these lines: notably, using a hash map with tuple keys, and handling month names when parsing and printing.

Original script in Python:

import sys

# Month abbreviation -> month number (1 = Jan ... 12 = Dec).
months = {
    name: number
    for number, name in enumerate(
        ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
         "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"),
        start=1,
    )
}

# Reverse lookup: month number -> abbreviation, used when printing.
months_r = dict(zip(months.values(), months.keys()))

# (month number, day) -> number of matching log lines seen for that date.
totals = {}

for line in sys.stdin:
    # Only lines that mention both markers are counted.
    if "redis" not in line or "Partial" not in line:
        continue
    # The first two whitespace-separated fields are the month name and the day.
    month_name, day_text = line.split()[:2]
    date = (months[month_name], int(day_text))
    totals.setdefault(date, 0)
    totals[date] += 1

# Tuples sort by month number first, then by day.
for date in sorted(totals):
    month_number, day = date
    print(months_r[month_number], day, totals[date])

Could someone fluent with recent Zig give a hand?

Thanks a lot!


Solution

  • Solution from the Zig Forum

    const std = @import("std");
    
    /// Aggregation key for the totals map: a month number and a day of the month.
    const Key = struct {
        /// Month number; `main` fills it from the `months` lookup table.
        month: u4,
        /// Day number; `main` parses it from the second field of a log line.
        day: u5,
    };
    
    /// Packs a key into a single integer: the month shifted left by 32 bits,
    /// OR-ed with the day. Serves as the hash for `Totals`, and `itemSort`
    /// compares these values to order items by month and then by day.
    fn keyHash(key: Key) u64 {
        const month_bits = @as(u64, key.month) << 32;
        const day_bits: u64 = key.day;
        return month_bits | day_bits;
    }
    
    /// Hash map from a (month, day) key to the number of matching log lines.
    const Totals = std.HashMap(
        // Key type: the (month, day) pair.
        Key,
        // Value type: the per-key line count.
        usize,
        // Hash function: the packed month/day integer.
        keyHash,
        // Equality function generated by the standard library for `Key`.
        std.hash_map.getAutoEqlFn(Key),
        // Standard library default for the maximum load percentage.
        std.hash_map.default_max_load_percentage,
    );
    
    /// One (key, count) pair copied out of the totals map so that the results
    /// can be sorted before printing.
    const Item = struct {
        /// The (month, day) the count belongs to.
        key: Key,
        /// Number of matching log lines recorded for `key`.
        count: usize,
    };
    
    /// "Less than" comparator handed to `std.sort.sort`: orders items by the
    /// packed value of their keys, i.e. by month first and then by day.
    fn itemSort(context: void, lhs: Item, rhs: Item) bool {
        // The sort API passes a context argument; this comparator has no use for it.
        _ = context;
        const left_rank = keyHash(lhs.key);
        const right_rank = keyHash(rhs.key);
        return left_rank < right_rank;
    }
    
    // zig fmt: off
    /// Compile-time lookup table: three-letter month abbreviation -> month number.
    const months = std.ComptimeStringMap(u4, .{
        .{ "Jan", 1 },
        .{ "Feb", 2 },
        .{ "Mar", 3 },
        .{ "Apr", 4 },
        .{ "May", 5 },
        .{ "Jun", 6 },
        .{ "Jul", 7 },
        .{ "Aug", 8 },
        .{ "Sep", 9 },
        .{ "Oct", 10 },
        .{ "Nov", 11 },
        .{ "Dec", 12 },
    });
    
    /// Reverse lookup: month number -> three-letter abbreviation.
    /// Index 0 holds a placeholder so that month numbers 1..12 index directly.
    const months_r = [_][]const u8{
        "(padding)", // 0: not a month
        "Jan", // 1
        "Feb", // 2
        "Mar", // 3
        "Apr", // 4
        "May", // 5
        "Jun", // 6
        "Jul", // 7
        "Aug", // 8
        "Sep", // 9
        "Oct", // 10
        "Nov", // 11
        "Dec", // 12
    };
    // zig fmt: on
    
    /// Reads log lines from stdin, counts those containing both "redis" and
    /// "Partial" per (month, day) taken from the first two whitespace-separated
    /// fields, and prints one "<Mon> <day> <count>" line per date to stdout,
    /// ordered by month and then by day.
    ///
    /// Errors:
    /// - `error.MissingField` when a matching line has fewer than two fields.
    /// - `error.UnknownMonth` when the first field is not a known month abbreviation.
    /// - Whatever `std.fmt.parseUnsigned` reports when the second field is not a
    ///   valid day number, plus any I/O or allocation error from the calls below.
    /// A line that does not fit the 4096-byte buffer also aborts the run with the
    /// error returned by `readUntilDelimiterOrEof`.
    pub fn main() !void {
        var gpa = std.heap.GeneralPurposeAllocator(.{}){};
        defer if (gpa.deinit()) std.log.err("memory leak detected", .{});
        const allocator = &gpa.allocator;
    
        var totals = Totals.init(allocator);
        defer totals.deinit();
    
        // Keep the buffered reader in a named variable: `reader()` hands out a
        // pointer to it, so it must not be a temporary. This mirrors how the
        // buffered stdout writer is set up further down.
        var buffered_stdin = std.io.bufferedReader(std.io.getStdIn().reader());
        const stdin = buffered_stdin.reader();
        var buf: [4096]u8 = undefined;
        while (try stdin.readUntilDelimiterOrEof(&buf, '\n')) |line| {
            // Only lines that mention both markers are counted.
            if (std.mem.indexOf(u8, line, "redis") == null or std.mem.indexOf(u8, line, "Partial") == null)
                continue;
    
            // Report malformed lines as errors instead of unwrapping with `.?`,
            // which panics in safe builds and is undefined behaviour in unsafe ones.
            var it = std.mem.tokenize(line, &std.ascii.spaces);
            const month_name = it.next() orelse return error.MissingField;
            const month = months.get(month_name) orelse return error.UnknownMonth;
            const day_text = it.next() orelse return error.MissingField;
            const day = try std.fmt.parseUnsigned(u5, day_text, 10);
    
            const res = try totals.getOrPut(.{ .month = month, .day = day });
            if (res.found_existing)
                res.entry.value += 1
            else
                res.entry.value = 1;
        }
    
        var stdout = std.io.bufferedWriter(std.io.getStdOut().writer());
        defer stdout.flush() catch std.log.err("stdout flushing failed", .{});
        const out = stdout.writer();
    
        // Copy the map entries into a slice so they can be sorted.
        var items = try allocator.alloc(Item, totals.count());
        defer allocator.free(items);
    
        {
            var it = totals.iterator();
            var i: usize = 0;
            while (it.next()) |kv| : (i += 1) {
                items[i] = .{ .key = kv.key, .count = kv.value };
            }
        }
    
        std.sort.sort(Item, items, {}, itemSort);
    
        for (items) |it| {
            try out.print("{s} {d} {d}\n", .{ months_r[it.key.month], it.key.day, it.count });
        }
    }