Motivated by https://benhoyt.com/writings/count-words/, I have been experimenting with rewriting an internal log analysis script in several languages (though I will not go as far as the article does!).
After Go (by myself) and Rust (with some help from SO), I am currently stuck with Zig. I have more or less understood https://github.com/benhoyt/countwords/blob/master/simple.zig but I am still having a hard time translating my original script along these lines, notably: using a hash map with tuple keys, and handling month names when parsing and printing.
Original script in Python:
import sys

# Month abbreviation, as written in the first field of a log line, to month number.
months = {
    "Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4, "May": 5, "Jun": 6,
    "Jul": 7, "Aug": 8, "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12,
}
# Reverse table (month number to abbreviation), used when printing.
months_r = dict((number, name) for name, number in months.items())

# (month number, day) -> count of lines mentioning both "redis" and "Partial".
totals = {}
for line in sys.stdin:
    if "redis" not in line or "Partial" not in line:
        continue
    # The first two whitespace-separated fields are the month name and the day.
    month_name, day_text = line.split()[:2]
    day_key = (months[month_name], int(day_text))
    if day_key in totals:
        totals[day_key] += 1
    else:
        totals[day_key] = 1

# Tuples sort by month number first, then by day.
for month, day in sorted(totals):
    print(months_r[month], day, totals[month, day])
Could someone fluent in recent Zig lend a hand?
Thanks a lot!
Solution from the Zig Forum
const std = @import("std");
/// Calendar day that log lines are grouped by.
const Key = struct {
    /// Month number; the `months` table in this file maps "Jan".."Dec" to 1..12.
    month: u4,
    /// Day of the month as parsed from the log line (a u5 holds 0..31).
    day: u5,
};
/// Packs a key into one u64: the month sits in bits 32 and up, the day in the
/// low bits. Distinct keys give distinct values, and comparing two packed
/// values orders the keys by month first, then by day.
fn keyHash(key: Key) u64 {
    const month_bits = @as(u64, key.month) << 32;
    const day_bits: u64 = key.day;
    return month_bits | day_bits;
}
/// Number of matching log lines seen for each (month, day) key.
///
/// Hashing and equality come from the standard library's auto-generated
/// functions for `Key`, with the default maximum load percentage.
const Totals = std.AutoHashMap(Key, usize);
/// One output row: a key together with the number of lines counted for it.
const Item = struct {
    key: Key,
    count: usize,
};
/// "Less than" predicate for sorting items chronologically: earlier month
/// first, and within the same month, earlier day first.
/// `context` is unused; it is only there to match the sort callback shape.
fn itemSort(context: void, lhs: Item, rhs: Item) bool {
    _ = context;
    if (lhs.key.month != rhs.key.month) {
        return lhs.key.month < rhs.key.month;
    }
    return lhs.key.day < rhs.key.day;
}
// zig fmt: off
/// Month abbreviation (as it appears in a log line) -> month number, 1..12.
const months = std.ComptimeStringMap(u4, .{
    .{ "Jan", 1 },
    .{ "Feb", 2 },
    .{ "Mar", 3 },
    .{ "Apr", 4 },
    .{ "May", 5 },
    .{ "Jun", 6 },
    .{ "Jul", 7 },
    .{ "Aug", 8 },
    .{ "Sep", 9 },
    .{ "Oct", 10 },
    .{ "Nov", 11 },
    .{ "Dec", 12 },
});
/// Month number -> abbreviation. Index 0 is a placeholder so that month
/// numbers 1..12 can be used directly as indices.
const months_r = [13][]const u8{
    "(padding)",
    "Jan",
    "Feb",
    "Mar",
    "Apr",
    "May",
    "Jun",
    "Jul",
    "Aug",
    "Sep",
    "Oct",
    "Nov",
    "Dec",
};
// zig fmt: on
/// Reads log lines from stdin, counts the lines that contain both "redis" and
/// "Partial" per (month, day) taken from the first two whitespace-separated
/// fields, and prints one "<Mon> <day> <count>" row per day to stdout in
/// chronological order.
///
/// Fails with `error.MissingMonth`, `error.UnknownMonth` or `error.MissingDay`
/// when a matching line does not start with a known month abbreviation and a
/// day field, with the `parseUnsigned` error when the day is not a number that
/// fits a `u5`, and with whatever the reader, writer or allocator report.
/// Each line is read into a fixed 4096-byte buffer.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer if (gpa.deinit()) std.log.err("memory leak detected", .{});
    const allocator = &gpa.allocator;

    var totals = Totals.init(allocator);
    defer totals.deinit();

    // Keep the buffered reader in a named variable instead of calling
    // `.reader()` on a temporary, so the buffer the reader works on has a
    // well-defined lifetime (same pattern as the buffered stdout below).
    var buffered_stdin = std.io.bufferedReader(std.io.getStdIn().reader());
    const stdin = buffered_stdin.reader();

    var buf: [4096]u8 = undefined;
    while (try stdin.readUntilDelimiterOrEof(&buf, '\n')) |line| {
        // Only lines mentioning both markers are counted.
        if (std.mem.indexOf(u8, line, "redis") == null or std.mem.indexOf(u8, line, "Partial") == null)
            continue;

        // The input is untrusted: report malformed lines as errors rather
        // than force-unwrapping optionals.
        var fields = std.mem.tokenize(line, &std.ascii.spaces);
        const month_name = fields.next() orelse return error.MissingMonth;
        const month = months.get(month_name) orelse return error.UnknownMonth;
        const day_text = fields.next() orelse return error.MissingDay;
        const day = try std.fmt.parseUnsigned(u5, day_text, 10);

        const res = try totals.getOrPut(.{ .month = month, .day = day });
        if (res.found_existing) {
            res.entry.value += 1;
        } else {
            res.entry.value = 1;
        }
    }

    // Copy the map entries into a slice so they can be sorted.
    var items = try allocator.alloc(Item, totals.count());
    defer allocator.free(items);
    {
        var entries = totals.iterator();
        var i: usize = 0;
        while (entries.next()) |kv| : (i += 1) {
            items[i] = .{ .key = kv.key, .count = kv.value };
        }
    }
    std.sort.sort(Item, items, {}, itemSort);

    var stdout = std.io.bufferedWriter(std.io.getStdOut().writer());
    const out = stdout.writer();
    for (items) |item| {
        try out.print("{s} {d} {d}\n", .{ months_r[item.key.month], item.key.day, item.count });
    }
    // Flush explicitly so a failed write is returned from main instead of
    // only being logged.
    try stdout.flush();
}