I get this panic and backtrace in Rust:
thread 'main' panicked at 'failed printing to stdout: Broken pipe (os error 32)', library/std/src/io/stdio.rs:993:9
stack backtrace:
0: 0x559ffa959dc0 - std::backtrace_rs::backtrace::libunwind::trace::h72c2fb8038f1bbee
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/../../backtrace/src/backtrace/libunwind.rs:96
1: 0x559ffa959dc0 - std::backtrace_rs::backtrace::trace_unsynchronized::h1e3b084883f1e78c
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/../../backtrace/src/backtrace/mod.rs:66
2: 0x559ffa959dc0 - std::sys_common::backtrace::_print_fmt::h3bf6a7ebf7f0394a
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:79
3: 0x559ffa959dc0 - <std::sys_common::backtrace::_print::DisplayBacktrace as core::fmt::Display>::fmt::h2e8cb764b7fe02e7
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:58
4: 0x559ffa972f6c - core::fmt::write::h7a1184eaee6a8644
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/core/src/fmt/mod.rs:1080
5: 0x559ffa957b12 - std::io::Write::write_fmt::haeeb374d93a67eac
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/io/mod.rs:1516
6: 0x559ffa95beed - std::sys_common::backtrace::_print::h1d14a7f6ad632dc8
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:61
7: 0x559ffa95beed - std::sys_common::backtrace::print::h301abac8bb2e3e81
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:48
8: 0x559ffa95beed - std::panicking::default_hook::{{closure}}::hde0cb80358a6920a
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:208
9: 0x559ffa95bb98 - std::panicking::default_hook::h9b1a691049a0ec8f
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:227
10: 0x559ffa95c5d1 - std::panicking::rust_panic_with_hook::h2bdec87b60580584
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:577
11: 0x559ffa95c179 - std::panicking::begin_panic_handler::{{closure}}::h101ca09d9df5db47
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:484
12: 0x559ffa95a22c - std::sys_common::backtrace::__rust_end_short_backtrace::h3bb85654c20113ca
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/sys_common/backtrace.rs:153
13: 0x559ffa95c139 - rust_begin_unwind
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:483
14: 0x559ffa95c0eb - std::panicking::begin_panic_fmt::hf0503558fbe5b251
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:437
15: 0x559ffa957022 - std::io::stdio::print_to::h9435376f36962f3f
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/io/stdio.rs:993
16: 0x559ffa957022 - std::io::stdio::_print::h0d31d4b9faa6e1ec
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/io/stdio.rs:1005
17: 0x559ffa944807 - wordstats::main::h1c2ea6400047a5eb
18: 0x559ffa942e73 - std::sys_common::backtrace::__rust_begin_short_backtrace::h9e31cf87ddc88116
19: 0x559ffa942e49 - std::rt::lang_start::{{closure}}::h6c6491f05894818f
20: 0x559ffa95c9f7 - core::ops::function::impls::<impl core::ops::function::FnOnce<A> for &F>::call_once::he179d32a5d10d957
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/core/src/ops/function.rs:259
21: 0x559ffa95c9f7 - std::panicking::try::do_call::hcb3d5e7be089b2b4
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:381
22: 0x559ffa95c9f7 - std::panicking::try::h7ac93b0cd56fb701
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panicking.rs:345
23: 0x559ffa95c9f7 - std::panic::catch_unwind::h7b40e396c93a4fcd
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/panic.rs:382
24: 0x559ffa95c9f7 - std::rt::lang_start_internal::h142b9cc66267fea1
at /rustc/7eac88abb2e57e752f3302f02be5f3ce3d7adfb4/library/std/src/rt.rs:51
25: 0x559ffa944ae2 - main
26: 0x7f6223a380b3 - __libc_start_main
27: 0x559ffa94209e - _start
28: 0x0 - <unknown>
when I compile this program
use diacritics;
use std::collections::HashMap;
use std::io;
use std::io::prelude::*;
/// One row of the final report: a distinct word paired with its occurrence count.
///
/// Instances are built in `output` from the `(word, count)` pairs of the word map.
#[derive(Debug)]
struct Entry {
/// The word, copied from the map key.
word: String,
/// How many times `word` was counted, copied from the map value.
count: u32,
}
/// Characters treated as word boundaries when a line is split into words.
///
/// References stored in a `static` item are implicitly `'static`, so the
/// lifetime is not spelled out.
static SEPARATORS: &[char] = &[
    ' ', ',', '.', '!', '?', '\'', '"', '\n', '(', ')', '#', '{', '}', '[', ']', '-', ';', ':',
];
/// Entry point: counts the words read from standard input, then prints the report.
///
/// # Panics
///
/// Panics if a line cannot be read from stdin, because each read result is
/// unwrapped.
fn main() {
    let mut counts = HashMap::<String, u32>::new();
    let input = io::stdin();
    input
        .lock()
        .lines()
        .map(|read| read.unwrap())
        .for_each(|text| line_processor(text, &mut counts));
    output(&mut counts);
}
/// Splits one input line into words and records each of them in `words`.
///
/// The line is first stripped of diacritics and lowercased, then cut at every
/// character listed in `SEPARATORS`. Splitting (rather than flushing a buffer
/// only when a separator is seen) also yields the text after the last
/// separator, so the final word of a line is counted even when the line does
/// not end with punctuation.
fn line_processor(line: String, words: &mut HashMap<String, u32>) {
    let formatted_line = diacritics::remove_diacritics(&line).to_lowercase();
    // Consecutive separators produce empty fragments; skip them before allocating.
    for word in formatted_line.split(SEPARATORS).filter(|w| !w.is_empty()) {
        add_word(word.to_string(), words);
    }
}
/// Increments the counter stored for `word` in `words`, inserting it at 1 when new.
///
/// Empty words are ignored so that runs of separators do not create an entry
/// for the empty string.
fn add_word(word: String, words: &mut HashMap<String, u32>) {
    if word.is_empty() {
        return;
    }
    // The entry API hashes the key once and reuses the owned `word` as the key.
    *words.entry(word).or_insert(0) += 1;
}
/// Prints every counted word to stdout as `count<TAB>word`, highest count first.
///
/// Words with equal counts keep the order in which the map yielded them.
///
/// # Panics
///
/// `println!` panics if writing to stdout fails, e.g. with a broken pipe when
/// the reader of the pipe has already exited.
fn output(words: &mut HashMap<String, u32>) {
    let mut entries: Vec<Entry> = words
        .iter()
        .map(|(key, total)| Entry {
            word: key.to_string(),
            count: *total,
        })
        .collect();
    // Stable sort, descending by count.
    entries.sort_by(|lhs, rhs| rhs.count.cmp(&lhs.count));
    for Entry { word, count } in entries {
        println!("{}\t{}", count, word);
    }
}
this way :
cargo build --release
and I run the program like this :
cat src/sample.txt | ./target/release/wordstats | head -n 50
This program should just show something like this (top word count) with no trace :
15 the
14 in
11 are
10 and
10 of
9 species
9 bats
8 horseshoe
8 is
6 or
6 as
5 which
5 their
This is what happens with some echoed content, or with some other files (e.g. cat src/main.rs | ...),
but not with this file's content, which is part of a random Wikipedia page.
My program is a simple word counter that just prints a sorted, tab-separated list of counts and words.
The issue occurs when I pipe the result in the head -n 50
program but not when I print the full output
Any idea why I get such a trace? Am I handling something the wrong way in my program, or could it be related to something else (Rust library / Unix misbehavior)?
My rustc version is : rustc 1.48.0 (7eac88abb 2020-11-16)
Edit :
Add missing Cargo.toml
[package]
name = "wordstats"
version = "0.1.0"
authors = ["Eric Régnier <utopman@gmail.com>"]
edition = "2018"
[dependencies]
diacritics = "0.1.1"
Firstly, you didn't provide enough information to reproduce your problem. You provided source code that uses a third-party dependency but neglected to provide a Cargo.toml. In your case, it was very easy to remove the use of the dependency without impacting the problem at hand, so that's what I did.
Secondly, using println! in non-toy command line programs is a footgun for precisely this reason. Namely, there are two issues that combine together to produce this undesirable behavior:
1. println! will panic if any error occurs while writing to stdout.
2. By default, the Rust runtime ignores SIGPIPE. So instead of your process being sent a PIPE signal, the corresponding write to the file descriptor that was closed returns an error instead. (In that link, you can see that I am on record as advocating for a change in this behavior.)
In a typical C program, SIGPIPE is not ignored. It is also typically not explicitly handled either. When a process is sent a signal that it doesn't handle, the process terminates. And that's exactly what you want in cases like this. Once head stops reading its stdin (your stdout), you want your program to stop, but you also want it to stop gracefully without panicking or printing an error. Because that's what Unix CLI utilities do.
You have two ways to solve this problem. One way is to change your code to handle BrokenPipe errors explicitly. Your code is written in a way that acts as if errors can't happen, since you unwrap the result of reading stdin. So your program is not idiomatic and not set up to handle errors. So in order to deal with BrokenPipe correctly, I had to make a couple of small changes so that it bubbles up errors correctly:
use std::collections::HashMap;
use std::io;
use std::io::prelude::*;
/// One row of the final report: a distinct word paired with its occurrence count.
///
/// Instances are built in `output` from the `(word, count)` pairs of the word map.
#[derive(Debug)]
struct Entry {
/// The word, copied from the map key.
word: String,
/// How many times `word` was counted, copied from the map value.
count: u32,
}
/// Characters treated as word boundaries when a line is split into words.
///
/// References stored in a `static` item are implicitly `'static`, so the
/// lifetime is not spelled out.
static SEPARATORS: &[char] = &[
    ' ', ',', '.', '!', '?', '\'', '"', '\n', '(', ')', '#', '{', '}', '[', ']', '-', ';', ':',
];
/// Entry point: runs `try_main` and translates its outcome into an exit status.
///
/// A `BrokenPipe` error means the reader of our stdout (e.g. `head`) stopped
/// reading; that is a normal way for a pipeline to end, so the process returns
/// quietly. Any other error is reported on stderr and the process exits with
/// status 1 so that callers and shell scripts can detect the failure.
fn main() {
    if let Err(err) = try_main() {
        if err.kind() != std::io::ErrorKind::BrokenPipe {
            // Ignore any error that may occur while writing to stderr.
            let _ = writeln!(std::io::stderr(), "{}", err);
            std::process::exit(1);
        }
    }
}
/// Reads stdin line by line, counts the words and writes the report to stdout.
///
/// # Errors
///
/// Returns the first I/O error hit while reading a line from stdin or while
/// `output` writes the report.
fn try_main() -> Result<(), std::io::Error> {
    let mut counts = HashMap::<String, u32>::new();
    let input = io::stdin();
    // Stops at, and propagates, the first line that fails to read.
    input
        .lock()
        .lines()
        .try_for_each(|read| read.map(|text| line_processor(text, &mut counts)))?;
    output(&mut counts)
}
/// Splits one input line into words and records each of them in `words`.
///
/// The line is cut at every character listed in `SEPARATORS`. Splitting (rather
/// than flushing a buffer only when a separator is seen) also yields the text
/// after the last separator, so the final word of a line is counted even when
/// the line does not end with punctuation.
fn line_processor(line: String, words: &mut HashMap<String, u32>) {
    // Consecutive separators produce empty fragments; skip them before allocating.
    for word in line.split(SEPARATORS).filter(|w| !w.is_empty()) {
        add_word(word.to_string(), words);
    }
}
/// Increments the counter stored for `word` in `words`, inserting it at 1 when new.
///
/// Empty words are ignored so that runs of separators do not create an entry
/// for the empty string.
fn add_word(word: String, words: &mut HashMap<String, u32>) {
    if word.is_empty() {
        return;
    }
    // The entry API hashes the key once and reuses the owned `word` as the key.
    *words.entry(word).or_insert(0) += 1;
}
/// Writes every counted word to stdout as `count<TAB>word`, highest count first.
///
/// Words with equal counts are listed alphabetically, so the report is the
/// same from run to run instead of following the map's iteration order.
///
/// # Errors
///
/// Returns the first error reported while writing to stdout (for example
/// `BrokenPipe` when the reader of the pipe has exited).
fn output(words: &mut HashMap<String, u32>) -> Result<(), std::io::Error> {
    let mut entries: Vec<Entry> = words
        .iter()
        .map(|(word, count)| Entry {
            word: word.clone(),
            count: *count,
        })
        .collect();
    // Descending by count, then ascending by word. Words are unique map keys,
    // so the ordering is total and an unstable sort is safe.
    entries.sort_unstable_by(|a, b| b.count.cmp(&a.count).then_with(|| a.word.cmp(&b.word)));
    // Lock stdout once instead of once per line.
    let stdout = io::stdout();
    let mut stdout = stdout.lock();
    for entry in &entries {
        writeln!(stdout, "{}\t{}", entry.count, entry.word)?;
    }
    Ok(())
}
The second way of handling this is to go back to the default behavior of SIGPIPE. This will cause your Rust application to behave like a C application. That can be accomplished by defining a function that resets the signal handler for SIGPIPE to SIG_DFL:
/// Restores the default disposition (`SIG_DFL`) for `SIGPIPE` on Unix targets.
///
/// The return value of `libc::signal` is not checked.
#[cfg(unix)]
fn reset_sigpipe() {
// SAFETY: only libc-provided constants are passed; no Rust handler function is installed.
unsafe {
libc::signal(libc::SIGPIPE, libc::SIG_DFL);
}
}
/// Non-Unix counterpart of `reset_sigpipe`; it does nothing.
#[cfg(not(unix))]
fn reset_sigpipe() {
// no-op
}
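And then call it before doing any I/O (in the full code below it is the first call in try_main, which main invokes immediately). Note that this requires adding the libc crate to the [dependencies] section of your Cargo.toml. Then you can remove any specific handling of the BrokenPipe error because it won't occur. Instead, your process will be sent a PIPE signal and it will subsequently terminate. Here's the full code: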
use std::collections::HashMap;
use std::io;
use std::io::prelude::*;
/// One row of the final report: a distinct word paired with its occurrence count.
///
/// Instances are built in `output` from the `(word, count)` pairs of the word map.
#[derive(Debug)]
struct Entry {
/// The word, copied from the map key.
word: String,
/// How many times `word` was counted, copied from the map value.
count: u32,
}
/// Characters treated as word boundaries when a line is split into words.
///
/// References stored in a `static` item are implicitly `'static`, so the
/// lifetime is not spelled out.
static SEPARATORS: &[char] = &[
    ' ', ',', '.', '!', '?', '\'', '"', '\n', '(', ')', '#', '{', '}', '[', ']', '-', ';', ':',
];
/// Entry point: runs `try_main` and translates its outcome into an exit status.
///
/// Any error is reported on stderr and the process exits with status 1 so that
/// callers and shell scripts can detect the failure.
fn main() {
    if let Err(err) = try_main() {
        // Ignore any error that may occur while writing to stderr.
        let _ = writeln!(std::io::stderr(), "{}", err);
        std::process::exit(1);
    }
}
/// Resets `SIGPIPE` handling, then reads stdin line by line, counts the words
/// and writes the report to stdout.
///
/// # Errors
///
/// Returns the first I/O error hit while reading a line from stdin or while
/// `output` writes the report.
fn try_main() -> Result<(), std::io::Error> {
    // Must run before any output is written.
    reset_sigpipe();
    let mut counts = HashMap::<String, u32>::new();
    let input = io::stdin();
    // Stops at, and propagates, the first line that fails to read.
    input
        .lock()
        .lines()
        .try_for_each(|read| read.map(|text| line_processor(text, &mut counts)))?;
    output(&mut counts)
}
/// Splits one input line into words and records each of them in `words`.
///
/// The line is cut at every character listed in `SEPARATORS`. Splitting (rather
/// than flushing a buffer only when a separator is seen) also yields the text
/// after the last separator, so the final word of a line is counted even when
/// the line does not end with punctuation.
fn line_processor(line: String, words: &mut HashMap<String, u32>) {
    // Consecutive separators produce empty fragments; skip them before allocating.
    for word in line.split(SEPARATORS).filter(|w| !w.is_empty()) {
        add_word(word.to_string(), words);
    }
}
/// Increments the counter stored for `word` in `words`, inserting it at 1 when new.
///
/// Empty words are ignored so that runs of separators do not create an entry
/// for the empty string.
fn add_word(word: String, words: &mut HashMap<String, u32>) {
    if word.is_empty() {
        return;
    }
    // The entry API hashes the key once and reuses the owned `word` as the key.
    *words.entry(word).or_insert(0) += 1;
}
/// Writes every counted word to stdout as `count<TAB>word`, highest count first.
///
/// Words with equal counts are listed alphabetically, so the report is the
/// same from run to run instead of following the map's iteration order.
///
/// # Errors
///
/// Returns the first error reported while writing to stdout.
fn output(words: &mut HashMap<String, u32>) -> Result<(), std::io::Error> {
    let mut entries: Vec<Entry> = words
        .iter()
        .map(|(word, count)| Entry {
            word: word.clone(),
            count: *count,
        })
        .collect();
    // Descending by count, then ascending by word. Words are unique map keys,
    // so the ordering is total and an unstable sort is safe.
    entries.sort_unstable_by(|a, b| b.count.cmp(&a.count).then_with(|| a.word.cmp(&b.word)));
    // Lock stdout once instead of once per line.
    let stdout = io::stdout();
    let mut stdout = stdout.lock();
    for entry in &entries {
        writeln!(stdout, "{}\t{}", entry.count, entry.word)?;
    }
    Ok(())
}
/// Restores the default disposition (`SIG_DFL`) for `SIGPIPE` on Unix targets.
///
/// The return value of `libc::signal` is not checked.
#[cfg(unix)]
fn reset_sigpipe() {
// SAFETY: only libc-provided constants are passed; no Rust handler function is installed.
unsafe {
libc::signal(libc::SIGPIPE, libc::SIG_DFL);
}
}
/// Non-Unix counterpart of `reset_sigpipe`; it does nothing.
#[cfg(not(unix))]
fn reset_sigpipe() {
// no-op
}