From bd3f26f9893100c8c3ea3fc732341fa208dea819 Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Tue, 17 Oct 2023 12:20:25 -0400 Subject: [PATCH 1/7] split: --filter and brokenpipe handling --- src/uu/split/src/split.rs | 170 ++++++++++++++++++++++++++---------- tests/by-util/test_split.rs | 30 +++++++ 2 files changed, 153 insertions(+), 47 deletions(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index bfd595e4f86..217cde399a7 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -746,6 +746,9 @@ enum SettingsError { /// Multiple different separator characters MultipleSeparatorCharacters, + /// Using `--filter` with Kth chunk options that output to stdout + FilterWithKthChunkNumber, + /// The `--filter` option is not supported on Windows. #[cfg(windows)] NotSupported, @@ -778,6 +781,9 @@ impl fmt::Display for SettingsError { "invalid suffix {}, contains directory separator", s.quote() ), + Self::FilterWithKthChunkNumber => { + write!(f, "--filter does not process a chunk extracted to stdout") + } #[cfg(windows)] Self::NotSupported => write!( f, @@ -848,12 +854,26 @@ impl Settings { filter: matches.get_one::(OPT_FILTER).map(|s| s.to_owned()), elide_empty_files: matches.get_flag(OPT_ELIDE_EMPTY_FILES), }; + #[cfg(windows)] if result.filter.is_some() { // see https://github.com/rust-lang/rust/issues/29494 return Err(SettingsError::NotSupported); } + // Return an error if `--filter` option is used with any of the + // Kth chunk sub-strategies of `--number` option + // As those are writing to stdout of `split` and cannot write to filter command child process + let kth_chunk = matches!( + result.strategy, + Strategy::Number(NumberType::KthBytes(_, _)) + | Strategy::Number(NumberType::KthLines(_, _)) + | Strategy::Number(NumberType::KthRoundRobin(_, _)) + ); + if kth_chunk && result.filter.is_some() { + return Err(SettingsError::FilterWithKthChunkNumber); + } + Ok(result) } @@ -869,6 +889,46 @@ impl Settings { } } +/// When using `--filter` option, writing to child command process stdin +/// could fail with BrokenPipe error +/// It can be safely ignored +fn ignorable_io_error(error: &std::io::Error, settings: &Settings) -> bool { + error.kind() == ErrorKind::BrokenPipe && settings.filter.is_some() +} + +/// Custom wrapper for `write()` method +/// Follows similar approach to GNU implementation +/// If ignorable io error occurs, return number of bytes as if all bytes written +/// Should not be used for Kth chunk number sub-strategies +/// as those do not work with `--filter` option +fn custom_write( + bytes: &[u8], + writer: &mut T, + settings: &Settings, +) -> std::io::Result { + match writer.write(bytes) { + Ok(n) => Ok(n), + Err(e) if ignorable_io_error(&e, settings) => Ok(bytes.len()), + Err(e) => Err(e), + } +} + +/// Custom wrapper for `write_all()` method +/// Similar to * [`custom_write`] +/// Should not be used for Kth chunk number sub-strategies +/// as those do not work with `--filter` option +fn custom_write_all( + bytes: &[u8], + writer: &mut T, + settings: &Settings, +) -> std::io::Result<()> { + match writer.write_all(bytes) { + Ok(()) => Ok(()), + Err(e) if ignorable_io_error(&e, settings) => Ok(()), + Err(e) => Err(e), + } +} + /// Write a certain number of bytes to one file, then move on to another one. /// /// This struct maintains an underlying writer representing the @@ -964,9 +1024,9 @@ impl<'a> Write for ByteChunkWriter<'a> { // bytes in `buf`, then write all the bytes in `buf`. Otherwise, // write enough bytes to fill the current chunk, then increment // the chunk number and repeat. - let n = buf.len(); - if (n as u64) < self.num_bytes_remaining_in_current_chunk { - let num_bytes_written = self.inner.write(buf)?; + let buf_len = buf.len(); + if (buf_len as u64) < self.num_bytes_remaining_in_current_chunk { + let num_bytes_written = custom_write(buf, &mut self.inner, self.settings)?; self.num_bytes_remaining_in_current_chunk -= num_bytes_written as u64; return Ok(carryover_bytes_written + num_bytes_written); } else { @@ -976,7 +1036,7 @@ impl<'a> Write for ByteChunkWriter<'a> { // self.num_bytes_remaining_in_current_chunk is lower than // n, which is already usize. let i = self.num_bytes_remaining_in_current_chunk as usize; - let num_bytes_written = self.inner.write(&buf[..i])?; + let num_bytes_written = custom_write(&buf[..i], &mut self.inner, self.settings)?; self.num_bytes_remaining_in_current_chunk -= num_bytes_written as u64; // It's possible that the underlying writer did not @@ -1090,14 +1150,16 @@ impl<'a> Write for LineChunkWriter<'a> { // Write the line, starting from *after* the previous // separator character and ending *after* the current // separator character. - let n = self.inner.write(&buf[prev..i + 1])?; - total_bytes_written += n; + let num_bytes_written = + custom_write(&buf[prev..i + 1], &mut self.inner, self.settings)?; + total_bytes_written += num_bytes_written; prev = i + 1; self.num_lines_remaining_in_current_chunk -= 1; } - let n = self.inner.write(&buf[prev..buf.len()])?; - total_bytes_written += n; + let num_bytes_written = + custom_write(&buf[prev..buf.len()], &mut self.inner, self.settings)?; + total_bytes_written += num_bytes_written; Ok(total_bytes_written) } @@ -1246,7 +1308,12 @@ impl<'a> Write for LineBytesChunkWriter<'a> { { self.num_bytes_remaining_in_current_chunk = 0; } else { - let num_bytes_written = self.inner.write(&buf[..end.min(buf.len())])?; + // let num_bytes_written = self.inner.write(&buf[..end.min(buf.len())])?; + let num_bytes_written = custom_write( + &buf[..end.min(buf.len())], + &mut self.inner, + self.settings, + )?; self.num_bytes_remaining_in_current_chunk -= num_bytes_written; total_bytes_written += num_bytes_written; buf = &buf[num_bytes_written..]; @@ -1259,7 +1326,9 @@ impl<'a> Write for LineBytesChunkWriter<'a> { // continue to the next iteration. (See chunk 1 in the // example comment above.) Some(i) if i < self.num_bytes_remaining_in_current_chunk => { - let num_bytes_written = self.inner.write(&buf[..i + 1])?; + // let num_bytes_written = self.inner.write(&buf[..i + 1])?; + let num_bytes_written = + custom_write(&buf[..i + 1], &mut self.inner, self.settings)?; self.num_bytes_remaining_in_current_chunk -= num_bytes_written; total_bytes_written += num_bytes_written; buf = &buf[num_bytes_written..]; @@ -1277,7 +1346,9 @@ impl<'a> Write for LineBytesChunkWriter<'a> { == self.chunk_size.try_into().unwrap() => { let end = self.num_bytes_remaining_in_current_chunk; - let num_bytes_written = self.inner.write(&buf[..end])?; + // let num_bytes_written = self.inner.write(&buf[..end])?; + let num_bytes_written = + custom_write(&buf[..end], &mut self.inner, self.settings)?; self.num_bytes_remaining_in_current_chunk -= num_bytes_written; total_bytes_written += num_bytes_written; buf = &buf[num_bytes_written..]; @@ -1376,29 +1447,26 @@ where writers.push(writer); } - // Capture the result of the `std::io::copy()` calls to check for - // `BrokenPipe`. - let result: std::io::Result<()> = { - // Write `chunk_size` bytes from the reader into each writer - // except the last. - // - // The last writer gets all remaining bytes so that if the number - // of bytes in the input file was not evenly divisible by - // `num_chunks`, we don't leave any bytes behind. - for writer in writers.iter_mut().take(num_chunks - 1) { - io::copy(&mut reader.by_ref().take(chunk_size), writer)?; - } - - // Write all the remaining bytes to the last chunk. - let i = num_chunks - 1; - let last_chunk_size = num_bytes - (chunk_size * (num_chunks as u64 - 1)); - io::copy(&mut reader.by_ref().take(last_chunk_size), &mut writers[i])?; + // Write `chunk_size` bytes from the reader into each writer + // except the last. + // + // The last writer gets all remaining bytes so that if the number + // of bytes in the input file was not evenly divisible by + // `num_chunks`, we don't leave any bytes behind. + for writer in writers.iter_mut().take(num_chunks - 1) { + match io::copy(&mut reader.by_ref().take(chunk_size), writer) { + Ok(_) => continue, + Err(e) if ignorable_io_error(&e, settings) => continue, + Err(e) => return Err(uio_error!(e, "input/output error")), + }; + } - Ok(()) - }; - match result { + // Write all the remaining bytes to the last chunk. + let i = num_chunks - 1; + let last_chunk_size = num_bytes - (chunk_size * (num_chunks as u64 - 1)); + match io::copy(&mut reader.by_ref().take(last_chunk_size), &mut writers[i]) { Ok(_) => Ok(()), - Err(e) if e.kind() == ErrorKind::BrokenPipe => Ok(()), + Err(e) if ignorable_io_error(&e, settings) => Ok(()), Err(e) => Err(uio_error!(e, "input/output error")), } } @@ -1548,8 +1616,8 @@ where let maybe_writer = writers.get_mut(i); let writer = maybe_writer.unwrap(); let bytes = line.as_slice(); - writer.write_all(bytes)?; - writer.write_all(&[sep])?; + custom_write_all(bytes, writer, settings)?; + custom_write_all(&[sep], writer, settings)?; // Add one byte for the separator character. let num_bytes = bytes.len() + 1; @@ -1626,11 +1694,28 @@ where Ok(()) } +/// Split a file into a specific number of chunks by line, but +/// assign lines via round-robin +/// +/// This function always creates one output file for each chunk, even +/// if there is an error reading or writing one of the chunks or if +/// the input file is truncated. However, if the `filter` option is +/// being used, then no files are created. +/// +/// # Errors +/// +/// This function returns an error if there is a problem reading from +/// `reader` or writing to one of the output files. +/// +/// # See also +/// +/// * [`split_into_n_chunks_by_line`], which splits its input in the same way, +/// but without round robin distribution. fn split_into_n_chunks_by_line_round_robin( settings: &Settings, reader: &mut R, num_chunks: u64, -) -> std::io::Result<()> +) -> UResult<()> where R: BufRead, { @@ -1662,8 +1747,8 @@ where let maybe_writer = writers.get_mut(i % num_chunks); let writer = maybe_writer.unwrap(); let bytes = line.as_slice(); - writer.write_all(bytes)?; - writer.write_all(&[sep])?; + custom_write_all(bytes, writer, settings)?; + custom_write_all(&[sep], writer, settings)?; } Ok(()) @@ -1745,13 +1830,7 @@ fn split(settings: &Settings) -> UResult<()> { kth_chunk_by_line(settings, &mut reader, chunk_number, num_chunks) } Strategy::Number(NumberType::RoundRobin(num_chunks)) => { - match split_into_n_chunks_by_line_round_robin(settings, &mut reader, num_chunks) { - Ok(_) => Ok(()), - Err(e) => match e.kind() { - ErrorKind::BrokenPipe => Ok(()), - _ => Err(USimpleError::new(1, format!("{e}"))), - }, - } + split_into_n_chunks_by_line_round_robin(settings, &mut reader, num_chunks) } Strategy::Number(NumberType::KthRoundRobin(chunk_number, num_chunks)) => { // The chunk number is given as a 1-indexed number, but it @@ -1773,7 +1852,6 @@ fn split(settings: &Settings) -> UResult<()> { // indicate that. A special error message needs to be // printed in that case. ErrorKind::Other => Err(USimpleError::new(1, format!("{e}"))), - ErrorKind::BrokenPipe => Ok(()), _ => Err(uio_error!(e, "input/output error")), }, } @@ -1792,7 +1870,6 @@ fn split(settings: &Settings) -> UResult<()> { // indicate that. A special error message needs to be // printed in that case. ErrorKind::Other => Err(USimpleError::new(1, format!("{e}"))), - ErrorKind::BrokenPipe => Ok(()), _ => Err(uio_error!(e, "input/output error")), }, } @@ -1811,7 +1888,6 @@ fn split(settings: &Settings) -> UResult<()> { // indicate that. A special error message needs to be // printed in that case. ErrorKind::Other => Err(USimpleError::new(1, format!("{e}"))), - ErrorKind::BrokenPipe => Ok(()), _ => Err(uio_error!(e, "input/output error")), }, } diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index ce80844cf3e..2fd31a3f95e 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -338,6 +338,36 @@ fn test_filter_broken_pipe() { .succeeds(); } +#[test] +#[cfg(unix)] +fn test_filter_with_kth_chunk() { + let scene = TestScenario::new(util_name!()); + scene + .ucmd() + .args(&["--filter='some'", "--number=1/2"]) + .ignore_stdin_write_error() + .pipe_in("a\n") + .fails() + .no_stdout() + .stderr_contains("--filter does not process a chunk extracted to stdout"); + scene + .ucmd() + .args(&["--filter='some'", "--number=l/1/2"]) + .ignore_stdin_write_error() + .pipe_in("a\n") + .fails() + .no_stdout() + .stderr_contains("--filter does not process a chunk extracted to stdout"); + scene + .ucmd() + .args(&["--filter='some'", "--number=r/1/2"]) + .ignore_stdin_write_error() + .pipe_in("a\n") + .fails() + .no_stdout() + .stderr_contains("--filter does not process a chunk extracted to stdout"); +} + #[test] fn test_split_lines_number() { // Test if stdout/stderr for '--lines' option is correct From af372b5e13b5a2a58529e07af51c413d1de2e1e3 Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Tue, 17 Oct 2023 16:37:16 -0400 Subject: [PATCH 2/7] split: `--filter` with endless stdin input --- src/uu/split/src/split.rs | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index f3fb85e5559..53e47105daf 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -916,17 +916,18 @@ fn custom_write( } /// Custom wrapper for `write_all()` method -/// Similar to * [`custom_write`] +/// Similar to * [`custom_write`], but returns true or false +/// depending on if `--filter` stdin is still open (no BrokenPipe error) /// Should not be used for Kth chunk number sub-strategies /// as those do not work with `--filter` option fn custom_write_all( bytes: &[u8], writer: &mut T, settings: &Settings, -) -> std::io::Result<()> { +) -> std::io::Result { match writer.write_all(bytes) { - Ok(()) => Ok(()), - Err(e) if ignorable_io_error(&e, settings) => Ok(()), + Ok(()) => Ok(true), + Err(e) if ignorable_io_error(&e, settings) => Ok(false), Err(e) => Err(e), } } @@ -1407,6 +1408,7 @@ where USimpleError::new(1, format!("{}: cannot determine file size", settings.input)) })?; + // TODO - cannot determine file size for stdin input let num_bytes = metadata.len(); let will_have_empty_files = settings.elide_empty_files && num_chunks > num_bytes; let (num_chunks, chunk_size) = if will_have_empty_files { @@ -1500,6 +1502,7 @@ where // NOTE: the `elide_empty_files` parameter is ignored here // as we do not generate any files // and instead writing to stdout + // TODO - cannot get metadata or determine file size for stdin input let metadata = metadata(&settings.input).map_err(|_| { USimpleError::new(1, format!("{}: cannot determine file size", settings.input)) })?; @@ -1586,6 +1589,7 @@ where { // Get the size of the input file in bytes and compute the number // of bytes per chunk. + // TODO - cannot get metadata or determine file size for stdin input let metadata = metadata(&settings.input).unwrap(); let num_bytes = metadata.len(); let chunk_size = (num_bytes / num_chunks) as usize; @@ -1660,6 +1664,7 @@ where { // Get the size of the input file in bytes and compute the number // of bytes per chunk. + // TODO - cannot get metadata or determine file size for stdin input let metadata = metadata(&settings.input).unwrap(); let num_bytes = metadata.len(); let chunk_size = (num_bytes / num_chunks) as usize; @@ -1669,7 +1674,7 @@ where let mut writer = stdout.lock(); let mut num_bytes_remaining_in_current_chunk = chunk_size; - let mut i = 0; + let mut i = 1; let sep = settings.separator; for line_result in reader.split(sep) { let line = line_result?; @@ -1744,13 +1749,21 @@ where let num_chunks: usize = num_chunks.try_into().unwrap(); let sep = settings.separator; + let mut closed_writers = 0; for (i, line_result) in reader.split(sep).enumerate() { - let line = line_result.unwrap(); let maybe_writer = writers.get_mut(i % num_chunks); let writer = maybe_writer.unwrap(); + let mut line = line_result.unwrap(); + line.push(sep); let bytes = line.as_slice(); - custom_write_all(bytes, writer, settings)?; - custom_write_all(&[sep], writer, settings)?; + let writer_stdin_open = custom_write_all(bytes, writer, settings)?; + if !writer_stdin_open { + closed_writers += 1; + if closed_writers == num_chunks { + // all writers are closed - stop reading + break; + } + } } Ok(()) @@ -1790,6 +1803,10 @@ where let num_chunks: usize = num_chunks.try_into().unwrap(); let chunk_number: usize = chunk_number.try_into().unwrap(); let sep = settings.separator; + // The chunk number is given as a 1-indexed number, but it + // is a little easier to deal with a 0-indexed number + // since `.enumerate()` returns index `i` starting with 0 + let chunk_number = chunk_number - 1; for (i, line_result) in reader.split(sep).enumerate() { let line = line_result?; let bytes = line.as_slice(); @@ -1826,18 +1843,12 @@ fn split(settings: &Settings) -> UResult<()> { split_into_n_chunks_by_line(settings, &mut reader, num_chunks) } Strategy::Number(NumberType::KthLines(chunk_number, num_chunks)) => { - // The chunk number is given as a 1-indexed number, but it - // is a little easier to deal with a 0-indexed number. - let chunk_number = chunk_number - 1; kth_chunk_by_line(settings, &mut reader, chunk_number, num_chunks) } Strategy::Number(NumberType::RoundRobin(num_chunks)) => { split_into_n_chunks_by_line_round_robin(settings, &mut reader, num_chunks) } Strategy::Number(NumberType::KthRoundRobin(chunk_number, num_chunks)) => { - // The chunk number is given as a 1-indexed number, but it - // is a little easier to deal with a 0-indexed number. - let chunk_number = chunk_number - 1; kth_chunk_by_line_round_robin(settings, &mut reader, chunk_number, num_chunks) } Strategy::Lines(chunk_size) => { From b889e52029e978058b14bf44c98705eb863b4603 Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Tue, 17 Oct 2023 19:12:43 -0400 Subject: [PATCH 3/7] split: `--number` stdin handling --- src/uu/split/src/split.rs | 12 ++++++------ tests/by-util/test_split.rs | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 53e47105daf..dba425c6706 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -1408,7 +1408,6 @@ where USimpleError::new(1, format!("{}: cannot determine file size", settings.input)) })?; - // TODO - cannot determine file size for stdin input let num_bytes = metadata.len(); let will_have_empty_files = settings.elide_empty_files && num_chunks > num_bytes; let (num_chunks, chunk_size) = if will_have_empty_files { @@ -1502,7 +1501,6 @@ where // NOTE: the `elide_empty_files` parameter is ignored here // as we do not generate any files // and instead writing to stdout - // TODO - cannot get metadata or determine file size for stdin input let metadata = metadata(&settings.input).map_err(|_| { USimpleError::new(1, format!("{}: cannot determine file size", settings.input)) })?; @@ -1589,8 +1587,9 @@ where { // Get the size of the input file in bytes and compute the number // of bytes per chunk. - // TODO - cannot get metadata or determine file size for stdin input - let metadata = metadata(&settings.input).unwrap(); + let metadata = metadata(&settings.input).map_err(|_| { + USimpleError::new(1, format!("{}: cannot determine file size", settings.input)) + })?; let num_bytes = metadata.len(); let chunk_size = (num_bytes / num_chunks) as usize; @@ -1664,8 +1663,9 @@ where { // Get the size of the input file in bytes and compute the number // of bytes per chunk. - // TODO - cannot get metadata or determine file size for stdin input - let metadata = metadata(&settings.input).unwrap(); + let metadata = metadata(&settings.input).map_err(|_| { + USimpleError::new(1, format!("{}: cannot determine file size", settings.input)) + })?; let num_bytes = metadata.len(); let chunk_size = (num_bytes / num_chunks) as usize; diff --git a/tests/by-util/test_split.rs b/tests/by-util/test_split.rs index e7452bae64c..85ce5f89195 100644 --- a/tests/by-util/test_split.rs +++ b/tests/by-util/test_split.rs @@ -729,6 +729,24 @@ fn test_split_stdin_num_kth_chunk() { .stderr_only("split: -: cannot determine file size\n"); } +#[test] +fn test_split_stdin_num_line_chunks() { + new_ucmd!() + .args(&["--number=l/2"]) + .fails() + .code_is(1) + .stderr_only("split: -: cannot determine file size\n"); +} + +#[test] +fn test_split_stdin_num_kth_line_chunk() { + new_ucmd!() + .args(&["--number=l/2/5"]) + .fails() + .code_is(1) + .stderr_only("split: -: cannot determine file size\n"); +} + fn file_read(at: &AtPath, filename: &str) -> String { let mut s = String::new(); at.open(filename).read_to_string(&mut s).unwrap(); From aeaf39deacb407f0c62ed3cf11e6b1b287d5f371 Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Tue, 17 Oct 2023 19:27:57 -0400 Subject: [PATCH 4/7] split: comments --- src/uu/split/src/split.rs | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index dba425c6706..1c337c96642 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -916,7 +916,7 @@ fn custom_write( } /// Custom wrapper for `write_all()` method -/// Similar to * [`custom_write`], but returns true or false +/// Similar to [`custom_write`], but returns true or false /// depending on if `--filter` stdin is still open (no BrokenPipe error) /// Should not be used for Kth chunk number sub-strategies /// as those do not work with `--filter` option @@ -996,6 +996,7 @@ impl<'a> ByteChunkWriter<'a> { } impl<'a> Write for ByteChunkWriter<'a> { + /// Implements `--bytes=SIZE` fn write(&mut self, mut buf: &[u8]) -> std::io::Result { // If the length of `buf` exceeds the number of bytes remaining // in the current chunk, we will need to write to multiple @@ -1125,6 +1126,7 @@ impl<'a> LineChunkWriter<'a> { } impl<'a> Write for LineChunkWriter<'a> { + /// Implements `--lines=NUMBER` fn write(&mut self, buf: &[u8]) -> std::io::Result { // If the number of lines in `buf` exceeds the number of lines // remaining in the current chunk, we will need to write to @@ -1259,6 +1261,8 @@ impl<'a> Write for LineBytesChunkWriter<'a> { /// |------| |-------| |--------| |---| /// aaaaaaaa a\nbbbb\n cccc\ndd\n ee\n /// ``` + /// + /// Implements `--line-bytes=SIZE` fn write(&mut self, mut buf: &[u8]) -> std::io::Result { // The total number of bytes written during the loop below. // @@ -1387,6 +1391,10 @@ impl<'a> Write for LineBytesChunkWriter<'a> { /// /// This function returns an error if there is a problem reading from /// `reader` or writing to one of the output files. +/// +/// Implements `--number=CHUNKS` +/// Where CHUNKS +/// * N fn split_into_n_chunks_by_byte( settings: &Settings, reader: &mut R, @@ -1484,6 +1492,10 @@ where /// /// This function returns an error if there is a problem reading from /// `reader` or writing to stdout. +/// +/// Implements `--number=CHUNKS` +/// Where CHUNKS +/// * K/N fn kth_chunks_by_byte( settings: &Settings, reader: &mut R, @@ -1577,6 +1589,10 @@ where /// /// * [`kth_chunk_by_line`], which splits its input in the same way, /// but writes only one specified chunk to stdout. +/// +/// Implements `--number=CHUNKS` +/// Where CHUNKS +/// * l/N fn split_into_n_chunks_by_line( settings: &Settings, reader: &mut R, @@ -1652,6 +1668,10 @@ where /// /// * [`split_into_n_chunks_by_line`], which splits its input in the /// same way, but writes each chunk to its own file. +/// +/// Implements `--number=CHUNKS` +/// Where CHUNKS +/// * l/K/N fn kth_chunk_by_line( settings: &Settings, reader: &mut R, @@ -1718,6 +1738,10 @@ where /// /// * [`split_into_n_chunks_by_line`], which splits its input in the same way, /// but without round robin distribution. +/// +/// Implements `--number=CHUNKS` +/// Where CHUNKS +/// * r/N fn split_into_n_chunks_by_line_round_robin( settings: &Settings, reader: &mut R, @@ -1787,6 +1811,10 @@ where /// /// * [`split_into_n_chunks_by_line_round_robin`], which splits its input in the /// same way, but writes each chunk to its own file. +/// +/// Implements `--number=CHUNKS` +/// Where CHUNKS +/// * r/K/N fn kth_chunk_by_line_round_robin( settings: &Settings, reader: &mut R, From ec7fad3c5e522bcd82c9772919a7bf1708e4dee3 Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Tue, 17 Oct 2023 19:50:15 -0400 Subject: [PATCH 5/7] split: formatting --- src/uu/split/src/split.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 1c337c96642..ace0abe5bbc 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -1261,7 +1261,7 @@ impl<'a> Write for LineBytesChunkWriter<'a> { /// |------| |-------| |--------| |---| /// aaaaaaaa a\nbbbb\n cccc\ndd\n ee\n /// ``` - /// + /// /// Implements `--line-bytes=SIZE` fn write(&mut self, mut buf: &[u8]) -> std::io::Result { // The total number of bytes written during the loop below. @@ -1391,7 +1391,7 @@ impl<'a> Write for LineBytesChunkWriter<'a> { /// /// This function returns an error if there is a problem reading from /// `reader` or writing to one of the output files. -/// +/// /// Implements `--number=CHUNKS` /// Where CHUNKS /// * N @@ -1492,7 +1492,7 @@ where /// /// This function returns an error if there is a problem reading from /// `reader` or writing to stdout. -/// +/// /// Implements `--number=CHUNKS` /// Where CHUNKS /// * K/N @@ -1589,7 +1589,7 @@ where /// /// * [`kth_chunk_by_line`], which splits its input in the same way, /// but writes only one specified chunk to stdout. -/// +/// /// Implements `--number=CHUNKS` /// Where CHUNKS /// * l/N @@ -1668,7 +1668,7 @@ where /// /// * [`split_into_n_chunks_by_line`], which splits its input in the /// same way, but writes each chunk to its own file. -/// +/// /// Implements `--number=CHUNKS` /// Where CHUNKS /// * l/K/N @@ -1738,7 +1738,7 @@ where /// /// * [`split_into_n_chunks_by_line`], which splits its input in the same way, /// but without round robin distribution. -/// +/// /// Implements `--number=CHUNKS` /// Where CHUNKS /// * r/N @@ -1811,7 +1811,7 @@ where /// /// * [`split_into_n_chunks_by_line_round_robin`], which splits its input in the /// same way, but writes each chunk to its own file. -/// +/// /// Implements `--number=CHUNKS` /// Where CHUNKS /// * r/K/N From 884251c6c7307bcbe6b03539965bd3cc865b8034 Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Tue, 17 Oct 2023 20:17:29 -0400 Subject: [PATCH 6/7] split: comments --- src/uu/split/src/split.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index ace0abe5bbc..2cd4e09a086 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -1315,7 +1315,6 @@ impl<'a> Write for LineBytesChunkWriter<'a> { { self.num_bytes_remaining_in_current_chunk = 0; } else { - // let num_bytes_written = self.inner.write(&buf[..end.min(buf.len())])?; let num_bytes_written = custom_write( &buf[..end.min(buf.len())], &mut self.inner, @@ -1333,7 +1332,6 @@ impl<'a> Write for LineBytesChunkWriter<'a> { // continue to the next iteration. (See chunk 1 in the // example comment above.) Some(i) if i < self.num_bytes_remaining_in_current_chunk => { - // let num_bytes_written = self.inner.write(&buf[..i + 1])?; let num_bytes_written = custom_write(&buf[..i + 1], &mut self.inner, self.settings)?; self.num_bytes_remaining_in_current_chunk -= num_bytes_written; @@ -1353,7 +1351,6 @@ impl<'a> Write for LineBytesChunkWriter<'a> { == self.chunk_size.try_into().unwrap() => { let end = self.num_bytes_remaining_in_current_chunk; - // let num_bytes_written = self.inner.write(&buf[..end])?; let num_bytes_written = custom_write(&buf[..end], &mut self.inner, self.settings)?; self.num_bytes_remaining_in_current_chunk -= num_bytes_written; From e6175a09dd322dc495d55e2cc7ed85c098d73463 Mon Sep 17 00:00:00 2001 From: zhitkoff Date: Wed, 18 Oct 2023 13:08:04 -0400 Subject: [PATCH 7/7] split: comments --- src/uu/split/src/split.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/uu/split/src/split.rs b/src/uu/split/src/split.rs index 2cd4e09a086..638d4c65eee 100644 --- a/src/uu/split/src/split.rs +++ b/src/uu/split/src/split.rs @@ -748,7 +748,10 @@ enum SettingsError { /// Multiple different separator characters MultipleSeparatorCharacters, - /// Using `--filter` with Kth chunk options that output to stdout + /// Using `--filter` with `--number` option sub-strategies that print Kth chunk out of N chunks to stdout + /// K/N + /// l/K/N + /// r/K/N FilterWithKthChunkNumber, /// The `--filter` option is not supported on Windows.