From 33043cf16e76af89178f0d4e7d8238a29c537a9c Mon Sep 17 00:00:00 2001 From: konstin Date: Thu, 24 Aug 2023 18:13:17 +0200 Subject: [PATCH] Add jupyter notebook cell ids in 4.5+ if missing **Summary** See https://github.com/astral-sh/ruff/issues/6834#issuecomment-1691202417 **Test Plan** Added a new notebook --- Cargo.lock | 21 +++++++- Cargo.toml | 1 + crates/ruff/Cargo.toml | 1 + .../jupyter/add_missing_cell_id.ipynb | 37 ++++++++++++++ crates/ruff/src/jupyter/notebook.rs | 48 ++++++++++++++----- 5 files changed, 95 insertions(+), 13 deletions(-) create mode 100644 crates/ruff/resources/test/fixtures/jupyter/add_missing_cell_id.ipynb diff --git a/Cargo.lock b/Cargo.lock index a206ca6caa39fe..f18a9a8f4c647d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2128,6 +2128,7 @@ dependencies = [ "typed-arena", "unicode-width", "unicode_names2", + "uuid", "wsl", ] @@ -3346,9 +3347,25 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" -version = "1.4.0" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79daa5ed5740825c40b389c5e50312b9c86df53fccd33f281df655642b43869d" +dependencies = [ + "getrandom", + "rand", + "uuid-macro-internal", +] + +[[package]] +name = "uuid-macro-internal" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d023da39d1fde5a8a3fe1f3e01ca9632ada0a63e9797de55a879d6e2236277be" +checksum = "f7e1ba1f333bd65ce3c9f27de592fcbc256dafe3af2717f56d7c87761fbaccf4" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.23", +] [[package]] name = "valuable" diff --git a/Cargo.toml b/Cargo.toml index 7100aeec35c24b..8ca652963dca8f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,6 +49,7 @@ toml = { version = "0.7.2" } tracing = "0.1.37" tracing-indicatif = "0.3.4" tracing-subscriber = { version = "0.3.17", features = ["env-filter"] } +uuid = { version = "1.4.1", features = ["v4", "fast-rng", "macro-diagnostics"] } unicode-width = "0.1.10" wsl = { version = "0.1.0" } diff --git a/crates/ruff/Cargo.toml b/crates/ruff/Cargo.toml index 141c824af64bef..914667daf66d6a 100644 --- a/crates/ruff/Cargo.toml +++ b/crates/ruff/Cargo.toml @@ -75,6 +75,7 @@ strum_macros = { workspace = true } thiserror = { version = "1.0.43" } toml = { workspace = true } typed-arena = { version = "2.0.2" } +uuid = { workspace = true, features = ["v4", "fast-rng", "macro-diagnostics"] } unicode-width = { workspace = true } unicode_names2 = { version = "0.6.0", git = "https://github.com/youknowone/unicode_names2.git", rev = "4ce16aa85cbcdd9cc830410f1a72ef9a235f2fde" } wsl = { version = "0.1.0" } diff --git a/crates/ruff/resources/test/fixtures/jupyter/add_missing_cell_id.ipynb b/crates/ruff/resources/test/fixtures/jupyter/add_missing_cell_id.ipynb new file mode 100644 index 00000000000000..d1adfcefe2c114 --- /dev/null +++ b/crates/ruff/resources/test/fixtures/jupyter/add_missing_cell_id.ipynb @@ -0,0 +1,37 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import math\n", + "import os\n", + "\n", + "math.pi" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (ruff)", + "language": "python", + "name": "ruff" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/crates/ruff/src/jupyter/notebook.rs b/crates/ruff/src/jupyter/notebook.rs index 144fa948c41498..3937a6be3216d9 100644 --- a/crates/ruff/src/jupyter/notebook.rs +++ b/crates/ruff/src/jupyter/notebook.rs @@ -9,6 +9,7 @@ use itertools::Itertools; use once_cell::sync::OnceCell; use serde::Serialize; use serde_json::error::Category; +use uuid::Uuid; use ruff_diagnostics::Diagnostic; use ruff_python_parser::lexer::lex; @@ -156,7 +157,7 @@ impl Notebook { TextRange::default(), ) })?; - let raw_notebook: RawNotebook = match serde_json::from_reader(reader.by_ref()) { + let mut raw_notebook: RawNotebook = match serde_json::from_reader(reader.by_ref()) { Ok(notebook) => notebook, Err(err) => { // Translate the error into a diagnostic @@ -262,6 +263,23 @@ impl Notebook { cell_offsets.push(current_offset); } + // Add cell ids to 4.5+ notebooks if they are missing + // https://github.com/astral-sh/ruff/issues/6834 + // https://github.com/jupyter/enhancement-proposals/blob/master/62-cell-id/cell-id.md#required-field + if raw_notebook.nbformat == 4 && raw_notebook.nbformat_minor >= 5 { + for cell in &mut raw_notebook.cells { + let id = match cell { + Cell::Code(cell) => &mut cell.id, + Cell::Markdown(cell) => &mut cell.id, + Cell::Raw(cell) => &mut cell.id, + }; + if id.is_none() { + // https://github.com/jupyter/enhancement-proposals/blob/master/62-cell-id/cell-id.md#questions + *id = Some(Uuid::new_v4().to_string()) + } + } + } + Ok(Self { raw: raw_notebook, index: OnceCell::new(), @@ -293,11 +311,11 @@ impl Notebook { .iter() .rev() .find(|m| m.source <= *offset) - else { - // There are no markers above the current offset, so we can - // stop here. - break; - }; + else { + // There are no markers above the current offset, so we can + // stop here. + break; + }; last_marker = Some(marker); marker } @@ -313,6 +331,8 @@ impl Notebook { /// Update the cell contents with the transformed content. /// + /// Also inserts cell ids for jupyter notebook version 4.5+ where they are missing + /// /// ## Panics /// /// Panics if the transformed content is out of bounds for any cell. This @@ -662,10 +682,12 @@ print("after empty cells") Ok(()) } - #[test] - fn test_no_cell_id() -> Result<()> { - let path = "no_cell_id.ipynb".to_string(); - let source_notebook = read_jupyter_notebook(path.as_ref())?; + // Version <4.5, don't emit cell ids + #[test_case(Path::new("no_cell_id.ipynb"), false; "no_cell_id")] + // Version 4.5, cell ids are missing and need to be added + #[test_case(Path::new("add_missing_cell_id.ipynb"), true; "add_missing_cell_id")] + fn test_cell_id(path: &Path, has_id: bool) -> Result<()> { + let source_notebook = read_jupyter_notebook(path)?; let source_kind = SourceKind::Jupyter(source_notebook); let (_, transformed) = test_contents( &source_kind, @@ -676,7 +698,11 @@ print("after empty cells") let mut writer = Vec::new(); linted_notebook.write_inner(&mut writer)?; let actual = String::from_utf8(writer)?; - assert!(!actual.contains(r#""id":"#)); + if has_id { + assert!(actual.contains(r#""id": ""#)); + } else { + assert!(!actual.contains(r#""id":"#)); + } Ok(()) } }