From e3b496b7726f05abd19c12c55c1cf6850005615f Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Fri, 1 Aug 2025 16:53:55 +0100 Subject: [PATCH 1/4] Optimize doc mode handling for attribute parsing --- pyo3-macros-backend/src/attributes.rs | 3 + pyo3-macros-backend/src/method.rs | 2 +- pyo3-macros-backend/src/module.rs | 3 +- pyo3-macros-backend/src/pyclass.rs | 30 ++--- pyo3-macros-backend/src/pyfunction.rs | 2 +- pyo3-macros-backend/src/pyimpl.rs | 2 +- pyo3-macros-backend/src/pymethod.rs | 21 +++- pyo3-macros-backend/src/utils.rs | 168 ++++++++++++++++++++------ 8 files changed, 170 insertions(+), 61 deletions(-) diff --git a/pyo3-macros-backend/src/attributes.rs b/pyo3-macros-backend/src/attributes.rs index af81e7aea69..8e7367bd119 100644 --- a/pyo3-macros-backend/src/attributes.rs +++ b/pyo3-macros-backend/src/attributes.rs @@ -18,8 +18,10 @@ pub mod kw { syn::custom_keyword!(cancel_handle); syn::custom_keyword!(constructor); syn::custom_keyword!(dict); + syn::custom_keyword!(doc_mode); syn::custom_keyword!(eq); syn::custom_keyword!(eq_int); + syn::custom_keyword!(end_doc_mode); syn::custom_keyword!(extends); syn::custom_keyword!(freelist); syn::custom_keyword!(from_py_with); @@ -318,6 +320,7 @@ pub type StrFormatterAttribute = OptionalKeywordAttribute; pub type SubmoduleAttribute = kw::submodule; pub type GILUsedAttribute = KeywordAttribute; +pub type DocModeAttribute = KeywordAttribute; impl Parse for KeywordAttribute { fn parse(input: ParseStream<'_>) -> Result { diff --git a/pyo3-macros-backend/src/method.rs b/pyo3-macros-backend/src/method.rs index 0e8977db101..448ffa53c7d 100644 --- a/pyo3-macros-backend/src/method.rs +++ b/pyo3-macros-backend/src/method.rs @@ -976,7 +976,7 @@ impl<'a> FnSpec<'a> { } /// Forwards to [utils::get_doc] with the text signature of this spec. - pub fn get_doc(&self, attrs: &[syn::Attribute], ctx: &Ctx) -> syn::Result { + pub fn get_doc(&self, attrs: &mut Vec, ctx: &Ctx) -> syn::Result { let text_signature = self .text_signature_call_signature() .map(|sig| format!("{}{}", self.python_name, sig)); diff --git a/pyo3-macros-backend/src/module.rs b/pyo3-macros-backend/src/module.rs index 1888f564df5..e81ce55c4b4 100644 --- a/pyo3-macros-backend/src/module.rs +++ b/pyo3-macros-backend/src/module.rs @@ -116,6 +116,7 @@ pub fn pymodule_module_impl( let ctx = &Ctx::new(&options.krate, None); let Ctx { pyo3_path, .. } = ctx; let doc = get_doc(attrs, None, ctx)?; + let doc = get_doc(attrs, None, ctx)?; let name = options .name .map_or_else(|| ident.unraw(), |name| name.value.0); @@ -453,7 +454,7 @@ pub fn pymodule_function_impl( .name .map_or_else(|| ident.unraw(), |name| name.value.0); let vis = &function.vis; - let doc = get_doc(&function.attrs, None, ctx)?; + let doc = get_doc(&mut function.attrs, None, ctx)?; let initialization = module_initialization( &name, diff --git a/pyo3-macros-backend/src/pyclass.rs b/pyo3-macros-backend/src/pyclass.rs index 714d56ed247..ef2e46a5807 100644 --- a/pyo3-macros-backend/src/pyclass.rs +++ b/pyo3-macros-backend/src/pyclass.rs @@ -250,7 +250,7 @@ pub fn build_py_class( args.options.take_pyo3_options(&mut class.attrs)?; let ctx = &Ctx::new(&args.options.krate, None); - let doc = utils::get_doc(&class.attrs, None, ctx)?; + let doc = utils::get_doc(&mut class.attrs, None, ctx)?; if let Some(lt) = class.generics.lifetimes().next() { bail_spanned!( @@ -446,11 +446,11 @@ fn impl_class( ctx, )?; - let (default_class_geitem, default_class_geitem_method) = - pyclass_class_geitem(&args.options, &syn::parse_quote!(#cls), ctx)?; + let (default_class_getitem, default_class_getitem_method) = + pyclass_class_getitem(&args.options, &syn::parse_quote!(#cls), ctx)?; - if let Some(default_class_geitem_method) = default_class_geitem_method { - default_methods.push(default_class_geitem_method); + if let Some(default_class_getitem_method) = default_class_getitem_method { + default_methods.push(default_class_getitem_method); } let (default_str, default_str_slot) = @@ -484,7 +484,7 @@ fn impl_class( #default_richcmp #default_hash #default_str - #default_class_geitem + #default_class_getitem } }) } @@ -531,7 +531,7 @@ pub fn build_py_enum( bail_spanned!(generic.span() => "enums do not support #[pyclass(generic)]"); } - let doc = utils::get_doc(&enum_.attrs, None, ctx)?; + let doc = utils::get_doc(&mut enum_.attrs, None, ctx)?; let enum_ = PyClassEnum::new(enum_)?; impl_enum(enum_, &args, doc, method_type, ctx) } @@ -1768,7 +1768,7 @@ fn complex_enum_variant_field_getter<'a>( let property_type = crate::pymethod::PropertyType::Function { self_type: &self_type, spec: &spec, - doc: crate::get_doc(&[], None, ctx)?, + doc: PythonDoc::empty(ctx), }; let getter = crate::pymethod::impl_py_getter_def(variant_cls_type, property_type, ctx)?; @@ -2036,7 +2036,7 @@ fn pyclass_hash( } } -fn pyclass_class_geitem( +fn pyclass_class_getitem( options: &PyClassPyO3Options, cls: &syn::Type, ctx: &Ctx, @@ -2045,7 +2045,7 @@ fn pyclass_class_geitem( match options.generic { Some(_) => { let ident = format_ident!("__class_getitem__"); - let mut class_geitem_impl: syn::ImplItemFn = { + let mut class_getitem_impl: syn::ImplItemFn = { parse_quote! { #[classmethod] fn #ident<'py>( @@ -2058,19 +2058,19 @@ fn pyclass_class_geitem( }; let spec = FnSpec::parse( - &mut class_geitem_impl.sig, - &mut class_geitem_impl.attrs, + &mut class_getitem_impl.sig, + &mut class_getitem_impl.attrs, Default::default(), )?; - let class_geitem_method = crate::pymethod::impl_py_method_def( + let class_getitem_method = crate::pymethod::impl_py_method_def( cls, &spec, - &spec.get_doc(&class_geitem_impl.attrs, ctx)?, + &spec.get_doc(&mut class_getitem_impl.attrs, ctx)?, Some(quote!(#pyo3_path::ffi::METH_CLASS)), ctx, )?; - Ok((Some(class_geitem_impl), Some(class_geitem_method))) + Ok((Some(class_getitem_impl), Some(class_getitem_method))) } None => Ok((None, None)), } diff --git a/pyo3-macros-backend/src/pyfunction.rs b/pyo3-macros-backend/src/pyfunction.rs index 303af155acf..96530e14f93 100644 --- a/pyo3-macros-backend/src/pyfunction.rs +++ b/pyo3-macros-backend/src/pyfunction.rs @@ -425,7 +425,7 @@ pub fn impl_wrap_pyfunction( ); } let wrapper = spec.get_wrapper_function(&wrapper_ident, None, ctx)?; - let methoddef = spec.get_methoddef(wrapper_ident, &spec.get_doc(&func.attrs, ctx)?, ctx); + let methoddef = spec.get_methoddef(wrapper_ident, &spec.get_doc(&mut func.attrs, ctx)?, ctx); let wrapped_pyfunction = quote! { // Create a module with the same name as the `#[pyfunction]` - this way `use ` diff --git a/pyo3-macros-backend/src/pyimpl.rs b/pyo3-macros-backend/src/pyimpl.rs index 282a6f35d7b..ae13ce8f910 100644 --- a/pyo3-macros-backend/src/pyimpl.rs +++ b/pyo3-macros-backend/src/pyimpl.rs @@ -139,7 +139,7 @@ pub fn impl_methods( let method = PyMethod::parse(&mut meth.sig, &mut meth.attrs, fun_options)?; #[cfg(feature = "experimental-inspect")] extra_fragments.push(method_introspection_code(&method.spec, ty, ctx)); - match pymethod::gen_py_method(ty, method, &meth.attrs, ctx)? { + match pymethod::gen_py_method(ty, method, &mut meth.attrs, ctx)? { GeneratedPyMethod::Method(MethodAndMethodDef { associated_method, method_def, diff --git a/pyo3-macros-backend/src/pymethod.rs b/pyo3-macros-backend/src/pymethod.rs index eede90f882f..313fc950ed6 100644 --- a/pyo3-macros-backend/src/pymethod.rs +++ b/pyo3-macros-backend/src/pymethod.rs @@ -191,7 +191,7 @@ pub fn is_proto_method(name: &str) -> bool { pub fn gen_py_method( cls: &syn::Type, method: PyMethod<'_>, - meth_attrs: &[syn::Attribute], + meth_attrs: &mut Vec, ctx: &Ctx, ) -> Result { let spec = &method.spec; @@ -237,6 +237,7 @@ pub fn gen_py_method( cls, spec, &spec.get_doc(meth_attrs, ctx)?, + &spec.get_doc(meth_attrs, ctx)?, None, ctx, )?), @@ -244,6 +245,7 @@ pub fn gen_py_method( cls, spec, &spec.get_doc(meth_attrs, ctx)?, + &spec.get_doc(meth_attrs, ctx)?, Some(quote!(#pyo3_path::ffi::METH_CLASS)), ctx, )?), @@ -251,6 +253,7 @@ pub fn gen_py_method( cls, spec, &spec.get_doc(meth_attrs, ctx)?, + &spec.get_doc(meth_attrs, ctx)?, Some(quote!(#pyo3_path::ffi::METH_STATIC)), ctx, )?), @@ -265,6 +268,7 @@ pub fn gen_py_method( self_type, spec, doc: spec.get_doc(meth_attrs, ctx)?, + doc: spec.get_doc(meth_attrs, ctx)?, }, ctx, )?), @@ -274,6 +278,7 @@ pub fn gen_py_method( self_type, spec, doc: spec.get_doc(meth_attrs, ctx)?, + doc: spec.get_doc(meth_attrs, ctx)?, }, ctx, )?), @@ -627,6 +632,7 @@ pub fn impl_py_setter_def( let Ctx { pyo3_path, .. } = ctx; let python_name = property_type.null_terminated_python_name(ctx)?; let doc = property_type.doc(ctx)?; + let doc = property_type.doc(ctx)?; let mut holders = Holders::new(); let setter_impl = match property_type { PropertyType::Descriptor { @@ -815,6 +821,7 @@ pub fn impl_py_getter_def( let Ctx { pyo3_path, .. } = ctx; let python_name = property_type.null_terminated_python_name(ctx)?; let doc = property_type.doc(ctx)?; + let doc = property_type.doc(ctx)?; let mut cfg_attrs = TokenStream::new(); if let PropertyType::Descriptor { field, .. } = &property_type { @@ -978,12 +985,16 @@ impl PropertyType<'_> { } fn doc(&self, ctx: &Ctx) -> Result> { - match self { + let doc = match self { PropertyType::Descriptor { field, .. } => { - utils::get_doc(&field.attrs, None, ctx).map(Cow::Owned) + // FIXME: due to the clone this will not properly strip Rust documentation, maybe + // need to parse the field and doc earlier in the process? + let mut attrs = field.attrs.clone(); + Cow::Owned(utils::get_doc(&mut attrs, None, ctx)?) } - PropertyType::Function { doc, .. } => Ok(Cow::Borrowed(doc)), - } + PropertyType::Function { doc, .. } => Cow::Borrowed(doc), + }; + Ok(doc) } } diff --git a/pyo3-macros-backend/src/utils.rs b/pyo3-macros-backend/src/utils.rs index 0df0b75e431..6a53542480f 100644 --- a/pyo3-macros-backend/src/utils.rs +++ b/pyo3-macros-backend/src/utils.rs @@ -1,7 +1,8 @@ -use crate::attributes::{CrateAttribute, RenamingRule}; +use crate::attributes::{self, CrateAttribute, RenamingRule}; use proc_macro2::{Span, TokenStream}; use quote::{quote, quote_spanned, ToTokens}; use std::ffi::CString; +use syn::parse::Parse; use syn::spanned::Spanned; use syn::visit_mut::VisitMut; use syn::{punctuated::Punctuated, Token}; @@ -115,6 +116,13 @@ impl quote::ToTokens for LitCStr { #[derive(Clone)] pub struct PythonDoc(PythonDocKind); +impl PythonDoc { + /// Returns an empty docstring. + pub fn empty(ctx: &Ctx) -> Self { + PythonDoc(PythonDocKind::LitCStr(LitCStr::empty(ctx))) + } +} + #[derive(Clone)] enum PythonDocKind { LitCStr(LitCStr), @@ -123,15 +131,50 @@ enum PythonDocKind { Tokens(TokenStream), } +enum DocParseMode { + /// Currently generating docs for both Python and Rust. + Both, + /// Currently generating docs for Python only, starting from the given Span. + PythonOnly(Span), + /// Currently generating docs for Rust only, starting from the given Span. + RustOnly(Span), +} + +impl Parse for DocParseMode { + fn parse(input: syn::parse::ParseStream<'_>) -> syn::Result { + let lookahead = input.lookahead1(); + if lookahead.peek(attributes::kw::doc_mode) { + let attribute: attributes::DocModeAttribute = input.parse()?; + let lit = attribute.value; + if lit.value() == "python" { + Ok(DocParseMode::PythonOnly(lit.span())) + } else if lit.value() == "rust" { + Ok(DocParseMode::RustOnly(lit.span())) + } else { + Err(syn::Error::new( + lit.span(), + "expected `doc_mode = \"python\"` or `doc_mode = \"rust\"", + )) + } + } else if lookahead.peek(attributes::kw::end_doc_mode) { + let _: attributes::kw::end_doc_mode = input.parse()?; + Ok(DocParseMode::Both) + } else { + Err(lookahead.error()) + } + } +} + /// Collects all #[doc = "..."] attributes into a TokenStream evaluating to a null-terminated string. /// /// If this doc is for a callable, the provided `text_signature` can be passed to prepend /// this to the documentation suitable for Python to extract this into the `__text_signature__` /// attribute. pub fn get_doc( - attrs: &[syn::Attribute], + attrs: &mut Vec, mut text_signature: Option, ctx: &Ctx, +) -> syn::Result { ) -> syn::Result { let Ctx { pyo3_path, .. } = ctx; // insert special divider between `__text_signature__` and doc @@ -143,37 +186,93 @@ pub fn get_doc( let mut parts = Punctuated::::new(); let mut first = true; let mut current_part = text_signature.unwrap_or_default(); - let mut current_part_span = None; - for attr in attrs { + let mut mode = DocParseMode::Both; + + let mut error: Option = None; + + attrs.retain(|attr| { if attr.path().is_ident("doc") { - if let Ok(nv) = attr.meta.require_name_value() { - current_part_span = match current_part_span { - None => Some(nv.value.span()), - Some(span) => span.join(nv.value.span()), - }; - if !first { - current_part.push('\n'); - } else { - first = false; + // if not in Rust-only mode, we process the doc attribute to add to the Python docstring + if !matches!(mode, DocParseMode::RustOnly(_)) { + if let Ok(nv) = attr.meta.require_name_value() { + if !first { + current_part.push('\n'); + } else { + first = false; + } + if let syn::Expr::Lit(syn::ExprLit { + lit: syn::Lit::Str(lit_str), + .. + }) = &nv.value + { + // Strip single left space from literal strings, if needed. + // e.g. `/// Hello world` expands to #[doc = " Hello world"] + let doc_line = lit_str.value(); + current_part.push_str(doc_line.strip_prefix(' ').unwrap_or(&doc_line)); + } else { + // This is probably a macro doc from Rust 1.54, e.g. #[doc = include_str!(...)] + // Reset the string buffer, write that part, and then push this macro part too. + parts.push(current_part.to_token_stream()); + current_part.clear(); + parts.push(nv.value.to_token_stream()); + } } - if let syn::Expr::Lit(syn::ExprLit { - lit: syn::Lit::Str(lit_str), - .. - }) = &nv.value - { - // Strip single left space from literal strings, if needed. - // e.g. `/// Hello world` expands to #[doc = " Hello world"] - let doc_line = lit_str.value(); - current_part.push_str(doc_line.strip_prefix(' ').unwrap_or(&doc_line)); - } else { - // This is probably a macro doc from Rust 1.54, e.g. #[doc = include_str!(...)] - // Reset the string buffer, write that part, and then push this macro part too. - parts.push(quote_spanned!(current_part_span.unwrap_or(Span::call_site()) => #current_part)); - current_part.clear(); - parts.push(nv.value.to_token_stream()); + } + // discard doc attributes if we're in PythonOnly mode + !matches!(mode, DocParseMode::PythonOnly(_)) + } else if attr.path().is_ident("pyo3_doc_mode") { + match attr.parse_args() { + Ok(new_mode) => { + mode = new_mode; } + Err(err) => match &mut error { + Some(existing_error) => existing_error.combine(err), + None => { + error = Some(err); + } + }, } + // we processed these attributes, remove them + false + } else { + true + } + }); + + // for attr in attrs { + // if attr.path().is_ident("doc") { + // if let Ok(nv) = attr.meta.require_name_value() { + // if !first { + // current_part.push('\n'); + // } else { + // first = false; + // } + // if let syn::Expr::Lit(syn::ExprLit { + // lit: syn::Lit::Str(lit_str), + // .. + // }) = &nv.value + // { + // // Strip single left space from literal strings, if needed. + // // e.g. `/// Hello world` expands to #[doc = " Hello world"] + // let doc_line = lit_str.value(); + // current_part.push_str(doc_line.strip_prefix(' ').unwrap_or(&doc_line)); + // } else { + // // This is probably a macro doc from Rust 1.54, e.g. #[doc = include_str!(...)] + // // Reset the string buffer, write that part, and then push this macro part too. + // parts.push(current_part.to_token_stream()); + // current_part.clear(); + // parts.push(nv.value.to_token_stream()); + // } + // } + // } + // } + + // if the mode has not been ended, we return an error + match mode { + DocParseMode::Both => {} + DocParseMode::PythonOnly(span) | DocParseMode::RustOnly(span) => { + return Err(err_spanned!(span => "doc_mode must be ended with `end_doc_mode`")); } } @@ -194,25 +293,20 @@ pub fn get_doc( syn::token::Comma(Span::call_site()).to_tokens(tokens); }); + Ok(PythonDoc(PythonDocKind::Tokens( Ok(PythonDoc(PythonDocKind::Tokens( quote!(#pyo3_path::ffi::c_str!(#tokens)), ))) + ))) } else { // Just a string doc - return directly with nul terminator - let docs = CString::new(current_part).map_err(|e| { - syn::Error::new( - current_part_span.unwrap_or(Span::call_site()), - format!( - "Python doc may not contain nul byte, found nul at position {}", - e.nul_position() - ), - ) - })?; + let docs = CString::new(current_part).unwrap(); Ok(PythonDoc(PythonDocKind::LitCStr(LitCStr::new( docs, current_part_span.unwrap_or(Span::call_site()), ctx, )))) + )))) } } From 16a2eeec52e9dde9c45a9f1b09385e5c0de36589 Mon Sep 17 00:00:00 2001 From: Rafa Date: Sun, 3 Aug 2025 10:52:51 +0800 Subject: [PATCH 2/4] Add embedded doc mode optimizations to attributes.rs and utils.rs --- pyo3-macros-backend/src/attributes.rs | 1 - pyo3-macros-backend/src/utils.rs | 176 +++++++++++--------------- 2 files changed, 72 insertions(+), 105 deletions(-) diff --git a/pyo3-macros-backend/src/attributes.rs b/pyo3-macros-backend/src/attributes.rs index 8e7367bd119..238b2f7cf77 100644 --- a/pyo3-macros-backend/src/attributes.rs +++ b/pyo3-macros-backend/src/attributes.rs @@ -320,7 +320,6 @@ pub type StrFormatterAttribute = OptionalKeywordAttribute; pub type SubmoduleAttribute = kw::submodule; pub type GILUsedAttribute = KeywordAttribute; -pub type DocModeAttribute = KeywordAttribute; impl Parse for KeywordAttribute { fn parse(input: ParseStream<'_>) -> Result { diff --git a/pyo3-macros-backend/src/utils.rs b/pyo3-macros-backend/src/utils.rs index 6a53542480f..37b6f5fa49f 100644 --- a/pyo3-macros-backend/src/utils.rs +++ b/pyo3-macros-backend/src/utils.rs @@ -1,8 +1,7 @@ -use crate::attributes::{self, CrateAttribute, RenamingRule}; +use crate::attributes::{CrateAttribute, RenamingRule}; use proc_macro2::{Span, TokenStream}; use quote::{quote, quote_spanned, ToTokens}; use std::ffi::CString; -use syn::parse::Parse; use syn::spanned::Spanned; use syn::visit_mut::VisitMut; use syn::{punctuated::Punctuated, Token}; @@ -134,35 +133,10 @@ enum PythonDocKind { enum DocParseMode { /// Currently generating docs for both Python and Rust. Both, - /// Currently generating docs for Python only, starting from the given Span. - PythonOnly(Span), - /// Currently generating docs for Rust only, starting from the given Span. - RustOnly(Span), -} - -impl Parse for DocParseMode { - fn parse(input: syn::parse::ParseStream<'_>) -> syn::Result { - let lookahead = input.lookahead1(); - if lookahead.peek(attributes::kw::doc_mode) { - let attribute: attributes::DocModeAttribute = input.parse()?; - let lit = attribute.value; - if lit.value() == "python" { - Ok(DocParseMode::PythonOnly(lit.span())) - } else if lit.value() == "rust" { - Ok(DocParseMode::RustOnly(lit.span())) - } else { - Err(syn::Error::new( - lit.span(), - "expected `doc_mode = \"python\"` or `doc_mode = \"rust\"", - )) - } - } else if lookahead.peek(attributes::kw::end_doc_mode) { - let _: attributes::kw::end_doc_mode = input.parse()?; - Ok(DocParseMode::Both) - } else { - Err(lookahead.error()) - } - } + /// Currently generating docs for Python only. + PythonOnly, + /// Currently generating docs for Rust only. + RustOnly, } /// Collects all #[doc = "..."] attributes into a TokenStream evaluating to a null-terminated string. @@ -189,93 +163,87 @@ pub fn get_doc( let mut mode = DocParseMode::Both; - let mut error: Option = None; + let mut to_retain = vec![]; // Collect indices of attributes to retain - attrs.retain(|attr| { + for (i, attr) in attrs.iter().enumerate() { if attr.path().is_ident("doc") { - // if not in Rust-only mode, we process the doc attribute to add to the Python docstring - if !matches!(mode, DocParseMode::RustOnly(_)) { - if let Ok(nv) = attr.meta.require_name_value() { - if !first { - current_part.push('\n'); - } else { - first = false; + if let Ok(nv) = attr.meta.require_name_value() { + let include_in_python; + let retain_in_rust; + + if let syn::Expr::Lit(syn::ExprLit { + lit: syn::Lit::Str(lit_str), + .. + }) = &nv.value + { + // Strip single left space from literal strings, if needed. + let doc_line = lit_str.value(); + let stripped_line = doc_line.strip_prefix(' ').unwrap_or(&doc_line); + let trimmed = stripped_line.trim(); + + // Check if this is a mode switch instruction + if let Some(content) = trimmed.strip_prefix("")) { + let content_trimmed = content.trim(); + if content_trimmed.starts_with("pyo3_doc_mode:") { + let value = content_trimmed.strip_prefix("pyo3_doc_mode:").unwrap_or("").trim(); + mode = match value { + "python" => DocParseMode::PythonOnly, + "rust" => DocParseMode::RustOnly, + "both" => DocParseMode::Both, + _ => return Err(syn::Error::new(lit_str.span(), format!("Invalid doc_mode: '{}'. Expected 'python', 'rust', or 'both'.", value))), + }; + // Do not retain mode switch lines in Rust, and skip in Python + continue; + } + } + + // Not a mode switch, decide based on current mode + include_in_python = matches!(mode, DocParseMode::Both | DocParseMode::PythonOnly); + retain_in_rust = matches!(mode, DocParseMode::Both | DocParseMode::RustOnly); + + // Include in Python doc if needed + if include_in_python { + if !first { + current_part.push('\n'); + } else { + first = false; + } + current_part.push_str(stripped_line); } - if let syn::Expr::Lit(syn::ExprLit { - lit: syn::Lit::Str(lit_str), - .. - }) = &nv.value - { - // Strip single left space from literal strings, if needed. - // e.g. `/// Hello world` expands to #[doc = " Hello world"] - let doc_line = lit_str.value(); - current_part.push_str(doc_line.strip_prefix(' ').unwrap_or(&doc_line)); - } else { - // This is probably a macro doc from Rust 1.54, e.g. #[doc = include_str!(...)] + } else { + // This is probably a macro doc, e.g. #[doc = include_str!(...)] + // Decide based on current mode + include_in_python = matches!(mode, DocParseMode::Both | DocParseMode::PythonOnly); + retain_in_rust = matches!(mode, DocParseMode::Both | DocParseMode::RustOnly); + + // Include in Python doc if needed + if include_in_python { // Reset the string buffer, write that part, and then push this macro part too. parts.push(current_part.to_token_stream()); current_part.clear(); parts.push(nv.value.to_token_stream()); } } - } - // discard doc attributes if we're in PythonOnly mode - !matches!(mode, DocParseMode::PythonOnly(_)) - } else if attr.path().is_ident("pyo3_doc_mode") { - match attr.parse_args() { - Ok(new_mode) => { - mode = new_mode; + + // Collect to retain if needed + if retain_in_rust { + to_retain.push(i); } - Err(err) => match &mut error { - Some(existing_error) => existing_error.combine(err), - None => { - error = Some(err); - } - }, } - // we processed these attributes, remove them - false } else { - true - } - }); - - // for attr in attrs { - // if attr.path().is_ident("doc") { - // if let Ok(nv) = attr.meta.require_name_value() { - // if !first { - // current_part.push('\n'); - // } else { - // first = false; - // } - // if let syn::Expr::Lit(syn::ExprLit { - // lit: syn::Lit::Str(lit_str), - // .. - // }) = &nv.value - // { - // // Strip single left space from literal strings, if needed. - // // e.g. `/// Hello world` expands to #[doc = " Hello world"] - // let doc_line = lit_str.value(); - // current_part.push_str(doc_line.strip_prefix(' ').unwrap_or(&doc_line)); - // } else { - // // This is probably a macro doc from Rust 1.54, e.g. #[doc = include_str!(...)] - // // Reset the string buffer, write that part, and then push this macro part too. - // parts.push(current_part.to_token_stream()); - // current_part.clear(); - // parts.push(nv.value.to_token_stream()); - // } - // } - // } - // } - - // if the mode has not been ended, we return an error - match mode { - DocParseMode::Both => {} - DocParseMode::PythonOnly(span) | DocParseMode::RustOnly(span) => { - return Err(err_spanned!(span => "doc_mode must be ended with `end_doc_mode`")); + // Non-doc attributes are always retained + to_retain.push(i); } } + // Retain only the selected attributes + *attrs = to_retain.into_iter().map(|i| attrs[i].clone()).collect(); + + // Check if mode ended in Both; if not, error to enforce "pairing" + if !matches!(mode, DocParseMode::Both) { + return Err(err_spanned!(Span::call_site() => "doc_mode did not end in 'both' mode; consider adding at the end")); + } + if !parts.is_empty() { // Doc contained macro pieces - return as `concat!` expression if !current_part.is_empty() { @@ -483,4 +451,4 @@ pub fn expr_to_python(expr: &syn::Expr) -> String { // others, unsupported yet so defaults to `...` _ => "...".to_string(), } -} +} \ No newline at end of file From dbdf63f1baf5fa364eef1230e57cfca41466e496 Mon Sep 17 00:00:00 2001 From: Rafa Date: Sun, 17 Aug 2025 13:26:13 +0800 Subject: [PATCH 3/4] Add error input recognition prompts --- newsfragments/5288.added.md | 1 + pyo3-macros-backend/src/module.rs | 1 - pyo3-macros-backend/src/pymethod.rs | 100 +++++++++++++++------------- pyo3-macros-backend/src/utils.rs | 94 +++++++++++++++++++++++--- 4 files changed, 139 insertions(+), 57 deletions(-) create mode 100644 newsfragments/5288.added.md diff --git a/newsfragments/5288.added.md b/newsfragments/5288.added.md new file mode 100644 index 00000000000..2618fb10566 --- /dev/null +++ b/newsfragments/5288.added.md @@ -0,0 +1 @@ +Embedded doc mode using HTML comments to fix attribute order issues. \ No newline at end of file diff --git a/pyo3-macros-backend/src/module.rs b/pyo3-macros-backend/src/module.rs index e81ce55c4b4..5b3d9a8dd7b 100644 --- a/pyo3-macros-backend/src/module.rs +++ b/pyo3-macros-backend/src/module.rs @@ -116,7 +116,6 @@ pub fn pymodule_module_impl( let ctx = &Ctx::new(&options.krate, None); let Ctx { pyo3_path, .. } = ctx; let doc = get_doc(attrs, None, ctx)?; - let doc = get_doc(attrs, None, ctx)?; let name = options .name .map_or_else(|| ident.unraw(), |name| name.value.0); diff --git a/pyo3-macros-backend/src/pymethod.rs b/pyo3-macros-backend/src/pymethod.rs index 313fc950ed6..e88b047116c 100644 --- a/pyo3-macros-backend/src/pymethod.rs +++ b/pyo3-macros-backend/src/pymethod.rs @@ -233,55 +233,65 @@ pub fn gen_py_method( } } // ordinary functions (with some specialties) - (_, FnType::Fn(_)) => GeneratedPyMethod::Method(impl_py_method_def( - cls, - spec, - &spec.get_doc(meth_attrs, ctx)?, - &spec.get_doc(meth_attrs, ctx)?, - None, - ctx, - )?), - (_, FnType::FnClass(_)) => GeneratedPyMethod::Method(impl_py_method_def( - cls, - spec, - &spec.get_doc(meth_attrs, ctx)?, - &spec.get_doc(meth_attrs, ctx)?, - Some(quote!(#pyo3_path::ffi::METH_CLASS)), - ctx, - )?), - (_, FnType::FnStatic) => GeneratedPyMethod::Method(impl_py_method_def( - cls, - spec, - &spec.get_doc(meth_attrs, ctx)?, - &spec.get_doc(meth_attrs, ctx)?, - Some(quote!(#pyo3_path::ffi::METH_STATIC)), - ctx, - )?), + (_, FnType::Fn(_)) => { + let doc = spec.get_doc(meth_attrs, ctx)?; + GeneratedPyMethod::Method(impl_py_method_def( + cls, + spec, + &doc, + None, + ctx, + )?) + }, + (_, FnType::FnClass(_)) => { + let doc = spec.get_doc(meth_attrs, ctx)?; + GeneratedPyMethod::Method(impl_py_method_def( + cls, + spec, + &doc, + Some(quote!(#pyo3_path::ffi::METH_CLASS)), + ctx, + )?) + }, + (_, FnType::FnStatic) => { + let doc = spec.get_doc(meth_attrs, ctx)?; + GeneratedPyMethod::Method(impl_py_method_def( + cls, + spec, + &doc, + Some(quote!(#pyo3_path::ffi::METH_STATIC)), + ctx, + )?) + }, // special prototypes (_, FnType::FnNew) | (_, FnType::FnNewClass(_)) => { GeneratedPyMethod::Proto(impl_py_method_def_new(cls, spec, ctx)?) } - (_, FnType::Getter(self_type)) => GeneratedPyMethod::Method(impl_py_getter_def( - cls, - PropertyType::Function { - self_type, - spec, - doc: spec.get_doc(meth_attrs, ctx)?, - doc: spec.get_doc(meth_attrs, ctx)?, - }, - ctx, - )?), - (_, FnType::Setter(self_type)) => GeneratedPyMethod::Method(impl_py_setter_def( - cls, - PropertyType::Function { - self_type, - spec, - doc: spec.get_doc(meth_attrs, ctx)?, - doc: spec.get_doc(meth_attrs, ctx)?, - }, - ctx, - )?), + (_, FnType::Getter(self_type)) => { + let doc = spec.get_doc(meth_attrs, ctx)?; + GeneratedPyMethod::Method(impl_py_getter_def( + cls, + PropertyType::Function { + self_type, + spec, + doc, + }, + ctx, + )?) + }, + (_, FnType::Setter(self_type)) => { + let doc = spec.get_doc(meth_attrs, ctx)?; + GeneratedPyMethod::Method(impl_py_setter_def( + cls, + PropertyType::Function { + self_type, + spec, + doc, + }, + ctx, + )?) + }, (_, FnType::FnModule(_)) => { unreachable!("methods cannot be FnModule") } @@ -632,7 +642,6 @@ pub fn impl_py_setter_def( let Ctx { pyo3_path, .. } = ctx; let python_name = property_type.null_terminated_python_name(ctx)?; let doc = property_type.doc(ctx)?; - let doc = property_type.doc(ctx)?; let mut holders = Holders::new(); let setter_impl = match property_type { PropertyType::Descriptor { @@ -821,7 +830,6 @@ pub fn impl_py_getter_def( let Ctx { pyo3_path, .. } = ctx; let python_name = property_type.null_terminated_python_name(ctx)?; let doc = property_type.doc(ctx)?; - let doc = property_type.doc(ctx)?; let mut cfg_attrs = TokenStream::new(); if let PropertyType::Descriptor { field, .. } = &property_type { diff --git a/pyo3-macros-backend/src/utils.rs b/pyo3-macros-backend/src/utils.rs index 37b6f5fa49f..7277cd43622 100644 --- a/pyo3-macros-backend/src/utils.rs +++ b/pyo3-macros-backend/src/utils.rs @@ -148,7 +148,6 @@ pub fn get_doc( attrs: &mut Vec, mut text_signature: Option, ctx: &Ctx, -) -> syn::Result { ) -> syn::Result { let Ctx { pyo3_path, .. } = ctx; // insert special divider between `__text_signature__` and doc @@ -160,6 +159,7 @@ pub fn get_doc( let mut parts = Punctuated::::new(); let mut first = true; let mut current_part = text_signature.unwrap_or_default(); + let mut current_part_span = None; // Track span for error reporting let mut mode = DocParseMode::Both; @@ -176,6 +176,11 @@ pub fn get_doc( .. }) = &nv.value { + // Update span for error reporting + if current_part_span.is_none() { + current_part_span = Some(lit_str.span()); + } + // Strip single left space from literal strings, if needed. let doc_line = lit_str.value(); let stripped_line = doc_line.strip_prefix(' ').unwrap_or(&doc_line); @@ -190,11 +195,25 @@ pub fn get_doc( "python" => DocParseMode::PythonOnly, "rust" => DocParseMode::RustOnly, "both" => DocParseMode::Both, - _ => return Err(syn::Error::new(lit_str.span(), format!("Invalid doc_mode: '{}'. Expected 'python', 'rust', or 'both'.", value))), + _ => return Err(syn::Error::new( + lit_str.span(), + format!("Invalid doc_mode: '{}'. Expected 'python', 'rust', or 'both'.", value) + )), }; // Do not retain mode switch lines in Rust, and skip in Python continue; + } else if is_likely_pyo3_doc_mode_typo(content_trimmed) { + // Handle potential typos in pyo3_doc_mode prefix + return Err(syn::Error::new( + lit_str.span(), + format!( + "Suspicious comment '{}' - did you mean 'pyo3_doc_mode'? Valid format: ", + content_trimmed + ) + )); } + // If it's an HTML comment but not pyo3_doc_mode related, + // it will be included based on current mode (no special handling) } // Not a mode switch, decide based on current mode @@ -219,8 +238,10 @@ pub fn get_doc( // Include in Python doc if needed if include_in_python { // Reset the string buffer, write that part, and then push this macro part too. - parts.push(current_part.to_token_stream()); - current_part.clear(); + if !current_part.is_empty() { + parts.push(current_part.to_token_stream()); + current_part.clear(); + } parts.push(nv.value.to_token_stream()); } } @@ -241,7 +262,10 @@ pub fn get_doc( // Check if mode ended in Both; if not, error to enforce "pairing" if !matches!(mode, DocParseMode::Both) { - return Err(err_spanned!(Span::call_site() => "doc_mode did not end in 'both' mode; consider adding at the end")); + return Err(syn::Error::new( + Span::call_site(), + "doc_mode did not end in 'both' mode; consider adding at the end" + )); } if !parts.is_empty() { @@ -256,16 +280,13 @@ pub fn get_doc( syn::Ident::new("concat", Span::call_site()).to_tokens(&mut tokens); syn::token::Not(Span::call_site()).to_tokens(&mut tokens); - syn::token::Bracket(Span::call_site()).surround(&mut tokens, |tokens| { + syn::token::Paren(Span::call_site()).surround(&mut tokens, |tokens| { parts.to_tokens(tokens); - syn::token::Comma(Span::call_site()).to_tokens(tokens); }); - Ok(PythonDoc(PythonDocKind::Tokens( Ok(PythonDoc(PythonDocKind::Tokens( quote!(#pyo3_path::ffi::c_str!(#tokens)), ))) - ))) } else { // Just a string doc - return directly with nul terminator let docs = CString::new(current_part).unwrap(); @@ -274,10 +295,63 @@ pub fn get_doc( current_part_span.unwrap_or(Span::call_site()), ctx, )))) - )))) } } +/// Helper function to detect likely typos in pyo3_doc_mode prefix +fn is_likely_pyo3_doc_mode_typo(content: &str) -> bool { + // Simple fuzzy matching for common typos + let potential_typos = [ + "pyo3_doc_mde", + "pyo3_docc_mode", + "pyo3_doc_mod", + "py03_doc_mode", + "pyo3doc_mode", + "pyo3_docmode", + "pyo_doc_mode", + "pyo3_doc_node", + ]; + + potential_typos.iter().any(|&typo| { + content.starts_with(typo) || + (content.len() >= typo.len() - 2 && + simple_edit_distance(content.split(':').next().unwrap_or(""), typo) <= 2) + }) +} + +/// Simple edit distance calculation for typo detection +fn simple_edit_distance(a: &str, b: &str) -> usize { + let a_chars: Vec = a.chars().collect(); + let b_chars: Vec = b.chars().collect(); + let a_len = a_chars.len(); + let b_len = b_chars.len(); + + if a_len == 0 { return b_len; } + if b_len == 0 { return a_len; } + + let mut matrix = vec![vec![0; b_len + 1]; a_len + 1]; + + // Initialize first row and column + for i in 0..=a_len { + matrix[i][0] = i; + } + for j in 0..=b_len { + matrix[0][j] = j; + } + + // Fill the matrix + for i in 1..=a_len { + for j in 1..=b_len { + let cost = if a_chars[i - 1] == b_chars[j - 1] { 0 } else { 1 }; + matrix[i][j] = (matrix[i - 1][j] + 1) // deletion + .min(matrix[i][j - 1] + 1) // insertion + .min(matrix[i - 1][j - 1] + cost); // substitution + } + } + + matrix[a_len][b_len] +} + impl quote::ToTokens for PythonDoc { fn to_tokens(&self, tokens: &mut TokenStream) { match &self.0 { From 4a901aca29aa374bfc60bc0064c7159d214ecd61 Mon Sep 17 00:00:00 2001 From: Rafa Date: Sun, 17 Aug 2025 13:45:24 +0800 Subject: [PATCH 4/4] fmt check --- pyo3-macros-backend/src/pymethod.rs | 18 +++------ pyo3-macros-backend/src/utils.rs | 60 ++++++++++++++++++----------- 2 files changed, 44 insertions(+), 34 deletions(-) diff --git a/pyo3-macros-backend/src/pymethod.rs b/pyo3-macros-backend/src/pymethod.rs index e88b047116c..9ab96de7d52 100644 --- a/pyo3-macros-backend/src/pymethod.rs +++ b/pyo3-macros-backend/src/pymethod.rs @@ -235,14 +235,8 @@ pub fn gen_py_method( // ordinary functions (with some specialties) (_, FnType::Fn(_)) => { let doc = spec.get_doc(meth_attrs, ctx)?; - GeneratedPyMethod::Method(impl_py_method_def( - cls, - spec, - &doc, - None, - ctx, - )?) - }, + GeneratedPyMethod::Method(impl_py_method_def(cls, spec, &doc, None, ctx)?) + } (_, FnType::FnClass(_)) => { let doc = spec.get_doc(meth_attrs, ctx)?; GeneratedPyMethod::Method(impl_py_method_def( @@ -252,7 +246,7 @@ pub fn gen_py_method( Some(quote!(#pyo3_path::ffi::METH_CLASS)), ctx, )?) - }, + } (_, FnType::FnStatic) => { let doc = spec.get_doc(meth_attrs, ctx)?; GeneratedPyMethod::Method(impl_py_method_def( @@ -262,7 +256,7 @@ pub fn gen_py_method( Some(quote!(#pyo3_path::ffi::METH_STATIC)), ctx, )?) - }, + } // special prototypes (_, FnType::FnNew) | (_, FnType::FnNewClass(_)) => { GeneratedPyMethod::Proto(impl_py_method_def_new(cls, spec, ctx)?) @@ -279,7 +273,7 @@ pub fn gen_py_method( }, ctx, )?) - }, + } (_, FnType::Setter(self_type)) => { let doc = spec.get_doc(meth_attrs, ctx)?; GeneratedPyMethod::Method(impl_py_setter_def( @@ -291,7 +285,7 @@ pub fn gen_py_method( }, ctx, )?) - }, + } (_, FnType::FnModule(_)) => { unreachable!("methods cannot be FnModule") } diff --git a/pyo3-macros-backend/src/utils.rs b/pyo3-macros-backend/src/utils.rs index 7277cd43622..7518a200b29 100644 --- a/pyo3-macros-backend/src/utils.rs +++ b/pyo3-macros-backend/src/utils.rs @@ -187,16 +187,22 @@ pub fn get_doc( let trimmed = stripped_line.trim(); // Check if this is a mode switch instruction - if let Some(content) = trimmed.strip_prefix("")) { + if let Some(content) = trimmed + .strip_prefix("")) + { let content_trimmed = content.trim(); if content_trimmed.starts_with("pyo3_doc_mode:") { - let value = content_trimmed.strip_prefix("pyo3_doc_mode:").unwrap_or("").trim(); + let value = content_trimmed + .strip_prefix("pyo3_doc_mode:") + .unwrap_or("") + .trim(); mode = match value { "python" => DocParseMode::PythonOnly, "rust" => DocParseMode::RustOnly, "both" => DocParseMode::Both, _ => return Err(syn::Error::new( - lit_str.span(), + lit_str.span(), format!("Invalid doc_mode: '{}'. Expected 'python', 'rust', or 'both'.", value) )), }; @@ -212,12 +218,13 @@ pub fn get_doc( ) )); } - // If it's an HTML comment but not pyo3_doc_mode related, + // If it's an HTML comment but not pyo3_doc_mode related, // it will be included based on current mode (no special handling) } // Not a mode switch, decide based on current mode - include_in_python = matches!(mode, DocParseMode::Both | DocParseMode::PythonOnly); + include_in_python = + matches!(mode, DocParseMode::Both | DocParseMode::PythonOnly); retain_in_rust = matches!(mode, DocParseMode::Both | DocParseMode::RustOnly); // Include in Python doc if needed @@ -232,7 +239,8 @@ pub fn get_doc( } else { // This is probably a macro doc, e.g. #[doc = include_str!(...)] // Decide based on current mode - include_in_python = matches!(mode, DocParseMode::Both | DocParseMode::PythonOnly); + include_in_python = + matches!(mode, DocParseMode::Both | DocParseMode::PythonOnly); retain_in_rust = matches!(mode, DocParseMode::Both | DocParseMode::RustOnly); // Include in Python doc if needed @@ -311,11 +319,11 @@ fn is_likely_pyo3_doc_mode_typo(content: &str) -> bool { "pyo_doc_mode", "pyo3_doc_node", ]; - + potential_typos.iter().any(|&typo| { - content.starts_with(typo) || - (content.len() >= typo.len() - 2 && - simple_edit_distance(content.split(':').next().unwrap_or(""), typo) <= 2) + content.starts_with(typo) + || (content.len() >= typo.len() - 2 + && simple_edit_distance(content.split(':').next().unwrap_or(""), typo) <= 2) }) } @@ -325,12 +333,16 @@ fn simple_edit_distance(a: &str, b: &str) -> usize { let b_chars: Vec = b.chars().collect(); let a_len = a_chars.len(); let b_len = b_chars.len(); - - if a_len == 0 { return b_len; } - if b_len == 0 { return a_len; } - + + if a_len == 0 { + return b_len; + } + if b_len == 0 { + return a_len; + } + let mut matrix = vec![vec![0; b_len + 1]; a_len + 1]; - + // Initialize first row and column for i in 0..=a_len { matrix[i][0] = i; @@ -338,17 +350,21 @@ fn simple_edit_distance(a: &str, b: &str) -> usize { for j in 0..=b_len { matrix[0][j] = j; } - + // Fill the matrix for i in 1..=a_len { for j in 1..=b_len { - let cost = if a_chars[i - 1] == b_chars[j - 1] { 0 } else { 1 }; - matrix[i][j] = (matrix[i - 1][j] + 1) // deletion - .min(matrix[i][j - 1] + 1) // insertion - .min(matrix[i - 1][j - 1] + cost); // substitution + let cost = if a_chars[i - 1] == b_chars[j - 1] { + 0 + } else { + 1 + }; + matrix[i][j] = (matrix[i - 1][j] + 1) // deletion + .min(matrix[i][j - 1] + 1) // insertion + .min(matrix[i - 1][j - 1] + cost); // substitution } } - + matrix[a_len][b_len] } @@ -525,4 +541,4 @@ pub fn expr_to_python(expr: &syn::Expr) -> String { // others, unsupported yet so defaults to `...` _ => "...".to_string(), } -} \ No newline at end of file +}