Skip to content

Commit

Permalink
doc: general tidying up of docstrings
Browse files Browse the repository at this point in the history
and adding some TODOs
  • Loading branch information
flavorjones committed Dec 8, 2024
1 parent 7928194 commit ac9fb8a
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 41 deletions.
4 changes: 2 additions & 2 deletions ext/nokogiri/html4_document.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ static ID id_to_s;

/*
* call-seq:
* new
* new(uri=nil, external_id=nil) → HTML4::Document
*
* Create a new document
* Create a new empty document with base URI +uri+ and external ID +external_id+.
*/
static VALUE
rb_html_document_s_new(int argc, VALUE *argv, VALUE klass)
Expand Down
4 changes: 2 additions & 2 deletions ext/nokogiri/nokogiri.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,8 @@ Init_nokogiri(void)
{
mNokogiri = rb_define_module("Nokogiri");
mNokogiriGumbo = rb_define_module_under(mNokogiri, "Gumbo");
mNokogiriHtml4 = rb_define_module_under(mNokogiri, "HTML4");
mNokogiriHtml4Sax = rb_define_module_under(mNokogiriHtml4, "SAX");
mNokogiriHtml4 = rb_define_module_under(mNokogiri, "HTML4");
mNokogiriHtml4Sax = rb_define_module_under(mNokogiriHtml4, "SAX");
mNokogiriHtml5 = rb_define_module_under(mNokogiri, "HTML5");
mNokogiriXml = rb_define_module_under(mNokogiri, "XML");
mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX");
Expand Down
12 changes: 7 additions & 5 deletions ext/nokogiri/xml_document.c
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,8 @@ noko_xml_document_s_read_io(VALUE rb_class,
VALUE rb_encoding,
VALUE rb_options)
{
/* TODO: deprecate this method; parse should be the preferred entry point. Then we can make this
private. */
libxmlStructuredErrorHandlerState handler_state;
VALUE rb_errors = rb_ary_new();

Expand Down Expand Up @@ -417,6 +419,8 @@ noko_xml_document_s_read_memory(VALUE rb_class,
VALUE rb_encoding,
VALUE rb_options)
{
/* TODO: deprecate this method; parse should be the preferred entry point. Then we can make this
private. */
VALUE rb_errors = rb_ary_new();
xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);

Expand Down Expand Up @@ -444,9 +448,9 @@ noko_xml_document_s_read_memory(VALUE rb_class,

/*
* call-seq:
* new(version = default)
* new(version = "1.0")
*
* Create a new document with +version+ (defaults to "1.0")
* Create a new empty document declaring XML version +version+.
*/
static VALUE
new (int argc, VALUE *argv, VALUE klass)
Expand Down Expand Up @@ -756,9 +760,7 @@ void
noko_init_xml_document(void)
{
assert(cNokogiriXmlNode);
/*
* Nokogiri::XML::Document wraps an xml document.
*/

cNokogiriXmlDocument = rb_define_class_under(mNokogiriXml, "Document", cNokogiriXmlNode);

rb_define_alloc_func(cNokogiriXmlDocument, _xml_document_alloc);
Expand Down
10 changes: 5 additions & 5 deletions lib/nokogiri/html5.rb
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@ def self.HTML5(...)
# The document and fragment parsing methods support options that are different from
# Nokogiri::HTML4::Document or Nokogiri::XML::Document.
#
# - <tt>Nokogiri.HTML5(html, url:, encoding:, **parse_options)</tt>
# - <tt>Nokogiri::HTML5.parse(html, url:, encoding:, **parse_options)</tt>
# - <tt>Nokogiri::HTML5::Document.parse(html, url:, encoding:, **parse_options)</tt>
# - <tt>Nokogiri::HTML5.fragment(html, encoding = nil, **parse_options)</tt>
# - <tt>Nokogiri::HTML5::DocumentFragment.parse(html, encoding = nil, **parse_options)</tt>
# - <tt>Nokogiri.HTML5(input, url:, encoding:, **parse_options)</tt>
# - <tt>Nokogiri::HTML5.parse(input, url:, encoding:, **parse_options)</tt>
# - <tt>Nokogiri::HTML5::Document.parse(input, url:, encoding:, **parse_options)</tt>
# - <tt>Nokogiri::HTML5.fragment(input, encoding:, **parse_options)</tt>
# - <tt>Nokogiri::HTML5::DocumentFragment.parse(input, encoding:, **parse_options)</tt>
#
# The four currently supported parse options are
#
Expand Down
29 changes: 22 additions & 7 deletions lib/nokogiri/html5/document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,9 @@ class Document < Nokogiri::HTML4::Document

class << self
# :call-seq:
# parse(input) { |parse_options| ... }
# parse(input, url:, encoding:, **parse_options)
# parse(input) { |options| ... } → HTML5::Document
# parse(input, url:, encoding:) { |options| ... } → HTML5::Document
# parse(input, **options) → HTML5::Document
#
# Parse \HTML input with a parser compliant with the HTML5 spec. This method uses the
# encoding of +input+ if it can be determined, or else falls back to the +encoding:+
Expand All @@ -62,11 +63,25 @@ class << self
#
# [Optional Parameters]
# - +url:+ (String) the base URI of the document.
# - +encoding+ (Encoding) The encoding that should be used when processing the
# document. This option is only used as a fallback when the encoding of +input+ cannot be
# determined.
# - +parse_options+ (Hash) represents keyword arguments that control the behavior of the
# parser. See rdoc-ref:HTML5@Parsing+options for a list of available options.
#
# [Optional Keyword Arguments]
# - +encoding:+ (Encoding) The name of the encoding that should be used when processing the
# document. When not provided, the encoding will be determined based on the document
# content.
#
# - +max_errors:+ (Integer) The maximum number of parse errors to record. (default
# +Nokogiri::Gumbo::DEFAULT_MAX_ERRORS+ which is currently 0)
#
# - +max_tree_depth:+ (Integer) The maximum depth of the parse tree. (default
# +Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH+)
#
# - +max_attributes:+ (Integer) The maximum number of attributes allowed on an
# element. (default +Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES+)
#
# - +parse_noscript_content_as_text:+ (Boolean) Whether to parse the content of +noscript+
# elements as text. (default +false+)
#
# See rdoc-ref:HTML5@Parsing+options for a complete description of these parsing options.
#
# [Yields]
# If present, the block will be passed a Hash object to modify with parse options before the
Expand Down
41 changes: 21 additions & 20 deletions lib/nokogiri/xml/document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@

module Nokogiri
module XML
# Nokogiri::XML::Document is the main entry point for dealing with XML documents. The Document
# is created by parsing an XML document. See Nokogiri::XML::Document.parse for more information
# on parsing.
# Nokogiri::XML::Document is the main entry point for dealing with \XML documents. The Document
# is created by parsing \XML content from a String or an IO object. See
# Nokogiri::XML::Document.parse for more information on parsing.
#
# For searching a Document, see Nokogiri::XML::Searchable#css and
# Nokogiri::XML::Searchable#xpath
# Document inherits a great deal of functionality from its superclass Nokogiri::XML::Node, so
# please read that class's documentation as well.
class Document < Nokogiri::XML::Node
# See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
# attempting to handle unicode characters partly because libxml2 doesn't handle unicode
Expand All @@ -25,34 +25,34 @@ class Document < Nokogiri::XML::Node

class << self
# call-seq:
# parse(input, url: nil, encoding: nil, options: DEFAULT_XML) { |options| } => Nokogiri::XML::Document
# parse(input) { |options| ... } => Nokogiri::XML::Document
# parse(input, url:, encoding:, options:) => Nokogiri::XML::Document
#
# Parse XML input from a String or IO object, and return a new Document object.
# Parse \XML input from a String or IO object, and return a new XML::Document.
#
# By default, Nokogiri treats documents as untrusted, and so does not attempt to load DTDs
# 🛡 By default, Nokogiri treats documents as untrusted, and so does not attempt to load DTDs
# or access the network. See Nokogiri::XML::ParseOptions for a complete list of options; and
# that module's DEFAULT_XML constant for what's set (and not set) by default.
#
# See also: Nokogiri.XML() which is a convenience method which will call this method.
# [Required Parameters]
# - +input+ (String | IO) The content to be parsed.
#
# [Parameters]
# - +input+ (String, IO) The content to be parsed.
#
# [Keyword arguments]
# - +url:+ (String) The URI where this document is located.
# [Optional Keyword Arguments]
# - +url:+ (String) The base URI for this document.
#
# - +encoding:+ (String) The name of the encoding that should be used when processing the
# document. (default +nil+ means that the encoding will be determined based on the
# document content)
# document. When not provided, the encoding will be determined based on the document
# content.
#
# - +options+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
# behaviors during parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the
# Nokogiri::XML::ParseOptions for more information.
# - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
# behaviors during parsing. See ParseOptions for more information. The default value is
# +ParseOptions::DEFAULT_XML+.
#
# [Yields]
# If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
# can be configured before parsing. See Nokogiri::XML::ParseOptions for more information.
# can be configured before parsing. See Nokogiri::XML::ParseOptions for more information.
#
# [Returns] Nokogiri::XML::Document
def parse(
string_or_io,
url_ = nil, encoding_ = nil, options_ = XML::ParseOptions::DEFAULT_XML,
Expand All @@ -72,6 +72,7 @@ def parse(
end

doc = if string_or_io.respond_to?(:read)
# TODO: should we instead check for respond_to?(:to_path) ?
if string_or_io.is_a?(Pathname)
# resolve the Pathname to the file and open it as an IO object, see #2110
string_or_io = string_or_io.expand_path.open
Expand Down

0 comments on commit ac9fb8a

Please sign in to comment.