diff --git a/mf2py/parser.py b/mf2py/parser.py
index 9529005..f099c69 100644
--- a/mf2py/parser.py
+++ b/mf2py/parser.py
@@ -28,24 +28,25 @@ def parse(
filter_roots=False,
):
"""
- Parse a microformats2 document or url and return a json dictionary.
+ Parse a document or URL for microformats and return a dictionary in mf2json format.
Args:
- doc (file or string or BeautifulSoup doc): file handle, text of content
- to parse, or BeautifulSoup document. If None, it will be fetched from
- given url
- url (string): url of the file to be processed. Optionally extracted from
- base-element of given doc
- html_parser (string): optional, select a specific HTML parser. Valid
- options from the BeautifulSoup documentation are:
- "html", "xml", "html5", "lxml", "html5lib", and "html.parser"
+ doc (file, string or BeautifulSoup doc): file handle, text of content
+ to parse, or BeautifulSoup document. If None it will be fetched from
+ given URL.
+ url (string): URL of the file to be processed. If None it will be
+ extracted from the `<base>` element of given doc.
+ html_parser (string): optional, select a specific HTML parser. Valid options
+ from the BeautifulSoup documentation are: "html", "xml", "html5", "lxml",
+ "html5lib", and "html.parser".
expose_dom (boolean): optional, expose the DOM of embedded properties.
- metaformats (boolean): whether to include metaformats extracted from OGP
+ metaformats (boolean): optional, include metaformats extracted from OGP
and Twitter card data: https://microformats.org/wiki/metaformats
filter_roots (boolean or list): optional, filter root class names. Use
True to filter known conflicting classes, otherwise filter given list.
- Return: a json dict represented the structured data in this document.
+ Return: a mf2json dict representing the structured data in the document.
+
"""
return Parser(
doc,
@@ -58,27 +59,27 @@ def parse(
class Parser(object):
- """Object to parse a document for microformats and return them in
- appropriate formats.
+ """
+ Parser to parse a document or URL for microformats and output in various formats.
Args:
- doc (file or string or BeautifulSoup doc): file handle, text of content
- to parse, or BeautifulSoup document. If None, it will be fetched from
- given url
- url (string): url of the file to be processed. Optionally extracted from
- base-element of given doc
- html_parser (string): optional, select a specific HTML parser. Valid
- options from the BeautifulSoup documentation are:
- "html", "xml", "html5", "lxml", "html5lib", and "html.parser"
- defaults to "html5lib"
+ doc (file, string or BeautifulSoup doc): file handle, text of content
+ to parse, or BeautifulSoup document. If None it will be fetched from
+ given URL.
+ url (string): URL of the file to be processed. If None it will be
+ extracted from the `<base>` element of given doc.
+ html_parser (string): optional, select a specific HTML parser. Valid options
+ from the BeautifulSoup documentation are: "html", "xml", "html5", "lxml",
+ "html5lib", and "html.parser".
expose_dom (boolean): optional, expose the DOM of embedded properties.
- metaformats (boolean): whether to include metaformats extracted from OGP
+ metaformats (boolean): optional, include metaformats extracted from OGP
and Twitter card data: https://microformats.org/wiki/metaformats
filter_roots (boolean or list): optional, filter root class names. Use
True to filter known conflicting classes, otherwise filter given list.
Attributes:
useragent (string): the User-Agent string for the Parser
+
"""
ua_desc = "mf2py - microformats2 parser for python"