diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index bcd4f835e9..ef91a8f442 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,11 +1,12 @@
-# Contributing to FBGEMM / FBGEMM_GPU
+# Contributing to FBGEMM and FBGEMM_GPU
 
 We want to make contributing to this project as easy and transparent as
 possible.
 
 ## Code of Conduct
 
-The code of conduct is described in [`CODE_OF_CONDUCT.md`](CODE_OF_CONDUCT.md).
+The code of conduct is described in the
+[Code of Conduct](https://github.com/pytorch/FBGEMM/blob/main/CODE_OF_CONDUCT.md).
 
 ## Pull Requests
 
diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in
index cf1849d595..313a01b590 100644
--- a/docs/Doxyfile.in
+++ b/docs/Doxyfile.in
@@ -2,8 +2,8 @@
 # All rights reserved.
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
-#
-# Doxyfile 1.8.14
+
+# Doxyfile 1.9.8
 
 # This file describes the settings to be used by the documentation system
 # doxygen (www.doxygen.org) for a project.
@@ -17,6 +17,16 @@
 # For lists, items can also be appended using:
 # TAG += value [value, ...]
 # Values that contain spaces should be placed between quotes (\" \").
+#
+# Note:
+#
+# Use doxygen to compare the used configuration file with the template
+# configuration file:
+# doxygen -x [configFile]
+# Use doxygen to compare the used configuration file with the template
+# configuration file without replacing the environment variables or CMake type
+# replacement variables:
+# doxygen -x_noenv [configFile]
 
 #---------------------------------------------------------------------------
 # Project related configuration options
@@ -37,7 +47,7 @@ DOXYFILE_ENCODING      = UTF-8
 # title of most generated pages and in a few other places.
 # The default value is: My Project.
 
-PROJECT_NAME           = "FBGEMM"
+PROJECT_NAME           = FBGEMM
 
 # The PROJECT_NUMBER tag can be used to enter a project or revision number. This
 # could be handy for archiving the generated documentation or if some version
@@ -63,18 +73,30 @@ PROJECT_LOGO           =
 # entered, it will be relative to the location where doxygen was started. If
 # left blank the current directory will be used.
 
-OUTPUT_DIRECTORY       = "@DOXYGEN_OUTPUT_DIR@"
+OUTPUT_DIRECTORY       = build
 
-# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub-
-# directories (in 2 levels) under the output directory of each output format and
-# will distribute the generated files over these directories. Enabling this
+# If the CREATE_SUBDIRS tag is set to YES then doxygen will create up to 4096
+# sub-directories (in 2 levels) under the output directory of each output format
+# and will distribute the generated files over these directories. Enabling this
 # option can be useful when feeding doxygen a huge amount of source files, where
 # putting all generated files in the same directory would otherwise causes
-# performance problems for the file system.
+# performance problems for the file system. Adapt CREATE_SUBDIRS_LEVEL to
+# control the number of sub-directories.
 # The default value is: NO.
 
 CREATE_SUBDIRS         = NO
 
+# Controls the number of sub-directories that will be created when
+# CREATE_SUBDIRS tag is set to YES. Level 0 represents 16 directories, and every
+# level increment doubles the number of directories, resulting in 4096
+# directories at level 8 which is the default and also the maximum value. The
+# sub-directories are organized in 2 levels, the first level always has a fixed
+# number of 16 directories.
+# Minimum value: 0, maximum value: 8, default value: 8.
+# This tag requires that the tag CREATE_SUBDIRS is set to YES.
+
+CREATE_SUBDIRS_LEVEL   = 8
+
 # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII
 # characters to appear in the names of generated files. If set to NO, non-ASCII
 # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode
@@ -86,26 +108,18 @@ ALLOW_UNICODE_NAMES    = NO
 # The OUTPUT_LANGUAGE tag is used to specify the language in which all
 # documentation generated by doxygen is written. Doxygen will use this
 # information to generate all constant output in the proper language.
-# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese,
-# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States),
-# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian,
-# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages),
-# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian,
-# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian,
-# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish,
-# Ukrainian and Vietnamese.
+# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Bulgarian,
+# Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, Dutch, English
+# (United States), Esperanto, Farsi (Persian), Finnish, French, German, Greek,
+# Hindi, Hungarian, Indonesian, Italian, Japanese, Japanese-en (Japanese with
+# English messages), Korean, Korean-en (Korean with English messages), Latvian,
+# Lithuanian, Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese,
+# Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish,
+# Swedish, Turkish, Ukrainian and Vietnamese.
 # The default value is: English.
 
 OUTPUT_LANGUAGE        = English
 
-# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all
-# documentation generated by doxygen is written. Doxygen will use this
-# information to generate all generated output in the proper direction.
-# Possible values are: None, LTR, RTL and Context.
-# The default value is: None.
-
-OUTPUT_TEXT_DIRECTION  = None
-
 # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member
 # descriptions after the members that are listed in the file and class
 # documentation (similar to Javadoc). Set to NO to disable this.
@@ -202,6 +216,16 @@ SHORT_NAMES            = NO
 
 JAVADOC_AUTOBRIEF      = NO
 
+# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line
+# such as
+# /***************
+# as being the beginning of a Javadoc-style comment "banner". If set to NO, the
+# Javadoc-style will behave just like regular comments and it will not be
+# interpreted by doxygen.
+# The default value is: NO.
+
+JAVADOC_BANNER         = NO
+
 # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
 # line (until the first dot) of a Qt-style comment as the brief description. If
 # set to NO, the Qt-style will behave just like regular Qt-style comments (thus
@@ -222,6 +246,14 @@ QT_AUTOBRIEF           = NO
 
 MULTILINE_CPP_IS_BRIEF = NO
 
+# By default Python docstrings are displayed as preformatted text and doxygen's
+# special commands cannot be used. By setting PYTHON_DOCSTRING to NO, doxygen's
+# special commands can be used and the contents of the docstring documentation
+# blocks are shown as doxygen documentation.
+# The default value is: YES.
+
+PYTHON_DOCSTRING       = YES
+
 # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
 # documentation from any documented member that it re-implements.
 # The default value is: YES.
@@ -245,25 +277,19 @@ TAB_SIZE               = 4
 # the documentation. An alias has the form:
 # name=value
 # For example adding
-# "sideeffect=@par Side Effects:\n"
+# "sideeffect=@par Side Effects:^^"
 # will allow you to put the command \sideeffect (or @sideeffect) in the
 # documentation, which will result in a user-defined paragraph with heading
-# "Side Effects:". You can put \n's in the value part of an alias to insert
-# newlines (in the resulting output). You can put ^^ in the value part of an
-# alias to insert a newline as if a physical newline was in the original file.
-# When you need a literal { or } or , in the value part of an alias you have to
-# escape them by means of a backslash (\), this can lead to conflicts with the
-# commands \{ and \} for these it is advised to use the version @{ and @} or use
-# a double escape (\\{ and \\})
+# "Side Effects:". Note that you cannot put \n's in the value part of an alias
+# to insert newlines (in the resulting output). You can put ^^ in the value part
+# of an alias to insert a newline as if a physical newline was in the original
+# file. When you need a literal { or } or , in the value part of an alias you
+# have to escape them by means of a backslash (\), this can lead to conflicts
+# with the commands \{ and \} for these it is advised to use the version @{ and
+# @} or use a double escape (\\{ and \\})
 
 ALIASES                =
 
-# This tag can be used to specify a number of word-keyword mappings (TCL only).
-# A mapping has the form "name=value". For example adding "class=itcl::class"
-# will allow you to use the command class in the itcl::class meaning.
-
-TCL_SUBST              =
-
 # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
 # only. Doxygen will then generate output that is more tailored for C. For
 # instance, some of the names that are used will be different. The list of all
@@ -304,19 +330,22 @@ OPTIMIZE_OUTPUT_SLICE  = NO
 # parses. With this tag you can assign which parser to use for a given
 # extension. Doxygen has a built-in mapping, but you can override or extend it
 # using this tag. The format is ext=language, where ext is a file extension, and
-# language is one of the parsers supported by doxygen: IDL, Java, Javascript,
-# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice,
-# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran:
+# language is one of the parsers supported by doxygen: IDL, Java, JavaScript,
+# Csharp (C#), C, C++, Lex, D, PHP, md (Markdown), Objective-C, Python, Slice,
+# VHDL, Fortran (fixed format Fortran: FortranFixed, free formatted Fortran:
 # FortranFree, unknown formatted Fortran: Fortran. In the later case the parser
 # tries to guess whether the code is fixed or free formatted code, this is the
-# default for Fortran type files), VHDL, tcl. For instance to make doxygen treat
-# .inc files as Fortran files (default is PHP), and .f files as C (default is
-# Fortran), use: inc=Fortran f=C.
+# default for Fortran type files). For instance to make doxygen treat .inc files
+# as Fortran files (default is PHP), and .f files as C (default is Fortran),
+# use: inc=Fortran f=C.
 #
 # Note: For files without extension you can use no_extension as a placeholder.
 #
 # Note that for custom extensions you also need to set FILE_PATTERNS otherwise
-# the files are not read by doxygen.
+# the files are not read by doxygen. When specifying no_extension you should add
+# * to the FILE_PATTERNS.
+#
+# Note: see also the list of default file extension mappings.
 
 EXTENSION_MAPPING      =
 
@@ -334,11 +363,22 @@ MARKDOWN_SUPPORT       = YES
 # to that level are automatically included in the table of contents, even if
 # they do not have an id attribute.
 # Note: This feature currently applies only to Markdown headings.
-# Minimum value: 0, maximum value: 99, default value: 0.
+# Minimum value: 0, maximum value: 99, default value: 5.
 # This tag requires that the tag MARKDOWN_SUPPORT is set to YES.
 
 TOC_INCLUDE_HEADINGS   = 0
 
+# The MARKDOWN_ID_STYLE tag can be used to specify the algorithm used to
+# generate identifiers for the Markdown headings. Note: Every identifier is
+# unique.
+# Possible values are: DOXYGEN use a fixed 'autotoc_md' string followed by a
+# sequence number starting at 0 and GITHUB use the lower case version of title
+# with any whitespace replaced by '-' and punctuation characters removed.
+# The default value is: DOXYGEN.
+# This tag requires that the tag MARKDOWN_SUPPORT is set to YES.
+
+MARKDOWN_ID_STYLE      = DOXYGEN
+
 # When enabled doxygen tries to link words that correspond to documented
 # classes, or namespaces to their corresponding documentation. Such a link can
 # be prevented in individual cases by putting a % sign in front of the word or
@@ -450,6 +490,27 @@ TYPEDEF_HIDES_STRUCT   = NO
 
 LOOKUP_CACHE_SIZE      = 0
 
+# The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use
+# during processing. When set to 0 doxygen will base this on the number of
+# cores available in the system. You can set it explicitly to a value larger
+# than 0 to get more control over the balance between CPU load and processing
+# speed. At this moment only the input processing can be done using multiple
+# threads. Since this is still an experimental feature the default is set to 1,
+# which effectively disables parallel processing. Please report any issues you
+# encounter. Generating dot graphs in parallel is controlled by the
+# DOT_NUM_THREADS setting.
+# Minimum value: 0, maximum value: 32, default value: 1.
+
+NUM_PROC_THREADS       = 1
+
+# If the TIMESTAMP tag is set different from NO then each generated page will
+# contain the date or date and time when the page was generated. Setting this to
+# NO can help when comparing the output of multiple runs.
+# Possible values are: YES, NO, DATETIME and DATE.
+# The default value is: NO.
+
+TIMESTAMP              = NO
+
 #---------------------------------------------------------------------------
 # Build related configuration options
 #---------------------------------------------------------------------------
@@ -470,6 +531,12 @@ EXTRACT_ALL            = NO
 
 EXTRACT_PRIVATE        = NO
 
+# If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual
+# methods of a class will be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PRIV_VIRTUAL   = NO
+
 # If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal
 # scope will be included in the documentation.
 # The default value is: NO.
@@ -507,6 +574,13 @@ EXTRACT_LOCAL_METHODS  = NO
 
 EXTRACT_ANON_NSPACES   = NO
 
+# If this flag is set to YES, the name of an unnamed parameter in a declaration
+# will be determined by the corresponding definition. By default unnamed
+# parameters remain unnamed in the output.
+# The default value is: YES.
+
+RESOLVE_UNNAMED_PARAMS = YES
+
 # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
 # undocumented members inside documented classes or files. If set to NO these
 # members will be included in the various overviews, but no documentation
@@ -518,14 +592,15 @@ HIDE_UNDOC_MEMBERS     = NO
 # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all
 # undocumented classes that are normally visible in the class hierarchy. If set
 # to NO, these classes will be included in the various overviews. This option
-# has no effect if EXTRACT_ALL is enabled.
+# will also hide undocumented C++ concepts if enabled. This option has no effect
+# if EXTRACT_ALL is enabled.
 # The default value is: NO.
 
 HIDE_UNDOC_CLASSES     = NO
 
 # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
-# (class|struct|union) declarations. If set to NO, these declarations will be
-# included in the documentation.
+# declarations. If set to NO, these declarations will be included in the
+# documentation.
 # The default value is: NO.
 
 HIDE_FRIEND_COMPOUNDS  = NO
@@ -544,12 +619,20 @@ HIDE_IN_BODY_DOCS      = NO
 
 INTERNAL_DOCS          = NO
 
-# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
-# names in lower-case letters. If set to YES, upper-case letters are also
-# allowed. This is useful if you have classes or files whose names only differ
-# in case and if your file system supports case sensitive file names. Windows
-# and Mac users are advised to set this option to NO.
-# The default value is: system dependent.
+# With the correct setting of option CASE_SENSE_NAMES doxygen will better be
+# able to match the capabilities of the underlying filesystem. In case the
+# filesystem is case sensitive (i.e. it supports files in the same directory
+# whose names only differ in casing), the option must be set to YES to properly
+# deal with such files in case they appear in the input. For filesystems that
+# are not case sensitive the option should be set to NO to properly deal with
+# output files written for symbols that only differ in casing, such as for two
+# classes, one named CLASS and the other named Class, and to also support
+# references to files without having to specify the exact matching casing. On
+# Windows (including Cygwin) and MacOS, users should typically set this option
+# to NO, whereas on Linux or other Unix flavors it should typically be set to
+# YES.
+# Possible values are: SYSTEM, NO and YES.
+# The default value is: SYSTEM.
 
 CASE_SENSE_NAMES       = NO
 
@@ -567,6 +650,12 @@ HIDE_SCOPE_NAMES       = NO
 
 HIDE_COMPOUND_REFERENCE= NO
 
+# If the SHOW_HEADERFILE tag is set to YES then the documentation for a class
+# will show which file needs to be included to use the class.
+# The default value is: YES.
+
+SHOW_HEADERFILE        = YES
+
 # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
 # the files that are included by a file in the documentation of that file.
 # The default value is: YES.
@@ -724,7 +813,8 @@ FILE_VERSION_FILTER    =
 # output files in an output format independent way. To create the layout file
 # that represents doxygen's defaults, run doxygen with the -l option. You can
 # optionally specify a file name after the option, if omitted DoxygenLayout.xml
-# will be used as the name of the layout file.
+# will be used as the name of the layout file. See also section "Changing the
+# layout of pages" for information.
 #
 # Note that if you run doxygen from a directory containing a file called
 # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE
@@ -770,24 +860,50 @@ WARNINGS               = YES
 WARN_IF_UNDOCUMENTED   = YES
 
 # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for
-# potential errors in the documentation, such as not documenting some parameters
-# in a documented function, or documenting parameters that don't exist or using
-# markup commands wrongly.
+# potential errors in the documentation, such as documenting some parameters in
+# a documented function twice, or documenting parameters that don't exist or
+# using markup commands wrongly.
 # The default value is: YES.
 
 WARN_IF_DOC_ERROR      = YES
 
+# If WARN_IF_INCOMPLETE_DOC is set to YES, doxygen will warn about incomplete
+# function parameter documentation. If set to NO, doxygen will accept that some
+# parameters have no documentation without warning.
+# The default value is: YES.
+
+WARN_IF_INCOMPLETE_DOC = YES
+
 # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
 # are documented, but have no documentation for their parameters or return
-# value. If set to NO, doxygen will only warn about wrong or incomplete
-# parameter documentation, but not about the absence of documentation. If
-# EXTRACT_ALL is set to YES then this flag will automatically be disabled.
+# value. If set to NO, doxygen will only warn about wrong parameter
+# documentation, but not about the absence of documentation. If EXTRACT_ALL is
+# set to YES then this flag will automatically be disabled. See also
+# WARN_IF_INCOMPLETE_DOC.
 # The default value is: NO.
 
 WARN_NO_PARAMDOC       = NO
 
+# If WARN_IF_UNDOC_ENUM_VAL option is set to YES, doxygen will warn about
+# undocumented enumeration values. If set to NO, doxygen will accept
+# undocumented enumeration values. If EXTRACT_ALL is set to YES then this flag
+# will automatically be disabled.
+# The default value is: NO.
+
+WARN_IF_UNDOC_ENUM_VAL = NO
+
 # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
-# a warning is encountered.
+# a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS
+# then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but
+# at the end of the doxygen process doxygen will return with a non-zero status.
+# If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS_PRINT then doxygen behaves
+# like FAIL_ON_WARNINGS, but if no WARN_LOGFILE is defined doxygen will not
+# write the warning messages in between other messages but write them at the end
+# of a run. If a WARN_LOGFILE is defined, the warning messages will, besides
+# being written to that file, also be shown at the end of a run, unless the
+# WARN_LOGFILE is defined as - (i.e. standard output, stdout), in which case
+# the behavior will remain as with the setting FAIL_ON_WARNINGS.
+# Possible values are: NO, YES, FAIL_ON_WARNINGS and FAIL_ON_WARNINGS_PRINT.
 # The default value is: NO.
 
 WARN_AS_ERROR          = NO
@@ -798,13 +914,27 @@ WARN_AS_ERROR          = NO
 # and the warning text. Optionally the format may contain $version, which will
 # be replaced by the version of the file (if it could be obtained via
 # FILE_VERSION_FILTER)
+# See also: WARN_LINE_FORMAT
 # The default value is: $file:$line: $text.
 
 WARN_FORMAT            = "$file:$line: $text"
 
+# In the $text part of the WARN_FORMAT command it is possible that a reference
+# to a more specific place is given. To make it easier to jump to this place
+# (outside of doxygen) the user can define a custom "cut" / "paste" string.
+# Example:
+# WARN_LINE_FORMAT = "'vi $file +$line'"
+# See also: WARN_FORMAT
+# The default value is: at line $line of file $file.
+
+WARN_LINE_FORMAT       = "at line $line of file $file"
+
 # The WARN_LOGFILE tag can be used to specify a file to which warning and error
 # messages should be written. If left blank the output is written to standard
-# error (stderr).
+# error (stderr). In case the file specified cannot be opened for writing the
+# warning and error messages are written to standard error. When - is specified
+# as the file name, the warning and error messages are written to standard
+# output (stdout).
 
 WARN_LOGFILE           =
 
@@ -818,17 +948,29 @@ WARN_LOGFILE           =
 # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
 # Note: If this tag is empty the current directory is searched.
 
-INPUT                  = "@DOXYGEN_INPUT_DIR@"
+INPUT                  = "../include/fbgemm/" \
+                         "../src/"
 
 # This tag can be used to specify the character encoding of the source files
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
 # libiconv (or the iconv built into libc) for the transcoding. See the libiconv
-# documentation (see: https://www.gnu.org/software/libiconv/) for the list of
-# possible encodings.
+# documentation (see:
+# https://www.gnu.org/software/libiconv/) for the list of possible encodings.
+# See also: INPUT_FILE_ENCODING
 # The default value is: UTF-8.
 
 INPUT_ENCODING         = UTF-8
 
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. The INPUT_FILE_ENCODING tag can be used to specify
+# character encoding on a per file pattern basis. Doxygen will compare the file
+# name with each pattern and apply the encoding (instead of the default
+# INPUT_ENCODING) if there is a match. The character encodings are a list of the
+# form: pattern=encoding (like *.php=ISO-8859-1). See also: INPUT_ENCODING for
+# further information on supported encodings.
+
+INPUT_FILE_ENCODING    =
+
 # If the value of the INPUT tag contains directories, you can use the
 # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and
 # *.h) to filter out the source-files in the directories.
@@ -837,11 +979,15 @@ INPUT_ENCODING         = UTF-8
 # need to set EXTENSION_MAPPING for the extension otherwise the files are not
 # read by doxygen.
 #
-# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp,
-# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h,
-# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc,
-# *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f95, *.f03, *.f08,
-# *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf, *.qsf and *.ice.
+# Note the list of default checked file patterns might differ from the list of
+# default file extension mappings.
+#
+# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cxxm,
+# *.cpp, *.cppm, *.c++, *.c++m, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl,
+# *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp, *.h++, *.ixx, *.l, *.cs, *.d, *.php,
+# *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown, *.md, *.mm, *.dox (to be
+# provided as doxygen C comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08,
+# *.f18, *.f, *.for, *.vhd, *.vhdl, *.ucf, *.qsf and *.ice.
 
 FILE_PATTERNS          = *.c \
                          *.cc \
@@ -924,10 +1070,7 @@ EXCLUDE_PATTERNS       =
 # (namespaces, classes, functions, etc.) that should be excluded from the
 # output. The symbol name can be a fully qualified name, a word, or if the
 # wildcard * is used, a substring. Examples: ANamespace, AClass,
-# AClass::ANamespace, ANamespace::*Test
-#
-# Note that the wildcards are matched against the file with absolute path, so to
-# exclude all test directories use the pattern */test/*
+# ANamespace::AClass, ANamespace::*Test
 
 EXCLUDE_SYMBOLS        =
 
@@ -972,6 +1115,11 @@ IMAGE_PATH             =
 # code is scanned, but not when the output code is generated. If lines are added
 # or removed, the anchors will not be placed correctly.
 #
+# Note that doxygen will use the data processed and written to standard output
+# for further processing, therefore nothing else, like debug statements or used
+# commands (so in case of a Windows batch file always use @echo OFF), should be
+# written to standard output.
+#
 # Note that for custom extensions or not directly supported extensions you also
 # need to set EXTENSION_MAPPING for the extension otherwise the files are not
 # properly processed by doxygen.
@@ -1013,6 +1161,15 @@ FILTER_SOURCE_PATTERNS =
 
 USE_MDFILE_AS_MAINPAGE =
 
+# The Fortran standard specifies that for fixed formatted Fortran code all
+# characters from position 72 are to be considered as comment. A common
+# extension is to allow longer lines before the automatic comment starts. The
+# setting FORTRAN_COMMENT_AFTER will also make it possible that longer lines can
+# be processed before the automatic comment starts.
+# Minimum value: 7, maximum value: 10000, default value: 72.
+
+FORTRAN_COMMENT_AFTER  = 72
+
 #---------------------------------------------------------------------------
 # Configuration options related to source browsing
 #---------------------------------------------------------------------------
@@ -1099,35 +1256,6 @@ USE_HTAGS              = NO
 
 VERBATIM_HEADERS       = YES
 
-# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the
-# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the
-# cost of reduced performance. This can be particularly helpful with template
-# rich C++ code for which doxygen's built-in parser lacks the necessary type
-# information.
-# Note: The availability of this option depends on whether or not doxygen was
-# generated with the -Duse_libclang=ON option for CMake.
-# The default value is: NO.
-
-CLANG_ASSISTED_PARSING = NO
-
-# If clang assisted parsing is enabled you can provide the compiler with command
-# line options that you would normally use when invoking the compiler. Note that
-# the include paths will already be set by doxygen for the files and directories
-# specified with INPUT and INCLUDE_PATH.
-# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES.
-
-CLANG_OPTIONS          =
-
-# If clang assisted parsing is enabled you can provide the clang parser with the
-# path to the compilation database (see:
-# http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) used when the files
-# were built. This is equivalent to specifying the "-p" option to a clang tool,
-# such as clang-check. These options will then be passed to the parser.
-# Note: The availability of this option depends on whether or not doxygen was
-# generated with the -Duse_libclang=ON option for CMake.
-
-CLANG_DATABASE_PATH    =
-
 #---------------------------------------------------------------------------
 # Configuration options related to the alphabetical class index
 #---------------------------------------------------------------------------
@@ -1139,17 +1267,11 @@ CLANG_DATABASE_PATH    =
 
 ALPHABETICAL_INDEX     = YES
 
-# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
-# which the alphabetical index list will be split.
-# Minimum value: 1, maximum value: 20, default value: 5.
-# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
-
-COLS_IN_ALPHA_INDEX    = 5
-
-# In case all classes in a project start with a common prefix, all classes will
-# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
-# can be used to specify a prefix (or a list of prefixes) that should be ignored
-# while generating the index headers.
+# The IGNORE_PREFIX tag can be used to specify a prefix (or a list of prefixes)
+# that should be ignored while generating the index headers. The IGNORE_PREFIX
+# tag works for classes, function and member names. The entity will be placed in
+# the alphabetical list under the first letter of the entity name that remains
+# after removing the prefix.
 # This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
 
 IGNORE_PREFIX          =
@@ -1228,7 +1350,12 @@ HTML_STYLESHEET        =
 # Doxygen will copy the style sheet files to the output directory.
 # Note: The order of the extra style sheet files is of importance (e.g. the last
 # style sheet in the list overrules the setting of the previous ones in the
-# list). For an example see the documentation.
+# list).
+# Note: Since the styling of scrollbars can currently not be overruled in
+# Webkit/Chromium, the styling will be left out of the default doxygen.css if
+# one or more extra stylesheets have been specified. So if scrollbar
+# customization is desired it has to be added explicitly. For an example see the
+# documentation.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
 HTML_EXTRA_STYLESHEET  =
@@ -1243,9 +1370,22 @@ HTML_EXTRA_STYLESHEET  =
 
 HTML_EXTRA_FILES       =
 
+# The HTML_COLORSTYLE tag can be used to specify if the generated HTML output
+# should be rendered with a dark or light theme.
+# Possible values are: LIGHT always generate light mode output, DARK always
+# generate dark mode output, AUTO_LIGHT automatically set the mode according to
+# the user preference, use light mode if no preference is set (the default),
+# AUTO_DARK automatically set the mode according to the user preference, use
+# dark mode if no preference is set and TOGGLE allow the user to switch between
+# light and dark mode via a button.
+# The default value is: AUTO_LIGHT.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE        = AUTO_LIGHT
+
 # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen
 # will adjust the colors in the style sheet and background images according to
-# this color. Hue is specified as an angle on a colorwheel, see
+# this color. Hue is specified as an angle on a color-wheel, see
 # https://en.wikipedia.org/wiki/Hue for more information. For instance the value
 # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300
 # purple, and 360 is red again.
@@ -1255,7 +1395,7 @@ HTML_EXTRA_FILES       =
 HTML_COLORSTYLE_HUE    = 220
 
 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors
-# in the HTML output. For a value of 0 the output will use grayscales only. A
+# in the HTML output. For a value of 0 the output will use gray-scales only. A
 # value of 255 will produce the most vivid colors.
 # Minimum value: 0, maximum value: 255, default value: 100.
 # This tag requires that the tag GENERATE_HTML is set to YES.
@@ -1273,20 +1413,11 @@ HTML_COLORSTYLE_SAT    = 100
 
 HTML_COLORSTYLE_GAMMA  = 80
 
-# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
-# page will contain the date and time when the page was generated. Setting this
-# to YES can help to show when doxygen was last run and thus if the
-# documentation is up to date.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_TIMESTAMP         = NO
-
 # If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML
 # documentation will contain a main index with vertical navigation menus that
-# are dynamically created via Javascript. If disabled, the navigation index will
+# are dynamically created via JavaScript. If disabled, the navigation index will
 # consists of multiple levels of tabs that are statically embedded in every HTML
-# page. Disable this option to support browsers that do not have Javascript,
+# page. Disable this option to support browsers that do not have JavaScript,
 # like the Qt help browser.
 # The default value is: YES.
 # This tag requires that the tag GENERATE_HTML is set to YES.
@@ -1301,6 +1432,13 @@ HTML_DYNAMIC_MENUS     = YES
 
 HTML_DYNAMIC_SECTIONS  = NO
 
+# If the HTML_CODE_FOLDING tag is set to YES then classes and functions can be
+# dynamically folded and expanded in the generated HTML source code.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_CODE_FOLDING      = YES
+
 # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries
 # shown in the various tree structured indices initially; the user can expand
 # and collapse entries dynamically later on. Doxygen will expand the tree to
@@ -1316,10 +1454,11 @@ HTML_INDEX_NUM_ENTRIES = 100
 
 # If the GENERATE_DOCSET tag is set to YES, additional index files will be
 # generated that can be used as input for Apple's Xcode 3 integrated development
-# environment (see: https://developer.apple.com/xcode/), introduced with OSX
-# 10.5 (Leopard). To create a documentation set, doxygen will generate a
-# Makefile in the HTML output directory. Running make will produce the docset in
-# that directory and running make install will install the docset in
+# environment (see:
+# https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To
+# create a documentation set, doxygen will generate a Makefile in the HTML
+# output directory. Running make will produce the docset in that directory and
+# running make install will install the docset in
 # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
 # startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy
 # genXcode/_index.html for more information.
@@ -1336,6 +1475,13 @@ GENERATE_DOCSET        = NO
 
 DOCSET_FEEDNAME        = "Doxygen generated docs"
 
+# This tag determines the URL of the docset feed. A documentation feed provides
+# an umbrella under which multiple documentation sets from a single provider
+# (such as a company or product suite) can be grouped.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_FEEDURL         =
+
 # This tag specifies a string that should uniquely identify the documentation
 # set bundle. This should be a reverse domain-name style string, e.g.
 # com.mycompany.MyDocSet. Doxygen will append .docset to the name.
@@ -1361,8 +1507,12 @@ DOCSET_PUBLISHER_NAME  = Publisher
 # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
 # additional HTML index files: index.hhp, index.hhc, and index.hhk. The
 # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
-# (see: https://www.microsoft.com/en-us/download/details.aspx?id=21138) on
-# Windows.
+# on Windows. In the beginning of 2021 Microsoft took the original page, with
+# among others the download links, offline (the HTML help workshop was already
+# many years in maintenance mode). You can download the HTML help workshop
+# installation executable from the web archives (see:
+# http://web.archive.org/web/20160201063255/http://download.microsoft.com/downlo
+# ad/0/A/9/0A939EF6-E31C-430F-A3DF-DFAE7960D564/htmlhelp.exe).
 #
 # The HTML Help Workshop contains a compiler that can convert all HTML output
 # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
@@ -1392,7 +1542,7 @@ CHM_FILE               =
 HHC_LOCATION           =
 
 # The GENERATE_CHI flag controls if a separate .chi index file is generated
-# (YES) or that it should be included in the master .chm file (NO).
+# (YES) or that it should be included in the main .chm file (NO).
 # The default value is: NO.
 # This tag requires that the tag GENERATE_HTMLHELP is set to YES.
 
@@ -1419,6 +1569,16 @@ BINARY_TOC             = NO
 
 TOC_EXPAND             = NO
 
+# The SITEMAP_URL tag is used to specify the full URL of the place where the
+# generated documentation will be placed on the server by the user during the
+# deployment of the documentation. The generated sitemap is called sitemap.xml
+# and placed in the directory specified by HTML_OUTPUT. In case no SITEMAP_URL
+# is specified no sitemap is generated. For information about the sitemap
+# protocol see https://www.sitemaps.org
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+SITEMAP_URL            =
+
 # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
 # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that
 # can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help
@@ -1437,7 +1597,8 @@ QCH_FILE               =
 
 # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
 # Project output. For more information please see Qt Help Project / Namespace
-# (see: http://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
+# (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
 # The default value is: org.doxygen.Project.
 # This tag requires that the tag GENERATE_QHP is set to YES.
 
@@ -1445,8 +1606,8 @@ QHP_NAMESPACE          = org.doxygen.Project
 
 # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
 # Help Project output. For more information please see Qt Help Project / Virtual
-# Folders (see: http://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-
-# folders).
+# Folders (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders).
 # The default value is: doc.
 # This tag requires that the tag GENERATE_QHP is set to YES.
 
@@ -1454,30 +1615,30 @@ QHP_VIRTUAL_FOLDER     = doc
 
 # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
 # filter to add. For more information please see Qt Help Project / Custom
-# Filters (see: http://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-
-# filters).
+# Filters (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
 # This tag requires that the tag GENERATE_QHP is set to YES.
 
 QHP_CUST_FILTER_NAME   =
 
 # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
 # custom filter to add. For more information please see Qt Help Project / Custom
-# Filters (see: http://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-
-# filters).
+# Filters (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
 # This tag requires that the tag GENERATE_QHP is set to YES.
 
 QHP_CUST_FILTER_ATTRS  =
 
 # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
 # project's filter section matches. Qt Help Project / Filter Attributes (see:
-# http://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes).
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes).
 # This tag requires that the tag GENERATE_QHP is set to YES.
 
 QHP_SECT_FILTER_ATTRS  =
 
-# The QHG_LOCATION tag can be used to specify the location of Qt's
-# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
-# generated .qhp file.
+# The QHG_LOCATION tag can be used to specify the location (absolute path
+# including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to
+# run qhelpgenerator on the generated .qhp file.
 # This tag requires that the tag GENERATE_QHP is set to YES.
 
 QHG_LOCATION           =
@@ -1520,16 +1681,28 @@ DISABLE_INDEX          = NO
 # to work a browser that supports JavaScript, DHTML, CSS and frames is required
 # (i.e. any modern browser). Windows users are probably better off using the
 # HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can
-# further fine-tune the look of the index. As an example, the default style
-# sheet generated by doxygen has an example that shows how to put an image at
-# the root of the tree instead of the PROJECT_NAME. Since the tree basically has
-# the same information as the tab index, you could consider setting
-# DISABLE_INDEX to YES when enabling this option.
+# further fine tune the look of the index (see "Fine-tuning the output"). As an
+# example, the default style sheet generated by doxygen has an example that
+# shows how to put an image at the root of the tree instead of the PROJECT_NAME.
+# Since the tree basically has the same information as the tab index, you could
+# consider setting DISABLE_INDEX to YES when enabling this option.
 # The default value is: NO.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
 GENERATE_TREEVIEW      = NO
 
+# When both GENERATE_TREEVIEW and DISABLE_INDEX are set to YES, then the
+# FULL_SIDEBAR option determines if the side bar is limited to only the treeview
+# area (value NO) or if it should extend to the full height of the window (value
+# YES). Setting this to YES gives a layout similar to
+# https://docs.readthedocs.io with more room for contents, but less room for the
+# project logo, title, and description. If either GENERATE_TREEVIEW or
+# DISABLE_INDEX is set to NO, this option has no effect.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+FULL_SIDEBAR           = NO
+
 # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that
 # doxygen will group on one line in the generated HTML documentation.
 #
@@ -1554,6 +1727,24 @@ TREEVIEW_WIDTH         = 250
 
 EXT_LINKS_IN_WINDOW    = NO
 
+# If the OBFUSCATE_EMAILS tag is set to YES, doxygen will obfuscate email
+# addresses.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+OBFUSCATE_EMAILS       = YES
+
+# If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg
+# tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see
+# https://inkscape.org) to generate formulas as SVG images instead of PNGs for
+# the HTML output. These images will generally look nicer at scaled resolutions.
+# Possible values are: png (the default) and svg (looks nicer but requires the
+# pdf2svg or inkscape tool).
+# The default value is: png.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FORMULA_FORMAT    = png
+
 # Use this tag to change the font size of LaTeX formulas included as images in
 # the HTML documentation. When you change the font size after a successful
 # doxygen run you need to manually remove any form_*.png images from the HTML
@@ -1563,19 +1754,14 @@ EXT_LINKS_IN_WINDOW    = NO
 
 FORMULA_FONTSIZE       = 10
 
-# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
-# generated for formulas are transparent PNGs. Transparent PNGs are not
-# supported properly for IE 6.0, but are supported on all modern browsers.
-#
-# Note that when changing this option you need to delete any form_*.png files in
-# the HTML output directory before the changes have effect.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_HTML is set to YES.
+# The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands
+# to create new LaTeX commands to be used in formulas as building blocks. See
+# the section "Including formulas" for details.
 
-FORMULA_TRANSPARENT    = YES
+FORMULA_MACROFILE      =
 
 # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
-# https://www.mathjax.org) which uses client side Javascript for the rendering
+# https://www.mathjax.org) which uses client side JavaScript for the rendering
 # instead of using pre-rendered bitmaps. Use this if you do not have LaTeX
 # installed or if you want to formulas look prettier in the HTML output. When
 # enabled you may also need to install MathJax separately and configure the path
@@ -1585,11 +1771,29 @@ FORMULA_TRANSPARENT    = YES
 
 USE_MATHJAX            = NO
 
+# With MATHJAX_VERSION it is possible to specify the MathJax version to be used.
+# Note that the different versions of MathJax have different requirements with
+# regards to the different settings, so it is possible that also other MathJax
+# settings have to be changed when switching between the different MathJax
+# versions.
+# Possible values are: MathJax_2 and MathJax_3.
+# The default value is: MathJax_2.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_VERSION        = MathJax_2
+
 # When MathJax is enabled you can set the default output format to be used for
-# the MathJax output. See the MathJax site (see:
-# http://docs.mathjax.org/en/latest/output.html) for more details.
+# the MathJax output. For more details about the output format see MathJax
+# version 2 (see:
+# http://docs.mathjax.org/en/v2.7-latest/output.html) and MathJax version 3
+# (see:
+# http://docs.mathjax.org/en/latest/web/components/output.html).
 # Possible values are: HTML-CSS (which is slower, but has the best
-# compatibility), NativeMML (i.e. MathML) and SVG.
+# compatibility. This is the name for MathJax version 2, for MathJax version 3
+# this will be translated into chtml), NativeMML (i.e. MathML. Only supported
+# for MathJax 2. For MathJax version 3 chtml will be used instead.), chtml (This
+# is the name for MathJax version 3, for MathJax version 2 this will be
+# translated into HTML-CSS) and SVG.
 # The default value is: HTML-CSS.
 # This tag requires that the tag USE_MATHJAX is set to YES.
 
@@ -1602,22 +1806,29 @@ MATHJAX_FORMAT         = HTML-CSS
 # MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax
 # Content Delivery Network so you can quickly see the result without installing
 # MathJax. However, it is strongly recommended to install a local copy of
-# MathJax from https://www.mathjax.org before deployment.
-# The default value is: https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/.
+# MathJax from https://www.mathjax.org before deployment. The default value is:
+# - in case of MathJax version 2: https://cdn.jsdelivr.net/npm/mathjax@2
+# - in case of MathJax version 3: https://cdn.jsdelivr.net/npm/mathjax@3
 # This tag requires that the tag USE_MATHJAX is set to YES.
 
 MATHJAX_RELPATH        = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/
 
 # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
 # extension names that should be enabled during MathJax rendering. For example
+# for MathJax version 2 (see
+# https://docs.mathjax.org/en/v2.7-latest/tex.html#tex-and-latex-extensions):
 # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
+# For example for MathJax version 3 (see
+# http://docs.mathjax.org/en/latest/input/tex/extensions/index.html):
+# MATHJAX_EXTENSIONS = ams
 # This tag requires that the tag USE_MATHJAX is set to YES.
 
 MATHJAX_EXTENSIONS     =
 
 # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
 # of code that will be used on startup of the MathJax code. See the MathJax site
-# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
+# (see:
+# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an
 # example see the documentation.
 # This tag requires that the tag USE_MATHJAX is set to YES.
 
@@ -1645,7 +1856,7 @@ MATHJAX_CODEFILE       =
 SEARCHENGINE           = YES
 
 # When the SERVER_BASED_SEARCH tag is enabled the search engine will be
-# implemented using a web server instead of a web client using Javascript. There
+# implemented using a web server instead of a web client using JavaScript. There
 # are two flavors of web server based searching depending on the EXTERNAL_SEARCH
 # setting. When disabled, doxygen will generate a PHP script for searching and
 # an index file used by the script. When EXTERNAL_SEARCH is enabled the indexing
@@ -1664,7 +1875,8 @@ SERVER_BASED_SEARCH    = NO
 #
 # Doxygen ships with an example indexer (doxyindexer) and search engine
 # (doxysearch.cgi) which are based on the open source search engine library
-# Xapian (see: https://xapian.org/).
+# Xapian (see:
+# https://xapian.org/).
 #
 # See the section "External Indexing and Searching" for details.
 # The default value is: NO.
@@ -1677,8 +1889,9 @@ EXTERNAL_SEARCH        = NO
 #
 # Doxygen ships with an example indexer (doxyindexer) and search engine
 # (doxysearch.cgi) which are based on the open source search engine library
-# Xapian (see: https://xapian.org/). See the section "External Indexing and
-# Searching" for details.
+# Xapian (see:
+# https://xapian.org/). See the section "External Indexing and Searching" for
+# details.
 # This tag requires that the tag SEARCHENGINE is set to YES.
 
 SEARCHENGINE_URL       =
@@ -1749,10 +1962,11 @@ LATEX_CMD_NAME         =
 MAKEINDEX_CMD_NAME     = makeindex
 
 # The LATEX_MAKEINDEX_CMD tag can be used to specify the command name to
-# generate index for LaTeX.
+# generate index for LaTeX. In case there is no backslash (\) as first character
+# it will be automatically added in the LaTeX code.
 # Note: This tag is used in the generated output file (.tex).
 # See also: MAKEINDEX_CMD_NAME for the part in the Makefile / make.bat.
-# The default value is: \makeindex.
+# The default value is: makeindex.
 # This tag requires that the tag GENERATE_LATEX is set to YES.
 
 LATEX_MAKEINDEX_CMD    = \makeindex
@@ -1786,29 +2000,31 @@ PAPER_TYPE             = a4
 
 EXTRA_PACKAGES         =
 
-# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the
-# generated LaTeX document. The header should contain everything until the first
-# chapter. If it is left blank doxygen will generate a standard header. See
-# section "Doxygen usage" for information on how to let doxygen write the
-# default header to a separate file.
+# The LATEX_HEADER tag can be used to specify a user-defined LaTeX header for
+# the generated LaTeX document. The header should contain everything until the
+# first chapter. If it is left blank doxygen will generate a standard header. It
+# is highly recommended to start with a default header using
+# doxygen -w latex new_header.tex new_footer.tex new_stylesheet.sty
+# and then modify the file new_header.tex. See also section "Doxygen usage" for
+# information on how to generate the default header that doxygen normally uses.
 #
-# Note: Only use a user-defined header if you know what you are doing! The
-# following commands have a special meaning inside the header: $title,
-# $datetime, $date, $doxygenversion, $projectname, $projectnumber,
-# $projectbrief, $projectlogo. Doxygen will replace $title with the empty
-# string, for the replacement values of the other commands the user is referred
-# to HTML_HEADER.
+# Note: Only use a user-defined header if you know what you are doing!
+# Note: The header is subject to change so you typically have to regenerate the
+# default header when upgrading to a newer version of doxygen. Certain commands
+# have a special meaning inside the header (and footer); for a description of
+# the possible markers and block names see the documentation.
 # This tag requires that the tag GENERATE_LATEX is set to YES.
 
 LATEX_HEADER           =
 
-# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the
-# generated LaTeX document. The footer should contain everything after the last
-# chapter. If it is left blank doxygen will generate a standard footer. See
+# The LATEX_FOOTER tag can be used to specify a user-defined LaTeX footer for
+# the generated LaTeX document. The footer should contain everything after the
+# last chapter. If it is left blank doxygen will generate a standard footer. See
 # LATEX_HEADER for more information on how to generate a default footer and what
-# special commands can be used inside the footer.
-#
-# Note: Only use a user-defined footer if you know what you are doing!
+# special commands can be used inside the footer. See also section "Doxygen
+# usage" for information on how to generate the default footer that doxygen
+# normally uses. Note: Only use a user-defined footer if you know what you are
+# doing!
 # This tag requires that the tag GENERATE_LATEX is set to YES.
 
 LATEX_FOOTER           =
@@ -1841,18 +2057,26 @@ LATEX_EXTRA_FILES      =
 
 PDF_HYPERLINKS         = YES
 
-# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate
-# the PDF file directly from the LaTeX files. Set this option to YES, to get a
-# higher quality PDF documentation.
+# If the USE_PDFLATEX tag is set to YES, doxygen will use the engine as
+# specified with LATEX_CMD_NAME to generate the PDF file directly from the LaTeX
+# files. Set this option to YES, to get a higher quality PDF documentation.
+#
+# See also section LATEX_CMD_NAME for selecting the engine.
 # The default value is: YES.
 # This tag requires that the tag GENERATE_LATEX is set to YES.
 
 USE_PDFLATEX           = YES
 
-# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
-# command to the generated LaTeX files. This will instruct LaTeX to keep running
-# if errors occur, instead of asking the user for help. This option is also used
-# when generating formulas in HTML.
+# The LATEX_BATCHMODE tag signals the behavior of LaTeX in case of an error.
+# Possible values are: NO same as ERROR_STOP, YES same as BATCH, BATCH In batch
+# mode nothing is printed on the terminal, errors are scrolled as if <return> is
+# hit at every error; missing files that TeX tries to input or request from
+# keyboard input (\read on a not open input stream) cause the job to abort,
+# NON_STOP In nonstop mode the diagnostic message will appear on the terminal,
+# but there is no possibility of user interaction just like in batch mode,
+# SCROLL In scroll mode, TeX will stop only for missing files to input or if
+# keyboard input is necessary and ERROR_STOP In errorstop mode, TeX will stop at
+# each error, asking for user intervention.
 # The default value is: NO.
 # This tag requires that the tag GENERATE_LATEX is set to YES.
 
@@ -1865,16 +2089,6 @@ LATEX_BATCHMODE        = NO
 
 LATEX_HIDE_INDICES     = NO
 
-# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source
-# code with syntax highlighting in the LaTeX output.
-#
-# Note that which sources are shown also depends on other settings such as
-# SOURCE_BROWSER.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_SOURCE_CODE      = NO
-
 # The LATEX_BIB_STYLE tag can be used to specify the style to use for the
 # bibliography, e.g. plainnat, or ieeetr. See
 # https://en.wikipedia.org/wiki/BibTeX and \cite for more info.
@@ -1883,14 +2097,6 @@ LATEX_SOURCE_CODE      = NO
 
 LATEX_BIB_STYLE        = plain
 
-# If the LATEX_TIMESTAMP tag is set to YES then the footer of each generated
-# page will contain the date and time when the page was generated. Setting this
-# to NO can help when comparing the output of multiple runs.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_TIMESTAMP        = NO
-
 # The LATEX_EMOJI_DIRECTORY tag is used to specify the (relative or absolute)
 # path from which the emoji images will be read. If a relative path is entered,
 # it will be relative to the LATEX_OUTPUT directory. If left blank the
@@ -1955,16 +2161,6 @@ RTF_STYLESHEET_FILE    =
 
 RTF_EXTENSIONS_FILE    =
 
-# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code
-# with syntax highlighting in the RTF output.
-#
-# Note that which sources are shown also depends on other settings such as
-# SOURCE_BROWSER.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_RTF is set to YES.
-
-RTF_SOURCE_CODE        = NO
-
 #---------------------------------------------------------------------------
 # Configuration options related to the man page output
 #---------------------------------------------------------------------------
@@ -2016,9 +2212,9 @@ MAN_LINKS              = NO
 # If the GENERATE_XML tag is set to YES, doxygen will generate an XML file that
 # captures the structure of the code including all documentation.
 # The default value is: NO.
-#...
+
 GENERATE_XML           = YES
-#...
+
 # The XML_OUTPUT tag is used to specify where the XML pages will be put. If a
 # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
 # it.
@@ -2061,27 +2257,44 @@ GENERATE_DOCBOOK       = NO
 
 DOCBOOK_OUTPUT         = docbook
 
-# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the
-# program listings (including syntax highlighting and cross-referencing
-# information) to the DOCBOOK output. Note that enabling this will significantly
-# increase the size of the DOCBOOK output.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
-
-DOCBOOK_PROGRAMLISTING = NO
-
 #---------------------------------------------------------------------------
 # Configuration options for the AutoGen Definitions output
 #---------------------------------------------------------------------------
 
 # If the GENERATE_AUTOGEN_DEF tag is set to YES, doxygen will generate an
-# AutoGen Definitions (see http://autogen.sourceforge.net/) file that captures
+# AutoGen Definitions (see https://autogen.sourceforge.net/) file that captures
 # the structure of the code including all documentation. Note that this feature
 # is still experimental and incomplete at the moment.
 # The default value is: NO.
 
 GENERATE_AUTOGEN_DEF   = NO
 
+#---------------------------------------------------------------------------
+# Configuration options related to Sqlite3 output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_SQLITE3 tag is set to YES doxygen will generate a Sqlite3
+# database with symbols found by doxygen stored in tables.
+# The default value is: NO.
+
+GENERATE_SQLITE3       = NO
+
+# The SQLITE3_OUTPUT tag is used to specify where the Sqlite3 database will be
+# put. If a relative path is entered the value of OUTPUT_DIRECTORY will be put
+# in front of it.
+# The default directory is: sqlite3.
+# This tag requires that the tag GENERATE_SQLITE3 is set to YES.
+
+SQLITE3_OUTPUT         = sqlite3
+
+# If the SQLITE3_RECREATE_DB tag is set to YES, the existing doxygen_sqlite3.db
+# database file will be recreated with each doxygen run. If set to NO, doxygen
+# will warn if a database file is already found and not modify it.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_SQLITE3 is set to YES.
+
+SQLITE3_RECREATE_DB    = YES
+
 #---------------------------------------------------------------------------
 # Configuration options related to the Perl module output
 #---------------------------------------------------------------------------
@@ -2156,10 +2369,11 @@ SEARCH_INCLUDES        = YES
 
 # The INCLUDE_PATH tag can be used to specify one or more directories that
 # contain include files that are not input files but should be processed by the
-# preprocessor.
+# preprocessor. Note that the INCLUDE_PATH is not recursive, so the setting of
+# RECURSIVE has no effect here.
 # This tag requires that the tag SEARCH_INCLUDES is set to YES.
 
-INCLUDE_PATH           =
+INCLUDE_PATH           = "../include/fbgemm"
 
 # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
 # patterns (like *.h and *.hpp) to filter out the header-files in the
@@ -2223,15 +2437,15 @@ TAGFILES               =
 
 GENERATE_TAGFILE       =
 
-# If the ALLEXTERNALS tag is set to YES, all external class will be listed in
-# the class index. If set to NO, only the inherited external classes will be
-# listed.
+# If the ALLEXTERNALS tag is set to YES, all external classes and namespaces
+# will be listed in the class and namespace index. If set to NO, only the
+# inherited external classes will be listed.
 # The default value is: NO.
 
 ALLEXTERNALS           = NO
 
 # If the EXTERNAL_GROUPS tag is set to YES, all external groups will be listed
-# in the modules index. If set to NO, only the current project's groups will be
+# in the topic index. If set to NO, only the current project's groups will be
 # listed.
 # The default value is: YES.
 
@@ -2244,41 +2458,10 @@ EXTERNAL_GROUPS        = YES
 
 EXTERNAL_PAGES         = YES
 
-# The PERL_PATH should be the absolute path and name of the perl script
-# interpreter (i.e. the result of 'which perl').
-# The default file (with absolute path) is: /usr/bin/perl.
-
-PERL_PATH              = /usr/bin/perl
-
 #---------------------------------------------------------------------------
-# Configuration options related to the dot tool
+# Configuration options related to diagram generator tools
 #---------------------------------------------------------------------------
 
-# If the CLASS_DIAGRAMS tag is set to YES, doxygen will generate a class diagram
-# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to
-# NO turns the diagrams off. Note that this option also works with HAVE_DOT
-# disabled, but it is recommended to install and use dot, since it yields more
-# powerful graphs.
-# The default value is: YES.
-
-CLASS_DIAGRAMS         = YES
-
-# You can define message sequence charts within doxygen comments using the \msc
-# command. Doxygen will then run the mscgen tool (see:
-# http://www.mcternan.me.uk/mscgen/)) to produce the chart and insert it in the
-# documentation. The MSCGEN_PATH tag allows you to specify the directory where
-# the mscgen tool resides. If left empty the tool is assumed to be found in the
-# default search path.
-
-MSCGEN_PATH            =
-
-# You can include diagrams made with dia in doxygen documentation. Doxygen will
-# then run dia to produce the diagram and insert it in the documentation. The
-# DIA_PATH tag allows you to specify the directory where the dia binary resides.
-# If left empty dia is assumed to be found in the default search path.
-
-DIA_PATH               =
-
 # If set to YES the inheritance and collaboration graphs will hide inheritance
 # and usage relations if the target is undocumented or is not a class.
 # The default value is: YES.
@@ -2287,7 +2470,7 @@ HIDE_UNDOC_RELATIONS   = YES
 
 # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
 # available from the path. This tool is part of Graphviz (see:
-# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent
+# https://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent
 # Bell Labs. The other options in this section have no effect if this option is
 # set to NO
 # The default value is: NO.
@@ -2304,49 +2487,73 @@ HAVE_DOT               = NO
 
 DOT_NUM_THREADS        = 0
 
-# When you want a differently looking font in the dot files that doxygen
-# generates you can specify the font name using DOT_FONTNAME. You need to make
-# sure dot is able to find the font, which can be done by putting it in a
-# standard location or by setting the DOTFONTPATH environment variable or by
-# setting DOT_FONTPATH to the directory containing the font.
-# The default value is: Helvetica.
+# DOT_COMMON_ATTR is common attributes for nodes, edges and labels of
+# subgraphs. When you want a differently looking font in the dot files that
+# doxygen generates you can specify fontname, fontcolor and fontsize attributes.
+# For details please see <a href=https://graphviz.org/doc/info/attrs.html>Node,
+# Edge and Graph Attributes specification</a>. You need to make sure dot is able
+# to find the font, which can be done by putting it in a standard location or by
+# setting the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the
+# directory containing the font. Default graphviz fontsize is 14.
+# The default value is: fontname=Helvetica,fontsize=10.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_COMMON_ATTR        = "fontname=Helvetica,fontsize=10"
+
+# DOT_EDGE_ATTR is concatenated with DOT_COMMON_ATTR. For elegant style you can
+# add 'arrowhead=open, arrowtail=open, arrowsize=0.5'. <a
+# href=https://graphviz.org/doc/info/arrows.html>Complete documentation about
+# arrows shapes.</a>
+# The default value is: labelfontname=Helvetica,labelfontsize=10.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
-DOT_FONTNAME           = Helvetica
+DOT_EDGE_ATTR          = "labelfontname=Helvetica,labelfontsize=10"
 
-# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of
-# dot graphs.
-# Minimum value: 4, maximum value: 24, default value: 10.
+# DOT_NODE_ATTR is concatenated with DOT_COMMON_ATTR. For view without boxes
+# around nodes set 'shape=plain' or 'shape=plaintext' <a
+# href=https://www.graphviz.org/doc/info/shapes.html>Shapes specification</a>
+# The default value is: shape=box,height=0.2,width=0.4.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
-DOT_FONTSIZE           = 10
+DOT_NODE_ATTR          = "shape=box,height=0.2,width=0.4"
 
-# By default doxygen will tell dot to use the default font as specified with
-# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set
-# the path where dot can find it using this tag.
+# You can set the path where dot can find the font specified with fontname in
+# DOT_COMMON_ATTR and other dot attributes.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
 DOT_FONTPATH           =
 
-# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for
-# each documented class showing the direct and indirect inheritance relations.
-# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO.
+# If the CLASS_GRAPH tag is set to YES or GRAPH or BUILTIN then doxygen will
+# generate a graph for each documented class showing the direct and indirect
+# inheritance relations. In case the CLASS_GRAPH tag is set to YES or GRAPH and
+# HAVE_DOT is enabled as well, then dot will be used to draw the graph. In case
+# the CLASS_GRAPH tag is set to YES and HAVE_DOT is disabled or if the
+# CLASS_GRAPH tag is set to BUILTIN, then the built-in generator will be used.
+# If the CLASS_GRAPH tag is set to TEXT the direct and indirect inheritance
+# relations will be shown as texts / links.
+# Possible values are: NO, YES, TEXT, GRAPH and BUILTIN.
 # The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
 
 CLASS_GRAPH            = YES
 
 # If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a
 # graph for each documented class showing the direct and indirect implementation
 # dependencies (inheritance, containment, and class references variables) of the
-# class with other documented classes.
+# class with other documented classes. Explicitly enabling a collaboration
+# graph, when COLLABORATION_GRAPH is set to NO, can be accomplished by means of
+# the command \collaborationgraph. Disabling a collaboration graph can be
+# accomplished by means of the command \hidecollaborationgraph.
 # The default value is: YES.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
 COLLABORATION_GRAPH    = YES
 
 # If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for
-# groups, showing the direct groups dependencies.
+# groups, showing the direct groups dependencies. Explicitly enabling a group
+# dependency graph, when GROUP_GRAPHS is set to NO, can be accomplished by means
+# of the command \groupgraph. Disabling a group dependency graph can be
+# accomplished by means of the command \hidegroupgraph. See also the chapter
+# Grouping in the manual.
 # The default value is: YES.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
@@ -2369,10 +2576,32 @@ UML_LOOK               = NO
 # but if the number exceeds 15, the total amount of fields shown is limited to
 # 10.
 # Minimum value: 0, maximum value: 100, default value: 10.
-# This tag requires that the tag HAVE_DOT is set to YES.
+# This tag requires that the tag UML_LOOK is set to YES.
 
 UML_LIMIT_NUM_FIELDS   = 10
 
+# If the DOT_UML_DETAILS tag is set to NO, doxygen will show attributes and
+# methods without types and arguments in the UML graphs. If the DOT_UML_DETAILS
+# tag is set to YES, doxygen will add type and arguments for attributes and
+# methods in the UML graphs. If the DOT_UML_DETAILS tag is set to NONE, doxygen
+# will not generate fields with class member information in the UML graphs. The
+# class diagrams will look similar to the default class diagrams but using UML
+# notation for the relationships.
+# Possible values are: NO, YES and NONE.
+# The default value is: NO.
+# This tag requires that the tag UML_LOOK is set to YES.
+
+DOT_UML_DETAILS        = NO
+
+# The DOT_WRAP_THRESHOLD tag can be used to set the maximum number of characters
+# to display on a single line. If the actual line length exceeds this threshold
+# significantly it will be wrapped across multiple lines. Some heuristics are
+# applied to avoid ugly line breaks.
+# Minimum value: 0, maximum value: 1000, default value: 17.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_WRAP_THRESHOLD     = 17
+
 # If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
 # collaboration graphs will show the relations between templates and their
 # instances.
@@ -2384,7 +2613,9 @@ TEMPLATE_RELATIONS     = NO
 # If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to
 # YES then doxygen will generate a graph for each documented file showing the
 # direct and indirect include dependencies of the file with other documented
-# files.
+# files. Explicitly enabling an include graph, when INCLUDE_GRAPH is set to NO,
+# can be accomplished by means of the command \includegraph. Disabling an
+# include graph can be accomplished by means of the command \hideincludegraph.
 # The default value is: YES.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
@@ -2393,7 +2624,10 @@ INCLUDE_GRAPH          = YES
 # If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are
 # set to YES then doxygen will generate a graph for each documented file showing
 # the direct and indirect include dependencies of the file with other documented
-# files.
+# files. Explicitly enabling an included by graph, when INCLUDED_BY_GRAPH is set
+# to NO, can be accomplished by means of the command \includedbygraph. Disabling
+# an included by graph can be accomplished by means of the command
+# \hideincludedbygraph.
 # The default value is: YES.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
@@ -2433,16 +2667,26 @@ GRAPHICAL_HIERARCHY    = YES
 # If the DIRECTORY_GRAPH tag is set to YES then doxygen will show the
 # dependencies a directory has on other directories in a graphical way. The
 # dependency relations are determined by the #include relations between the
-# files in the directories.
+# files in the directories. Explicitly enabling a directory graph, when
+# DIRECTORY_GRAPH is set to NO, can be accomplished by means of the command
+# \directorygraph. Disabling a directory graph can be accomplished by means of
+# the command \hidedirectorygraph.
 # The default value is: YES.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
 DIRECTORY_GRAPH        = YES
 
+# The DIR_GRAPH_MAX_DEPTH tag can be used to limit the maximum number of levels
+# of child directories generated in directory dependency graphs by dot.
+# Minimum value: 1, maximum value: 25, default value: 1.
+# This tag requires that the tag DIRECTORY_GRAPH is set to YES.
+
+DIR_GRAPH_MAX_DEPTH    = 1
+
 # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
 # generated by dot. For an explanation of the image formats see the section
 # output formats in the documentation of the dot tool (Graphviz (see:
-# http://www.graphviz.org/)).
+# https://www.graphviz.org/)).
 # Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order
 # to make the SVG files visible in IE 9+ (other browsers do not have this
 # requirement).
@@ -2479,11 +2723,12 @@ DOT_PATH               =
 
 DOTFILE_DIRS           =
 
-# The MSCFILE_DIRS tag can be used to specify one or more directories that
-# contain msc files that are included in the documentation (see the \mscfile
-# command).
+# You can include diagrams made with dia in doxygen documentation. Doxygen will
+# then run dia to produce the diagram and insert it in the documentation. The
+# DIA_PATH tag allows you to specify the directory where the dia binary resides.
+# If left empty dia is assumed to be found in the default search path.
 
-MSCFILE_DIRS           =
+DIA_PATH               =
 
 # The DIAFILE_DIRS tag can be used to specify one or more directories that
 # contain dia files that are included in the documentation (see the \diafile
@@ -2492,10 +2737,10 @@ MSCFILE_DIRS           =
 DIAFILE_DIRS           =
 
 # When using plantuml, the PLANTUML_JAR_PATH tag should be used to specify the
-# path where java can find the plantuml.jar file. If left blank, it is assumed
-# PlantUML is not used or called during a preprocessing step. Doxygen will
-# generate a warning when it encounters a \startuml command in this case and
-# will not generate output for the diagram.
+# path where java can find the plantuml.jar file or to the filename of jar file
+# to be used. If left blank, it is assumed PlantUML is not used or called during
+# a preprocessing step. Doxygen will generate a warning when it encounters a
+# \startuml command in this case and will not generate output for the diagram.
 
 PLANTUML_JAR_PATH      =
 
@@ -2533,18 +2778,6 @@ DOT_GRAPH_MAX_NODES    = 50
 
 MAX_DOT_GRAPH_DEPTH    = 0
 
-# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
-# background. This is disabled by default, because dot on Windows does not seem
-# to support this out of the box.
-#
-# Warning: Depending on the platform used, enabling this option may lead to
-# badly anti-aliased labels on the edges of a graph (i.e. they become hard to
-# read).
-# The default value is: NO.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_TRANSPARENT        = NO
-
 # Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
 # files in one run (i.e. multiple -o and -T options on the command line). This
 # makes dot run faster, but since only newer versions of dot (>1.8.10) support
@@ -2557,14 +2790,34 @@ DOT_MULTI_TARGETS      = NO
 # If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page
 # explaining the meaning of the various boxes and arrows in the dot generated
 # graphs.
+# Note: This tag requires that UML_LOOK isn't set, i.e. the doxygen internal
+# graphical representation for inheritance and collaboration diagrams is used.
 # The default value is: YES.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
 GENERATE_LEGEND        = YES
 
-# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate dot
+# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate
 # files that are used to generate the various graphs.
+#
+# Note: This setting is not only used for dot files but also for msc temporary
+# files.
 # The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
 
 DOT_CLEANUP            = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. If the MSCGEN_TOOL tag is left empty (the default), then doxygen will
+# use a built-in version of mscgen tool to produce the charts. Alternatively,
+# the MSCGEN_TOOL tag can also specify the name of an external tool. For
+# instance, specifying prog as the value, doxygen will call the tool as prog -T
+# <outfile_format> -o <outputfile> <inputfile>. The external tool should support
+# output file formats "png", "eps", "svg", and "ismap".
+
+MSCGEN_TOOL            =
+
+# The MSCFILE_DIRS tag can be used to specify one or more directories that
+# contain msc files that are included in the documentation (see the \mscfile
+# command).
+
+MSCFILE_DIRS           =
diff --git a/fbgemm_gpu/docs/Doxyfile.in b/fbgemm_gpu/docs/Doxyfile.in
index fda0036769..a9bc2b5721 100644
--- a/fbgemm_gpu/docs/Doxyfile.in
+++ b/fbgemm_gpu/docs/Doxyfile.in
@@ -949,10 +949,11 @@ WARN_LOGFILE           =
 # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
 # Note: If this tag is empty the current directory is searched.
 
-INPUT                  = "../" \
-                         "../include/" \
+INPUT                  = "../include/" \
                          "../codegen/" \
-                         "../src/"
+                         "../src/" \
+                         "../../include/" \
+                         "../../src/"
 
 # This tag can be used to specify the character encoding of the source files
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
@@ -2338,7 +2339,8 @@ SEARCH_INCLUDES        = YES
 # This tag requires that the tag SEARCH_INCLUDES is set to YES.
 
 INCLUDE_PATH           = "../codegen/" \
-                         "../include/fbgemm_gpu"
+                         "../include/fbgemm_gpu/" \
+                         "../../include/fbgemm"
 
 # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
 # patterns (like *.h and *.hpp) to filter out the header-files in the
diff --git a/fbgemm_gpu/docs/src/conf.py b/fbgemm_gpu/docs/src/conf.py
index bbc09d1a9e..c37590f669 100644
--- a/fbgemm_gpu/docs/src/conf.py
+++ b/fbgemm_gpu/docs/src/conf.py
@@ -9,21 +9,32 @@
 # This file only contains a selection of the most common options. For a full
 # list see the documentation:
 # https://www.sphinx-doc.org/en/master/usage/configuration.html
-
-# -- Path setup --------------------------------------------------------------
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
 #
-# import os
-# import sys
-# sys.path.insert(0, os.path.abspath('.'))
+#
+# Configuration is based on:
+# https://github.com/pytorch/pytorch/blob/main/docs/cpp/source/conf.py
+
 import os
 import sys
 
 import pytorch_sphinx_theme
 
+
+# -- Project information -----------------------------------------------------
+
+project = "FBGEMM"
+copyright = "2023, FBGEMM Team"
+author = "FBGEMM Team"
+
+# The short X.Y version.
+version = "0.6"
+
+# The full version, including alpha/beta/rc tags
+release = "0.6.0"
+
+
+# -- Path setup --------------------------------------------------------------
+
 for dir_i in os.listdir("../.."):
     if dir_i == "fbgemm_gpu":
         continue
@@ -32,18 +43,56 @@
         sys.path.insert(0, possible_dir)
 
 
-# -- Project information -----------------------------------------------------
-highlight_language = "C++"
+# Setup absolute paths for communicating with breathe / exhale where
+# items are expected / should be trimmed by.
+# This file is {repo_root}/fbgemm_gpu/docs/src/conf.py
+this_file_dir = os.path.abspath(os.path.dirname(__file__))
 
-project = "fbgemm"
-copyright = "2023, FBGEMM Team"
-author = "FBGEMM Team"
+doxygen_xml_dir = os.path.join(
+    os.path.dirname(this_file_dir),  # {repo_root}/fbgemm_gpu/docs
+    "build",  # {repo_root}/fbgemm_gpu/docs/build
+    "xml",  # {repo_root}/fbgemm_gpu/docs/build/xml
+)
+
+repo_root = os.path.dirname(  # {repo_root}
+    os.path.dirname(  # {repo_root}/fbgemm_gpu
+        os.path.dirname(  # {repo_root}/fbgemm_gpu/docs
+            this_file_dir  # {repo_root}/fbgemm_gpu/docs/src
+        )
+    )
+)
 
-# The full version, including alpha/beta/rc tags
-release = "0.1.2"
 
 # -- General configuration ---------------------------------------------------
 
+# Tell sphinx what the primary language being documented is.
+primary_domain = "cpp"
+
+# Tell sphinx what the pygments highlight language should be.
+highlight_language = "cpp"
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = "sphinx"
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffixes as a list of strings:
+#
+# source_suffix = ['.rst', '.md']
+source_suffix = ".rst"
+
+# The master toctree document.
+master_doc = "index"
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = True
+
+# If true, Sphinx will warn about all references where the target cannot be
+# found.
+nitpicky = True
+
+# Make sure the target is unique
+autosectionlabel_prefix_document = True
+
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
@@ -58,6 +107,12 @@
     "sphinx.ext.napoleon",
 ]
 
+intersphinx_mapping = {
+    "python": ("https://docs.python.org/3", None),
+    "pytorch": ("https://pytorch.org/docs/main", None),
+    "numpy": ("https://numpy.org/doc/stable", None),
+}
+
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ["_templates"]
 
@@ -66,54 +121,97 @@
 # This pattern also affects html_static_path and html_extra_path.
 exclude_patterns = []
 
-intersphinx_mapping = {
-    "python": ("https://docs.python.org/3", None),
-    "pytorch": ("https://pytorch.org/docs/master", None),
-    "numpy": ("https://numpy.org/doc/stable", None),
-}
 
-# Setup absolute paths for communicating with breathe / exhale where
-# items are expected / should be trimmed by.
+# -- Breathe configuration ---------------------------------------------------
 
 # This should be a dictionary in which the keys are project names and the values
 # are paths to the folder containing the doxygen output for that project.
 breathe_projects = {
-    "fbgemm_gpu": "../build/xml/",
-    "codegen": "../build/xml/codegen/",
+    "FBGEMM": doxygen_xml_dir,
+    "codegen": f"{doxygen_xml_dir}/codegen",
 }
 
 # This should match one of the keys in the breathe_projects dictionary and
 # indicates which project should be used when the project is not specified on
 # the directive.
-breathe_default_project = "fbgemm_gpu"
+breathe_default_project = "FBGEMM"
 
-# If true, Sphinx will warn about all references where the target cannot be
-# found.
-nitpicky = True
-
-# Make sure the target is unique
-autosectionlabel_prefix_document = True
-
-# Tell sphinx what the primary language being documented is.
-primary_domain = "cpp"
-
-# Tell sphinx what the pygments highlight language should be.
-highlight_language = "cpp"
 
 # -- Options for HTML output -------------------------------------------------
 
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
-#
 html_theme = "pytorch_sphinx_theme"
 html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+# NOTE: sharing python docs resources
+html_static_path = ["_static"]
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
 html_theme_options = {
     "pytorch_project": "fbgemm",
     "collapse_navigation": True,
+    "display_version": True,
     "analytics_id": "UA-117752657-2",
 }
 
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-# html_static_path = ["_static"]
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+    # The paper size ('letterpaper' or 'a4paper').
+    #
+    # 'papersize': 'letterpaper',
+    # The font size ('10pt', '11pt' or '12pt').
+    #
+    # 'pointsize': '10pt',
+    # Additional stuff for the LaTeX preamble.
+    #
+    # 'preamble': '',
+    # Latex figure (float) alignment
+    #
+    # 'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+#  author, documentclass [howto, manual, or own class]).
+latex_documents = [
+    (
+        master_doc,
+        "fbgemm.tex",
+        "FBGEMM Documentation",
+        "FBGEMM Team",
+        "manual",
+    ),
+]
+
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [(master_doc, "FBGEMM", "FBGEMM Documentation", [author], 1)]
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+    (
+        master_doc,
+        "FBGEMM",
+        "FBGEMM Documentation",
+        author,
+        "FBGEMM",
+        "One line description of project.",
+        "Miscellaneous",
+    ),
+]
diff --git a/fbgemm_gpu/docs/src/fbgemm-cpp-api/QuantUtils.rst b/fbgemm_gpu/docs/src/fbgemm-cpp-api/QuantUtils.rst
new file mode 100644
index 0000000000..715ec845ab
--- /dev/null
+++ b/fbgemm_gpu/docs/src/fbgemm-cpp-api/QuantUtils.rst
@@ -0,0 +1,20 @@
+Quantization Utilities
+======================
+
+Reference Implementation Methods
+--------------------------------
+
+.. doxygengroup:: fbgemm-quant-utils-generic
+   :content-only:
+
+AVX-2 Implementation Methods
+----------------------------
+
+.. doxygengroup:: fbgemm-quant-utils-avx2
+   :content-only:
+
+AVX-512 Implementation Methods
+------------------------------
+
+.. doxygengroup:: fbgemm-quant-utils-avx512
+   :content-only:
diff --git a/fbgemm_gpu/docs/src/cpp-api/embedding_ops.rst b/fbgemm_gpu/docs/src/fbgemm_gpu-cpp-api/embedding_ops.rst
similarity index 100%
rename from fbgemm_gpu/docs/src/cpp-api/embedding_ops.rst
rename to fbgemm_gpu/docs/src/fbgemm_gpu-cpp-api/embedding_ops.rst
diff --git a/fbgemm_gpu/docs/src/cpp-api/input_combine.rst b/fbgemm_gpu/docs/src/fbgemm_gpu-cpp-api/input_combine.rst
similarity index 100%
rename from fbgemm_gpu/docs/src/cpp-api/input_combine.rst
rename to fbgemm_gpu/docs/src/fbgemm_gpu-cpp-api/input_combine.rst
diff --git a/fbgemm_gpu/docs/src/cpp-api/jagged_tensor_ops.rst b/fbgemm_gpu/docs/src/fbgemm_gpu-cpp-api/jagged_tensor_ops.rst
similarity index 100%
rename from fbgemm_gpu/docs/src/cpp-api/jagged_tensor_ops.rst
rename to fbgemm_gpu/docs/src/fbgemm_gpu-cpp-api/jagged_tensor_ops.rst
diff --git a/fbgemm_gpu/docs/src/cpp-api/layout_transform_ops.rst b/fbgemm_gpu/docs/src/fbgemm_gpu-cpp-api/layout_transform_ops.rst
similarity index 100%
rename from fbgemm_gpu/docs/src/cpp-api/layout_transform_ops.rst
rename to fbgemm_gpu/docs/src/fbgemm_gpu-cpp-api/layout_transform_ops.rst
diff --git a/fbgemm_gpu/docs/src/cpp-api/memory_utils.rst b/fbgemm_gpu/docs/src/fbgemm_gpu-cpp-api/memory_utils.rst
similarity index 100%
rename from fbgemm_gpu/docs/src/cpp-api/memory_utils.rst
rename to fbgemm_gpu/docs/src/fbgemm_gpu-cpp-api/memory_utils.rst
diff --git a/fbgemm_gpu/docs/src/cpp-api/merge_pooled_embeddings.rst b/fbgemm_gpu/docs/src/fbgemm_gpu-cpp-api/merge_pooled_embeddings.rst
similarity index 100%
rename from fbgemm_gpu/docs/src/cpp-api/merge_pooled_embeddings.rst
rename to fbgemm_gpu/docs/src/fbgemm_gpu-cpp-api/merge_pooled_embeddings.rst
diff --git a/fbgemm_gpu/docs/src/cpp-api/quantize_ops.rst b/fbgemm_gpu/docs/src/fbgemm_gpu-cpp-api/quantize_ops.rst
similarity index 100%
rename from fbgemm_gpu/docs/src/cpp-api/quantize_ops.rst
rename to fbgemm_gpu/docs/src/fbgemm_gpu-cpp-api/quantize_ops.rst
diff --git a/fbgemm_gpu/docs/src/cpp-api/sparse_ops.rst b/fbgemm_gpu/docs/src/fbgemm_gpu-cpp-api/sparse_ops.rst
similarity index 100%
rename from fbgemm_gpu/docs/src/cpp-api/sparse_ops.rst
rename to fbgemm_gpu/docs/src/fbgemm_gpu-cpp-api/sparse_ops.rst
diff --git a/fbgemm_gpu/docs/src/cpp-api/split_table_batched_embeddings.rst b/fbgemm_gpu/docs/src/fbgemm_gpu-cpp-api/split_table_batched_embeddings.rst
similarity index 100%
rename from fbgemm_gpu/docs/src/cpp-api/split_table_batched_embeddings.rst
rename to fbgemm_gpu/docs/src/fbgemm_gpu-cpp-api/split_table_batched_embeddings.rst
diff --git a/fbgemm_gpu/docs/src/general/BuildInstructions.rst b/fbgemm_gpu/docs/src/fbgemm_gpu-development/BuildInstructions.rst
similarity index 96%
rename from fbgemm_gpu/docs/src/general/BuildInstructions.rst
rename to fbgemm_gpu/docs/src/fbgemm_gpu-development/BuildInstructions.rst
index 4a1734d47d..39cac649aa 100644
--- a/fbgemm_gpu/docs/src/general/BuildInstructions.rst
+++ b/fbgemm_gpu/docs/src/fbgemm_gpu-development/BuildInstructions.rst
@@ -13,7 +13,7 @@ The general steps for building FBGEMM_GPU are as follows:
 #. Run the build script.
 
 
-.. _fbgemm-gpu.docs.build.setup.env:
+.. _fbgemm-gpu.build.setup.env:
 
 Set Up an Isolated Build Environment
 ------------------------------------
@@ -68,8 +68,8 @@ Set Up for CPU-Only Build
 -------------------------
 
 Follow the instructions for setting up the Conda environment at
-:ref:`fbgemm-gpu.docs.build.setup.env`, followed by
-:ref:`fbgemm-gpu.docs.build.setup.tools.install`.
+:ref:`fbgemm-gpu.build.setup.env`, followed by
+:ref:`fbgemm-gpu.build.setup.tools.install`.
 
 
 Set Up for CUDA Build
@@ -81,7 +81,7 @@ FBGEMM_GPU can be done either through pre-built Docker images or through Conda
 installation on bare metal. Note that neither a GPU nor the NVIDIA drivers need
 to be present for builds, since they are only used at runtime.
 
-.. _fbgemm-gpu.docs.build.setup.cuda.image:
+.. _fbgemm-gpu.build.setup.cuda.image:
 
 CUDA Docker Image
 ~~~~~~~~~~~~~~~~~
@@ -99,7 +99,7 @@ From here, the rest of the build environment may be constructed through Conda,
 as it is still the recommended mechanism for creating an isolated and
 reproducible build environment.
 
-.. _fbgemm-gpu.docs.build.setup.cuda.install:
+.. _fbgemm-gpu.build.setup.cuda.install:
 
 Install CUDA
 ~~~~~~~~~~~~
@@ -146,7 +146,7 @@ FBGEMM_GPU supports running on AMD (ROCm) devices. Setting the machine
 up for ROCm builds of FBGEMM_GPU can be done either through pre-built
 Docker images or through bare metal.
 
-.. _fbgemm-gpu.docs.build.setup.rocm.image:
+.. _fbgemm-gpu.build.setup.rocm.image:
 
 ROCm Docker Image
 ~~~~~~~~~~~~~~~~~
@@ -168,7 +168,7 @@ From here, the rest of the build environment may be constructed through Conda,
 as it is still the recommended mechanism for creating an isolated and
 reproducible build environment.
 
-.. _fbgemm-gpu.docs.build.setup.rocm.install:
+.. _fbgemm-gpu.build.setup.rocm.install:
 
 Install ROCm
 ~~~~~~~~~~~~
@@ -206,7 +206,7 @@ installed:
   apt install hipify-clang miopen-hip miopen-hip-dev
 
 
-.. _fbgemm-gpu.docs.build.setup.tools.install:
+.. _fbgemm-gpu.build.setup.tools.install:
 
 Install the Build Tools
 -----------------------
@@ -262,7 +262,7 @@ Install the other necessary build tools such as ``ninja``, ``cmake``, etc:
       wheel
 
 
-.. _fbgemm-gpu.docs.build.setup.pytorch.install:
+.. _fbgemm-gpu.build.setup.pytorch.install:
 
 Install PyTorch
 ---------------
@@ -380,7 +380,7 @@ build cache:
 
   python setup.py clean
 
-.. _fbgemm-gpu.docs.build.process.cuda:
+.. _fbgemm-gpu.build.process.cuda:
 
 CUDA Build
 ~~~~~~~~~~
@@ -439,7 +439,7 @@ CUDA device, however, is not required for building the package.
       --nvml_lib_path=${NVML_LIB_PATH} \
       -DTORCH_CUDA_ARCH_LIST="${cuda_arch_list}"
 
-.. _fbgemm-gpu.docs.build.process.rocm:
+.. _fbgemm-gpu.build.process.rocm:
 
 ROCm Build
 ~~~~~~~~~~
@@ -479,7 +479,7 @@ the package.
       -DCMAKE_C_FLAGS="-DTORCH_USE_HIP_DSA" \
       -DCMAKE_CXX_FLAGS="-DTORCH_USE_HIP_DSA"
 
-.. _fbgemm-gpu.docs.build.process.cpu:
+.. _fbgemm-gpu.build.process.cpu:
 
 CPU-Only Build
 ~~~~~~~~~~~~~~
diff --git a/fbgemm_gpu/docs/src/general/InstallationInstructions.rst b/fbgemm_gpu/docs/src/fbgemm_gpu-development/InstallationInstructions.rst
similarity index 94%
rename from fbgemm_gpu/docs/src/general/InstallationInstructions.rst
rename to fbgemm_gpu/docs/src/fbgemm_gpu-development/InstallationInstructions.rst
index 873fbef5e8..a15e74a745 100644
--- a/fbgemm_gpu/docs/src/general/InstallationInstructions.rst
+++ b/fbgemm_gpu/docs/src/fbgemm_gpu-development/InstallationInstructions.rst
@@ -18,8 +18,8 @@ Set Up CPU-Only Environment
 ---------------------------
 
 Follow the instructions for setting up the Conda environment at
-:ref:`fbgemm-gpu.docs.build.setup.env`, followed by
-:ref:`fbgemm-gpu.docs.install.libraries`.
+:ref:`fbgemm-gpu.build.setup.env`, followed by
+:ref:`fbgemm-gpu.install.libraries`.
 
 
 Set Up CUDA Environment
@@ -81,19 +81,19 @@ container. The install steps provided by
 provide details on how to achieve this.
 
 Once this is done, follow the instructions in
-:ref:`fbgemm-gpu.docs.build.setup.cuda.image` for pulling the CUDA Docker image
+:ref:`fbgemm-gpu.build.setup.cuda.image` for pulling the CUDA Docker image
 and launching a container.
 
 From there, the rest of the runtime environment may be constructed through
 Conda. Follow the instructions for setting up the Conda environment at
-:ref:`fbgemm-gpu.docs.build.setup.env`, followed by
-:ref:`fbgemm-gpu.docs.install.libraries`.
+:ref:`fbgemm-gpu.build.setup.env`, followed by
+:ref:`fbgemm-gpu.install.libraries`.
 
 Install the CUDA Runtime
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
 If the OS / Docker environment does not already contain the full CUDA runtime,
-follow the instructions in :ref:`fbgemm-gpu.docs.build.setup.cuda.install` for
+follow the instructions in :ref:`fbgemm-gpu.build.setup.cuda.install` for
 installing the CUDA toolkit inside a Conda environment.
 
 
@@ -133,15 +133,15 @@ It is recommended, though not required, to install and run FBGEMM_GPU through a
 Docker setup for isolation and reproducibility of the ROCm environment, which
 can be difficult to set up.
 
-Follow the instructions in :ref:`fbgemm-gpu.docs.build.setup.rocm.image` for
+Follow the instructions in :ref:`fbgemm-gpu.build.setup.rocm.image` for
 pulling the full ROCm Docker image and launching a container.
 
 From there, the rest of the runtime environment may be constructed through
 Conda. Follow the instructions for setting up the Conda environment at
-:ref:`fbgemm-gpu.docs.build.setup.rocm.install`, followed by
-:ref:`fbgemm-gpu.docs.install.libraries`.
+:ref:`fbgemm-gpu.build.setup.rocm.install`, followed by
+:ref:`fbgemm-gpu.install.libraries`.
 
-.. _fbgemm-gpu.docs.install.libraries:
+.. _fbgemm-gpu.install.libraries:
 
 Install Python Libraries
 ------------------------
@@ -159,7 +159,7 @@ Install the relevant Python libraries for working with FBGEMM_GPU:
 Install PyTorch
 ---------------
 
-Follow the instructions in :ref:`fbgemm-gpu.docs.build.setup.pytorch.install`
+Follow the instructions in :ref:`fbgemm-gpu.build.setup.pytorch.install`
 for installing PyTorch inside a Conda environment.
 
 
diff --git a/fbgemm_gpu/docs/src/general/TestInstructions.rst b/fbgemm_gpu/docs/src/fbgemm_gpu-development/TestInstructions.rst
similarity index 100%
rename from fbgemm_gpu/docs/src/general/TestInstructions.rst
rename to fbgemm_gpu/docs/src/fbgemm_gpu-development/TestInstructions.rst
diff --git a/fbgemm_gpu/docs/src/overview/jagged-tensor-ops/JaggedTensorConversion1.png b/fbgemm_gpu/docs/src/fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorConversion1.png
similarity index 100%
rename from fbgemm_gpu/docs/src/overview/jagged-tensor-ops/JaggedTensorConversion1.png
rename to fbgemm_gpu/docs/src/fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorConversion1.png
diff --git a/fbgemm_gpu/docs/src/overview/jagged-tensor-ops/JaggedTensorConversion2.png b/fbgemm_gpu/docs/src/fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorConversion2.png
similarity index 100%
rename from fbgemm_gpu/docs/src/overview/jagged-tensor-ops/JaggedTensorConversion2.png
rename to fbgemm_gpu/docs/src/fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorConversion2.png
diff --git a/fbgemm_gpu/docs/src/overview/jagged-tensor-ops/JaggedTensorConversion3.png b/fbgemm_gpu/docs/src/fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorConversion3.png
similarity index 100%
rename from fbgemm_gpu/docs/src/overview/jagged-tensor-ops/JaggedTensorConversion3.png
rename to fbgemm_gpu/docs/src/fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorConversion3.png
diff --git a/fbgemm_gpu/docs/src/overview/jagged-tensor-ops/JaggedTensorExample.png b/fbgemm_gpu/docs/src/fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorExample.png
similarity index 100%
rename from fbgemm_gpu/docs/src/overview/jagged-tensor-ops/JaggedTensorExample.png
rename to fbgemm_gpu/docs/src/fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorExample.png
diff --git a/fbgemm_gpu/docs/src/overview/jagged-tensor-ops/JaggedTensorOps.rst b/fbgemm_gpu/docs/src/fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps.rst
similarity index 97%
rename from fbgemm_gpu/docs/src/overview/jagged-tensor-ops/JaggedTensorOps.rst
rename to fbgemm_gpu/docs/src/fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps.rst
index a56de4fbeb..424ead8fbd 100644
--- a/fbgemm_gpu/docs/src/overview/jagged-tensor-ops/JaggedTensorOps.rst
+++ b/fbgemm_gpu/docs/src/fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps.rst
@@ -68,7 +68,7 @@ The information in ``MaxLengths`` is used for performing the conversion from
 jagged tensor to normal (dense) densor where it will be used to determine the
 shape of the tensor's dense form.
 
-.. _fbgemm-gpu.docs.overview.ops.jagged.example:
+.. _fbgemm-gpu.overview.ops.jagged.example:
 
 Jagged Tensor Example
 ~~~~~~~~~~~~~~~~~~~~~
@@ -97,7 +97,7 @@ The ``MaxLengths`` values in the example jagged tensor are ``[4 , 2]``.
 Below is a table of the partition indices applied to the ``Values`` tensor to
 construct the logical representation of the example jagged tensor:
 
-.. _fbgemm-gpu.docs.overview.ops.jagged.example.table:
+.. _fbgemm-gpu.overview.ops.jagged.example.table:
 
 .. list-table::
     :header-rows: 1
@@ -260,13 +260,13 @@ For each dimension in :math:`D`, the dimension size is:
   dim(i) = MaxLengths[i-1]  // (0 < i < D.rank-1)
 
 Using the example jagged tensor from
-:ref:`fbgemm-gpu.docs.overview.ops.jagged.example`, ``len(MaxLengths) = 2``, so
+:ref:`fbgemm-gpu.overview.ops.jagged.example`, ``len(MaxLengths) = 2``, so
 the equivalent dense tensor's rank (number of dimension) will be ``4``.  The
 example jagged tensor two offset tensors, ``Offsets[0]`` and ``Offsets[1]``.
 During the conversion process, elements from ``Values`` will be loaded onto the
 dense tensor based on the ranges denoted in the partition indices of
 ``Offsets[0]`` and ``Offsets[1]`` (see the
-:ref:`table <fbgemm-gpu.docs.overview.ops.jagged.example.table>` for the mapping
+:ref:`table <fbgemm-gpu.overview.ops.jagged.example.table>` for the mapping
 of the groups to corresponding rows in the dense table):
 
 .. image:: JaggedTensorConversion2.png
diff --git a/fbgemm_gpu/docs/src/python-api/jagged_tensor_ops.rst b/fbgemm_gpu/docs/src/fbgemm_gpu-python-api/jagged_tensor_ops.rst
similarity index 100%
rename from fbgemm_gpu/docs/src/python-api/jagged_tensor_ops.rst
rename to fbgemm_gpu/docs/src/fbgemm_gpu-python-api/jagged_tensor_ops.rst
diff --git a/fbgemm_gpu/docs/src/python-api/table_batched_embedding_ops.rst b/fbgemm_gpu/docs/src/fbgemm_gpu-python-api/table_batched_embedding_ops.rst
similarity index 100%
rename from fbgemm_gpu/docs/src/python-api/table_batched_embedding_ops.rst
rename to fbgemm_gpu/docs/src/fbgemm_gpu-python-api/table_batched_embedding_ops.rst
diff --git a/fbgemm_gpu/docs/src/general/DocsInstructions.rst b/fbgemm_gpu/docs/src/general/DocsInstructions.rst
index 4deb915ea6..f59e357d47 100644
--- a/fbgemm_gpu/docs/src/general/DocsInstructions.rst
+++ b/fbgemm_gpu/docs/src/general/DocsInstructions.rst
@@ -1,18 +1,22 @@
-Building Documentation
-======================
+Documentation
+=============
 
-FBGEMM_GPU provides extensive comments in its source files, which provide the
-most authoritative and up-to-date documentation available for the package.
+Both FBGEMM and FBGEMM_GPU provide extensive comments in their source files,
+which serve as the most authoritative and up-to-date documentation available
+for the two libraries.
 
 
+.. _fbgemm-gpu.docs.build:
+
 Building the API Documentation
 ------------------------------
 
 **Note:** The most up-to-date documentation build instructions are embedded in
-a set of scripts bundled in the FBGEMM_GPU repo under
+a set of scripts bundled in the FBGEMM repo under
 `setup_env.bash <https://github.com/pytorch/FBGEMM/blob/main/.github/scripts/setup_env.bash>`_.
 
-The general steps for building the FBGEMM_GPU documentation are as follows:
+The general steps for building the FBGEMM and FBGEMM_GPU documentation are as
+follows:
 
 #. Set up an isolated build environment.
 #. Build FBGEMM_GPU (CPU variant).
@@ -23,15 +27,15 @@ Set Up Build Environment
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
 Follow the instructions for setting up the Conda environment at
-:ref:`fbgemm-gpu.docs.build.setup.env`.
+:ref:`fbgemm-gpu.build.setup.env`.
 
 Build FBGEMM_GPU
 ~~~~~~~~~~~~~~~~
 
 A build pass of FBGEMM_GPU is required for the documentation to be built
 correctly.  Follow the instructions in
-:ref:`fbgemm-gpu.docs.build.setup.tools.install`, followed by
-:ref:`fbgemm-gpu.docs.build.process.cpu`, to build FBGEMM_GPU (CPU variant).
+:ref:`fbgemm-gpu.build.setup.tools.install`, followed by
+:ref:`fbgemm-gpu.build.process.cpu`, to build FBGEMM_GPU (CPU variant).
 
 Set Up Documentation Toolchain
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -69,8 +73,8 @@ After the build completes, view the generated documentation:
 Deployment Preview
 ~~~~~~~~~~~~~~~~~~
 
-As a PyTorch project, a preview of the FBGEMM_GPU documentation will be
-automatically built and deployed by `Netlify <https://www.netlify.com/>`__
+As a PyTorch project, a preview of the FBGEMM and FBGEMM_GPU documentation will
+be automatically built and deployed by `Netlify <https://www.netlify.com/>`__
 when pull requests are made.  When the build completes, the deployment preview
 can be found at:
 
@@ -83,7 +87,8 @@ General Documentation Guidelines
 --------------------------------
 
 When new public API methods are added, they should be accompanied by sufficient
-documentation.  Here are some guidelines for documenting FBGEMM_GPU code:
+documentation.  Here are some guidelines for documenting FBGEMM and FBGEMM_GPU
+code:
 
 * Code by itself is not documentation! Put yourself in the shoes of new
   developers who has to understand what your code does, and make their lives
@@ -158,83 +163,75 @@ Adding Documentation to C++ Code
 
 Documentation for C++ is provided through
 `Javadoc-style comments <https://www.oracle.com/technical-resources/articles/java/javadoc-tool.html>`__
-and generated using Sphinx + `Doxygen <https://www.doxygen.nl/>`__ +
+and generated using Sphinx, `Doxygen <https://www.doxygen.nl/>`__, and
 `Breathe <https://www.breathe-doc.org/>`__.
 
-
 Documentation is kept in header files with the ``.h`` extension as well as in
-``.cpp``, ``cu``, and ``cuh`` files. In these files, everything between
+``.cpp``, ``.cu``, and ``.cuh`` files.  In these files, everything between
 ``#ifndef DOXYGEN_THIS_WILL_BE_SKIPPED`` and ``#endif`` will be hidden from the
-HTML output. At the moment, undocumented functions are hidden in these tags.
-When you add descriptionss to a function, make sure that the ``#ifndef`` and
-``#endif`` are configured correctly.
-
-All functions are grouped by a specific group for better organization.
-Make sure you add ``@defgroup`` to the code comments to define the group, and
-``@ingroup`` in each docstring to associate the target method with the group.
+HTML output.  When you add descriptions to a function, make sure that the
+``#ifndef`` and ``#endif`` are configured correctly.
 
 Follow these instructions to document, generate, and publish a new C++
 description:
 
-#.  Add a description to the source header file. At a very minimum, add a
-    description verbatim, parameters by using the ``@param`` tag, and
-    return value by using the @return tag. You can other tags as needed.
-    Here is an example of how it can look:
-
-    .. code:: cpp
-
-      /// @defgroup example-method-group Example Method Group
-      /// This is a description of the example method group.
-
-      /// @ingroup example-method-group
-      /// Description of `example_method`
-      ///
-      /// **Example:**
-      /// ```python
-      /// # Here is a Python code block
-      /// def foo(lst: List[int]):
-      ///   return [ x ** 2 for x in lst ]
-      /// ```
-      ///
-      /// @param param1 Description of param #1
-      /// @param param2 Description of param #2
-      ///
-      /// @return Description of the method's return value.
-      ///
-      /// @throw fbgemm_gpu::my_error if an error occurs
-      ///
-      /// @note This is an example note.
-      /// @warning This is an example  warning.
-      /// @see For more info, see <a href="https://www.doxygen.nl/manual/commands.html#cmdlink">here</a>.
-      int32_t example_method(bool foo, float bar);
-
-#.  Add a ``doxygengroup`` directive to the corresponding ``.rst`` file.  If
-    an ``.rst`` file for the corresponding header file does not exist, create a
-    new one by the same name as the header file.  If an ``.rst`` file already
-    exists, make sure the ``doxygengroup`` is defined in that file.
-    Using the above example:
+#.  API methods are grouped together by group tags for better organization in
+    Sphinx.  If a desired method group for the target method is not defined yet,
+    define it near the top of the relevant header file with the ``@defgroup``
+    keyword:
 
-    .. code:: rst
+    .. literalinclude::  ../../../src/docs/example_code.cpp
+      :language: cpp
+      :start-after: fbgemm-gpu.docs.example.defgroup.start
+      :end-before: fbgemm-gpu.docs.example.defgroup.end
 
-      Example Methods Group
-      ---------------------
+#.  Add the docstring above the target method's declaration.  At a very minimum,
+    please add descriptions of:
 
-      .. doxygengroup:: example-method-group
-        :content-only:
+    * The method's functional behavior
+    * The type parameters, as denoted by the ``@tparam`` tag
+    * The arguments, as denoted by the ``@param`` tag
+    * The return value, as denoted by the ``@return`` tag
+    * The exceptions that can be thrown (if applicable), as denoted by the
+      ``@throw`` tag
+
+    Other tags ``@note``, ``@warning``, and ``@see`` should be added as needed.
+    Here is an example docstring:
+
+    .. literalinclude::  ../../../src/docs/example_code.cpp
+      :language: cpp
+      :start-after: fbgemm-gpu.docs.example.docstring.start
+      :end-before: fbgemm-gpu.docs.example.docstring.end
+
+#.  On the Sphinx documentation side, add a ``doxygengroup`` directive to the
+    corresponding ``.rst`` file.  If an ``.rst`` file for the corresponding
+    header file does not exist, create a new one by the same name as the header
+    file.  Using the above example:
 
-    This example generates the following HTML output:
+    .. code:: rst
 
-    .. image:: ExampleDocsOutput.png
+      .. doxygengroup:: example-method-group
+        :content-only:
 
 #.  Make sure the ``.rst`` file is included in to the ``toctree`` in
-    ``index.rst`` (:ref:`fbgemm-gpu.docs.toc.cpp`).
+    ``index.rst`` (:ref:`fbgemm-gpu.toc.cpp`).
 
 #.  The C++ source header file needs to be in one of the directories listed in
-    the ``INPUT`` parameter in ``Doxygen.ini``.  If it's in a directory not
-    listed, be sure to append the directory path to the parameter.
+    the ``INPUT`` parameter in ``Doxygen.ini``.  In general, this has already
+    been taken care of, but if it's in a directory not listed, be sure to
+    append the directory path to the parameter.
 
-#.  Verify the changes by building the docs locally or submitting a PR for a
-    Netlify preview.
+#.  Verify the changes by building the docs locally (see
+    :ref:`fbgemm-gpu.docs.build`) or submitting a PR for a Netlify preview.
+
+------------
+
+Following the example above generates the following HTML output:
+
+.. doxygengroup:: example-method-group
+  :content-only:
+
+------------
 
 
 Sphinx Documentation Pointers
@@ -248,7 +245,7 @@ created above the target section:
 
 .. code:: rst
 
-  .. _fbgemm-gpu.docs.example.reference:
+  .. _fbgemm-gpu.example.reference:
 
   Example Section Header
   ----------------------
@@ -263,11 +260,11 @@ The anchor can then be referenced elsewhere in the docs:
 
 .. code:: rst
 
-  Referencing the section :ref:`fbgemm-gpu.docs.example.reference` from
+  Referencing the section :ref:`fbgemm-gpu.example.reference` from
   another page in the docs.
 
   Referencing the section with
-  :ref:`custom text <fbgemm-gpu.docs.example.reference>` from another page
+  :ref:`custom text <fbgemm-gpu.example.reference>` from another page
   in the docs.
 
   Note that the prefix underscore is not needed when referencing the anchor.
diff --git a/fbgemm_gpu/docs/src/general/ExampleDocsOutput.png b/fbgemm_gpu/docs/src/general/ExampleDocsOutput.png
deleted file mode 100644
index 162c5610d8..0000000000
Binary files a/fbgemm_gpu/docs/src/general/ExampleDocsOutput.png and /dev/null differ
diff --git a/fbgemm_gpu/docs/src/index.rst b/fbgemm_gpu/docs/src/index.rst
index a52e566817..b87aa9c2dc 100644
--- a/fbgemm_gpu/docs/src/index.rst
+++ b/fbgemm_gpu/docs/src/index.rst
@@ -1,56 +1,80 @@
-.. FBGEMM documentation master file, copied from fbgemm/docs
-   on Wed Jun 8 17:19:01 2022.
-   You can adapt this file completely to your liking, but it should at least
-   contain the root `toctree` directive.
+FBGEMM and FBGEMM_GPU Documentation Homepage
+============================================
 
-Welcome to FBGEMM's documentation!
-=======================================
+Welcome to the documentation page for the **FBGEMM** and **FBGEMM_GPU**
+libraries!
 
-This documentation provides a comprehensive reference of the `fbgemm_gpu`
-library.
+**FBGEMM** (Facebook GEneral Matrix Multiplication) is a low-precision,
+high-performance matrix-matrix multiplication and convolution library for
+server-side inference.  This library is used as a backend of
+`Caffe2 <https://github.com/pytorch/pytorch/tree/master/caffe2/quantization/server>`__ and
+`PyTorch <https://github.com/pytorch/pytorch/tree/master/aten/src/ATen/native/quantized/cpu>`__
+quantized operators on x86 machines.
 
-.. _fbgemm-gpu.docs.toc.general:
+**FBGEMM_GPU** (FBGEMM GPU Kernels Library) is a collection of high-performance
+PyTorch GPU operator libraries for training and inference.  This library is
+built on top of FBGEMM and provides efficient table batched embedding bag, data
+layout transformation, and quantization support.
+
+Table of Contents
+
+.. _home.docs.toc.general:
 
 .. toctree::
-   :maxdepth: 2
-   :caption: FBGEMM_GPU General Info
+   :maxdepth: 1
+   :caption: General Info
 
-   general/BuildInstructions.rst
-   general/InstallationInstructions.rst
-   general/TestInstructions.rst
-   general/DocsInstructions.rst
    general/Contributing.rst
+   general/DocsInstructions.rst
    general/ContactUs.rst
 
-.. _fbgemm-gpu.docs.toc.overview:
+.. _fbgemm-gpu.toc.development:
+
+.. toctree::
+   :maxdepth: 1
+   :caption: FBGEMM_GPU Development
+
+   fbgemm_gpu-development/BuildInstructions.rst
+   fbgemm_gpu-development/InstallationInstructions.rst
+   fbgemm_gpu-development/TestInstructions.rst
+
+.. _fbgemm-gpu.toc.overview:
 
 .. toctree::
-   :maxdepth: 2
+   :maxdepth: 1
    :caption: FBGEMM_GPU Overview
 
-   overview/jagged-tensor-ops/JaggedTensorOps.rst
+   fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps.rst
 
-.. _fbgemm-gpu.docs.toc.api.python:
+.. _fbgemm.toc.api.cpp:
 
 .. toctree::
-   :maxdepth: 2
-   :caption: FBGEMM_GPU Python API
+   :maxdepth: 1
+   :caption: FBGEMM C++ API
 
-   python-api/table_batched_embedding_ops.rst
-   python-api/jagged_tensor_ops.rst
+   fbgemm-cpp-api/QuantUtils.rst
 
-.. _fbgemm-gpu.docs.toc.api.cpp:
+.. _fbgemm-gpu.toc.api.cpp:
 
 .. toctree::
-   :maxdepth: 2
+   :maxdepth: 1
    :caption: FBGEMM_GPU C++ API
 
-   cpp-api/sparse_ops.rst
-   cpp-api/quantize_ops.rst
-   cpp-api/merge_pooled_embeddings.rst
-   cpp-api/split_table_batched_embeddings.rst
-   cpp-api/jagged_tensor_ops.rst
-   cpp-api/memory_utils.rst
-   cpp-api/input_combine.rst
-   cpp-api/layout_transform_ops.rst
-   cpp-api/embedding_ops.rst
+   fbgemm_gpu-cpp-api/sparse_ops.rst
+   fbgemm_gpu-cpp-api/quantize_ops.rst
+   fbgemm_gpu-cpp-api/merge_pooled_embeddings.rst
+   fbgemm_gpu-cpp-api/split_table_batched_embeddings.rst
+   fbgemm_gpu-cpp-api/jagged_tensor_ops.rst
+   fbgemm_gpu-cpp-api/memory_utils.rst
+   fbgemm_gpu-cpp-api/input_combine.rst
+   fbgemm_gpu-cpp-api/layout_transform_ops.rst
+   fbgemm_gpu-cpp-api/embedding_ops.rst
+
+.. _fbgemm-gpu.toc.api.python:
+
+.. toctree::
+   :maxdepth: 1
+   :caption: FBGEMM_GPU Python API
+
+   fbgemm_gpu-python-api/table_batched_embedding_ops.rst
+   fbgemm_gpu-python-api/jagged_tensor_ops.rst
diff --git a/fbgemm_gpu/fbgemm_gpu/docs/__init__.py b/fbgemm_gpu/fbgemm_gpu/docs/__init__.py
index 22cf065442..05551ca6da 100644
--- a/fbgemm_gpu/fbgemm_gpu/docs/__init__.py
+++ b/fbgemm_gpu/fbgemm_gpu/docs/__init__.py
@@ -6,4 +6,4 @@
 # LICENSE file in the root directory of this source tree.
 
 # Trigger the manual addition of docstrings to pybind11-generated operators
-from . import jagged_tensor_ops, table_batched_embedding_ops  # noqa: F401  # noqa: F401
+from . import jagged_tensor_ops, table_batched_embedding_ops  # noqa: F401
diff --git a/fbgemm_gpu/include/fbgemm_gpu/cumem_utils.h b/fbgemm_gpu/include/fbgemm_gpu/cumem_utils.h
index f2582ca392..9989c6a96b 100644
--- a/fbgemm_gpu/include/fbgemm_gpu/cumem_utils.h
+++ b/fbgemm_gpu/include/fbgemm_gpu/cumem_utils.h
@@ -18,6 +18,7 @@ using Tensor = at::Tensor;
 ///
 
 /// @ingroup cumem-utils
+///
 /// Allocate an `at::Tensor` with unified managed memory (UVM).  Then set its
 /// preferred storage location to CPU (host memory) and establish mappings
 /// on the CUDA device to the host memory.
@@ -31,6 +32,7 @@ Tensor new_managed_tensor(
     const std::vector<std::int64_t>& sizes);
 
 /// @ingroup cumem-utils
+///
 /// Placeholder operator for the `Meta` dispatch key.
 ///
 /// @param self The input tensor
@@ -42,6 +44,7 @@ Tensor new_managed_tensor_meta(
     const std::vector<std::int64_t>& sizes);
 
 /// @ingroup cumem-utils
+///
 /// Allocate the `at::Tensor` with host-mapped memory.
 ///
 /// @param self The input tensor
@@ -53,6 +56,7 @@ Tensor new_host_mapped_tensor(
     const std::vector<std::int64_t>& sizes);
 
 /// @ingroup cumem-utils
+///
 /// Allocate the `at::Tensor` with either unified managed memory (UVM) or
 /// host-mapped memory.
 ///
@@ -68,6 +72,7 @@ Tensor new_unified_tensor(
     bool is_host_mapped);
 
 /// @ingroup cumem-utils
+///
 /// Allocate an `at::Tensor` with unified managed memory (UVM), but allow for
 /// its preferred storage location to be automatically managed.
 ///
@@ -80,6 +85,7 @@ Tensor new_vanilla_managed_tensor(
     const std::vector<std::int64_t>& sizes);
 
 /// @ingroup cumem-utils
+///
 /// Check if a tensor is allocated with UVM (either CPU or GPU tensor).
 ///
 /// @param self The input tensor
@@ -88,6 +94,7 @@ Tensor new_vanilla_managed_tensor(
 bool uvm_storage(const Tensor& self);
 
 /// @ingroup cumem-utils
+///
 /// Check if a tensor is allocated with UVM, BUT is not a CPU tensor.
 ///
 /// @param self The input tensor
@@ -97,6 +104,7 @@ bool uvm_storage(const Tensor& self);
 bool is_uvm_tensor(const Tensor& self);
 
 /// @ingroup cumem-utils
+///
 /// Convert a UVM tensor to a CPU tensor.
 ///
 /// @param self The input tensor
@@ -105,6 +113,7 @@ bool is_uvm_tensor(const Tensor& self);
 Tensor uvm_to_cpu(const Tensor& self);
 
 /// @ingroup cumem-utils
+///
 /// Create a new UVM tensor that shares the same device and UVM storage with
 /// `prototype`.
 ///
@@ -117,6 +126,7 @@ Tensor uvm_to_cpu(const Tensor& self);
 Tensor uvm_to_device(const Tensor& self, const Tensor& prototype);
 
 /// @ingroup cumem-utils
+///
 /// Call `cudaMemAdvise()` on a UVM tensor's storage. The `cudaMemoryAdvise`
 /// enum is available on the Python side in the `fbgemm_gpu.uvm` namespace; see
 /// the documentation over there for valid values.
@@ -130,6 +140,7 @@ Tensor uvm_to_device(const Tensor& self, const Tensor& prototype);
 void uvm_cuda_mem_advise(const Tensor& self, int64_t cuda_memory_advise);
 
 /// @ingroup cumem-utils
+///
 /// Call `cudaMemPrefetchAsync()` on a UVM tensor's storage to prefetch memory
 /// to a destination device.
 ///
@@ -145,6 +156,7 @@ void uvm_cuda_mem_prefetch_async(
     c10::optional<Tensor> device_t);
 
 /// @ingroup cumem-utils
+///
 /// Call `madvise(...MADV_DONTFORK)` on a UVM tensor's storage. This is a
 /// workaround for an issue where the UVM kernel driver un-maps UVM storage
 /// pages from the page table on fork, causing slowdown on the next access from
@@ -158,6 +170,7 @@ void uvm_cuda_mem_prefetch_async(
 void uvm_mem_advice_dont_fork(const Tensor& self);
 
 /// @ingroup cumem-utils
+///
 /// Copy a UVM tensor's contiguous storage (uvm_storage(t) is true) into a new
 /// CPU Tensor.  The copy operation uses single-threaded `memcpy()`.
 ///
diff --git a/fbgemm_gpu/include/fbgemm_gpu/sparse_ops.h b/fbgemm_gpu/include/fbgemm_gpu/sparse_ops.h
index 1552dd1be2..10685416a1 100644
--- a/fbgemm_gpu/include/fbgemm_gpu/sparse_ops.h
+++ b/fbgemm_gpu/include/fbgemm_gpu/sparse_ops.h
@@ -103,6 +103,7 @@ at::Tensor invert_permute_cuda(const at::Tensor& permute);
 #endif
 
 /// @ingroup sparse-data-cuda
+///
 /// expand_into_jagged_permute expand the sparse data permute index from
 /// table dimension to batch dimension, for cases where the sparse features
 /// has different batch sizes across ranks.
diff --git a/fbgemm_gpu/src/docs/example_code.cpp b/fbgemm_gpu/src/docs/example_code.cpp
new file mode 100644
index 0000000000..04d4eb14a5
--- /dev/null
+++ b/fbgemm_gpu/src/docs/example_code.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <cstddef>
+
+/// [fbgemm-gpu.docs.example.defgroup.start]
+/// @defgroup example-method-group Example Method Group
+/// This is a description of the example method group.
+/// [fbgemm-gpu.docs.example.defgroup.end]
+
+/// @skipline [fbgemm-gpu.docs.example.docstring.start]
+/// @ingroup example-method-group
+///
+/// @brief A very short description of `example_method`.
+///
+/// Here is a much longer description of `example_method` with code examples:
+///
+/// **Example:**
+/// ```python
+/// # Here is a Python code block
+/// def foo(lst: List[int]):
+///   return [ x ** 2 for x in lst ]
+/// ```
+///
+/// And here is a verbatim-text diagram example:
+///
+/// @code{.unparsed}
+///   .------+---------------------------------.-----------------------------
+///   |            Block A (first)             |       Block B (second)
+///
+///   +------+------+--------------------------+------+------+---------------
+///   | Next | Prev |   usable space           | Next | Prev | usable space..
+///   +------+------+--------------------------+------+--+---+---------------
+///   ^  |                                     ^         |
+///   |  '-------------------------------------'         |
+///   |                                                  |
+///   '----------- Block B's prev points to Block A -----'
+/// @endcode
+///
+/// @tparam T Description of T
+/// @tparam Alignment Description of Alignment value
+/// @param param1 Description of `param1`
+/// @param param2 Description of `param2`
+///
+/// @return Description of the method's return value.
+///
+/// @throw fbgemm_gpu::error1 if a type-1 error occurs
+/// @throw fbgemm_gpu::error2 if a type-2 error occurs
+///
+/// @note This is an example note.
+///
+/// @warning This is an example warning.
+///
+/// @see For more info, see
+/// <a href="https://www.doxygen.nl/manual/commands.html#cmdlink">here</a>.
+template <typename T, std::size_t Alignment>
+int32_t example_method(T param1, float param2);
+/// @skipline [fbgemm-gpu.docs.example.docstring.end]
diff --git a/fbgemm_gpu/src/quantize_ops/quantize_bfloat16.cu b/fbgemm_gpu/src/quantize_ops/quantize_bfloat16.cu
index 241545411c..4e42a9d8e4 100644
--- a/fbgemm_gpu/src/quantize_ops/quantize_bfloat16.cu
+++ b/fbgemm_gpu/src/quantize_ops/quantize_bfloat16.cu
@@ -13,6 +13,7 @@ using Tensor = at::Tensor;
 namespace fbgemm_gpu {
 
 /// @ingroup quantize-ops-cuda
+///
 /// Converts a tensor of `float` values into a tensor of Brain Floating Point
 /// (`bfloat16`) values.
 ///
@@ -44,6 +45,7 @@ DLL_PUBLIC at::Tensor _float_to_bfloat16_gpu(const at::Tensor& input) {
 }
 
 /// @ingroup quantize-ops-cuda
+///
 /// Converts a tensor of Brain Floating Point (`bfloat16`) values into a tensor
 /// of `float` values.
 ///
diff --git a/fbgemm_gpu/src/quantize_ops/quantize_ops_cpu.cpp b/fbgemm_gpu/src/quantize_ops/quantize_ops_cpu.cpp
index 5e8d8c8376..eaaa4853c7 100644
--- a/fbgemm_gpu/src/quantize_ops/quantize_ops_cpu.cpp
+++ b/fbgemm_gpu/src/quantize_ops/quantize_ops_cpu.cpp
@@ -149,6 +149,7 @@ Tensor _fusednbitrowwise_to_float_cpu(
 }
 
 /// @ingroup quantize-data-cpu
+///
 Tensor& _fused8bitrowwise_to_float_cpu_out(
     Tensor& output,
     const Tensor& input) {
@@ -160,6 +161,7 @@ Tensor& fused8bitrowwise_to_half_cpu_out(Tensor& output, const Tensor& input) {
 }
 
 /// @ingroup quantize-data-cpu
+///
 Tensor& _float_to_fused8bitrowwise_cpu_out(
     Tensor& output,
     const Tensor& input) {
@@ -169,7 +171,9 @@ Tensor& _float_to_fused8bitrowwise_cpu_out(
 Tensor& _half_to_fused8bitrowwise_cpu_out(Tensor& output, const Tensor& input) {
   return _float_to_fused8bitrowwise_cpu_out_t<fbgemm::float16>(output, input);
 }
+
 /// @ingroup quantize-data-cpu
+///
 Tensor float_to_fused8bitrowwise_cpu(const Tensor& input) {
   auto output = at::empty(
       {0},
@@ -178,6 +182,7 @@ Tensor float_to_fused8bitrowwise_cpu(const Tensor& input) {
 }
 
 /// @ingroup quantize-data-cpu
+///
 Tensor half_to_fused8bitrowwise_cpu(const Tensor& input) {
   auto output = at::empty(
       {0},
@@ -186,6 +191,7 @@ Tensor half_to_fused8bitrowwise_cpu(const Tensor& input) {
 }
 
 /// @ingroup quantize-data-cpu
+///
 Tensor float_or_half_to_fused8bitrowwise_cpu(const Tensor& input) {
   auto output = at::empty(
       {0},
@@ -200,17 +206,23 @@ Tensor float_or_half_to_fused8bitrowwise_cpu(const Tensor& input) {
       });
   return output;
 }
+
 /// @ingroup quantize-data-cpu
+///
 Tensor fused8bitrowwise_to_float_cpu(const Tensor& input) {
   auto output = at::empty({0}, input.options().dtype(at::kFloat));
   return _fused8bitrowwise_to_float_cpu_out(output, input);
 }
+
 /// @ingroup quantize-data-cpu
+///
 Tensor fused8bitrowwise_to_half_cpu(const Tensor& input) {
   auto output = at::empty({0}, input.options().dtype(at::kHalf));
   return fused8bitrowwise_to_half_cpu_out(output, input);
 }
+
 /// @ingroup quantize-data-cpu
+///
 Tensor fused8bitrowwise_to_float_or_half_cpu(
     const Tensor& input,
     const int64_t output_dtype) {
@@ -235,12 +247,14 @@ Tensor fused8bitrowwise_to_float_or_half_cpu(
 }
 // dummy cpu code for gpu fp8_rowwise conversions
 /// @ingroup quantize-data-cpu
+///
 Tensor float_to_FP8rowwise_cpu(const Tensor& input, bool forward) {
   TORCH_CHECK(false, "fp8 is not supported by CPU");
   return input;
 }
 
 /// @ingroup quantize-data-cpu
+///
 Tensor FP8rowwise_to_float_cpu(
     const Tensor& input,
     bool forward,
@@ -250,6 +264,7 @@ Tensor FP8rowwise_to_float_cpu(
 }
 
 /// @ingroup quantize-data-cpu
+///
 Tensor fusednbitrowwise_to_float_cpu(
     const Tensor& input,
     const int64_t bit_rate) {
@@ -257,6 +272,7 @@ Tensor fusednbitrowwise_to_float_cpu(
 }
 
 /// @ingroup quantize-data-cpu
+///
 Tensor fusednbitrowwise_to_half_cpu(
     const Tensor& input,
     const int64_t bit_rate) {
@@ -264,6 +280,7 @@ Tensor fusednbitrowwise_to_half_cpu(
 }
 
 /// @ingroup quantize-data-cpu
+///
 Tensor fusednbitrowwise_to_float_or_half_cpu(
     const Tensor& input,
     const int64_t bit_rate,
@@ -315,6 +332,7 @@ Tensor float_or_half_to_fusednbitrowwise_cpu(
 }
 
 /// @ingroup quantize-data-cpu
+///
 void FloatToFP8Quantized_ref(
     const float* const input,
     const size_t nrows,
@@ -335,6 +353,7 @@ void FloatToFP8Quantized_ref(
 }
 
 /// @ingroup quantize-data-cpu
+///
 void FP8QuantizedToFloat_ref(
     const uint8_t* const input,
     const size_t nrows,
diff --git a/fbgemm_gpu/src/quantize_ops/quantize_ops_meta.cpp b/fbgemm_gpu/src/quantize_ops/quantize_ops_meta.cpp
index 8f62be6a0e..eca965734d 100644
--- a/fbgemm_gpu/src/quantize_ops/quantize_ops_meta.cpp
+++ b/fbgemm_gpu/src/quantize_ops/quantize_ops_meta.cpp
@@ -20,6 +20,7 @@ using Tensor = at::Tensor;
 namespace fbgemm_gpu {
 
 /// @ingroup quantize-data-meta
+///
 Tensor FP8rowwise_to_float_meta(
     const Tensor& input,
     [[maybe_unused]] bool forward,
@@ -50,6 +51,7 @@ Tensor FP8rowwise_to_float_meta(
 }
 
 /// @ingroup quantize-data-meta
+///
 Tensor FloatToFP8RowwiseQuantized_meta(const Tensor& input, bool forward) {
   TORCH_CHECK(input.is_contiguous(), "input must be contiguous");
 
diff --git a/fbgemm_gpu/src/quantize_ops/quantize_padded_fp8_rowwise.cu b/fbgemm_gpu/src/quantize_ops/quantize_padded_fp8_rowwise.cu
index 380a756116..59c95b3c13 100644
--- a/fbgemm_gpu/src/quantize_ops/quantize_padded_fp8_rowwise.cu
+++ b/fbgemm_gpu/src/quantize_ops/quantize_padded_fp8_rowwise.cu
@@ -395,6 +395,7 @@ Tensor _paddedFP8rowwise_to_float_gpu_t(
 }
 
 /// @ingroup quantize-ops-cuda
+///
 /// Converts a tensor of `float` values into a tensor of padded `fp8` rowwise
 /// values.
 ///
@@ -413,6 +414,7 @@ DLL_PUBLIC Tensor _float_to_paddedFP8rowwise_gpu(
 }
 
 /// @ingroup quantize-ops-cuda
+///
 /// Converts a tensor of padded `fp8` rowwise values into a tensor of `float
 /// values`.
 ///
diff --git a/include/fbgemm/QuantUtils.h b/include/fbgemm/QuantUtils.h
index c4fefa34e2..8b0adedef0 100644
--- a/include/fbgemm/QuantUtils.h
+++ b/include/fbgemm/QuantUtils.h
@@ -19,6 +19,9 @@
 #include <cstdint>
 #include <limits>
 
+/// @defgroup fbgemm-quant-utils-generic Quantization Utilities (Generic)
+///
+
 namespace fbgemm {
 
 FBGEMM_API TensorQuantizationParams ChooseQuantizationParams(
@@ -109,29 +112,31 @@ FBGEMM_API void Quantize(
     int thread_id = 0,
     int num_threads = 1);
 
-/*
- * @brief Quantize floating point data in src to type T
- *
- * @tparam T output quantized data type (int8_t, uint8_t and int32_t are
- *                  supported)
- *
- * @tparam T LAYOUT layout of input tensor in src. (KCX and KXC are supported)
- *                  KCX corresponds to KCRS or KCTRS (for weight tensors with
- *                  time dimension)
- *                  KXC corresponds to KRSC or KTRSC (for weight tensors with
- *                  time dimension)
- *
- * @param K Output channels for weight tensors
- * @param C Number of channels
- * @param X R*S or T*R*S
- * @param G Groups (if G == C the function performs channelwise quantization;
- *                  if 1 < G < C the function performs groupwise quantization;
- *                  if G == 1 the function performs per tensor quantization;)
- * @param scales floating point scales.
- *               Size should be equal G
- * @param zero_points zero points (should be reprsentable in type T).
- *                    Size should be equal G
- */
+/// @ingroup fbgemm-quant-utils-generic
+///
+/// Quantize floating point data in `src` to type `T`.
+///
+/// @tparam T output quantized data type (`int8_t`, `uint8_t`, and `int32_t` are
+///         supported)
+///
+/// @tparam LAYOUT layout of input tensor in `src`. (`KCX` and `KXC` are
+///         supported)
+///         `KCX` corresponds to `KCRS` or `KCTRS` (for weight tensors with time
+///         dimension)
+///         `KXC` corresponds to `KRSC` or `KTRSC` (for weight tensors with time
+///         dimension)
+///
+///  @param K Output channels for weight tensors
+///  @param C Number of channels
+///  @param X `R*S` or `T*R*S`
+///  @param G Groups (if `G == C` the function performs channelwise
+///                   quantization;
+///                   if `1 < G < C` the function performs groupwise
+///                   quantization; if `G == 1` the function performs per tensor
+///                   quantization;)
+///  @param scales floating point scales.  Size should be equal to `G`
+///  @param zero_points zero points (should be representable in type `T`).
+///                     Size should be equal to `G`
 template <typename T, layout_t LAYOUT = layout_t::KCX>
 FBGEMM_API void QuantizeGroupwise(
     const float* src,
@@ -172,11 +177,12 @@ float FusedQuantizeDequantize(
   return Dequantize<T>(q, qparams);
 }
 
-/*
-Fused integer quantization dequantization kernel to accelerate
-quantization-aware training. Quantize fp32 values in src to (u)int8 using the
-provided qparams, and dequantize quantized integer values back into fp32.
-*/
+/// @ingroup fbgemm-quant-utils-generic
+///
+/// Fused integer quantization dequantization kernel to accelerate
+/// quantization-aware training. Quantize `fp32` values in `src` to `(u)int8`
+/// using the provided `qparams`, and dequantize quantized integer values back
+/// into `fp32`.
 template <typename T>
 FBGEMM_API void FusedQuantizeDequantize(
     const float* src,
@@ -263,6 +269,8 @@ FBGEMM_API void Requantize(
     int num_threads = 1);
 
 /**
+ * @ingroup fbgemm-quant-utils-generic
+ *
  * Convert float (fp32 or fp16) inputs to rowwise quantized outputs.
  * bitrate specifies the number of bits in quantized output.
  * Scale and Bias are in fp16. Each row's Scale and Bias are stored in
diff --git a/include/fbgemm/QuantUtilsAvx2.h b/include/fbgemm/QuantUtilsAvx2.h
index 64d1323097..ca5b70308e 100644
--- a/include/fbgemm/QuantUtilsAvx2.h
+++ b/include/fbgemm/QuantUtilsAvx2.h
@@ -12,18 +12,21 @@
 #include "./FbgemmBuild.h"
 #include "./UtilsAvx2.h"
 
+/// @defgroup fbgemm-quant-utils-avx2 Quantization Utilities (AVX2)
+///
+
 namespace fbgemm {
 
-// Structs from gemmlowp
-//
-// A structure to hold quantization parameters 'scale' and 'zero_point'.
-// The meaning of these values is as the constants in the quantization equation
-//
-//   real_value = scale * (quantized_value - zero_point)
-//
-// In other words, 'zero_point' is the quantized value that corresponds
-// to the real value 0, and 'scale' is the difference of real values
-// corresponding to consecutive quantized values.
+/// Struct from <a href="https://github.com/google/gemmlowp">`gemmlowp`</a>
+///
+/// A structure to hold quantization parameters `scale` and `zero_point`.
+/// The meaning of these values is as the constants in the quantization equation
+///
+///   `real_value = scale * (quantized_value - zero_point)`
+///
+/// In other words, `zero_point` is the quantized value that corresponds
+/// to the real value 0, and `scale` is the difference of real values
+/// corresponding to consecutive quantized values.
 struct FBGEMM_API TensorQuantizationParams {
   float scale;
   std::int32_t zero_point;
@@ -32,13 +35,13 @@ struct FBGEMM_API TensorQuantizationParams {
   float Max() const;
 };
 
-// Parameters when we scale from int32 intermediate matrix multiplication
-// results to 8-bit integers
+/// Parameters when we scale from int32 intermediate matrix multiplication
+/// results to 8-bit integers
 struct FBGEMM_API RequantizationParams {
-  // For floating-point requantization
+  /// For floating-point requantization
   float real_multiplier;
 
-  // For fixed-point requantization
+  /// For fixed-point requantization
   std::int32_t multiplier;
   int right_shift;
 
@@ -47,6 +50,7 @@ struct FBGEMM_API RequantizationParams {
 
 ////////////////////////////////////////////////////////////////////////////////
 // Utility functions
+////////////////////////////////////////////////////////////////////////////////
 
 template <typename T = std::uint8_t, bool LEGACY = true>
 void QuantizeAvx2(
@@ -63,14 +67,15 @@ void FusedQuantizeDequantizeAvx2(
     const TensorQuantizationParams& qparams,
     float noise_ratio = 0.0f);
 
-/*
- * Random number generator in [0, 9]: https://www.jstatsoft.org/v08/i14/paper
- */
+/// @ingroup fbgemm-quant-utils-avx2
+///
+/// Random number generator in [0, 9] based on
+/// <a href="https://www.jstatsoft.org/v08/i14/paper">this paper</a>.
 uint32_t FBGEMM_API Xor128(void);
 
-/**
- * @brief Find the min and max value in a float matrix.
- */
+/// @ingroup fbgemm-quant-utils-avx2
+///
+/// @brief Find the min and max value in a float matrix.
 void FBGEMM_API FindMinMax(const float* m, float* min, float* max, int64_t len);
 
 void RequantizeFixedPointAvx2(
@@ -85,9 +90,9 @@ void RequantizeAvx2(
     int len,
     const RequantizationParams& params);
 
-/**
- * @brief Requantize with avx2 and bias is fused.
- */
+/// @ingroup fbgemm-quant-utils-avx2
+///
+/// Requantize with AVX2, with the bias fused.
 template <
     bool A_SYMMETRIC,
     bool B_SYMMETRIC,
diff --git a/include/fbgemm/QuantUtilsAvx512.h b/include/fbgemm/QuantUtilsAvx512.h
index 0b43477e06..9dff4af8f7 100644
--- a/include/fbgemm/QuantUtilsAvx512.h
+++ b/include/fbgemm/QuantUtilsAvx512.h
@@ -12,7 +12,14 @@
 #include "./FbgemmBuild.h"
 #include "./UtilsAvx2.h"
 
+/// @defgroup fbgemm-quant-utils-avx512 Quantization Utilities (AVX512)
+///
+
 namespace fbgemm {
+
+/// @ingroup fbgemm-quant-utils-avx512
+///
+/// Requantize with AVX512.
 template <
     bool A_SYMMETRIC,
     bool B_SYMMETRIC,
@@ -28,4 +35,4 @@ FBGEMM_API void requantizeOutputProcessingGConvAvx512(
     int ld_out,
     int ld_in,
     const requantizationParams_t<BIAS_TYPE>& r);
-}
+} // namespace fbgemm