diff --git a/docs/api_docs/python/index.md b/docs/api_docs/python/index.md index c1ca155db..a182c83d3 100644 --- a/docs/api_docs/python/index.md +++ b/docs/api_docs/python/index.md @@ -15,7 +15,7 @@ * text.UnicodeCharTokenizer * text.UnicodeScriptTokenizer * text.WhitespaceTokenizer -* text.WordShape +* text.WordShape * text.WordpieceTokenizer * text.case_fold_utf8 * text.coerce_to_structurally_valid_utf8 diff --git a/docs/api_docs/python/text.md b/docs/api_docs/python/text.md index a82cbc5c8..e334154e7 100644 --- a/docs/api_docs/python/text.md +++ b/docs/api_docs/python/text.md @@ -85,7 +85,7 @@ allocates a length budget to segments in order. [`class WhitespaceTokenizer`](./text/WhitespaceTokenizer.md): Tokenizes a tensor of UTF-8 strings on whitespaces. -[`class WordShape`](./text/WordShape.md): Values for the 'pattern' arg of the +[`class WordShape`](./text/WordShape_cls.md): Values for the 'pattern' arg of the wordshape op. [`class WordpieceTokenizer`](./text/WordpieceTokenizer.md): Tokenizes a tensor diff --git a/docs/api_docs/python/text/WordShape.md b/docs/api_docs/python/text/WordShape_cls.md similarity index 63% rename from docs/api_docs/python/text/WordShape.md rename to docs/api_docs/python/text/WordShape_cls.md index 6581bc04e..1af315026 100644 --- a/docs/api_docs/python/text/WordShape.md +++ b/docs/api_docs/python/text/WordShape_cls.md @@ -54,7 +54,7 @@ Values for the 'pattern' arg of the wordshape op. The supported wordshape identifiers are: -* WordShape.BEGINS_WITH_OPEN_QUOTE: +* WordShape.BEGINS_WITH_OPEN_QUOTE: The input begins with an open quote. The following strings are considered open quotes: @@ -85,10 +85,10 @@ The supported wordshape identifiers are: Note: U+B4 (acute accent) not included. -* WordShape.BEGINS_WITH_PUNCT_OR_SYMBOL: +* WordShape.BEGINS_WITH_PUNCT_OR_SYMBOL: The input starts with a punctuation or symbol character. 
-* WordShape.ENDS_WITH_CLOSE_QUOTE: +* WordShape.ENDS_WITH_CLOSE_QUOTE: The input ends with a closing quote character. The following strings are considered close quotes: @@ -115,102 +115,102 @@ The supported wordshape identifiers are: Note: U+B4 (ACUTE ACCENT) is not included. -* WordShape.ENDS_WITH_ELLIPSIS: +* WordShape.ENDS_WITH_ELLIPSIS: The input ends with an ellipsis (i.e. with three or more periods or a unicode ellipsis character). -* WordShape.ENDS_WITH_EMOTICON: +* WordShape.ENDS_WITH_EMOTICON: The input ends with an emoticon. -* WordShape.ENDS_WITH_MULTIPLE_SENTENCE_TERMINAL: +* WordShape.ENDS_WITH_MULTIPLE_SENTENCE_TERMINAL: The input ends with multiple sentence-terminal characters. -* WordShape.ENDS_WITH_MULTIPLE_TERMINAL_PUNCT: +* WordShape.ENDS_WITH_MULTIPLE_TERMINAL_PUNCT: The input ends with multiple terminal-punctuation characters. -* WordShape.ENDS_WITH_PUNCT_OR_SYMBOL: +* WordShape.ENDS_WITH_PUNCT_OR_SYMBOL: The input ends with a punctuation or symbol character. -* WordShape.ENDS_WITH_SENTENCE_TERMINAL: +* WordShape.ENDS_WITH_SENTENCE_TERMINAL: The input ends with a sentence-terminal character. -* WordShape.ENDS_WITH_TERMINAL_PUNCT: +* WordShape.ENDS_WITH_TERMINAL_PUNCT: The input ends with a terminal-punctuation character. -* WordShape.HAS_CURRENCY_SYMBOL: +* WordShape.HAS_CURRENCY_SYMBOL: The input contains a currency symbol. -* WordShape.HAS_EMOJI: +* WordShape.HAS_EMOJI: The input contains an emoji character. See http://www.unicode.org/Public/emoji/1.0//emoji-data.txt. Emojis are in unicode ranges `2600-26FF`, `1F300-1F6FF`, and `1F900-1F9FF`. -* WordShape.HAS_MATH_SYMBOL: +* WordShape.HAS_MATH_SYMBOL: The input contains a mathematical symbol. -* WordShape.HAS_MIXED_CASE: +* WordShape.HAS_MIXED_CASE: The input contains both uppercase and lowercase letterforms. -* WordShape.HAS_NON_LETTER: +* WordShape.HAS_NON_LETTER: The input contains a non-letter character. 
-* WordShape.HAS_NO_DIGITS: +* WordShape.HAS_NO_DIGITS: The input contains no digit characters. -* WordShape.HAS_NO_PUNCT_OR_SYMBOL: +* WordShape.HAS_NO_PUNCT_OR_SYMBOL: The input contains no unicode punctuation or symbol characters. -* WordShape.HAS_NO_QUOTES: +* WordShape.HAS_NO_QUOTES: The input string contains no quote characters. -* WordShape.HAS_ONLY_DIGITS: +* WordShape.HAS_ONLY_DIGITS: The input consists entirely of unicode digit characters. -* WordShape.HAS_PUNCTUATION_DASH: +* WordShape.HAS_PUNCTUATION_DASH: The input contains at least one unicode dash character. Note that this uses the Pd (Dash) unicode property. This property will not match to soft-hyphens and katakana middle dot characters. -* WordShape.HAS_QUOTE: +* WordShape.HAS_QUOTE: The input starts or ends with a unicode quotation mark. -* WordShape.HAS_SOME_DIGITS: +* WordShape.HAS_SOME_DIGITS: The input contains a mix of digit characters and non-digit characters. -* WordShape.HAS_SOME_PUNCT_OR_SYMBOL: +* WordShape.HAS_SOME_PUNCT_OR_SYMBOL: The input contains a mix of punctuation or symbol characters, and non-punctuation non-symbol characters. -* WordShape.HAS_TITLE_CASE: +* WordShape.HAS_TITLE_CASE: The input has title case (i.e. the first character is upper or title case, and the remaining characters are lowercase). -* WordShape.IS_ACRONYM_WITH_PERIODS: +* WordShape.IS_ACRONYM_WITH_PERIODS: The input is a period-separated acronym. This matches for strings of the form "I.B.M." but not "IBM". -* WordShape.IS_EMOTICON: +* WordShape.IS_EMOTICON: The input is a single emoticon. -* WordShape.IS_LOWERCASE: +* WordShape.IS_LOWERCASE: The input contains only lowercase letterforms. -* WordShape.IS_MIXED_CASE_LETTERS: +* WordShape.IS_MIXED_CASE_LETTERS: The input contains only uppercase and lowercase letterforms. -* WordShape.IS_NUMERIC_VALUE: +* WordShape.IS_NUMERIC_VALUE: The input is parseable as a numeric value. 
This will match a fairly broad set of floating point and integer representations (but not NaN or Inf). -* WordShape.IS_PUNCT_OR_SYMBOL: +* WordShape.IS_PUNCT_OR_SYMBOL: The input contains only punctuation and symbol characters. -* WordShape.IS_UPPERCASE: +* WordShape.IS_UPPERCASE: The input contains only uppercase letterforms. -* WordShape.IS_WHITESPACE: +* WordShape.IS_WHITESPACE: The input consists entirely of whitespace. @@ -223,224 +223,224 @@ The supported wordshape identifiers are: BEGINS_WITH_OPEN_QUOTE -text.WordShape +text.WordShape BEGINS_WITH_PUNCT_OR_SYMBOL -text.WordShape +text.WordShape ENDS_WITH_CLOSE_QUOTE -text.WordShape +text.WordShape ENDS_WITH_ELLIPSIS -text.WordShape +text.WordShape ENDS_WITH_EMOTICON -text.WordShape +text.WordShape ENDS_WITH_MULTIPLE_SENTENCE_TERMINAL -text.WordShape +text.WordShape ENDS_WITH_MULTIPLE_TERMINAL_PUNCT -text.WordShape +text.WordShape ENDS_WITH_PUNCT_OR_SYMBOL -text.WordShape +text.WordShape ENDS_WITH_SENTENCE_TERMINAL -text.WordShape +text.WordShape ENDS_WITH_TERMINAL_PUNCT -text.WordShape +text.WordShape HAS_CURRENCY_SYMBOL -text.WordShape +text.WordShape HAS_EMOJI -text.WordShape +text.WordShape HAS_MATH_SYMBOL -text.WordShape +text.WordShape HAS_MIXED_CASE -text.WordShape +text.WordShape HAS_NON_LETTER -text.WordShape +text.WordShape HAS_NO_DIGITS -text.WordShape +text.WordShape HAS_NO_PUNCT_OR_SYMBOL -text.WordShape +text.WordShape HAS_NO_QUOTES -text.WordShape +text.WordShape HAS_ONLY_DIGITS -text.WordShape +text.WordShape HAS_PUNCTUATION_DASH -text.WordShape +text.WordShape HAS_QUOTE -text.WordShape +text.WordShape HAS_SOME_DIGITS -text.WordShape +text.WordShape HAS_SOME_PUNCT_OR_SYMBOL -text.WordShape +text.WordShape HAS_TITLE_CASE -text.WordShape +text.WordShape IS_ACRONYM_WITH_PERIODS -text.WordShape +text.WordShape IS_EMOTICON -text.WordShape +text.WordShape IS_LOWERCASE -text.WordShape +text.WordShape IS_MIXED_CASE_LETTERS -text.WordShape +text.WordShape IS_NUMERIC_VALUE -text.WordShape +text.WordShape 
IS_PUNCT_OR_SYMBOL -text.WordShape +text.WordShape IS_UPPERCASE -text.WordShape +text.WordShape IS_WHITESPACE -text.WordShape +text.WordShape