Skip to content

Commit

Permalink
Update max_repeated_len default from 20 to 100
Browse files Browse the repository at this point in the history
  • Loading branch information
DrownFish19 committed Jan 4, 2024
1 parent c2436f4 commit 93ac4fa
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions model_zoo/ernie-1.0/preprocess/create_pretraining_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def get_args():
group.add_argument("--workers", type=int, default=1, help="Number of worker processes to launch")
group.add_argument("--max_doc_num", type=int, default=sys.maxsize, help="Number of worker processes to launch")
group.add_argument(
- "--max_repeated_len", type=int, default=20, help="The maximum length of the repeated characters to keep"
+ "--max_repeated_len", type=int, default=100, help="The maximum length of the repeated characters to keep"
)

args = parser.parse_args()
Expand Down Expand Up @@ -281,7 +281,7 @@ def process(text):

Converter.process = process

- def remove_repeated_chars(text, max_repeated_len=20):
+ def remove_repeated_chars(text, max_repeated_len=100):
"""
Removes repeated characters from the given text, where the length of
the repeated characters is greater than or equal to the specified length.
Expand Down

0 comments on commit 93ac4fa

Please sign in to comment.