We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
使用 BML Codelab 运行 quickstart.md 中“2.2.3 版面分析”的第二段程序时无法正确运行,详细信息见下方日志。
使用 https://aistudio.baidu.com/ 提供的 GPU 测试环境;项目框架:PaddlePaddle 2.6.1;使用套件功能下载 PaddleOCR v2.6.0。
```python
# !git clone https://github.com/PaddlePaddle/PaddleOCR.git
!git clone https://github.com/PaddlePaddle/PaddleOCR.git
!python3 -m pip install --upgrade pip
!python3 -m pip install "paddlepaddle-gpu" -i https://mirror.baidu.com/pypi/simple
!python3 -m pip install -r /home/aistudio/PaddleOCR-2.6.0/ppstructure/recovery/requirements.txt
!wget https://paddleocr.bj.bcebos.com/whl/pdf2docx-0.0.0-py3-none-any.whl
!pip3 install pdf2docx-0.0.0-py3-none-any.whl
!pip install premailer
!pip install openpyxl

from paddleocr import PPStructure,save_structure_res
from paddleocr.ppstructure.recovery.recovery_to_doc import sorted_layout_boxes, convert_info_docx

table_engine = PPStructure(recovery=True)

save_folder = './output'
img_path = '/home/aistudio/PaddleOCR-2.6.0/ppstructure/docs/table/1.png'
img = cv2.imread(img_path)
result = table_engine(img)
save_structure_res(result, save_folder, os.path.basename(img_path).split('.')[0])

for line in result:
    line.pop('img')
    print(line)

h, w, _ = img.shape
res = sorted_layout_boxes(result, w)
convert_info_docx(img, res, save_folder, os.path.basename(img_path).split('.')[0])
```
[2024/07/18 23:42:51] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=True, use_xpu=False, use_npu=False, use_mlu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='/home/aistudio/.paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='/home/aistudio/.paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, max_text [2024/07/18 23:42:58] ppocr DEBUG: dt_boxes num : 223, elapsed : 0.23255515098571777 [2024/07/18 23:43:00] ppocr DEBUG: rec_res num : 223, elapsed : 1.5538420677185059 [2024/07/18 23:43:01] ppocr DEBUG: dt_boxes num : 80, elapse : 0.0471186637878418 [2024/07/18 23:43:01] ppocr DEBUG: rec_res num : 80, elapse : 0.4826929569244385 [2024/07/18 23:43:03] ppocr DEBUG: dt_boxes num : 110, elapse : 0.059357404708862305 [2024/07/18 23:43:04] ppocr DEBUG: rec_res num : 110, elapse : 0.6696820259094238 --------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[15], line 16 14 img = cv2.imread(img_path) 15 result = table_engine(img) ---> 16 save_structure_res(result, save_folder, os.path.basename(img_path).split('.')[0]) 18 for line in result: 19 line.pop('img') File /opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddleocr/ppstructure/predict_system.py:280, in save_structure_res(res, save_folder, img_name, 
img_idx) 272 if ( 273 region["type"].lower() == "table" 274 and len(region["res"]) > 0 275 and "html" in region["res"] 276 ): 277 excel_path = os.path.join( 278 excel_save_folder, "{}_{}.xlsx".format(region["bbox"], img_idx) 279 ) --> 280 to_excel(region["res"]["html"], excel_path) 281 elif region["type"].lower() == "figure": 282 img_path = os.path.join( 283 excel_save_folder, "{}_{}.jpg".format(region["bbox"], img_idx) 284 ) File /opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddleocr/ppstructure/table/predict_table.py:153, in to_excel(html_table, excel_path) 150 def to_excel(html_table, excel_path): 151 from tablepyxl import tablepyxl --> 153 tablepyxl.document_to_xl(html_table, excel_path) File /opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddleocr/ppstructure/table/tablepyxl/tablepyxl.py:118, in document_to_xl(doc, filename, base_url) 113 def document_to_xl(doc, filename, base_url=None): 114 """ 115 Takes a string representation of an html document and writes one sheet for 116 every table in the document. 
The workbook is written out to a file called filename 117 """ --> 118 wb = document_to_workbook(doc, base_url=base_url) 119 wb.save(filename) File /opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddleocr/ppstructure/table/tablepyxl/tablepyxl.py:105, in document_to_workbook(doc, wb, base_url) 100 wb.remove(wb.active) 102 inline_styles_doc = Premailer( 103 doc, base_url=base_url, remove_classes=False 104 ).transform() --> 105 tables = get_Tables(inline_styles_doc) 107 for table in tables: 108 table_to_sheet(table, wb) File /opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddleocr/ppstructure/table/tablepyxl/tablepyxl.py:23, in get_Tables(doc) 21 for comment in comments: 22 comment.drop_tag() ---> 23 return [Table(table) for table in tree.xpath("//table")] File /opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddleocr/ppstructure/table/tablepyxl/tablepyxl.py:23, in <listcomp>(.0) 21 for comment in comments: 22 comment.drop_tag() ---> 23 return [Table(table) for table in tree.xpath("//table")] File /opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddleocr/ppstructure/table/tablepyxl/style.py:234, in Table.__init__(self, table) 231 super(Table, self).__init__(table) 232 table_head = table.find("thead") 233 self.head = ( --> 234 TableHead(table_head, parent=self) if table_head is not None else None 235 ) 236 table_body = table.find("tbody") 237 self.body = TableBody( 238 table_body if table_body is not None else table, parent=self 239 ) File /opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddleocr/ppstructure/table/tablepyxl/style.py:249, in TableHead.__init__(self, head, parent) 247 def __init__(self, head, parent=None): 248 super(TableHead, self).__init__(head, parent=parent) --> 249 self.rows = [TableRow(tr, parent=self) for tr in head.findall("tr")] File 
/opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddleocr/ppstructure/table/tablepyxl/style.py:249, in <listcomp>(.0) 247 def __init__(self, head, parent=None): 248 super(TableHead, self).__init__(head, parent=parent) --> 249 self.rows = [TableRow(tr, parent=self) for tr in head.findall("tr")] File /opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddleocr/ppstructure/table/tablepyxl/style.py:269, in TableRow.__init__(self, tr, parent) 267 def __init__(self, tr, parent=None): 268 super(TableRow, self).__init__(tr, parent=parent) --> 269 self.cells = [ 270 TableCell(cell, parent=self) for cell in tr.findall("th") + tr.findall("td") 271 ] File /opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddleocr/ppstructure/table/tablepyxl/style.py:270, in <listcomp>(.0) 267 def __init__(self, tr, parent=None): 268 super(TableRow, self).__init__(tr, parent=parent) 269 self.cells = [ --> 270 TableCell(cell, parent=self) for cell in tr.findall("th") + tr.findall("td") 271 ] File /opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddleocr/ppstructure/table/tablepyxl/style.py:312, in TableCell.__init__(self, cell, parent) 310 super(TableCell, self).__init__(cell, parent=parent) 311 self.value = element_to_string(cell) --> 312 self.number_format = self.get_number_format() File /opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddleocr/ppstructure/table/tablepyxl/style.py:337, in TableCell.get_number_format(self) 335 if "TYPE_DATE" in self.element.get("class", "").split(): 336 return FORMAT_DATE_MMDDYYYY --> 337 if self.data_type() == cell.TYPE_NUMERIC: 338 try: 339 int(self.value) File /opt/conda/envs/python35-paddle120-env/lib/python3.10/site-packages/paddleocr/ppstructure/table/tablepyxl/style.py:326, in TableCell.data_type(self) 324 else: 325 cell_type = "TYPE_STRING" --> 326 return getattr(cell, cell_type) NameError: name 'cell' is not defined
None
The text was updated successfully, but these errors were encountered:
遇到同样问题
Sorry, something went wrong.
Successfully merging a pull request may close this issue.
问题描述 / Problem Description
使用 BML Codelab 运行 quickstart.md 中“2.2.3 版面分析”的第二段程序时无法正确运行,详细信息见下方日志。
运行环境 / Runtime Environment
使用 https://aistudio.baidu.com/ 提供的gpu测试环境
项目框架:PaddlePaddle 2.6.1
使用套件功能下载PaddleOCR v2.6.0
复现代码 / Reproduction Code
完整报错 / Complete Error Message
可能解决方案 / Possible solutions
None
附件 / Appendix
None
The text was updated successfully, but these errors were encountered: