-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathocr_spreadsheet.py
43 lines (33 loc) · 1.42 KB
/
ocr_spreadsheet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import torch
from paddleocr import PaddleOCR
from transformers import pipeline
import gspread
from oauth2client.service_account import ServiceAccountCredentials
# Initialize PaddleOCR for English text with the text-angle classifier enabled.
ocr = PaddleOCR(use_angle_cls=True, lang='en', use_space_char=True, show_log=False, enable_mkldnn=True)
img_path = 'data/singapore.jpg'

# Perform OCR on the image; cls=True runs the angle classifier on each text box.
result = ocr.ocr(img_path, cls=True)

# result[0] holds the detections for the single input image. PaddleOCR reports
# an image with no detected text as None rather than an empty list, so
# normalize that case (and an empty result) to [] instead of crashing on len().
detections = (result[0] if result else None) or []

# Each detection is indexed as detection[1][0] to get its recognized text.
# Every fragment is followed by one space, so the final string keeps a
# trailing space exactly as the incremental concatenation did.
ocr_string = "".join(detection[1][0] + " " for detection in detections)
# print("OCR String:", ocr_string)
##############################################################################
# Initialize Google Sheets credentials.
# NOTE(review): 'your_credentials.json' and 'YourSpreadsheetName' look like
# placeholders — replace them with the real key file and spreadsheet title.
scope = ['https://spreadsheets.google.com/feeds',
         'https://www.googleapis.com/auth/drive']
creds = ServiceAccountCredentials.from_json_keyfile_name('your_credentials.json', scope)
client = gspread.authorize(creds)

# Access the Google Spreadsheet and select its first worksheet.
spreadsheet = client.open('YourSpreadsheetName')
worksheet = spreadsheet.sheet1

# Clear existing content in the worksheet.
worksheet.clear()

# Write the OCR string to the first cell of the worksheet.
# Keyword arguments plus a 2-D value list are used on purpose: gspread 6
# swapped the positional order of `values` and `range_name`, so the positional
# form update('A1', text) is only interpreted correctly by older releases.
worksheet.update(range_name='A1', values=[[ocr_string]])

# Save the OCR string to a text file. The encoding is explicit so the write
# does not depend on the platform default, which may be unable to represent
# some recognized characters.
with open('ocr_text.txt', 'w', encoding='utf-8') as file:
    file.write(ocr_string)
##########################################################################################################