kaldi-asr · danpovey · Oct 5, 2018 · Sep 13, 2018 · Sep 13, 2018 · Sep 13, 2018
diff --git a/egs/cifar/v1/image/ocr/make_features.py b/egs/cifar/v1/image/ocr/make_features.py
@@ -43,6 +43,8 @@
 parser.add_argument('--padding', type=int, default=5,
                     help='Number of white pixels to pad on the left'
                     'and right side of the image.')
+parser.add_argument('--num-channels', type=int, default=1,
+                    help='Number of color channels')
 parser.add_argument('--fliplr', type=lambda x: (str(x).lower()=='true'), default=False,
                    help="Flip the image left-right for right to left languages")
 parser.add_argument("--augment", type=lambda x: (str(x).lower()=='true'), default=False,
@@ -84,9 +86,9 @@ def horizontal_pad(im, allowed_lengths = None):
         left_padding = int(padding // 2)
         right_padding = padding - left_padding
     dim_y = im.shape[0] # height
-    im_pad = np.concatenate((255 * np.ones((dim_y, left_padding),
+    im_pad = np.concatenate((255 * np.ones((dim_y, left_padding, args.num_channels),
                                            dtype=int), im), axis=1)
-    im_pad1 = np.concatenate((im_pad, 255 * np.ones((dim_y, right_padding),
+    im_pad1 = np.concatenate((im_pad, 255 * np.ones((dim_y, right_padding, args.num_channels),
                                                     dtype=int)), axis=1)
     return im_pad1
 
@@ -150,7 +152,13 @@ def get_scaled_image_aug(im, mode='normal'):
         if im_horizontal_padded is None:
             num_fail += 1
             continue
-        data = np.transpose(im_horizontal_padded, (1, 0))
+        if args.num_channels == 1:
+            data = np.transpose(im_horizontal_padded, (1, 0))
+        elif args.num_channels == 3:
+            H = im_horizontal_padded.shape[0]
+            W = im_horizontal_padded.shape[1]
+            C = im_horizontal_padded.shape[2]
+            data = np.reshape(np.transpose(im_horizontal_padded, (1, 0, 2)), (W, H * C))
         data = np.divide(data, 255.0)
         num_ok += 1
         write_kaldi_matrix(out_fh, data, image_id)

diff --git a/egs/wsj/s5/utils/lang/bpe/prepend_words.py b/egs/wsj/s5/utils/lang/bpe/prepend_words.py
@@ -4,10 +4,16 @@
 # the beginning of the words for finding the initial-space of every word
 # after decoding.
 
+import argparse
 import sys, io
 
-infile = io.TextIOWrapper(sys.stdin.buffer, encoding='latin-1')
-output = io.TextIOWrapper(sys.stdout.buffer, encoding='latin-1')
+parser = argparse.ArgumentParser(description="Prepends '|' to the beginning of every word")
+parser.add_argument('--encoding', type=str, default='latin-1',
+                    help='Type of encoding')
+args = parser.parse_args()
+
+infile = io.TextIOWrapper(sys.stdin.buffer, encoding=args.encoding)
+output = io.TextIOWrapper(sys.stdout.buffer, encoding=args.encoding)
 for line in infile:
     output.write(' '.join([ "|"+word for word in line.split()]) + '\n')
 

diff --git a/egs/wsj/s5/utils/lang/make_lexicon_fst.py b/egs/wsj/s5/utils/lang/make_lexicon_fst.py
@@ -72,7 +72,7 @@ def read_lexiconp(filename):
     with open(filename, 'r', encoding='latin-1') as f:
         whitespace = re.compile("[ \t]+")
         for line in f:
-            a = whitespace.split(line.strip())
+            a = whitespace.split(line.rstrip('\n'))
             if len(a) < 2:
                 print("{0}: error: found bad line '{1}' in lexicon file {2} ".format(
                     sys.argv[0], line.strip(), filename), file=sys.stderr)

diff --git a/egs/yomdle_fa/README.txt b/egs/yomdle_fa/README.txt
@@ -0,0 +1,4 @@
+This directory contains example scripts for OCR on the Yomdle and Slam datasets.
+Training is done on the Yomdle dataset and testing is done on Slam.
+LM rescoring is also done with extra corpus data obtained from various newswires (e.g. Hamshahri).
+There is also an option for normalized scoring, provided in local/normalize_scoring/normalized_socring.sh.
diff --git a/egs/yomdle_fa/v1/cmd.sh b/egs/yomdle_fa/v1/cmd.sh
@@ -0,0 +1,13 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances of 'queue.pl' to 'run.pl' (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine).  queue.pl works with GridEngine (qsub).  slurm.pl works
+# with slurm.  Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration.  Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+export cmd="queue.pl"
diff --git a/egs/yomdle_fa/v1/image b/egs/yomdle_fa/v1/image
@@ -0,0 +1 @@
+../../cifar/v1/image/
diff --git a/egs/yomdle_fa/v1/local/GEDI2CSV_enriched.py b/egs/yomdle_fa/v1/local/GEDI2CSV_enriched.py
@@ -0,0 +1,252 @@
+#!/usr/bin/env python3
+
+'''
+
+GEDI2CSV
+
+Convert GEDI-type bounding boxes to CSV format
+
+'''
+
+import logging
+import os
+import sys
+import time
+import glob
+import csv
+import imghdr
+from PIL import Image
+import argparse
+import pdb
+import cv2
+import numpy as np
+import xml.etree.ElementTree as ET
+
+# Module-level shorthands for the NumPy trig functions and constant used below.
+sin, cos = np.sin, np.cos
+pi = np.pi
+
+def Rotate2D(pts, cnt, ang=90):
+    """Return (M, rotated): M is the 2x2 matrix built from cos/sin of ang (radians, as np.cos/np.sin take it); rotated is (pts - cnt) . M + cnt."""
+    M = np.array(((cos(ang), -sin(ang)), (sin(ang), cos(ang))))
+    return M, np.dot(pts - cnt, M) + cnt
+
+def npbox2string(npar):
+    """Return the first eight entries of row 0 of npar as a tuple (c1, r1, ..., c4, r4)."""
+    # A row count other than one is only reported on stdout; row 0 is used anyway.
+    if np.shape(npar)[0] != 1:
+        print('Error during CSV conversion\n')
+    row = npar[0]
+    c1, r1, c2, r2 = row[0], row[1], row[2], row[3]
+    c3, r3, c4, r4 = row[4], row[5], row[6], row[7]
+    return c1, r1, c2, r2, c3, r3, c4, r4
+
+# cv2.minAreaRect() returns a Box2D structure which contains the following details - ( center (x,y), (width, height), angle of rotation )
+# Get 4 corners of the rectangle using cv2.boxPoints()
+
+class GEDI2CSV():
+    """Convert GEDI text zones to four-corner boxes and save them as CSV.
+
+    Rectangular zones are rotated by their own orientation; polygon zones are
+    replaced by the minimum-area rectangle around their convex hull.
+    """
+
+    def __init__(self, logger, args):
+        """Store the logger and the parsed arguments; csvfile() reads args.outputDir."""
+        self._logger = logger
+        self._args = args
+
+    def csvfile(self, coords, polys, baseName, pgrot):
+        """Write <outputDir>/<baseName>.csv with one row per zone and return the row count.
+
+        Args:
+            coords: rectangular zones, each a 10-item sequence
+                [id, x, y, w, h, degrees, text, qual, script, text_type]; the
+                rectangle is rotated about its (x, y) corner by `degrees`.
+            polys: polygon zones, each a 6-item sequence
+                [id, poly_val, text, qual, script, text_type]; poly_val holds one
+                string per point and is reduced to a minimum-area rectangle.
+            baseName: stem of the CSV file and prefix of each row's 'name' field.
+            pgrot: page rotation, copied unchanged into every row.
+
+        Returns:
+            The number of data rows written; 0 (and no file) when both coords and
+            polys are empty.
+        """
+        # The trailing separator lets the file path be built by plain concatenation.
+        writePath = os.path.join(self._args.outputDir, '')
+        os.makedirs(writePath, exist_ok=True)
+
+        # Column order of the CSV; every row appended below follows it.
+        header=['ID','name','col1','row1','col2','row2','col3','row3','col4','row4','confidence','truth','pgrot','bbrot','qual','script','text_type']
+        conf=100  # constant written to the 'confidence' column of every row
+        if len(coords) == 0 and len(polys) == 0:
+            self._logger.info('Found %s with no text content',(baseName))
+            print('...Found %s with no text content' % (baseName))
+            return 0
+
+        rotlist = []
+
+        # Rectangular zones: rotate the four corners, then flatten them to eight ints.
+        for zone_id, x, y, w, h, degrees, text, qual, script, text_type in coords:
+            contour = np.array([(x,y),(x+w,y),(x+w,y+h),(x,y+h)])
+
+            # Rotate about the upper-left corner; `degrees` is converted to radians.
+            _, rot = Rotate2D(contour, np.array([x,y]), degrees*pi/180)
+            # np.intp replaces np.int0, an alias that NumPy 2.0 removed.
+            rot = np.reshape(rot.astype(np.intp), (-1,1)).T
+            c1,r1,c2,r2,c3,r3,c4,r4 = npbox2string(rot)
+
+            # text is None when the zone had no Text_Content; otherwise drop any BOM.
+            if text is not None:
+                text = text.replace(u'\ufeff','')
+
+            # bbrot (the zone's own rotation) is `degrees`; pgrot is passed through.
+            rotlist.append([zone_id,baseName + '_' + zone_id + '.png',c1,r1,c2,r2,c3,r3,c4,r4,
+                            conf,text,pgrot,degrees,qual,script,text_type])
+
+        # Polygon zones: use the minimum-area rectangle around the convex hull.
+        for zone_id, poly_val, text, qual, script, text_type in polys:
+            # NOTE(security): eval() executes the point strings as Python code, so the
+            # polygon data must come from trusted annotation files.
+            contour = np.asarray([eval(point) for point in poly_val])
+            rect = cv2.minAreaRect(cv2.convexHull(contour))
+            # np.intp replaces np.int0 here as well (removed in NumPy 2.0).
+            box = np.reshape(cv2.boxPoints(rect).astype(np.intp), (-1,1)).T
+            c1,r1,c2,r2,c3,r3,c4,r4 = npbox2string(box)
+
+            # Polygon rows always report a bounding-box rotation of 0.0.
+            rotlist.append([zone_id,baseName + '_' + zone_id + '.png',c1,r1,c2,r2,c3,r3,c4,r4,
+                            conf,text,pgrot,0.0,qual,script,text_type])
+
+        # newline='' is what the csv module documentation requires for files handed to
+        # csv.writer; without it some platforms get an extra '\r' on every row.
+        with open(writePath + baseName + ".csv", "w", encoding="utf-8", newline='') as f:
+            writer = csv.writer(f)
+            writer.writerow(header)
+            writer.writerows(rotlist)
+
+        return len(rotlist)
+
+
+def main(args):
+    """Convert every GEDI XML file under args.inputDir into a CSV file in args.outputDir.
+
+    Reads args.inputDir, args.outputDir, args.ftype ('boxed' or 'transcribed'),
+    args.quality ('both', 'low' or 'regular') and args.log (log file path; a falsy
+    value disables the file handler).  Exits with status -1 when ftype or quality
+    is not one of the accepted values, and prints a one-line summary of the files
+    seen and rows written when done.
+    """
+    os.makedirs(args.outputDir, exist_ok=True)
+
+    # Setup logging
+    logger = logging.getLogger(__name__)
+    logger.setLevel(logging.INFO)
+    if args.log:
+        handler = logging.FileHandler(args.log)
+        handler.setLevel(logging.INFO)
+        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+        handler.setFormatter(formatter)
+        logger.addHandler(handler)
+
+    # The two option checks do not depend on any input file, so they run once, up
+    # front, instead of once per XML file.
+    # ftype selects the zone attribute that must be present for a rectangular zone.
+    required_attr = {'boxed': 'col', 'transcribed': 'Text_Content'}
+    if args.ftype not in required_attr:
+        print('Filetype must be either boxed or transcribed!')
+        logger.info('Filetype must be either boxed or transcribed!')
+        sys.exit(-1)
+    fileTypeStr = required_attr[args.ftype]
+
+    # quality selects which values of the zone 'Quality' attribute are accepted.
+    quality_sets = {'both': {'Regular','Low-Quality'},
+                    'low': {'Low-Quality'},
+                    'regular': {'Regular'}}
+    if args.quality not in quality_sets:
+        print('Quality must be both, low or regular!')
+        logger.info('Quality must be both, low or regular!')
+        sys.exit(-1)
+    qualset = quality_sets[args.quality]
+
+    gtconverter = GEDI2CSV(logger, args)
+    namespaces = {"gedi" : "http://lamp.cfar.umd.edu/media/projects/GEDI/"}
+    # Zone 'Type' values that count as text to export.
+    text_types = ('Machine_Print','Confusable_Allograph','Handwriting')
+
+    fileCnt = 0
+    line_write_ctr = 0
+
+    # Get all XML files in the directory and sub folders
+    for root, dirnames, filenames in os.walk(args.inputDir, followlinks=True):
+        for file in filenames:
+            if not file.lower().endswith('.xml'):
+                continue
+            fullName = os.path.join(root,file)
+            fileCnt += 1
+
+            # read the XML file; the first DL_DOCUMENT element holds the pages
+            child = ET.parse(fullName).getroot().findall('gedi:DL_DOCUMENT',namespaces)[0]
+            coordinates = []
+            polygons = []
+            pageRot = 0
+
+            for pgs in child.iterfind('gedi:DL_PAGE',namespaces):
+                # A page without GEDI_orientation counts as unrotated.  Only the
+                # value of the last page survives the loop and reaches csvfile().
+                if 'GEDI_orientation' not in pgs.attrib:
+                    pageRot = 0
+                else:
+                    pageRot = int(pgs.attrib['GEDI_orientation'])
+                    logger.info(' PAGE ROTATION %s, %s' % (fullName, str(pageRot)))
+
+                for zone in pgs.findall('gedi:DL_ZONE',namespaces):
+                    # Keep only text zones of an accepted type and quality.
+                    if not (zone.attrib['gedi_type']=='Text' and zone.attrib['Type'] in text_types
+                            and zone.attrib['Quality'] in qualset):
+                        continue
+                    if zone.get('polygon'):
+                        # Polygon zones carry their outline as ';'-separated points.
+                        polygons.append([zone.attrib['id'],zone.get('polygon').split(';'),
+                                         zone.get('Text_Content'),zone.get('Quality'),
+                                         zone.get('Script'),zone.get('Type')])
+                    elif zone.get(fileTypeStr) is not None:
+                        # Rectangular zone: position, size and optional orientation.
+                        coordinates.append([zone.attrib['id'],
+                                            int(zone.attrib['col']),int(zone.attrib['row']),
+                                            int(zone.attrib['width']),int(zone.attrib['height']),
+                                            float(zone.get('orientationD',0.0)),
+                                            zone.get('Text_Content'),zone.get('Quality'),
+                                            zone.get('Script'),zone.get('Type')])
+
+            if len(coordinates) > 0 or len(polygons) > 0:
+                line_write_ctr += gtconverter.csvfile(coordinates, polygons,
+                                                      os.path.splitext(file)[0], pageRot)
+            else:
+                print('...%s has no applicable content' % (os.path.splitext(fullName)[0]))
+
+    print('complete...total files %d, lines written %d' % (fileCnt, line_write_ctr))
+
+
+''' Args and defaults '''
+def parse_arguments(argv):
+    """Parse argv (the argument list without the program name) and return the argparse namespace."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--inputDir', type=str, help='Input directory', required=True)
+    parser.add_argument('--outputDir', type=str, help='Output directory', required=True)
+    parser.add_argument('--ftype', type=str, help='GEDI file type (either "boxed" or "transcribed")', default='transcribed')
+    parser.add_argument('--quality', type=str, help='GEDI file quality (either "both" or "low" or "regular")', default='regular')
+    # --log is handed to logging.FileHandler by main(), so it names a file, not a directory.
+    parser.add_argument('--log', type=str, help='Log file path', default='./GEDI2CSV_enriched.log')
+    return parser.parse_args(argv)
+
+# Script entry point: parse the command line (minus the program name) and convert.
+if __name__ == '__main__':
+    main(parse_arguments(sys.argv[1:]))
+
+
+
+
+
+