TensorFlowASR
TensorFlowASR copied to clipboard
Issue about CharFeaturizer
Can anyone explain why the code below works? It seems to extract only the first letter of each token. Thanks.
class CharFeaturizer(TextFeaturizer):
    """Text featurizer whose tokens are single characters.

    The vocabulary consists of the first character of every usable
    vocabulary line plus one empty-string blank token.
    """

    def __init_vocabulary(self):
        """Build the token tables from the configured vocabulary.

        Reads the file at ``self.decoder_config.vocabulary`` when it is set,
        otherwise falls back to ``ENGLISH_CHARACTERS``.

        Sets the attributes ``blank``, ``tokens2indices``,
        ``non_blank_tokens``, ``tokens`` (converted to a ``tf.string``
        tensor), ``num_classes`` and ``upoints``.
        """
        vocabulary_path = self.decoder_config.vocabulary
        if vocabulary_path is not None:
            # Decode explicitly as UTF-8 instead of the platform locale: the
            # tokens are decoded as "UTF-8" again below, so the file must be
            # read with the same encoding on every platform.
            with open(vocabulary_path, "r", encoding="utf-8") as fin:
                lines = fin.readlines()
        else:
            lines = ENGLISH_CHARACTERS

        self.blank = 0 if self.decoder_config.blank_at_zero else None
        self.tokens2indices = {}
        self.tokens = []
        # Index 0 is reserved for the blank token when it sits at position 0.
        index = 1 if self.blank == 0 else 0
        for line in lines:
            # presumably normalizes the line and strips the trailing newline
            # -- confirm against preprocess_text
            line = self.preprocess_text(line)
            # Skip comment lines and lines that are empty after preprocessing.
            if line.startswith("#") or not line:
                continue
            # Only the first character of the line is kept as the token;
            # presumably each vocabulary line holds exactly one character,
            # so anything following it on the line is ignored.
            # NOTE(review): a character that appears on several lines is
            # appended to ``tokens`` each time, while ``tokens2indices`` keeps
            # only the index of its last occurrence.
            self.tokens2indices[line[0]] = index
            self.tokens.append(line[0])
            index += 1

        if self.blank is None:
            self.blank = len(self.tokens)  # blank not at zero
        self.non_blank_tokens = self.tokens.copy()
        self.tokens.insert(self.blank, "")  # add blank token to tokens
        self.num_classes = len(self.tokens)
        self.tokens = tf.convert_to_tensor(self.tokens, dtype=tf.string)
        # Unicode code points of the tokens, shaped [num_tokens, 1].
        self.upoints = tf.strings.unicode_decode(self.tokens, "UTF-8").to_tensor(shape=[None, 1])
@yiqiaoc11 Yes, it extracts the first letter. Have you found a solution to it?