Module maleo.pos_tag
Expand source code Browse git
from ._pos import *
__all__ = ["POS"]
Classes
class POS
-
A Part-of-Speech Tagging classifier.
Part-of-speech tagging (POST), also called grammatical tagging, is the process of marking up each word in a text as corresponding to a particular part of speech.
References
Expand source code Browse git
class POS:
    """A Part-of-Speech Tagging classifier.

    Part-of-speech tagging (POST), also called grammatical tagging, is the
    process of marking up each word in a text as corresponding to a
    particular part of speech.

    References
    ----------
    https://universaldependencies.org/u/pos/index.html
    """

    def __init__(self):
        # Loading happens eagerly; it may trigger a download (see check_model).
        self.model = self.load_model()

    def check_model(self, model_filename):
        """Download the model to ``model_filename`` if no file exists there."""
        if not path.exists(model_filename):
            url = 'https://drive.google.com/uc?id=1-C8RRM9c-IaGgN2jdzFtlwY7BuOAMY3W'
            print('Downloading model ...')
            gdown.download(url, model_filename, quiet=False)
            print('DONE ...')

    def load_model(self):
        """Resolve the model path inside the ``maleo`` package and load it.

        The model file is downloaded first when it is not present.
        """
        model_path = pkg_resources.resource_filename('maleo', 'pos_tag/pos_model.pt')
        self.check_model(model_path)
        model = SequenceTagger.load(model_path)
        return model

    def predict(self, text: str, output_pair=False):
        """Tag the tokens of ``text`` with their part of speech.

        Parameters
        ----------
        text : str
            Input text.
        output_pair : bool
            True -> list of ``(token, tag)`` tuples.
            False -> tuple ``(tokens, tags)`` of two space-joined strings.

        Returns
        -------
        out : tuple or list of tuples
            Inference result with format based on ``output_pair``. When no
            token/tag pair is produced (e.g. empty text), this is ``[]`` for
            ``output_pair=True`` and ``('', '')`` otherwise.
        """
        input_text = Sentence(text)
        self.model.predict(input_text)
        # NOTE(review): the parsing below assumes the tagged string consists
        # of alternating token / tag fields -- confirm for the installed
        # flair version.
        item = input_text.to_tagged_string().split()
        out = [(item[idx], item[idx + 1]) for idx in range(0, len(item), 2)]
        if output_pair:
            return out
        if not out:
            # zip(*[]) yields nothing, so unpacking into two names would
            # raise ValueError; return the empty equivalent instead.
            return ('', '')
        sent, pos = zip(*out)
        return (' '.join(sent), ' '.join(pos))
Methods
def check_model(self, model_filename)
-
Check whether the model file exists; if it does not, download it.
Expand source code Browse git
def check_model(self, model_filename):
    """Make sure a model file is present at ``model_filename``.

    Nothing happens when the path already exists; otherwise the model is
    downloaded to that path.
    """
    if path.exists(model_filename):
        return
    url = 'https://drive.google.com/uc?id=1-C8RRM9c-IaGgN2jdzFtlwY7BuOAMY3W'
    print('Downloading model ...')
    gdown.download(url, model_filename, quiet=False)
    print('DONE ...')
def load_model(self)
-
Load POST model
Expand source code Browse git
def load_model(self):
    """Return the POST model loaded from the ``maleo`` package resources.

    The resource path is passed through ``check_model`` first, so the file
    is downloaded when it is not there yet.
    """
    location = pkg_resources.resource_filename('maleo', 'pos_tag/pos_model.pt')
    self.check_model(location)
    return SequenceTagger.load(location)
def predict(self, text: str, output_pair=False)
-
Run inference with the POST model.
Parameters
text
:str
- Input text
output_pair
:boolean
- True -> list of tuples False -> tuple
Returns:
out : tuple or list of tuples Inference result with format based on output_pair
Expand source code Browse git
def predict(self, text: str, output_pair=False):
    """Tag the tokens of ``text`` with their part of speech.

    Parameters
    ----------
    text : str
        Input text.
    output_pair : bool
        True -> list of ``(token, tag)`` tuples.
        False -> tuple ``(tokens, tags)`` of two space-joined strings.

    Returns
    -------
    out : tuple or list of tuples
        Inference result with format based on ``output_pair``. When no
        token/tag pair is produced (e.g. empty text), this is ``[]`` for
        ``output_pair=True`` and ``('', '')`` otherwise.
    """
    input_text = Sentence(text)
    self.model.predict(input_text)
    # NOTE(review): the parsing below assumes the tagged string consists of
    # alternating token / tag fields -- confirm for the installed flair
    # version.
    item = input_text.to_tagged_string().split()
    out = [(item[idx], item[idx + 1]) for idx in range(0, len(item), 2)]
    if output_pair:
        return out
    if not out:
        # zip(*[]) yields nothing, so unpacking into two names would raise
        # ValueError; return the empty equivalent instead.
        return ('', '')
    sent, pos = zip(*out)
    return (' '.join(sent), ' '.join(pos))