Source code for enlp.pipeline

"""
Class for piping together natural language processing functions
"""

from enlp.processing.stdtools import *
from enlp.understanding.linguistic import pos_tag


class NLPPipeline(object):
    """Pipeline class for combining functions from nlp_tools

    Attributes
    ----------
    model : :obj:`spacy.lang`
        SpaCy language model
    text : :obj:`str`
        text string on which to perform processing
    pos : :obj:`list`
        list of Parts-of-Speech tags
    tokens : :obj:`list`
        list of tokens
    """

    def __init__(self, model, text):
        """__init__ method of NLPPipeline class

        Parameters
        ----------
        model : :obj:`spacy.lang`
            SpaCy language model
        text : :obj:`str`
            text string on which to perform processing
        """
        self.model = model
        self.text = text
    def rm_punctuation(self, **kwargs):
        """remove punctuation from text
        """
        self.text = rm_punctuation(self.model, self.text)
        return self

    def rm_stopwords(self, **kwargs):
        """remove stopwords from text

        Notes
        -----
        List of stopwords can be obtained from stdtools.get_stopwords()
        """
        self.text = rm_stopwords(self.model, self.text, stopwords=kwargs['stopwords'])
        return self

    def spacy_lemmatize(self):
        """lemmatise text
        """
        self.text = spacy_lemmatize(self.model, self.text)
        return self

    def nltk_stem_no(self):
        """stem text
        """
        self.text = nltk_stem_no(self.model, self.text)
        return self

    def pos_tag(self):
        """get part-of-speech tags
        """
        self.pos = pos_tag(self.model, self.text)
        return self

    def tokenise(self):
        """tokenise text
        """
        self.tokens = tokenise(self.model, self.text)
        return self
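
Example usage (a minimal sketch, not part of the module source): every method returns ``self``, so processing steps can be chained. The spaCy model name, the sample text, and the stopword list below are illustrative assumptions only.

import spacy

from enlp.pipeline import NLPPipeline

# Load a spaCy language model ("en_core_web_sm" is just an example choice).
model = spacy.load("en_core_web_sm")
text = "The wells were drilled in 2009, and the results were good."

# Each method returns the pipeline instance, so steps can be chained.
pipe = (NLPPipeline(model, text)
        .rm_punctuation()
        .rm_stopwords(stopwords=["the", "and", "in", "were"])
        .spacy_lemmatize()
        .tokenise())

print(pipe.text)    # processed text string
print(pipe.tokens)  # tokens produced by the final tokenise() step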