Source code for enlp.understanding.linguistic

"""
Contains functions for linguistic features of natural language understanding
"""


def pos_tag(model, text: str) -> list:
    """Return parts-of-speech for words in a piece of text.

    Part-of-speech tagging is the process of marking up a word in a text
    (corpus) as corresponding to a particular part of speech, based on both
    its definition and its context. A simplified form of this is commonly
    taught to children, in the identification of words as nouns, verbs,
    adjectives, adverbs, etc.

    Parameters
    ----------
    model : :obj:`spacy.lang`
        SpaCy language model
    text : :obj:`str`
        Text string to tag. It is lowercased before being passed to the
        model.

    Returns
    -------
    tags : :obj:`list`
        List of part of speech tags, list is ordered as tokens appear in
        sentence.

    Notes
    -----
    To get direct linking with words corresponding to tags use tokenise
    function to get word list in same order as PoS tags.

    Example
    -------
    >>> import spacy
    >>> import enlp.processing.stdtools as stdt
    >>> lang_mod = spacy.load('nb_dep_ud_sm')
    >>> text = 'Den raske brune reven hoppet over den late hunden.'
    >>> word_list = stdt.tokenise(lang_mod,text)
    >>> print (pos_tag(lang_mod,text))
    ['DET', 'ADJ', 'ADJ', 'NOUN', 'VERB', 'ADP', 'DET', 'ADJ', 'NOUN', 'PUNCT']
    """
    # NOTE(review): the input is lowercased before tagging; presumably this
    # matches the normalisation used elsewhere in the package -- confirm it
    # is intended, since casing may influence the tags a model assigns.
    doc = model(text.lower())

    # One tag per token, in the order the model yields the tokens.
    return [token.pos_ for token in doc]