Python - การติดแท็ก PoS และ Lemmatization โดยใช้ spaCy

spaCy เป็นหนึ่งในไลบรารีวิเคราะห์ข้อความที่ดีที่สุด spaCy โดดเด่นในงานสกัดข้อมูลขนาดใหญ่และเป็นหนึ่งในไลบรารีที่เร็วที่สุดในโลก นอกจากนี้ยังเป็นวิธีที่ดีที่สุดในการเตรียมข้อความสำหรับการเรียนรู้เชิงลึก spaCy นั้นเร็วกว่าและแม่นยำกว่า NLTKTagger และ TextBlob มาก

วิธีการติดตั้ง

# Install the spaCy package.
pip install spacy
# Download the en_core_web_sm English pipeline that the examples below load.
python -m spacy download en_core_web_sm

ตัวอย่าง

# Import the library and load the English pipeline.
import spacy

# If the model is not installed yet, run:
#   python -m spacy download en_core_web_sm
nlp = spacy.load("en_core_web_sm")

# POS tagging
# Process the whole document in a single call.
text = ("""My name is Vishesh. I love to work on data science problems. Please check out my github profile!""")
doc = nlp(text)

# Print every token next to its part-of-speech tag.
for token in doc:
    print(token, token.pos_)

# Collect only the tokens tagged as verbs.
print("Verbs:", [token.text for token in doc if token.pos_ == "VERB"])
# Lemmatization: the process of grouping together the inflected forms of a
# word so they can be analyzed as a single item, identified by the word's
# lemma, or dictionary form.
import spacy

# Load the English pipeline (tokenizer, tagger, parser, NER).
nlp = spacy.load("en_core_web_sm")

# Process the whole document in a single call.
text = ("""My name is Vishesh. I love to work on data science problems. Please check out my github profile!""")
doc = nlp(text)

# Print every token next to its lemma.
for token in doc:
    print(token, token.lemma_)