NLP | Brill Tagger
- BrillTagger class is a transformation-based tagger. It is not a subclass of SequentialBackoffTagger.
- Moreover, it uses a series of rules to correct the results of an initial tagger.
- Each rule is scored: its score equals the number of errors it corrects minus the number of new errors it introduces.
Code #1 : Training a BrillTagger class
# Loading Libraries
from nltk.tag import brill, brill_trainer


def train_brill_tagger(initial_tagger, train_sents, **kwargs):
    """Train a Brill tagger on top of ``initial_tagger``.

    Eighteen rule templates are built: nine conditioned on neighbouring
    tags (``brill.Pos``) and nine on neighbouring words (``brill.Word``).
    Each family covers the single offsets -1, 1, -2, 2, the spans
    (-2, -1), (1, 2), (-3, -2, -1), (1, 2, 3), plus one two-feature
    template combining offset -1 with offset 1.

    Args:
        initial_tagger: tagger handed to ``BrillTaggerTrainer`` as the
            starting point for the learned rules.
        train_sents: training sentences passed to ``trainer.train``.
        **kwargs: forwarded unchanged to ``trainer.train``.

    Returns:
        The object returned by ``BrillTaggerTrainer.train``.
    """
    # Offsets relative to the current token; order matches the original
    # hand-written template list so the template sequence is unchanged.
    context_windows = (
        (-1,),
        (1,),
        (-2,),
        (2,),
        (-2, -1),
        (1, 2),
        (-3, -2, -1),
        (1, 2, 3),
    )

    templates = []
    for feature in (brill.Pos, brill.Word):
        # One single-feature template per context window ...
        templates.extend(
            brill.Template(feature(list(offsets))) for offsets in context_windows
        )
        # ... followed by the template that looks at both direct neighbours.
        templates.append(brill.Template(feature([-1]), feature([1])))

    # Using BrillTaggerTrainer to train
    trainer = brill_trainer.BrillTaggerTrainer(
        initial_tagger, templates, deterministic=True
    )
    return trainer.train(train_sents, **kwargs)
Code #2 : Building and evaluating the initial tagger that the BrillTagger will correct
from nltk.corpus import treebank
from nltk.tag import (
    BigramTagger,
    DefaultTagger,
    TrigramTagger,
    UnigramTagger,
    brill,
    brill_trainer,
)

# NOTE(review): backoff_tagger is called below but is not defined anywhere in
# this article; it is presumably provided by tag_util next to
# train_brill_tagger -- confirm before running.
from tag_util import backoff_tagger, train_brill_tagger

# Initializing: fallback tagger that labels every token 'NN'
default_tag = DefaultTagger('NN')

# initializing training and testing set
train_data = treebank.tagged_sents()[:3000]
test_data = treebank.tagged_sents()[3000:]

# Build the initial tagger from the three n-gram tagger classes.
# The original passed the undefined name `default_tagger` here (NameError);
# the fallback tagger created above is `default_tag`.
initial_tag = backoff_tagger(
    train_data,
    [UnigramTagger, BigramTagger, TrigramTagger],
    backoff=default_tag,
)

# NOTE(review): recent NLTK releases appear to deprecate evaluate() in favour
# of accuracy() -- confirm against the installed version.
a = initial_tag.evaluate(test_data)
print("Accuracy of Initial Tag : ", a)
Output :
Accuracy of Initial Tag : 0.8806820634578028
Code #3 : Training the BrillTagger on top of the initial tagger and evaluating it
# Learn Brill correction rules on top of the initial tagger, using the same
# training sentences the initial tagger was built from.
brill_tag = train_brill_tagger(initial_tag, train_data)

# Score the resulting tagger on the held-out sentences.
brill_accuracy = brill_tag.evaluate(test_data)
print("Accuracy of brill_tag : ", brill_accuracy)
Output :
Accuracy of brill_tag : 0.8827541549751781
Contact Us