[Checkins] SVN: topia.postag/trunk/ * Make filter customization easier. Provided permissiveFilter.
Stephan Richter
srichter at gmail.com
Sat May 30 11:16:11 EDT 2009
Log message for revision 100552:
* Make filter customization easier. Provided permissiveFilter.
* Add another rule that looks for the verb after a modal verb,
eliminating several false noun detections.
* Ensured a few more test cases are working correctly.
* Added documentation for keyword extraction.
Changed:
_U topia.postag/trunk/
_U topia.postag/trunk/src/
U topia.postag/trunk/src/topia/postag/README.txt
U topia.postag/trunk/src/topia/postag/data/english-lexicon.txt
U topia.postag/trunk/src/topia/postag/example.txt
U topia.postag/trunk/src/topia/postag/extract.py
U topia.postag/trunk/src/topia/postag/tag.py
U topia.postag/trunk/src/topia/postag/tests.py
-=-
Property changes on: topia.postag/trunk
___________________________________________________________________
Added: svn:ignore
+ .installed.cfg
bin
develop-eggs
dist
parts
Property changes on: topia.postag/trunk/src
___________________________________________________________________
Added: svn:ignore
+ topia.postag.egg-info
Modified: topia.postag/trunk/src/topia/postag/README.txt
===================================================================
--- topia.postag/trunk/src/topia/postag/README.txt 2009-05-30 15:13:25 UTC (rev 100551)
+++ topia.postag/trunk/src/topia/postag/README.txt 2009-05-30 15:16:11 UTC (rev 100552)
@@ -128,9 +128,31 @@
['examples', 'NNS', 'example'],
['.', '.', '.']]
-So far so good. Let's now test the phase 2 rules.
+So far so good. Let's test a few more cases:
+ >>> tagger("The fox's tail is red.")
+ [['The', 'DT', 'The'],
+ ['fox', 'NN', 'fox'],
+ ["'s", 'POS', "'s"],
+ ['tail', 'NN', 'tail'],
+ ['is', 'VBZ', 'is'],
+ ['red', 'JJ', 'red'],
+ ['.', '.', '.']]
+ >>> tagger("The fox can't really jump over the fox's tail.")
+ [['The', 'DT', 'The'],
+ ['fox', 'NN', 'fox'],
+ ['can', 'MD', 'can'],
+ ["'t", 'RB', "'t"],
+ ['really', 'RB', 'really'],
+ ['jump', 'VB', 'jump'],
+ ['over', 'IN', 'over'],
+ ['the', 'DT', 'the'],
+ ['fox', 'NN', 'fox'],
+ ["'s", 'POS', "'s"],
+ ['tail', 'NN', 'tail'],
+ ['.', '.', '.']]
+
Rules
~~~~~
@@ -165,3 +187,59 @@
[['men', 'NNS', 'men']]
>>> tagger('feet')
[['feet', 'NNS', 'feet']]
+
+
+Keyword Extraction
+------------------
+
+Now that we can tag a text, let's have a look at the keyword extractions.
+
+ >>> from topia.postag import extract
+ >>> extractor = extract.KeywordExtractor()
+ >>> extractor
+ <KeywordExtractor using <Tagger for english>>
+
+As you can see, the extractor maintains a tagger:
+
+ >>> extractor.tagger
+ <Tagger for english>
+
+When creating an extractor, you can also pass in a tagger to avoid frequent
+tagger initialization:
+
+ >>> extractor = extract.KeywordExtractor(tagger)
+ >>> extractor.tagger is tagger
+ True
+
+Let's get the keywords for a simple text.
+
+ >>> extractor("The fox can't jump over the fox's tail.")
+ []
+
+We got no keywords. That's because by default at least 3 occurrences of a
+keyword must be detected, if the keyword consists of a single word.
+
+The extractor maintains a filter component. Let's register the trivial
+permissive filter, which simply returns everything that the extractor suggests:
+
+ >>> extractor.filter = extract.permissiveFilter
+ >>> extractor("The fox can't jump over the fox's tail.")
+ [('tail', 1, 1), ('fox', 2, 1)]
+
+But let's look at the default filter again, since it allows tweaking its
+parameters:
+
+ >>> extractor.filter = extract.DefaultFilter(singleStrengthMinOccur=2)
+ >>> extractor("The fox can't jump over the fox's tail.")
+ [('fox', 2, 1)]
+
+Let's now have a look at multi-word keywords. Oftentimes multi-word nouns and
+proper names occur only once or twice in a text. But they are often great
+keywords! To handle this scenario, the concept of "strength" was
+introduced. Currently the strength is simply the number of words in the
+keyword/term. By default, all keywords with a strength larger than 1 are
+selected regardless of the number of occurrences.
+
+ >>> extractor('The German consul of Boston resides in Newton.')
+ [('German consul', 1, 2)]
+
Modified: topia.postag/trunk/src/topia/postag/data/english-lexicon.txt
===================================================================
--- topia.postag/trunk/src/topia/postag/data/english-lexicon.txt 2009-05-30 15:13:25 UTC (rev 100551)
+++ topia.postag/trunk/src/topia/postag/data/english-lexicon.txt 2009-05-30 15:16:11 UTC (rev 100552)
@@ -17,6 +17,7 @@
} )
# #
$ $
+'t RB
Prizm NNP
shakeup NN
Laurance NNP
Modified: topia.postag/trunk/src/topia/postag/example.txt
===================================================================
--- topia.postag/trunk/src/topia/postag/example.txt 2009-05-30 15:13:25 UTC (rev 100551)
+++ topia.postag/trunk/src/topia/postag/example.txt 2009-05-30 15:16:11 UTC (rev 100552)
@@ -362,263 +362,263 @@
Let's look at the result of the tagger first:
- >>> extractor.tagger(text)
- [['police', 'NN', 'police'],
- ['shut', 'VBN', 'shut'],
- ['Palestinian', 'JJ', 'Palestinian'],
- ['theatre', 'NN', 'theatre'],
- ['in', 'IN', 'in'],
- ['Jerusalem', 'NNP', 'Jerusalem'],
- ['.', '.', '.'],
- ['Israeli', 'JJ', 'Israeli'],
- ['police', 'NN', 'police'],
- ['have', 'VBP', 'have'],
- ['shut', 'VBN', 'shut'],
- ['down', 'RB', 'down'],
- ['a', 'DT', 'a'],
- ['Palestinian', 'JJ', 'Palestinian'],
- ['theatre', 'NN', 'theatre'],
- ['in', 'IN', 'in'],
- ['East', 'NNP', 'East'],
- ['Jerusalem', 'NNP', 'Jerusalem'],
- ['.', '.', '.'],
- ['The', 'DT', 'The'],
- ['action', 'NN', 'action'],
- [',', ',', ','],
- ['on', 'IN', 'on'],
- ['Thursday', 'NNP', 'Thursday'],
- [',', ',', ','],
- ['prevented', 'VBN', 'prevented'],
- ['the', 'DT', 'the'],
- ['closing', 'VBG', 'closing'],
- ['event', 'NN', 'event'],
- ['of', 'IN', 'of'],
- ['an', 'DT', 'an'],
- ['international', 'JJ', 'international'],
- ['literature', 'NN', 'literature'],
- ['festival', 'NN', 'festival'],
- ['from', 'IN', 'from'],
- ['taking', 'VBG', 'taking'],
- ['place', 'NN', 'place'],
- ['.', '.', '.'],
- ['police', 'NN', 'police'],
- ['said', 'VBD', 'said'],
- ['they', 'PRP', 'they'],
- ['were', 'VBD', 'were'],
- ['acting', 'VBG', 'acting'],
- ['on', 'IN', 'on'],
- ['a', 'DT', 'a'],
- ['court', 'NN', 'court'],
- ['order', 'NN', 'order'],
- [',', ',', ','],
- ['issued', 'VBN', 'issued'],
- ['after', 'IN', 'after'],
- ['intelligence', 'NN', 'intelligence'],
- ['indicated', 'VBD', 'indicated'],
- ['that', 'IN', 'that'],
- ['the', 'DT', 'the'],
- ['Palestinian', 'JJ', 'Palestinian'],
- ['Authority', 'NNP', 'Authority'],
- ['was', 'VBD', 'was'],
- ['involved', 'VBN', 'involved'],
- ['in', 'IN', 'in'],
- ['the', 'DT', 'the'],
- ['event', 'NN', 'event'],
- ['.', '.', '.'],
- ['Israel', 'NNP', 'Israel'],
- ['has', 'VBZ', 'has'],
- ['occupied', 'VBN', 'occupied'],
- ['East', 'NNP', 'East'],
- ['Jerusalem', 'NNP', 'Jerusalem'],
- ['since', 'IN', 'since'],
- ['1967', 'NN', '1967'],
- ['and', 'CC', 'and'],
- ['has', 'VBZ', 'has'],
- ['annexed', 'VBD', 'annexed'],
- ['the', 'DT', 'the'],
- ['area', 'NN', 'area'],
- ['.', '.', '.'],
- ['This', 'DT', 'This'],
- ['is', 'VBZ', 'is'],
- ['not', 'RB', 'not'],
- ['recognised', 'VBD', 'recognised'],
- ['by', 'IN', 'by'],
- ['the', 'DT', 'the'],
- ['international', 'JJ', 'international'],
- ['community', 'NN', 'community'],
- ['.', '.', '.'],
- ['The', 'DT', 'The'],
- ['British', 'JJ', 'British'],
- ['consul-general', 'NN', 'consul-general'],
- ['in', 'IN', 'in'],
- ['Jerusalem', 'NNP', 'Jerusalem'],
- [',', ',', ','],
- ['Richard', 'NNP', 'Richard'],
- ['Makepeace', 'NNP', 'Makepeace'],
- [',', ',', ','],
- ['was', 'VBD', 'was'],
- ['attending', 'VBG', 'attending'],
- ['the', 'DT', 'the'],
- ['event', 'NN', 'event'],
- ['.', '.', '.'],
- ['"', '"', '"'],
- ['I', 'PRP', 'I'],
- ['think', 'VBP', 'think'],
- ['all', 'DT', 'all'],
- ['lovers', 'NNS', 'lover'],
- ['of', 'IN', 'of'],
- ['literature', 'NN', 'literature'],
- ['would', 'MD', 'would'],
- ['regard', 'NN', 'regard'],
- ['this', 'DT', 'this'],
- ['as', 'IN', 'as'],
- ['a', 'DT', 'a'],
- ['very', 'RB', 'very'],
- ['regrettable', 'JJ', 'regrettable'],
- ['moment', 'NN', 'moment'],
- ['and', 'CC', 'and'],
- ['regrettable', 'JJ', 'regrettable'],
- ['decision', 'NN', 'decision'],
- [',"', ',', ',"'],
- ['he', 'PRP', 'he'],
- ['added', 'VBD', 'added'],
- ['.', '.', '.'],
- ['Mr', 'NNP', 'Mr'],
- ['Makepeace', 'NNP', 'Makepeace'],
- ['said', 'VBD', 'said'],
- ['the', 'DT', 'the'],
- ['festival', 'NN', 'festival'],
- ["'s", 'POS', "'s"],
- ['closing', 'VBG', 'closing'],
- ['event', 'NN', 'event'],
- ['would', 'MD', 'would'],
- ['be', 'VB', 'be'],
- ['reorganised', 'NN', 'reorganised'],
- ['to', 'TO', 'to'],
- ['take', 'VB', 'take'],
- ['place', 'NN', 'place'],
- ['at', 'IN', 'at'],
- ['the', 'DT', 'the'],
- ['British', 'JJ', 'British'],
- ['Council', 'NNP', 'Council'],
- ['in', 'IN', 'in'],
- ['Jerusalem', 'NNP', 'Jerusalem'],
- ['.', '.', '.'],
- ['The', 'DT', 'The'],
- ['Israeli', 'JJ', 'Israeli'],
- ['authorities', 'NNS', 'authority'],
- ['often', 'RB', 'often'],
- ['take', 'VB', 'take'],
- ['action', 'NN', 'action'],
- ['against', 'IN', 'against'],
- ['events', 'NNS', 'event'],
- ['in', 'IN', 'in'],
- ['East', 'NNP', 'East'],
- ['Jerusalem', 'NNP', 'Jerusalem'],
- ['they', 'PRP', 'they'],
- ['see', 'VB', 'see'],
- ['as', 'IN', 'as'],
- ['connected', 'VBN', 'connected'],
- ['to', 'TO', 'to'],
- ['the', 'DT', 'the'],
- ['Palestinian', 'JJ', 'Palestinian'],
- ['Authority', 'NNP', 'Authority'],
- ['.', '.', '.'],
- ['Saturday', 'NNP', 'Saturday'],
- ["'s", 'POS', "'s"],
- ['opening', 'NN', 'opening'],
- ['event', 'NN', 'event'],
- ['at', 'IN', 'at'],
- ['the', 'DT', 'the'],
- ['same', 'JJ', 'same'],
- ['theatre', 'NN', 'theatre'],
- ['was', 'VBD', 'was'],
- ['also', 'RB', 'also'],
- ['shut', 'VBN', 'shut'],
- ['down', 'RB', 'down'],
- ['.', '.', '.'],
- ['A', 'DT', 'A'],
- ['police', 'NN', 'police'],
- ['notice', 'NN', 'notice'],
- ['said', 'VBD', 'said'],
- ['the', 'DT', 'the'],
- ['closure', 'NN', 'closure'],
- ['was', 'VBD', 'was'],
- ['on', 'IN', 'on'],
- ['the', 'DT', 'the'],
- ['orders', 'NNS', 'order'],
- ['of', 'IN', 'of'],
- ['Israel', 'NNP', 'Israel'],
- ["'s", 'POS', "'s"],
- ['internal', 'JJ', 'internal'],
- ['security', 'NN', 'security'],
- ['minister', 'NN', 'minister'],
- ['on', 'IN', 'on'],
- ['the', 'DT', 'the'],
- ['grounds', 'NNS', 'ground'],
- ['of', 'IN', 'of'],
- ['a', 'DT', 'a'],
- ['breach', 'NN', 'breach'],
- ['of', 'IN', 'of'],
- ['interim', 'JJ', 'interim'],
- ['peace', 'NN', 'peace'],
- ['accords', 'NNS', 'accord'],
- ['from', 'IN', 'from'],
- ['the', 'DT', 'the'],
- ['1990', 'NN', '1990'],
- ['s', 'PRP', 's'],
- ['.', '.', '.'],
- ['These', 'DT', 'These'],
- ['laid', 'VBN', 'laid'],
- ['the', 'DT', 'the'],
- ['framework', 'NN', 'framework'],
- ['for', 'IN', 'for'],
- ['talks', 'NNS', 'talk'],
- ['on', 'IN', 'on'],
- ['establishing', 'VBG', 'establishing'],
- ['a', 'DT', 'a'],
- ['Palestinian', 'JJ', 'Palestinian'],
- ['state', 'NN', 'state'],
- ['alongside', 'IN', 'alongside'],
- ['Israel', 'NNP', 'Israel'],
- [',', ',', ','],
- ['but', 'CC', 'but'],
- ['left', 'VBN', 'left'],
- ['the', 'DT', 'the'],
- ['status', 'NN', 'status'],
- ['of', 'IN', 'of'],
- ['Jerusalem', 'NNP', 'Jerusalem'],
- ['to', 'TO', 'to'],
- ['be', 'VB', 'be'],
- ['determined', 'VBN', 'determined'],
- ['by', 'IN', 'by'],
- ['further', 'JJ', 'further'],
- ['negotiation', 'NN', 'negotiation'],
- ['.', '.', '.'],
- ['Israel', 'NNP', 'Israel'],
- ['has', 'VBZ', 'has'],
- ['annexed', 'VBD', 'annexed'],
- ['East', 'NNP', 'East'],
- ['Jerusalem', 'NNP', 'Jerusalem'],
- ['and', 'CC', 'and'],
- ['declares', 'VBZ', 'declares'],
- ['it', 'PRP', 'it'],
- ['part', 'NN', 'part'],
- ['of', 'IN', 'of'],
- ['its', 'PRP$', 'its'],
- ['eternal', 'JJ', 'eternal'],
- ['capital', 'NN', 'capital'],
- ['.', '.', '.'],
- ['Palestinians', 'NNPS', 'Palestinian'],
- ['hope', 'NN', 'hope'],
- ['to', 'TO', 'to'],
- ['establish', 'VB', 'establish'],
- ['their', 'PRP$', 'their'],
- ['capital', 'NN', 'capital'],
- ['in', 'IN', 'in'],
- ['the', 'DT', 'the'],
- ['area', 'NN', 'area'],
- ['.', '.', '.']]
+ >>> printTaggedTerms(extractor.tagger(text)) #doctest: +REPORT_NDIFF
+ police NN police
+ shut VBN shut
+ Palestinian JJ Palestinian
+ theatre NN theatre
+ in IN in
+ Jerusalem NNP Jerusalem
+ . . .
+ Israeli JJ Israeli
+ police NN police
+ have VBP have
+ shut VBN shut
+ down RB down
+ a DT a
+ Palestinian JJ Palestinian
+ theatre NN theatre
+ in IN in
+ East NNP East
+ Jerusalem NNP Jerusalem
+ . . .
+ The DT The
+ action NN action
+ , , ,
+ on IN on
+ Thursday NNP Thursday
+ , , ,
+ prevented VBN prevented
+ the DT the
+ closing VBG closing
+ event NN event
+ of IN of
+ an DT an
+ international JJ international
+ literature NN literature
+ festival NN festival
+ from IN from
+ taking VBG taking
+ place NN place
+ . . .
+ police NN police
+ said VBD said
+ they PRP they
+ were VBD were
+ acting VBG acting
+ on IN on
+ a DT a
+ court NN court
+ order NN order
+ , , ,
+ issued VBN issued
+ after IN after
+ intelligence NN intelligence
+ indicated VBD indicated
+ that IN that
+ the DT the
+ Palestinian JJ Palestinian
+ Authority NNP Authority
+ was VBD was
+ involved VBN involved
+ in IN in
+ the DT the
+ event NN event
+ . . .
+ Israel NNP Israel
+ has VBZ has
+ occupied VBN occupied
+ East NNP East
+ Jerusalem NNP Jerusalem
+ since IN since
+ 1967 NN 1967
+ and CC and
+ has VBZ has
+ annexed VBD annexed
+ the DT the
+ area NN area
+ . . .
+ This DT This
+ is VBZ is
+ not RB not
+ recognised VBD recognised
+ by IN by
+ the DT the
+ international JJ international
+ community NN community
+ . . .
+ The DT The
+ British JJ British
+ consul-general NN consul-general
+ in IN in
+ Jerusalem NNP Jerusalem
+ , , ,
+ Richard NNP Richard
+ Makepeace NNP Makepeace
+ , , ,
+ was VBD was
+ attending VBG attending
+ the DT the
+ event NN event
+ . . .
+ " " "
+ I PRP I
+ think VBP think
+ all DT all
+ lovers NNS lover
+ of IN of
+ literature NN literature
+ would MD would
+ regard VB regard
+ this DT this
+ as IN as
+ a DT a
+ very RB very
+ regrettable JJ regrettable
+ moment NN moment
+ and CC and
+ regrettable JJ regrettable
+ decision NN decision
+ ," , ,"
+ he PRP he
+ added VBD added
+ . . .
+ Mr NNP Mr
+ Makepeace NNP Makepeace
+ said VBD said
+ the DT the
+ festival NN festival
+ 's POS 's
+ closing VBG closing
+ event NN event
+ would MD would
+ be VB be
+ reorganised NN reorganised
+ to TO to
+ take VB take
+ place NN place
+ at IN at
+ the DT the
+ British JJ British
+ Council NNP Council
+ in IN in
+ Jerusalem NNP Jerusalem
+ . . .
+ The DT The
+ Israeli JJ Israeli
+ authorities NNS authority
+ often RB often
+ take VB take
+ action NN action
+ against IN against
+ events NNS event
+ in IN in
+ East NNP East
+ Jerusalem NNP Jerusalem
+ they PRP they
+ see VB see
+ as IN as
+ connected VBN connected
+ to TO to
+ the DT the
+ Palestinian JJ Palestinian
+ Authority NNP Authority
+ . . .
+ Saturday NNP Saturday
+ 's POS 's
+ opening NN opening
+ event NN event
+ at IN at
+ the DT the
+ same JJ same
+ theatre NN theatre
+ was VBD was
+ also RB also
+ shut VBN shut
+ down RB down
+ . . .
+ A DT A
+ police NN police
+ notice NN notice
+ said VBD said
+ the DT the
+ closure NN closure
+ was VBD was
+ on IN on
+ the DT the
+ orders NNS order
+ of IN of
+ Israel NNP Israel
+ 's POS 's
+ internal JJ internal
+ security NN security
+ minister NN minister
+ on IN on
+ the DT the
+ grounds NNS ground
+ of IN of
+ a DT a
+ breach NN breach
+ of IN of
+ interim JJ interim
+ peace NN peace
+ accords NNS accord
+ from IN from
+ the DT the
+ 1990 NN 1990
+ s PRP s
+ . . .
+ These DT These
+ laid VBN laid
+ the DT the
+ framework NN framework
+ for IN for
+ talks NNS talk
+ on IN on
+ establishing VBG establishing
+ a DT a
+ Palestinian JJ Palestinian
+ state NN state
+ alongside IN alongside
+ Israel NNP Israel
+ , , ,
+ but CC but
+ left VBN left
+ the DT the
+ status NN status
+ of IN of
+ Jerusalem NNP Jerusalem
+ to TO to
+ be VB be
+ determined VBN determined
+ by IN by
+ further JJ further
+ negotiation NN negotiation
+ . . .
+ Israel NNP Israel
+ has VBZ has
+ annexed VBD annexed
+ East NNP East
+ Jerusalem NNP Jerusalem
+ and CC and
+ declares VBZ declares
+ it PRP it
+ part NN part
+ of IN of
+ its PRP$ its
+ eternal JJ eternal
+ capital NN capital
+ . . .
+ Palestinians NNPS Palestinian
+ hope NN hope
+ to TO to
+ establish VB establish
+ their PRP$ their
+ capital NN capital
+ in IN in
+ the DT the
+ area NN area
+ . . .
Let's now apply the extractor.
Modified: topia.postag/trunk/src/topia/postag/extract.py
===================================================================
--- topia.postag/trunk/src/topia/postag/extract.py 2009-05-30 15:13:25 UTC (rev 100551)
+++ topia.postag/trunk/src/topia/postag/extract.py 2009-05-30 15:16:11 UTC (rev 100552)
@@ -22,10 +22,19 @@
SEARCH = 0
NOUN = 1
-def defaultFilter(word, occur, strength):
- return ((strength == 1 and occur >= 3) or
- (strength >= 2))
+def permissiveFilter(word, occur, strength):
+ return True
+class DefaultFilter(object):
+
+ def __init__(self, singleStrengthMinOccur=3, noLimitStrength=2):
+ self.singleStrengthMinOccur = singleStrengthMinOccur
+ self.noLimitStrength = noLimitStrength
+
+ def __call__(self, word, occur, strength):
+ return ((strength == 1 and occur >= self.singleStrengthMinOccur) or
+ (strength >= self.noLimitStrength))
+
def _add(term, norm, keyword, keywords):
keyword.append((term, norm))
keywords.setdefault(norm, 0)
@@ -34,11 +43,13 @@
class KeywordExtractor(object):
zope.interface.implements(interfaces.IKeywordExtractor)
- def __init__(self, tagger=None, filter=defaultFilter):
+ def __init__(self, tagger=None, filter=None):
if tagger is None:
tagger = tag.Tagger()
tagger.initialize()
self.tagger = tagger
+ if filter is None:
+ filter = DefaultFilter()
self.filter = filter
def extract(self, terms):
Modified: topia.postag/trunk/src/topia/postag/tag.py
===================================================================
--- topia.postag/trunk/src/topia/postag/tag.py 2009-05-30 15:13:25 UTC (rev 100551)
+++ topia.postag/trunk/src/topia/postag/tag.py 2009-05-30 15:16:11 UTC (rev 100552)
@@ -46,6 +46,21 @@
tagged_term[0] = tagged_term[2] = lower_term
tagged_term[1] = lower_tag
+def determineVerbAfterModal(idx, tagged_term, tagged_terms, lexicon):
+ "Determine the verb after a modal verb to avoid accidental noun detection."
+ term, tag, norm = tagged_term
+ if tag != 'MD':
+ return
+ len_terms = len(tagged_terms)
+ idx += 1
+ while idx < len_terms:
+ if tagged_terms[idx][1] == 'RB':
+ idx += 1
+ continue
+ if tagged_terms[idx][1] == 'NN':
+ tagged_terms[idx][1] = 'VB'
+ break
+
def normalizePluralForms(idx, tagged_term, tagged_terms, lexicon):
term, tag, norm = tagged_term
if tag in ('NNS', 'NNPS') and term == norm:
@@ -75,6 +90,7 @@
rules = (
correctDefaultNounTag,
verifyProperNounAtSentenceStart,
+ determineVerbAfterModal,
normalizePluralForms,
)
Modified: topia.postag/trunk/src/topia/postag/tests.py
===================================================================
--- topia.postag/trunk/src/topia/postag/tests.py 2009-05-30 15:13:25 UTC (rev 100551)
+++ topia.postag/trunk/src/topia/postag/tests.py 2009-05-30 15:16:11 UTC (rev 100552)
@@ -20,6 +20,13 @@
from zope.testing import doctest
from zope.testing.doctestunit import DocFileSuite
+def printTaggedTerms(terms):
+ for term, tag, norm in terms:
+ print (
+ term + ' '*(16-len(term)) +
+ tag + ' '*(6-len(tag)) +
+ norm )
+
def test_suite():
return unittest.TestSuite((
DocFileSuite(
@@ -28,6 +35,7 @@
),
DocFileSuite(
'example.txt',
+ globs={'printTaggedTerms': printTaggedTerms},
optionflags=doctest.NORMALIZE_WHITESPACE|doctest.ELLIPSIS,
),
))
More information about the Checkins
mailing list