This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def coroutine(func):
    """Decorate a generator function so its coroutines start automatically.

    Calling the decorated function creates the generator and advances it to
    its first ``yield``, so the caller can use ``send()`` right away without
    priming it by hand.

    Args:
        func: A generator function written as a coroutine.

    Returns:
        A wrapper that takes the same arguments as ``func`` and returns the
        already-primed generator object.
    """
    from functools import wraps

    @wraps(func)  # keep the wrapped function's name and docstring
    def start(*args, **kwargs):
        coro = func(*args, **kwargs)
        # The builtin next() works on Python 2.6+ and 3.x; the generator
        # method ``.next()`` exists only on Python 2.
        next(coro)
        return coro
    return start
@coroutine
def ngrams(n, target):
    """A coroutine that generates n-grams from a stream of items.

    Accepts one char at a time via ``send()``. Once ``n`` items have been
    buffered, every further item produces a new window of the ``n`` most
    recent items, which is forwarded to ``target``.

    Args:
        n: Window length.
        target: An object with a ``send()`` method (e.g. another coroutine)
            that receives each n-gram.
    """
    window = collections.deque()
    while True:
        window.append((yield))
        if len(window) == n:
            # Send an immutable snapshot rather than the live deque: the
            # buffer is mutated right below, which would silently change
            # any n-gram a downstream consumer kept a reference to.
            target.send(tuple(window))
            window.popleft()
I need to filter the text before generating n-grams, and I also want to process the n-grams (in this case, count bigrams).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@coroutine
def filter_chars(accepted_chars, target):
    """A coroutine that drops characters not in ``accepted_chars``.

    Accepts one char at a time. Each received char is lower-cased; if the
    lower-cased form is in ``accepted_chars`` it is forwarded to ``target``,
    otherwise it is discarded.
    """
    while True:
        lowered = (yield).lower()
        if lowered not in accepted_chars:
            continue
        target.send(lowered)
@coroutine
def counter(matrix):
    """A sink coroutine that tallies received pairs into ``matrix``.

    Each received value is unpacked into two items; the cell addressed by
    their ``pos`` indices (row for the first, column for the second) is
    incremented by one.
    """
    while True:
        first, second = yield
        row = matrix[pos[first]]
        row[pos[second]] += 1
I then chain my coroutines together into a single pipeline:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# k-by-k table of bigram counts, every cell starting at 10
# (presumably a smoothing prior so unseen pairs are not zero; confirm).
counts = [[10] * k for _ in xrange(k)]

# Pipeline: filter/lower-case chars -> sliding 2-grams -> count into `counts`.
bigrams = filter_chars(accepted_chars, ngrams(2, counter(counts)))

# Use a context manager so the file handle is closed deterministically
# instead of being left to the garbage collector.
with open('big.txt') as corpus:
    text = corpus.read().decode(enc)

for c in text:
    bigrams.send(c)
Full source can be found in my fork of rrenaud's gibberish detector.