Python 2.4.4 (#1, Oct 18 2006, 10:34:39) [GCC 4.0.1 (Apple Computer, Inc. build 5341)] on darwin Type "copyright", "credits" or "license()" for more information. **************************************************************** Personal firewall software may warn about the connection IDLE makes to its subprocess using this computer's internal loopback interface. This connection is not visible on any external interface and no data is sent to or received from the Internet. **************************************************************** IDLE 1.1.4 >>> print u'еяерс' Unsupported characters in input >>> >>> print u'\u015bl\u0105sk' śląsk >>> print u'\u015bl\u0105sk' śląsk >>> ================================ RESTART ================================ >>> >>> words = nltk.corpus.gutenberg.words() >>> len(words) 2055271 >>> import nltk >>> nltk.corpus.gutenberg >>> r = corpusNgrams('austen-persuasion', 1) >>> r.items()[:10] Traceback (most recent call last): File "", line 1, in -toplevel- r.items()[:10] AttributeError: 'NoneType' object has no attribute 'items' >>> ================================ RESTART ================================ >>> >>> ================================ RESTART ================================ SyntaxError: invalid syntax >>> >>> >>> >>> >>> ================================ RESTART ================================ >>> >>> r = corpusNgrams('austen-persuasion', 1) >>> len(r) 6134 >>> r.items()[:10] [('foul', 3.0552698312472627e-05), ('four', 0.0002138688881873084), ('Does', 3.0552698312472627e-05), ('hanging', 3.0552698312472627e-05), ('woody', 1.018423277082421e-05), ('looking', 0.00045829047468708947), ('eligible', 1.018423277082421e-05), ('unanswered', 1.018423277082421e-05), ('Western', 1.018423277082421e-05), ('lord', 1.018423277082421e-05)] >>> sorted([(freq, word) for (word, freq) in r.items()])[:10] [(1.018423277082421e-05, '!--'), (1.018423277082421e-05, "'."), (1.018423277082421e-05, ').'), (1.018423277082421e-05, ',"--'), (1.018423277082421e-05, ',"--"'), (1.018423277082421e-05, ',)--'), (1.018423277082421e-05, ',--`'), (1.018423277082421e-05, '--!'), (1.018423277082421e-05, '--('), (1.018423277082421e-05, '---')] >>> sorted([(freq, word) for (word, freq) in r.items()], reverse=True)[:10] [(0.068743571203063417, ','), (0.031784990477742359, 'the'), (0.028261245939037182, 'to'), (0.027914982024829161, '.'), (0.027894613559287511, 'and'), (0.026142925522705748, 'of'), (0.015571691906590216, 'a'), (0.013707977309529386, 'in'), (0.013545029585196199, 'was'), (0.013137660274363231, ';')] >>> import random >>> sum(r.items()) Traceback (most recent call last): File "", line 1, in -toplevel- sum(r.items()) File "/Library/Frameworks/Python.framework/Versions/2.4/lib/python2.4/site-packages/numpy-1.0.3.1-py2.4-macosx-10.3-fat.egg/numpy/oldnumeric/functions.py", line 22, in sum return N.sum(x, axis) File "/Library/Frameworks/Python.framework/Versions/2.4/lib/python2.4/site-packages/numpy-1.0.3.1-py2.4-macosx-10.3-fat.egg/numpy/core/fromnumeric.py", line 633, in sum return _wrapit(x, 'sum', axis, dtype, out) File "/Library/Frameworks/Python.framework/Versions/2.4/lib/python2.4/site-packages/numpy-1.0.3.1-py2.4-macosx-10.3-fat.egg/numpy/core/fromnumeric.py", line 37, in _wrapit result = getattr(asarray(obj),method)(*args, **kwds) TypeError: cannot perform reduce with flexible type >>> sum(r.values()) 1.0 >>> ================================ RESTART ================================ >>> >>> r = corusNgrams('austen-persuasion', 1) Traceback (most recent call last): File "", line 1, in -toplevel- r = corusNgrams('austen-persuasion', 1) NameError: name 'corusNgrams' is not defined >>> r = corpusNgrams('austen-persuasion', 1) >>> generateText(r, 100) Traceback (most recent call last): File "", line 1, in -toplevel- generateText(r, 100) File "/Users/max/zipf.py", line 43, in generateText gram = randomGram(ngrams) File "/Users/max/zipf.py", line 32, in randomGram assert(sum(ngrams.values()) == 1.0) AssertionError >>> sum(r.values()) 1.0 >>> sum(r.values()) == 1.0 False >>> ================================ RESTART ================================ >>> >>> r = corpusNgrams('austen-persuasion', 1) >>> generateText(r, 100) "sort or day long each s value the from it another Cottage walking left at The Bless was in walked spread in as , fancied s , ' hand left evil Elliot time an have being would nay still herself pretty , , on could there left t within . for superiority sentiment was by death young his good as we appeared She a she he advantage of ; of never money He parties Miss ; all . , me new a , of gloomy elegant Crofts by anything much , It life theirs they own Bath the be six" >>> "sort or day long each s value the from it another Cottage walking left at The Bless was in walked spread in as , fancied s , ' hand left evil Elliot time an have being would nay still herself pretty , , on could there left t within . for superiority sentiment was by death young his good as we appeared She a she he advantage of ; of never money He parties Miss ; all . , me new a , of gloomy elegant Crofts by anything much , It life theirs they own Bath the be six" "sort or day long each s value the from it another Cottage walking left at The Bless was in walked spread in as , fancied s , ' hand left evil Elliot time an have being would nay still herself pretty , , on could there left t within . for superiority sentiment was by death young his good as we appeared She a she he advantage of ; of never money He parties Miss ; all . , me new a , of gloomy elegant Crofts by anything much , It life theirs they own Bath the be six" >>> help(list.pop) Help on method_descriptor: pop(...) L.pop([index]) -> item -- remove and return item at index (default last) >>> ================================ RESTART ================================ >>> >>> ngrams = corpusNgrams('austen-persuasion', 2) >>> len(ngrams) 41576 >>> len(corpusNgrams('austen-persuasion', 1)) 6134 >>> ngrams.items()[:10] [(('her', 'taste'), 1.0184440212243734e-05), (('though', 'November'), 1.0184440212243734e-05), (('the', 'farmer'), 1.0184440212243734e-05), (('bit', 'did'), 1.0184440212243734e-05), (('last', 'June'), 1.0184440212243734e-05), (('be', 'beautiful'), 1.0184440212243734e-05), (('recollecting', 'what'), 1.0184440212243734e-05), (('September', ';'), 1.0184440212243734e-05), (('have', 'made'), 0.0001222132825469248), (('forbearing', 'feelings'), 1.0184440212243734e-05)] >>> ngrams = corpusNgrams('austen-persuasion', 4) >>> len(ngrams) 93584 >>> bigrams = corpusNgrams('austen-persuasion', 2) >>> generateText(bigrams, 100) Traceback (most recent call last): File "", line 1, in -toplevel- generateText(bigrams, 100) File "/Users/max/zipf.py", line 52, in generateText return ' '.join(text) TypeError: sequence item 0: expected string, tuple found >>> ================================ RESTART ================================ >>> >>> ================================ RESTART ================================ >>> >>> bigrams = corpusNgrams('austen-persuasion', 2) >>> unigrams = corpusNgrams('austen-persuasion', 1) >>> generateText(unigrams, 100) 'at replied , reflection rather motion what terms ," historian Mr been We then talked Anne Admiral my all . looked make Mrs . afford and his expected ; she above morning me Captain saw This give perhaps a speak all must countenance .--," the : ; a did the indifferent and he of a have were prevent ( - nothing and Louisa neglected she of him Cheshire he , sensible A the a in once grandfathers thrown little proof I " Kellynch belonged . Honourable to great the impression yet thing , , blows found and either so Hall' >>> bigrams = corpusNgrams('austen-persuasion', 2) >>> generateText(bigrams, 100) 'may seem had been usual style pure , mind destroyed . The nearly as had never he was could have had nothing course almost dear Anne in short ! Anne was cleared only say at Uppercross the beginning , had a blister Anne , her to Mary : house ; the last , was . I reached , blush that in orders enough to though partially away , could have ; a every little hand across , warm long enough prosperity in gradually learning he had though dreading . This . The to take or some under my way ;' >>> trigrams = corpusNgrams('austen-persuasion', 3) >>> generateText(trigrams, 100) 'beginning to end walk the young , and he which , as this point that It had been exactly ready for their party at he was at ; and her in defence of you , when match should have spite of herself bad business indeed sister , sat to vex her send you word or men of re - urge to such a replied Anne . to hope she love with him six miles another understanding would supply in my father design letting his ; I remember Sir Walter . s cheeks and fine country about saw that he of Monkford ,' >>> 'beginning to end walk the young , and he which , as this point that It had been exactly ready for their party at he was at ; and her in defence of you , when match should have spite of herself bad business indeed sister , sat to vex her send you word or men of re - urge to such a replied Anne . to hope she love with him six miles another understanding would supply in my father design letting his ; I remember Sir Walter . s cheeks and fine country about saw that he of Monkford ,' 'beginning to end walk the young , and he which , as this point that It had been exactly ready for their party at he was at ; and her in defence of you , when match should have spite of herself bad business indeed sister , sat to vex her send you word or men of re - urge to such a replied Anne . to hope she love with him six miles another understanding would supply in my father design letting his ; I remember Sir Walter . s cheeks and fine country about saw that he of Monkford ,' >>> help(random.random) Help on built-in function random: random(...) random() -> x in the interval [0, 1). >>> random.random(2.0) Traceback (most recent call last): File "", line 1, in -toplevel- random.random(2.0) TypeError: random() takes no arguments (1 given) >>> ================================ RESTART ================================ >>> >>> random.uniform(0.0, 0.5) 0.1481182823450134 >>> trigrams = corpusNgrams('austen-persuasion', 3) >>> generateText(trigrams, 100) Traceback (most recent call last): File "", line 1, in -toplevel- generateText(trigrams, 100) File "/Users/max/zipf.py", line 48, in generateText slidingWindow = list(randomGrams(ngrams)) NameError: global name 'randomGrams' is not defined >>> ================================ RESTART ================================ >>> >>> trigrams = corpusNgrams('austen-persuasion', 3) >>> generateText(trigrams, 100) Traceback (most recent call last): File "", line 1, in -toplevel- generateText(trigrams, 100) File "/Users/max/zipf.py", line 48, in generateText slidingWindow = list(randomGram(ngrams)) File "/Users/max/zipf.py", line 40, in randomGram num = random.uniform(0.0, sum([freq for gram,freq in ngrams])) ValueError: too many values to unpack >>> ================================ RESTART ================================ >>> >>> trigrams = corpusNgrams('austen-persuasion', 3) >>> generateText(trigrams, 100) Traceback (most recent call last): File "", line 1, in -toplevel- generateText(trigrams, 100) File "/Users/max/zipf.py", line 48, in generateText slidingWindow = list(randomGram(ngrams)) File "/Users/max/zipf.py", line 40, in randomGram num = random.random()* sum([freq for gram,freq in ngrams]) ValueError: too many values to unpack >>> ================================ RESTART ================================ >>> >>> trigrams = corpusNgrams('austen-persuasion', 3) >>> generateText(trigrams, 100)