第 0004 题: 任一个英文的纯文本文件,统计其中的单词出现的个数。
分析:从网上下载一份文件名为walden.txt的文件,截取了部分段落进行统计。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
from collections import Counter

# Punctuation removed from the end of each token before counting.
TRAILING_PUNCTUATION = ",.!?"


def count_words(text, trailing_punctuation=TRAILING_PUNCTUATION):
    """Count how often each word occurs in *text*.

    The text is split on whitespace. Each token has any characters listed in
    *trailing_punctuation* removed from its end and is lowercased. Tokens
    that are empty after stripping (for example a standalone "," or ".")
    are ignored.

    Args:
        text: The text to analyse.
        trailing_punctuation: Characters to strip from the end of each token.

    Returns:
        A plain dict mapping each word to its number of occurrences, with
        keys in order of first appearance in *text*.
    """
    # Build a new sequence instead of deleting from the token list while
    # indexing into it, which skips elements and can raise IndexError.
    words = (
        token.rstrip(trailing_punctuation).lower() for token in text.split()
    )
    # Counter tallies in a single pass; dict() keeps the printed form a
    # plain dict literal in first-seen order.
    return dict(Counter(word for word in words if word))


def main(path="walden.txt"):
    """Read the file at *path*, count its words, and print the result."""
    with open(path, "r", encoding="utf-8") as f:
        text = f.read()
    dict_word = count_words(text)
    print(dict_word)


if __name__ == "__main__":
    main()
输出:
1 |
{'i': 21, 'should': 4, 'not': 10, 'obtrude': 1, 'my': 11, 'affairs': 1, 'so': 3, 'much': 3, 'on': 6, 'the': 29, 'notice': 1, 'of': 25, 'readers': 3, 'if': 7, 'very': 2, 'particular': 2, 'inquiries': 1, 'had': 3, 'been': 4, 'made': 2, 'by': 4, 'townsmen': 2, 'concerning': 2, 'mode': 1, 'life': 5, 'which': 3, 'some': 5, 'would': 3, 'call': 1, 'impertinent': 2, 'though': 1, 'they': 12, 'do': 3, 'appear': 1, 'to': 24, 'me': 5, 'at': 3, 'all': 3, 'but': 4, 'considering': 1, 'circumstances': 2, 'natural': 2, 'and': 19, 'pertinent': 1, 'have': 13, 'asked': 1, 'what': 6, 'got': 3, 'eat;': 1, 'did': 1, 'feel': 2, 'lonesome;': 1, 'was': 1, 'afraid;': 1, 'like': 2, 'others': 1, 'curious': 1, 'learn': 1, 'portion': 1, 'income': 1, 'devoted': 1, 'charitable': 1, 'purposes;': 1, 'who': 6, 'large': 1, 'families': 1, 'how': 2, 'many': 2, 'poor': 3, 'children': 1, 'maintained': 1, 'will': 4, 'therefore': 1, 'ask': 1, 'those': 2, 'no': 3, 'interest': 1, 'in': 17, 'pardon': 1, 'undertake': 1, 'answer': 1, 'these': 7, 'questions': 1, 'this': 5, 'book': 1, 'most': 1, 'books': 1, 'or': 10, 'first': 3, 'person': 2, 'is': 10, 'omitted;': 1, 'it': 14, 'be': 4, 'retained;': 1, 'that': 7, 'respect': 1, 'egotism': 1, 'main': 1, 'difference': 1, 'we': 1, 'commonly': 1, 'remember': 1, 'after': 1, 'always': 1, 'speaking': 1, 'talk': 1, 'about': 2, 'myself': 1, 'there': 1, 'were': 4, 'anybody': 1, 'else': 1, 'whom': 2, 'knew': 1, 'as': 15, 'well': 3, 'unfortunately': 1, 'am': 1, 'confined': 1, 'theme': 1, 'narrowness': 1, 'experience': 1, 'moreover': 1, 'side': 1, 'require': 1, 'every': 1, 'writer': 1, 'last': 1, 'a': 12, 'simple': 1, 'sincere': 1, 'account': 2, 'his': 3, 'own': 1, 'merely': 1, 'he': 3, 'has': 2, 'heard': 2, 'other': 1, "men's": 1, 'lives;': 1, 'such': 3, 'send': 1, 'kindred': 1, 'from': 2, 'distant': 2, 'land;': 1, 'for': 7, 'lived': 1, 'sincerely': 1, 'must': 1, 'land': 2, 'perhaps': 1, 'pages': 2, 'are': 5, 'more': 3, 'particularly': 1, 'addressed': 1, 'students': 1, 'rest': 
1, 'accept': 1, 'portions': 1, 'apply': 1, 'them': 4, 'trust': 1, 'none': 1, 'stretch': 1, 'seams': 1, 'putting': 1, 'coat': 1, 'may': 1, 'good': 2, 'service': 1, 'him': 1, 'fits': 1, 'fain': 1, 'say': 1, 'something': 2, 'chinese': 1, 'sandwich': 1, 'islanders': 1, 'you': 1, 'read': 1, 'said': 1, 'live': 2, 'new': 1, 'england;': 1, 'your': 2, 'condition': 2, 'especially': 1, 'outward': 1, 'world': 1, 'town': 1, 'whether': 2, 'necessary': 1, 'bad': 1, 'cannot': 1, 'improved': 1, 'travelled': 1, 'deal': 1, 'concord;': 1, 'everywhere': 1, 'shops': 1, 'offices': 1, 'fields': 1, 'inhabitants': 1, 'appeared': 1, 'doing': 1, 'penance': 2, 'thousand': 1, 'remarkable': 1, 'ways': 1, 'bramins': 1, 'sitting': 1, 'exposed': 1, 'four': 1, 'fires': 1, 'looking': 2, 'face': 1, 'sun;': 1, 'hanging': 1, 'suspended': 1, 'with': 6, 'their': 6, 'heads': 1, 'downward': 1, 'over': 2, 'flames;': 1, 'heavens': 1, 'shoulders': 1, '"until': 1, 'becomes': 1, 'impossible': 1, 'resume': 1, 'position': 1, 'while': 1, 'twist': 1, 'neck': 1, 'nothing': 1, 'liquids': 1, 'can': 2, 'pass': 1, 'into': 1, 'stomach";': 1, 'dwelling': 1, 'chained': 1, 'foot': 1, 'tree;': 1, 'measuring': 1, 'bodies': 1, 'caterpillars': 1, 'breadth': 1, 'vast': 1, 'empires;': 1, 'standing': 1, 'one': 3, 'leg': 1, 'tops': 1, 'pillars-': 1, 'even': 1, 'forms': 1, 'conscious': 1, 'hardly': 1, 'incredible': 1, 'astonishing': 1, 'than': 2, 'scenes': 1, 'daily': 1, 'witness': 1, 'twelve': 2, 'labors': 1, 'hercules': 1, 'trifling': 1, 'comparison': 1, 'neighbors': 1, 'undertaken;': 1, 'only': 2, 'an': 1, 'end;': 1, 'could': 1, 'never': 2, 'see': 2, 'men': 2, 'slew': 1, 'captured': 1, 'any': 2, 'monster': 1, 'finished': 1, 'labor': 3, 'friend': 1, 'iolaus': 1, 'burn': 1, 'hot': 1, 'iron': 1, 'root': 1, "hydra's": 1, 'head': 2, 'soon': 2, 'crushed': 2, 'two': 1, 'spring': 1, 'up': 1, 'young': 1, 'whose': 1, 'misfortune': 1, 'inherited': 2, 'farms': 1, 'houses': 1, 'barns': 1, 'cattle': 1, 'farming': 1, 'tools;': 1, 'easily': 1, 
'acquired': 1, 'rid': 1, 'better': 1, 'born': 2, 'open': 1, 'pasture': 2, 'suckled': 1, 'wolf': 1, 'might': 1, 'seen': 1, 'clearer': 1, 'eyes': 1, 'field': 1, 'called': 1, 'serfs': 1, 'soil': 1, 'why': 2, 'eat': 2, 'sixty': 1, 'acres': 2, 'when': 1, 'man': 1, 'condemned': 1, 'peck': 1, 'dirt': 1, 'begin': 1, 'digging': 1, 'graves': 1, "man's": 1, 'pushing': 2, 'things': 1, 'before': 2, 'get': 1, 'immortal': 1, 'soul': 1, 'met': 1, 'well-nigh': 1, 'smothered': 1, 'under': 1, 'its': 2, 'load': 1, 'creeping': 1, 'down': 1, 'road': 1, 'barn': 1, 'seventy-five': 1, 'feet': 2, 'forty': 1, 'augean': 1, 'stables': 1, 'cleansed': 1, 'hundred': 1, 'tillage': 1, 'mowing': 1, 'woodlot': 1, 'portionless': 1, 'struggle': 1, 'unnecessary': 1, 'encumbrances': 1, 'find': 1, 'enough': 1, 'subdue': 1, 'cultivate': 1, 'few': 1, 'cubic': 1, 'flesh': 1} |