WordCloudつくってみためも

とりあえず MeCab に食わせてみる

 cat input.txt | mecab

Jupyter で WordCloud をつくる

import MeCab
from matplotlib import pyplot as plt
from wordcloud import WordCloud

# Load the entire input document into memory as UTF-8 text.
with open('input.txt', 'rt', encoding='utf-8') as src:
    source_text = src.read()

# Build the MeCab tagger used for morphological analysis of the input text.
tagger = MeCab.Tagger()
# Parse an empty string once before parseToNode().
# NOTE(review): this looks like the commonly cited mecab-python3 workaround for
# unreadable node.surface values -- confirm it is still needed with the
# installed version before removing it.
tagger.parse('')
node = tagger.parseToNode(source_text)

# Noun sub-categories (second feature field) whose words go into the cloud.
# NOTE(review): assumes an IPADIC-style feature layout where field 0 is the
# part of speech and field 1 its sub-category -- verify for other dictionaries.
target_subtypes = {'一般', 'サ変接続'}

# Walk the linked list of nodes and collect the surface form of each
# matching noun, in document order.
wl = []
while node:
    # Split the comma-separated feature string only once per node.
    features = node.feature.split(',')
    # The length guard avoids an IndexError if a dictionary yields fewer
    # than two feature fields for some node.
    if len(features) >= 2 and features[0] == '名詞' and features[1] in target_subtypes:
        wl.append(node.surface)
    node = node.next

# WordCloud.generate() is given one string, so join the collected words
# with single spaces.
word_chain = ' '.join(wl)

# Rendering options for the cloud image (1280x720, white background).
cloud_options = dict(
    width=1280,
    height=720,
    prefer_horizontal=1,
    background_color='white',
    colormap='bone',
    # A font containing Japanese glyphs; this path is a macOS system font.
    font_path='/System/Library/Fonts/ヒラギノ明朝 ProN.ttc',
)
cloud = WordCloud(**cloud_options)
wc = cloud.generate(word_chain)

# Save the rendered cloud as an image file.
wc.to_file("out.png")

# Display the generated word cloud inline via matplotlib.
plt.imshow(wc)
# Turn the axes off so only the image is shown.
plt.axis('off')
plt.show()