以文字雲分析敘述性的轉院原因
- 匯入套件
- 匯入資料
- 將轉院原因進行斷詞分割
- 計算各詞彙出現頻率
library(wordcloud)
## Loading required package: RColorBrewer
library(data.table)
library(tm)
## Loading required package: NLP
library(jiebaR)
## Loading required package: jiebaRD
library(tmcn)
## # tmcn Version: 0.2-8
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
##
## annotate
# Read the transfer-reason records as UTF-8 (the column names used below are
# Chinese) and keep only the rows whose exclusion column (排除) is empty.
EMOC_tm <- fread("EMOC_tm.csv", encoding = "UTF-8")
EMOC_tm <- EMOC_tm[排除 == ""]

# Segment the free-text transfer reasons (轉院原因分析) into words using a
# jiebaR worker created with its default settings.
wk <- worker()
EMOC_tm_seg <- segment(EMOC_tm[, 轉院原因分析], wk)

# Build a corpus in which every element of the segmentation result is its own
# document. VectorSource is used rather than DataframeSource because recent
# tm releases (0.7-2 onwards) only accept data frames that carry the mandatory
# `doc_id` and `text` columns, which a bare one-column data frame lacks.
ap.corpus <- Corpus(VectorSource(as.character(EMOC_tm_seg)))

# Term-document and document-term matrices; wordLengths = c(2, Inf) keeps only
# terms that are at least two characters long.
tdm <- TermDocumentMatrix(ap.corpus, control = list(wordLengths = c(2, Inf)))
dtm <- DocumentTermMatrix(ap.corpus, control = list(wordLengths = c(2, Inf)))

# Sum each term's counts over all documents and sort from most to least
# frequent. NOTE(review): as.matrix() materialises a dense terms x documents
# matrix, which may be large for a long token list.
m1 <- as.matrix(tdm)
v <- sort(rowSums(m1), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)
- 畫文字雲
# Draw the word cloud from the term-frequency table `d` using the 8-colour
# "Dark2" ColorBrewer palette.
pal2 <- brewer.pal(8, "Dark2")
wordcloud(
  d$word, d$freq,
  min.freq = 5,            # terms occurring fewer than 5 times are not drawn
  random.order = TRUE,     # plot words in random order, not by frequency
  ordered.colors = FALSE,  # colours are not matched one-to-one to words
  rot.per = 0.15,          # share of words drawn rotated by 90 degrees
  colors = pal2
)