diff --git a/Topic_Model_Script.R b/Topic_Model_Script.R
index 085fa00..5824f51 100644
--- a/Topic_Model_Script.R
+++ b/Topic_Model_Script.R
@@ -1,6 +1,6 @@
-We're going to use a library called 'tm', and another called 'topic models'
-#install.package("tm")
-#install.package("topicmodels")
+#We're going to use a library called 'tm', and another called 'topic models'
+#install.packages("tm")
+#install.packages("topicmodels")
 library(tm)
 
 #### instead of using 'setwd()' function, start by finding the file in the bottom right quadrant of R Studio
@@ -42,10 +42,10 @@ nrow(a.tdm) #number of unique words in corpus
 findAssocs(a.tdm, 'includ', 0.25) # find associated words and strength of the common words.play around with this
 a.tdm.sp <- removeSparseTerms(a.tdm, sparse=0.987) #remove sparse terms, maybe not necessary, sometimes is. Play around with this if you're not getting good output.
 a.tdm.sp.df <- as.data.frame(inspect(a.tdm.sp)) # convert term document matrix to data frame
-nrow(a.dtm.sp.df) # check to see how many words we're left
+nrow(a.tdm.sp.df) # check to see how many words we're left
 
 require(slam)
-a.dtm.sp. <- t(a.tdm.sp) # transpose term document matrix into document term matrix
+a.dtm.sp.t <- t(a.tdm.sp) # transpose term document matrix into document term matrix
 
 
 require(topicmodels)