|
89 | 89 | "outputs": [], |
90 | 90 | "source": [ |
91 | 91 | "mystopwords = stopwords.words('english')+['ax','edu','com','would','nntp','ac','co','gv','bf','db','tin','apr','gmt','na','pl','di','inc','gov','max','acs','cs',\n", |
92 | | - " 'subject','lines','organization','writes','article','one','posting','host','ca','also','too']" |
| 92 | + " 'subject','lines','organization','writes','article','one','posting','host','ca','also','too','maxaxaxaxaxaxaxaxaxaxaxaxaxaxax']" |
93 | 93 | ] |
94 | 94 | }, |
95 | 95 | { |
|
115 | 115 | "### Frequencies" |
116 | 116 | ] |
117 | 117 | }, |
| 118 | + { |
| 119 | + "cell_type": "markdown", |
| 120 | + "metadata": {}, |
| 121 | + "source": [ |
| 122 | + "#### Matplotlib" |
| 123 | + ] |
| 124 | + }, |
118 | 125 | { |
119 | 126 | "cell_type": "code", |
120 | 127 | "execution_count": null, |
|
142 | 149 | "tv.frequencyPlot(listText=df.text_clean.to_list(), ngramRange=(3,3), stopwords=mystopwords)" |
143 | 150 | ] |
144 | 151 | }, |
| 152 | + { |
| 153 | + "cell_type": "markdown", |
| 154 | + "metadata": {}, |
| 155 | + "source": [ |
| 156 | + "#### Plotly" |
| 157 | + ] |
| 158 | + }, |
145 | 159 | { |
146 | 160 | "cell_type": "code", |
147 | 161 | "execution_count": null, |
|
172 | 186 | "fig.show()" |
173 | 187 | ] |
174 | 188 | }, |
| 189 | + { |
| 190 | + "cell_type": "markdown", |
| 191 | + "metadata": {}, |
| 192 | + "source": [ |
| 193 | + "#### Yellowbrick" |
| 194 | + ] |
| 195 | + }, |
175 | 196 | { |
176 | 197 | "cell_type": "code", |
177 | 198 | "execution_count": null, |
|
199 | 220 | "tv.frequencyPlotYellowbrick(listText=df.text_clean.to_list(), ngramRange=(3,3), stopwords=mystopwords)" |
200 | 221 | ] |
201 | 222 | }, |
| 223 | + { |
| 224 | + "cell_type": "markdown", |
| 225 | + "metadata": {}, |
| 226 | + "source": [ |
| 227 | + "#### Tree map" |
| 228 | + ] |
| 229 | + }, |
202 | 230 | { |
203 | 231 | "cell_type": "code", |
204 | 232 | "execution_count": null, |
|
229 | 257 | "fig.show()" |
230 | 258 | ] |
231 | 259 | }, |
| 260 | + { |
| 261 | + "cell_type": "markdown", |
| 262 | + "metadata": {}, |
| 263 | + "source": [ |
| 264 | + "#### Donut chart" |
| 265 | + ] |
| 266 | + }, |
| 267 | + { |
| 268 | + "cell_type": "code", |
| 269 | + "execution_count": null, |
| 270 | + "metadata": { |
| 271 | + "tags": [] |
| 272 | + }, |
| 273 | + "outputs": [], |
| 274 | + "source": [ |
| 275 | + "fig = tv.frequencyDonutChart(listText=df.text_clean.to_list(), stopwords=mystopwords)\n", |
| 276 | + "fig.show()" |
| 277 | + ] |
| 278 | + }, |
| 279 | + { |
| 280 | + "cell_type": "code", |
| 281 | + "execution_count": null, |
| 282 | + "metadata": { |
| 283 | + "tags": [] |
| 284 | + }, |
| 285 | + "outputs": [], |
| 286 | + "source": [ |
| 287 | + "fig = tv.frequencyDonutChart(listText=df.text_clean.to_list(), ngramRange=(2,2), stopwords=mystopwords)\n", |
| 288 | + "fig.show()" |
| 289 | + ] |
| 290 | + }, |
| 291 | + { |
| 292 | + "cell_type": "code", |
| 293 | + "execution_count": null, |
| 294 | + "metadata": { |
| 295 | + "tags": [] |
| 296 | + }, |
| 297 | + "outputs": [], |
| 298 | + "source": [ |
| 299 | + "fig = tv.frequencyDonutChart(listText=df.text_clean.to_list(), ngramRange=(3,3), stopwords=mystopwords)\n", |
| 300 | + "fig.show()" |
| 301 | + ] |
| 302 | + }, |
232 | 303 | { |
233 | 304 | "cell_type": "markdown", |
234 | 305 | "metadata": {}, |
|
503 | 574 | "### Frequency" |
504 | 575 | ] |
505 | 576 | }, |
| 577 | + { |
| 578 | + "cell_type": "markdown", |
| 579 | + "metadata": {}, |
| 580 | + "source": [ |
| 581 | + "#### Matplotlib" |
| 582 | + ] |
| 583 | + }, |
506 | 584 | { |
507 | 585 | "cell_type": "code", |
508 | 586 | "execution_count": null, |
|
530 | 608 | "c.frequencyPlot(stopwords=mystopwords, labels = [\"rec.autos\",\"rec.motorcycles\"])" |
531 | 609 | ] |
532 | 610 | }, |
| 611 | + { |
| 612 | + "cell_type": "markdown", |
| 613 | + "metadata": {}, |
| 614 | + "source": [ |
| 615 | + "#### Plotly" |
| 616 | + ] |
| 617 | + }, |
533 | 618 | { |
534 | 619 | "cell_type": "code", |
535 | 620 | "execution_count": null, |
|
557 | 642 | "c.frequencyPlot(stopwords=mystopwords, labels = [\"rec.autos\",\"rec.motorcycles\"], package = 'plotly')" |
558 | 643 | ] |
559 | 644 | }, |
| 645 | + { |
| 646 | + "cell_type": "markdown", |
| 647 | + "metadata": {}, |
| 648 | + "source": [ |
| 649 | + "#### Yellowbrick" |
| 650 | + ] |
| 651 | + }, |
560 | 652 | { |
561 | 653 | "cell_type": "code", |
562 | 654 | "execution_count": null, |
|
584 | 676 | "c.frequencyPlot(stopwords=mystopwords, labels = [\"rec.autos\",\"rec.motorcycles\"], package = 'yellowbrick')" |
585 | 677 | ] |
586 | 678 | }, |
| 679 | + { |
| 680 | + "cell_type": "markdown", |
| 681 | + "metadata": {}, |
| 682 | + "source": [ |
| 683 | + "#### Tree map" |
| 684 | + ] |
| 685 | + }, |
587 | 686 | { |
588 | 687 | "cell_type": "code", |
589 | 688 | "execution_count": null, |
|
620 | 719 | "c.frequencyTreeMap(stopwords=mystopwords, ngramRange=(2,2), labels = [\"rec.autos\",\"rec.motorcycles\"])" |
621 | 720 | ] |
622 | 721 | }, |
| 722 | + { |
| 723 | + "cell_type": "markdown", |
| 724 | + "metadata": {}, |
| 725 | + "source": [ |
| 726 | + "#### Donut chart" |
| 727 | + ] |
| 728 | + }, |
| 729 | + { |
| 730 | + "cell_type": "code", |
| 731 | + "execution_count": null, |
| 732 | + "metadata": { |
| 733 | + "tags": [] |
| 734 | + }, |
| 735 | + "outputs": [], |
| 736 | + "source": [ |
| 737 | + "c.frequencyDonutChart(stopwords=mystopwords)" |
| 738 | + ] |
| 739 | + }, |
| 740 | + { |
| 741 | + "cell_type": "code", |
| 742 | + "execution_count": null, |
| 743 | + "metadata": { |
| 744 | + "tags": [] |
| 745 | + }, |
| 746 | + "outputs": [], |
| 747 | + "source": [ |
| 748 | + "c.frequencyDonutChart(ngramRange=(2,2), stopwords=mystopwords)" |
| 749 | + ] |
| 750 | + }, |
| 751 | + { |
| 752 | + "cell_type": "code", |
| 753 | + "execution_count": null, |
| 754 | + "metadata": { |
| 755 | + "tags": [] |
| 756 | + }, |
| 757 | + "outputs": [], |
| 758 | + "source": [ |
| 759 | + "c.frequencyDonutChart(ngramRange=(3,3), stopwords=mystopwords)" |
| 760 | + ] |
| 761 | + }, |
623 | 762 | { |
624 | 763 | "cell_type": "markdown", |
625 | 764 | "metadata": {}, |
|
0 commit comments