Skip to content
Snippets Groups Projects
Commit 1d817ab0 authored by Wade Fagen-Ulmschneider (waf)'s avatar Wade Fagen-Ulmschneider (waf)
Browse files

demo_textCleaning

parent c65dc1bf
Branches demo_textCleaning
No related tags found
No related merge requests found
{
"title": "NLTK: Text Cleaning Examples",
"index": 20170228,
"type": "Demo"
}
# Using Jupyter Notebook
This diff is collapsed.
name, rgb
ivory, "(255,255,240)"
beige, "(245,245,220)"
tan, "(210,180,140)"
khaki, "(240,130,140)"
silver, "(230,232,250)"
gray, "(84,84,84)"
grey, "(84,84,84)"
azure,"(240,255,255)"
cyan, "(0,255,255)"
aqua, "(0,255,255)"
turquoise, "(64,224,208)"
teal, "(0,128,128)"
olive, "(128,128,0)"
chartreuse, "(127,255,0)"
lime, "(0,255,0)"
golden, "(255,215,0)"
goldenrod, "(218,165,32)"
coral, "(255,127,0)"
salmon, "(250,128,114)"
fuchsia, "(255,0,255)"
lavender, "(230,230,250)"
plum, "(221,160,221)"
maroon, "(176,48,96)"
red, "(255,0,0)"
orange, "(255,165,0)"
yellow, "(255,255,0)"
green, "(0,255,0)"
blue, "(0,0,255)"
violet, "(238,130,238)"
purple, "(160,32,240)"
white, "(255,255,255)"
black, "(0,0,0)"
gold, "(255,215,0)"
magenta, "(255,0,255)"
pink, "(255,192,203)"
ivory
beige
tan
khaki
silver
gray
grey
azure
cyan
aqua
turquoise
teal
olive
chartreuse
lime
golden
goldenrod
coral
salmon
fuchsia
lavender
plum
maroon
red
orange
yellow
green
blue
violet
purple
white
black
gold
magenta
pink
This diff is collapsed.
This diff is collapsed.
{% extends "static/templates/projectBase.html" %}
{% block projectContent %}
{# Project page body: pulls in the scripts, sets the axis styling and provides the empty #content container. #}
{# tinycolor comes from a CDN; web/vis.js is a project-local script (presumably the one that fills #content; confirm). #}
<script src="https://cdnjs.cloudflare.com/ajax/libs/tinycolor/1.1.1/tinycolor.js"></script>
<script src="web/vis.js"></script>
<h2>Frequency of words in "The Hunger Games"</h2>
<hr />
{# Styling for elements carrying the "axis" class: thin black axis lines with no fill, small sans-serif tick labels. #}
<style>
.axis path,
.axis line {
fill: none;
stroke: black;
shape-rendering: crispEdges;
}
.axis text {
font-family: sans-serif;
font-size: 11px;
}
</style>
{# Intentionally empty in the markup; content is expected to be added by script at runtime. #}
<div class="container" id="content">
</div>
{% endblock %}
"use strict";

/* Boilerplate jQuery */

// Parsed contents of each res/*.json file. A variable stays undefined when
// its file could not be loaded, and that dataset is then skipped below.
var tokenize, lowercase, wordsOnly, stopwords, stem;

/**
 * Request a JSON resource without letting a failed request abort the batch.
 *
 * $.when() rejects as soon as any one of its arguments is rejected, which
 * would prevent every chart from being drawn if a single file were missing.
 * The promise returned here is therefore resolved whether the request
 * succeeds or fails; a failure is only logged.
 *
 * @param {string} url - location of the JSON file.
 * @param {function(Object)} onLoaded - called with the parsed JSON on success.
 * @returns {Object} a jQuery promise that resolves once the request settles.
 */
function loadOptionalJSON(url, onLoaded) {
  var settled = $.Deferred();

  $.getJSON(url, onLoaded)
    .fail(function (jqXHR, textStatus) {
      console.warn("Could not load " + url + ": " + textStatus);
    })
    .always(function () {
      settled.resolve();
    });

  return settled.promise();
}

$.when(
  loadOptionalJSON("res/tokenize.json", function (data) { tokenize = data; }),
  loadOptionalJSON("res/lowercase.json", function (data) { lowercase = data; }),
  loadOptionalJSON("res/words-only.json", function (data) { wordsOnly = data; }),
  loadOptionalJSON("res/stopwords.json", function (data) { stopwords = data; }),
  loadOptionalJSON("res/stem.json", function (data) { stem = data; })
).then(function () {
  // Keep the fixed order of the processing steps and drop any dataset that
  // did not load.
  var data = [];
  if (tokenize) { data.push(tokenize); }
  if (lowercase) { data.push(lowercase); }
  if (wordsOnly) { data.push(wordsOnly); }
  if (stopwords) { data.push(stopwords); }
  if (stem) { data.push(stem); }

  visualize(data);
});
/* Visualize the data in the visualize function */

/**
 * Draw one bar chart of word frequencies for each entry of `jsonData`.
 *
 * For every entry, `.title` becomes an <h2> heading and `.freq` is turned
 * into { word, count } records (one per key of `.freq`), ordered from the
 * highest count to the lowest. The heading and the chart are appended to the
 * element with id "content".
 *
 * Relies on the globals `d3`, `_` and `tinycolor`.
 *
 * @param {Array<Object>} jsonData - datasets to chart; one graph per entry.
 */
var visualize = function(jsonData) {
  // Same margins for every graph.
  var margin = { top: 50,
                 left: 50,
                 right: 50,
                 bottom: 50 };

  var contentDOM = document.getElementById("content");

  for (var i = 0; i < jsonData.length; i++) {
    // Add the graph
    var freq = jsonData[i].freq;
    var desc = jsonData[i].title;

    var data = _.map(
      freq,
      function(value, key) {
        return { word: key,
                 count: value };
      }
    );

    // _.sortBy orders ascending; reverse so the largest count comes first.
    data = _.sortBy(data,
      function (d) {
        return d.count;
      });
    data.reverse();

    // Create DOM
    var headerDOM = document.createElement("h2");
    var text = document.createTextNode(desc);
    headerDOM.appendChild(text);

    var divDOM = document.createElement("div");

    contentDOM.appendChild(headerDOM);
    contentDOM.appendChild(divDOM);

    // The drawing area is the container width/a fixed 400px height, minus
    // the margins.
    var realW = contentDOM.offsetWidth;
    var width = realW - margin.left - margin.right,
        height = 400 - margin.top - margin.bottom;

    // We will use a band scale, allowing us to map a series of elements
    // to a range (which will be the location on the x-axis of the bar
    // graph).
    //
    // Our domain is the words, as an array, in bar order.
    //   Ex: ["the", "and", "to", ...]
    //
    // To translate our data, which is an array of objects that contain
    // both a .word and a .count, into an array of only words, we map the
    // array into a new array.
    //
    // Our range is all the values in [0, width].
    var x =
      d3.scaleBand()
        .domain( data.map(function (d) { return d.word; } ) )
        .range( [0, width] );

    // The range is inverted because SVG y coordinates grow downwards.
    var y =
      d3.scaleLinear()
        .domain( [0, d3.max( data, function(d) { return d.count; } )] )
        .range( [height, 0] );

    var xAxis =
      d3.axisBottom()
        .scale(x);

    var yAxis =
      d3.axisLeft()
        .scale(y);

    // "width"/"height" are the real SVG sizing attributes (the previous
    // "w"/"h" are not attributes SVG knows about).
    var svg =
      d3.select(divDOM)
        .append("svg")
        .attr("width", width + margin.left + margin.right)
        .attr("height", height + margin.top + margin.bottom)
        .style("width", (width + margin.left + margin.right) + "px")
        .style("height", (height + margin.top + margin.bottom) + "px")
        .append("g")
        .attr("transform", "translate(" + margin.left + ", " + margin.top + ")")
        ;

    var c20 = d3.scaleOrdinal(d3.schemeCategory20c);

    // One bar per word: filled by bar index, outlined in a darkened shade of
    // the same colour.
    svg.selectAll("rect")
       .data(data)
       .enter()
       .append("rect")
       .attr("x", function (d, i) { return x(d.word); } )
       .attr("y", function (d, i) { return y(d.count); })
       .attr("width", x.bandwidth() )
       .attr("height", function (d) { return height - y(d.count); })
       .style("fill", function(d, i) { return c20(i); })
       .style("stroke", function(d, i) { return tinycolor(c20(i)).darken(); })
       .style("stroke-width", 1)
       ;

    svg.append("g")
       .attr("class", "axis")
       .call(yAxis)
       ;

    // The x axis sits at the bottom edge of the drawing area.
    svg.append("g")
       .attr("class", "axis")
       .attr("transform", "translate(0, " + height + ")")
       .call(xAxis)
       ;
  }
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment