JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
script to find word-frequency of tree of files
[four-random-words.git] / word-frequency-here.bash
diff --git a/word-frequency-here.bash b/word-frequency-here.bash
new file mode 100755 (executable)
index 0000000..ee16c58
--- /dev/null
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+find . -type f -print0 \
+       | xargs -0 grep -I --no-filename -v '(notice)' \
+       | sed -e 's/<[^>]*>//g; s/&nbsp;/ /g; s/&[nm]dash;/-/' \
+       | sed -ne "s/^(..:..:.. ..) [a-zA-Z0-9@ _.-]\{1,25\}: \(.*\)$/\1/p" \
+       | perl -0777 -p -e's/\s+/\n/mg' \
+       | grep -v '^&.*;$' \
+       | sed -e 's/^\W\+//g; s/\W\+$//g' \
+       | grep '^[a-z.'\''-]\{3,16\}$' \
+       | sort \
+       | uniq -c \
+       | sort -nr