JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
cleanup
[four-random-words.git] / purple-logs-word-frequency.bash
diff --git a/purple-logs-word-frequency.bash b/purple-logs-word-frequency.bash
new file mode 100755 (executable)
index 0000000..958e75f
--- /dev/null
@@ -0,0 +1,13 @@
+#!/bin/bash
+# Word-frequency table (most frequent first) of 3-16 char words [a-z.'-] in message text of logs under $HOME/.purple/logs/
+find "$HOME/.purple/logs/" -type f -print0 \
+       | xargs -0 grep -I --no-filename -v '(notice)' \
+       | sed -e 's/<[^>]*>//g; s/&nbsp;/ /g; s/&[nm]dash;/-/g' \
+       | sed -ne "s/^(..:..:.. ..) [a-zA-Z0-9@ _.-]\{1,25\}: \(.*\)$/\1/p" \
+       | perl -0777 -p -e's/\s+/\n/mg' \
+       | grep -v '^&.*;$' \
+       | sed -e 's/^\W\+//g; s/\W\+$//g' \
+       | grep '^[a-z.'\''-]\{3,16\}$' \
+       | sort \
+       | uniq -c \
+       | sort -nr