calculate letter frequency from wordlist

author Jason Woofenden <jason@jasonwoof.com>

Tue, 15 Jan 2013 11:54:50 +0000 (06:54 -0500)

committer Jason Woofenden <jason@jasonwoof.com>

Tue, 15 Jan 2013 11:54:50 +0000 (06:54 -0500)
author Jason Woofenden <jason@jasonwoof.com>
Tue, 15 Jan 2013 11:54:50 +0000 (06:54 -0500)
committer Jason Woofenden <jason@jasonwoof.com>
Tue, 15 Jan 2013 11:54:50 +0000 (06:54 -0500)
diff --git a/freq.coffee b/freq.coffee

new file mode 100644 (file)

index 0000000..bf4e497
--- /dev/null
+++ b/freq.coffee
@@ -0,0 +1,65 @@
+#!/usr/bin/coffee
+
+# This script tries to calculate an optimal letter distribution.
+#
+# It reads a newline-separated word list, gives every word a weight based on
+# its length (4-letter words count most), and adds that weight to a running
+# tally for each letter of the word. "qu" is tallied as one unit, separately
+# from a "q" that is not followed by "u".
+# Lines are trimmed and lower-cased; characters outside a-z are ignored.
+#
+# Output: one "<rounded tally> # <letter>" line per entry, then "total: <sum>".
+# Each line is already in the form main.coffee uses for letter_distribution
+# entries. Note that the total counts both the q and the qu lines, so it only
+# equals the sum of that array if both lines are pasted in.
+#
+# Usage: run in the directory that contains wordlist.txt.
+
+fs = require 'fs'
+
+wordlist_file = 'wordlist.txt'
+
+# each letter of distance from a word length of 4 scales the weight by this
+length_falloff = .6
+
+fs.readFile wordlist_file, 'utf8', (err, data) ->
+       if err?
+               console.log "Error reading #{wordlist_file}: #{err}"
+               process.exit(1)
+       else
+               # a-z in order, then the combined "qu" entry (also the print order)
+               weights = {}
+               weights[letter] = 0 for letter in 'abcdefghijklmnopqrstuvwxyz'.split ''
+               weights.qu = 0
+
+               # one word per line; a trailing newline yields an empty word, which adds nothing
+               words = data.split '\n'
+               for line in words
+                       # trim so a trailing "\r" is neither counted nor skews the length
+                       word = line.trim().toLowerCase()
+                       # 1 for a 4-letter word, .6 for 3 or 5 letters, .36 for 2 or 6, ...
+                       weight = Math.pow length_falloff, Math.abs(word.length - 4)
+                       i = 0
+                       while i < word.length
+                               # "i + 1" keeps the look-ahead inside the word; a matched
+                               # pair is counted once as "qu" and both characters are consumed
+                               if word[i] is 'q' and i + 1 < word.length and word[i + 1] is 'u'
+                                       weights.qu += weight
+                                       i += 2
+                               else
+                                       # skip characters without an entry (digits, punctuation);
+                                       # adding to a missing key would turn the total into NaN
+                                       if weights.hasOwnProperty word[i]
+                                               weights[word[i]] += weight
+                                       i += 1
+
+               # sum the rounded values so the total matches the numbers printed below
+               total = 0
+               for letter, weight of weights
+                       total += Math.round(weight)
+
+               # same "<count> # <letter>" layout as the letter_distribution entries
+               for letter, weight of weights
+                       console.log "#{Math.round(weight)} # #{letter}"
+
+               console.log "total: #{total}"
diff --git a/main.coffee b/main.coffee

index 99d2e43..5c1fab7 100644 (file)
--- a/main.coffee
+++ b/main.coffee
@@ -90,35 +90,36 @@ selected = []
  
  letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  letter_distribution = [
  
  letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  letter_distribution = [
-        9 # A
-        2 # B
-        2 # C
-        4 # D
-       14 # E
-        2 # F
-        3 # G
-        2 # H
-        7 # I
-        1 # J
-        1 # K
-        4 # L
-        2 # M
-        6 # N
-        8 # O
-        2 # P
-        1 # Q
-        6 # R
-        7 # S
-        6 # T
-        2 # U
-        2 # V
-        2 # W
-        1 # X
-        2 # Y
-        1 # Z
+       14355 # a
+        3968 # b
+        6325 # c
+        7045 # d
+       20258 # e
+        2739 # f
+        5047 # g
+        4372 # h
+       13053 # i
+         516 # j
+        2600 # k
+        9631 # l
+        5115 # m
+       10082 # n
+       11142 # o
+        5292 # p
+         287 # qu
+       12341 # r
+       16571 # s
+       10215 # t
+        6131 # u
+        1728 # v
+        2184 # w
+         619 # x
+        3512 # y
+         831 # z
+
  ]
  
  ]
  
-letter_distribution_total = letter_distribution.sum()
+letter_distribution_total = 175959 # letter_distribution.sum(); 175973 from freq.coffee presumably also counted the bare "q" line
  
  
  new_letter_queue = []
  
  
  new_letter_queue = []
author	Jason Woofenden <jason@jasonwoof.com>
	Tue, 15 Jan 2013 11:54:50 +0000 (06:54 -0500)
committer	Jason Woofenden <jason@jasonwoof.com>
	Tue, 15 Jan 2013 11:54:50 +0000 (06:54 -0500)
freq.coffee	[new file with mode: 0644]	patch \| blob
main.coffee		patch \| blob \| history