forked from anton-siardziuk/detect_encoding
-
Notifications
You must be signed in to change notification settings - Fork 6
/
generate_specters.php
31 lines (23 loc) · 921 Bytes
/
generate_specters.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
<?php
/**
 * Builds the per-encoding character-pair frequency tables ("specters").
 *
 * Input:  specters/raw.php, which must return an array mapping a UTF-8
 *         character pair to the number of times it was counted.
 * Output: specters/<encoding>.php for every entry of $encodings. Each file
 *         returns an array mapping the pair, converted to that encoding, to
 *         its relative frequency (count divided by the sum of all counts).
 *
 * Every pair is stored in four case variants (as given, first character
 * upper-cased, second character upper-cased, both upper-cased); all four
 * carry the same weight.
 *
 * The output paths are relative, so file_put_contents() resolves them
 * against the current working directory.
 *
 * Throws RuntimeException when the raw table is unusable or an output file
 * cannot be written.
 */
$raw_specter = require 'specters/raw.php';
if (!is_array($raw_specter))
{
    throw new RuntimeException('specters/raw.php must return an array of pair => count');
}

$encodings = array('windows-1251', 'koi8-r', 'iso8859-5');

$all_symbols = array_sum($raw_specter);
if ($all_symbols <= 0)
{
    // Without a positive total the weights below would divide by zero.
    throw new RuntimeException('specters/raw.php has no positive total count; cannot compute frequencies');
}

// Weights and case variants do not depend on the target encoding, so they
// are computed once here instead of once per encoding.
$weighted_variants = array();
foreach ($raw_specter as $key => $count)
{
    // PHP stores numeric-string keys such as "12" as integers; make the
    // conversion back to text explicit before slicing characters out of it.
    $pair = (string) $key;

    $letter1 = mb_substr($pair, 0, 1, 'UTF-8');
    $letter2 = mb_substr($pair, 1, 1, 'UTF-8');
    $upper1 = mb_strtoupper($letter1, 'UTF-8');
    $upper2 = mb_strtoupper($letter2, 'UTF-8');

    $weighted_variants[] = array(
        $count / $all_symbols,
        array($letter1.$letter2, $upper1.$letter2, $letter1.$upper2, $upper1.$upper2),
    );
}

foreach ($encodings as $encoding)
{
    $specter = array();
    foreach ($weighted_variants as $entry)
    {
        list($weight, $variants) = $entry;
        foreach ($variants as $variant)
        {
            $encoded = iconv('UTF-8', $encoding, $variant);
            if ($encoded === false)
            {
                // iconv() returns false (and raises a notice) when the pair
                // cannot be represented in this encoding. Using false as an
                // array key would be cast to 0 and add a junk entry to the
                // table, so the variant is left out instead.
                continue;
            }
            $specter[$encoded] = $weight;
        }
    }

    $path = 'specters/'.$encoding.'.php';
    $written = file_put_contents($path, '<?php return '.var_export($specter, true).';');
    if ($written === false)
    {
        throw new RuntimeException(sprintf('Could not write %s', $path));
    }
}