DEV Community

Discussion on: Daily Challenge #42 - Caesar Cipher

Collapse
 
oscherler profile image
Olivier “Ölbaum” Scherler

My updated Erlang solution.

I now perform frequency analysis on the encrypted message, calculate the correlation with the frequencies of letters in the English language, and use the best N correlation values to decrypt the message.

-module( caesar ).
-export( [ caesar/2, crack/1, crack/2 ] ).

-include_lib("eunit/include/eunit.hrl").

wrap( Char, Length ) when is_integer( Length ), Length > 0 ->
    ( Char rem Length + Length ) rem Length.

shift_char( Char, Key ) when Char >= $a, Char =< $z ->
    $a + wrap( Char - $a + Key, 26 );
shift_char( Char, Key ) when Char >= $A, Char =< $Z ->
    $A + wrap( Char - $A + Key, 26 );
shift_char( Char, _ ) ->
    Char.

caesar( Message, Key ) when is_integer( Key ) ->
    lists:map( fun( C ) -> shift_char( C, Key ) end, Message ).

% calculate the frequencies of the letters in the message
freqs( Message ) ->
    freqs( string:lowercase( Message ), #{}, 0 ).
freqs( [ C | Rest ], Freqs, Count ) when C >= $a, C =< $z ->
    NewFreqs = maps:put( C, maps:get( C, Freqs, 0 ) + 1, Freqs ),
    freqs( Rest, NewFreqs, Count + 1 );
freqs( [ _ | Rest ], Freqs, Count ) ->
    freqs( Rest, Freqs, Count );
freqs( [], Freqs, Count ) ->
    maps:map( fun( _C, Freq ) -> 100 * Freq / Count end, Freqs ).

freq_array( FreqMap ) ->
    lists:map(
        fun( X ) -> maps:get( $a + X - 1, FreqMap, 0 ) end,
        lists:seq( 1, 26 )
    ).

shift_fun( F, Tau, Period ) ->
    { F1, F2 } = lists:split( wrap( Tau, Period ), F ),
    F2 ++ F1.

correlation( F, G ) when length( F ) == length ( G ) ->
    Period = length( F ),
    lists:map(
        fun( Tau ) ->
            Z = lists:zip( F, shift_fun( G, Tau, Period ) ),
            lists:foldl( fun( { X, Y }, Acc ) -> X * Y / Period + Acc end, 0, Z )
        end,
        lists:seq( 0, Period - 1 )
    ).

% determine the most likely keys by correlation with the frequencies in the English language
crack( Message ) ->
    crack( Message, 1 ).

crack( Message, Attempts ) ->
    Freqs = freqs( Message ),
    Correlation = correlation( freq_array( english_frequencies() ), freq_array( Freqs ) ),
    Points = lists:zip( lists:seq( 0, 25 ), Correlation ),
    BestKeys = lists:reverse( lists:ukeysort( 2, Points ) ),
    lists:sublist(
        lists:map( fun( { Key, C } ) -> { Key, C, caesar( Message, - Key ) } end, BestKeys ), Attempts
    ).

% frequencies of the letters in the English language, from
% http://pi.math.cornell.edu/~mec/2003-2004/cryptography/subs/frequencies.html
english_frequencies() ->
    _Frequencies = #{
        $e => 12.02, $t => 9.10, $a => 8.12, $o => 7.68, $i => 7.31, $n => 6.95, $s => 6.28,
        $r => 6.02, $h => 5.92, $d => 4.32, $l => 3.98, $u => 2.88, $c => 2.71, $m => 2.61,
        $f => 2.30, $y => 2.11, $w => 2.09, $g => 2.03, $p => 1.82, $b => 1.49, $v => 1.11,
        $k => 0.69, $x => 0.17, $q => 0.11, $j => 0.10, $z => 0.07
    }.

wrap_test_() -> [
        ?_assertEqual(  0, wrap( 0, 26 ) ),
        ?_assertEqual(  3, wrap( 3, 26 ) ),
        ?_assertEqual(  0, wrap( 26, 26 ) ),
        ?_assertEqual( 25, wrap( -1, 26 ) ),
        ?_assertEqual( 10, wrap( -42, 26 ) ),
        ?_assertEqual( 25, wrap( 77, 26 ) ),
        ?_assertEqual( 5, wrap( 15, 10 ) ),
        ?_assertEqual( 0, wrap( 7, 1 ) )
    ].

shift_char_test_() -> [
    ?_assertEqual( $b, shift_char( $a, 1 ) ),
    ?_assertEqual( $f, shift_char( $a, 5 ) ),
    ?_assertEqual( $a, shift_char( $z, 1 ) ),
    ?_assertEqual( $z, shift_char( $r, 8 ) ),
    ?_assertEqual( $b, shift_char( $r, 10 ) ),
    ?_assertEqual( $M, shift_char( $K, 2 ) ),
    ?_assertEqual( $D, shift_char( $W, 7 ) ),
    ?_assertEqual( $a, shift_char( $a, 0 ) ),
    ?_assertEqual( $a, shift_char( $b, -1 ) ),
    ?_assertEqual( $c, shift_char( $a, 28 ) ),
    ?_assertEqual( $r, shift_char( $c, -11 ) )
].

casear_test_() -> [
    ?_assertEqual( "b", caesar( "a", 1 ) ),
    ?_assertEqual( "e", caesar( "a", 4 ) ),
    ?_assertEqual( "a", caesar( "z", 1 ) ),
    ?_assertEqual( "efgfoe uif fbtu xbmm pg uif dbtumf", caesar( "defend the east wall of the castle", 1 ) ),
    ?_assertEqual( "dwwdfn iurp wkh zrrgv dw gdzq", caesar( "attack from the woods at dawn", 3 ) ),
    ?_assertEqual( "Dro aesmu, lbygx pyh TEWZON yfob dro vkji nyq!", caesar( "The quick, brown fox JUMPED over the lazy dog!", 10 ) ),
    ?_assertEqual( caesar( "defend the east wall of the castle", 15 ), caesar( "defend the east wall of the castle", -11 ) )
].

freqs_test_() -> [
    ?_assertEqual( #{ $a => 100.0 }, freqs( "a" ) ),
    ?_assertEqual( #{ $b => 50.0, $f => 50.0 }, freqs( "bf fb" ) ),
    ?_assertEqual( #{ $a => 50.0, $b => 30.0, $c => 20.0 }, freqs( "baac baba bacca cabba aa" ) )
].

freq_array_test_() -> [
    ?_assertEqual( 100.0, lists:nth( 1, freq_array( #{ $a => 100.0 } ) ) ),
    ?_assertEqual(  50.0, lists:nth( 2, freq_array( #{ $b => 50.0, $f => 50.0 } ) ) ),
    ?_assertEqual(  50.0, lists:nth( 6, freq_array( #{ $b => 50.0, $f => 50.0 } ) ) ),
    ?_assertEqual(  50.0, lists:nth( 1, freq_array( #{ $a => 50.0, $b => 30.0, $c => 20.0 } ) ) ),
    ?_assertEqual(  30.0, lists:nth( 2, freq_array( #{ $a => 50.0, $b => 30.0, $c => 20.0 } ) ) ),
    ?_assertEqual(  20.0, lists:nth( 3, freq_array( #{ $a => 50.0, $b => 30.0, $c => 20.0 } ) ) )
].

correlation_test_() ->
    F = [ 0, 0, 2, 0, 0 ],
    G = [ 0, 1, 2, 3, 0 ],
    H = [ 1, 2, 1, 4, 6 ],
    Appr = fun( L ) -> lists:map( fun( X ) -> round( X * 100 ) end, L ) end,
    [
        ?_assertEqual( Appr( [ 4/5, 0.0, 0.0, 0.0, 0.0 ] ), Appr( correlation( F, F ) ) ),
        ?_assertEqual( Appr( [ 4/5, 6/5, 0.0, 0.0, 2/5 ] ), Appr( correlation( F, G ) ) ),
        ?_assertEqual( Appr( [ 2/5, 8/5, 12/5, 2/5, 4/5 ] ), Appr( correlation( F, H ) ) ),
        ?_assertEqual( Appr( [ 4/5, 2/5, 0.0, 0.0, 6/5 ] ), Appr( correlation( G, F ) ) ),
        ?_assertEqual( Appr( [ 14/5, 8/5, 3/5, 3/5, 8/5 ] ), Appr( correlation( G, G ) ) ),
        ?_assertEqual( Appr( [ 16/5, 27/5, 19/5, 14/5, 8/5 ] ), Appr( correlation( G, H ) ) ),
        ?_assertEqual( Appr( [ 2/5, 4/5, 2/5, 12/5, 8/5 ] ), Appr( correlation( H, F ) ) ),
        ?_assertEqual( Appr( [ 16/5, 8/5, 14/5, 19/5, 27/5 ] ), Appr( correlation( H, G ) ) ),
        ?_assertEqual( Appr( [ 58/5, 38/5, 31/5, 31/5, 38/5 ] ), Appr( correlation( H, H ) ) )
    ].

To try:

% erl
1> c(caesar).
{ok,caesar}

2> caesar:crack("efgfoe uif fbtu xbmm pg uif dbtumf").                                                               [{1,27.732142857142858, "defend the east wall of the castle"}]

3> caesar:crack("dwwdfn iurp wkh zrrgv dw gdzq").
[{3,24.07692307692308,"attack from the woods at dawn"}]

4> caesar:crack("Dro aesmu, lbygx pyh TEWZON yfob dro vkji nyq!", 3).
[{10,19.342948717948715, "The quick, brown fox JUMPED over the lazy dog!"},
 {0,16.821581196581192, "Dro aesmu, lbygx pyh TEWZON yfob dro vkji nyq!"},
 {20,16.384615384615383, "Jxu gkysa, rhemd ven ZKCFUT eluh jxu bqpo tew!"}]