mnemonic.py
#!/usr/bin/env python
#
# Electrum - lightweight Bitcoin client
# Copyright (C) 2014 Thomas Voegtlin
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import os
import math
import hashlib
import unicodedata
import string
from typing import Sequence, Dict
from types import MappingProxyType

from .util import resource_path, bfh, bh2u, randrange
from .crypto import hmac_oneshot
from . import version
from .logging import Logger


# http://www.asahi-net.or.jp/~ax2s-kmtn/ref/unicode/e_asia.html
CJK_INTERVALS = [
    (0x4E00, 0x9FFF, 'CJK Unified Ideographs'),
    (0x3400, 0x4DBF, 'CJK Unified Ideographs Extension A'),
    (0x20000, 0x2A6DF, 'CJK Unified Ideographs Extension B'),
    (0x2A700, 0x2B73F, 'CJK Unified Ideographs Extension C'),
    (0x2B740, 0x2B81F, 'CJK Unified Ideographs Extension D'),
    (0xF900, 0xFAFF, 'CJK Compatibility Ideographs'),
    (0x2F800, 0x2FA1D, 'CJK Compatibility Ideographs Supplement'),
    (0x3190, 0x319F, 'Kanbun'),
    (0x2E80, 0x2EFF, 'CJK Radicals Supplement'),
    (0x2F00, 0x2FDF, 'CJK Radicals'),
    (0x31C0, 0x31EF, 'CJK Strokes'),
    (0x2FF0, 0x2FFF, 'Ideographic Description Characters'),
    (0xE0100, 0xE01EF, 'Variation Selectors Supplement'),
    (0x3100, 0x312F, 'Bopomofo'),
    (0x31A0, 0x31BF, 'Bopomofo Extended'),
    (0xFF00, 0xFFEF, 'Halfwidth and Fullwidth Forms'),
    (0x3040, 0x309F, 'Hiragana'),
    (0x30A0, 0x30FF, 'Katakana'),
    (0x31F0, 0x31FF, 'Katakana Phonetic Extensions'),
    (0x1B000, 0x1B0FF, 'Kana Supplement'),
    (0xAC00, 0xD7AF, 'Hangul Syllables'),
    (0x1100, 0x11FF, 'Hangul Jamo'),
    (0xA960, 0xA97F, 'Hangul Jamo Extended A'),
    (0xD7B0, 0xD7FF, 'Hangul Jamo Extended B'),
    (0x3130, 0x318F, 'Hangul Compatibility Jamo'),
    (0xA4D0, 0xA4FF, 'Lisu'),
    (0x16F00, 0x16F9F, 'Miao'),
    (0xA000, 0xA48F, 'Yi Syllables'),
    (0xA490, 0xA4CF, 'Yi Radicals'),
]


def is_CJK(c: str) -> bool:
    n = ord(c)
    for imin, imax, name in CJK_INTERVALS:
        if imin <= n <= imax:
            return True
    return False


def normalize_text(seed: str) -> str:
    # normalize
    seed = unicodedata.normalize('NFKD', seed)
    # lower
    seed = seed.lower()
    # remove accents
    seed = u''.join([c for c in seed if not unicodedata.combining(c)])
    # normalize whitespaces
    seed = u' '.join(seed.split())
    # remove whitespaces between CJK. Note: the join above already stripped
    # leading/trailing whitespace, so seed[i-1] and seed[i+1] are only
    # evaluated for interior positions and cannot go out of range.
    seed = u''.join([seed[i] for i in range(len(seed))
                     if not (seed[i] in string.whitespace
                             and is_CJK(seed[i - 1])
                             and is_CJK(seed[i + 1]))])
    return seed
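
# Illustrative examples of normalize_text (added for clarity; these inputs
# and outputs follow from the steps above and are not part of the original
# source):
#   normalize_text('  Héllo   Wörld ')  ->  'hello world'  # NFKD, accents stripped, case folded
#   normalize_text('木 木')              ->  '木木'          # whitespace between CJK removed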


_WORDLIST_CACHE = {}  # type: Dict[str, Wordlist]


class Wordlist(tuple):

    def __init__(self, words: Sequence[str]):
        super().__init__()
        index_from_word = {w: i for i, w in enumerate(words)}
        self._index_from_word = MappingProxyType(index_from_word)  # no mutation

    def index(self, word, start=None, stop=None) -> int:
        try:
            return self._index_from_word[word]
        except KeyError as e:
            raise ValueError from e

    def __contains__(self, word) -> bool:
        try:
            self.index(word)
        except ValueError:
            return False
        else:
            return True

    @classmethod
    def from_file(cls, filename) -> 'Wordlist':
        path = resource_path('wordlist', filename)
        if path not in _WORDLIST_CACHE:
            with open(path, 'r', encoding='utf-8') as f:
                s = f.read().strip()
            s = unicodedata.normalize('NFKD', s)
            lines = s.split('\n')
            words = []
            for line in lines:
                line = line.split('#')[0]
                line = line.strip(' \r')
                assert ' ' not in line
                if line:
                    words.append(line)

            _WORDLIST_CACHE[path] = Wordlist(words)
        return _WORDLIST_CACHE[path]


filenames = {
    'en': 'english.txt',
    'es': 'spanish.txt',
    'ja': 'japanese.txt',
    'pt': 'portuguese.txt',
    'zh': 'chinese_simplified.txt',
}


class Mnemonic(Logger):
    # Seed derivation does not follow BIP39.
    # The mnemonic phrase uses a hash-based checksum, instead of a
    # wordlist-dependent checksum.

    def __init__(self, lang=None):
        Logger.__init__(self)
        lang = lang or 'en'
        self.logger.info(f'language {lang}')
        filename = filenames.get(lang[0:2], 'english.txt')
        self.wordlist = Wordlist.from_file(filename)
        self.logger.info(f"wordlist has {len(self.wordlist)} words")

    @classmethod
    def mnemonic_to_seed(cls, mnemonic, passphrase) -> bytes:
        PBKDF2_ROUNDS = 2048
        mnemonic = normalize_text(mnemonic)
        passphrase = passphrase or ''
        passphrase = normalize_text(passphrase)
        return hashlib.pbkdf2_hmac('sha512', mnemonic.encode('utf-8'),
                                   b'electrum' + passphrase.encode('utf-8'),
                                   iterations=PBKDF2_ROUNDS)

    def mnemonic_encode(self, i):
        n = len(self.wordlist)
        words = []
        while i:
            x = i % n
            i = i // n
            words.append(self.wordlist[x])
        return ' '.join(words)

    def get_suggestions(self, prefix):
        for w in self.wordlist:
            if w.startswith(prefix):
                yield w

    def mnemonic_decode(self, seed):
        n = len(self.wordlist)
        words = seed.split()
        i = 0
        while words:
            w = words.pop()
            k = self.wordlist.index(w)
            i = i * n + k
        return i
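
    # Worked example of the base-n round trip above (illustrative, assuming an
    # English wordlist with n = 2048 words): mnemonic_encode(5*2048 + 3) emits
    # wordlist[3] then wordlist[5] (least significant word first), and
    # mnemonic_decode pops words from the end to rebuild the integer:
    #   i = 0
    #   i = 0*2048 + 5   # after popping wordlist[5]
    #   i = 5*2048 + 3   # after popping wordlist[3]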

    def make_seed(self, *, seed_type=None, num_bits=None) -> str:
        if seed_type is None:
            seed_type = 'segwit'
        if num_bits is None:
            num_bits = 132
        prefix = version.seed_prefix(seed_type)
        # increase num_bits in order to obtain a uniform distribution for the last word
        bpw = math.log(len(self.wordlist), 2)
        num_bits = int(math.ceil(num_bits / bpw) * bpw)
        self.logger.info(f"make_seed. prefix: '{prefix}', entropy: {num_bits} bits")
        entropy = 1
        while entropy < pow(2, num_bits - bpw):
            # try again if seed would not contain enough words
            entropy = randrange(pow(2, num_bits))
        nonce = 0
        while True:
            nonce += 1
            i = entropy + nonce
            seed = self.mnemonic_encode(i)
            if i != self.mnemonic_decode(seed):
                raise Exception('Cannot extract same entropy from mnemonic!')
            if is_old_seed(seed):
                continue
            if is_new_seed(seed, prefix):
                break
        self.logger.info(f'{len(seed.split())} words')
        return seed


def is_new_seed(x: str, prefix=version.SEED_PREFIX) -> bool:
    x = normalize_text(x)
    s = bh2u(hmac_oneshot(b"Seed version", x.encode('utf-8'), hashlib.sha512))
    return s.startswith(prefix)


def is_old_seed(seed: str) -> bool:
    from . import old_mnemonic
    seed = normalize_text(seed)
    words = seed.split()
    try:
        # checks here are deliberately left weak for legacy reasons, see #3149
        old_mnemonic.mn_decode(words)
        uses_electrum_words = True
    except Exception:
        uses_electrum_words = False
    try:
        seed = bfh(seed)
        is_hex = (len(seed) == 16 or len(seed) == 32)
    except Exception:
        is_hex = False
    return is_hex or (uses_electrum_words and (len(words) == 12 or len(words) == 24))


def seed_type(x: str) -> str:
    num_words = len(x.split())
    if is_old_seed(x):
        return 'old'
    elif is_new_seed(x, version.SEED_PREFIX):
        return 'standard'
    elif is_new_seed(x, version.SEED_PREFIX_SW):
        return 'segwit'
    elif is_new_seed(x, version.SEED_PREFIX_2FA) and (num_words == 12 or num_words >= 20):
        # Note: in Electrum 2.7, there was a breaking change in key derivation
        # for this seed type. Unfortunately the seed version/prefix was reused,
        # and now we can only distinguish them based on number of words. :(
        return '2fa'
    elif is_new_seed(x, version.SEED_PREFIX_2FA_SW):
        return '2fa_segwit'
    return ''


def is_seed(x: str) -> bool:
    return bool(seed_type(x))


def is_any_2fa_seed_type(seed_type: str) -> bool:
    return seed_type in ['2fa', '2fa_segwit']
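

# Minimal usage sketch (added for illustration; not part of the original
# module). Assumes the bundled wordlist files are present and that the module
# runs in its package context, e.g. `python -m electrum.mnemonic`:
if __name__ == '__main__':
    m = Mnemonic('en')
    s = m.make_seed(seed_type='segwit')       # 132 bits -> 12 words for a 2048-word list
    print(s, '->', seed_type(s))              # expected: 'segwit'
    root = Mnemonic.mnemonic_to_seed(s, '')   # 64-byte PBKDF2-HMAC-SHA512 output
    print(root.hex())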