mnemonic.py
#!/usr/bin/env python
#
# Electrum - lightweight Bitcoin client
# Copyright (C) 2014 Thomas Voegtlin
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import os
import math
import hashlib
import unicodedata
import string
from typing import Sequence, Dict
from types import MappingProxyType

from .util import resource_path, bfh, bh2u, randrange
from .crypto import hmac_oneshot
from . import version
from .logging import Logger


# http://www.asahi-net.or.jp/~ax2s-kmtn/ref/unicode/e_asia.html
CJK_INTERVALS = [
    (0x4E00, 0x9FFF, 'CJK Unified Ideographs'),
    (0x3400, 0x4DBF, 'CJK Unified Ideographs Extension A'),
    (0x20000, 0x2A6DF, 'CJK Unified Ideographs Extension B'),
    (0x2A700, 0x2B73F, 'CJK Unified Ideographs Extension C'),
    (0x2B740, 0x2B81F, 'CJK Unified Ideographs Extension D'),
    (0xF900, 0xFAFF, 'CJK Compatibility Ideographs'),
    (0x2F800, 0x2FA1D, 'CJK Compatibility Ideographs Supplement'),
    (0x3190, 0x319F, 'Kanbun'),
    (0x2E80, 0x2EFF, 'CJK Radicals Supplement'),
    (0x2F00, 0x2FDF, 'CJK Radicals'),
    (0x31C0, 0x31EF, 'CJK Strokes'),
    (0x2FF0, 0x2FFF, 'Ideographic Description Characters'),
    (0xE0100, 0xE01EF, 'Variation Selectors Supplement'),
    (0x3100, 0x312F, 'Bopomofo'),
    (0x31A0, 0x31BF, 'Bopomofo Extended'),
    (0xFF00, 0xFFEF, 'Halfwidth and Fullwidth Forms'),
    (0x3040, 0x309F, 'Hiragana'),
    (0x30A0, 0x30FF, 'Katakana'),
    (0x31F0, 0x31FF, 'Katakana Phonetic Extensions'),
    (0x1B000, 0x1B0FF, 'Kana Supplement'),
    (0xAC00, 0xD7AF, 'Hangul Syllables'),
    (0x1100, 0x11FF, 'Hangul Jamo'),
    (0xA960, 0xA97F, 'Hangul Jamo Extended A'),
    (0xD7B0, 0xD7FF, 'Hangul Jamo Extended B'),
    (0x3130, 0x318F, 'Hangul Compatibility Jamo'),
    (0xA4D0, 0xA4FF, 'Lisu'),
    (0x16F00, 0x16F9F, 'Miao'),
    (0xA000, 0xA48F, 'Yi Syllables'),
    (0xA490, 0xA4CF, 'Yi Radicals'),
]


def is_CJK(c: str) -> bool:
    n = ord(c)
    for imin, imax, name in CJK_INTERVALS:
        if imin <= n <= imax:
            return True
    return False


def normalize_text(seed: str) -> str:
    # normalize
    seed = unicodedata.normalize('NFKD', seed)
    # lower
    seed = seed.lower()
    # remove accents
    seed = u''.join([c for c in seed if not unicodedata.combining(c)])
    # normalize whitespaces
    seed = u' '.join(seed.split())
    # remove whitespaces between CJK. Note: the join above already stripped
    # leading/trailing whitespace, so seed[i-1] and seed[i+1] are only
    # evaluated for interior positions and cannot go out of range.
    seed = u''.join([seed[i] for i in range(len(seed))
                     if not (seed[i] in string.whitespace
                             and is_CJK(seed[i - 1])
                             and is_CJK(seed[i + 1]))])
    return seed
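
# Illustrative examples of normalize_text (added for clarity; these inputs
# and outputs follow from the steps above and are not part of the original
# source):
#   normalize_text('  Héllo   Wörld ')  ->  'hello world'  # NFKD, accents stripped, case folded
#   normalize_text('木 木')              ->  '木木'          # whitespace between CJK removed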


_WORDLIST_CACHE = {}  # type: Dict[str, Wordlist]


class Wordlist(tuple):

    def __init__(self, words: Sequence[str]):
        super().__init__()
        index_from_word = {w: i for i, w in enumerate(words)}
        self._index_from_word = MappingProxyType(index_from_word)  # no mutation

    def index(self, word, start=None, stop=None) -> int:
        try:
            return self._index_from_word[word]
        except KeyError as e:
            raise ValueError from e

    def __contains__(self, word) -> bool:
        try:
            self.index(word)
        except ValueError:
            return False
        else:
            return True

    @classmethod
    def from_file(cls, filename) -> 'Wordlist':
        path = resource_path('wordlist', filename)
        if path not in _WORDLIST_CACHE:
            with open(path, 'r', encoding='utf-8') as f:
                s = f.read().strip()
            s = unicodedata.normalize('NFKD', s)
            lines = s.split('\n')
            words = []
            for line in lines:
                line = line.split('#')[0]
                line = line.strip(' \r')
                assert ' ' not in line
                if line:
                    words.append(line)

            _WORDLIST_CACHE[path] = Wordlist(words)
        return _WORDLIST_CACHE[path]


filenames = {
    'en': 'english.txt',
    'es': 'spanish.txt',
    'ja': 'japanese.txt',
    'pt': 'portuguese.txt',
    'zh': 'chinese_simplified.txt',
}


class Mnemonic(Logger):
    # Seed derivation does not follow BIP39.
    # The mnemonic phrase uses a hash-based checksum, instead of a
    # wordlist-dependent checksum.

    def __init__(self, lang=None):
        Logger.__init__(self)
        lang = lang or 'en'
        self.logger.info(f'language {lang}')
        filename = filenames.get(lang[0:2], 'english.txt')
        self.wordlist = Wordlist.from_file(filename)
        self.logger.info(f"wordlist has {len(self.wordlist)} words")

    @classmethod
    def mnemonic_to_seed(cls, mnemonic, passphrase) -> bytes:
        PBKDF2_ROUNDS = 2048
        mnemonic = normalize_text(mnemonic)
        passphrase = passphrase or ''
        passphrase = normalize_text(passphrase)
        return hashlib.pbkdf2_hmac('sha512', mnemonic.encode('utf-8'),
                                   b'electrum' + passphrase.encode('utf-8'),
                                   iterations=PBKDF2_ROUNDS)

    def mnemonic_encode(self, i):
        n = len(self.wordlist)
        words = []
        while i:
            x = i % n
            i = i // n
            words.append(self.wordlist[x])
        return ' '.join(words)

    def get_suggestions(self, prefix):
        for w in self.wordlist:
            if w.startswith(prefix):
                yield w

    def mnemonic_decode(self, seed):
        n = len(self.wordlist)
        words = seed.split()
        i = 0
        while words:
            w = words.pop()
            k = self.wordlist.index(w)
            i = i * n + k
        return i
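
    # Worked example of the base-n round trip above (illustrative, assuming an
    # English wordlist with n = 2048 words): mnemonic_encode(5*2048 + 3) emits
    # wordlist[3] then wordlist[5] (least significant word first), and
    # mnemonic_decode pops words from the end to rebuild the integer:
    #   i = 0
    #   i = 0*2048 + 5   # after popping wordlist[5]
    #   i = 5*2048 + 3   # after popping wordlist[3]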

    def make_seed(self, *, seed_type=None, num_bits=None) -> str:
        if seed_type is None:
            seed_type = 'segwit'
        if num_bits is None:
            num_bits = 132
        prefix = version.seed_prefix(seed_type)
        # increase num_bits in order to obtain a uniform distribution for the last word
        bpw = math.log(len(self.wordlist), 2)
        num_bits = int(math.ceil(num_bits / bpw) * bpw)
        self.logger.info(f"make_seed. prefix: '{prefix}', entropy: {num_bits} bits")
        entropy = 1
        while entropy < pow(2, num_bits - bpw):
            # try again if seed would not contain enough words
            entropy = randrange(pow(2, num_bits))
        nonce = 0
        while True:
            nonce += 1
            i = entropy + nonce
            seed = self.mnemonic_encode(i)
            if i != self.mnemonic_decode(seed):
                raise Exception('Cannot extract same entropy from mnemonic!')
            if is_old_seed(seed):
                continue
            if is_new_seed(seed, prefix):
                break
        self.logger.info(f'{len(seed.split())} words')
        return seed


def is_new_seed(x: str, prefix=version.SEED_PREFIX) -> bool:
    x = normalize_text(x)
    s = bh2u(hmac_oneshot(b"Seed version", x.encode('utf-8'), hashlib.sha512))
    return s.startswith(prefix)


def is_old_seed(seed: str) -> bool:
    from . import old_mnemonic
    seed = normalize_text(seed)
    words = seed.split()
    try:
        # checks here are deliberately left weak for legacy reasons, see #3149
        old_mnemonic.mn_decode(words)
        uses_electrum_words = True
    except Exception:
        uses_electrum_words = False
    try:
        seed = bfh(seed)
        is_hex = (len(seed) == 16 or len(seed) == 32)
    except Exception:
        is_hex = False
    return is_hex or (uses_electrum_words and (len(words) == 12 or len(words) == 24))


def seed_type(x: str) -> str:
    num_words = len(x.split())
    if is_old_seed(x):
        return 'old'
    elif is_new_seed(x, version.SEED_PREFIX):
        return 'standard'
    elif is_new_seed(x, version.SEED_PREFIX_SW):
        return 'segwit'
    elif is_new_seed(x, version.SEED_PREFIX_2FA) and (num_words == 12 or num_words >= 20):
        # Note: in Electrum 2.7, there was a breaking change in key derivation
        # for this seed type. Unfortunately the seed version/prefix was reused,
        # and now we can only distinguish them based on number of words. :(
        return '2fa'
    elif is_new_seed(x, version.SEED_PREFIX_2FA_SW):
        return '2fa_segwit'
    return ''


def is_seed(x: str) -> bool:
    return bool(seed_type(x))


def is_any_2fa_seed_type(seed_type: str) -> bool:
    return seed_type in ['2fa', '2fa_segwit']
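

# Minimal usage sketch (added for illustration; not part of the original
# module). Assumes the bundled wordlist files are present and that the module
# runs in its package context, e.g. `python -m electrum.mnemonic`:
if __name__ == '__main__':
    m = Mnemonic('en')
    s = m.make_seed(seed_type='segwit')       # 132 bits -> 12 words for a 2048-word list
    print(s, '->', seed_type(s))              # expected: 'segwit'
    root = Mnemonic.mnemonic_to_seed(s, '')   # 64-byte PBKDF2-HMAC-SHA512 output
    print(root.hex())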