baseconv.py 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. #!/usr/bin/env python3
  2. #
  3. # mmgen = Multi-Mode GENerator, command-line Bitcoin cold storage solution
  4. # Copyright (C)2013-2022 The MMGen Project <mmgen@tuta.io>
  5. #
  6. # This program is free software: you can redistribute it and/or modify
  7. # it under the terms of the GNU General Public License as published by
  8. # the Free Software Foundation, either version 3 of the License, or
  9. # (at your option) any later version.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License
  17. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  18. """
  19. baseconv.py: base conversion class for the MMGen suite
  20. """
  21. from hashlib import sha256
  22. from .exception import *
  23. from .util import die
  24. def is_b58_str(s):
  25. return set(list(s)) <= set(baseconv.digits['b58'])
  26. def is_b32_str(s):
  27. return set(list(s)) <= set(baseconv.digits['b32'])
  28. def is_xmrseed(s):
  29. return bool(baseconv.tobytes(s.split(),wl_id='xmrseed'))
  30. class baseconv(object):
  31. desc = {
  32. 'b58': ('base58', 'base58-encoded data'),
  33. 'b32': ('MMGen base32', 'MMGen base32-encoded data created using simple base conversion'),
  34. 'b16': ('hexadecimal string','base16 (hexadecimal) string data'),
  35. 'b10': ('base10 string', 'base10 (decimal) string data'),
  36. 'b8': ('base8 string', 'base8 (octal) string data'),
  37. 'b6d': ('base6d (die roll)', 'base6 data using the digits from one to six'),
  38. 'tirosh':('Tirosh mnemonic', 'base1626 mnemonic using truncated Tirosh wordlist'), # not used by wallet
  39. 'mmgen': ('MMGen native mnemonic',
  40. 'MMGen native mnemonic seed phrase created using old Electrum wordlist and simple base conversion'),
  41. 'xmrseed': ('Monero mnemonic', 'Monero new-style mnemonic seed phrase'),
  42. }
  43. # https://en.wikipedia.org/wiki/Base32#RFC_4648_Base32_alphabet
  44. # https://tools.ietf.org/html/rfc4648
  45. digits = {
  46. 'b58': tuple('123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'),
  47. 'b32': tuple('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'), # RFC 4648 alphabet
  48. 'b16': tuple('0123456789abcdef'),
  49. 'b10': tuple('0123456789'),
  50. 'b8': tuple('01234567'),
  51. 'b6d': tuple('123456'),
  52. }
  53. mn_base = 1626 # tirosh list is 1633 words long!
  54. wl_chksums = {
  55. 'mmgen': '5ca31424',
  56. 'xmrseed':'3c381ebb',
  57. 'tirosh': '48f05e1f', # tirosh truncated to mn_base (1626)
  58. # 'tirosh1633': '1a5faeff'
  59. }
  60. seedlen_map = {
  61. 'b58': { 16:22, 24:33, 32:44 },
  62. 'b6d': { 16:50, 24:75, 32:100 },
  63. 'mmgen': { 16:12, 24:18, 32:24 },
  64. 'xmrseed': { 32:25 },
  65. }
  66. seedlen_map_rev = {
  67. 'b58': { 22:16, 33:24, 44:32 },
  68. 'b6d': { 50:16, 75:24, 100:32 },
  69. 'mmgen': { 12:16, 18:24, 24:32 },
  70. 'xmrseed': { 25:32 },
  71. }
  72. @classmethod
  73. def init_mn(cls,mn_id):
  74. if mn_id in cls.digits:
  75. return
  76. if mn_id == 'mmgen':
  77. from .mn_electrum import words
  78. cls.digits[mn_id] = words
  79. elif mn_id == 'xmrseed':
  80. from .mn_monero import words
  81. cls.digits[mn_id] = words
  82. elif mn_id == 'tirosh':
  83. from .mn_tirosh import words
  84. cls.digits[mn_id] = words[:cls.mn_base]
  85. else:
  86. raise ValueError(f'{mn_id}: unrecognized mnemonic ID')
  87. @classmethod
  88. def get_wordlist(cls,wl_id):
  89. cls.init_mn(wl_id)
  90. return cls.digits[wl_id]
  91. @classmethod
  92. def get_wordlist_chksum(cls,wl_id):
  93. cls.init_mn(wl_id)
  94. return sha256(' '.join(cls.digits[wl_id]).encode()).hexdigest()[:8]
  95. @classmethod
  96. def check_wordlists(cls):
  97. for k,v in list(cls.wl_chksums.items()):
  98. res = cls.get_wordlist_chksum(k)
  99. assert res == v,f'{res}: checksum mismatch for {k} (should be {v})'
  100. return True
  101. @classmethod
  102. def check_wordlist(cls,wl_id):
  103. cls.init_mn(wl_id)
  104. wl = cls.digits[wl_id]
  105. from .util import qmsg,compare_chksums
  106. ret = f'Wordlist: {wl_id}\nLength: {len(wl)} words'
  107. new_chksum = cls.get_wordlist_chksum(wl_id)
  108. a,b = 'generated','saved'
  109. compare_chksums(new_chksum,a,cls.wl_chksums[wl_id],b,die_on_fail=True)
  110. if tuple(sorted(wl)) == wl:
  111. return ret + '\nList is sorted'
  112. else:
  113. die(3,'ERROR: List is not sorted!')
  114. @classmethod
  115. def get_pad(cls,pad,seed_pad_func):
  116. """
  117. 'pad' argument to baseconv conversion methods must be either None, 'seed' or an integer.
  118. If None, output of minimum (but never zero) length will be produced.
  119. If 'seed', output length will be mapped from input length using data in seedlen_map.
  120. If an integer, the string, hex string or byte output will be padded to this length.
  121. """
  122. if pad == None:
  123. return 0
  124. elif type(pad) == int:
  125. return pad
  126. elif pad == 'seed':
  127. return seed_pad_func()
  128. else:
  129. raise BaseConversionPadError(f"{pad!r}: illegal value for 'pad' (must be None,'seed' or int)")
  130. @staticmethod
  131. def monero_mn_checksum(words):
  132. from binascii import crc32
  133. wstr = ''.join(word[:3] for word in words)
  134. return words[crc32(wstr.encode()) % len(words)]
  135. @classmethod
  136. def tohex(cls,words_arg,wl_id,pad=None):
  137. "convert string or list data of base 'wl_id' to hex string"
  138. return cls.tobytes(words_arg,wl_id,pad//2 if type(pad)==int else pad).hex()
  139. @classmethod
  140. def tobytes(cls,words_arg,wl_id,pad=None):
  141. "convert string or list data of base 'wl_id' to byte string"
  142. if wl_id not in cls.digits:
  143. cls.init_mn(wl_id)
  144. words = words_arg if isinstance(words_arg,(list,tuple)) else tuple(words_arg.strip())
  145. desc = cls.desc[wl_id][0]
  146. if len(words) == 0:
  147. raise BaseConversionError(f'empty {desc} data')
  148. def get_seed_pad():
  149. assert wl_id in cls.seedlen_map_rev,f'seed padding not supported for base {wl_id!r}'
  150. d = cls.seedlen_map_rev[wl_id]
  151. if not len(words) in d:
  152. raise BaseConversionError(
  153. f'{len(words)}: invalid length for seed-padded {desc} data in base conversion' )
  154. return d[len(words)]
  155. pad_val = max(cls.get_pad(pad,get_seed_pad),1)
  156. wl = cls.digits[wl_id]
  157. base = len(wl)
  158. if not set(words) <= set(wl):
  159. raise BaseConversionError(
  160. ( 'seed data' if pad == 'seed' else f'{words_arg!r}:' ) +
  161. f' not in {desc} format' )
  162. if wl_id == 'xmrseed':
  163. if len(words) not in cls.seedlen_map_rev['xmrseed']:
  164. die(2,f'{len(words)}: invalid length for Monero mnemonic')
  165. z = cls.monero_mn_checksum(words[:-1])
  166. assert z == words[-1],'invalid Monero mnemonic checksum'
  167. words = tuple(words[:-1])
  168. ret = b''
  169. for i in range(len(words)//3):
  170. w1,w2,w3 = [wl.index(w) for w in words[3*i:3*i+3]]
  171. x = w1 + base*((w2-w1)%base) + base*base*((w3-w2)%base)
  172. ret += x.to_bytes(4,'big')[::-1]
  173. return ret
  174. ret = sum([wl.index(words[::-1][i])*(base**i) for i in range(len(words))])
  175. bl = ret.bit_length()
  176. return ret.to_bytes(max(pad_val,bl//8+bool(bl%8)),'big')
  177. @classmethod
  178. def fromhex(cls,hexstr,wl_id,pad=None,tostr=False):
  179. "convert hex string to list or string data of base 'wl_id'"
  180. from .util import is_hex_str
  181. if not is_hex_str(hexstr):
  182. raise HexadecimalStringError(
  183. ( 'seed data' if pad == 'seed' else f'{hexstr!r}:' ) +
  184. ' not a hexadecimal string' )
  185. return cls.frombytes(bytes.fromhex(hexstr),wl_id,pad,tostr)
  186. @classmethod
  187. def frombytes(cls,bytestr,wl_id,pad=None,tostr=False):
  188. "convert byte string to list or string data of base 'wl_id'"
  189. if wl_id not in cls.digits:
  190. cls.init_mn(wl_id)
  191. if not bytestr:
  192. raise BaseConversionError('empty data not allowed in base conversion')
  193. def get_seed_pad():
  194. assert wl_id in cls.seedlen_map, f'seed padding not supported for base {wl_id!r}'
  195. d = cls.seedlen_map[wl_id]
  196. if not len(bytestr) in d:
  197. raise SeedLengthError(
  198. f'{len(bytestr)}: invalid byte length for seed data in seed-padded base conversion' )
  199. return d[len(bytestr)]
  200. pad = max(cls.get_pad(pad,get_seed_pad),1)
  201. wl = cls.digits[wl_id]
  202. base = len(wl)
  203. if wl_id == 'xmrseed':
  204. if len(bytestr) not in cls.seedlen_map['xmrseed']:
  205. die(2, f'{len(bytestr)}: invalid seed byte length for Monero mnemonic')
  206. def num2base_monero(num):
  207. w1 = num % base
  208. w2 = (num//base + w1) % base
  209. w3 = (num//base//base + w2) % base
  210. return [wl[w1], wl[w2], wl[w3]]
  211. o = []
  212. for i in range(len(bytestr)//4):
  213. o += num2base_monero(int.from_bytes(bytestr[i*4:i*4+4][::-1],'big'))
  214. o.append(cls.monero_mn_checksum(o))
  215. else:
  216. num = int.from_bytes(bytestr,'big')
  217. ret = []
  218. while num:
  219. ret.append(num % base)
  220. num //= base
  221. o = [wl[n] for n in [0] * (pad-len(ret)) + ret[::-1]]
  222. return (' ' if wl_id in ('mmgen','xmrseed') else '').join(o) if tostr else o