Browse Source

Format full-width CJK characters properly

MMGen 7 years ago
parent
commit
ea6629d644
4 changed files with 51 additions and 26 deletions
  1. 27 10
      mmgen/obj.py
  2. 1 1
      mmgen/tool.py
  3. 4 1
      mmgen/util.py
  4. 19 14
      test/test.py

+ 27 - 10
mmgen/obj.py

@@ -20,7 +20,7 @@
 obj.py: MMGen native classes
 """
 
-import sys,os
+import sys,os,unicodedata
 from decimal import *
 from mmgen.color import *
 from string import hexdigits,ascii_letters,digits
@@ -34,6 +34,14 @@ def is_tw_label(s):      return TwLabel(s,on_fail='silent')
 def is_wif(s):           return WifKey(s,on_fail='silent')
 def is_viewkey(s):       return ViewKey(s,on_fail='silent')
 
+def truncate_str(s,w): # return longest prefix of 's' fitting in 'w' screen columns
+	cols = 0 # columns used so far; full-width ('F') and wide ('W') chars take two
+	for i,ch in enumerate(s):
+		cols += 2 if unicodedata.east_asian_width(ch) in ('F','W') else 1
+		if cols > w:
+			return s[:i]
+	return s # 's' already fits: return it unchanged rather than falling through to None
+
 class MMGenObject(object):
 
 	# Pretty-print any object subclassed from MMGenObject, recursing into sub-objects - WIP
@@ -128,22 +136,31 @@ class Hilite(object):
 	trunc_ok = True
 
 	@classmethod
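+	# 'width' is in screen columns, not chars: each full-width/wide (CJK) char takes two, so a CJK string's screen width exceeds len(s)
+	# 'append_chars' and 'encl' must consist of single-width chars only (their len() is used directly as their screen width)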
 	def fmtc(cls,s,width=None,color=False,encl='',trunc_ok=None,
 				center=False,nullrepl='',append_chars='',append_color=False):
+		s = unicode(s)
+		s_wide_count = len([1 for ch in s if unicodedata.east_asian_width(ch) in ('F','W')])
+		assert type(encl) is str and len(encl) in (0,2),"'encl' must be 2-character str"
+		a,b = list(encl) if encl else ('','')
+		add_len = len(a) + len(b) + len(append_chars)
 		if width == None: width = cls.width
 		if trunc_ok == None: trunc_ok = cls.trunc_ok
-		assert width > 0,'Width must be > 0'
+		assert width >= 2 + add_len,'Width must be at least 2' # 2 because CJK
+		if len(s) + s_wide_count + add_len > width:
+			assert trunc_ok, "If 'trunc_ok' is false, 'width' must be >= screen width of string"
+			s = truncate_str(s,width-add_len)
 		if s == '' and nullrepl:
-			s,center = nullrepl,True
-		if center: s = s.center(width)
-		assert type(encl) is str and len(encl) in (0,2),'type(encl) must be str and len(encl) be in (0,2)'
-		a,b = list(encl) if encl else ('','')
-		if trunc_ok and len(s) > width: s = s[:width]
+			s = nullrepl.center(width)
+		else:
+			s = a+s+b
+			if center: s = s.center(width)
 		if append_chars:
-			return cls.colorize(a+s+b,color=color) + \
-					cls.colorize(append_chars.ljust(width-len(a+s+b)),color=append_color)
+			return cls.colorize(s,color=color) + \
+					cls.colorize(append_chars.ljust(width-len(s)-s_wide_count),color=append_color)
 		else:
-			return cls.colorize((a+s+b).ljust(width),color=color)
+			return cls.colorize(s.ljust(width-s_wide_count),color=color)
 
 	def fmt(self,*args,**kwargs):
 		assert args == () # forbid invocation w/o keywords

+ 1 - 1
mmgen/tool.py

@@ -728,7 +728,7 @@ def Listaddresses(addrs='',minconf=1,
 	fs = u'{{mid}}{} {{cmt}} {{amt}}{}'.format(('',' {addr}')[showbtcaddrs],('',' {age}')[show_age])
 	mmaddrs = [k for k in addrs.keys() if k.type == 'mmgen']
 	max_mmid_len = max(len(k) for k in mmaddrs) + 2 if mmaddrs else 10
-	max_cmt_len  = max(max(len(v['lbl'].comment) for v in addrs.values()),7)
+	max_cmt_len  = max(max(screen_width(v['lbl'].comment) for v in addrs.values()),7)
 	addr_width = max(len(addrs[mmid]['addr']) for mmid in addrs)
 
 #	pmsg([a.split('.')[1] for a in [str(v['amt']) for v in addrs.values()] if '.' in a])

+ 4 - 1
mmgen/util.py

@@ -20,7 +20,7 @@
 util.py:  Low-level routines imported by other modules in the MMGen suite
 """
 
-import sys,os,time,stat,re
+import sys,os,time,stat,re,unicodedata
 from hashlib import sha256
 from binascii import hexlify,unhexlify
 from string import hexdigits
@@ -193,6 +193,9 @@ def split_into_cols(col_wid,s):
 	return ' '.join([s[col_wid*i:col_wid*(i+1)]
 					for i in range(len(s)/col_wid+1)]).rstrip()
 
+def screen_width(s): # terminal columns occupied by 's': full-width ('F') and wide ('W') chars count as two, all others as one
+	return sum(2 if unicodedata.east_asian_width(ch) in ('F','W') else 1 for ch in s)
+
 def capfirst(s): # different from str.capitalize() - doesn't downcase any uc in string
 	return s if len(s) == 0 else s[0].upper() + s[1:]
 

+ 19 - 14
test/test.py

@@ -55,10 +55,14 @@ ref_wallet_incog_offset = 123
 
 from mmgen.obj import MMGenTXLabel,PrivKey
 from mmgen.addr import AddrGenerator,KeyGenerator,AddrList,AddrData,AddrIdxList
-ref_tx_label = ''.join(map(unichr,  range(65,91) +
+
+ref_tx_label_jp = u'必要なのは、信用ではなく暗号化された証明に基づく電子取引システムであり、これにより希望する二者が信用できる第三者機関を介さずに直接取引できるよう' # 72 chars ('W'ide)
+ref_tx_label_zh = u'所以,我們非常需要這樣一種電子支付系統,它基於密碼學原理而不基於信用,使得任何達成一致的雙方,能夠直接進行支付,從而不需要協力廠商仲介的參與。。' # 72 chars ('F'ull + 'W'ide)
+ref_tx_label_lat_cyr_gr = ''.join(map(unichr,
+									range(65,91) +
 									range(1040,1072) + # cyrillic
 									range(913,939) +   # greek
-									range(97,123)))[:MMGenTXLabel.max_len]
+									range(97,123)))[:MMGenTXLabel.max_len] # 72 chars
 ref_bw_hash_preset = '1'
 ref_bw_file        = 'wallet.mmbrain'
 ref_bw_file_spc    = 'wallet-spaced.mmbrain'
@@ -1118,8 +1122,8 @@ labels = [
 	"Automotive",
 	"Travel expenses",
 	"Healthcare",
-	"Freelancing 1",
-	"Freelancing 2",
+	ref_tx_label_jp[:40].encode('utf8'),
+	ref_tx_label_zh[:40].encode('utf8'),
 	"Alice's allowance",
 	"Bob's bequest",
 	"House purchase",
@@ -1647,7 +1651,7 @@ class MMGenTestSuite(object):
 		t.expect('OK? (Y/n): ','y') # change OK?
 		if do_label:
 			t.expect('Add a comment to transaction? (y/N): ','y')
-			t.expect('Comment: ',ref_tx_label.encode('utf8')+'\n')
+			t.expect('Comment: ',ref_tx_label_lat_cyr_gr.encode('utf8')+'\n')
 		else:
 			t.expect('Add a comment to transaction? (y/N): ','\n')
 		t.tx_view(view=view)
@@ -2516,7 +2520,7 @@ class MMGenTestSuite(object):
 		t.expect('OK? (Y/n): ','y') # fee OK?
 		t.expect('OK? (Y/n): ','y') # change OK?
 		t.expect('Add a comment to transaction? (y/N): ',('\n','y')[do_label])
-		if do_label: t.expect('Comment: ',ref_tx_label.encode('utf8')+'\n')
+		if do_label: t.expect('Comment: ',ref_tx_label_jp.encode('utf8')+'\n')
 		t.expect('View decoded transaction\? .*?: ',('t','v')[full_tx_view],regex=True)
 		if not do_label: t.expect('to continue: ','\n')
 		t.passphrase('MMGen wallet',pw)
@@ -2679,8 +2683,10 @@ class MMGenTestSuite(object):
 		t.expect('Removed label.*in tracking wallet',regex=True)
 		t.ok()
 
-	utf8_label     =  u'Edited label (40 characters, UTF8) α-β-γ'
-	utf8_label_pat = ur'Edited label \(40 characters, UTF8\) ..-..-..'
+# 	utf8_label     =  u'Edited label (40 characters, UTF8/JP) 月へ' # '\xe6\x9c\x88\xe3\x81\xb8' (Jp.)
+# 	utf8_label_pat = ur'Edited label \(40 characters, UTF8/JP\) ......'
+	utf8_label     = ref_tx_label_zh[:40]
+	utf8_label_pat = utf8_label
 
 	def regtest_bob_add_label(self,name):
 		sid = self.regtest_user_sid('bob')
@@ -2688,7 +2694,7 @@ class MMGenTestSuite(object):
 
 	def regtest_alice_add_label1(self,name):
 		sid = self.regtest_user_sid('alice')
-		return self.regtest_user_add_label(name,'alice',sid+':C:1','Original Label')
+		return self.regtest_user_add_label(name,'alice',sid+':C:1',u'Original Label - 月へ')
 
 	def regtest_alice_add_label2(self,name):
 		sid = self.regtest_user_sid('alice')
@@ -2738,12 +2744,12 @@ class MMGenTestSuite(object):
 
 	def regtest_user_chk_label(self,name,user,addr,label,label_pat=None):
 		t = MMGenExpect(name,'mmgen-tool',['--'+user,'listaddresses','all_labels=1'])
-		t.expect(ur'{}\s+\S{{30}}\S+\s+{}\s+'.format(addr,label_pat or label),regex=True)
+		t.expect(r'{}\s+\S{{30}}\S+\s+{}\s+'.format(addr,(label_pat or label).encode('utf8')),regex=True)
 		t.ok()
 
 	def regtest_alice_chk_label1(self,name):
 		sid = self.regtest_user_sid('alice')
-		return self.regtest_user_chk_label(name,'alice',sid+':C:1','Original Label')
+		return self.regtest_user_chk_label(name,'alice',sid+':C:1',u'Original Label - 月へ')
 
 	def regtest_alice_chk_label2(self,name):
 		sid = self.regtest_user_sid('alice')
@@ -2754,8 +2760,7 @@ class MMGenTestSuite(object):
 
 	def regtest_alice_chk_label3(self,name):
 		sid = self.regtest_user_sid('alice')
-		return self.regtest_user_chk_label(name,'alice',sid+':C:1',self.utf8_label,
-					label_pat=self.utf8_label_pat)
+		return self.regtest_user_chk_label(name,'alice',sid+':C:1',self.utf8_label,label_pat=self.utf8_label_pat)
 
 	def regtest_alice_chk_label4(self,name):
 		sid = self.regtest_user_sid('alice')
@@ -2906,7 +2911,7 @@ class MMGenTestSuite(object):
 		t.expect('OK? (Y/n): ','y') # fee OK?
 		t.expect('OK? (Y/n): ','y') # change OK?
 		t.expect('Add a comment to transaction? (y/N): ','y')
-		t.expect('Comment: ',ref_tx_label.encode('utf8')+'\n')
+		t.expect('Comment: ',ref_tx_label_zh.encode('utf8')+'\n')
 		t.expect('View decoded transaction\? .*?: ','n',regex=True)
 		t.expect('Save transaction? (y/N): ','y')
 		fn = t.written_to_file('Transaction')