文章目录
Python 编码
1. 字符串的本质
字符 --> 字节:编码(encode)
字节 --> 字符:解码(decode)

2. 字符编码

字节和文本的编码和解码

字符编码
ord 的意思是 ordinal(序数):返回单个字符对应的 Unicode 码点
chr 的意思是 character(字符):返回整数码点对应的字符
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
ord("A") Out[2]: 65 chr(127) Out[3]: '\x7f' chr(104) Out[4]: 'h' ord("宋") Out[5]: 23435 # 只能传入一个整数 ord ord("zeropython") Traceback (most recent call last): File "/Users/songhao/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code exec(code_obj, self.user_global_ns, self.user_ns) File "<ipython-input-6-ddb4f1055b9c>", line 1, in <module> ord("zeropython") TypeError: ord() expected a character, but string of length 10 found help(ord) Help on built-in function ord in module builtins: ord(c, /) Return the Unicode code point for a one-character string. s = "zeropython" # 将特定的字符串转换成特定的编码 s.encode("ascii") Out[9]: b'zeropython' s.encode("gbk") Out[10]: b'zeropython' s.encode("utf-8") Out[11]: b'zeropython' s.encode("unicode") Traceback (most recent call last): File "/Users/songhao/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code exec(code_obj, self.user_global_ns, self.user_ns) File "<ipython-input-12-ab39b0656cd1>", line 1, in <module> s.encode("unicode") LookupError: unknown encoding: unicode s2 = "从零开始学习Python zeropython" s2.encode('ascii') Traceback (most recent call last): File "/Users/songhao/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code exec(code_obj, self.user_global_ns, self.user_ns) File "<ipython-input-14-91313a8298d3>", line 1, in <module> s2.encode('ascii') UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-5: ordinal not in range(128) # ordinal not in range(128) s2.encode('utf-8') Out[15]: b'\xe4\xbb\x8e\xe9\x9b\xb6\xe5\xbc\x80\xe5\xa7\x8b\xe5\xad\xa6\xe4\xb9\xa0Python zeropython' s2.encode('utf-16') Out[16]: b'\xff\xfe\xceN\xf6\x96\x00_\xcbYf[`NP\x00y\x00t\x00h\x00o\x00n\x00 \x00z\x00e\x00r\x00o\x00p\x00y\x00t\x00h\x00o\x00n\x00' s2.encode('utf-32') Out[17]: 
b'\xff\xfe\x00\x00\xceN\x00\x00\xf6\x96\x00\x00\x00_\x00\x00\xcbY\x00\x00f[\x00\x00`N\x00\x00P\x00\x00\x00y\x00\x00\x00t\x00\x00\x00h\x00\x00\x00o\x00\x00\x00n\x00\x00\x00 \x00\x00\x00z\x00\x00\x00e\x00\x00\x00r\x00\x00\x00o\x00\x00\x00p\x00\x00\x00y\x00\x00\x00t\x00\x00\x00h\x00\x00\x00o\x00\x00\x00n\x00\x00\x00' b1 = b'\xe4\xbb\x8e\xe9\x9b\xb6\xe5\xbc\x80\xe5\xa7\x8b\xe5\xad\xa6\xe4\xb9\xa0Python zeropython' b1.decode('utf-8') Out[19]: '从零开始学习Python zeropython' b1.decode('utf-16') Traceback (most recent call last): File "/Users/songhao/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code exec(code_obj, self.user_global_ns, self.user_ns) File "<ipython-input-20-b6daf60ad93b>", line 1, in <module> b1.decode('utf-16') UnicodeDecodeError: 'utf-16-le' codec can't decode byte 0x6e in position 34: truncated data b1.decode('utf-32') Traceback (most recent call last): File "/Users/songhao/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code exec(code_obj, self.user_global_ns, self.user_ns) File "<ipython-input-21-761b37cda613>", line 1, in <module> b1.decode('utf-32') UnicodeDecodeError: 'utf-32-le' codec can't decode bytes in position 0-3: code point not in range(0x110000) s1 Traceback (most recent call last): File "/Users/songhao/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code exec(code_obj, self.user_global_ns, self.user_ns) File "<ipython-input-22-d0257f733e5e>", line 1, in <module> s1 NameError: name 's1' is not defined s1 = "从零开始学习" s1.encode('utf-8') Out[24]: b'\xe4\xbb\x8e\xe9\x9b\xb6\xe5\xbc\x80\xe5\xa7\x8b\xe5\xad\xa6\xe4\xb9\xa0' b'\xe4\xbb\x8e\xe9\x9b\xb6\xe5\xbc\x80\xe5\xa7\x8b\xe5\xad\xa6\xe4\xb9\xa0'.decode('utf-8') Out[25]: '从零开始学习' b'\xe4\xbb\x8e\xe9\x9b\xb6\xe5\xbc\x80\xe5\xa7\x8b\xe5\xad\xa6\xe4\xb9\xa0'.decode('utf-16') Out[26]: '믤\ue98e뚛볥\ue580讧귥\ue4a6ꂹ' 
b'\xe4\xbb\x8e\xe9\x9b\xb6\xe5\xbc\x80\xe5\xa7\x8b\xe5\xad\xa6\xe4\xb9\xa0'.decode('utf-32') Traceback (most recent call last): File "/Users/songhao/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code exec(code_obj, self.user_global_ns, self.user_ns) File "<ipython-input-27-f8af0a75c110>", line 1, in <module> b'\xe4\xbb\x8e\xe9\x9b\xb6\xe5\xbc\x80\xe5\xa7\x8b\xe5\xad\xa6\xe4\xb9\xa0'.decode('utf-32') UnicodeDecodeError: 'utf-32-le' codec can't decode bytes in position 0-3: code point not in range(0x110000) |
获取系统默认编码
1 2 3 4 |
import sys sys.getdefaultencoding() Out[29]: 'utf-8' |
类型转换

bytes 函数
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
bytes("vac",encoding="utf-8") Out[31]: b'vac' # bytes bytes("你好") Traceback (most recent call last): File "/Users/songhao/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code exec(code_obj, self.user_global_ns, self.user_ns) File "<ipython-input-33-183eb72da6e1>", line 1, in <module> bytes("你好") TypeError: string argument without an encoding bytes("你好",'utf-8') Out[34]: b'\xe4\xbd\xa0\xe5\xa5\xbd' bytes(list(range(19))) Out[35]: b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12' bytes(list(range(3))) Out[36]: b'\x00\x01\x02' bytes([2,3,4]) Out[37]: b'\x02\x03\x04' |
bytearray 函数的使用
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
# bytearry bytearray("你好",'utf-8') Out[39]: bytearray(b'\xe4\xbd\xa0\xe5\xa5\xbd') s1 = bytearray("你好",'utf-8') s1 Out[41]: bytearray(b'\xe4\xbd\xa0\xe5\xa5\xbd') s1[0] Out[42]: 228 s1[0] = 239 s1 Out[44]: bytearray(b'\xef\xbd\xa0\xe5\xa5\xbd') s1.decode('utf-8') Out[45]: '⦆好' s1.append("hell") Traceback (most recent call last): File "/Users/songhao/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code exec(code_obj, self.user_global_ns, self.user_ns) File "<ipython-input-46-b1556e6fe9cd>", line 1, in <module> s1.append("hell") TypeError: 'str' object cannot be interpreted as an integer s1.append(32) s1.decode('utf-8') Out[48]: '⦆好 ' s1.append(65) s1.decode('utf-8') Out[50]: '⦆好 A' s1.decode('宋') Traceback (most recent call last): File "/Users/songhao/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code exec(code_obj, self.user_global_ns, self.user_ns) File "<ipython-input-51-389d78baf084>", line 1, in <module> s1.decode('宋') LookupError: unknown encoding: 宋 s1.append("宋") Traceback (most recent call last): File "/Users/songhao/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code exec(code_obj, self.user_global_ns, self.user_ns) File "<ipython-input-52-66d780330da5>", line 1, in <module> s1.append("宋") TypeError: 'str' object cannot be interpreted as an integer "宋".encode('utf-8') Out[53]: b'\xe5\xae\x8b' "宋".encode('utf-8').decode('utf-8') Out[54]: '宋' |
