@@ -190,25 +190,21 @@ encodeLatin1Lax = encodeLatin1
190190-- UTF-8 decoding
191191-------------------------------------------------------------------------------
192192
193- -- | CodePoint represents a specific character in the Unicode standard. The
194- -- code point is a numerical value assigned to each character, and UTF-8
195- -- encoding uses a variable number of bytes to represent different code points.
193+ -- | A 'CodePoint' is the numerical value that Unicode assigns to a character.
196194--
197- -- Calculate the code point value: Depending on the type of the leading byte,
198- -- extract the significant bits from each byte of the sequence and combine them
199- -- to form the complete code point value. The specific bit manipulations will
200- -- differ based on the number of bytes used.
195+ -- It is meant to be used with the resumable decoding APIs such as
196+ -- 'resumeDecodeUtf8Either'.
197+ --
198+ -- We return the 'CodePoint' and the 'DecodeState' in 'DecodeError'.
201199type CodePoint = Int -- plain synonym: a code point is carried as an 'Int'
202200
203201-- | DecodeState refers to the number of bytes remaining to complete the current
204- -- UTF-8 character decoding. For ASCII characters (code points 0 to 127), no
205- -- decoding state is necessary because they are represented by a single byte.
206- -- Therefore, the decoding state for ASCII characters can be considered as 0.
202+ -- UTF-8 character decoding.
203+ --
204+ -- It is meant to be used with the resumable decoding APIs such as
205+ -- 'resumeDecodeUtf8Either'.
207206--
208- -- For multi-byte characters, the decoding state indicates the number of bytes
209- -- remaining to complete the character. It is usually initialized to a non-zero
210- -- value corresponding to the number of bytes in the multi-byte character, e.g.,
211- -- DecodeState will be 1 for 2-bytes char.
207+ -- We return the 'CodePoint' and the 'DecodeState' in 'DecodeError'.
212208type DecodeState = Word8 -- plain synonym: the decoder state is carried as a 'Word8'
213209
214210-- We can divide the errors in three general categories:
0 commit comments