Skip to content

Commit 9f7f733

Browse files
committed
Reimplement {from,to}UTF8 in terms of {encode,decode}StringUtf8
Since we don't use those functions anymore, we can finally `DEPRECATE` them.
1 parent 5027632 commit 9f7f733

File tree

1 file changed

+6
-58
lines changed

1 file changed

+6
-58
lines changed

Cabal/Distribution/Utils/Generic.hs

Lines changed: 6 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,6 @@ import Distribution.Compat.Prelude
7878

7979
import Distribution.Utils.String
8080

81-
import Data.Bits
82-
( Bits((.|.), (.&.), shiftL, shiftR) )
8381
import Data.List
8482
( isInfixOf )
8583
import Data.Ord
@@ -166,72 +164,22 @@ writeFileAtomic targetPath content = do
166164
-- This is a modification of the UTF8 code from gtk2hs and the
167165
-- utf8-string package.
168166

167+
{-# DEPRECATED fromUTF8 "Please use 'decodeStringUtf8', 'fromUTF8BS', or 'fromUTF8BS'" #-}
169168
fromUTF8 :: String -> String
170-
fromUTF8 [] = []
171-
fromUTF8 (c:cs)
172-
| c <= '\x7F' = c : fromUTF8 cs
173-
| c <= '\xBF' = replacementChar : fromUTF8 cs
174-
| c <= '\xDF' = twoBytes c cs
175-
| c <= '\xEF' = moreBytes 3 0x800 cs (ord c .&. 0xF)
176-
| c <= '\xF7' = moreBytes 4 0x10000 cs (ord c .&. 0x7)
177-
| c <= '\xFB' = moreBytes 5 0x200000 cs (ord c .&. 0x3)
178-
| c <= '\xFD' = moreBytes 6 0x4000000 cs (ord c .&. 0x1)
179-
| otherwise = replacementChar : fromUTF8 cs
169+
fromUTF8 = decodeStringUtf8 . map c2w
180170
where
181-
twoBytes c0 (c1:cs')
182-
| ord c1 .&. 0xC0 == 0x80
183-
= let d = ((ord c0 .&. 0x1F) `shiftL` 6)
184-
.|. (ord c1 .&. 0x3F)
185-
in if d >= 0x80
186-
then chr d : fromUTF8 cs'
187-
else replacementChar : fromUTF8 cs'
188-
twoBytes _ cs' = replacementChar : fromUTF8 cs'
189-
190-
moreBytes :: Int -> Int -> [Char] -> Int -> [Char]
191-
moreBytes 1 overlong cs' acc
192-
| overlong <= acc && acc <= 0x10FFFF
193-
&& (acc < 0xD800 || 0xDFFF < acc)
194-
&& (acc < 0xFFFE || 0xFFFF < acc)
195-
= chr acc : fromUTF8 cs'
196-
197-
| otherwise
198-
= replacementChar : fromUTF8 cs'
199-
200-
moreBytes byteCount overlong (cn:cs') acc
201-
| ord cn .&. 0xC0 == 0x80
202-
= moreBytes (byteCount-1) overlong cs'
203-
((acc `shiftL` 6) .|. ord cn .&. 0x3F)
204-
205-
moreBytes _ _ cs' _
206-
= replacementChar : fromUTF8 cs'
207-
208-
replacementChar = '\xfffd'
171+
c2w c | c > '\xFF' = error "fromUTF8: invalid input data"
172+
| otherwise = fromIntegral (ord c)
209173

210174
fromUTF8BS :: SBS.ByteString -> String
211175
fromUTF8BS = decodeStringUtf8 . SBS.unpack
212176

213177
fromUTF8LBS :: BS.ByteString -> String
214178
fromUTF8LBS = decodeStringUtf8 . BS.unpack
215179

180+
{-# DEPRECATED toUTF8 "Please use 'encodeStringUtf8', 'toUTF8BS', or 'toUTF8BS'" #-}
216181
toUTF8 :: String -> String
217-
toUTF8 [] = []
218-
toUTF8 (c:cs)
219-
| c <= '\x07F' = c
220-
: toUTF8 cs
221-
| c <= '\x7FF' = chr (0xC0 .|. (w `shiftR` 6))
222-
: chr (0x80 .|. (w .&. 0x3F))
223-
: toUTF8 cs
224-
| c <= '\xFFFF'= chr (0xE0 .|. (w `shiftR` 12))
225-
: chr (0x80 .|. ((w `shiftR` 6) .&. 0x3F))
226-
: chr (0x80 .|. (w .&. 0x3F))
227-
: toUTF8 cs
228-
| otherwise = chr (0xf0 .|. (w `shiftR` 18))
229-
: chr (0x80 .|. ((w `shiftR` 12) .&. 0x3F))
230-
: chr (0x80 .|. ((w `shiftR` 6) .&. 0x3F))
231-
: chr (0x80 .|. (w .&. 0x3F))
232-
: toUTF8 cs
233-
where w = ord c
234-
182+
toUTF8 = map (chr . fromIntegral) . encodeStringUtf8
235183

236184
toUTF8BS :: String -> SBS.ByteString
237185
toUTF8BS = SBS.pack . encodeStringUtf8

0 commit comments

Comments
 (0)