@@ -284,39 +284,64 @@ std::wstring utf8_to_utf16_little_endian(const std::string &in)
284284 return utf8_to_utf16 (in, swap_bytes);
285285}
286286
287- // / \par parameters: String in UTF-16LE format
288- // / \return String in US-ASCII format, with \uxxxx escapes for other characters
/// Append the Java source-code representation of one character to a stream.
/// \param ch: UTF-16LE character. Where wchar_t is wider than 16 bits, a
///   supplementary code point (U+10000..U+10FFFF) is also accepted and is
///   written as a pair of surrogate escapes.
/// \param result: stream to receive string in US-ASCII format, with \\uxxxx
///   escapes for other characters
/// \param loc: locale to check for printable characters
static void utf16_little_endian_to_java(
  const wchar_t ch,
  std::ostringstream &result,
  const std::locale &loc)
{
  // Writes a single escape: backslash, 'u', then the value in lower-case
  // hexadecimal padded to at least four digits with zeros.
  const auto write_unicode_escape = [&result](const unsigned int code_unit) {
    result << "\\u" << std::hex << std::setw(4) << std::setfill('0')
           << code_unit;
  };

  // \u unicode characters are translated very early by the Java compiler and so
  // \u000a or \u000d would become a newline character in a char constant, which
  // is illegal. Instead use \n or \r.
  if(ch == '\n')
    result << "\\n";
  else if(ch == '\r')
    result << "\\r";
  // \f, \b and \t do not need to be escaped, but this will improve readability
  // of generated tests.
  else if(ch == '\f')
    result << "\\f";
  else if(ch == '\b')
    result << "\\b";
  else if(ch == '\t')
    result << "\\t";
  else if(ch <= 255 && isprint(ch, loc))
  {
    // NOTE(review): if loc classifies a character in 128..255 as printable it
    // is written as a single raw byte, which is not US-ASCII -- confirm that
    // callers only pass locales where this cannot happen.
    const auto uch = static_cast<unsigned char>(ch);
    // ", \ and ' need to be escaped.
    if(uch == '"' || uch == '\\' || uch == '\'')
      result << '\\';
    result << uch;
  }
  else
  {
    const auto code = static_cast<unsigned int>(ch);
    if(code >= 0x10000u && code <= 0x10FFFFu)
    {
      // A Java unicode escape takes exactly four hex digits, so a code point
      // above the Basic Multilingual Plane must be split into its UTF-16 high
      // and low surrogates; a five-digit escape would be misread as a
      // four-digit escape followed by a literal digit.
      const unsigned int offset = code - 0x10000u;
      write_unicode_escape(0xD800u + (offset >> 10));
      write_unicode_escape(0xDC00u + (offset & 0x3FFu));
    }
    else
    {
      // Values up to 0xFFFF give a four-digit escape. Values above 0x10FFFF
      // are not Unicode code points and are written unchanged (more than four
      // digits).
      write_unicode_escape(code);
    }
  }
}
327+
328+ // / \param ch: UTF-16LE character
329+ // / \return String in US-ASCII format, with \\uxxxx escapes for other characters
330+ std::string utf16_little_endian_to_java (const wchar_t ch)
331+ {
332+ std::ostringstream result;
333+ const std::locale loc;
334+ utf16_little_endian_to_java (ch, result, loc);
335+ return result.str ();
336+ }
337+
338+ // / \param in: String in UTF-16LE format
339+ // / \return String in US-ASCII format, with \\uxxxx escapes for other characters
289340std::string utf16_little_endian_to_java (const std::wstring &in)
290341{
291342 std::ostringstream result;
292343 const std::locale loc;
293344 for (const auto ch : in)
294- {
295- if (ch==' \n ' )
296- result << " \\ n" ;
297- else if (ch==' \r ' )
298- result << " \\ r" ;
299- else if (ch==' \f ' )
300- result << " \\ f" ;
301- else if (ch==' \b ' )
302- result << " \\ b" ;
303- else if (ch==' \t ' )
304- result << " \\ t" ;
305- else if (ch<=255 && isprint (ch, loc))
306- {
307- const auto uch=static_cast <unsigned char >(ch);
308- if (uch==' "' || uch==' \\ ' )
309- result << ' \\ ' ;
310- result << uch;
311- }
312- else
313- {
314- result << " \\ u"
315- << std::hex
316- << std::setw (4 )
317- << std::setfill (' 0' )
318- << static_cast <unsigned int >(ch);
319- }
320- }
345+ utf16_little_endian_to_java (ch, result, loc);
321346 return result.str ();
322347}
0 commit comments