cplusplus · tkoeppe · Aug 7, 2022 · Jul 27, 2022 · Aug 7, 2022
diff --git a/source/back.tex b/source/back.tex
@@ -13,8 +13,10 @@ \chapter{Bibliography}
   \doccite{Information technology --- Language independent arithmetic ---
     Part 1: Integer and floating point arithmetic}
 \item
-  ISO/IEC/IEEE 60559:2011, \doccite{Information technology ---
-  Microprocessor Systems --- Floating-Point arithmetic}
+  ISO/IEC TS 18661-3:2015,
+  \doccite{Information Technology ---
+    Programming languages, their environments, and system software interfaces ---
+    Floating-point extensions for C --- Part 3: Interchange and extended types}
 % Other international standards.
 \item
   %%% Format for the following entry is based on that specified at

diff --git a/source/basic.tex b/source/basic.tex
@@ -4995,15 +4995,23 @@
 The types
 \keyword{float}, \keyword{double}, and \tcode{\keyword{long} \keyword{double}},
 and cv-qualified versions\iref{basic.type.qualifier} thereof,
+are collectively termed
+\defnx{standard floating-point types}{type!floating-point!standard}.
+An implementation may also provide additional types
+that represent floating-point values and define them (and cv-qualified versions thereof) to be
+\defnx{extended floating-point types}{type!floating-point!extended}.
+The standard and extended floating-point types
 are collectively termed \defnx{floating-point types}{type!floating-point}.
-The value
-representation of floating-point types is \impldef{value representation of
-floating-point types}.
-\indextext{floating-point type!implementation-defined}%
 \begin{note}
-This document imposes no requirements on the accuracy of
-floating-point operations; see also~\ref{support.limits}.
+Any additional implementation-specific types representing floating-point values
+that are not defined by the implementation to be extended floating-point types
+are not considered to be floating-point types, and
+this document imposes no requirements on them or
+their interactions with floating-point types.
 \end{note}
+Except as specified in~\ref{basic.extended.fp},
+the object and value representations and accuracy of operations
+of floating-point types are \impldef{representation of floating-point types}.
 
 \pnum
 Integral and floating-point types are collectively
@@ -5049,6 +5057,90 @@
 same value representation, they are nevertheless different types.
 \end{note}
 
+\rSec2[basic.extended.fp]{Optional extended floating-point types}
+
+\pnum
+If the implementation supports an extended floating-point type\iref{basic.fundamental}
+whose properties are specified by
+the ISO/IEC/IEEE 60559 floating-point interchange format binary16,
+then the \grammarterm{typedef-name} \tcode{std::float16_t}
+is defined in the header \libheaderref{stdfloat} and names such a type,
+the macro \mname{STDCPP_FLOAT16_T} is defined\iref{cpp.predefined}, and
+the floating-point literal suffixes \tcode{f16} and \tcode{F16}
+are supported\iref{lex.fcon}.
+
+\pnum
+If the implementation supports an extended floating-point type
+whose properties are specified by
+the ISO/IEC/IEEE 60559 floating-point interchange format binary32,
+then the \grammarterm{typedef-name} \tcode{std::float32_t}
+is defined in the header \libheader{stdfloat} and names such a type,
+the macro \mname{STDCPP_FLOAT32_T} is defined, and
+the floating-point literal suffixes \tcode{f32} and \tcode{F32} are supported.
+
+\pnum
+If the implementation supports an extended floating-point type
+whose properties are specified by
+the ISO/IEC/IEEE 60559 floating-point interchange format binary64,
+then the \grammarterm{typedef-name} \tcode{std::float64_t}
+is defined in the header \libheader{stdfloat} and names such a type,
+the macro \mname{STDCPP_FLOAT64_T} is defined, and
+the floating-point literal suffixes \tcode{f64} and \tcode{F64} are supported.
+
+\pnum
+If the implementation supports an extended floating-point type
+whose properties are specified by
+the ISO/IEC/IEEE 60559 floating-point interchange format binary128,
+then the \grammarterm{typedef-name} \tcode{std::float128_t}
+is defined in the header \libheader{stdfloat} and names such a type,
+the macro \mname{STDCPP_FLOAT128_T} is defined, and
+the floating-point literal suffixes \tcode{f128} and \tcode{F128} are supported.
+
+\pnum
+If the implementation supports an extended floating-point type
+with the properties, as specified by ISO/IEC/IEEE 60559, of
+radix ($b$) of 2,
+storage width in bits ($k$) of 16,
+precision in bits ($p$) of 8,
+maximum exponent ($\mathit{emax}$) of 127, and
+exponent field width in bits ($w$) of 8, then
+the \grammarterm{typedef-name} \tcode{std::bfloat16_t}
+is defined in the header \libheader{stdfloat} and names such a type,
+the macro \mname{STDCPP_BFLOAT16_T} is defined, and
+the floating-point literal suffixes \tcode{bf16} and \tcode{BF16} are supported.
+
+\pnum
+\begin{note}
+A summary of the parameters for each type is given in \tref{basic.extended.fp}.
+The precision $p$ includes the implicit 1 bit at the beginning of the mantissa,
+so the storage used for the mantissa is $p-1$ bits.
+ISO/IEC/IEEE 60559 does not assign a name for a type
+having the parameters specified for \tcode{std::bfloat16_t}.
+\end{note}
+\begin{floattable}
+{Properties of named extended floating-point types}{basic.extended.fp}{llllll}
+\topline
+\lhdr{Parameter} & \chdr{\tcode{float16_t}} & \chdr{\tcode{float32_t}} &
+\chdr{\tcode{float64_t}} & \chdr{\tcode{float128_t}} &
+\rhdr{\tcode{bfloat16_t}} \\
+\capsep
+ISO/IEC/IEEE 60559 name & binary16 & binary32 & binary64 & binary128 & \\
+$k$, storage width in bits & 16 & 32 & 64 & 128 & 16 \\
+$p$, precision in bits & 11 & 24 & 53 & 113 & 8 \\
+$\mathit{emax}$, maximum exponent & 15 & 127 & 1023 & 16383 & 127 \\
+$w$, exponent field width in bits & 5 & 8 & 11 & 15 & 8 \\
+\end{floattable}
+
+\pnum
+\recommended
+Any names that the implementation provides for
+the extended floating-point types described in this subsection
+that are in addition to the names defined in the \libheader{stdfloat} header
+should be chosen to increase compatibility and interoperability
+with the interchange types
+\tcode{_Float16}, \tcode{_Float32}, \tcode{_Float64}, and \tcode{_Float128}
+defined in ISO/IEC TS 18661-3 and with future versions of the C standard.
+
 \rSec2[basic.compound]{Compound types}
 
 \pnum
@@ -5337,7 +5429,7 @@
 has the top-level cv-qualifier \keyword{volatile}.
 \end{example}
 
-\rSec2[conv.rank]{Integer conversion rank}%
+\rSec2[conv.rank]{Conversion ranks}%
 \indextext{conversion!integer rank}
 
 \pnum
@@ -5394,6 +5486,57 @@
 conversions\iref{expr.arith.conv}.
 \end{note}
 
+\pnum
+Every floating-point type has a \defnadj{floating-point}{conversion rank}
+defined as follows:
+\begin{itemize}
+\item
+The rank of a floating-point type \tcode{T} is greater than
+the rank of any floating-point type
+whose set of values is a proper subset of the set of values of \tcode{T}.
+\item
+The rank of \tcode{\keyword{long} \keyword{double}} is greater than
+the rank of \keyword{double},
+which is greater than the rank of \keyword{float}.
+\item
+Two extended floating-point types with the same set of values have equal ranks.
+\item
+An extended floating-point type with the same set of values as
+exactly one cv-unqualified standard floating-point type
+has a rank equal to the rank of that standard floating-point type.
+\item
+An extended floating-point type with the same set of values as
+more than one cv-unqualified standard floating-point type
+has a rank equal to the rank of \keyword{double}.
+\end{itemize}
+\begin{note}
+The conversion ranks of floating-point types \tcode{T1} and \tcode{T2}
+are unordered if the set of values of \tcode{T1} is
+neither a subset nor a superset of the set of values of \tcode{T2}.
+This can happen when one type has both a larger range and a lower precision
+than the other.
+\end{note}
+
+\pnum
+Floating-point types that have equal floating-point conversion ranks
+are ordered by floating-point conversion subrank.
+The subrank forms a total order among types with equal ranks.
+The types
+\tcode{std::float16_t},
+\tcode{std::float32_t},
+\tcode{std::float64_t}, and
+\tcode{std::float128_t}\iref{stdfloat.syn}
+have a greater conversion subrank than any standard floating-point type
+with equal conversion rank.
+Otherwise, the conversion subrank order is
+\impldef{floating-point conversion subrank}.
+
+\pnum
+\begin{note}
+The floating-point conversion rank and subrank are used in
+the definition of the usual arithmetic conversions\iref{expr.arith.conv}.
+\end{note}
+
 \rSec1[basic.exec]{Program execution}
 
 \rSec2[intro.execution]{Sequential execution}

diff --git a/source/declarations.tex b/source/declarations.tex
@@ -5948,8 +5948,10 @@
 \begin{itemize}
 \item from a floating-point type to an integer type, or
 
-\item from \tcode{long double} to \tcode{double} or \tcode{float}, or from
-\tcode{double} to \tcode{float}, except where the source is a constant expression and
+\item from a floating-point type \tcode{T} to another floating-point type
+whose floating-point conversion rank is neither greater than nor equal to
+that of \tcode{T},
+except where the source is a constant expression and
 the actual value after conversion
 is within the range of values that can be represented (even if it cannot be represented exactly),
 or

diff --git a/source/expressions.tex b/source/expressions.tex
@@ -929,7 +929,13 @@
 \pnum
 \indextext{conversion!floating-point}%
 A prvalue of floating-point type can be converted to a prvalue of
-another floating-point type. If the source value can be exactly
+another floating-point type
+with a greater or equal conversion rank\iref{conv.rank}.
+A prvalue of standard floating-point type can be converted to
+a prvalue of another standard floating-point type.
+
+\pnum
+If the source value can be exactly
 represented in the destination type, the result of the conversion is
 that exact representation. If the source value is between two adjacent
 destination values, the result of the conversion is an
@@ -1114,24 +1120,36 @@
 \item If either operand is of scoped enumeration type\iref{dcl.enum}, no conversions
 are performed; if the other operand does not have the same type, the expression is
 ill-formed.
-
-\item If either operand is of type \tcode{\keyword{long} \keyword{double}}, the
-other shall be converted to \tcode{\keyword{long} \keyword{double}}.
-
-\item Otherwise, if either operand is \keyword{double}, the other shall be
-converted to \keyword{double}.
-
-\item Otherwise, if either operand is \keyword{float}, the other shall be
-converted to \keyword{float}.
-
-\item Otherwise, the integral promotions\iref{conv.prom} shall be
+\item Otherwise, if either operand is of floating-point type,
+the following rules are applied:
+\begin{itemize}
+\item
+If both operands have the same type, no further conversion is needed.
+\item
+Otherwise, if one of the operands is of a non-floating-point type,
+that operand is converted to the type of
+the operand with the floating-point type.
+\item
+Otherwise, if the floating-point conversion ranks\iref{conv.rank} of
+the types of the operands are ordered but not equal,
+then the operand of the type with the lesser floating-point conversion rank
+is converted to the type of the other operand.
+\item
+Otherwise, if the floating-point conversion ranks of the types of
+the operands are equal,
+then the operand of the type with the lesser floating-point conversion subrank\iref{conv.rank}
+is converted to the type of the other operand.
+\item
+Otherwise, the expression is ill-formed.
+\end{itemize}
+\item Otherwise, the integral promotions\iref{conv.prom} are
 performed on both operands.
 \begin{footnote}
 As a consequence, operands of type \keyword{bool}, \keyword{char8_t}, \keyword{char16_t},
 \keyword{char32_t}, \keyword{wchar_t}, or an enumerated type are converted
 to some integral type.
 \end{footnote}
-Then the following rules shall be applied to the promoted operands:
+Then the following rules are applied to the promoted operands:
 
 \begin{itemize}
 
@@ -1140,20 +1158,20 @@
 
 \item Otherwise, if both operands have signed integer types or both have
 unsigned integer types, the operand with the type of lesser integer
-conversion rank shall be converted to the type of the operand with
+conversion rank is converted to the type of the operand with
 greater rank.
 
 \item Otherwise, if the operand that has unsigned integer type has rank
 greater than or equal to the rank of the type of the other operand, the
-operand with signed integer type shall be converted to the type of the
+operand with signed integer type is converted to the type of the
 operand with unsigned integer type.
 
 \item Otherwise, if the type of the operand with signed integer type can
 represent all of the values of the type of the operand with unsigned
-integer type, the operand with unsigned integer type shall be converted
+integer type, the operand with unsigned integer type is converted
 to the type of the operand with signed integer type.
 
-\item Otherwise, both operands shall be converted to the unsigned
+\item Otherwise, both operands are converted to the unsigned
 integer type corresponding to the type of the operand with signed
 integer type.
 \end{itemize}
@@ -4096,6 +4114,17 @@
 underlying type of the enumeration\iref{conv.fpint}, and subsequently to
 the enumeration type.
 
+\pnum
+A prvalue of floating-point type can be explicitly converted to
+any other floating-point type.
+If the source value can be exactly represented in the destination type,
+the result of the conversion has that exact representation.
+If the source value is between two adjacent destination values,
+the result of the conversion is
+an \impldef{result of inexact floating-point conversion} choice of
+either of those values.
+Otherwise, the behavior is undefined.
+
 \pnum
 \indextext{cast!base class}%
 \indextext{cast!derived class}%

diff --git a/source/intro.tex b/source/intro.tex
@@ -33,6 +33,7 @@
 For undated references, the latest edition of the referenced document
 (including any amendments) applies.
 \begin{itemize}
+% ISO documents in numerical order.
 \item ISO/IEC 2382, \doccite{Information technology --- Vocabulary}
 \item ISO 8601:2004, \doccite{Data elements and interchange formats ---
 Information interchange --- Representation of dates and times}
@@ -58,9 +59,12 @@
 \end{footnote}
 \doccite{Information technology ---
 Universal Multiple-Octet Coded Character Set (UCS)}
+\item ISO/IEC/IEEE 60559:2020, \doccite{Information technology ---
+Microprocessor Systems --- Floating-Point arithmetic}
 \item ISO 80000-2:2009, \doccite{Quantities and units ---
 Part 2: Mathematical signs and symbols
 to be used in the natural sciences and technology}
+% Other international standards.
 \item Ecma International, \doccite{ECMAScript
 \begin{footnote}
 ECMAScript\textregistered\ is a registered trademark of Ecma