Commit c0cfff58 authored by Luc Maisonobe

Allow unit parser to work with any identifier and support "1" as unit.

parent 6f9d8f73
......@@ -16,6 +16,7 @@
*/
package org.orekit.utils.units;
import org.hipparchus.fraction.Fraction;
import org.orekit.errors.OrekitException;
import org.orekit.errors.OrekitMessages;
......@@ -97,20 +98,19 @@ class Lexer {
// look for prefixed units
int current = start;
while (current < end &&
(Character.isAlphabetic(unitSpecification.charAt(current)) ||
(Character.isLowerCase(unitSpecification.charAt(current)) ||
Character.isUpperCase(unitSpecification.charAt(current)) ||
unitSpecification.charAt(current) == '°' ||
unitSpecification.charAt(current) == '◦' ||
unitSpecification.charAt(current) == '′' ||
unitSpecification.charAt(current) == '\'' ||
unitSpecification.charAt(current) == '″' ||
unitSpecification.charAt(current) == '"' ||
unitSpecification.charAt(current) == '%' ||
unitSpecification.charAt(current) == 'µ')) {
unitSpecification.charAt(current) == '%')) {
++current;
}
if (current > start) {
final String identifier = unitSpecification.subSequence(start, current).toString();
return emit(current, TokenType.PREFIXED_UNIT, PrefixedUnit.valueOf(identifier), 0);
return emit(current, TokenType.IDENTIFIER, 0, 1);
}
// look for power
......@@ -118,34 +118,82 @@ class Lexer {
unitSpecification.charAt(start) == '*' &&
unitSpecification.charAt(start + 1) == '*') {
// power indicator as **
return emit(start + 2, TokenType.POWER, null, 0);
return emit(start + 2, TokenType.POWER, 0, 1);
} else if (unitSpecification.charAt(start) == '^') {
// power indicator as ^
return emit(start + 1, TokenType.POWER, null, 0);
return emit(start + 1, TokenType.POWER, 0, 1);
} else if (convertSuperscript(start) != ' ' &&
last != null &&
last.getType() != TokenType.POWER) {
// virtual power indicator as we switch to superscript characters
return emit(start, TokenType.POWER, null, 0);
return emit(start, TokenType.POWER, 0, 1);
}
// look for one character tokens
if (unitSpecification.charAt(start) == '*') {
return emit(start + 1, TokenType.MULTIPLICATION, null, 0);
return emit(start + 1, TokenType.MULTIPLICATION, 0, 1);
} else if (unitSpecification.charAt(start) == '×') {
return emit(start + 1, TokenType.MULTIPLICATION, null, 0);
return emit(start + 1, TokenType.MULTIPLICATION, 0, 1);
} else if (unitSpecification.charAt(start) == '.') {
return emit(start + 1, TokenType.MULTIPLICATION, null, 0);
return emit(start + 1, TokenType.MULTIPLICATION, 0, 1);
} else if (unitSpecification.charAt(start) == '/') {
return emit(start + 1, TokenType.DIVISION, null, 0);
return emit(start + 1, TokenType.DIVISION, 0, 1);
} else if (unitSpecification.charAt(start) == '⁄') {
return emit(start + 1, TokenType.DIVISION, null, 0);
return emit(start + 1, TokenType.DIVISION, 0, 1);
} else if (unitSpecification.charAt(start) == '(') {
return emit(start + 1, TokenType.OPEN, null, 0);
return emit(start + 1, TokenType.OPEN, 0, 1);
} else if (unitSpecification.charAt(start) == ')') {
return emit(start + 1, TokenType.CLOSE, null, 0);
return emit(start + 1, TokenType.CLOSE, 0, 1);
} else if (unitSpecification.charAt(start) == '√') {
return emit(start + 1, TokenType.SQUARE_ROOT, null, 0);
return emit(start + 1, TokenType.SQUARE_ROOT, 0, 1);
}
// look for special case "0.5" (used by CCSDS for square roots)
if ((start < end - 2) &&
unitSpecification.charAt(start) == '0' &&
unitSpecification.charAt(start + 1) == '.' &&
unitSpecification.charAt(start + 2) == '5') {
// ½ written as decimal number
return emit(start + 3, TokenType.FRACTION, 1, 2);
}
// look for unicode fractions
if (unitSpecification.charAt(start) == '¼') {
return emit(start + 1, TokenType.FRACTION, 1, 4);
} else if (unitSpecification.charAt(start) == '½') {
return emit(start + 1, TokenType.FRACTION, 1, 2);
} else if (unitSpecification.charAt(start) == '¾') {
return emit(start + 1, TokenType.FRACTION, 3, 4);
} else if (unitSpecification.charAt(start) == '⅐') {
return emit(start + 1, TokenType.FRACTION, 1, 7);
} else if (unitSpecification.charAt(start) == '⅑') {
return emit(start + 1, TokenType.FRACTION, 1, 9);
} else if (unitSpecification.charAt(start) == '⅒') {
return emit(start + 1, TokenType.FRACTION, 1, 10);
} else if (unitSpecification.charAt(start) == '⅓') {
return emit(start + 1, TokenType.FRACTION, 1, 3);
} else if (unitSpecification.charAt(start) == '⅔') {
return emit(start + 1, TokenType.FRACTION, 2, 3);
} else if (unitSpecification.charAt(start) == '⅕') {
return emit(start + 1, TokenType.FRACTION, 1, 5);
} else if (unitSpecification.charAt(start) == '⅖') {
return emit(start + 1, TokenType.FRACTION, 2, 5);
} else if (unitSpecification.charAt(start) == '⅗') {
return emit(start + 1, TokenType.FRACTION, 3, 5);
} else if (unitSpecification.charAt(start) == '⅘') {
return emit(start + 1, TokenType.FRACTION, 4, 5);
} else if (unitSpecification.charAt(start) == '⅙') {
return emit(start + 1, TokenType.FRACTION, 1, 6);
} else if (unitSpecification.charAt(start) == '⅚') {
return emit(start + 1, TokenType.FRACTION, 5, 6);
} else if (unitSpecification.charAt(start) == '⅛') {
return emit(start + 1, TokenType.FRACTION, 1, 8);
} else if (unitSpecification.charAt(start) == '⅜') {
return emit(start + 1, TokenType.FRACTION, 3, 8);
} else if (unitSpecification.charAt(start) == '⅝') {
return emit(start + 1, TokenType.FRACTION, 5, 8);
} else if (unitSpecification.charAt(start) == '⅞') {
return emit(start + 1, TokenType.FRACTION, 7, 8);
}
// it must be an integer, either as regular character or as superscript
......@@ -180,7 +228,7 @@ class Lexer {
}
if (current > numberStart) {
// there were some digits
return emit(current, TokenType.INTEGER, null, sign * value);
return emit(current, TokenType.INTEGER, sign * value, 1);
}
throw generateException();
......@@ -197,16 +245,16 @@ class Lexer {
/** Emit one token.
* @param after index after token
* @param type token type
* @param unit prefixed unit value
* @param value integer value
* @param numerator value of the token numerator
* @param denominator value of the token denominator
* @return new token
*/
private Token emit(final int after, final TokenType type,
final PrefixedUnit unit, final int value) {
private Token emit(final int after, final TokenType type, final int numerator, final int denominator) {
final CharSequence subString = unitSpecification.subSequence(start, after);
start = after;
nextToLast = last;
last = new Token(subString, type, unit, value);
last = new Token(subString, type, numerator,
denominator == 1 ? null : new Fraction(numerator, denominator));
return last;
}
......
......@@ -16,55 +16,69 @@
*/
package org.orekit.utils.units;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.hipparchus.fraction.Fraction;
/** Parser for units.
* <p>
* This fairly basic parser uses recursive descent with the following grammar,
* where '*' can in fact be either '*', '×' or '.', '/' can be either '/' or '⁄'
* and '^' can be either '^', "**" or implicit with switch to superscripts.
* The special case "n/a" corresponds to {@link PredefinedUnit#NONE}.
* and '^' can be either '^', "**" or implicit with switch to superscripts,
* and fractions are either Unicode fractions like ½ or ⅞ or the decimal value 0.5.
* The special case "n/a" returns a null list. It is intended to manage the
* special unit {@link Unit#NONE}. The special case "1" returns a singleton with
* the base term set to "1" and the exponent set to 1. It is intended to manage the
* special unit {@link Unit#ONE}. This is the only case where a number can appear
* in a unit; it cannot be combined with other units (i.e. m.1/s is not allowed).
* </p>
* <pre>
* unit → "n/a" | chain
* chain → operand operation
* operand → '√' simple | simple power
* operation → '*' chain | '/' chain | ε
* power → '^' exponent | ε
* exponent → integer | '(' integer denominator ')'
* denominator → '/' integer | ε
* simple → predefined | '(' chain ')'
* unit ::= "n/a" | "1" | chain
* chain ::= operand { ('*' | '/') operand }
* operand ::= '√' base | base power
* power ::= '^' exponent | ε
* exponent ::= 'fraction' | integer | '(' integer denominator ')'
* denominator ::= '/' integer | ε
* base ::= identifier | '(' chain ')'
* </pre>
* <p>
* This parses correctly units like MHz, km/√d, kg.m.s⁻¹, µas^(2/5)/(h**(2)×m)³, km/√(kg.s), √kg*km** (3/2) /(µs^2*Ω⁻⁷).
* Note that we don't accept both square root and power on the same operand, so km/√d³ is refused (but km/√(d³) is accepted).
* Note that "nd" does not stands for "not-defined" but for "nano-day"…
* This parses correctly units like MHz, km/√d, kg.m.s⁻¹, µas^⅖/(h**(2)×m)³, km/√(kg.s),
* √kg*km** (3/2) /(µs^2*Ω⁻⁷), km**0.5/s.
* </p>
* <p>
* Note that we don't accept both square root and power on the same operand, so km/√d³ is
* refused (but km/√(d³) is accepted).
* </p>
* @author Luc Maisonobe
* @since 11.0
*/
class Parser {
public class Parser {
/** Private constructor for a utility class.
*/
private Parser() {
}
/** Parse a string.
/** Build the power terms list.
* @param unitSpecification unit specification to parse
* @return parsed unit
* @return list of power terms
*/
public static Unit parse(final String unitSpecification) {
public static List<PowerTerm> buildList(final String unitSpecification) {
if (Unit.NONE.getName().equals(unitSpecification)) {
// special case
return Unit.NONE;
// special case for no units
return null;
} else if (Unit.ONE.getName().equals(unitSpecification)) {
// special case for dimensionless unit
return Collections.singletonList(new PowerTerm(unitSpecification, Fraction.ONE));
} else {
final Lexer lexer = new Lexer(unitSpecification);
final Unit parsed = chain(lexer);
final List<PowerTerm> chain = chain(lexer);
if (lexer.next() != null) {
throw lexer.generateException();
}
return parsed.alias(unitSpecification);
return chain;
}
}
......@@ -72,47 +86,80 @@ class Parser {
* @param lexer lexer providing tokens
* @return chain unit
*/
private static Unit chain(final Lexer lexer) {
return operation(operand(lexer), lexer);
private static List<PowerTerm> chain(final Lexer lexer) {
final List<PowerTerm> chain = new ArrayList<>();
chain.addAll(operand(lexer));
for (Token token = lexer.next(); token != null; token = lexer.next()) {
if (checkType(token, TokenType.MULTIPLICATION)) {
chain.addAll(operand(lexer));
} else if (checkType(token, TokenType.DIVISION)) {
chain.addAll(reciprocate(operand(lexer)));
} else {
lexer.pushBack();
break;
}
}
return chain;
}
/** Parse an operand.
* @param lexer lexer providing tokens
* @return operand unit
* @return operand term
*/
private static Unit operand(final Lexer lexer) {
private static List<PowerTerm> operand(final Lexer lexer) {
final Token token = lexer.next();
if (token == null) {
throw lexer.generateException();
}
if (token.getType() == TokenType.SQUARE_ROOT) {
return simple(lexer).power(null, Fraction.ONE_HALF);
return applyExponent(base(lexer), Fraction.ONE_HALF);
} else {
lexer.pushBack();
return simple(lexer).power(null, power(lexer));
return applyExponent(base(lexer), power(lexer));
}
}
/** Parse an operation.
* @param lhs left hand side unit
* @param lexer lexer providing tokens
* @return simple unit
/** Apply an exponent to a base term.
* @param base base term
* @param exponent exponent (may be null)
* @return term with exponent applied (same as {@code base} if exponent is null)
*/
private static Unit operation(final Unit lhs, final Lexer lexer) {
final Token token = lexer.next();
if (checkType(token, TokenType.MULTIPLICATION)) {
return lhs.multiply(null, chain(lexer));
} else if (checkType(token, TokenType.DIVISION)) {
return lhs.divide(null, chain(lexer));
} else {
lexer.pushBack();
return lhs;
private static List<PowerTerm> applyExponent(final List<PowerTerm> base, final Fraction exponent) {
if (exponent == null) {
// no exponent at all, return the base term itself
return base;
}
// combine exponent with existing ones, for example to handle compound units like m/(kg.s²)³
final List<PowerTerm> powered = new ArrayList<>(base.size());
for (final PowerTerm term : base) {
powered.add(new PowerTerm(term.getBase(), exponent.multiply(term.getExponent())));
}
return powered;
}
/** Compute the reciprocal of a base term.
* @param base base term
* @return reciprocal of base term
*/
private static List<PowerTerm> reciprocate(final List<PowerTerm> base) {
// reciprocate individual terms
final List<PowerTerm> reciprocal = new ArrayList<>(base.size());
for (final PowerTerm term : base) {
reciprocal.add(new PowerTerm(term.getBase(), term.getExponent().negate()));
}
return reciprocal;
}
/** Parse a power operation.
* @param lexer lexer providing tokens
* @return exponent
* @return exponent, or null if no exponent
*/
private static Fraction power(final Lexer lexer) {
final Token token = lexer.next();
......@@ -120,7 +167,7 @@ class Parser {
return exponent(lexer);
} else {
lexer.pushBack();
return Fraction.ONE;
return null;
}
}
......@@ -130,12 +177,14 @@ class Parser {
*/
private static Fraction exponent(final Lexer lexer) {
final Token token = lexer.next();
if (checkType(token, TokenType.INTEGER)) {
return new Fraction(token.getValue());
if (checkType(token, TokenType.FRACTION)) {
return token.getFraction();
} else if (checkType(token, TokenType.INTEGER)) {
return new Fraction(token.getInt());
} else {
lexer.pushBack();
accept(lexer, TokenType.OPEN);
final int num = accept(lexer, TokenType.INTEGER).getValue();
final int num = accept(lexer, TokenType.INTEGER).getInt();
final int den = denominator(lexer);
accept(lexer, TokenType.CLOSE);
return new Fraction(num, den);
......@@ -149,25 +198,25 @@ class Parser {
private static int denominator(final Lexer lexer) {
final Token token = lexer.next();
if (checkType(token, TokenType.DIVISION)) {
return accept(lexer, TokenType.INTEGER).getValue();
return accept(lexer, TokenType.INTEGER).getInt();
} else {
lexer.pushBack();
return 1;
}
}
/** Parse a simple unit.
/** Parse a base term.
* @param lexer lexer providing tokens
* @return simple unit
* @return base term
*/
private static Unit simple(final Lexer lexer) {
private static List<PowerTerm> base(final Lexer lexer) {
final Token token = lexer.next();
if (checkType(token, TokenType.PREFIXED_UNIT)) {
return token.getPrefixedUnit();
if (checkType(token, TokenType.IDENTIFIER)) {
return Collections.singletonList(new PowerTerm(token.getSubString(), Fraction.ONE));
} else {
lexer.pushBack();
accept(lexer, TokenType.OPEN);
final Unit chain = chain(lexer);
final List<PowerTerm> chain = chain(lexer);
accept(lexer, TokenType.CLOSE);
return chain;
}
......
/* Copyright 2002-2021 CS GROUP
* Licensed to CS GROUP (CS) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* CS licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.orekit.utils.units;
import org.hipparchus.fraction.Fraction;
/** Pairing of a base term with the fractional power it is raised to.
 * <p>
 * Both parts are stored in final fields assigned once by the constructor
 * and are only exposed through read accessors.
 * </p>
 * @author Luc Maisonobe
 * @since 11.0
 */
public class PowerTerm {

    /** Term that is raised to a power. */
    private final CharSequence baseTerm;

    /** Power applied to the term, kept as an exact fraction. */
    private final Fraction power;

    /** Build a term from its two parts.
     * <p>
     * The arguments are stored as given, without copy or validation.
     * </p>
     * @param base base term
     * @param exponent fractional exponent
     */
    PowerTerm(final CharSequence base, final Fraction exponent) {
        baseTerm = base;
        power    = exponent;
    }

    /** Get the term that is raised to a power.
     * @return base term, as provided at construction
     */
    public CharSequence getBase() {
        return this.baseTerm;
    }

    /** Get the power applied to the base term.
     * @return fractional exponent, as provided at construction
     */
    public Fraction getExponent() {
        return this.power;
    }

}
......@@ -30,6 +30,9 @@ import org.orekit.errors.OrekitMessages;
*/
class PrefixedUnit extends Unit {
/** Serializable UID. */
private static final long serialVersionUID = 20210407L;
/** Allowed units with SI prefixes, with various aliases for angles, year, sfu, and tecu. */
private static final Map<String, PrefixedUnit> ALLOWED;
......@@ -77,6 +80,10 @@ class PrefixedUnit extends Unit {
ALLOWED.put(pu.getName(), pu);
}
}
// dimensionless unit "1" does not accept any prefix
ALLOWED.put(Unit.ONE.getName(), new PrefixedUnit(null, Unit.ONE));
}
/** Simple constructor.
......
......@@ -16,6 +16,8 @@
*/
package org.orekit.utils.units;
import org.hipparchus.fraction.Fraction;
/** Unit token.
* @author Luc Maisonobe
* @since 11.0
......@@ -28,24 +30,23 @@ class Token {
/** Token type. */
private final TokenType type;
/** Prefixed unit value. */
private final PrefixedUnit unit;
/** Integer value. */
private final int value;
private final int integer;
/** Fraction value. */
private final Fraction fraction;
/** Build a token.
* @param subString substring corresponding to the token
* @param type token type
* @param unit prefixed unit value
* @param value integer value of the token
* @param integer integer value
* @param fraction fraction value
*/
Token(final CharSequence subString, final TokenType type,
final PrefixedUnit unit, final int value) {
Token(final CharSequence subString, final TokenType type, final int integer, final Fraction fraction) {
this.subString = subString;
this.type = type;
this.unit = unit;
this.value = value;
this.integer = integer;
this.fraction = fraction;
}
/** Get the substring corresponding to the token.
......@@ -62,18 +63,18 @@ class Token {
return type;
}
/** Get the prefixed unit value.
* @return prefixed unit value
/** Get the integer value (numerator in case of fraction).
* @return integer value
*/
public PrefixedUnit getPrefixedUnit() {
return unit;
public int getInt() {
return integer;
}
/** Get the integer value.
* @return integer value
/** Get the fraction value.
* @return fraction value
*/
public int getValue() {
return value;
public Fraction getFraction() {
return fraction;
}
}
......@@ -22,8 +22,8 @@ package org.orekit.utils.units;
*/
enum TokenType {
/** Unit with a SI prefix. */
PREFIXED_UNIT,
/** Identifier. */
IDENTIFIER,
/** Power operator. */
POWER,
......@@ -44,6 +44,9 @@ enum TokenType {
SQUARE_ROOT,
/** Integer. */
INTEGER;
INTEGER,
/** Fraction. */
FRACTION;
}
......@@ -17,9 +17,12 @@
package org.orekit.utils.units;
import java.io.Serializable;
import java.util.List;
import org.hipparchus.fraction.Fraction;
import org.hipparchus.util.FastMath;
import org.orekit.errors.OrekitException;
import org.orekit.errors.OrekitMessages;