Search code examples
google-translate

How to ignore a specified word in text?


I'm using the Java version of the Translate API in App Engine. Is there a way to exclude a specific word from translation, e.g. in "Translate IGNORED_TEXT this"? For some languages IGNORED_TEXT comes back malformed, and there is no guarantee that the Translate API will leave it unchanged.


Solution

  • After multiple attempts I ended up with a kind of retrier that substitutes a special placeholder for the text I wanted to ignore. In my case these were string format parameters (%d, %s, etc.). Maybe it will help someone:

    /**
     * Swaps {@code %s} / {@code %d} format specifiers for placeholder tokens and back again.
     *
     * <p>{@link #convert(String)} replaces every {@code %s} with a "string" placeholder and every
     * {@code %d} with a "number" placeholder; {@link #revert(String)} turns the placeholders
     * currently selected by this instance back into {@code %s} / {@code %d}. When a placeholder
     * pair does not work out, {@link #retryConvert(String, boolean, boolean)} advances to the next
     * candidate in {@link #MAGIC_PARAMETER_STRING} / {@link #MAGIC_PARAMETER_NUMBER}.
     *
     * <p>Instances are stateful: they remember which placeholder of each table is currently
     * selected. No synchronization is performed.
     */
    public class Parser {

        /** Candidate placeholders for {@code %s}, tried in order. */
        public static final String[] MAGIC_PARAMETER_STRING = {"975313579", "*****", "˨", "இ", "⏲"};
        /** Candidate placeholders for {@code %d}, tried in order. */
        public static final String[] MAGIC_PARAMETER_NUMBER = {"975323579", "*******", "Ω", "˧", "\u23FA"};

        /**
         * Format-specifier syntax: optional argument index, flags, width, precision, date/time
         * prefix, and finally the conversion character (capture group 6).
         */
        private static final String formatSpecifier
                = "%(\\d+\\$)?([-#+ 0,(\\<]*)?(\\d+)?(\\.\\d+)?([tT])?([a-zA-Z%])";
        private static final Pattern formatToken = Pattern.compile(formatSpecifier);

        private final int maxStringParameterCount = Parser.MAGIC_PARAMETER_STRING.length;
        private final int maxNumberParameterCount = Parser.MAGIC_PARAMETER_NUMBER.length;

        /** Index of the currently selected entry of {@link #MAGIC_PARAMETER_STRING}. */
        private int stringPos = 0;
        /** Index of the currently selected entry of {@link #MAGIC_PARAMETER_NUMBER}. */
        private int numberPos = 0;

        /**
         * Chooses the placeholder for one matched specifier and updates the argument counters.
         *
         * @param result            accumulator whose counters are incremented
         * @param specifier         the complete matched specifier text (used for error reporting)
         * @param conversion        the conversion character of the specifier
         * @param numberReplacement placeholder to return for {@code d}
         * @param stringReplacement placeholder to return for {@code s}
         * @return the placeholder that stands in for the specifier
         * @throws IllegalArgumentException if the conversion is neither {@code s} nor {@code d};
         *                                  the message is the offending specifier
         */
        private String convertToken(ConvertedString result, String specifier, String conversion,
                String numberReplacement, String stringReplacement) {
            if (conversion.equals("s")) {
                result.stringArgCount++;
                return stringReplacement;
            }
            if (conversion.equals("d")) {
                result.numberArgCount++;
                return numberReplacement;
            }
            // Report the specifier exactly as written. Concatenating the individual capture
            // groups would print the literal text "null" for every absent optional part.
            throw new IllegalArgumentException(specifier);
        }

        /**
         * Returns the currently selected number placeholder, optionally advancing first.
         *
         * @param bumpUp whether to move on to the next candidate before reading
         * @throws RetryExceededException if the position is past the end of the table
         */
        private String getReplacementNumber(boolean bumpUp) throws RetryExceededException {
            if (bumpUp) {
                ++numberPos;
            }
            if (numberPos >= maxNumberParameterCount) {
                throw new RetryExceededException();
            }
            return MAGIC_PARAMETER_NUMBER[numberPos];
        }

        /**
         * Returns the currently selected string placeholder, optionally advancing first.
         *
         * @param bumpUp whether to move on to the next candidate before reading
         * @throws RetryExceededException if the position is past the end of the table
         */
        private String getReplacementString(boolean bumpUp) throws RetryExceededException {
            if (bumpUp) {
                ++stringPos;
            }
            if (stringPos >= maxStringParameterCount) {
                throw new RetryExceededException();
            }
            return MAGIC_PARAMETER_STRING[stringPos];
        }

        /**
         * Restores {@code %s} / {@code %d} in {@code text} from the currently selected placeholders.
         *
         * <p>The text is scanned once, left to right. At every position the longer placeholder
         * is tried before the shorter one, because one placeholder may be a prefix of the other
         * ({@code "*****"} versus {@code "*******"}); replacing the shorter one first would
         * destroy occurrences of the longer one.
         *
         * @param text text containing the placeholders
         * @return the restored text together with the number of placeholders of each kind that
         *         were restored
         * @throws RetryExceededException if either placeholder position is past the end of its table
         */
        public ConvertedString revert(String text) throws RetryExceededException {
            String replacementString = getReplacementString(false);
            String replacementNumber = getReplacementNumber(false);

            // Regex alternation prefers the left alternative, so list the longer token first.
            boolean stringIsLonger = replacementString.length() >= replacementNumber.length();
            String longer = stringIsLonger ? replacementString : replacementNumber;
            String shorter = stringIsLonger ? replacementNumber : replacementString;
            Matcher matcher = Pattern.compile(Pattern.quote(longer) + "|" + Pattern.quote(shorter))
                    .matcher(text);

            ConvertedString convertedString = new ConvertedString();
            StringBuilder restored = new StringBuilder();
            int lastIndex = 0;
            while (matcher.find()) {
                restored.append(text, lastIndex, matcher.start());
                if (matcher.group().equals(replacementString)) {
                    convertedString.stringArgCount++;
                    restored.append("%s");
                } else {
                    convertedString.numberArgCount++;
                    restored.append("%d");
                }
                lastIndex = matcher.end();
            }
            restored.append(text, lastIndex, text.length());
            convertedString.result = restored.toString();
            return convertedString;
        }

        /**
         * Converts {@code format} using the first placeholder of each table.
         *
         * @param format text containing {@code %s} / {@code %d} specifiers
         * @return the converted text and the number of specifiers of each kind
         * @throws IllegalArgumentException if {@code format} contains any other specifier
         */
        public ConvertedString convert(final String format) {
            return convert(format, MAGIC_PARAMETER_NUMBER[0], MAGIC_PARAMETER_STRING[0]);
        }

        /**
         * Converts {@code format}, replacing {@code %d} with {@code numberReplacement} and
         * {@code %s} with {@code stringReplacement}. All other text is copied unchanged.
         *
         * @param format            text containing {@code %s} / {@code %d} specifiers
         * @param numberReplacement placeholder for {@code %d}
         * @param stringReplacement placeholder for {@code %s}
         * @return the converted text and the number of specifiers of each kind
         * @throws IllegalArgumentException if {@code format} contains any other specifier
         */
        public ConvertedString convert(final String format, String numberReplacement, String stringReplacement) {
            ConvertedString result = new ConvertedString();
            final StringBuilder output = new StringBuilder();
            final Matcher matcher = formatToken.matcher(format);
            int lastIndex = 0;
            while (matcher.find()) {
                // Literal text between the previous specifier and this one.
                output.append(format, lastIndex, matcher.start());
                output.append(convertToken(result, matcher.group(), matcher.group(6),
                        numberReplacement, stringReplacement));
                lastIndex = matcher.end();
            }
            output.append(format, lastIndex, format.length());
            result.result = output.toString();
            return result;
        }

        /**
         * Converts {@code originalText} again, optionally moving on to the next placeholder of
         * either kind first. The number position is advanced before the string position.
         *
         * @param originalText text containing {@code %s} / {@code %d} specifiers
         * @param bumpUpString whether to advance to the next string placeholder
         * @param bumpUpNumber whether to advance to the next number placeholder
         * @return the converted text and the number of specifiers of each kind
         * @throws RetryExceededException   if a placeholder table has been exhausted
         * @throws IllegalArgumentException if {@code originalText} contains any other specifier
         */
        public ConvertedString retryConvert(String originalText, boolean bumpUpString, boolean bumpUpNumber) throws RetryExceededException {
            String replacementNumber = getReplacementNumber(bumpUpNumber);
            String replacementString = getReplacementString(bumpUpString);
            return convert(originalText, replacementNumber, replacementString);
        }

        /** Result of a conversion: the produced text plus per-kind argument counts. */
        public static class ConvertedString {
            /** Number of {@code %s} specifiers (or string placeholders) seen. */
            public int stringArgCount;
            /** Number of {@code %d} specifiers (or number placeholders) seen. */
            public int numberArgCount;
            /** The produced text. */
            public String result;

        }

        /** Thrown when a placeholder position is past the end of its table. */
        public static class RetryExceededException extends Exception {

        }
    }