package com.benjaminwan.chinesettstflite.utils;

import android.util.Log;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: classes.dex */
/**
 * Converts input text into the sequence of integer symbol ids defined by this class.
 *
 * <p>The symbol table is, in id order: the pad symbol, the special symbol, the punctuation
 * characters, the ASCII letters, every entry of {@code VALID_SYMBOLS} prefixed with
 * {@code "@"}, and finally the end-of-sequence symbol. Plain text is cleaned (ASCII
 * conversion, lower-casing, abbreviation and number expansion, whitespace collapsing) and
 * mapped character by character; text inside curly braces is split on whitespace and each
 * token is looked up as an {@code "@"}-prefixed symbol.
 *
 * <p>All lookup tables are built once when the class is loaded and are never modified
 * afterwards, so creating several instances does not alter the symbol ids.
 */
public class Processor {
    private static final String TAG = "processor";
    private static final String PAD = "_";
    private static final String SPECIAL = "-";
    private static final String EOS = "~";
    private static final String PUNCTUATION = "!'(),.:;? ";
    private static final String LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
    private static final String[] VALID_SYMBOLS = {"AA", "AA0", "AA1", "AA2", "AE", "AE0", "AE1", "AE2", "AH", "AH0", "AH1", "AH2", "AO", "AO0", "AO1", "AO2", "AW", "AW0", "AW1", "AW2", "AY", "AY0", "AY1", "AY2", "B", "CH", "D", "DH", "EH", "EH0", "EH1", "EH2", "ER", "ER0", "ER1", "ER2", "EY", "EY0", "EY1", "EY2", "F", "G", "HH", "IH", "IH0", "IH1", "IH2", "IY", "IY0", "IY1", "IY2", "JH", "K", "L", "M", "N", "NG", "OW", "OW0", "OW1", "OW2", "OY", "OY0", "OY1", "OY2", "P", "R", "S", "SH", "T", "TH", "UH", "UH0", "UH1", "UH2", "UW", "UW0", "UW1", "UW2", "V", "W", "Y", "Z", "ZH"};

    // DOTALL: without it '.' cannot cross a line break, so find() would start matching after
    // the last newline and the text in front of it would be silently discarded.
    private static final Pattern CURLY_RE = Pattern.compile("(.*?)\\{(.+?)\\}(.*)", Pattern.DOTALL);
    private static final Pattern COMMA_NUMBER_RE = Pattern.compile("([0-9][0-9\\,]+[0-9])");
    private static final Pattern DECIMAL_RE = Pattern.compile("([0-9]+\\.[0-9]+)");
    private static final Pattern POUNDS_RE = Pattern.compile("£([0-9\\,]*[0-9]+)");
    private static final Pattern DOLLARS_RE = Pattern.compile("\\$([0-9.\\,]*[0-9]+)");
    private static final Pattern ORDINAL_RE = Pattern.compile("[0-9]+(st|nd|rd|th)");
    private static final Pattern NUMBER_RE = Pattern.compile("[0-9]+");
    private static final Pattern WHITESPACE_RE = Pattern.compile("\\s+");

    private static final List<String> SYMBOLS = buildSymbols();
    private static final Map<String, Integer> SYMBOL_TO_ID = buildSymbolIds(SYMBOLS);
    private static final Map<String, String> ABBREVIATIONS = buildAbbreviations();
    private static final Pattern ABBREVIATION_RE = buildAbbreviationPattern(ABBREVIATIONS);

    /**
     * Creates a processor. The shared lookup tables are initialised at class-load time, so
     * construction has no side effects and may be repeated freely.
     */
    public Processor() {
    }

    /** Builds the ordered symbol list; a symbol's index in this list is its id. */
    private static List<String> buildSymbols() {
        List<String> symbols = new ArrayList<>();
        symbols.add(PAD);
        symbols.add(SPECIAL);
        for (int i = 0; i < PUNCTUATION.length(); i++) {
            symbols.add(String.valueOf(PUNCTUATION.charAt(i)));
        }
        for (int i = 0; i < LETTERS.length(); i++) {
            symbols.add(String.valueOf(LETTERS.charAt(i)));
        }
        for (String symbol : VALID_SYMBOLS) {
            symbols.add("@" + symbol);
        }
        symbols.add(EOS);
        return symbols;
    }

    /** Maps every symbol to its index in {@code symbols}. */
    private static Map<String, Integer> buildSymbolIds(List<String> symbols) {
        Map<String, Integer> ids = new HashMap<>();
        for (int id = 0; id < symbols.size(); id++) {
            ids.put(symbols.get(id), id);
        }
        return ids;
    }

    /** Builds the abbreviation table (lower-case key without the trailing dot to expansion). */
    private static Map<String, String> buildAbbreviations() {
        Map<String, String> abbreviations = new HashMap<>();
        abbreviations.put("mrs", "misess");
        abbreviations.put("mr", "mister");
        abbreviations.put("dr", "doctor");
        abbreviations.put("st", "saint");
        abbreviations.put("co", "company");
        abbreviations.put("jr", "junior");
        abbreviations.put("maj", "major");
        abbreviations.put("gen", "general");
        abbreviations.put("drs", "doctors");
        abbreviations.put("rev", "reverend");
        abbreviations.put("lt", "lieutenant");
        abbreviations.put("hon", "honorable");
        abbreviations.put("sgt", "sergeant");
        abbreviations.put("capt", "captain");
        abbreviations.put("esq", "esquire");
        abbreviations.put("ltd", "limited");
        abbreviations.put("col", "colonel");
        abbreviations.put("ft", "fort");
        return abbreviations;
    }

    /**
     * Compiles one pattern matching any abbreviation key at a word boundary followed by a dot.
     * The key is captured in group 1.
     */
    private static Pattern buildAbbreviationPattern(Map<String, String> abbreviations) {
        StringBuilder alternation = new StringBuilder();
        for (String key : abbreviations.keySet()) {
            if (alternation.length() > 0) {
                alternation.append('|');
            }
            alternation.append(Pattern.quote(key));
        }
        return Pattern.compile("\\b(" + alternation + ")\\.");
    }

    /**
     * Looks up each whitespace-separated token of {@code str} as an {@code "@"}-prefixed symbol.
     *
     * @param str content of a curly-brace section; may be {@code null}
     * @return ids of the recognised tokens, in order; unknown tokens are logged and skipped
     */
    private List<Integer> arpabetToSequence(String str) {
        List<Integer> ids = new ArrayList<>();
        if (str == null) {
            return ids;
        }
        for (String token : WHITESPACE_RE.split(str.trim())) {
            if (token.isEmpty()) {
                continue;
            }
            Integer id = SYMBOL_TO_ID.get("@" + token);
            if (id == null) {
                // A null entry would crash the unboxing step in textToIds, so drop it instead.
                Log.e(TAG, "arpabetToSequence: id is not found for " + token);
            } else {
                ids.add(id);
            }
        }
        return ids;
    }

    /**
     * Normalises plain text: ASCII conversion, lower-casing, abbreviation expansion, number
     * expansion (best effort) and whitespace collapsing.
     *
     * @param str raw text; must not be {@code null}
     * @return the cleaned text
     */
    private String cleanTextForEnglish(String str) {
        // Locale.ROOT keeps "I" -> "i" regardless of the device locale (e.g. Turkish).
        String text = expandAbbreviations(convertToAscii(str).toLowerCase(Locale.ROOT));
        try {
            text = expandNumbers(text);
        } catch (Exception e) {
            // Best effort: on failure (e.g. a number too large for long) keep the text as it
            // was before number expansion.
            Log.d(TAG, "Failed to convert numbers", e);
        }
        String collapsed = collapseWhitespace(text);
        Log.d(TAG, "text preprocessed: " + collapsed);
        return collapsed;
    }

    /** Replaces every run of whitespace with a single space. */
    private String collapseWhitespace(String str) {
        return WHITESPACE_RE.matcher(str).replaceAll(" ");
    }

    /**
     * Round-trips the text through US-ASCII; characters that cannot be encoded are substituted
     * by the charset's replacement byte.
     */
    private String convertToAscii(String str) {
        byte[] ascii = str.getBytes(StandardCharsets.US_ASCII);
        // Decode with the same charset rather than the platform default.
        return new String(ascii, StandardCharsets.US_ASCII);
    }

    /**
     * Replaces every known abbreviation followed by a dot (for example {@code "dr."}) with its
     * expansion, in a single left-to-right pass so the result does not depend on map order.
     */
    private String expandAbbreviations(String str) {
        Matcher matcher = ABBREVIATION_RE.matcher(str);
        StringBuffer out = new StringBuffer();
        while (matcher.find()) {
            String expansion = ABBREVIATIONS.get(matcher.group(1));
            matcher.appendReplacement(out, Matcher.quoteReplacement(expansion));
        }
        matcher.appendTail(out);
        return out.toString();
    }

    /** Replaces every run of digits with the words returned by {@code NumberNorm.numToString}. */
    private String expandCardinals(String str) {
        Matcher matcher = NUMBER_RE.matcher(str);
        StringBuffer out = new StringBuffer();
        while (matcher.find()) {
            String words = NumberNorm.numToString(Long.parseLong(matcher.group()));
            matcher.appendReplacement(out, Matcher.quoteReplacement(words));
        }
        matcher.appendTail(out);
        return out.toString();
    }

    /** Rewrites decimals such as {@code "3.14"} as {@code "3 point 14"}. */
    private String expandDecimals(String str) {
        Matcher matcher = DECIMAL_RE.matcher(str);
        StringBuffer out = new StringBuffer();
        while (matcher.find()) {
            String spoken = matcher.group().replace(".", " point ");
            matcher.appendReplacement(out, Matcher.quoteReplacement(spoken));
        }
        matcher.appendTail(out);
        return out.toString();
    }

    /** Rewrites dollar amounts such as {@code "$5.50"} as {@code "5 dollars 50 cents "}. */
    private String expandDollars(String str) {
        Matcher matcher = DOLLARS_RE.matcher(str);
        StringBuffer out = new StringBuffer();
        while (matcher.find()) {
            String spoken = spellOutDollars(matcher.group(1));
            matcher.appendReplacement(out, Matcher.quoteReplacement(spoken));
        }
        matcher.appendTail(out);
        return out.toString();
    }

    /**
     * Builds the spoken form of a dollar amount.
     *
     * @param amount the digits after the {@code $} sign; always ends with a digit
     * @return the dollar part and/or cent part, each followed by a space, or
     *         {@code "zero dollars "} when both parts are zero
     */
    private static String spellOutDollars(String amount) {
        String[] parts = amount.split("\\.");
        String dollars = amount.startsWith(".") ? "0" : parts[0];
        String cents = parts.length > 1 ? parts[1] : "0";
        StringBuilder spoken = new StringBuilder();
        if (!"0".equals(dollars)) {
            spoken.append(dollars).append(" dollars ");
        }
        if (!cents.isEmpty() && !"0".equals(cents) && !"00".equals(cents)) {
            spoken.append(cents).append(" cents ");
        }
        if (spoken.length() == 0) {
            // Do not let a zero amount vanish from the sentence.
            spoken.append("zero dollars ");
        }
        return spoken.toString();
    }

    /** Applies all number rewrites; the order matters because later steps consume bare digits. */
    private String expandNumbers(String str) {
        String text = removeCommasFromNumbers(str);
        text = expandPounds(text);
        text = expandDollars(text);
        text = expandDecimals(text);
        text = expandOrdinals(text);
        return expandCardinals(text);
    }

    /** Replaces ordinals such as {@code "21st"} with the words from {@code NumberNorm.toOrdinal}. */
    private String expandOrdinals(String str) {
        Matcher matcher = ORDINAL_RE.matcher(str);
        StringBuffer out = new StringBuffer();
        while (matcher.find()) {
            String ordinal = matcher.group();
            // Every suffix alternative (st|nd|rd|th) is exactly two characters long.
            long value = Long.parseLong(ordinal.substring(0, ordinal.length() - 2));
            matcher.appendReplacement(out, Matcher.quoteReplacement(NumberNorm.toOrdinal(value)));
        }
        matcher.appendTail(out);
        return out.toString();
    }

    /**
     * Rewrites pound amounts such as {@code "£100"} as {@code "100 pounds"}.
     *
     * <p>NOTE(review): cleanTextForEnglish converts the text to US-ASCII before this runs, which
     * replaces the pound sign, so this pattern cannot match inside the textToIds pipeline.
     * Confirm whether pound expansion should happen before the ASCII conversion.
     */
    private String expandPounds(String str) {
        Matcher matcher = POUNDS_RE.matcher(str);
        StringBuffer out = new StringBuffer();
        while (matcher.find()) {
            // Group 1 excludes the currency sign, which has no entry in the symbol table.
            matcher.appendReplacement(out, Matcher.quoteReplacement(matcher.group(1) + " pounds"));
        }
        matcher.appendTail(out);
        return out.toString();
    }

    /** Strips grouping commas from numbers, for example {@code "1,000"} becomes {@code "1000"}. */
    private String removeCommasFromNumbers(String str) {
        Matcher matcher = COMMA_NUMBER_RE.matcher(str);
        StringBuffer out = new StringBuffer();
        while (matcher.find()) {
            matcher.appendReplacement(out, Matcher.quoteReplacement(matcher.group().replace(",", "")));
        }
        matcher.appendTail(out);
        return out.toString();
    }

    /**
     * Maps each character of {@code str} to its symbol id.
     *
     * @param str cleaned text
     * @return ids of the characters present in the symbol table; others are logged and skipped
     */
    private List<Integer> symbolsToSequence(String str) {
        List<Integer> ids = new ArrayList<>();
        for (int i = 0; i < str.length(); i++) {
            char symbol = str.charAt(i);
            Integer id = SYMBOL_TO_ID.get(String.valueOf(symbol));
            if (id == null) {
                Log.e(TAG, "symbolsToSequence: id is not found for " + symbol);
            } else {
                ids.add(id);
            }
        }
        return ids;
    }

    /**
     * Converts text to symbol ids.
     *
     * <p>Text outside curly braces is cleaned and mapped character by character. Text inside
     * {@code {...}} is treated as whitespace-separated symbol names and mapped token by token.
     *
     * @param str the input text; {@code null} or empty yields an empty array
     * @return the symbol ids in input order
     */
    public int[] textToIds(String str) {
        List<Integer> ids = new ArrayList<>();
        String remaining = str;
        while (remaining != null && remaining.length() > 0) {
            Matcher matcher = CURLY_RE.matcher(remaining);
            if (!matcher.find()) {
                ids.addAll(symbolsToSequence(cleanTextForEnglish(remaining)));
                break;
            }
            ids.addAll(symbolsToSequence(cleanTextForEnglish(matcher.group(1))));
            ids.addAll(arpabetToSequence(matcher.group(2)));
            remaining = matcher.group(3);
        }
        int[] result = new int[ids.size()];
        for (int i = 0; i < result.length; i++) {
            result[i] = ids.get(i);
        }
        return result;
    }
}
