package org.languagetool.tagging.disambiguation;

import gnu.trove.THashMap;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import org.jetbrains.annotations.Nullable;
import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedToken;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.JLanguageTool;
import org.languagetool.tools.StringTools;

/* loaded from: input_file:org/languagetool/tagging/disambiguation/MultiWordChunker.class */
/**
 * Disambiguator that marks multi-token expressions listed in a resource file.
 *
 * <p>The resource file contains one entry per line in the form
 * {@code expression<TAB>tag}. Lines that are empty or start with {@code #} are
 * skipped, and everything from the first {@code #} on a line is dropped as a comment.
 * When an expression is found in a sentence, the first token of the match receives an
 * additional reading tagged {@code <tag>} and the last token one tagged {@code </tag>}.
 *
 * <p>The resource file is loaded on first use; loading is guarded by a
 * {@code synchronized} block and a {@code volatile} flag.
 */
public class MultiWordChunker extends AbstractDisambiguator {
    /** Maximum number of tokens (whitespace tokens included) scanned for one expression. */
    private static final int MAX_TOKENS_IN_MULTIWORD = 20;

    private final String filename;
    private final boolean allowFirstCapitalized;
    private final boolean allowAllUppercase;
    private volatile boolean initialized;
    /** First word of every expression containing a space, mapped to the largest word count seen. */
    private Map<String, Integer> mStartSpace;
    /** First character of every expression without a space, mapped to the largest length seen. */
    private Map<String, Integer> mStartNoSpace;
    /** Every known expression (including generated case variants), mapped to its tag and lemma. */
    private Map<String, AnalyzedToken> mFull;

    /**
     * Creates a chunker that matches expressions exactly as they are written in the file.
     *
     * @param filename path of the expression file, resolved through the resource data broker
     */
    public MultiWordChunker(String filename) {
        this(filename, false, false);
    }

    /**
     * @param filename path of the expression file, resolved through the resource data broker
     * @param allowFirstCapitalized also register each expression with its first character upper-cased
     * @param allowAllUppercase also register each expression fully upper-cased
     */
    public MultiWordChunker(String filename, boolean allowFirstCapitalized, boolean allowAllUppercase) {
        this.filename = filename;
        this.allowFirstCapitalized = allowFirstCapitalized;
        this.allowAllUppercase = allowAllUppercase;
    }

    /** Loads the expression file once; later calls return immediately. */
    private void lazyInit() {
        if (this.initialized) {
            return;
        }
        synchronized (this) {
            if (this.initialized) {
                return;
            }
            THashMap<String, Integer> startSpace = new THashMap<>();
            THashMap<String, Integer> startNoSpace = new THashMap<>();
            THashMap<String, AnalyzedToken> full = new THashMap<>();
            fillMaps(startSpace, startNoSpace, full);
            startSpace.trimToSize();
            startNoSpace.trimToSize();
            full.trimToSize();
            this.mStartSpace = startSpace;
            this.mStartNoSpace = startNoSpace;
            this.mFull = full;
            // Written last: the volatile write publishes the three maps assigned above.
            this.initialized = true;
        }
    }

    /**
     * Reads the expression file and populates the three lookup maps.
     *
     * @param startSpace receives first word -> largest word count, for expressions containing a space
     * @param startNoSpace receives first character -> largest length, for expressions without a space
     * @param full receives expression -> token carrying the tag and the original spelling as lemma
     * @throws RuntimeException if a line does not consist of exactly two tab-separated parts,
     *         or if reading the file fails (wrapping the {@link IOException})
     */
    private void fillMaps(Map<String, Integer> startSpace, Map<String, Integer> startNoSpace, Map<String, AnalyzedToken> full) {
        // Many entries share the same tag; reuse one String instance per distinct value.
        Map<String, String> interner = new HashMap<>();
        // try-with-resources closes the stream even when a malformed line aborts loading.
        try (InputStream stream = JLanguageTool.getDataBroker().getFromResourceDirAsStream(this.filename)) {
            for (String line : loadWords(stream)) {
                String[] parts = line.split("\t");
                if (parts.length != 2) {
                    throw new RuntimeException("Invalid format in " + this.filename + ": '" + line + "', expected two tab-separated parts");
                }
                String original = interner.computeIfAbsent(parts[0], Function.identity());
                String tag = interner.computeIfAbsent(parts[1], Function.identity());

                List<String> variants = new ArrayList<>();
                variants.add(original);
                if (this.allowFirstCapitalized) {
                    String firstCapitalized = StringTools.uppercaseFirstChar(original);
                    if (!full.containsKey(firstCapitalized) && !original.equals(firstCapitalized)) {
                        variants.add(firstCapitalized);
                    }
                }
                if (this.allowAllUppercase) {
                    String allUppercase = original.toUpperCase();
                    if (!full.containsKey(allUppercase) && !original.equals(allUppercase)) {
                        variants.add(allUppercase);
                    }
                }

                for (String variant : variants) {
                    if (variant.indexOf(' ') > 0) {
                        String[] words = variant.split(" ");
                        startSpace.merge(words[0], words.length, Integer::max);
                    } else {
                        // The length of the spelling in the file is recorded for every variant.
                        // No per-character scratch array is built: upper-casing can lengthen a
                        // string (e.g. "ß" -> "SS"), which overflowed an array sized from the
                        // original spelling.
                        startNoSpace.merge(variant.substring(0, 1), parts[0].length(), Integer::max);
                    }
                    full.put(variant, new AnalyzedToken(variant, tag, original));
                }
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Marks multi-token expressions in the sentence without a cancellation callback.
     *
     * @param analyzedSentence the sentence to process
     * @return a sentence built from the (updated) token array
     * @throws IOException declared by the overridden method
     */
    @Override // org.languagetool.tagging.disambiguation.Disambiguator
    public AnalyzedSentence disambiguate(AnalyzedSentence analyzedSentence) throws IOException {
        return disambiguate(analyzedSentence, null);
    }

    /**
     * Marks multi-token expressions in the sentence.
     *
     * <p>Readings are added to the token objects obtained from {@code analyzedSentence};
     * the returned sentence wraps that same token array.
     *
     * @param analyzedSentence the sentence to process
     * @param checkCancelledCallback polled once per non-empty token; processing stops early
     *        when it reports cancellation. May be {@code null}.
     * @return a sentence built from the (updated) token array
     * @throws IOException declared by the overridden method
     */
    @Override // org.languagetool.tagging.disambiguation.Disambiguator
    public final AnalyzedSentence disambiguate(AnalyzedSentence analyzedSentence, @Nullable JLanguageTool.CheckCancelledCallback checkCancelledCallback) throws IOException {
        lazyInit();
        AnalyzedTokenReadings[] tokens = analyzedSentence.getTokens();
        for (int i = 0; i < tokens.length; i++) {
            String start = tokens[i].getToken();
            if (start.length() < 1) {
                continue;
            }
            // The first "word" of an expression may be split over two tokens, so a directly
            // following non-whitespace token is glued on for the lookup.
            if (i + 1 < tokens.length && !tokens[i + 1].isWhitespace()) {
                start = start + tokens[i + 1].getToken();
            }
            if (checkCancelledCallback != null && checkCancelledCallback.checkCancelled()) {
                break;
            }

            StringBuilder candidate = new StringBuilder();
            if (this.mStartSpace.containsKey(start)) {
                int maxWords = this.mStartSpace.get(start);
                int spaces = 0;
                for (int j = i; j < tokens.length && j - i < MAX_TOKENS_IN_MULTIWORD; j++) {
                    if (tokens[j].isWhitespace()) {
                        // Runs of whitespace tokens count as a single separator.
                        if (j > 1 && !tokens[j - 1].isWhitespace()) {
                            candidate.append(' ');
                            spaces++;
                        }
                        if (spaces == maxWords) {
                            break;
                        }
                    } else {
                        candidate.append(tokens[j].getToken());
                        String text = candidate.toString();
                        if (this.mFull.containsKey(text)) {
                            tokens[i] = prepareNewReading(text, tokens[i].getToken(), tokens[i], false);
                            // The closing tag belongs on the token that completed the match.
                            // A separate end index that started at 0 used to put it on
                            // tokens[0] when the match completed on the first scanned token.
                            tokens[j] = prepareNewReading(text, tokens[j].getToken(), tokens[j], true);
                        }
                    }
                }
            }

            // NOTE(review): 'candidate' is not cleared here, so when the scan above has run,
            // the scan below continues from its leftover text instead of starting at token i.
            // Kept as is to leave matching results unchanged; confirm whether a reset is intended.
            if (this.mStartNoSpace.containsKey(start.substring(0, 1))) {
                for (int j = i; j < tokens.length && !tokens[j].isWhitespace() && j - i < MAX_TOKENS_IN_MULTIWORD; j++) {
                    candidate.append(tokens[j].getToken());
                    String text = candidate.toString();
                    if (this.mFull.containsKey(text)) {
                        tokens[i] = prepareNewReading(text, tokens[i].getToken(), tokens[i], false);
                        tokens[j] = prepareNewReading(text, tokens[j].getToken(), tokens[j], true);
                    }
                }
            }
        }
        return new AnalyzedSentence(tokens);
    }

    /**
     * Adds an opening or closing marker reading for a matched expression to a token.
     *
     * @param str the matched expression; must be a key of {@code mFull}
     * @param str2 surface form used for the new reading
     * @param analyzedTokenReadings the token that receives the reading
     * @param z {@code true} for the closing marker {@code </tag>}, {@code false} for {@code <tag>}
     * @return {@code analyzedTokenReadings}, after the reading was added
     */
    private AnalyzedTokenReadings prepareNewReading(String str, String str2, AnalyzedTokenReadings analyzedTokenReadings, boolean z) {
        AnalyzedToken entry = this.mFull.get(str);
        String marker = (z ? "</" : "<") + entry.getPOSTag() + '>';
        return setAndAnnotate(analyzedTokenReadings, new AnalyzedToken(str2, marker, entry.getLemma()));
    }

    /** Adds {@code analyzedToken} as a reading, recorded under the name {@code MULTIWORD_CHUNKER}. */
    private AnalyzedTokenReadings setAndAnnotate(AnalyzedTokenReadings analyzedTokenReadings, AnalyzedToken analyzedToken) {
        analyzedTokenReadings.addReading(analyzedToken, "MULTIWORD_CHUNKER");
        return analyzedTokenReadings;
    }

    /**
     * Reads the stream as UTF-8 and returns its entry lines.
     *
     * <p>Each line is trimmed; empty lines and lines starting with {@code #} are skipped.
     * Text from the first {@code #} to the end of a line is removed and the rest trimmed again.
     *
     * @param inputStream stream to read; it is closed together with the reader wrapping it
     * @return the entry lines in file order
     * @throws RuntimeException wrapping any {@link IOException} raised while reading
     */
    private List<String> loadWords(InputStream inputStream) {
        List<String> lines = new ArrayList<>();
        // try-with-resources closes the reader on the error path as well.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.isEmpty() || trimmed.charAt(0) == '#') {
                    continue;
                }
                lines.add(trimmed.replaceFirst("#.*", "").trim());
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return lines;
    }
}
