package org.languagetool.rules.ngrams;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.ResourceBundle;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.languagemodel.LanguageModel;
import org.languagetool.rules.Categories;
import org.languagetool.rules.ConfusionPair;
import org.languagetool.rules.ConfusionSetLoader;
import org.languagetool.rules.ConfusionString;
import org.languagetool.rules.ITSIssueType;
import org.languagetool.rules.Rule;
import org.languagetool.rules.RuleMatch;
import org.languagetool.rules.patterns.PatternToken;
import org.languagetool.tagging.disambiguation.rules.DisambiguationPatternRule;
import org.languagetool.tools.StringTools;
import org.languagetool.tools.Tools;

/* loaded from: input_file:org/languagetool/rules/ngrams/ConfusionProbabilityRule.class */
public abstract class ConfusionProbabilityRule extends Rule {
    /** Base rule id; returned by {@link #getId()} and used as the prefix of the per-pair ids built in {@code match}. */
    public static final String RULE_ID = "CONFUSION_RULE";

    /** Public constant that is not referenced anywhere inside this class. */
    public static final float MIN_COVERAGE = 0.5f;

    /** Lower bound on the alternative's probability; alternatives below it are never suggested. */
    private static final double MIN_PROB = 0.0d;

    /** Debug switch; not read by any code in this class ({@code debug} has an empty body). */
    private static final boolean DEBUG = false;

    /** Matches tokens that consist solely of one or more Unicode letters. */
    private static final Pattern REAL_WORD = Pattern.compile("\\p{L}+");

    /**
     * Shared cache of parsed confusion-set files, keyed by resource path and language.
     * Entries expire 10 minutes after they were written.
     */
    private static final LoadingCache<PathAndLanguage, Map<String, List<ConfusionPair>>> confSetCache = CacheBuilder.newBuilder()
            .expireAfterWrite(10, TimeUnit.MINUTES)
            .build(new CacheLoader<PathAndLanguage, Map<String, List<ConfusionPair>>>() {
                /**
                 * Loads and parses the confusion-set resource identified by the key.
                 *
                 * @param pathAndLanguage resource path plus the language handed to the loader
                 * @return the confusion pairs as returned by {@code ConfusionSetLoader.loadConfusionPairs}
                 * @throws IOException if reading or closing the resource stream fails
                 */
                @Override
                public Map<String, List<ConfusionPair>> load(@NotNull PathAndLanguage pathAndLanguage) throws IOException {
                    ConfusionSetLoader confusionSetLoader = new ConfusionSetLoader(pathAndLanguage.lang);
                    // try-with-resources closes the stream exactly once (and skips closing a null stream);
                    // a failure while closing after another failure is attached as a suppressed exception.
                    try (InputStream confusionSetStream = JLanguageTool.getDataBroker().getFromResourceDirAsStream(pathAndLanguage.path)) {
                        return confusionSetLoader.loadConfusionPairs(confusionSetStream);
                    }
                }
            });

    /** Token text mapped to the confusion pairs it takes part in; filled by the constructor, replaced by {@code setConfusionPair}. */
    private final Map<String, List<ConfusionPair>> wordToPairs;

    /** Language model handed to {@code LanguageModelUtils} for the n-gram probability lookups. */
    private final LanguageModel lm;

    /** Configured n-gram size; accepted range is 1..5, but the probability lookup only supports 3 and 4. */
    private final int grams;

    /** Language used for tokenization, resource paths and probability lookups. */
    private final Language language;

    /** Phrases searched for in the lowercased sentence text; a token covered by one of them is not reported. */
    private final List<String> exceptions;

    /** Anti-pattern rules built from the token patterns passed to the constructor. */
    private final List<DisambiguationPatternRule> antiPatterns;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/languagetool/rules/ngrams/ConfusionProbabilityRule$PathAndLanguage.class */
    /**
     * Immutable key for the confusion-set cache: a resource path together with the language
     * the file is loaded for. Two keys are equal when both components are equal.
     */
    public static class PathAndLanguage {
        private final String path;
        private final Language lang;

        /**
         * @param str resource path of the confusion-set file, must not be null
         * @param language language the file belongs to, must not be null
         * @throws NullPointerException if either argument is null
         */
        PathAndLanguage(String str, Language language) {
            this.path = Objects.requireNonNull(str);
            this.lang = Objects.requireNonNull(language);
        }

        /** Equal only to another instance of exactly this class with an equal path and an equal language. */
        @Override
        public boolean equals(Object obj) {
            if (obj == this) {
                return true;
            }
            if (obj == null) {
                return false;
            }
            if (obj.getClass() != getClass()) {
                return false;
            }
            PathAndLanguage that = (PathAndLanguage) obj;
            if (!this.path.equals(that.path)) {
                return false;
            }
            return this.lang.equals(that.lang);
        }

        /** Combines both components with the usual 31-multiplier scheme (same value as {@code Objects.hash(path, lang)}). */
        @Override
        public int hashCode() {
            int combined = 31 + this.path.hashCode();
            return 31 * combined + this.lang.hashCode();
        }
    }

    /* loaded from: input_file:org/languagetool/rules/ngrams/ConfusionProbabilityRule$SpecificIdRule.class */
    /**
     * Concrete rule carrying a pair-specific id and description. Instances are attached to the
     * matches produced by the enclosing rule so that each confusion pair is reported under its own id.
     */
    static class SpecificIdRule extends ConfusionProbabilityRule {
        private final String id;
        private final String desc;

        /**
         * @param str the rule id, must not be null
         * @param str2 the rule description
         * @param resourceBundle message bundle passed to the parent rule
         * @param languageModel language model passed to the parent rule
         * @param language language of the rule; for English, a few known ids get an explanatory URL
         */
        SpecificIdRule(String str, String str2, ResourceBundle resourceBundle, LanguageModel languageModel, Language language) {
            super(resourceBundle, languageModel, language);
            this.id = Objects.requireNonNull(str);
            this.desc = str2;

            // Only English pairs have an article; the id groups are disjoint, so at most one URL applies.
            String articleUrl = null;
            if (language.getShortCode().equals("en")) {
                if (str.equals("CONFUSION_RULE_BARE_BEAR") || str.equals("CONFUSION_RULE_BEAR_BARE")) {
                    articleUrl = "https://languagetool.org/insights/post/spelling-bear-vs-bare-with-me/";
                } else if (str.equals("CONFUSION_RULE_DISCREET_DISCRETE") || str.equals("CONFUSION_RULE_DISCRETE_DISCREET")) {
                    articleUrl = "https://languagetool.org/insights/post/discrete-vs-discreet/";
                } else if (str.equals("CONFUSION_RULE_PRECEDENTS_PRECEDENCE") || str.equals("CONFUSION_RULE_PRECEDENCE_PRECEDENTS")) {
                    articleUrl = "https://languagetool.org/insights/post/precedence-or-precedent/";
                }
            }
            if (articleUrl != null) {
                setUrl(Tools.getUrl(articleUrl));
            }
        }

        /** Returns the pair-specific id given at construction time. */
        @Override
        public String getId() {
            return this.id;
        }

        /** Returns the description given at construction time. */
        @Override
        public String getDescription() {
            return this.desc;
        }
    }

    /**
     * Creates a rule that uses 3-grams, no exception phrases and no anti-patterns.
     *
     * @param resourceBundle message bundle for localized texts
     * @param languageModel language model used for probability lookups
     * @param language language of the checked text
     */
    public ConfusionProbabilityRule(ResourceBundle resourceBundle, LanguageModel languageModel, Language language) {
        this(resourceBundle, languageModel, language, 3);
    }

    /**
     * Creates a rule with the given n-gram size, no exception phrases and no anti-patterns.
     *
     * @param resourceBundle message bundle for localized texts
     * @param languageModel language model used for probability lookups
     * @param language language of the checked text
     * @param i the n-gram size
     */
    public ConfusionProbabilityRule(ResourceBundle resourceBundle, LanguageModel languageModel, Language language, int i) {
        this(resourceBundle, languageModel, language, i, Arrays.<String>asList());
    }

    /**
     * Creates a rule with the given n-gram size and exception phrases, without anti-patterns.
     *
     * @param resourceBundle message bundle for localized texts
     * @param languageModel language model used for probability lookups
     * @param language language of the checked text
     * @param i the n-gram size
     * @param list exception phrases; tokens covered by one of them are not reported
     */
    public ConfusionProbabilityRule(ResourceBundle resourceBundle, LanguageModel languageModel, Language language, int i, List<String> list) {
        this(resourceBundle, languageModel, language, i, list, Collections.<List<PatternToken>>emptyList());
    }

    /**
     * Creates a fully configured rule: sets category and issue type, loads every confusion-set
     * file named by {@link #getFilenames()} for the language, and stores the configuration.
     *
     * @param resourceBundle message bundle for localized texts
     * @param languageModel language model used for probability lookups, must not be null
     * @param language language of the checked text, must not be null
     * @param i the n-gram size, 1 to 5 inclusive (note that the probability lookup itself only handles 3 and 4)
     * @param list exception phrases; tokens covered by one of them are not reported
     * @param list2 token patterns turned into anti-patterns via {@code makeAntiPatterns}
     * @throws IllegalArgumentException if {@code i} is outside 1..5
     * @throws NullPointerException if {@code languageModel} or {@code language} is null
     */
    public ConfusionProbabilityRule(ResourceBundle resourceBundle, LanguageModel languageModel, Language language, int i, List<String> list, List<List<PatternToken>> list2) {
        super(resourceBundle);
        this.wordToPairs = new HashMap<>();
        setCategory(Categories.TYPOS.getCategory(resourceBundle));
        setLocQualityIssueType(ITSIssueType.NonConformance);

        // Merge all confusion-set files of this language; later files overwrite entries for the same word.
        for (String filename : getFilenames()) {
            String resourcePath = "/" + language.getShortCode() + "/" + filename;
            Map<String, List<ConfusionPair>> loadedPairs = confSetCache.getUnchecked(new PathAndLanguage(resourcePath, language));
            this.wordToPairs.putAll(loadedPairs);
        }

        this.lm = Objects.requireNonNull(languageModel);
        this.language = Objects.requireNonNull(language);

        boolean gramsInRange = i >= 1 && i <= 5;
        if (!gramsInRange) {
            throw new IllegalArgumentException("grams must be between 1 and 5: " + i);
        }
        this.grams = i;
        this.exceptions = list;
        this.antiPatterns = makeAntiPatterns(list2, language);
    }

    /**
     * Names of the confusion-set files to load. The constructor resolves each name against
     * {@code "/" + language short code + "/"} and merges the loaded pairs.
     * <p>
     * This method is called from the constructor, so an overriding implementation runs before
     * the subclass's own fields have been initialized.
     *
     * @return a list containing only {@code "confusion_sets.txt"}
     */
    @NotNull
    protected List<String> getFilenames() {
        return Arrays.asList("confusion_sets.txt");
    }

    /**
     * Returns the generic rule id {@link #RULE_ID}. The nested {@code SpecificIdRule} overrides
     * this with a pair-specific id.
     */
    @Override // org.languagetool.rules.Rule
    public String getId() {
        return RULE_ID;
    }

    /**
     * Reports the configured n-gram size as the context estimate.
     *
     * @return the value of the {@code grams} field
     */
    @Override // org.languagetool.rules.Rule
    public int estimateContextForSureMatch() {
        return this.grams;
    }

    /**
     * Checks every token of the sentence against the known confusion pairs and reports a match
     * when the language model rates the pair's other term as sufficiently more probable in context.
     * <p>
     * A sentence that tokenizes to exactly two tokens is not checked at all. A capitalized token
     * without a direct entry is looked up with its first character lowercased, but only while no
     * letters-only token has been seen yet in the sentence. Candidates are dropped when
     * {@link #isException} vetoes them, when the pair is one-directional and the alternative is
     * the pair's first term, when the token directly follows the sentence-start marker and the
     * next token is not a common word, or when an anti-pattern or exception phrase covers the token.
     *
     * @param analyzedSentence the sentence to check
     * @return the matches found, possibly empty
     */
    @Override // org.languagetool.rules.Rule
    public RuleMatch[] match(AnalyzedSentence analyzedSentence) {
        String sentenceText = analyzedSentence.getText();
        List<GoogleToken> tokens = GoogleToken.getGoogleTokens(sentenceText, true, LanguageModelUtils.getGoogleStyleWordTokenizer(this.language));
        if (tokens.size() == 2) {
            return new RuleMatch[0];
        }

        List<RuleMatch> matches = new ArrayList<>();
        boolean realWordSeen = false;
        for (int pos = 0; pos < tokens.size(); pos++) {
            GoogleToken current = tokens.get(pos);
            String word = current.token;
            List<ConfusionPair> pairs = this.wordToPairs.get(word);
            boolean lookedUpLowercased = false;
            if (pairs == null && !word.isEmpty() && Character.isUpperCase(word.charAt(0)) && !realWordSeen && isRealWord(word)) {
                // Fall back to the lowercased form only for the first real word of the sentence.
                pairs = this.wordToPairs.get(StringTools.lowercaseFirstChar(word));
                lookedUpLowercased = true;
            }
            if (isRealWord(word)) {
                realWordSeen = true;
            }
            if (pairs == null) {
                continue;
            }

            for (ConfusionPair pair : pairs) {
                if (pair == null) {
                    continue;
                }
                List<ConfusionString> terms = lookedUpLowercased ? pair.getUppercaseFirstCharTerms() : pair.getTerms();
                ConfusionString better = getBetterAlternativeOrNull(current, tokens, terms, pair.getFactor());
                if (better == null || isException(sentenceText, current.startPos, current.endPos)) {
                    continue;
                }
                if (!pair.isBidirectional() && better.getString().equals(terms.get(0).getString())) {
                    // One-directional pair: the first term is never offered as the suggestion.
                    continue;
                }

                String message = getMessage(getConfusionString(terms, current), better);
                List<String> suggestions = new ArrayList<>(getSuggestions(message));
                if (!suggestions.contains(better.getString())) {
                    suggestions.add(better.getString());
                }

                // Skip a token right after the sentence-start marker when the following token is not a common word.
                if (pos > 0
                        && LanguageModel.GOOGLE_SENTENCE_START.equals(tokens.get(pos - 1).token)
                        && tokens.size() > pos + 1
                        && tokens.get(pos + 1).token != null
                        && !isCommonWord(tokens.get(pos + 1).token)) {
                    continue;
                }
                if (isCoveredByAntiPattern(analyzedSentence, current) || isLocalException(analyzedSentence, current)) {
                    continue;
                }

                String firstTerm = pair.getTerms().get(0).getString();
                String secondTerm = pair.getTerms().get(1).getString();
                String specificId = getId() + "_" + cleanId(firstTerm) + "_" + cleanId(secondTerm);
                SpecificIdRule specificRule = new SpecificIdRule(specificId, getDescription(firstTerm, secondTerm), this.messages, this.lm, this.language);
                String shortMessage = Tools.i18n(this.messages, "statistics_suggest_short_desc", new Object[0]);
                RuleMatch ruleMatch = new RuleMatch(specificRule, analyzedSentence, current.startPos, current.endPos, message, shortMessage);
                ruleMatch.setSuggestedReplacements(suggestions);
                matches.add(ruleMatch);
            }
        }
        return matches.toArray(new RuleMatch[0]);
    }

    /** Precompiled {@code \w+} check, so the regex is not recompiled on every call. */
    private static final Pattern WORD_CHARS_ONLY = Pattern.compile("\\w+");

    /**
     * Tells whether the token counts as a common word. {@code match} uses this for the token
     * after a sentence-initial candidate: when it is not a common word, the candidate is skipped.
     * <p>
     * This default accepts any token consisting solely of {@code \w} characters; without Unicode
     * flags that is ASCII letters, digits and the underscore, so tokens with other letters are
     * rejected. Subclasses may override.
     *
     * @param str the token text, must not be null
     * @return {@code true} if the whole token matches {@code \w+}
     */
    protected boolean isCommonWord(String str) {
        return WORD_CHARS_ONLY.matcher(str).matches();
    }

    /**
     * Tells whether an immunized token of the sentence spans the given n-gram token.
     *
     * @param analyzedSentence the sentence being checked
     * @param googleToken the candidate token with its start and end offsets
     * @return {@code true} if some immunized, non-whitespace token covers the candidate's range
     */
    private boolean isCoveredByAntiPattern(AnalyzedSentence analyzedSentence, GoogleToken googleToken) {
        for (AnalyzedTokenReadings reading : getSentenceWithImmunization(analyzedSentence).getTokensWithoutWhitespace()) {
            if (!reading.isImmunized()) {
                continue;
            }
            boolean spansCandidate = covers(reading.getStartPos(), reading.getEndPos(), googleToken.startPos, googleToken.endPos);
            if (spansCandidate) {
                return true;
            }
        }
        return false;
    }

    /**
     * Turns a confusion term into a rule-id fragment: upper-cases it and transliterates the
     * German umlauts Ä, Ü and Ö to AE, UE and OE.
     *
     * @param str the term to convert
     * @return the upper-cased, transliterated term
     */
    private String cleanId(String str) {
        String idPart = str.toUpperCase();
        idPart = idPart.replace("Ä", "AE");
        idPart = idPart.replace("Ü", "UE");
        idPart = idPart.replace("Ö", "OE");
        return idPart;
    }

    /**
     * Tells whether the token consists of letters only.
     *
     * @param str the token text
     * @return {@code true} if the entire token matches {@code REAL_WORD} (one or more Unicode letters)
     */
    private boolean isRealWord(String str) {
        Matcher lettersOnly = REAL_WORD.matcher(str);
        return lettersOnly.matches();
    }

    /**
     * Tells whether the token lies inside an occurrence of one of the configured exception phrases.
     * <p>
     * Every occurrence of every phrase is searched in the lowercased sentence text, so phrases
     * are expected to be given in lower case. The search for a phrase stops once no further
     * occurrence is found.
     *
     * @param analyzedSentence the sentence being checked
     * @param googleToken the candidate token with its start and end offsets
     * @return {@code true} if an occurrence of an exception phrase covers the token's range
     */
    private boolean isLocalException(AnalyzedSentence analyzedSentence, GoogleToken googleToken) {
        // Lowercase once and use the same text for every search, so that second and later
        // occurrences are matched the same way as the first one.
        String lowercasedText = analyzedSentence.getText().toLowerCase();
        for (String exception : this.exceptions) {
            int start = lowercasedText.indexOf(exception);
            // Loop ends as soon as there is no further occurrence.
            while (start != -1) {
                int end = start + exception.length();
                if (end == start) {
                    // Empty phrase: the search position could never advance, so give up as before.
                    return false;
                }
                if (covers(start, end, googleToken.startPos, googleToken.endPos)) {
                    return true;
                }
                start = lowercasedText.indexOf(exception, end);
            }
        }
        return false;
    }

    /**
     * Tells whether the range {@code [i, i2]} fully contains the range {@code [i3, i4]}.
     *
     * @param i start of the outer range
     * @param i2 end of the outer range
     * @param i3 start of the inner range
     * @param i4 end of the inner range
     * @return {@code true} if the outer range starts at or before the inner start and ends at or after the inner end
     */
    private boolean covers(int i, int i2, int i3, int i4) {
        if (i > i3) {
            return false;
        }
        return i4 <= i2;
    }

    /** Precompiled pattern for {@code <suggestion>…</suggestion>} markup; the content is matched reluctantly. */
    private static final Pattern SUGGESTION_MARKUP = Pattern.compile("<suggestion>(.*?)</suggestion>");

    /**
     * Extracts the text of every {@code <suggestion>} element from a message.
     *
     * @param str the message text, possibly containing suggestion markup
     * @return the suggestion texts in order of appearance; empty if there are none
     */
    private List<String> getSuggestions(String str) {
        List<String> suggestions = new ArrayList<>();
        Matcher matcher = SUGGESTION_MARKUP.matcher(str);
        while (matcher.find()) {
            suggestions.add(matcher.group(1));
        }
        return suggestions;
    }

    /**
     * Hook that lets subclasses veto a match. {@code match} calls it with the sentence text and
     * the start and end offsets of the candidate token; a {@code true} result suppresses the match.
     *
     * @param str the text of the sentence being checked
     * @param i start offset of the candidate token
     * @param i2 end offset of the candidate token
     * @return always {@code false} in this base implementation
     */
    protected boolean isException(String str, int i, int i2) {
        return false;
    }

    /**
     * Returns the text for the message key {@code statistics_rule_description}, formatted
     * without any arguments.
     */
    @Override // org.languagetool.rules.Rule
    public String getDescription() {
        return Tools.i18n(this.messages, "statistics_rule_description", new Object[0]);
    }

    /**
     * Returns the text for the message key {@code statistics_rule_description}, formatted with
     * the two terms of a confusion pair. Used as the description of the pair-specific rule
     * created in {@code match}.
     *
     * @param str first term of the pair
     * @param str2 second term of the pair
     */
    private String getDescription(String str, String str2) {
        return Tools.i18n(this.messages, "statistics_rule_description", str, str2);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Builds the user-visible message for a suspected confusion. The message key depends on
     * which of the two terms carry a description:
     * <ul>
     *   <li>both: {@code statistics_suggest1_new}</li>
     *   <li>only the term found in the text: {@code statistics_suggest4_new}</li>
     *   <li>only the suggested term: {@code statistics_suggest2_new}</li>
     *   <li>neither: {@code statistics_suggest3_new}</li>
     * </ul>
     *
     * @param confusionString the term as found in the text
     * @param confusionString2 the suggested alternative
     * @return the localized message
     */
    public String getMessage(ConfusionString confusionString, ConfusionString confusionString2) {
        boolean textTermDescribed = confusionString.getDescription() != null;
        boolean suggestionDescribed = confusionString2.getDescription() != null;
        if (textTermDescribed && suggestionDescribed) {
            return Tools.i18n(this.messages, "statistics_suggest1_new", confusionString2.getString(), confusionString2.getDescription(), confusionString.getString(), confusionString.getDescription());
        }
        if (textTermDescribed) {
            // Pass the term's string, like every other branch does, not the ConfusionString object itself.
            return Tools.i18n(this.messages, "statistics_suggest4_new", confusionString2.getString(), confusionString.getString(), confusionString.getDescription());
        }
        if (suggestionDescribed) {
            return Tools.i18n(this.messages, "statistics_suggest2_new", confusionString2.getString(), confusionString2.getDescription(), confusionString.getString());
        }
        return Tools.i18n(this.messages, "statistics_suggest3_new", confusionString2.getString(), confusionString.getString());
    }

    /**
     * Replaces all loaded confusion pairs with the single given pair, registering it under
     * each of its terms.
     *
     * @param confusionPair the only pair this rule should check from now on
     */
    public void setConfusionPair(ConfusionPair confusionPair) {
        this.wordToPairs.clear();
        for (ConfusionString term : confusionPair.getTerms()) {
            List<ConfusionPair> onlyThisPair = Collections.singletonList(confusionPair);
            this.wordToPairs.put(term.getString(), onlyThisPair);
        }
    }

    /**
     * Returns the n-gram size this rule was configured with.
     */
    public int getNGrams() {
        return this.grams;
    }

    /**
     * Picks the other term of a two-element confusion set and checks whether it fits the
     * context better than the token found in the text.
     *
     * @param googleToken the token found in the text
     * @param list all tokens of the sentence
     * @param list2 the confusion set, must contain exactly two terms
     * @param j factor by which the alternative must be more probable
     * @return the alternative if it is sufficiently more probable, otherwise {@code null}
     * @throws RuntimeException if the confusion set does not have exactly two terms
     */
    @Nullable
    private ConfusionString getBetterAlternativeOrNull(GoogleToken googleToken, List<GoogleToken> list, List<ConfusionString> list2, long j) {
        if (list2.size() == 2) {
            ConfusionString alternative = getAlternativeTerm(list2, googleToken);
            return getBetterAlternativeOrNull(googleToken, list, alternative, j);
        }
        throw new RuntimeException("Confusion set must be of size 2: " + list2);
    }

    /**
     * Returns the first term of the set whose string differs from the token text. The comparison
     * is case-sensitive.
     *
     * @param list the confusion set
     * @param googleToken the token found in the text
     * @return the first term that is not equal to the token text
     * @throws RuntimeException if every term equals the token text
     */
    private ConfusionString getAlternativeTerm(List<ConfusionString> list, GoogleToken googleToken) {
        Iterator<ConfusionString> terms = list.iterator();
        while (terms.hasNext()) {
            ConfusionString candidate = terms.next();
            boolean sameAsToken = candidate.getString().equals(googleToken.token);
            if (!sameAsToken) {
                return candidate;
            }
        }
        throw new RuntimeException("No alternative found for: " + googleToken);
    }

    /**
     * Returns the term of the set that corresponds to the token found in the text. The
     * comparison ignores case.
     *
     * @param list the confusion set
     * @param googleToken the token found in the text
     * @return the first term whose string equals the token text, ignoring case
     * @throws RuntimeException if no term of the set matches the token
     */
    private ConfusionString getConfusionString(List<ConfusionString> list, GoogleToken googleToken) {
        Iterator<ConfusionString> terms = list.iterator();
        while (terms.hasNext()) {
            ConfusionString candidate = terms.next();
            if (!candidate.getString().equalsIgnoreCase(googleToken.token)) {
                continue;
            }
            return candidate;
        }
        throw new RuntimeException("Not found in set '" + list + "': " + googleToken);
    }

    /**
     * Compares the n-gram probability of the token as written with the probability of the
     * alternative term in the same position.
     *
     * @param googleToken the token found in the text
     * @param list all tokens of the sentence
     * @param confusionString the alternative term to evaluate
     * @param j factor the original probability is multiplied with before the comparison
     * @return {@code confusionString}, unless its probability is below {@code MIN_PROB} or not
     *         greater than the original probability times {@code j}; in those cases {@code null}
     * @throws RuntimeException if the rule is configured for anything other than 3-grams or 4-grams
     */
    private ConfusionString getBetterAlternativeOrNull(GoogleToken googleToken, List<GoogleToken> list, ConfusionString confusionString, long j) {
        String writtenWord = googleToken.token;
        double writtenProb;
        double alternativeProb;
        switch (this.grams) {
            case 3:
                writtenProb = LanguageModelUtils.get3gramProbabilityFor(this.language, this.lm, googleToken, list, writtenWord);
                alternativeProb = LanguageModelUtils.get3gramProbabilityFor(this.language, this.lm, googleToken, list, confusionString.getString());
                break;
            case 4:
                writtenProb = LanguageModelUtils.get4gramProbabilityFor(this.language, this.lm, googleToken, list, writtenWord);
                alternativeProb = LanguageModelUtils.get4gramProbabilityFor(this.language, this.lm, googleToken, list, confusionString.getString());
                break;
            default:
                throw new RuntimeException("Only 3grams and 4grams are supported");
        }
        debug("%.90f <- P(" + writtenWord + ") \n", Double.valueOf(writtenProb));
        debug("%.90f <- P(" + confusionString + ")\n", Double.valueOf(alternativeProb));

        // Keep the rejection test in this exact form so the outcome for unusual values stays the same.
        boolean rejected = alternativeProb < MIN_PROB || alternativeProb <= writtenProb * j;
        return rejected ? null : confusionString;
    }

    /**
     * Debug output hook with an empty body: nothing is printed or logged. Callers still build
     * and pass the format string and its arguments.
     *
     * @param str format string
     * @param objArr format arguments
     */
    private void debug(String str, Object... objArr) {
    }

    /**
     * Returns the anti-pattern rules that the constructor built with {@code makeAntiPatterns}.
     * The stored list itself is returned, not a copy.
     */
    @Override // org.languagetool.rules.Rule
    public List<DisambiguationPatternRule> getAntiPatterns() {
        return this.antiPatterns;
    }
}
