package ai.grazie.detector.ngram;

import ai.grazie.detector.ngram.impl.NgramExtractor;
import ai.grazie.detector.ngram.profiles.LanguageProfileReader;
import ai.grazie.detector.utils.filter.AggregatedTextFilter;
import ai.grazie.detector.utils.filter.TextFilter;
import ai.grazie.nlp.langs.Language;
import ai.grazie.utils.json.JSON;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.FileVisitOption;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Stream;
import kotlin.Metadata;
import kotlin.Pair;
import kotlin.TuplesKt;
import kotlin.Unit;
import kotlin.collections.CollectionsKt;
import kotlin.collections.MapsKt;
import kotlin.comparisons.ComparisonsKt;
import kotlin.jdk7.AutoCloseableKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.functions.Function2;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.sequences.SequencesKt;
import kotlin.text.CharsKt;
import kotlin.text.Regex;

/* compiled from: NgramEnumerator.kt */
@Metadata(mv = {1, 7, 0}, k = 2, xi = 48, d1 = {"��\b\n��\n\u0002\u0010\u0002\n��\u001a\u0006\u0010��\u001a\u00020\u0001¨\u0006\u0002"}, d2 = {"main", "", "nlp-detect"})
@SourceDebugExtension({"SMAP\nNgramEnumerator.kt\nKotlin\n*S Kotlin\n*F\n+ 1 NgramEnumerator.kt\nai/grazie/detector/ngram/NgramEnumeratorKt\n+ 2 fake.kt\nkotlin/jvm/internal/FakeKt\n+ 3 Maps.kt\nkotlin/collections/MapsKt__MapsKt\n+ 4 _Sequences.kt\nkotlin/sequences/SequencesKt___SequencesKt\n+ 5 JSON.kt\nai/grazie/utils/json/JSON\n*L\n1#1,59:1\n1#2:60\n535#3:61\n520#3,6:62\n607#4:68\n607#4:69\n1317#4,2:70\n28#5:72\n*S KotlinDebug\n*F\n+ 1 NgramEnumerator.kt\nai/grazie/detector/ngram/NgramEnumeratorKt\n*L\n43#1:61\n43#1:62,6\n52#1:68\n53#1:69\n54#1:70,2\n56#1:72\n*E\n"})
/* loaded from: input_file:ai/grazie/detector/ngram/NgramEnumeratorKt.class */
public final class NgramEnumeratorKt {
    public static final void main() {
        final Language language = Language.UKRAINIAN;
        final Regex regex = new Regex("[^\\p{javaLetter}]+");
        final Regex regex2 = new Regex("\\s{2,}");
        final ConcurrentHashMap concurrentHashMap = new ConcurrentHashMap();
        Stream<Path> walk = Files.walk(Path.of("path_to_corpus", new String[0]), new FileVisitOption[0]);
        try {
            Function1<Path, Unit> function1 = new Function1<Path, Unit>() { // from class: ai.grazie.detector.ngram.NgramEnumeratorKt$main$1$1
                /* JADX INFO: Access modifiers changed from: package-private */
                /* JADX WARN: 'super' call moved to the top of the method (can break code semantics) */
                {
                    super(1);
                }

                public final void invoke(Path path) {
                    if (Files.isRegularFile(path, new LinkOption[0])) {
                        Stream stream = (Stream) Files.lines(path).parallel();
                        final Regex regex3 = regex;
                        final Regex regex4 = regex2;
                        final ConcurrentHashMap<String, Integer> concurrentHashMap2 = concurrentHashMap;
                        final Language language2 = language;
                        Function1<String, Unit> function12 = new Function1<String, Unit>() { // from class: ai.grazie.detector.ngram.NgramEnumeratorKt$main$1$1.1
                            /* JADX WARN: 'super' call moved to the top of the method (can break code semantics) */
                            {
                                super(1);
                            }

                            public final void invoke(String str) {
                                boolean z;
                                AggregatedTextFilter aggregatedTextFilter = TextFilter.Companion.getDefault();
                                Intrinsics.checkNotNullExpressionValue(str, "line");
                                List<String> extract = NgramExtractor.Companion.getStandard().extract(regex4.replace(regex3.replace(aggregatedTextFilter.filter(str), " "), " "));
                                ConcurrentHashMap<String, Integer> concurrentHashMap3 = concurrentHashMap2;
                                Language language3 = language2;
                                for (String str2 : extract) {
                                    String str3 = str2;
                                    int i = 0;
                                    while (true) {
                                        if (i >= str3.length()) {
                                            z = true;
                                            break;
                                        }
                                        char charAt = str3.charAt(i);
                                        if (!(CharsKt.isWhitespace(charAt) || language3.getAlphabet().matchEntire(String.valueOf(charAt)))) {
                                            z = false;
                                            break;
                                        }
                                        i++;
                                    }
                                    if (z) {
                                        NgramEnumeratorKt$main$1$1$1$1$2 ngramEnumeratorKt$main$1$1$1$1$2 = NgramEnumeratorKt$main$1$1$1$1$2.INSTANCE;
                                        concurrentHashMap3.merge(str2, 1, (v1, v2) -> {
                                            return invoke$lambda$2$lambda$1(r3, v1, v2);
                                        });
                                    }
                                }
                            }

                            private static final Integer invoke$lambda$2$lambda$1(Function2 function2, Object obj, Object obj2) {
                                return (Integer) function2.invoke(obj, obj2);
                            }

                            public /* bridge */ /* synthetic */ Object invoke(Object obj) {
                                invoke((String) obj);
                                return Unit.INSTANCE;
                            }
                        };
                        stream.forEach((v1) -> {
                            invoke$lambda$0(r1, v1);
                        });
                    }
                }

                private static final void invoke$lambda$0(Function1 function12, Object obj) {
                    function12.invoke(obj);
                }

                public /* bridge */ /* synthetic */ Object invoke(Object obj) {
                    invoke((Path) obj);
                    return Unit.INSTANCE;
                }
            };
            walk.forEach((v1) -> {
                main$lambda$1$lambda$0(r1, v1);
            });
            Unit unit = Unit.INSTANCE;
            AutoCloseableKt.closeFinally(walk, (Throwable) null);
            ConcurrentHashMap concurrentHashMap2 = concurrentHashMap;
            LinkedHashMap linkedHashMap = new LinkedHashMap();
            for (Map.Entry entry : concurrentHashMap2.entrySet()) {
                if (((String) entry.getKey()).length() == 3) {
                    linkedHashMap.put(entry.getKey(), entry.getValue());
                }
            }
            int intValue = ((Number) CollectionsKt.maxOrThrow(linkedHashMap.values())).intValue();
            final int i = intValue / 200;
            System.out.println((Object) ("Found " + concurrentHashMap.size() + " ngrams, max trigram frequency " + intValue + ". Dividing everything by " + i + " for smaller file size"));
            LinkedHashMap linkedHashMap2 = new LinkedHashMap();
            for (Pair pair : SequencesKt.sortedWith(SequencesKt.sortedWith(SequencesKt.filter(SequencesKt.map(MapsKt.asSequence(concurrentHashMap), new Function1<Map.Entry<? extends String, ? extends Integer>, Pair<? extends String, ? extends Integer>>() { // from class: ai.grazie.detector.ngram.NgramEnumeratorKt$main$2
                /* JADX INFO: Access modifiers changed from: package-private */
                /* JADX WARN: 'super' call moved to the top of the method (can break code semantics) */
                {
                    super(1);
                }

                public final Pair<String, Integer> invoke(Map.Entry<String, Integer> entry2) {
                    Intrinsics.checkNotNullParameter(entry2, "<name for destructuring parameter 0>");
                    return TuplesKt.to(entry2.getKey(), Integer.valueOf(entry2.getValue().intValue() / i));
                }
            }), new Function1<Pair<? extends String, ? extends Integer>, Boolean>() { // from class: ai.grazie.detector.ngram.NgramEnumeratorKt$main$3
                public final Boolean invoke(Pair<String, Integer> pair2) {
                    Intrinsics.checkNotNullParameter(pair2, "it");
                    return Boolean.valueOf(((Number) pair2.getSecond()).intValue() > 1);
                }
            }), new Comparator() { // from class: ai.grazie.detector.ngram.NgramEnumeratorKt$main$$inlined$sortedBy$1
                @Override // java.util.Comparator
                public final int compare(T t, T t2) {
                    return ComparisonsKt.compareValues((String) ((Pair) t).getFirst(), (String) ((Pair) t2).getFirst());
                }
            }), new Comparator() { // from class: ai.grazie.detector.ngram.NgramEnumeratorKt$main$$inlined$sortedBy$2
                @Override // java.util.Comparator
                public final int compare(T t, T t2) {
                    return ComparisonsKt.compareValues(Integer.valueOf(((String) ((Pair) t).getFirst()).length()), Integer.valueOf(((String) ((Pair) t2).getFirst()).length()));
                }
            })) {
                linkedHashMap2.put((String) pair.component1(), Integer.valueOf(((Number) pair.component2()).intValue()));
            }
            System.out.println((Object) JSON.Default.INSTANCE.string(LanguageProfileReader.SerializedProfile.Companion.serializer(), new LanguageProfileReader.SerializedProfile(language.getIso().toString(), linkedHashMap2)));
        } catch (Throwable th) {
            AutoCloseableKt.closeFinally(walk, (Throwable) null);
            throw th;
        }
    }

    private static final void main$lambda$1$lambda$0(Function1 function1, Object obj) {
        function1.invoke(obj);
    }
}
