/*
 * Decompiled with CFR 0.152.
 */
package org.carrot2.clustering.stc;

import com.carrotsearch.hppc.BitSet;
import com.carrotsearch.hppc.BitSetIterator;
import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.IntStack;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.carrot2.attrs.AttrBoolean;
import org.carrot2.attrs.AttrComposite;
import org.carrot2.attrs.AttrDouble;
import org.carrot2.attrs.AttrInteger;
import org.carrot2.attrs.AttrObject;
import org.carrot2.attrs.AttrString;
import org.carrot2.clustering.Cluster;
import org.carrot2.clustering.ClusteringAlgorithm;
import org.carrot2.clustering.Document;
import org.carrot2.clustering.SharedInfrastructure;
import org.carrot2.clustering.stc.ClusterCandidate;
import org.carrot2.clustering.stc.GeneralizedSuffixTree;
import org.carrot2.internal.clustering.ClusteringAlgorithmUtilities;
import org.carrot2.language.EphemeralDictionaries;
import org.carrot2.language.LabelFilter;
import org.carrot2.language.LanguageComponents;
import org.carrot2.language.Stemmer;
import org.carrot2.language.StopwordFilter;
import org.carrot2.language.TokenTypeUtils;
import org.carrot2.language.Tokenizer;
import org.carrot2.text.preprocessing.BasicPreprocessingPipeline;
import org.carrot2.text.preprocessing.LabelFormatter;
import org.carrot2.text.preprocessing.PreprocessingContext;

public final class STCClusteringAlgorithm
extends AttrComposite
implements ClusteringAlgorithm {
    /**
     * Language component types this algorithm needs. The same instance is returned from
     * {@link #requiredLanguageComponents()}, so it is wrapped as unmodifiable to keep callers from
     * altering the shared static state.
     */
    private static final Set<Class<?>> REQUIRED_LANGUAGE_COMPONENTS = Collections.unmodifiableSet(new HashSet<Class<?>>(Arrays.asList(Stemmer.class, Tokenizer.class, StopwordFilter.class, LabelFilter.class, LabelFormatter.class)));

    /** Short name of this algorithm. */
    public static final String NAME = "STC";

    /** Query hint passed to the preprocessing pipeline. */
    public final AttrString queryHint;

    /** "Maximum word-document ratio" (0..1, default 0.9); words found in more documents do not count towards the effective phrase length. */
    public AttrDouble ignoreWordIfInHigherDocsPercent;

    /** "Minimum base cluster score" (0..10, default 2.0); lower-scoring base clusters are dropped. */
    public AttrDouble minBaseClusterScore;

    /** "Minimum base cluster documents" (2..20, default 2); handed to the suffix tree visitor. */
    public AttrInteger minBaseClusterSize;

    /** "Maximum base clusters" (min 2, default 300) kept after score sorting and label filtering. */
    public AttrInteger maxBaseClusters;

    /** "Maximum number of final clusters" (min 1, default 15). */
    public AttrInteger maxClusters;

    /** "Base cluster merge threshold" (0..1, default 0.6); mutual document overlap required to link two base clusters. */
    public AttrDouble mergeThreshold;

    /** "Maximum cluster phrase overlap" (0..1, default 0.6) between phrases of one merged cluster. */
    public AttrDouble maxPhraseOverlap;

    /** "Minimum general phrase coverage" (0..1, default 0.5) used when choosing between general and specific phrases. */
    public AttrDouble mostGeneralPhraseCoverage;

    /** "Maximum words per label" (min 1, default 4). */
    public AttrInteger maxWordsPerLabel;

    /** "Maximum phrases per label" (min 1, default 3). */
    public AttrInteger maxPhrasesPerLabel;

    /** "Boost single-term clusters" (min 0, default 0.5); when positive it replaces the length-based boost for one-word phrases. */
    public AttrDouble singleTermBoost;

    /** "Optimal cluster label length" (min 1, default 3); centre of the Gaussian length boost. */
    public AttrInteger optimalPhraseLength;

    /** "Phrase length tolerance" (min 0.5, default 2.0); deviation of the Gaussian length boost. */
    public AttrDouble optimalPhraseLengthDev;

    /** "Document count boost" (min 0, default 1.0); multiplier applied to the document count in base cluster scores. */
    public AttrDouble documentCountBoost;

    /** "Size-score sorting ratio" (0..1, default 0.0) passed to the final cluster reordering. */
    public AttrDouble scoreWeight;

    /** "Merge all stem-equivalent phrases when discovering base clusters" (default true). */
    public AttrBoolean mergeStemEquivalentBaseClusters;

    /** Input preprocessing pipeline; registered as the "preprocessing" attribute. */
    public BasicPreprocessingPipeline preprocessing;

    /** Optional per-request dictionaries; when non-null they override the supplied language components. */
    public EphemeralDictionaries dictionaries;

    /** Label formatter resolved from the preprocessing context at the start of each clustering run. */
    private LabelFormatter labelFormatter;

    /** Matches phrase candidates that have been deselected, for use with {@code removeIf}. */
    private static final Predicate<PhraseCandidate> NOT_SELECTED = p -> !p.selected;

    /** Token sequence and suffix tree of the current clustering run. */
    private GeneralizedSuffixTree.SequenceBuilder sb;

    /** Preprocessing context of the current clustering run. */
    private PreprocessingContext context;

    /**
     * Creates the algorithm and registers every tunable attribute, with its label, bounds and
     * default value, in the attribute registry inherited from {@link AttrComposite}.
     */
    public STCClusteringAlgorithm() {
        queryHint = attributes.register("queryHint", SharedInfrastructure.queryHintAttribute());

        // Word filtering and base cluster discovery.
        ignoreWordIfInHigherDocsPercent = attributes.register(
            "ignoreWordIfInHigherDocsPercent",
            AttrDouble.builder().label("Maximum word-document ratio").min(0.0).max(1.0).defaultValue(0.9));
        minBaseClusterScore = attributes.register(
            "minBaseClusterScore",
            AttrDouble.builder().label("Minimum base cluster score").min(0.0).max(10.0).defaultValue(2.0));
        minBaseClusterSize = attributes.register(
            "minBaseClusterSize",
            AttrInteger.builder().label("Minimum base cluster documents").min(2).max(20).defaultValue(2));
        maxBaseClusters = attributes.register(
            "maxBaseClusters",
            AttrInteger.builder().label("Maximum base clusters").min(2).defaultValue(300));

        // Merging of base clusters into final clusters.
        maxClusters = attributes.register(
            "maxClusters",
            AttrInteger.builder().label("Maximum number of final clusters").min(1).defaultValue(15));
        mergeThreshold = attributes.register(
            "mergeThreshold",
            AttrDouble.builder().label("Base cluster merge threshold").min(0.0).max(1.0).defaultValue(0.6));

        // Label construction.
        maxPhraseOverlap = attributes.register(
            "maxPhraseOverlap",
            AttrDouble.builder().label("Maximum cluster phrase overlap").min(0.0).max(1.0).defaultValue(0.6));
        mostGeneralPhraseCoverage = attributes.register(
            "mostGeneralPhraseCoverage",
            AttrDouble.builder().label("Minimum general phrase coverage").min(0.0).max(1.0).defaultValue(0.5));
        maxWordsPerLabel = attributes.register(
            "maxWordsPerLabel",
            AttrInteger.builder().label("Maximum words per label").min(1).defaultValue(4));
        maxPhrasesPerLabel = attributes.register(
            "maxPhrasesPerLabel",
            AttrInteger.builder().label("Maximum phrases per label").min(1).defaultValue(3));

        // Base cluster scoring.
        singleTermBoost = attributes.register(
            "singleTermBoost",
            AttrDouble.builder().label("Boost single-term clusters").min(0.0).defaultValue(0.5));
        optimalPhraseLength = attributes.register(
            "optimalPhraseLength",
            AttrInteger.builder().label("Optimal cluster label length").min(1).defaultValue(3));
        optimalPhraseLengthDev = attributes.register(
            "optimalPhraseLengthDev",
            AttrDouble.builder().label("Phrase length tolerance").min(0.5).defaultValue(2.0));
        documentCountBoost = attributes.register(
            "documentCountBoost",
            AttrDouble.builder().label("Document count boost").min(0.0).defaultValue(1.0));
        scoreWeight = attributes.register(
            "scoreWeight",
            AttrDouble.builder().label("Size-score sorting ratio").min(0.0).max(1.0).defaultValue(0.0));
        mergeStemEquivalentBaseClusters = attributes.register(
            "mergeStemEquivalentBaseClusters",
            AttrBoolean.builder().label("Merge all stem-equivalent phrases when discovering base clusters").defaultValue(true));

        // Nested components are exposed through getter/setter pairs rather than attribute holders.
        attributes.register(
            "preprocessing",
            ((AttrObject.Builder)AttrObject.builder(BasicPreprocessingPipeline.class).label("Input preprocessing components"))
                .getset(() -> preprocessing, v -> {
                    preprocessing = v;
                })
                .defaultValue(BasicPreprocessingPipeline::new));
        ClusteringAlgorithmUtilities.registerDictionaries(attributes, () -> dictionaries, v -> {
            dictionaries = v;
        });
    }

    /**
     * Returns the language component types this algorithm needs.
     *
     * @return the shared static set {@code REQUIRED_LANGUAGE_COMPONENTS}; the same instance is
     *     returned on every call
     */
    @Override
    public Set<Class<?>> requiredLanguageComponents() {
        return REQUIRED_LANGUAGE_COMPONENTS;
    }

    /**
     * Clusters the given documents.
     *
     * <p>Steps: preprocess the documents, feed every run of word tokens into the suffix tree
     * builder, discover base clusters, merge them, convert the result to {@link Cluster}
     * instances and finally reorder them by weighted score and size.
     *
     * <p>The run stores its preprocessing context, label formatter and sequence builder in
     * instance fields that the helper methods read.
     *
     * @param docStream documents to cluster; consumed once
     * @param languageComponents language resources; replaced by the result of
     *     {@code dictionaries.override(...)} when {@code dictionaries} is set
     * @return the clusters, ordered by {@code SharedInfrastructure.reorderByWeightedScoreAndSize}
     */
    @Override
    public <T extends Document> List<Cluster<T>> cluster(Stream<? extends T> docStream, LanguageComponents languageComponents) {
        // Keep the element type: a raw List would make every later use of the documents unchecked.
        List<T> documents = docStream.collect(Collectors.toList());
        ArrayList<Cluster<T>> clusters = new ArrayList<Cluster<T>>();
        if (this.dictionaries != null) {
            languageComponents = this.dictionaries.override(languageComponents);
        }
        this.context = this.preprocessing.preprocess(documents.stream(), (String)this.queryHint.get(), languageComponents);
        this.labelFormatter = this.context.languageComponents.get(LabelFormatter.class);
        this.sb = new GeneralizedSuffixTree.SequenceBuilder();

        int[] tokenIndex = this.context.allTokens.wordIndex;
        short[] tokenType = this.context.allTokens.type;
        for (int i = 0; i < tokenIndex.length; ++i) {
            if (tokenIndex[i] == -1) {
                // Not a word. NOTE(review): 0xA00 presumably combines the Tokenizer
                // document-separator and terminator flags -- confirm against Tokenizer.
                if ((tokenType[i] & 0xA00) != 0) {
                    this.sb.endDocument();
                }
                continue;
            }
            // First word of a run; advance to the last word before the next non-word entry.
            // This relies on the token array ending with a non-word (-1) entry, otherwise
            // tokenIndex[i + 1] reads past the end of the array.
            int start = i;
            while (tokenIndex[i + 1] != -1) {
                ++i;
            }
            // i >= start here, so the run always has at least one word.
            this.sb.addPhrase(tokenIndex, start, 1 + i - start);
        }
        this.sb.buildSuffixTree();

        List<ClusterCandidate> baseClusters = this.createBaseClusters(this.sb);
        ArrayList<ClusterCandidate> mergedClusters = this.createMergedClusters(baseClusters);
        this.postProcessing(documents, mergedClusters, clusters);
        return SharedInfrastructure.reorderByWeightedScoreAndSize(clusters, (Double)this.scoreWeight.get());
    }

    /**
     * Collects base clusters from the suffix tree.
     *
     * <p>Every visited tree node whose path passes {@link #checkAcceptablePhrase(IntStack)} and
     * has a non-zero effective phrase length becomes a candidate. Candidates are then optionally
     * merged by stem equivalence, filtered by minimum score, sorted by descending score, and
     * finally filtered through the label filter until {@code maxBaseClusters} have been kept.
     *
     * @param sb sequence builder whose suffix tree has already been built
     * @return the retained base clusters, best score first
     */
    private List<ClusterCandidate> createBaseClusters(GeneralizedSuffixTree.SequenceBuilder sb) {
        final ArrayList<ClusterCandidate> candidates = new ArrayList<ClusterCandidate>();
        final int minSize = (Integer)this.minBaseClusterSize.get();

        new GeneralizedSuffixTree.Visitor(sb, minSize){

            @Override
            protected void visit(int state, int cardinality, BitSet documents, IntStack path) {
                assert (cardinality >= minSize);
                // checkAcceptablePhrase may trim the path, so it must run before the path is read.
                if (STCClusteringAlgorithm.this.checkAcceptablePhrase(path)) {
                    int effectiveLength = STCClusteringAlgorithm.this.effectivePhraseLength(path);
                    if (effectiveLength != 0) {
                        float score = STCClusteringAlgorithm.this.baseClusterScore(effectiveLength, cardinality);
                        candidates.add(new ClusterCandidate(path.toArray(), (BitSet)documents.clone(), cardinality, score));
                    }
                }
            }
        }.visit();

        if (((Boolean)this.mergeStemEquivalentBaseClusters.get()).booleanValue()) {
            this.mergeStemEquivalentBaseClusters(sb, candidates);
        }

        // Drop everything that does not reach the minimum score.
        final double scoreFloor = (Double)this.minBaseClusterScore.get();
        candidates.removeIf(candidate -> !((double)candidate.score >= scoreFloor));

        // Highest score first.
        candidates.sort((left, right) -> Float.compare(right.score, left.score));

        // Compact in place, keeping candidates accepted by the label filter up to the limit.
        LabelFilter labelFilter = this.context.languageComponents.get(LabelFilter.class);
        int limit = (Integer)this.maxBaseClusters.get();
        int total = candidates.size();
        int kept = 0;
        int cursor = 0;
        while (cursor < total && kept < limit) {
            ClusterCandidate candidate = candidates.get(cursor++);
            assert (candidate.phrases.size() == 1);
            if (labelFilter.test(this.buildLabel(candidate.phrases.get(0)))) {
                candidates.set(kept++, candidate);
            }
        }
        candidates.subList(kept, candidates.size()).clear();
        assert (candidates.size() == kept);
        return candidates;
    }

    /**
     * Merges base clusters whose phrases consist of the same sequence of word stems.
     *
     * <p>The first candidate seen for a stem sequence is kept; later equivalents contribute their
     * documents to it. The phrase of the equivalent with the highest cardinality seen so far is
     * moved to the front and becomes the representative phrase. Merged candidates then get their
     * cardinality and score recomputed and are reduced to that single representative phrase.
     *
     * @param sb sequence builder providing the token sequence the phrase indices point into
     * @param candidates base clusters, each with exactly one phrase; compacted in place
     */
    private void mergeStemEquivalentBaseClusters(GeneralizedSuffixTree.SequenceBuilder sb, List<ClusterCandidate> candidates) {
        HashMap<IntArrayList, ClusterCandidate> merged = new HashMap<IntArrayList, ClusterCandidate>();
        int[] stemIndices = this.context.allWords.stemIndex;
        int j = 0;
        int max = candidates.size();
        for (int i = 0; i < max; ++i) {
            ClusterCandidate cc = candidates.get(i);
            // Compaction slot; only advanced when cc turns out to be a new stem sequence.
            candidates.set(j, cc);
            assert (cc.phrases.size() == 1);

            // A phrase is a list of inclusive (start, end) index pairs into sb.input, as in
            // buildLabel and effectivePhraseLength. Expand every pair so the key holds the stem
            // of each word; reading the raw array entries would use only the boundary words of
            // each pair (skipping interior words, doubling single-word pairs).
            int[] phraseRanges = cc.phrases.get(0);
            IntArrayList stemList = new IntArrayList(phraseRanges.length);
            for (int r = 0; r < phraseRanges.length; r += 2) {
                for (int seqIndex = phraseRanges[r]; seqIndex <= phraseRanges[r + 1]; ++seqIndex) {
                    int termIndex = sb.input.get(seqIndex);
                    stemList.add(stemIndices[termIndex]);
                }
            }

            ClusterCandidate equivalent = merged.get(stemList);
            if (equivalent == null) {
                merged.put(stemList, cc);
                ++j;
                continue;
            }
            if (equivalent.cardinality < cc.cardinality) {
                // More frequent variant: make it the representative phrase.
                equivalent.cardinality = cc.cardinality;
                equivalent.phrases.add(0, cc.phrases.get(0));
            } else {
                equivalent.phrases.add(cc.phrases.get(0));
            }
            equivalent.documents.or(cc.documents);
        }
        candidates.subList(j, candidates.size()).clear();

        // Recompute cardinality and score of merged candidates from the combined document set.
        IntStack scratch = new IntStack();
        for (ClusterCandidate cc : candidates) {
            if (cc.phrases.size() <= 1) continue;
            cc.cardinality = (int)cc.documents.cardinality();
            // Wrap the representative phrase without copying so it can be scored as a path.
            scratch.buffer = cc.phrases.get(0);
            scratch.elementsCount = scratch.buffer.length;
            cc.score = this.baseClusterScore(this.effectivePhraseLength(scratch), cc.cardinality);
            cc.phrases.subList(1, cc.phrases.size()).clear();
        }
    }

    /**
     * Merges base clusters that share enough documents into final cluster candidates.
     *
     * <p>Two base clusters are linked when their document intersection, divided by each cluster's
     * cardinality, exceeds {@code mergeThreshold} in both directions. Connected components of the
     * resulting graph are merged, sorted by descending score (then descending cardinality) and
     * truncated to {@code maxClusters}.
     *
     * @param baseClusters base clusters to merge
     * @return merged candidates, best first
     */
    private ArrayList<ClusterCandidate> createMergedClusters(List<ClusterCandidate> baseClusters) {
        final int END = -1;
        final int NO_INDEX = -1;
        final int count = baseClusters.size();
        final float threshold = ((Double)this.mergeThreshold.get()).floatValue();

        // Adjacency lists stored as (link, neighbor) pairs in a single stack. Slot 0 holds END
        // and neighbors[] starts at 0, so every chain finishes when its link reaches slot 0.
        IntStack neighborList = new IntStack();
        neighborList.push(END);
        int[] neighbors = new int[count];
        for (int i = 0; i < count; ++i) {
            ClusterCandidate first = baseClusters.get(i);
            for (int j = i + 1; j < count; ++j) {
                ClusterCandidate second = baseClusters.get(j);
                float sizeFirst = first.cardinality;
                float sizeSecond = second.cardinality;
                float shared = BitSet.intersectionCount((BitSet)first.documents, (BitSet)second.documents);
                boolean linked = shared / sizeFirst > threshold && shared / sizeSecond > threshold;
                if (!linked) {
                    continue;
                }
                neighborList.push(neighbors[i], j);
                neighbors[i] = neighborList.size() - 2;
                neighborList.push(neighbors[j], i);
                neighbors[j] = neighborList.size() - 2;
            }
        }

        // Depth-first traversal assigning a component index to every base cluster.
        int[] merged = new int[count];
        Arrays.fill(merged, NO_INDEX);
        ArrayList<ClusterCandidate> mergedClusters = new ArrayList<ClusterCandidate>(count);
        IntStack stack = new IntStack(count);
        IntStack mergeList = new IntStack(count);
        int mergedIndex = 0;
        for (int seed = 0; seed < count; ++seed) {
            if (merged[seed] != NO_INDEX) {
                continue;
            }
            stack.push(seed);
            while (stack.size() > 0) {
                int current = stack.pop();
                assert (merged[current] == NO_INDEX || merged[current] == mergedIndex);
                if (merged[current] == mergedIndex) {
                    // Already visited through another edge.
                    continue;
                }
                merged[current] = mergedIndex;
                mergeList.push(current);
                for (int link = neighbors[current]; neighborList.get(link) != END; link = neighborList.get(link)) {
                    int neighbor = neighborList.get(link + 1);
                    if (merged[neighbor] == NO_INDEX) {
                        stack.push(neighbor);
                    } else assert (merged[neighbor] == mergedIndex);
                }
            }
            ++mergedIndex;
            mergedClusters.add(this.merge(this.context, mergeList, baseClusters));
            mergeList.clear();
        }

        // Best score first; ties broken by larger cardinality.
        mergedClusters.sort((c1, c2) -> {
            int byScore = c1.score < c2.score ? 1 : (c1.score > c2.score ? -1 : 0);
            if (byScore != 0) {
                return byScore;
            }
            return c1.cardinality < c2.cardinality ? 1 : (c1.cardinality > c2.cardinality ? -1 : 0);
        });

        int limit = (Integer)this.maxClusters.get();
        if (mergedClusters.size() > limit) {
            mergedClusters.subList(limit, mergedClusters.size()).clear();
        }
        return mergedClusters;
    }

    /**
     * Builds one merged cluster candidate from a group of base clusters.
     *
     * <p>The result holds the union of the documents and the sum of the scores. Each base
     * cluster's phrase becomes a label candidate with coverage = base cardinality / merged
     * cardinality. Candidates deselected by {@link #markSubSuperPhrases(ArrayList)} and
     * {@link #markOverlappingPhrases(PreprocessingContext, ArrayList)} are removed, and at most
     * {@code maxPhrasesPerLabel} of the remaining phrases are kept.
     *
     * @param context preprocessing context of the current run
     * @param mergeList indices into {@code baseClusters} of the clusters to merge; not empty
     * @param baseClusters all base clusters
     * @return the merged candidate
     */
    private ClusterCandidate merge(PreprocessingContext context, IntStack mergeList, List<ClusterCandidate> baseClusters) {
        assert (mergeList.size() > 0);
        ClusterCandidate result = new ClusterCandidate();
        for (int i = 0; i < mergeList.size(); ++i) {
            ClusterCandidate cc = baseClusters.get(mergeList.get(i));
            result.documents.or(cc.documents);
            result.score += cc.score;
        }
        result.cardinality = (int)result.documents.cardinality();

        ArrayList<PhraseCandidate> phrases = new ArrayList<PhraseCandidate>(mergeList.size());
        for (int i = 0; i < mergeList.size(); ++i) {
            ClusterCandidate cc = baseClusters.get(mergeList.get(i));
            float coverage = (float)cc.cardinality / (float)result.cardinality;
            phrases.add(new PhraseCandidate(cc, coverage));
        }

        this.markSubSuperPhrases(phrases);
        phrases.removeIf(NOT_SELECTED);
        this.markOverlappingPhrases(context, phrases);
        phrases.removeIf(NOT_SELECTED);

        for (PhraseCandidate p : phrases) {
            p.renderedLabel = this.buildLabel(p.cluster.phrases.get(0));
        }

        // Highest coverage first, then longer labels, then alphabetical order. Each reversal is
        // applied to its own key: calling reversed() on the whole chain a second time would flip
        // the coverage order back to ascending and select the least representative phrases.
        Comparator<PhraseCandidate> byCoverageDesc = Comparator.comparingDouble((PhraseCandidate p) -> p.coverage).reversed();
        Comparator<PhraseCandidate> byLengthDesc = Comparator.comparingInt((PhraseCandidate p) -> p.renderedLabel.length()).reversed();
        Comparator<PhraseCandidate> comparator = byCoverageDesc.thenComparing(byLengthDesc).thenComparing(p -> p.renderedLabel);
        phrases.sort(comparator);

        int remaining = (Integer)this.maxPhrasesPerLabel.get();
        for (PhraseCandidate p : phrases) {
            if (remaining-- <= 0) break;
            result.phrases.add(p.cluster.phrases.get(0));
        }
        return result;
    }

    /**
     * Deselects phrases that are redundant with respect to sub-/super-phrase relations.
     *
     * <p>A phrase containing another phrase is not "most general"; a phrase contained in another
     * phrase is not "most specific". A most-general phrase is deselected when a most-specific
     * phrase contains it and the coverage difference is below {@code mostGeneralPhraseCoverage}.
     * Phrases that are neither most general nor most specific are deselected as well.
     *
     * @param phrases phrase candidates of one merged cluster; flags are updated in place
     */
    private void markSubSuperPhrases(ArrayList<PhraseCandidate> phrases) {
        final int count = phrases.size();

        // Flatten the non-stop words of all phrases; offsets holds (start, length) per phrase.
        IntStack words = new IntStack((Integer)this.maxWordsPerLabel.get() * count);
        IntStack offsets = new IntStack(count * 2);
        for (PhraseCandidate candidate : phrases) {
            this.appendWords(words, offsets, candidate);
        }

        // Pass 1: establish containment flags.
        for (int outer = 0; outer < count; ++outer) {
            for (int inner = 0; inner < count; ++inner) {
                if (outer == inner) {
                    continue;
                }
                int position = STCClusteringAlgorithm.indexOf(
                    words.buffer, offsets.get(2 * outer), offsets.get(2 * outer + 1),
                    words.buffer, offsets.get(2 * inner), offsets.get(2 * inner + 1));
                if (position >= 0) {
                    // outer contains inner.
                    phrases.get(outer).mostGeneral = false;
                    phrases.get(inner).mostSpecific = false;
                }
            }
        }

        // Pass 2: drop general phrases that add little coverage over a specific one.
        final double coverageGap = (Double)this.mostGeneralPhraseCoverage.get();
        for (int g = 0; g < count; ++g) {
            PhraseCandidate general = phrases.get(g);
            if (!general.mostGeneral) {
                continue;
            }
            for (int s = 0; s < count; ++s) {
                PhraseCandidate specific = phrases.get(s);
                if (g == s || !specific.mostSpecific) {
                    continue;
                }
                int position = STCClusteringAlgorithm.indexOf(
                    words.buffer, offsets.get(2 * s), offsets.get(2 * s + 1),
                    words.buffer, offsets.get(2 * g), offsets.get(2 * g + 1));
                if (position >= 0 && (double)(general.coverage - specific.coverage) < coverageGap) {
                    general.selected = false;
                    break;
                }
            }
        }

        // Pass 3: phrases in the middle of a containment chain are never shown.
        for (PhraseCandidate candidate : phrases) {
            if (!candidate.mostGeneral && !candidate.mostSpecific) {
                candidate.selected = false;
            }
        }
    }

    /**
     * Deselects phrases whose unique non-stop words overlap too much with a better-covering phrase.
     *
     * <p>For every pair of phrases, the one with strictly lower coverage is deselected when the
     * fraction of its own words shared with the other phrase exceeds {@code maxPhraseOverlap}.
     *
     * @param context preprocessing context used to look up word types
     * @param phrases phrase candidates of one merged cluster; {@code selected} is updated in place
     */
    private void markOverlappingPhrases(PreprocessingContext context, ArrayList<PhraseCandidate> phrases) {
        final int count = phrases.size();

        // Sorted unique words per phrase; offsets holds (start, length) per phrase.
        IntStack words = new IntStack((Integer)this.maxWordsPerLabel.get() * count);
        IntStack offsets = new IntStack(count * 2);
        for (PhraseCandidate candidate : phrases) {
            this.appendUniqueWords(context, words, offsets, candidate);
        }

        final double overlapLimit = (Double)this.maxPhraseOverlap.get();
        for (int first = 0; first < count; ++first) {
            for (int second = first + 1; second < count; ++second) {
                PhraseCandidate a = phrases.get(first);
                PhraseCandidate b = phrases.get(second);
                int aWordCount = offsets.get(2 * first + 1);
                int bWordCount = offsets.get(2 * second + 1);
                float shared = STCClusteringAlgorithm.computeIntersection(
                    words.buffer, offsets.get(2 * first), aWordCount,
                    words.buffer, offsets.get(2 * second), bWordCount);

                boolean bMostlyShared = (double)(shared / (float)bWordCount) > overlapLimit;
                boolean aMostlyShared = (double)(shared / (float)aWordCount) > overlapLimit;
                if (bMostlyShared && b.coverage < a.coverage) {
                    b.selected = false;
                }
                if (aMostlyShared && a.coverage < b.coverage) {
                    a.selected = false;
                }
            }
        }
    }

    /**
     * Counts the values common to two ascending-sorted array slices using a linear merge.
     *
     * @param a first array
     * @param aPos start of the slice in {@code a}
     * @param aLength number of elements in the first slice
     * @param b second array
     * @param bPos start of the slice in {@code b}
     * @param bLength number of elements in the second slice
     * @return number of positions at which the merge found equal elements
     */
    static int computeIntersection(int[] a, int aPos, int aLength, int[] b, int bPos, int bLength) {
        final int aEnd = aPos + aLength;
        final int bEnd = bPos + bLength;
        int shared = 0;
        int ia = aPos;
        int ib = bPos;
        while (ia < aEnd && ib < bEnd) {
            final int left = a[ia];
            final int right = b[ib];
            if (left < right) {
                ia++;
            } else if (left > right) {
                ib++;
            } else {
                // Equal: count once and advance both cursors.
                shared++;
                ia++;
                ib++;
            }
        }
        return shared;
    }

    /**
     * Appends the sorted, de-duplicated non-stop words of a phrase to {@code words} and records
     * the (start, length) of the appended slice in {@code offsets}.
     *
     * @param context preprocessing context used to look up word types
     * @param words flat word buffer shared by all phrases
     * @param offsets receives two values for this phrase: slice start and slice length
     * @param p phrase candidate whose cluster has exactly one phrase
     */
    private void appendUniqueWords(PreprocessingContext context, IntStack words, IntStack offsets, PhraseCandidate p) {
        assert (p.cluster.phrases.size() == 1);
        int start = words.size();
        int[] phraseIndices = p.cluster.phrases.get(0);
        short[] tokenTypes = context.allWords.type;
        // The phrase is a list of inclusive (start, end) ranges into the token sequence.
        for (int i = 0; i < phraseIndices.length; i += 2) {
            for (int j = phraseIndices[i]; j <= phraseIndices[i + 1]; ++j) {
                int termIndex = this.sb.input.get(j);
                if (TokenTypeUtils.isCommon(tokenTypes[termIndex])) continue;
                words.push(termIndex);
            }
        }

        int end = words.size();
        // Only de-duplicate when something was appended. Without this guard an empty slice would
        // still set elementsCount to start + 1, exposing one element that was never written and
        // recording a length of 1 instead of 0.
        if (end > start) {
            // Order is irrelevant for intersection counting, so sort and squeeze duplicates.
            Arrays.sort(words.buffer, start, end);
            int last = start;
            for (int i = start + 1; i < end; ++i) {
                if (words.buffer[last] == words.buffer[i]) continue;
                words.buffer[++last] = words.buffer[i];
            }
            words.elementsCount = last + 1;
        }
        offsets.push(start, words.size() - start);
    }

    /**
     * Appends the non-stop words of a phrase, in phrase order, to {@code words} and records the
     * (start, length) of the appended slice in {@code offsets}.
     *
     * @param words flat word buffer shared by all phrases
     * @param offsets receives two values for this phrase: slice start and slice length
     * @param p phrase candidate; only the first phrase of its cluster is used
     */
    private void appendWords(IntStack words, IntStack offsets, PhraseCandidate p) {
        final int sliceStart = words.size();
        final int[] ranges = p.cluster.phrases.get(0);
        final short[] wordTypes = this.context.allWords.type;

        // Walk each inclusive (from, to) range of token positions.
        for (int r = 0; r < ranges.length; r += 2) {
            int position = ranges[r];
            while (position <= ranges[r + 1]) {
                int termIndex = this.sb.input.get(position);
                if (!TokenTypeUtils.isCommon(wordTypes[termIndex])) {
                    words.push(termIndex);
                }
                ++position;
            }
        }
        offsets.push(sliceStart, words.size() - sliceStart);
    }

    /**
     * Converts merged cluster candidates into {@link Cluster} instances.
     *
     * <p>Each cluster receives the candidate's rendered phrase labels, the documents selected by
     * the candidate's document bit set, and the candidate's score.
     *
     * @param documents input documents, indexed by the bits of each candidate's document set
     * @param candidates merged cluster candidates
     * @param clusters output list; one cluster is appended per candidate, in order
     */
    private <T extends Document> void postProcessing(List<T> documents, List<ClusterCandidate> candidates, List<Cluster<T>> clusters) {
        // Scratch list reused across candidates to avoid one allocation per cluster.
        List<T> scratch = new ArrayList<T>(documents.size());
        for (ClusterCandidate candidate : candidates) {
            Cluster<T> cluster = new Cluster<T>();
            this.collectPhrases(candidate, cluster);
            this.collectDocuments(documents, scratch, candidate.documents).forEach(document -> cluster.addDocument(document));
            cluster.setScore(Double.valueOf(candidate.score));
            clusters.add(cluster);
            scratch.clear();
        }
    }

    /**
     * Adds one rendered label to the cluster for each phrase of the candidate.
     *
     * @param c candidate providing the phrases
     * @param cluster cluster receiving the labels
     */
    private void collectPhrases(ClusterCandidate c, Cluster<?> cluster) {
        c.phrases.forEach(phrase -> cluster.addLabel(this.buildLabel(phrase)));
    }

    /**
     * Appends the documents whose indices are set in {@code bitset} to a list.
     *
     * @param documents all input documents
     * @param l target list, or {@code null} to have a new list allocated
     * @param bitset indices of the documents to collect
     * @return {@code l}, or the newly allocated list when {@code l} was {@code null}
     */
    private <T extends Document> List<T> collectDocuments(List<T> documents, List<T> l, BitSet bitset) {
        if (l == null) {
            l = new ArrayList<T>((int)bitset.cardinality());
        }
        BitSetIterator it = bitset.iterator();
        // nextSetBit() yields a negative value once the set bits are exhausted.
        for (int d = it.nextSetBit(); d >= 0; d = it.nextSetBit()) {
            // No cast: the element is already a T; casting to Document does not fit List<T>.
            l.add(documents.get(d));
        }
        return l;
    }

    /**
     * Renders a phrase as a display label using the current label formatter.
     *
     * @param phraseIndices inclusive (start, end) pairs of positions in the token sequence
     * @return the label produced by {@code labelFormatter.format} from the word images and their
     *     stop-word flags
     */
    private String buildLabel(int[] phraseIndices) {
        // First pass: total number of words across all ranges.
        int wordCount = 0;
        for (int r = 0; r < phraseIndices.length; r += 2) {
            wordCount += phraseIndices[r + 1] - phraseIndices[r] + 1;
        }

        final char[][] images = new char[wordCount][];
        final boolean[] stopwords = new boolean[wordCount];
        final short[] wordTypes = this.context.allWords.type;

        // Second pass: collect image and stop-word flag of every word, in order.
        int slot = 0;
        for (int r = 0; r < phraseIndices.length; r += 2) {
            for (int position = phraseIndices[r]; position <= phraseIndices[r + 1]; ++position, ++slot) {
                int termIndex = this.sb.input.get(position);
                images[slot] = this.context.allWords.image[termIndex];
                stopwords[slot] = TokenTypeUtils.isCommon(wordTypes[termIndex]);
            }
        }
        return this.labelFormatter.format(images, stopwords);
    }

    /**
     * Formats a phrase candidate for debugging: coverage, label and the selected ("S"),
     * most-general ("MG") and most-specific ("MS") markers.
     *
     * @param c phrase candidate to describe
     * @return the debug string
     */
    private String toString(PhraseCandidate c) {
        Float coverage = Float.valueOf(c.coverage);
        String label = this.buildLabel(c.cluster.phrases.get(0));
        String selectedMark = c.selected ? "S" : "";
        String generalMark = c.mostGeneral ? "MG" : "";
        String specificMark = c.mostSpecific ? "MS" : "";
        return String.format(Locale.ENGLISH, "%3.2f %s %s %s %s", coverage, label, selectedMark, generalMark, specificMark);
    }

    /**
     * Builds a debugging representation of a phrase.
     *
     * <p>Words inside one range are separated by a space, the first word of every following
     * range is preceded by {@code "_"}, and stop words are suffixed with {@code "[S]"}.
     *
     * @param phraseIndices inclusive (start, end) pairs of positions in the token sequence
     * @return the debug label
     */
    private String buildDebugLabel(int[] phraseIndices) {
        StringBuilder b = new StringBuilder();
        String sep = "";
        short[] tokenTypes = this.context.allWords.type;
        for (int i = 0; i < phraseIndices.length; i += 2) {
            for (int j = phraseIndices[i]; j <= phraseIndices[i + 1]; ++j) {
                b.append(sep);
                int termIndex = this.sb.input.get(j);
                b.append(this.context.allWords.image[termIndex]);
                if (TokenTypeUtils.isCommon(tokenTypes[termIndex])) {
                    b.append("[S]");
                }
                sep = " ";
            }
            // Marks the boundary between two ranges.
            sep = "_";
        }
        return b.toString();
    }

    /**
     * Decides whether a suffix tree path forms an acceptable phrase.
     *
     * <p>The phrase is rejected when its first word is a stop word, when its last range consists
     * only of stop words, or when it has more than {@code maxWordsPerLabel} words. Trailing stop
     * words of the last range are trimmed by rewriting the last entry of {@code path} in place.
     *
     * @param path inclusive (start, end) pairs of positions in the token sequence; may be modified
     * @return {@code true} when the (possibly trimmed) phrase is acceptable
     */
    final boolean checkAcceptablePhrase(IntStack path) {
        assert (path.size() > 0);
        final short[] wordTypes = this.context.allWords.type;
        final int[] terms = this.sb.input.buffer;

        // A phrase must not start with a stop word.
        if (TokenTypeUtils.isCommon(wordTypes[terms[path.get(0)]])) {
            return false;
        }

        // Trim stop words from the end of the last range.
        final int lastSlot = path.size() - 1;
        final int rangeStart = path.get(lastSlot - 1);
        final int rangeEnd = path.get(lastSlot);
        int trimmedEnd = rangeEnd;
        while (rangeStart <= trimmedEnd && TokenTypeUtils.isCommon(wordTypes[terms[trimmedEnd]])) {
            --trimmedEnd;
        }
        if (trimmedEnd < rangeStart) {
            // The last range held nothing but stop words.
            return false;
        }
        if (trimmedEnd < rangeEnd) {
            path.buffer[lastSlot] = trimmedEnd;
        }

        // Enforce the label length limit on the trimmed phrase.
        int wordCount = 0;
        for (int slot = 0; slot < path.size(); slot += 2) {
            wordCount += path.get(slot + 1) - path.get(slot) + 1;
        }
        return wordCount <= (Integer)this.maxWordsPerLabel.get();
    }

    /**
     * Counts the words of a phrase that contribute to its score.
     *
     * <p>Stop words are skipped, as are words whose document count lies below the preprocessing
     * {@code wordDfThreshold} or above {@code ignoreWordIfInHigherDocsPercent} times the number
     * of documents. The document count is taken as half the length of the word's
     * {@code tfByDocument} entry (presumably pairs of document index and frequency -- confirm).
     *
     * @param path inclusive (start, end) pairs of positions in the token sequence
     * @return the number of contributing words
     */
    final int effectivePhraseLength(IntStack path) {
        final int[] terms = this.sb.input.buffer;
        final int minDocs = (Integer)this.preprocessing.wordDfThreshold.get();
        final int maxDocs = (int)((Double)this.ignoreWordIfInHigherDocsPercent.get() * (double)this.context.documentCount);

        int counted = 0;
        for (int slot = 0; slot < path.size(); slot += 2) {
            for (int position = path.get(slot); position <= path.get(slot + 1); ++position) {
                int termIndex = terms[position];
                if (TokenTypeUtils.isCommon(this.context.allWords.type[termIndex])) {
                    continue;
                }
                int docCount = this.context.allWords.tfByDocument[termIndex].length / 2;
                if (docCount >= minDocs && docCount <= maxDocs) {
                    ++counted;
                }
            }
        }
        return counted;
    }

    /**
     * Scores a base cluster as {@code boost * documentCount * documentCountBoost}.
     *
     * <p>For one-word phrases with a positive {@code singleTermBoost} the boost is that value;
     * otherwise it is a Gaussian of the distance between the phrase length and
     * {@code optimalPhraseLength}, with deviation {@code optimalPhraseLengthDev}.
     *
     * @param phraseLength effective phrase length in words
     * @param documentCount number of documents in the base cluster
     * @return the base cluster score
     */
    final float baseClusterScore(int phraseLength, int documentCount) {
        final double singleBoost = (Double)this.singleTermBoost.get();
        final double lengthBoost;
        if (phraseLength != 1 || !(singleBoost > 0.0)) {
            final int distance = phraseLength - (Integer)this.optimalPhraseLength.get();
            final double deviation = (Double)this.optimalPhraseLengthDev.get();
            lengthBoost = Math.exp((double)(-distance * distance) / (2.0 * deviation * deviation));
        } else {
            lengthBoost = singleBoost;
        }
        final double weightedDocuments = (double)documentCount * (Double)this.documentCountBoost.get();
        return (float)(lengthBoost * weightedDocuments);
    }

    /**
     * Finds the first occurrence of one array slice inside another.
     *
     * @param source array searched in
     * @param sourceOffset start of the searched slice
     * @param sourceCount length of the searched slice
     * @param target array holding the sequence to find
     * @param targetOffset start of the sought sequence
     * @param targetCount length of the sought sequence
     * @return position of the first match relative to {@code sourceOffset}, {@code 0} when
     *     {@code targetCount} is zero, or {@code -1} when there is no match
     */
    private static int indexOf(int[] source, int sourceOffset, int sourceCount, int[] target, int targetOffset, int targetCount) {
        if (targetCount == 0) {
            return 0;
        }
        final int head = target[targetOffset];
        final int lastStart = sourceOffset + (sourceCount - targetCount);

        scan:
        for (int start = sourceOffset; start <= lastStart; ++start) {
            if (source[start] != head) {
                continue;
            }
            // First element matches; compare the remainder.
            for (int k = 1; k < targetCount; ++k) {
                if (source[start + k] != target[targetOffset + k]) {
                    continue scan;
                }
            }
            return start - sourceOffset;
        }
        return -1;
    }

    /**
     * A base cluster's phrase considered for the label of a merged cluster, together with the
     * flags set while candidates are filtered.
     */
    private static final class PhraseCandidate {
        /** Base cluster this phrase comes from. */
        final ClusterCandidate cluster;
        /** Base cluster cardinality divided by the merged cluster's cardinality. */
        final float coverage;
        /** Display label; assigned in {@code merge} just before the candidates are sorted. */
        String renderedLabel;
        /** Cleared when the phrase should be dropped from the label. */
        boolean selected = true;
        /** Cleared when this phrase contains another candidate phrase. */
        boolean mostGeneral = true;
        /** Cleared when another candidate phrase contains this phrase. */
        boolean mostSpecific = true;

        PhraseCandidate(ClusterCandidate c, float coverage) {
            this.cluster = c;
            this.coverage = coverage;
        }
    }
}

