/*
 * Decompiled with CFR 0.152.
 */
package weka.associations;

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Enumeration;
import java.util.Vector;
import weka.associations.AbstractAssociator;
import weka.associations.gsp.Element;
import weka.associations.gsp.Sequence;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.FastVector;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;

/**
 * Associator implementing a GSP algorithm for discovering sequential patterns
 * in a sequential data set (see {@link #getTechnicalInformation()}).
 *
 * <p>Options: {@code -D} (debug output), {@code -S} (minimum support,
 * default 0.9), {@code -I} (index of the attribute holding the data sequence
 * ID, default 0) and {@code -F} (comma separated attribute numbers used for
 * result filtering, default "-1").
 */
public class GeneralizedSequentialPatterns
extends AbstractAssociator
implements OptionHandler,
TechnicalInformationHandler {
    private static final long serialVersionUID = -4119691320812254676L;

    /** Minimum support threshold; multiplied by the number of data sequences to get the support count. */
    protected double m_MinSupport;

    /** Index of the attribute that identifies the data sequence an instance belongs to. */
    protected int m_DataSeqID;

    /** Working copy of the training data; the sequence ID attribute is removed from it during mining. */
    protected Instances m_OriginalDataSet;

    /** One FastVector of frequent k-sequences per completed cycle, in order of increasing k. */
    protected FastVector m_AllSequentialPatterns;

    /** Cycle counter; set to 1 for the 1-sequence pass and incremented after each further pass. */
    protected int m_Cycles;

    /** Formatted timestamp taken when the most recent cycle started. */
    protected String m_CycleStart;

    /** Formatted timestamp taken when the most recent cycle ended. */
    protected String m_CycleEnd;

    /** Formatted timestamp taken when {@link #buildAssociations(Instances)} was invoked. */
    protected String m_AlgorithmStart;

    /** Comma separated attribute numbers used for result filtering, as supplied by the user. */
    protected String m_FilterAttributes;

    /** Parsed form of {@link #m_FilterAttributes}: one Integer per attribute number. */
    protected FastVector m_FilterAttrVector;

    /** Whether additional information is printed to the console. */
    protected boolean m_Debug = false;

    /**
     * Creates the associator with default option values.
     */
    public GeneralizedSequentialPatterns() {
        this.resetOptions();
    }

    /**
     * Returns a description of this associator, including its technical reference.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "Class implementing a GSP algorithm for discovering sequential patterns in a sequential data set.\nThe attribute identifying the distinct data sequences contained in the set can be determined by the respective option. Furthermore, the set of output results can be restricted by specifying one or more attributes that have to be contained in each element/itemset of a sequence.\n\nFor further information see:\n\n" + this.getTechnicalInformation().toString();
    }

    /**
     * Returns the bibliographic reference of the paper this implementation is based on.
     *
     * @return the technical information
     */
    @Override
    public TechnicalInformation getTechnicalInformation() {
        TechnicalInformation paper = new TechnicalInformation(TechnicalInformation.Type.PROCEEDINGS);
        paper.setValue(TechnicalInformation.Field.AUTHOR, "Ramakrishnan Srikant and Rakesh Agrawal");
        paper.setValue(TechnicalInformation.Field.TITLE, "Mining Sequential Patterns: Generalizations and Performance Improvements");
        paper.setValue(TechnicalInformation.Field.BOOKTITLE, "Advances in Database Technology EDBT '96");
        paper.setValue(TechnicalInformation.Field.YEAR, "1996");
        paper.setValue(TechnicalInformation.Field.PUBLISHER, "Springer");
        return paper;
    }

    /**
     * Lists the command line options understood by this associator.
     *
     * @return an enumeration of {@link Option} objects
     */
    @Override
    public Enumeration listOptions() {
        Vector<Option> result = new Vector<Option>();
        result.addElement(new Option("\tIf set, algorithm is run in debug mode and\n\tmay output additional info to the console", "D", 0, "-D"));
        result.addElement(new Option("\tThe minimum support threshold.\n\t(default: 0.9)", "S", 1, "-S <minimum support threshold>"));
        result.addElement(new Option("\tThe attribute number representing the data sequence ID.\n\t(default: 0)", "I", 1, "-I <attribute number representing the data sequence ID>"));
        result.addElement(new Option("\tThe attribute numbers used for result filtering.\n\t(default: -1)", "F", 1, "-F <attribute numbers used for result filtering>"));
        return result.elements();
    }

    /**
     * Resets all options to their defaults and then applies the given ones.
     *
     * @param options the command line style options ({@code -D}, {@code -S}, {@code -I}, {@code -F})
     * @throws Exception if an option value cannot be retrieved or parsed
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        this.resetOptions();
        this.setDebug(Utils.getFlag('D', options));

        String minSupport = Utils.getOption('S', options);
        if (minSupport.length() != 0) {
            this.setMinSupport(Double.parseDouble(minSupport));
        }

        String dataSeqID = Utils.getOption('I', options);
        if (dataSeqID.length() != 0) {
            this.setDataSeqID(Integer.parseInt(dataSeqID));
        }

        String filterAttributes = Utils.getOption('F', options);
        if (filterAttributes.length() != 0) {
            this.setFilterAttributes(filterAttributes);
        }
    }

    /**
     * Returns the current option settings in command line form.
     *
     * @return the options; {@code -D} is only present when debug mode is on
     */
    @Override
    public String[] getOptions() {
        Vector<String> result = new Vector<String>();
        if (this.getDebug()) {
            result.add("-D");
        }
        result.add("-S");
        result.add("" + this.getMinSupport());
        result.add("-I");
        result.add("" + this.getDataSeqID());
        result.add("-F");
        result.add(this.getFilterAttributes());
        return result.toArray(new String[result.size()]);
    }

    /**
     * Restores the default values: minimum support 0.9, sequence ID attribute 0
     * and filter attributes "-1". The debug flag is not touched here.
     */
    protected void resetOptions() {
        this.m_MinSupport = 0.9;
        this.m_DataSeqID = 0;
        this.m_FilterAttributes = "-1";
    }

    /**
     * Returns the capabilities of this associator: nominal attributes only, no class attribute.
     *
     * @return the capabilities
     */
    @Override
    public Capabilities getCapabilities() {
        Capabilities result = super.getCapabilities();
        result.disableAll();
        result.enable(Capabilities.Capability.NOMINAL_ATTRIBUTES);
        result.enable(Capabilities.Capability.NO_CLASS);
        return result;
    }

    /**
     * Mines the frequent sequential patterns from the given data.
     * The data itself is not modified; the algorithm works on a copy.
     *
     * @param data the training data
     * @throws IllegalArgumentException if the configured sequence ID attribute index
     *         does not exist in {@code data}
     * @throws Exception if the data fails the capabilities test or mining fails
     */
    @Override
    public void buildAssociations(Instances data) throws Exception {
        this.getCapabilities().testWithFail(data);
        if (this.m_DataSeqID < 0 || this.m_DataSeqID >= data.numAttributes()) {
            // Fail early with a readable message instead of an index error deep inside the mining code.
            throw new IllegalArgumentException("Data sequence ID attribute index " + this.m_DataSeqID + " is out of range; the data has " + data.numAttributes() + " attribute(s).");
        }
        this.m_AllSequentialPatterns = new FastVector();
        this.m_Cycles = 0;
        this.m_FilterAttrVector = new FastVector();
        this.m_AlgorithmStart = this.getTimeAndDate();
        this.m_OriginalDataSet = new Instances(data);
        this.extractFilterAttributes(this.m_FilterAttributes);
        this.findFrequentSequences();
    }

    /**
     * Sums the sizes of all stored sets of frequent k-sequences.
     *
     * @return the total number of frequent sequences found
     */
    protected int calcFreqSequencesTotal() {
        int total = 0;
        Enumeration allSeqPatternsEnum = this.m_AllSequentialPatterns.elements();
        while (allSeqPatternsEnum.hasMoreElements()) {
            FastVector kSequences = (FastVector)allSeqPatternsEnum.nextElement();
            total += kSequences.size();
        }
        return total;
    }

    /**
     * Splits the data set into data sequences, i.e. runs of consecutive instances
     * sharing the same value of the sequence ID attribute. The ID attribute is
     * removed from every extracted sequence.
     *
     * <p>At most as many runs are extracted as the ID attribute declares values;
     * extraction also stops once all instances have been consumed, so an empty
     * data set or unused ID values no longer cause an index error.
     *
     * @param originalDataSet the data set to split; it is not modified
     * @param dataSeqID the index of the sequence ID attribute
     * @return a FastVector of {@link Instances}, one per data sequence
     */
    protected FastVector extractDataSequences(Instances originalDataSet, int dataSeqID) {
        FastVector<Instances> dataSequences = new FastVector<Instances>();
        int numInstances = originalDataSet.numInstances();
        int firstInstance = 0;
        int lastInstance = 0;
        Attribute seqIDAttribute = originalDataSet.attribute(dataSeqID);
        for (int i = 0; i < seqIDAttribute.numValues() && firstInstance < numInstances; ++i) {
            double sequenceID = originalDataSet.instance(firstInstance).value(dataSeqID);
            // Advance to the first instance that belongs to a different sequence.
            while (lastInstance < numInstances && sequenceID == originalDataSet.instance(lastInstance).value(dataSeqID)) {
                ++lastInstance;
            }
            Instances dataSequence = new Instances(originalDataSet, firstInstance, lastInstance - firstInstance);
            dataSequence.deleteAttributeAt(dataSeqID);
            dataSequences.addElement(dataSequence);
            firstInstance = lastInstance;
        }
        return dataSequences;
    }

    /**
     * Parses a comma separated list of attribute numbers and appends each number
     * to {@link #m_FilterAttrVector}. Whitespace around the individual numbers is
     * ignored; a trailing comma is tolerated.
     *
     * @param attrNumbers the list to parse, e.g. {@code "0, 1"}
     * @throws NumberFormatException if an entry cannot be decoded by {@link Integer#decode(String)}
     */
    public void extractFilterAttributes(String attrNumbers) {
        if (this.m_FilterAttrVector == null) {
            // Allows calling this public method before buildAssociations() has created the vector.
            this.m_FilterAttrVector = new FastVector();
        }
        String remaining = attrNumbers.trim();
        while (!remaining.equals("")) {
            int commaLoc = remaining.indexOf(',');
            if (commaLoc == -1) {
                this.m_FilterAttrVector.addElement(Integer.decode(remaining));
                break;
            }
            // Trim the token too, so that input such as "0 , 1" is accepted.
            String number = remaining.substring(0, commaLoc).trim();
            remaining = remaining.substring(commaLoc + 1).trim();
            this.m_FilterAttrVector.addElement(Integer.decode(number));
        }
    }

    /**
     * Runs the level-wise search: builds the frequent 1-sequences, then repeatedly
     * generates candidates from the previous level, counts their support and drops
     * the infrequent ones until no frequent sequences remain. Every non-empty level
     * is appended to {@link #m_AllSequentialPatterns}.
     *
     * <p>Side effect: the sequence ID attribute is removed from
     * {@link #m_OriginalDataSet}, which is later used by {@link #toString()}.
     *
     * @throws CloneNotSupportedException declared for the helper calls used during the search
     */
    protected void findFrequentSequences() throws CloneNotSupportedException {
        this.m_CycleStart = this.getTimeAndDate();
        FastVector dataSequences = this.extractDataSequences(this.m_OriginalDataSet, this.m_DataSeqID);
        long minSupportCount = Math.round(this.m_MinSupport * (double)dataSequences.size());

        // Remove the same attribute that extractDataSequences() removed from each
        // data sequence, so candidates and data sequences share one attribute layout.
        this.m_OriginalDataSet.deleteAttributeAt(this.m_DataSeqID);
        FastVector oneElements = Element.getOneElements(this.m_OriginalDataSet);

        this.m_Cycles = 1;
        FastVector kSequences = Sequence.oneElementsToSequences(oneElements);
        Sequence.updateSupportCount(kSequences, dataSequences);
        kSequences = Sequence.deleteInfrequentSequences(kSequences, minSupportCount);
        this.m_CycleEnd = this.getTimeAndDate();

        while (kSequences.size() > 0) {
            this.m_CycleStart = this.getTimeAndDate();
            this.m_AllSequentialPatterns.addElement(kSequences.copy());
            FastVector kMinusOneSequences = kSequences;
            kSequences = Sequence.aprioriGen(kMinusOneSequences);
            Sequence.updateSupportCount(kSequences, dataSequences);
            kSequences = Sequence.deleteInfrequentSequences(kSequences, minSupportCount);
            this.m_CycleEnd = this.getTimeAndDate();
            if (this.getDebug()) {
                System.out.println("Cycle " + this.m_Cycles + " from " + this.m_CycleStart + " to " + this.m_CycleEnd);
            }
            ++this.m_Cycles;
        }
    }

    /**
     * Returns the tip text for the dataSeqID property.
     *
     * @return the tip text
     */
    public String dataSeqIDTipText() {
        return "The attribute number representing the data sequence ID.";
    }

    /**
     * Returns the index of the attribute holding the data sequence ID.
     *
     * @return the attribute index
     */
    public int getDataSeqID() {
        return this.m_DataSeqID;
    }

    /**
     * Sets the index of the attribute holding the data sequence ID.
     *
     * @param value the attribute index
     */
    public void setDataSeqID(int value) {
        this.m_DataSeqID = value;
    }

    /**
     * Returns the tip text for the filterAttributes property.
     *
     * @return the tip text
     */
    public String filterAttributesTipText() {
        return "The attribute numbers (eg \"0, 1\") used for result filtering; only sequences containing the specified attributes in each of their elements/itemsets will be output; -1 prints all.";
    }

    /**
     * Returns the attribute numbers used for result filtering, as a string.
     *
     * @return the comma separated attribute numbers
     */
    public String getFilterAttributes() {
        return this.m_FilterAttributes;
    }

    /**
     * Sets the attribute numbers used for result filtering.
     *
     * @param value the comma separated attribute numbers
     */
    public void setFilterAttributes(String value) {
        this.m_FilterAttributes = value;
    }

    /**
     * Returns the tip text for the minSupport property.
     *
     * @return the tip text
     */
    public String minSupportTipText() {
        return "Minimum support threshold.";
    }

    /**
     * Returns the minimum support threshold.
     *
     * @return the threshold
     */
    public double getMinSupport() {
        return this.m_MinSupport;
    }

    /**
     * Sets the minimum support threshold.
     *
     * @param value the threshold
     */
    public void setMinSupport(double value) {
        this.m_MinSupport = value;
    }

    /**
     * Turns debug output on or off.
     *
     * @param value true to enable debug output
     */
    public void setDebug(boolean value) {
        this.m_Debug = value;
    }

    /**
     * Returns whether debug output is enabled.
     *
     * @return true if debug output is enabled
     */
    public boolean getDebug() {
        return this.m_Debug;
    }

    /**
     * Returns the tip text for the debug property.
     *
     * @return the tip text
     */
    public String debugTipText() {
        return "If set to true, algorithm may output additional info to the console.";
    }

    /**
     * Returns the current time formatted as {@code yyyy-MM-dd HH:mm:ss}.
     *
     * @return the formatted timestamp
     */
    protected String getTimeAndDate() {
        // A fresh formatter per call: SimpleDateFormat instances must not be shared between threads.
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        return dateFormat.format(new Date());
    }

    /**
     * Returns the timestamp taken when the algorithm was started.
     *
     * @return the formatted timestamp, or null if the algorithm has not been run
     */
    public String getAlgorithmStart() {
        return this.m_AlgorithmStart;
    }

    /**
     * Returns the timestamp taken at the start of the most recent cycle.
     *
     * @return the formatted timestamp, or null if the algorithm has not been run
     */
    public String getCycleStart() {
        return this.m_CycleStart;
    }

    /**
     * Returns the timestamp taken at the end of the most recent cycle.
     *
     * @return the formatted timestamp, or null if the algorithm has not been run
     */
    public String getCycleEnd() {
        return this.m_CycleEnd;
    }

    /**
     * Returns a textual report: the number of cycles, the total number of frequent
     * sequences and, per level, the sequences as rendered by
     * {@code Sequence.setOfSequencesToString} with the parsed filter attributes.
     *
     * @return the report, or a short notice if no model has been built yet
     */
    @Override
    public String toString() {
        if (this.m_AllSequentialPatterns == null) {
            // buildAssociations() has not been run, so there is nothing to report.
            return "GeneralizedSequentialPatterns: No model built yet.";
        }
        StringBuilder result = new StringBuilder();
        result.append("GeneralizedSequentialPatterns\n");
        result.append("=============================\n\n");
        result.append("Number of cycles performed: " + (this.m_Cycles - 1) + "\n");
        result.append("Total number of frequent sequences: " + this.calcFreqSequencesTotal() + "\n\n");
        result.append("Frequent Sequences Details (filtered):\n\n");
        for (int i = 0; i < this.m_AllSequentialPatterns.size(); ++i) {
            result.append("- " + (i + 1) + "-sequences\n\n");
            FastVector kSequences = (FastVector)this.m_AllSequentialPatterns.elementAt(i);
            result.append(Sequence.setOfSequencesToString(kSequences, this.m_OriginalDataSet, this.m_FilterAttrVector) + "\n");
        }
        return result.toString();
    }

    /**
     * Returns the revision string of this class.
     *
     * @return the revision
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 5504 $");
    }

    /**
     * Runs the associator from the command line.
     *
     * @param args the command line options
     */
    public static void main(String[] args) {
        GeneralizedSequentialPatterns.runAssociator(new GeneralizedSequentialPatterns(), args);
    }
}

