Ticket #29984: patch-mecab.diff

File patch-mecab.diff, 928 bytes (added by humem (humem), 13 years ago)
  • app/guts/pipes/DocumentPipe.java

    old  new
     16   16  import cc.mallet.types.Instance;
     17   17  import cc.mallet.util.CharSequenceLexer;
     18   18
          19  import dualist.ja.SimpleMecabPipe;
          20
     19   21  public class DocumentPipe extends Pipe {
     20   22
     21   23      private Pipe myPipe = new SerialPipes(new Pipe[] {
      …    …
     27   29              new CharSequenceReplace(Pattern.compile("&(.*?);"), ""),
     28   30              new CharSequenceReplace(Pattern.compile("[0-9]+"), "00"),
     29   31              new CharSequenceLowercase(),
          32              (System.getProperty("dualist.lang") != null &&
          33               System.getProperty("dualist.lang").equals("ja")) ?
          34              new SimpleMecabPipe() :
     30   35              new CharSequence2TokenSequence(CharSequenceLexer.LEX_WORD_CLASSES),
     31   36              new TokenSequenceRemoveStopwords(),
     32   37              new TokenSequence2FeatureSequence(),