View Javadoc

1   /**
2    * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3    */
4   package net.sourceforge.pmd.cpd;
5   
6   import java.io.File;
7   import java.io.FileNotFoundException;
8   import java.io.IOException;
9   import java.util.HashSet;
10  import java.util.Iterator;
11  import java.util.List;
12  import java.util.Map;
13  import java.util.Properties;
14  import java.util.Set;
15  import java.util.TreeMap;
16  
17  import net.sourceforge.pmd.util.FileFinder;
18  
19  import org.apache.commons.io.FilenameUtils;
20  
21  public class CPD {
22  
23      private static final int MISSING_FILES = 1;
24  	private static final int MISSING_ARGS = 2;
25  	private static final int DUPLICATE_CODE_FOUND = 4;
26  
27  	static boolean dontExitForTests = false;
28  
29  	private CPDConfiguration configuration;
30  	
31  	private Map<String, SourceCode> source = new TreeMap<String, SourceCode>();
32      private CPDListener listener = new CPDNullListener();
33      private Tokens tokens = new Tokens();
34      private MatchAlgorithm matchAlgorithm;
35  
36      public CPD(CPDConfiguration theConfiguration) {
37      	configuration = theConfiguration;
38          // before we start any tokenizing (add(File...)), we need to reset the static TokenEntry status
39          TokenEntry.clearImages();
40      }
41  
42      public void setCpdListener(CPDListener cpdListener) {
43          this.listener = cpdListener;
44      }
45  
46      public void go() {
47          matchAlgorithm = new MatchAlgorithm(
48          		source, tokens, 
49          		configuration.minimumTileSize(), 
50          		listener
51          		);
52          matchAlgorithm.findMatches();
53      }
54  
55      public Iterator<Match> getMatches() {
56          return matchAlgorithm.matches();
57      }
58  
59      public void add(File file) throws IOException {
60          add(1, file);
61      }
62  
63      public void addAllInDirectory(String dir) throws IOException {
64          addDirectory(dir, false);
65      }
66  
67      public void addRecursively(String dir) throws IOException {
68          addDirectory(dir, true);
69      }
70  
71      public void add(List<File> files) throws IOException {
72          for (File f: files) {
73              add(files.size(), f);
74          }
75      }
76  
77      private void addDirectory(String dir, boolean recurse) throws IOException {
78          if (!(new File(dir)).exists()) {
79              throw new FileNotFoundException("Couldn't find directory " + dir);
80          }
81          FileFinder finder = new FileFinder();
82          // TODO - could use SourceFileSelector here
83          add(finder.findFilesFrom(dir, configuration.filenameFilter(), recurse));
84      }
85  
86      private Set<String> current = new HashSet<String>();
87  
88      private void add(int fileCount, File file) throws IOException {
89  
90          if (configuration.skipDuplicates()) {
91              // TODO refactor this thing into a separate class
92              String signature = file.getName() + '_' + file.length();
93              if (current.contains(signature)) {
94                  System.err.println("Skipping " + file.getAbsolutePath() + " since it appears to be a duplicate file and --skip-duplicate-files is set");
95                  return;
96              }
97              current.add(signature);
98          }
99  
100         if (!FilenameUtils.equalsNormalizedOnSystem(file.getAbsoluteFile().getCanonicalPath(), file.getAbsolutePath())) {
101             System.err.println("Skipping " + file + " since it appears to be a symlink");
102             return;
103         }
104 
105         if (!file.exists()) {
106             System.err.println("Skipping " + file + " since it doesn't exist (broken symlink?)");
107             return;
108         }
109 
110         listener.addedFile(fileCount, file);
111         SourceCode sourceCode = configuration.sourceCodeFor(file);
112         configuration.tokenizer().tokenize(sourceCode, tokens);
113         source.put(sourceCode.getFileName(), sourceCode);
114     }
115 
116     private static void setSystemProperties(String[] args, CPDConfiguration config) {
117         boolean ignoreLiterals = CPDConfiguration.findBooleanSwitch(args, "--ignore-literals");
118         boolean ignoreIdentifiers = CPDConfiguration.findBooleanSwitch(args, "--ignore-identifiers");
119         boolean ignoreAnnotations = CPDConfiguration.findBooleanSwitch(args, "--ignore-annotations");
120         Properties properties = System.getProperties();
121         if (ignoreLiterals) {
122             properties.setProperty(JavaTokenizer.IGNORE_LITERALS, "true");
123         }
124         if (ignoreIdentifiers) {
125             properties.setProperty(JavaTokenizer.IGNORE_IDENTIFIERS, "true");
126         }
127         if (ignoreAnnotations) {
128             properties.setProperty(JavaTokenizer.IGNORE_ANNOTATIONS, "true");
129         }
130         System.setProperties(properties);
131         config.language().setProperties(properties);
132     }
133 
134     public static void main(String[] args) {
135         if (args.length == 0) {
136         	showUsage();
137             System.exit(MISSING_ARGS);
138         }
139 
140         try {
141         	CPDConfiguration config = new CPDConfiguration(args);
142 
143             // Pass extra parameters as System properties to allow language
144             // implementation to retrieve their associate values...
145             setSystemProperties(args, config);
146            
147             CPD cpd = new CPD(config);
148             
149             /* FIXME: Improve this !!!	*/
150             boolean missingFiles = true;
151             for (int position = 0; position < args.length; position++) {
152                 if (args[position].equals("--files")) {
153                 	cpd.addRecursively(args[position + 1]);
154                 	if ( missingFiles ) {
155                         missingFiles = false;
156                     }
157                 }
158             }
159 
160             if ( missingFiles ) {
161 	            System.out.println("No " + "--files" + " value passed in");
162 	            showUsage();
163 	            System.exit(MISSING_FILES);
164             }
165 
166             cpd.go();
167             if (cpd.getMatches().hasNext()) {
168                 System.out.println(config.renderer().render(cpd.getMatches()));
169                 if (!dontExitForTests) {
170                     System.exit(DUPLICATE_CODE_FOUND);
171                 }
172             }
173         } catch (Exception e) {
174             e.printStackTrace();
175         }
176     }
177 
178     public static void showUsage() {
179         System.out.println("Usage:");
180         System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens xxx --files xxx [--language xxx] [--encoding xxx] [--format (xml|text|csv|vs)] [--skip-duplicate-files] ");
181         System.out.println("i.e: ");
182         System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --files c:\\jdk14\\src\\java ");
183         System.out.println("or: ");
184         System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --files /path/to/c/code --language c ");
185         System.out.println("or: ");
186         System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --encoding UTF-16LE --files /path/to/java/code --format xml");
187     }
188 
189 }