1
2
3
4 package net.sourceforge.pmd.cpd;
5
6 import java.io.File;
7 import java.io.FileNotFoundException;
8 import java.io.IOException;
9 import java.util.HashSet;
10 import java.util.Iterator;
11 import java.util.List;
12 import java.util.Map;
13 import java.util.Properties;
14 import java.util.Set;
15 import java.util.TreeMap;
16
17 import net.sourceforge.pmd.util.FileFinder;
18
19 import org.apache.commons.io.FilenameUtils;
20
21 public class CPD {
22
23 private static final int MISSING_FILES = 1;
24 private static final int MISSING_ARGS = 2;
25 private static final int DUPLICATE_CODE_FOUND = 4;
26
27 static boolean dontExitForTests = false;
28
29 private CPDConfiguration configuration;
30
31 private Map<String, SourceCode> source = new TreeMap<String, SourceCode>();
32 private CPDListener listener = new CPDNullListener();
33 private Tokens tokens = new Tokens();
34 private MatchAlgorithm matchAlgorithm;
35
36 public CPD(CPDConfiguration theConfiguration) {
37 configuration = theConfiguration;
38
39 TokenEntry.clearImages();
40 }
41
42 public void setCpdListener(CPDListener cpdListener) {
43 this.listener = cpdListener;
44 }
45
46 public void go() {
47 matchAlgorithm = new MatchAlgorithm(
48 source, tokens,
49 configuration.minimumTileSize(),
50 listener
51 );
52 matchAlgorithm.findMatches();
53 }
54
55 public Iterator<Match> getMatches() {
56 return matchAlgorithm.matches();
57 }
58
59 public void add(File file) throws IOException {
60 add(1, file);
61 }
62
63 public void addAllInDirectory(String dir) throws IOException {
64 addDirectory(dir, false);
65 }
66
67 public void addRecursively(String dir) throws IOException {
68 addDirectory(dir, true);
69 }
70
71 public void add(List<File> files) throws IOException {
72 for (File f: files) {
73 add(files.size(), f);
74 }
75 }
76
77 private void addDirectory(String dir, boolean recurse) throws IOException {
78 if (!(new File(dir)).exists()) {
79 throw new FileNotFoundException("Couldn't find directory " + dir);
80 }
81 FileFinder finder = new FileFinder();
82
83 add(finder.findFilesFrom(dir, configuration.filenameFilter(), recurse));
84 }
85
86 private Set<String> current = new HashSet<String>();
87
88 private void add(int fileCount, File file) throws IOException {
89
90 if (configuration.skipDuplicates()) {
91
92 String signature = file.getName() + '_' + file.length();
93 if (current.contains(signature)) {
94 System.err.println("Skipping " + file.getAbsolutePath() + " since it appears to be a duplicate file and --skip-duplicate-files is set");
95 return;
96 }
97 current.add(signature);
98 }
99
100 if (!FilenameUtils.equalsNormalizedOnSystem(file.getAbsoluteFile().getCanonicalPath(), file.getAbsolutePath())) {
101 System.err.println("Skipping " + file + " since it appears to be a symlink");
102 return;
103 }
104
105 if (!file.exists()) {
106 System.err.println("Skipping " + file + " since it doesn't exist (broken symlink?)");
107 return;
108 }
109
110 listener.addedFile(fileCount, file);
111 SourceCode sourceCode = configuration.sourceCodeFor(file);
112 configuration.tokenizer().tokenize(sourceCode, tokens);
113 source.put(sourceCode.getFileName(), sourceCode);
114 }
115
116 private static void setSystemProperties(String[] args, CPDConfiguration config) {
117 boolean ignoreLiterals = CPDConfiguration.findBooleanSwitch(args, "--ignore-literals");
118 boolean ignoreIdentifiers = CPDConfiguration.findBooleanSwitch(args, "--ignore-identifiers");
119 boolean ignoreAnnotations = CPDConfiguration.findBooleanSwitch(args, "--ignore-annotations");
120 Properties properties = System.getProperties();
121 if (ignoreLiterals) {
122 properties.setProperty(JavaTokenizer.IGNORE_LITERALS, "true");
123 }
124 if (ignoreIdentifiers) {
125 properties.setProperty(JavaTokenizer.IGNORE_IDENTIFIERS, "true");
126 }
127 if (ignoreAnnotations) {
128 properties.setProperty(JavaTokenizer.IGNORE_ANNOTATIONS, "true");
129 }
130 System.setProperties(properties);
131 config.language().setProperties(properties);
132 }
133
134 public static void main(String[] args) {
135 if (args.length == 0) {
136 showUsage();
137 System.exit(MISSING_ARGS);
138 }
139
140 try {
141 CPDConfiguration config = new CPDConfiguration(args);
142
143
144
145 setSystemProperties(args, config);
146
147 CPD cpd = new CPD(config);
148
149
150 boolean missingFiles = true;
151 for (int position = 0; position < args.length; position++) {
152 if (args[position].equals("--files")) {
153 cpd.addRecursively(args[position + 1]);
154 if ( missingFiles ) {
155 missingFiles = false;
156 }
157 }
158 }
159
160 if ( missingFiles ) {
161 System.out.println("No " + "--files" + " value passed in");
162 showUsage();
163 System.exit(MISSING_FILES);
164 }
165
166 cpd.go();
167 if (cpd.getMatches().hasNext()) {
168 System.out.println(config.renderer().render(cpd.getMatches()));
169 if (!dontExitForTests) {
170 System.exit(DUPLICATE_CODE_FOUND);
171 }
172 }
173 } catch (Exception e) {
174 e.printStackTrace();
175 }
176 }
177
178 public static void showUsage() {
179 System.out.println("Usage:");
180 System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens xxx --files xxx [--language xxx] [--encoding xxx] [--format (xml|text|csv|vs)] [--skip-duplicate-files] ");
181 System.out.println("i.e: ");
182 System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --files c:\\jdk14\\src\\java ");
183 System.out.println("or: ");
184 System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --files /path/to/c/code --language c ");
185 System.out.println("or: ");
186 System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --encoding UTF-16LE --files /path/to/java/code --format xml");
187 }
188
189 }