| 1 | |
|
| 2 | |
|
| 3 | |
|
| 4 | |
|
| 5 | |
|
| 6 | |
|
| 7 | |
|
| 8 | |
|
| 9 | |
|
| 10 | |
|
| 11 | |
|
| 12 | |
|
| 13 | |
|
| 14 | |
|
| 15 | |
|
| 16 | |
|
| 17 | |
|
| 18 | |
|
| 19 | |
package com.puppycrawl.tools.checkstyle.checks.duplicates; |
| 20 | |
|
| 21 | |
import com.google.common.collect.ArrayListMultimap; |
| 22 | |
import com.google.common.collect.Lists; |
| 23 | |
import com.google.common.collect.MapMaker; |
| 24 | |
import com.google.common.collect.Multimap; |
| 25 | |
import com.puppycrawl.tools.checkstyle.api.AbstractFileSetCheck; |
| 26 | |
import com.puppycrawl.tools.checkstyle.api.FileText; |
| 27 | |
import com.puppycrawl.tools.checkstyle.api.MessageDispatcher; |
| 28 | |
import com.puppycrawl.tools.checkstyle.api.Utils; |
| 29 | |
import java.io.File; |
| 30 | |
import java.io.IOException; |
| 31 | |
import java.util.Collection; |
| 32 | |
import java.util.List; |
| 33 | |
import java.util.Map; |
| 34 | |
import org.apache.commons.logging.Log; |
| 35 | |
import org.apache.commons.logging.LogFactory; |
| 36 | |
|
| 37 | |
|
| 38 | |
|
| 39 | |
|
| 40 | |
|
| 41 | |
|
| 42 | |
|
| 43 | |
|
| 44 | |
|
| 45 | |
|
| 46 | |
|
| 47 | 1035 | public final class StrictDuplicateCodeCheck extends AbstractFileSetCheck |
| 48 | |
{ |
| 49 | |
|
| 50 | |
|
| 51 | |
|
| 52 | |
|
| 53 | |
|
| 54 | |
|
| 55 | |
|
| 56 | |
|
| 57 | |
private static final int BIG_PRIME = 317; |
| 58 | |
|
| 59 | |
|
| 60 | |
|
| 61 | |
|
| 62 | |
|
| 63 | |
|
| 64 | |
private interface ChecksumGenerator |
| 65 | |
{ |
| 66 | |
|
| 67 | |
|
| 68 | |
|
| 69 | |
|
| 70 | |
|
| 71 | |
|
| 72 | |
|
| 73 | |
|
| 74 | |
|
| 75 | |
int[] convertLines(String[] aOriginalLines); |
| 76 | |
} |
| 77 | |
|
| 78 | |
|
| 79 | |
|
| 80 | |
|
| 81 | |
|
| 82 | 12 | private class TextfileChecksumGenerator implements ChecksumGenerator |
| 83 | |
{ |
| 84 | |
|
| 85 | |
public int[] convertLines(String[] aOriginalLines) |
| 86 | |
{ |
| 87 | 6 | final int lineCount = aOriginalLines.length; |
| 88 | 6 | final long[] checkSums = new long[lineCount]; |
| 89 | 149 | for (int i = 0; i < lineCount; i++) { |
| 90 | 143 | final String line = aOriginalLines[i]; |
| 91 | 143 | checkSums[i] = calcChecksum(line); |
| 92 | |
} |
| 93 | 6 | final int retLen = Math.max(0, lineCount - mMin + 1); |
| 94 | 6 | final int[] ret = new int[retLen]; |
| 95 | |
|
| 96 | 117 | for (int i = 0; i < retLen; i++) { |
| 97 | 111 | int blockChecksum = 0; |
| 98 | 111 | boolean onlyEmptyLines = true; |
| 99 | 1029 | for (int j = 0; j < mMin; j++) { |
| 100 | 924 | if (aOriginalLines[i + j].length() > 0) { |
| 101 | 614 | onlyEmptyLines = false; |
| 102 | |
} |
| 103 | 924 | final long checksum = checkSums[i + j]; |
| 104 | 924 | if (checksum == IGNORE) { |
| 105 | 6 | blockChecksum = IGNORE; |
| 106 | 6 | break; |
| 107 | |
} |
| 108 | 918 | blockChecksum += (j + 1) * BIG_PRIME * checksum; |
| 109 | |
} |
| 110 | 111 | ret[i] = onlyEmptyLines ? IGNORE : blockChecksum; |
| 111 | |
} |
| 112 | 6 | return ret; |
| 113 | |
} |
| 114 | |
|
| 115 | |
|
| 116 | |
|
| 117 | |
|
| 118 | |
|
| 119 | |
|
| 120 | |
protected int calcChecksum(String aLine) |
| 121 | |
{ |
| 122 | 137 | final int hashCode = aLine.hashCode(); |
| 123 | 137 | if (hashCode == IGNORE) { |
| 124 | 0 | return Integer.MAX_VALUE / 2; |
| 125 | |
} |
| 126 | 137 | return hashCode; |
| 127 | |
} |
| 128 | |
} |
| 129 | |
|
| 130 | |
|
| 131 | |
|
| 132 | |
|
| 133 | 12 | private class JavaChecksumGenerator extends TextfileChecksumGenerator |
| 134 | |
{ |
| 135 | |
|
| 136 | |
|
| 137 | |
|
| 138 | |
|
| 139 | |
|
| 140 | |
|
| 141 | |
@Override |
| 142 | |
protected int calcChecksum(String aLine) |
| 143 | |
{ |
| 144 | |
|
| 145 | |
|
| 146 | 143 | if (aLine.startsWith("import ")) { |
| 147 | 6 | return IGNORE; |
| 148 | |
} |
| 149 | 137 | return super.calcChecksum(aLine); |
| 150 | |
} |
| 151 | |
} |
| 152 | |
|
| 153 | |
|
| 154 | 1 | private static final Log LOG = |
| 155 | |
LogFactory.getLog(StrictDuplicateCodeCheck.class); |
| 156 | |
|
| 157 | |
|
| 158 | |
static final int IGNORE = Integer.MIN_VALUE; |
| 159 | |
|
| 160 | |
|
| 161 | |
private static final int DEFAULT_MIN_DUPLICATE_LINES = 12; |
| 162 | |
|
| 163 | |
|
| 164 | 4 | private int mMin = DEFAULT_MIN_DUPLICATE_LINES; |
| 165 | |
|
| 166 | |
|
| 167 | |
private String mBasedir; |
| 168 | |
|
| 169 | |
|
| 170 | |
|
| 171 | |
|
| 172 | |
|
| 173 | |
|
| 174 | |
private int[][] mLineBlockChecksums; |
| 175 | |
|
| 176 | |
|
| 177 | |
|
| 178 | |
|
| 179 | |
|
| 180 | |
private ChecksumInfo[] mChecksumInfo; |
| 181 | |
|
| 182 | |
|
| 183 | 4 | private final List<File> mFiles = Lists.newArrayList(); |
| 184 | |
|
| 185 | |
|
| 186 | |
|
| 187 | |
|
| 188 | 4 | private final Map<String, String[]> mTrimmedLineCache = |
| 189 | |
new MapMaker().softValues().makeMap(); |
| 190 | |
|
| 191 | |
|
| 192 | |
|
| 193 | |
|
| 194 | |
private int mDuplicates; |
| 195 | |
|
| 196 | |
private String mCharset; |
| 197 | |
|
| 198 | |
|
| 199 | |
public StrictDuplicateCodeCheck() |
| 200 | 4 | { |
| 201 | 4 | } |
| 202 | |
|
| 203 | |
|
| 204 | |
|
| 205 | |
|
| 206 | |
|
| 207 | |
|
| 208 | |
|
| 209 | |
|
| 210 | |
public void setMin(int aMin) |
| 211 | |
{ |
| 212 | 2 | if (aMin < 1) { |
| 213 | 0 | throw new IllegalArgumentException("min must be 1 or higher"); |
| 214 | |
} |
| 215 | 2 | mMin = aMin; |
| 216 | 2 | } |
| 217 | |
|
| 218 | |
|
| 219 | |
public void setBasedir(String aBasedir) |
| 220 | |
{ |
| 221 | 4 | mBasedir = aBasedir; |
| 222 | 4 | } |
| 223 | |
|
| 224 | |
@Override |
| 225 | |
public void beginProcessing(String aCharset) |
| 226 | |
{ |
| 227 | 4 | super.beginProcessing(aCharset); |
| 228 | 4 | mCharset = aCharset; |
| 229 | 4 | mFiles.clear(); |
| 230 | 4 | } |
| 231 | |
|
| 232 | |
@Override |
| 233 | |
protected void processFiltered(File aFile, List<String> aLines) |
| 234 | |
{ |
| 235 | 6 | mFiles.add(aFile); |
| 236 | 6 | } |
| 237 | |
|
| 238 | |
@Override |
| 239 | |
public void finishProcessing() |
| 240 | |
{ |
| 241 | 4 | super.finishProcessing(); |
| 242 | 4 | final long start = System.currentTimeMillis(); |
| 243 | 4 | mDuplicates = 0; |
| 244 | 4 | mLineBlockChecksums = new int[mFiles.size()][]; |
| 245 | 4 | mChecksumInfo = new ChecksumInfo[mFiles.size()]; |
| 246 | |
|
| 247 | 4 | if (LOG.isDebugEnabled()) { |
| 248 | 0 | LOG.debug("Reading " + mFiles.size() + " input files"); |
| 249 | |
} |
| 250 | |
|
| 251 | 10 | for (int i = 0; i < mFiles.size(); i++) { |
| 252 | 6 | final File file = mFiles.get(i); |
| 253 | |
try { |
| 254 | 6 | final String[] lines = getTrimmedLines(file); |
| 255 | 6 | final ChecksumGenerator transformer = |
| 256 | |
findChecksumGenerator(file); |
| 257 | 6 | mLineBlockChecksums[i] = transformer.convertLines(lines); |
| 258 | |
} |
| 259 | 0 | catch (final IOException ex) { |
| 260 | 0 | LOG.error("Cannot access " + file + " (" |
| 261 | |
+ ex.getMessage() + "), ignoring", ex); |
| 262 | |
|
| 263 | |
|
| 264 | |
|
| 265 | 0 | mLineBlockChecksums = new int[0][0]; |
| 266 | 6 | } |
| 267 | |
} |
| 268 | 4 | fillSortedRelevantChecksums(); |
| 269 | |
|
| 270 | 4 | final long endReading = System.currentTimeMillis(); |
| 271 | 4 | findDuplicates(); |
| 272 | 4 | final long endSearching = System.currentTimeMillis(); |
| 273 | |
|
| 274 | 4 | dumpStats(start, endReading, endSearching); |
| 275 | |
|
| 276 | 4 | mLineBlockChecksums = null; |
| 277 | 4 | mChecksumInfo = null; |
| 278 | 4 | } |
| 279 | |
|
| 280 | |
|
| 281 | |
|
| 282 | |
|
| 283 | |
|
| 284 | |
|
| 285 | |
|
| 286 | |
private ChecksumGenerator findChecksumGenerator(File aFile) |
| 287 | |
{ |
| 288 | 6 | if (aFile.getName().endsWith(".java")) { |
| 289 | 6 | return new JavaChecksumGenerator(); |
| 290 | |
} |
| 291 | |
|
| 292 | 0 | return new TextfileChecksumGenerator(); |
| 293 | |
} |
| 294 | |
|
| 295 | |
|
| 296 | |
|
| 297 | |
|
| 298 | |
|
| 299 | |
|
| 300 | |
|
| 301 | |
private void dumpStats(long aStart, long aEndReading, long aEndSearching) |
| 302 | |
{ |
| 303 | 4 | if (LOG.isDebugEnabled()) { |
| 304 | 0 | final long initTime = aEndReading - aStart; |
| 305 | 0 | final long workTime = aEndSearching - aEndReading; |
| 306 | 0 | LOG.debug("files = " + mFiles.size()); |
| 307 | 0 | LOG.debug("duplicates = " + mDuplicates); |
| 308 | 0 | LOG.debug("Runtime = " + initTime + " + " + workTime); |
| 309 | |
} |
| 310 | 4 | } |
| 311 | |
|
| 312 | |
|
| 313 | |
|
| 314 | |
|
| 315 | |
|
| 316 | |
|
| 317 | |
|
| 318 | |
|
| 319 | |
|
| 320 | |
private void fillSortedRelevantChecksums() |
| 321 | |
{ |
| 322 | 10 | for (int i = 0; i < mLineBlockChecksums.length; i++) { |
| 323 | 6 | final int[] checksums = mLineBlockChecksums[i]; |
| 324 | 6 | mChecksumInfo[i] = new ChecksumInfo(checksums); |
| 325 | |
} |
| 326 | 4 | } |
| 327 | |
|
| 328 | |
|
| 329 | |
|
| 330 | |
|
| 331 | |
|
| 332 | |
|
| 333 | |
private void findDuplicates() |
| 334 | |
{ |
| 335 | 4 | if (LOG.isDebugEnabled()) { |
| 336 | 0 | LOG.debug("Analysis phase"); |
| 337 | |
} |
| 338 | |
|
| 339 | |
|
| 340 | |
|
| 341 | |
|
| 342 | |
|
| 343 | |
|
| 344 | |
|
| 345 | 4 | final int len = mFiles.size(); |
| 346 | 10 | for (int i = 0; i < len; i++) { |
| 347 | |
|
| 348 | 6 | final String path = mFiles.get(i).getPath(); |
| 349 | 6 | getMessageCollector().reset(); |
| 350 | 6 | final MessageDispatcher dispatcher = getMessageDispatcher(); |
| 351 | 6 | dispatcher.fireFileStarted(path); |
| 352 | |
|
| 353 | 14 | for (int j = 0; j <= i; j++) { |
| 354 | 8 | findDuplicatesInFiles(i, j); |
| 355 | |
} |
| 356 | |
|
| 357 | 6 | fireErrors(path); |
| 358 | 6 | dispatcher.fireFileFinished(path); |
| 359 | |
} |
| 360 | 4 | } |
| 361 | |
|
| 362 | |
|
| 363 | |
|
| 364 | |
|
| 365 | |
|
| 366 | |
|
| 367 | |
private void findDuplicatesInFiles(int aI, int aJ) |
| 368 | |
{ |
| 369 | 8 | final ChecksumInfo iChecksumInfo = mChecksumInfo[aI]; |
| 370 | 8 | final ChecksumInfo jChecksumInfo = mChecksumInfo[aJ]; |
| 371 | 8 | if (!iChecksumInfo.hasChecksumOverlapsWith(jChecksumInfo)) { |
| 372 | 3 | return; |
| 373 | |
} |
| 374 | |
|
| 375 | 5 | final int[] iLineBlockChecksums = mLineBlockChecksums[aI]; |
| 376 | 5 | final int iBlockCount = iLineBlockChecksums.length; |
| 377 | |
|
| 378 | |
|
| 379 | |
|
| 380 | |
|
| 381 | 5 | final Multimap<Integer, Integer> ignorePairs = |
| 382 | |
ArrayListMultimap.create(); |
| 383 | |
|
| 384 | |
|
| 385 | |
|
| 386 | 116 | for (int iLine = 0; iLine < iBlockCount; iLine++) { |
| 387 | |
|
| 388 | 111 | final int iSum = iLineBlockChecksums[iLine]; |
| 389 | 111 | final int[] jLines = jChecksumInfo.findLinesWithChecksum(iSum); |
| 390 | |
|
| 391 | 111 | if (jLines.length > 0) { |
| 392 | 98 | findDuplicateFromLine(aI, aJ, iLine, jLines, ignorePairs); |
| 393 | |
} |
| 394 | |
} |
| 395 | 5 | } |
| 396 | |
|
| 397 | |
|
| 398 | |
|
| 399 | |
|
| 400 | |
|
| 401 | |
|
| 402 | |
|
| 403 | |
|
| 404 | |
|
| 405 | |
|
| 406 | |
|
| 407 | |
|
| 408 | |
|
| 409 | |
|
| 410 | |
|
| 411 | |
private void findDuplicateFromLine( |
| 412 | |
final int aI, final int aJ, final int aILine, |
| 413 | |
final int[] aJLines, final Multimap<Integer, Integer> aIgnore) |
| 414 | |
{ |
| 415 | |
|
| 416 | |
|
| 417 | |
|
| 418 | 98 | final int[] iCheckSums = mLineBlockChecksums[aI]; |
| 419 | 98 | final int[] jCheckSums = mLineBlockChecksums[aJ]; |
| 420 | 98 | final long checkSum = iCheckSums[aILine]; |
| 421 | |
|
| 422 | 220 | for (int jLine : aJLines) { |
| 423 | |
|
| 424 | 122 | if (aI == aJ && aILine >= jLine) { |
| 425 | 110 | continue; |
| 426 | |
} |
| 427 | |
|
| 428 | 12 | if (jCheckSums[jLine] != checkSum) { |
| 429 | 0 | continue; |
| 430 | |
} |
| 431 | |
|
| 432 | 12 | final Collection<Integer> ignoreEntries = aIgnore.get(aILine); |
| 433 | 12 | if (ignoreEntries != null && ignoreEntries.contains(jLine)) { |
| 434 | 6 | continue; |
| 435 | |
} |
| 436 | |
|
| 437 | 6 | final int duplicateLines = |
| 438 | |
verifiyDuplicateLines(aI, aJ, aILine, jLine); |
| 439 | 6 | if (duplicateLines >= mMin) { |
| 440 | 6 | reportDuplicate(duplicateLines, aILine, mFiles.get(aJ), jLine); |
| 441 | 6 | final int extend = duplicateLines - mMin; |
| 442 | 12 | for (int i = 0; i < extend; i++) { |
| 443 | 6 | final int offset = (i + 1); |
| 444 | 6 | aIgnore.put(aILine + offset, jLine + offset); |
| 445 | |
} |
| 446 | |
} |
| 447 | |
} |
| 448 | 98 | } |
| 449 | |
|
| 450 | |
|
| 451 | |
|
| 452 | |
|
| 453 | |
|
| 454 | |
|
| 455 | |
|
| 456 | |
|
| 457 | |
|
| 458 | |
|
| 459 | |
|
| 460 | |
|
| 461 | |
private int verifiyDuplicateLines( |
| 462 | |
int aI, int aJ, int aIStartLine, int aJStartLine) |
| 463 | |
{ |
| 464 | 6 | final File iFile = mFiles.get(aI); |
| 465 | 6 | final File jFile = mFiles.get(aJ); |
| 466 | |
try { |
| 467 | 6 | final String[] iLines = getTrimmedLines(iFile); |
| 468 | 6 | final String[] jLines = getTrimmedLines(jFile); |
| 469 | |
|
| 470 | 6 | int verified = 0; |
| 471 | 6 | int i = aIStartLine; |
| 472 | 6 | int j = aJStartLine; |
| 473 | |
while (i < iLines.length && j < jLines.length |
| 474 | 39 | && iLines[i++].equals(jLines[j++])) |
| 475 | |
{ |
| 476 | 33 | verified += 1; |
| 477 | |
} |
| 478 | 6 | return verified; |
| 479 | |
} |
| 480 | 0 | catch (IOException ex) { |
| 481 | 0 | LOG.error("Unable to verify potential duplicate for " |
| 482 | |
+ iFile + " and " + jFile, ex); |
| 483 | 0 | return 0; |
| 484 | |
} |
| 485 | |
} |
| 486 | |
|
| 487 | |
|
| 488 | |
|
| 489 | |
|
| 490 | |
|
| 491 | |
|
| 492 | |
|
| 493 | |
|
| 494 | |
|
| 495 | |
|
| 496 | |
|
| 497 | |
private String[] getTrimmedLines(File aFile) throws IOException |
| 498 | |
{ |
| 499 | 18 | final String path = aFile.getPath(); |
| 500 | 18 | final String[] cachedLines = mTrimmedLineCache.get(path); |
| 501 | 18 | if (cachedLines != null) { |
| 502 | 12 | return cachedLines; |
| 503 | |
} |
| 504 | 6 | final String charset = mCharset; |
| 505 | 6 | final FileText text = new FileText(aFile, charset); |
| 506 | 6 | final String[] lines = getTrimmed(text.toLinesArray()); |
| 507 | 6 | mTrimmedLineCache.put(path, lines); |
| 508 | 6 | return lines; |
| 509 | |
} |
| 510 | |
|
| 511 | |
|
| 512 | |
|
| 513 | |
|
| 514 | |
|
| 515 | |
|
| 516 | |
private String[] getTrimmed(String[] aLines) |
| 517 | |
{ |
| 518 | 6 | final String[] ret = new String[aLines.length]; |
| 519 | 149 | for (int i = 0; i < ret.length; i++) { |
| 520 | 143 | ret[i] = aLines[i].trim(); |
| 521 | |
} |
| 522 | 6 | return ret; |
| 523 | |
} |
| 524 | |
|
| 525 | |
|
| 526 | |
|
| 527 | |
|
| 528 | |
|
| 529 | |
|
| 530 | |
|
| 531 | |
|
| 532 | |
|
| 533 | |
private void reportDuplicate( |
| 534 | |
int aEquivalent, int aILine, File aJFile, int aJLine) |
| 535 | |
{ |
| 536 | 6 | final String fileName = |
| 537 | |
Utils.getStrippedFileName(mBasedir, aJFile.getPath()); |
| 538 | 6 | log(aILine + 1, "duplicates.lines", aEquivalent, fileName, aJLine + 1); |
| 539 | 6 | mDuplicates += 1; |
| 540 | 6 | } |
| 541 | |
|
| 542 | |
} |