Changeset 64
- Timestamp:
- 28.11.2006 00:11:21 (2 years ago)
- Files:
-
- trunk/devel/FuzzyOcr.pm (modified) (4 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/devel/FuzzyOcr.pm
r60 r64 601 601 my @ocr_results = (); 602 602 my $scansets = get_scansets(); 603 my $mcnt = 0; 604 my $wref = get_wordlist(); 605 my %words = %$wref; 603 606 foreach my $scanset (@$scansets) { 607 my $cmcnt = 0; 608 my @cfound; 604 609 my $scancmd = $scanset->{ocr_command}; 605 610 my $scanlabel = $scanset->{label}; … … 622 627 623 628 debuglog("ocrdata=>>".join("",@result)."<<=end") if ($conf->{focr_verbose}>2); 624 push( @ocr_results, [@result] ); 625 push( @used_scansets, $scanset ); 626 } 627 my $mcnt = 0; 628 my $wref = get_wordlist(); 629 my %words = %$wref; 630 foreach my $ww (keys %words) { 631 my $w = lc $ww; 632 $w =~ s/[^a-z]//g; 633 my $wcnt = 0; 634 my $gcnt = 0; 635 foreach my $ocr_set (@ocr_results) { 636 my $cwcnt = 0; 637 foreach (@$ocr_set) { 629 630 foreach my $ww (keys %words) { 631 my $w = lc $ww; 632 $w =~ s/[^a-z]//g; 633 my $wcnt = 0; 634 foreach (@result) { 638 635 tr/!;|081/iiioal/; 639 636 s/[^a-zA-Z]//g; … … 641 638 my $matched = abs(adistr( $w, $_ )); 642 639 if ( $matched < $words{$ww} ) { 643 $ cwcnt++;640 $wcnt++; 644 641 debuglog( 645 642 "Found word \"$w\" in line\n \"$_\" \n with fuzz of " 646 643 . sprintf("%0.4f",$matched) 647 . " scanned with scanset \"$ used_scansets[$gcnt]->{label}\""644 . " scanned with scanset \"$scanlabel\"" 648 645 ); 649 646 } 650 647 } 651 $wcnt = max( $wcnt, $cwcnt ); 652 $gcnt++; 653 } 654 $cnt += $wcnt; 655 $mcnt += $wcnt; 656 if ( ( $conf->{focr_verbose} > 0 ) and ($wcnt) ) { 657 push( @found, "\"$w\" in $wcnt lines" ); 658 } 648 $cmcnt += $wcnt; 649 if ( ( $conf->{focr_verbose} > 0 ) and ($wcnt) ) { 650 push( @cfound, "\"$w\" in $wcnt lines" ); 651 } 652 } 653 $mcnt = max($mcnt, $cmcnt); 654 if ($mcnt == $cmcnt) { 655 @found = @cfound; 656 } 657 658 if ($mcnt >= $conf->{focr_counts_required} and $conf->{focr_minimal_scanset}) { 659 debuglog("Scanset \"$scanlabel\" generates enough hits ($mcnt), skipping further scansets..."); 660 last; 661 } 662 #push( @ocr_results, [@result] ); 663 #push( @used_scansets, $scanset ); 659 664 } 660 665 if ($conf->{focr_enable_image_hashing}) { … … 662 667 push(@hashes, $info); 663 668 } 669 $cnt += $mcnt; 664 670 } 665 671 close RAWERR if ($haserr>0); 666 672 667 673 if ($cnt == 0) { 668 674 if ($conf->{focr_enable_image_hashing} > 1 and @hashes) {
