-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimport_tiffs.pl
executable file
·114 lines (81 loc) · 1.91 KB
/
import_tiffs.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/bin/perl -I/usr/share/eprints/perl_lib
use strict;
use warnings;
use utf8;
use Data::Dumper;
use Unicode::Escape qw(escape unescape);
use Encode::Escape::Unicode;
use Text::Unidecode;
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");
use EPrints;
use Text::Iconv;
my $conv = Text::Iconv->new('utf8','utf16');
my $repo_id = 'medmus';
my $img_dir = '/home/eprints/micro-repositories/instances/medmus/TIFS';
my $ep = EPrints->new;
my $repo = $ep->repository('medmus');
die unless $repo;
my $files = {};
foreach my $file (<$img_dir/*>)
{
my $cmp = compval_fs($file);
die "Filename collision for $cmp\n" if $files->{$cmp};
$files->{$cmp} = $file;
}
my $problems = 0;
$repo->dataset('eprint')->search->map(
sub
{
my( $repo, $ds, $eprint ) = @_;
return unless $eprint->is_set('image_file');
my $image_file = $eprint->value('image_file');
my $cmp = compval_db($img_dir . '/' . $image_file . '.png');
my $filename = $files->{$cmp};
if (!$filename)
{
$problems++;
print STDERR $eprint->value('refrain_id') . '/' . $eprint->value('instance_number') . "No file for $image_file";
return;
}
if (-e $files->{$cmp})
{
my $doc = $eprint->create_subdataobj( "documents" );
my $file = $doc->add_file($files->{$cmp}, $filename);
$file->set_value('mime_type', $repo->call('guess_doc_type', $repo, $files->{$cmp}));
$file->commit;
$doc->set_main($file);
$doc->set_value('format', 'image');
$doc->commit;
$eprint->commit;
}
else
{
$problems++;
print STDERR "Cannot open $image_file with cmp of $cmp\n";
}
});
print STDERR "$problems problems\n";
sub full_path
{
my ($filename) = @_;
return $img_dir . '/' . $filename . '.tif';
}
#strip out utf8
sub compval_db
{
my ($str) = @_;
my $v = $str;
chomp $v; #belt and braces
$v =~ s/vdB/vdb/;
return unidecode($v);
}
sub compval_fs
{
my ($str) = @_;
my $v = $str;
chomp $v;
$v =~ s/[^0-9a-zA-Z\. \/]//g;
$v =~ s/vdB/vdb/;
return $v;
}