# Two-pass converter from a TEI XML file to a token-per-entry listing.
# Usage, as given by the original leading comment: perl script.pl XML-TEI.xml
#
# Pass 1 reads the input through <> and writes a working copy to the file
# "tmp" in the current directory:
#   - lines are dropped while $in is set; $in is cleared again on a line
#     containing </teiHeader> or </envelope>;
#   - while $start is set (cleared on the line containing </body>), every
#     span ending in </abbr> is surrounded by newlines so that it ends up on
#     a line of its own in "tmp".
#
# Pass 2 reopens "tmp", prints a fixed preamble (Document Name / head,
# XML generated by / E-Dictor-v1.0.b010, Last Saved Date / 00.00.0000,
# Word Count / 0) and then, for lines seen while $start is set:
#   - on the abbreviation branch, prints two values: $abb (the line with
#     everything up to </expan> removed, then all tags removed) and $full
#     (the line with all tags removed);
#   - otherwise removes hyphens, strips tags, pads the characters
#     . , : ; ? ! with spaces, splits on whitespace and prints each
#     non-empty token separately.
# Lines outside that region are printed through, a line containing
# </TEI.2> prints only a newline, and "tmp" is deleted at the end.
#
# The inline note "eliminar guiones" (Spanish) means "remove hyphens".
#
# NOTE(review): this copy looks damaged and does not appear able to compile
# as stored:
#   * the script is collapsed onto a few physical lines and the first of
#     them starts with '#', so Perl treats everything on it as a comment;
#   * several match patterns are empty (//, s//\n\n/) and the second read
#     loop is "while()" with no filehandle -- presumably angle-bracket text
#     (opening-tag patterns, <T>) was stripped; TODO confirm against the
#     upstream copy before restoring anything;
#   * most print literals contain only "\n" / "\t" -- presumably they once
#     carried the output markup; TODO confirm. $id is assigned but never
#     used in the visible text, which would fit that assumption.
# NOTE(review): the opens are two-argument, bareword-handle and unchecked,
# and the replacements use \1 rather than $1; worth modernising once the
# original text has been recovered.
# The statements below are left exactly as found.
# perl script.pl XML-TEI.xml $in = 0; $start = 0; open T, ">tmp"; while(<>) { if (// || //) {$in = 1; next} if (/<\/teiHeader>/ || /<\/envelope>/) {$in = 0; next} if ($in) {next} if (//) {$start = 1; s//\n\n/;s/(.*?<\/abbr>)/\n\1\n/g; print T $_."\n";next} if (/<\/body>/) {$start = 0; s/<\/body>/\n<\/body>\n/;s/(.*?<\/abbr>)/\n\1\n/g; print T $_."\n";next} if ($start) { s/(.*?<\/abbr>)/\n\1\n/g; print T; next; } #print T; } close T; open T, "tmp"; $abb; $full; $start = 0; $id = 1; print "\n"; print "\n"; print "\n"; print "\n"; print "\n"; print "\n"; print "Document Name\n"; print "head\n"; print "\n"; print "\n"; print "XML generated by\n"; print "E-Dictor-v1.0.b010\n"; print "\n"; print "\n"; print "Last Saved Date\n"; print "00.00.0000\n"; print "\n"; print "\n"; print "Word Count\n"; print "0\n"; print "\n"; print "\n"; print "\n"; while() { if (/<\/TEI.2>/) {print "\n";next} if (//) {next} if (//) {$start = 1; s/<.*?>//g;print "\n\n\n

\n\n";next} if (/<\/body>/) {$start = 0; s/<.*?>//g;print "\n

\n
\n
\n\n\n";next} if ($start) { chomp; if (//) { $abb = $full = $_; $full =~ s/<.*?>//g; $abb =~ s/.*?<\/expan>//g; $abb =~ s/<.*?>//g; print "\n\t".$abb."\n\t".$full."\n\n"; #print "abb:".$abb; #print "full:".$full; } else { s/-//g; #eliminar guiones s/<.*?>//g; s/([\.\,\:\;\?\!])/ \1 /g; #MG tokenize punctuation. You can add new punctuation symbols by preceeding it with '\'. @words = split(/\s+/); foreach (@words) { if (length($_)) {print "\n\t".$_."\n\n";} } } next; } print ; } close T; unlink "tmp";