Method for extracting names of people from a text file
by Jules Berman, Ph.D., M.D.
Perl script:
#!/usr/local/bin/perl
#
#This Perl script was created by Jules J. Berman, on 10/26/10
#and is released under the GNU General Public license, found at:
#http://www.gnu.org/licenses/gpl.html
#
#The purpose of this script is to pull names (of people) from
#a text file containing names in various forms and locations,
#and to assemble those names into an alphabetized list.
#
#The software is provided "as is", without warranty of any kind,
#express or implied, including but not limited to the warranties
#of merchantability, fitness for a particular purpose and
#noninfringement. In no event shall the authors or copyright
#holders be liable for any claim, damages or other liability,
#whether in an action of contract, tort or otherwise, arising
#from, out of or in connection with the software or the use or
#other dealings in the software.
#
use strict;
use warnings;

# extract_names($text)
#
# Scans $text for two shapes of personal name and returns the distinct
# matches as a list, sorted with Perl's default (string) sort:
#
#   1. "Forename Surname" - two capitalized words separated by exactly one
#      space or one line break.
#   2. "Surname, F. M."   - a capitalized word followed by one to three
#      capital initials, with optional commas, periods, spaces and line
#      breaks between them.
#
# Matches whose first word is exactly "The" are discarded.
# Returns an empty list when $text is undefined or contains no matches.
# Call in list context.
sub extract_names {
    my ($text) = @_;
    return if !defined $text;

    my %seen;    # name => 1; the hash keys give us de-duplication for free

    # Capture groups are used instead of $&, which slows down every regex
    # in the program on perls older than 5.20.
    my $forename_surname = qr/\b([A-Z][a-z]+[ \n][A-Z][a-z]+)\b/;
    my $surname_initials
        = qr/\b([A-Z][a-z]+[, \n]+[A-Z][,. \n]*[A-Z]?[,. \n]*[A-Z]?[,. \n]*)\b/;

    # Shape 1: "Forename Surname".
    while ($text =~ /$forename_surname/g) {
        my $name = $1;
        $name =~ tr/\n/ /;    # a name wrapped across two lines is one name

        # \b keeps real names such as "Theodore Roosevelt", which a bare
        # /^The/ test would throw away together with "The Something".
        next if $name =~ /\AThe\b/;

        $seen{$name} = 1;
    }

    # Shape 2: "Surname, F. M.".
    while ($text =~ /$surname_initials/g) {
        my $name = $1;
        next if $name =~ /\AThe\b/;

        $name =~ tr/,.//d;    # drop the punctuation around the initials

        # Three or more consecutive spaces look like column layout rather
        # than a name, so such matches are rejected.
        next if $name =~ / {3,}/;

        # A line break between surname and initials acts as a single space.
        # Simply deleting it would glue "Berman,\nJ." into "BermanJ".
        $name =~ s/ *\n[ \n]*/ /g;
        $name =~ s/ +\z//;

        $seen{$name} = 1;
    }

    my @names = sort keys %seen;
    return @names;
}

# main()
#
# Reads the whole input file, extracts the names and prints them to STDOUT,
# one per line (no newline after the last name).
#
# The input file is the first command-line argument when one is given;
# otherwise it is "new.txt" in the current directory. Put in the file name
# and path of your own text file if it lives somewhere else.
#
# Dies with a diagnostic if the file cannot be opened or closed.
sub main {
    my $path = @ARGV ? $ARGV[0] : 'new.txt';

    open my $fh, '<', $path
        or die "Cannot open '$path' for reading: $!\n";

    # Localizing $/ slurps the file in one read without changing the input
    # record separator for any other code.
    my $text = do { local $/; <$fh> };

    close $fh
        or die "Cannot close '$path': $!\n";

    print join("\n", extract_names($text));
    return;
}

# Run only when executed as a script, so that the file can also be loaded
# with require/do in order to call extract_names() directly.
unless (caller) {
    main();
    exit;
}

1;