:
eval 'exec perl -wS $0 ${1+"$@"}'
    if 0;
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# This file incorporates work covered by the following license notice:
#
#   Licensed to the Apache Software Foundation (ASF) under one or more
#   contributor license agreements. See the NOTICE file distributed
#   with this work for additional information regarding copyright
#   ownership. The ASF licenses this file to you under the Apache
#   License, Version 2.0 (the "License"); you may not use this file
#   except in compliance with the License. You may obtain a copy of
#   the License at http://www.apache.org/licenses/LICENSE-2.0 .
#
#
#

# Sort comparator for index records of the form "entry|offset".
# Orders two records (taken from the sort variables $a and $b) by a string
# comparison of the text before the first '|'; the offset part is ignored.
sub by_entry {
    my ($left_key)  = split /\|/, $a;
    my ($right_key) = split /\|/, $b;
    return $left_key cmp $right_key;
}

#FIXME: someone may want "infile" or even parameter parsing
# Returns the command-line argument that directly follows the first "-o"
# in @ARGV, or the empty string when no such argument exists.
sub get_outfile {
    foreach my $i ( 0 .. $#ARGV - 1 ) {
        return $ARGV[ $i + 1 ] if $ARGV[$i] eq "-o";
    }
    return "";
}

# Prints a short usage message to standard output and terminates the
# script with exit status 99.
sub usage {
    print foreach ( "usage:\n", "$0 -o outfile < input\n" );

    exit 99;
}

# main routine
#
# Builds a thesaurus index: reads the thesaurus text from STDIN and writes,
# to the file named after -o,
#   - the encoding line (first line of the input),
#   - the number of entries,
#   - one "entry|offset" line per entry, sorted by entry.
# Each offset is the sum of length() of all input lines that precede the
# entry's first line.
my $ne = 0;       # number of entries in index
my @tindex = ();  # the index itself
my $foffset = 0;  # file position offset into thesaurus
my $encoding;     # encoding used by text file

my $outfile = get_outfile();
usage() if ( $outfile eq "" );

# top line of thesaurus provides encoding
$encoding = <STDIN>;
if ( !defined $encoding ) {
    # Empty input: still produce an (empty) index, but say so explicitly
    # instead of tripping over undef in length()/chomp().
    warn "WARNING: no input on STDIN, writing an empty index\n";
    $encoding = "";
}
$foffset += length($encoding);
chomp($encoding);

# read thesaurus line by line
# first line of every block is an entry and meaning count
while ( defined( my $rec = <STDIN> ) ) {
    my $rl = length($rec);    # accumulated length of this entry's block
    chomp($rec);
    my ( $entry, $nm ) = split( '\|', $rec );
    if ( !defined $entry || !defined $nm ) {
        # Keep indexing (best effort), but report the bad line clearly.
        warn "WARNING: malformed entry line at offset $foffset\n";
        $entry = "" unless defined $entry;
        $nm    = 0  unless defined $nm;
    }

    # the meaning lines themselves are not indexed, only their length counts
    for ( my $p = 0; $p < $nm; $p++ ) {
        my $meaning = <STDIN>;
        if ( !defined $meaning ) {
            # Input ended before all announced meanings were read; stop
            # instead of looping on EOF for the remaining count.
            warn "WARNING: unexpected end of input in entry '$entry'\n";
            last;
        }
        $rl += length($meaning);
    }

    push( @tindex, "$entry|$foffset" );
    $ne++;
    $foffset += $rl;
}

# now we have all of the information
# so sort it and then output the encoding, count and index data
@tindex = sort by_entry @tindex;

print "$outfile\n";

# Three-argument open with a lexical handle: the file name is taken
# literally and cannot be misread as part of the open mode.
open( my $out, '>', $outfile )
    or die "ERROR: Can't open $outfile for writing: $!";
print $out "$encoding\n";
print $out "$ne\n";
foreach my $one (@tindex) {
    print $out "$one\n";
}

# Buffered write errors (e.g. a full disk) only surface at close time.
close($out) or die "ERROR: Can't close $outfile: $!";

