// Correct the errors in n.dic and v.dic first!
// This is the ugliest file :-)
#include <stdio.h>
#include <sys/ddi.h>    // for max()
#include <ctype.h>      // for isalpha
#include <stdlib.h>     // for atoi
#include <string.h>     // for strlen, strcpy, strstr, memmove, memset
#include <strings.h>

// Directory that holds the per-letter source dictionaries; main() opens
// "<DICT_PATH>/<letter>.dic" for reading.
#define DICT_PATH       "../../../../../Install/Chinese/dict/Oxford_Dictionary"
// Path of the output file that main() opens for writing.
const char sTargetFile[] = "../dic/oxford.dic";

// Alias for unsigned short; not referenced in the visible part of this file.
typedef unsigned short WORD;

// One dictionary entry as it is held in memory by main().
struct WordItem {
    char Word[100];         // headword line read from the source file
    char Meaning[16383];    // definition text; rewritten in place by vProcessMeaning()
    char Mark[200];         // text found between the leading pair of '/' in the definition
};

// Cursor over the entry table while reading and while writing.
struct WordItem * CurrentWordItem;
// Most recently stored entry; NULL until the first entry of a file is stored.
struct WordItem * LastWordItem;
// Report whether the text in `input` ends with sentence-final punctuation.
//
// Trailing spaces are skipped first. The last remaining character counts as
// punctuation when it is '?', '.' or '!', or when the last two bytes are
// 0xA3 0xA7 (described by the original author as a Chinese quote mark).
//
// input: NUL-terminated string; it is only read.
// Returns true if the string ends with such punctuation, false otherwise
// (including for an empty string).
bool myispunct(char * input)
{
    size_t len = strlen(input);
    if ( len == 0 )     // nothing to inspect; avoids reading input[-1]
        return false;

    char * end = input + len - 1;
    while ( *end == ' ' && end > input )
        end--;
    switch (*end)
    {
        case '?':
        case '.':
        case '!':
            return true;
        default:
            // The two-byte form needs a byte in front of `end`; never look before the buffer.
            if ( end > input && *end==(const char)0xa7 && *(end-1)==(const char)0xa3 )
                return true;
            return false;
    }
}

// Strip trailing '\r', '\n' and ' ' characters from `input` in place.
//
// The first character of the string is never removed, so a string that
// consists only of such characters keeps its first one. Callers in this file
// see non-empty lines stay non-empty, so that behaviour is preserved.
//
// input: writable NUL-terminated string; an empty string is left untouched.
void chomp(char * input)
{
    size_t len = strlen(input);
    if ( len == 0 )     // avoids reading input[-1]
        return;

    char * end = input + len - 1;
    while ( end > input && ( *end == '\r' || *end == '\n' || *end == ' ' ) )
        *end-- = '\0';
}

// Store the mark text `src` into `dest`.
// Currently a plain string copy including the terminating NUL; `dest` must be
// large enough for it and must not overlap `src`.
void vProcessMark(char * dest,char * src)
{
    char * out = dest;
    const char * in = src;

    do
    {
        *out++ = *in;
    } while ( *in++ != '\0' );
}

// Write `num` copies of `fill` to `dest`, followed by a terminating NUL.
//
// dest: buffer with room for at least num+1 bytes.
// fill: byte to repeat.
// num:  repeat count; zero or a negative value yields an empty string
//       (a negative count previously made the loop run far past the buffer).
// File-local helper, hence internal linkage.
static inline void vFillChar(char * dest, const char fill, int num)
{
    for ( int i = 0; i < num; i++ )
        *dest++ = fill;
    *dest = '\0';
}

// Break one slice of definition text into sentences and append them to `dest`,
// one sentence per line.
//
// dest:    NUL-terminated output position; lines are written starting here.
//          The caller must provide enough room (no size is passed in).
// src:     writable slice text; it is cut up in place with NUL bytes.
// spaces:  indentation of the first sentence; following sentences get spaces+4.
// noslice: when true the text is emitted as a single sentence.
//
// A new sentence starts
//   - after ": " when the next byte is ASCII (with an extra check that stops
//     all further splitting for one mixed ASCII / high-bit pattern),
//   - at a '*' (the '*' and the spaces around it are dropped),
//   - after ". " when high-bit text precedes the period, ASCII letters follow
//     it and the surrounding span is longer than 20 bytes; periods followed
//     closely by "Cf " or "=>" are left alone.
// Bytes are classified as "high-bit" with (unsigned)ch > 128, as in the rest
// of this file.
void vprocessslice(char * dest, char * src, const int spaces,const bool noslice)
{
    if ( !strlen(src) )
        return;
    char * sentences[100];
    const int maxsentences = (int)(sizeof(sentences)/sizeof(sentences[0]));
    memset(sentences,0,sizeof(sentences));
    int currentsentence = 0;
    while (*src==' ')
        src++;
    if (!strlen(src))
    {
        printf("Warning (vprocesssice):empty line\n");
        return;
    }
    sentences[currentsentence++]=src;
    char * current;
    // Each iteration adds at most one sentence, so checking the capacity here
    // keeps sentences[] in bounds; once it is full the rest stays unsplit.
    for ( current=src;!noslice && *current && currentsentence<maxsentences;current++ )
    {
        // ": " or "*" or "long sentence with chinese. english"
        if ( *current==':' && *(current+1)==' ' && *(current+2) && (unsigned)*(current+2)<=128 )
        {
            bool exam=true;
            char *a,*b,*c,*d;
            for (a=current+1;*a==' ';a++)
                ;
            // both in chinese & english?
            if ( current-1>src && (unsigned)*(current-1)<=128 )
            {
                for (b=a;*b&&(unsigned)*b<=128;b++)
                    ;
                if (*b)
                {
                    for (c=b;*c&&(unsigned)*c>128;c++)
                        ;
                    if (*c==':')
                    {
                        for (d=c+1;*d;d++)
                        {
                            if (isspace((unsigned char)*d))
                                continue;
                            if ((unsigned)*d>128)
                                exam=false;
                            break;
                        }
                    }
                }
            }
            if (!exam)
                break;
            *(current+1)='\0';
            if (!*a)    // only spaces followed: nothing left to split, and
                break;  // stepping past the terminator must be avoided
            sentences[currentsentence++]=a;
            current=a;
        }
        else if ( *current=='*' && current>src && *(current+1) )
        {
            char *a,*b;
            for (a=current-1;a>src && *a==' ';a--)
                ;
            *(a+1)='\0';
            for (b=current+1;*b==' ';b++)
                ;
            if (!*b)    // the '*' was the last visible character
                break;
            sentences[currentsentence++]=b;
            current=b;
        }
        else if ( *current=='.' && current>src && *(current+1)==' ' && *(current+2)!='*' && *(current+2)!=':' && *(current-1)!='.' )
        {
            char *a,*b,*c;
            bool checkbefore=false,suspect=false;
            // A cross reference ("Cf " or "=>") within 5 bytes of the period
            // means this period does not end a sentence.
            const char * cf = strstr(current,"Cf ");
            const char * arrow = strstr(current,"=>");
            const bool crossref = (cf && cf-current<=5) || (arrow && arrow-current<=5);
            // Look for an ASCII letter after the period before any high-bit byte.
            for (a=current+1;!crossref && *a;a++)
            {
                if (*a==' ')
                    continue;
                if ((unsigned)*a>128)
                    break;
                if (isalpha((unsigned char)*a))
                {
                    checkbefore=true;
                    break;
                }
            }
            // Is the last non-space byte before the period a high-bit byte?
            for (b=current-1;checkbefore&&b>src&&*b;b--)
            {
                if (*b==' ')
                    continue;
                if((unsigned)*b>128)
                {
                    suspect=true;
                    break;
                }
                break;
            }
            if (suspect)
            {
                for (;*b&&b>src;b--)
                    ;
                for (;*a && *a!='.' && *a!=':' && *a!='*';a++)
                    ;
                for (c=current+1;*c==' ';c++)
                    ;
                if (a-b>20)
                {
                    *(current+1)='\0';
                    sentences[currentsentence++]=c;
                    current=c;
                }
            }
        }
    }
    chomp(sentences[0]);
    if ( strlen(sentences[0]) )
    {
        vFillChar(dest,' ',spaces);
        sprintf(dest+strlen(dest),"%s\n",sentences[0]);
    }
    else
        printf("Warning!\n");
    int totalsentences = currentsentence;
    for ( currentsentence = 1; currentsentence<totalsentences;currentsentence++ )
    {
        chomp(sentences[currentsentence]);
        vFillChar(dest+strlen(dest),' ',spaces+4);
        sprintf(dest+strlen(dest),"%s\n",sentences[currentsentence]);
    }
}

// Tell whether `input` starts with something that looks like an arithmetic
// operator: '+', '-', '*', '=', or an 'x' that is directly followed by a space.
inline bool ismath(char * input)
{
    switch ( *input )
    {
        case '+':
        case '-':
        case '*':
        case '=':
            return true;
        case 'x':
            // a lone 'x' only counts when used like a multiplication sign
            return *(input+1) == ' ';
        default:
            return false;
    }
}

// Reformat one definition in place.
//
// The text in `meaning` is cut into numbered items ("1 ...", "2 ..."), each
// item into lettered slices ("(a) ...", "(b) ..."), and each slice is handed
// to vprocessslice(), which writes it as indented lines. The assembled result
// replaces the original contents of `meaning`.
//
// meaning: writable NUL-terminated buffer. No size is passed in:
//          item markers may be inserted into it and the final text (with
//          added indentation and newlines) is copied back, so the caller must
//          provide spare room beyond the current string length.
//
// Item detection is heuristic:
//   - a number that follows a space (or starts the text) and continues the
//     running sequence starts a new item;
//   - where the source apparently forgot a number, a sentence end ('.' or '!')
//     followed by ASCII text gets a synthetic "55AA<n> " (or "AA55 " inside an
//     "(idm " block) marker inserted and starts a new item.
// Bytes are classified as "high-bit" with (unsigned)ch > 128.
void vProcessMeaning(char * meaning)
{
    // Stand-in for an empty leading item; never written to.
    static char emptyitem[] = "";
    char temp[32768];
    temp[0]='\0';
    char * current;
    char * items[100];
    const int maxitems = (int)(sizeof(items)/sizeof(items[0]));
    memset(items,0,sizeof(items));
    //split to items
    int currentitem = 0;
    items[currentitem++] = meaning;
    bool startwithnote = strstr(meaning,"NOTE ON USAGE") != NULL;
    bool startwithcap = false;
    bool startwithidm = false;
    int num=0,startitem=1;
    // Seven or more capitals within the first ten bytes: the entry starts
    // with an upper-case heading and numbering may start at any value.
    for (char *x=meaning;*x&&x-meaning<10;x++)
    {
        if (isspace((unsigned char)*x))
            continue;
        if (isupper((unsigned char)*x))
            num++;
    }
    if (num>=7)
        startwithcap = true;
    // At most one item is added per iteration, so the capacity check in the
    // loop condition keeps items[] in bounds.
    for ( current = meaning; * current && currentitem<maxitems; current++)
    {
        if ( isdigit((unsigned char)*current)
             && ( current==meaning
                  || *(current-1)==' '
                  || (*(current-1)=='.' && (current-meaning<2 || !isdigit((unsigned char)*(current-2)))) ) )
        {
            for ( char * c = current+1;*c; c++ )
            {
                if ( isdigit((unsigned char)*c) )
                    continue;
                if ( *c == ' ' && (atoi(current)-currentitem==startitem-1||startwithcap)
                      && (current-items[currentitem-1]<2 || (*(current-2)!='*'&&*(current-2)!=':'&&((*(current+2)&&*(current+3)&&*(current+4)&&!isdigit((unsigned char)*(current+4)))||!ismath(current+2))) ) )
                {
                    char faint[16];
                    bool notdigital = false;
                    sprintf(faint," %d ",currentitem+startitem-1);
                    if ( strstr(c,faint) )
                    {
                        if ( strstr(c,"and ")==c+1 )
                            break;
                        for (char *i=c;*i;i++)
                        {
                            if (isspace((unsigned char)*i))
                                continue;
                            if ((unsigned)*i>128)
                            {
                                notdigital=true;
                                break;
                            }
                            break;
                        }
                    }
                    if (notdigital)
                        break;
                    if (current==meaning)
                        items[currentitem-1] = emptyitem;
                    if (startwithcap)
                    {
                        startwithcap = false;
                        startitem=atoi(current);
                    }
                    items[currentitem++] = current;
                    // Terminate the previous item; there is nothing in front
                    // of the number when it opens the text.
                    if (current>meaning)
                        *(current-1) = '\0';
                    current = c;
                    break;
                }
                else
                    break;
            }
        }
        if ( (*current=='.'||*current=='!')
             && isspace((unsigned char)*(current+1)) && *(current+2)
             && !strstr(current+2,"Cf ")
             && !(*current=='.' && current-meaning>=2 && *(current-1)=='.' && *(current-2)=='.')  // not ...
             && !startwithnote )    // fix dic bugs to forget to input digitals.
        {
            bool checkbefore=false;
            for ( char * c= current+1;*c;c++ )
            {
                if ((unsigned)*c>128)    //chinese
                    break;
                if ( isspace((unsigned char)*c) )
                    continue;
                if (isalpha((unsigned char)*c)||*c=='`'||*c==','||(*c=='(' && ( !*(c+1) || !*(c+2) || *(c+2)!=')')))
                {
                    for ( char * e=c+1;*e;e++ )
                    {
                        bool donotcheck=false;
                        if (isalpha((unsigned char)*e))
                            continue;
                        if ( (unsigned)*e>128 )
                            break;
                        // or if the chinese below also have "." or "!" in it, break
                        char *f;
                        for( f = e;*f && (unsigned)*f<=128;f++ )
                            ;
                        if (*f) //start of chinese
                        {
                            char * j,*g;
                            for(j=f;*j && !isalpha((unsigned char)*j);j++)
                                ;
                            if (strchr(f,*current) && strchr(f,*current)<j)   //chinese have this one.
                            {
                                g=strchr(f,*current);
                                for(char *h=g+1;*h;h++)
                                {
                                    if (*h==' ')
                                        continue;
                                    if (isalpha((unsigned char)*h))
                                        break;
                                    if ((unsigned)*h>128)
                                    {
                                        donotcheck=true;
                                        break;
                                    }
                                    break;
                                }
                            }
                        }
                        if (!donotcheck)
                            checkbefore = true;
                        break;
                    }
                    break;
                }
                if ( strstr(items[currentitem-1]," => ") && strstr(items[currentitem-1]," => ")<current )    // checkbefore :-)
                {
                    checkbefore = true;
                    break;
                }
                break;
            }
            if ( checkbefore )
            {
                char itemstr[16];
                char digitalstr[16];
                sprintf(itemstr,"%d ",currentitem+startitem-1);
                sprintf(digitalstr,"%d ",currentitem+startitem-1);
                if ( startwithidm || (strstr(items[currentitem-1],"(idm ") && strstr(items[currentitem-1],"(idm ")-items[currentitem-1]<=5) )
                {
                    startitem--;//skip currentitem
                    startwithidm = true;
                    sprintf(itemstr,"AA55 ");
                }
                // If the expected number does appear later, no marker is needed.
                char *testend=strstr(current,digitalstr);
                if ( testend )  // checkfollow :-)
                {
                    if (startwithidm)
                    {
                        char * testover;
                        for (testover=current+1;*testover && *testover!='.' &&testover<=testend;testover++)
                            ;
                        if(*testover!='.')
                        {
                            startwithidm=false;
                            startitem++;
                            continue;
                        }
                    }
                    else
                        continue;
                }
                if (!startwithidm)
                    sprintf(itemstr,"55AA%d ",currentitem+startitem-1);

                // Only the byte directly before the sentence end is examined;
                // the loop body always leaves after its first pass.
                for ( char * d=current-1;d>meaning && *d;d-- )
                {
                    if ( ( *d==']' || *d==')' ) && isupper((unsigned char)*(d-1)) )
                        break;
                    *(current+1) = '\0';
                    char * slicebegin = current+2;
                    while (*slicebegin==' ')
                        slicebegin++;
                    // Open a gap for the marker. NOTE(review): this grows the
                    // string inside the caller's buffer without a size check.
                    memmove(current+2+strlen(itemstr),slicebegin,strlen(slicebegin)+1);
                    memcpy(current+2,itemstr,strlen(itemstr));
                    items[currentitem++]=current+2;
                    current += 2;
                    break;
                }
            }
        }
    }

    int totalitems = currentitem;
    for ( currentitem=0; currentitem<totalitems; currentitem++)
    {
        //split to slices
        // Slot 0 is the item itself, slots 1..26 are "(a)".."(z)".
        char * slices[27];
        const int maxslices = (int)(sizeof(slices)/sizeof(slices[0]));
        int currentslice = 0;
        // Set when "(a)" opens the item, i.e. there is no text in front of
        // the first slice that would need a line of its own.
        bool leadingslice = false;
        slices[currentslice++] = items[currentitem];
        for (char * currentchar = items[currentitem]; * currentchar; currentchar ++)
        {
            /* (a) */
            if ( currentslice<maxslices
                    && *currentchar=='(' && *(currentchar+1) && *(currentchar+2)
                    && islower((unsigned char)*(currentchar+1)) && *(currentchar+2)==')'   // format
                    && *(currentchar+1)==(char)(currentslice+'a'-1)         // series
                    && (*(currentchar+1)!='a' || strstr(currentchar+1,"(b)")) ) // annoying "(a)"
            {
                if (currentchar>items[currentitem])
                    *(currentchar-1) = '\0';
                else
                    leadingslice = true;
                slices[currentslice++] = currentchar;
            }
        }
        int totalslices = currentslice;
        // each slice
        if (!leadingslice)
            vprocessslice(temp+strlen(temp),items[currentitem],0,startwithnote);
        for (currentslice=1;currentslice<totalslices;currentslice++)
            vprocessslice(temp+strlen(temp),slices[currentslice],4,startwithnote);
    }
    // NOTE(review): temp can be longer than the original text (indentation and
    // newlines were added); the caller's buffer size is not known here.
    strcpy(meaning,temp);
}

int main(int argc,char **argv)
{
    FILE *input, *output;
    char word[4096],meaning[32768];
    char dic_path[256];
    int icount = 0;
    int maxlen = 0;
    struct WordItem * WordItemLib = new WordItem[8192];
    if ( !WordItemLib )
    {
        fprintf(stderr,"Not enough memory\n");
        return 0;
    }
    
    /* Open output file */
    if (!(output = fopen(sTargetFile, "wb")))
    {
        fprintf(stderr,"Unable to open outfile\r\n");
        return -3;
    }

    for (unsigned char o='m'; o<='m'; o++)
    {
        printf("processing %c ...\n",o);
        CurrentWordItem = WordItemLib;
        LastWordItem = NULL;
        sprintf((char *)dic_path, DICT_PATH "/%c.dic", o);
        input = fopen( dic_path, "r" );
        if (!input)
        {
            fprintf(stderr,"can not open inputfile:%s\n",dic_path);
            fclose(output);
            delete [] WordItemLib;
            return -2;
        }
        
        while (fgets(word,sizeof(word),input))
        {
            chomp(word);
            if (strlen(word)>100 && !strstr(word,"north-") ) //north- exception
            {
                fprintf(stderr,"warning too long: %s\n",word);
                if ( !LastWordItem )
                {
                    printf("faint...\n");
                    delete [] WordItemLib;
                    return -1;
                }
                if ( strlen(word)+strlen(LastWordItem->Meaning) > sizeof(LastWordItem->Meaning) )
                {
                    printf("ERROR:%d\n",strlen(word)+strlen(LastWordItem->Meaning));
                    delete[] WordItemLib;
                    return -1;
                }
                strcat(LastWordItem->Meaning,word);
                continue;
            }
            
            if ( !fgets(meaning,sizeof(meaning),input) )
            {
                fprintf(stderr,"error when read data\r\n");
                fprintf(stderr,"%c %s\n",o,word);
                delete [] WordItemLib;
                return -4;
            }
            chomp(meaning);
            
            while ( strlen(meaning)%80==0 )
            {
                if  ( !myispunct(meaning) )
                {
                    printf("warning 80: %s\n",word);
                    fgets(meaning+strlen(meaning),sizeof(meaning)-strlen(meaning),input);
                    chomp(meaning);
                }
                else
                {
                    // try next line
                    long current = ftell(input);
                    char temp[32768];
                    fgets(temp,sizeof(temp),input);
                    chomp(temp);
                        if ( (isalpha(temp[0]) || temp[0]=='-') && strlen(temp)!=80 )
                    {
                        fseek(input,current,SEEK_SET);
                        break;
                    }
                    else
                    {
                        printf("warning 80! %s\n",word);
                        strcat(meaning,temp);   
                    }
                }
            }

            maxlen = max(strlen(word),maxlen);
            maxlen = max(strlen(meaning),maxlen);
            strcpy(CurrentWordItem->Word,word);
            strcpy(CurrentWordItem->Meaning,meaning);
            LastWordItem = CurrentWordItem;
            CurrentWordItem++;
            icount ++;
            //process
            
        }
        fclose(input);
        // seprate mark from meaning
        for (CurrentWordItem=WordItemLib;CurrentWordItem<=LastWordItem;CurrentWordItem++)
        {
            if ( CurrentWordItem->Meaning[0] == '/' )
            {
                char * endmark = strchr(&CurrentWordItem->Meaning[1],'/');
                if (endmark)
                {
                    *endmark='\0';
                    vProcessMark(CurrentWordItem->Mark,&CurrentWordItem->Meaning[1]);
                    strcpy(CurrentWordItem->Meaning,endmark+1);
                }
            }
            //if (meaning start with ==>
            //fprintf(output,"ORIMEANING:%s\n",CurrentWordItem->Meaning);
            vProcessMeaning(CurrentWordItem->Meaning);
            fprintf(output,"WORD:%s\nMARK:%s\nMEANING:\n%s\n",CurrentWordItem->Word,CurrentWordItem->Mark,CurrentWordItem->Meaning);
        }
    }
    
    fprintf(stdout,"Maxlen:%d\nItems:%d\n",maxlen,icount);
    
//    fwrite(&icount,sizeof(int),1,output);
    int style= (0<<24) + (0<<16) + (0<<8) + 0;
//    fwrite(&style,sizeof(int),1,output);
    
    fclose(output);
    delete [] WordItemLib;
}
