/** File "SynonymsKey.scala", by KWR for CSE250, Spring 2022
    Client for tests involving Samuel Fallows's 1898 book of synonyms and antonyms.
    Has extra features: rotating implementations among the standard Scala libraries
    and timing code.
 */

import io.StdIn._
import io.Source
import java.io.File         //technically not needed
import java.io.FileWriter   //makes it easy to append
import java.io.PrintWriter  //makes "print" and "println" available

import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable.ListBuffer
import scala.collection.mutable.Set
import scala.collection.mutable.Map
import scala.collection.mutable.SortedSet
import scala.collection.mutable.SortedMap
import scala.collection.mutable.HashMap


//case class SynonymSet(var key: String, var synonyms: ListBuffer[String])
//class SynonymSet(var key: String, var synonyms: Set[String])

/** One parsed entry: a key string together with the synonyms listed for it.
  *
  * @param key      string the entry is filed under
  * @param synonyms words listed for the key, held in a mutable sorted set
  */
case class SynonymSet(
   key: String,
   synonyms: SortedSet[String]
)




/** Reader for the synonym file "Fallows1898.txt".

    Principal lines begin "KEY: " and "SYN: ", the latter possibly
    followed by similar lines with no headword and terminated by a line
    beginning "ANT:" or with "=".  Assume words of those lines begin in
    column 5.  Reader uses state pattern to work one line at a time.
 */
object SynonymReaderKey {

   /** Part-of-speech markers looked for on a KEY line, in priority order, each paired
       with the suffix appended to the lowercased headword.  The first three are
       noun, verb and adjective; "\r.\" is presumably adverb -- TODO confirm. */
   private val posMarkers = Seq(
      "\\n.\\" -> "_n__",
      "\\v.\\" -> "_v__",
      "\\a.\\" -> "_a__",
      "\\r.\\" -> "_r__"
   )

   /** Reads "Fallows1898.txt" from the working directory and returns one
       [[SynonymSet]] per KEY/SYN entry, in file order.

       @return the parsed entries; keys are lowercased and carry a part-of-speech
               suffix when the KEY line has a recognized marker, synonyms are lowercased
    */
   def readEntries: ArrayBuffer[SynonymSet] = {
      val synFile = "Fallows1898.txt"
      val src = Source.fromFile(synFile)
      try {
         // parseLines consumes the iterator completely, so closing afterwards is safe
         parseLines(src.getLines())
      } finally {
         src.close()   // release the file handle even if parsing throws
      }
   }

   /** Runs the line-at-a-time state machine over `lines`.
       State is the pending key, whether a SYN list is open, and the text gathered so far. */
   private def parseLines(lines: Iterator[String]): ArrayBuffer[SynonymSet] = {
      val synarray = new ArrayBuffer[SynonymSet]()

      var key = ""
      var inSyn = false
      var accumeLine = ""
      var count = 0
      for (line <- lines) {
         if (line.startsWith("KEY:")) {
            if (key != "" || inSyn) {
               println("Parse off rails at " + line)
               // Discard the unterminated entry; otherwise the new headword would be
               // appended onto the stale key and produce a garbage key.
               key = ""
               inSyn = false
               accumeLine = ""
            }
            // drop(5) rather than substring(5): a bare "KEY:" line must not throw
            val headword = line.drop(5).takeWhile(c => c.isLetter || c == '_' || c == '-')
            if (headword != "") {
               val suffix = posMarkers.collectFirst {
                  case (marker, sfx) if line.contains(marker) => sfx
               }.getOrElse("")
               key = headword.toLowerCase + suffix
            }
         }
         else if (line.startsWith("SYN:")) {
            if (key == "") {
               println("No key found for synonyms beginning " + line)
            } else if (inSyn) {
               println("Parse off rails at " + key + ":" + line)
            }
            inSyn = true
            accumeLine = line.drop(5)

         } else if (line.startsWith("ANT:") || line.startsWith("=")) {
            if (inSyn && key != "") {
               val trimmed = accumeLine.trim()
               val listText = if (trimmed.endsWith(".")) trimmed.dropRight(1) else trimmed
               // split returns Array("") for empty text, so filter blanks to make the
               // empty-list case detectable instead of storing "" as a synonym
               val syns = listText.split(",\\s+").filter(_.nonEmpty)
               if (syns.nonEmpty) {
                  val item = SynonymSet(key, SortedSet(syns.map(_.toLowerCase): _*))
                  synarray += item   // in-place append; ":+=" on a var copies the whole buffer
                  count += 1
                  if (count % 1000 == 0) {
                     println(s"Entry $count is ${item.key}:" + item.synonyms.toList)
                  }
               } else {
                  println("Empty synonym lost found for key " + key)
               }
            }
            // a terminator always ends the current entry, whether or not it was stored
            key = ""
            inSyn = false
            accumeLine = ""
         } else if (inSyn) {
            // continuation line of an open SYN list
            accumeLine += " " + line
         }
      }

      synarray
   }
}
            
            
/** Timing client.  Loads the entries via SynonymReaderKey.readEntries, times the
    construction of a key -> synonym-set map from them, then times a pass over the map
    that classifies every (key, listed word) pair and appends the results for keys
    starting with "q" to "output.txt".
 */
object SynonymsKey extends App {
   val outp = new PrintWriter(new FileWriter("output.txt",true));  //appends

   var synarray = SynonymReaderKey.readEntries
   println("Read " + synarray.length + " entries.")

   //val lookup = Map.empty[String,Set[String]]
   val lookup = SortedMap.empty[String,SortedSet[String]]
   //val lookup = HashMap.empty[String,ListBuffer[String]]

   var count = 0
   val ms = 1000000.0   // nanoseconds per millisecond
   val allowPrintWhenTiming = true

   // Suffixes under which a listed word may be filed in the map: bare, then tagged.
   private val keySuffixes = List("", "_v__", "_n__", "_a__", "_r__")

   println("\n\n\n\n")
   println("Timing the creation of the Map, reading simple entries from array...")
   if (allowPrintWhenTiming) {
      println("...allowing printing to screen & file.  Non-increases are correct Map behavior...")
      println("...because Fallows1898.txt has 20 buggy repeated entries of words, clearly typos...\n")
   }

   val tm1 = System.nanoTime()
   for (item <- synarray) {
      count = lookup.size   // size before the insert, to detect a replaced (repeated) key

      lookup(item.key) = item.synonyms
      //lookup += (item.key -> item.synonyms)

      if (lookup.size == count && allowPrintWhenTiming) {
         println("Map did not increase when adding " + item.key + ": " + item.synonyms.toList)
      }
      if (count % 1000 == 0 && allowPrintWhenTiming) {
         println(s"Map item $count is ${item.key}:" + item.synonyms.toList)
      }
   }
   val tm2 = System.nanoTime()
   var elapsedTime = (tm2 - tm1)/ms
   println("")
   println("Map creation took time " + elapsedTime + " ms, from " + lookup.size + " different entries")
   println("\n\n")

   println("\nTiming the main run now..." + (if (allowPrintWhenTiming) "" else "no printing...") + "\n")
   val t1 = System.nanoTime()

   try {
      for ((key,synSet) <- lookup) {
         val tagged = key.endsWith("__")
         val kw = if (tagged) key.dropRight(4) else key                      // key without its suffix
         val tag = if (tagged) " ("+kw+"="+key.takeRight(4)(1)+")" else ""   // e.g. " (quick=a)"
         for (word <- synSet) {
            // All synonym sets the listed word is filed under; one get per candidate key
            // replaces the repeated contains-then-apply double lookups.
            val entries = keySuffixes.flatMap(sfx => lookup.get(word + sfx))
            // recip: some entry for word lists kw back
            val recip = entries.exists(_.contains(kw))
            // nonrecip: word has a non-empty entry, but none lists kw
            val nonrecip = (!recip) && entries.exists(_.size > 0)
            // emptyKeyOnly: word is a key, but all of its entries are empty
            val emptyKeyOnly = (!recip) && (!nonrecip) && entries.nonEmpty
            if (allowPrintWhenTiming) {
               if (recip && kw.startsWith("q")) {
                  outp.println(s"$kw and $word are reciprocal synonyms" + tag)
                  //println(s"$kw and $word are reciprocal synonyms"+tag)
               } else if (nonrecip && kw.startsWith("q")) {
                  outp.println(s"$kw lists $word but $word has a list of synonyms without $kw")
                  //println(s"$kw lists $word but $word has a list of synonyms without $kw")
               } else if (emptyKeyOnly) {
                  //println(s"$kw lists $word which is a key but Xref only or otherwise has no synonyms")
               } else if (kw.startsWith("q")) {
                  //println(s"$kw lists $word but that is not a key")
               }
            }
         }
      }

      val t2 = System.nanoTime()
      elapsedTime = (t2 - t1)/ms
      println(s"Elapsed time in milliseconds: $elapsedTime")
   } finally {
      outp.close()   // flush and release output.txt even if the run fails part-way
   }
}
   

