This class encapsulates a word in a sentence as a string, with an associated score (floating point value).
/// A single word taken from a sentence, paired with a numeric score.
public class Word {
    /// The text of the word.
    var value: String
    /// The score associated with the word.
    var score: Double

    /// Creates a word from its text and its score.
    ///
    /// - Parameters:
    ///   - value: The text of the word.
    ///   - score: The score to associate with the word.
    init(value text: String, score weight: Double) {
        score = weight
        value = text
    }

    /// Returns the text of the word.
    public func get_Value() -> String {
        return self.value
    }

    /// Returns the score associated with the word.
    public func get_Score() -> Double {
        return self.score
    }
}
This class encapsulates a sentence as a list of words, with an aggregate score as well as the order in which it appears in the document (via its index). See the Document class below.
/// An ordered list of words together with the running total of their scores
/// and the position at which the sentence appeared in its document.
public class Sentence {
    /// The words of the sentence, in the order they were added.
    var words: [Word]
    /// Sum of the scores of every word added so far.
    var score: Double
    /// Index of this sentence in the document's array of sentences; -1 until
    /// one is assigned. Used to put the final summary sentences back in order.
    var original_Index: Int

    /// Creates an empty sentence with a score of zero and no assigned index.
    init() {
        original_Index = -1
        score = 0.0
        words = []
    }

    /// Appends a word to the sentence and adds its score to the total.
    ///
    /// - Parameter word: The word to append.
    public func add_Word(word: Word) {
        score += word.get_Score()
        words.append(word)
    }

    /// Returns the text of the sentence.
    ///
    /// Every word, including the first, is preceded by a single space.
    public func display() -> String {
        return words.reduce("") { partial, entry in
            partial + " " + entry.get_Value()
        }
    }

    /// Returns the accumulated score of the sentence.
    public func get_Score() -> Double {
        return self.score
    }

    /// Records the position of this sentence within its document.
    ///
    /// - Parameter index: The index to store.
    public func setOriginal_Index(index: Int) {
        self.original_Index = index
    }
}
The Document class allows us to add sentences as we read them from a file, maintaining the original order. It also has a method to sort the sentences by their cumulative scores.
/// A collection of sentences kept in the order they were added, able to
/// produce a summary made of its highest-scoring sentences.
public class Document {
    /// The sentences of the document, in the order they were added.
    var sentences: [Sentence]
    /// Number of sentences added so far; supplies the next original index.
    var counter: Int

    /// Creates an empty document.
    init() {
        sentences = []
        counter = 0
    }

    /// Appends a sentence and stamps it with its position in the document.
    ///
    /// - Parameter sentence: The sentence to append.
    public func add_Sentence(sentence: Sentence) {
        sentences.append(sentence)
        sentence.setOriginal_Index(index: counter)
        counter += 1
    }

    /// Returns the sentence stored at `index`.
    ///
    /// - Parameter index: A valid index into the document's sentences.
    public func get_Sentence(index: Int) -> Sentence {
        return sentences[index]
    }

    /// Returns the score of the sentence stored at `index`.
    ///
    /// - Parameter index: A valid index into the document's sentences.
    public func get_SentenceScore(index: Int) -> Double {
        return sentences[index].get_Score()
    }

    /// Builds a summary from the highest-scoring sentences.
    ///
    /// The summary holds `0.1 * sentences.count` sentences, rounded to the
    /// nearest whole number, so a document with fewer than five sentences
    /// yields an empty summary. The document's own sentence order is left
    /// untouched.
    ///
    /// - Returns: The selected sentences, ordered by their original index.
    public func summarize() -> [Sentence] {
        // Rank a copy: sorting `sentences` in place would destroy the
        // document order that get_Sentence / get_SentenceScore rely on.
        let ranked = sentences.sorted { lhs, rhs in
            if lhs.get_Score() != rhs.get_Score() {
                return lhs.get_Score() > rhs.get_Score()
            }
            // Equal scores: the earlier sentence wins, independent of
            // whether the sort implementation is stable.
            return lhs.original_Index < rhs.original_Index
        }

        let summarySize = Int((0.1 * Double(sentences.count)).rounded())

        // Keep the best `summarySize` sentences, then restore reading order.
        return ranked.prefix(summarySize).sorted { lhs, rhs in
            lhs.original_Index < rhs.original_Index
        }
    }
}
This is the central logic of the program:
// Driver: read the word scores and the text, group the words into scored
// sentences, then print the summary.

let wordlist: WordList = WordList(fullyQualifiedPath: "word_scores.txt")
let document: DocumentReader = DocumentReader(fullyQualifiedPath: "text_document_to_summarize")
let words: [String] = document.get_words()

let fullPageDocument: Document = Document()
var newSentenceObject: Sentence = Sentence()

for word in words {
    let hasTerminator = word.range(of: ".") != nil
        || word.range(of: "?") != nil
        || word.range(of: "!") != nil

    // A word without '.', '?' or '!' is scored as-is and stays in the
    // current sentence.
    guard hasTerminator else {
        let plainWord = Word(value: word, score: wordlist.lookup_word(word: word))
        newSentenceObject.add_Word(word: plainWord)
        continue
    }

    // The word carries punctuation: keep its full text for display, but look
    // up the score without its final character. `hasTerminator` guarantees
    // the word is non-empty.
    let lookupKey = String(word.dropLast())
    let punctuatedWord = Word(value: word, score: wordlist.lookup_word(word: lookupKey))
    newSentenceObject.add_Word(word: punctuatedWord)

    // NOTE(review): a word that also contains a quote, comma, colon or
    // apostrophe does NOT close the sentence (e.g. `end."`). This mirrors the
    // original branching; confirm that it is intended.
    let hasOtherPunctuation = word.range(of: "\"") != nil
        || word.range(of: ",") != nil
        || word.range(of: ":") != nil
        || word.range(of: "'") != nil

    if !hasOtherPunctuation {
        fullPageDocument.add_Sentence(sentence: newSentenceObject)
        // Start a fresh sentence under the same variable name for the words
        // that follow.
        newSentenceObject = Sentence()
    }
}
// NOTE(review): words collected after the last sentence-ending word are never
// added to the document.

let summary: [Sentence] = fullPageDocument.summarize()
for sentence in summary {
    print(sentence.display())
}