Expert Answer
ProjectTest.java
import static org.junit.Assert.*;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.util.Set;
public class ProjectTest
{
/**
* Default constructor for test class ProjectTest
*/
public ProjectTest() {
}
/**
* Sets up the test fixture.
*
* Called before every test case method.
*/
@Before
public void setUp() {
}
/**
* Tears down the test fixture.
*
* Called after every test case method.
*/
@After
public void tearDown() {
}
//TODO add new test cases from here include brief documentation
/**
* Test if the number of lines in a string output from Ngram.toString()
* is valid (i.e equal to the size of the alphabet of that Ngram)
* Also ensures that the sort, splice and constructor functions work
* as required to produce the required comparison
*/
@Test(timeout=1000)
public void testSensibleToStringSize() {
String[] stringsToTest = {“Hello my friend”,
“be”,
“Have a nice day you filthy animal”,
“asdfghjkl$$sdfghj%%”,
“2”,
“adadadadaaaaa”,
” “};
Integer[] ngramSizesToTest = {1, 2, 3, 4, 5};
NgramAnalyser analysis;
String analysisString;
int i = ngramSizesToTest[0];
String s = stringsToTest[5];
if (i > s.length()) {
try {
analysis = new NgramAnalyser(i, s);
} catch (IllegalArgumentException e) {
assertEquals(0, 0);
}
} else {
analysis = new NgramAnalyser(i, s);
analysisString = analysis.toString();
//Number of lines is equal to the number of n’s plus 1
int numberofLines = analysisString.length() –
analysisString.replace(“n”, “”).length() + 1;
assert(numberofLines >= analysis.getAlphabetSize());
}
}
/**
* Tests various aspects of the getDistinctNgrams function
* inlcuding set length with comparison to basic boundaries
*/
@Test(timeout=1000)
public void testGetDistinctNgrams() {
String[] stringsToTest = {
“123!@#123!@#”,
“adadadadadadadad”,
“cadadcdaadcdbed”,
“aaaaaa”,
“HOWWEYVUFXBINEF”
};
String stringToTest = stringsToTest[0];
int ngramSize = 2;
NgramAnalyser analysis = new NgramAnalyser(ngramSize, stringToTest);
Set<String> distinctNgrams = analysis.getDistinctNgrams();
int distinctNgramCount = analysis.getDistinctNgramCount();
int totalNgramCount = analysis.getNgramCount();
//Test that there are fewer or equal distinct Ngrams than total Ngrams
assert(distinctNgramCount <= totalNgramCount);
//Test that there are fewer or equal distinct Ngrams than the size
//of the analysed string
assert(distinctNgramCount <= stringToTest.length());
//Test that the alphabet size is smaller than
//or equal to the number of distinct NGrams
assert(analysis.getAlphabetSize() <= distinctNgramCount);
}
/**
* Tests the NgramAnalyser function for more complicated and longer ngrams
*
*/
@Test(timeout=1000)
public void testNgramAnalyser() {
String stringToTest = “baaaaaaaaaamsdbfajeduhgtraaaab”;
int ngramSize = 16;
NgramAnalyser analysis = new NgramAnalyser(ngramSize, stringToTest);
//Test toString method
String toString = analysis.toString();
//System.out.println(toString); //REMOVE BEFORE SUBMITTING!!!!!
//Test that ngramCount = length of the string
assert(analysis.getNgramCount() == stringToTest.length());
}
@Test(timeout=1000)
public void testLaplaceExample() {
assertEquals(0,1); //TODO replace with test code
}
@Test(timeout=1000)
public void testSimpleExample() {
assertEquals(0,1); //TODO replace with test code
}
@Test
public void testTask3example()
{
MarkovModel model = new MarkovModel(2,”aabcabaacaac”);
ModelMatcher match = new ModelMatcher(model,”aabbcaac”);
assertEquals(0,1); //TODO replace with test code
}
}
MarkovModel.java
public class MarkovModel
{
/** Markov model order parameter */
int k;
/** ngram model of order k */
NgramAnalyser ngram;
/** ngram model of order k+1 */
NgramAnalyser n1gram;
/**
* Construct an order-k Markov model from string s
* @param k int order of the Markov model
* @param s String input to be modelled
*/
public MarkovModel(int k, String s)
{
ngram = new NgramAnalyser(k, s);
n1gram = new NgramAnalyser((k+1), s);
}
/**
* @return order of this Markov model
*/
public int getK()
{
return k;
}
/** Estimate the probability of a sequence appearing in the text
* using simple estimate of freq seq / frequency front(seq).
* @param sequence String of length k+1
* @return double probability of the last letter occurring in the
* context of the first ones or 0 if front(seq) does not occur.
*/
public double simpleEstimate(String sequence) {
double prob;
String seqNotLast = sequence.substring(0, sequence.length()-1);
if (ngram.getDistinctNgrams().contains(seqNotLast))
{
double n1g = n1gram.getNgramFrequency(sequence);
double ng = ngram.getNgramFrequency(seqNotLast);
try{
prob = (n1g/ng);
}
catch(ArithmeticException e){
return 0.0;
}
return prob;
}
else
{
return 0.0;
}
}
/**
* Calculate the Laplacian probability of string obs given this Markov model
* @input sequence String of length k+1
* @return Laplacian Probability
*/
public double laplaceEstimate(String sequence)
{
//TODO replace this line with your code
String context = sequence.substring(0, sequence.length()-1);
double npc = n1gram.getNgramFrequency(sequence);
double np = ngram.getNgramFrequency(context);
double laplace;
laplace = (npc + 1)/(np + ngram.getAlphabetSize());
return laplace;
}
/**
* @return String representing this Markov model
*/
public String toString()
{
//TODO replace this line with your code
String toRet = “”;
String k = Integer.toString(getK());
toRet += (k + “n”);
toRet += (Integer.toString(ngram.getAlphabetSize()) + “n”);
toRet += ngram.toString() + n1gram.toString();
return toRet;
}
}
NgramAnalyser.java
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Set;
import java.util.HashSet;
import java.util.Arrays;
//packages for iterating through hashmap
import java.util.Map;
import java.util.Iterator;
public class NgramAnalyser
{
/** dictionary of all distinct n-grams and their frequencies */
private HashMap<String,Integer> ngram;
/** number of distinct characters in the input */
private int alphabetSize;
/** n-gram size for this object (new field) */
private int ngramSize;
/** input length for this object (new field) */
private int inputLength;
/**
* Analyse the frequency with which distinct n-grams, of length n,
* appear in an input string.
* n-grams at the end of the string wrap to the front
* e.g. “abbbbc” includes “bca” and “cab” in its 3-grams
* @param int n size of n-grams to create
* @param String inp input string to be modelled
*/
public NgramAnalyser(int n, String inp) {
if(inp != null && inp != “” && n > 0 && n <= inp.length()) {
this.ngramSize = n;
this.inputLength = inp.length();
this.ngram = new HashMap<>(inp.length(), inp.length());
for (int i = 0; i < inp.length(); i++) { //loops through each character in inp
String currentNGram = “”; //new nGram starting at ith position
for (int j = i; j-i < n ; j++) { //starting from the ith character, loop n characters after this
currentNGram = currentNGram.concat(inp.substring(j%inp.length(), j%inp.length()+1)); //concatonates the jth char to currNGram
}
if (ngram.containsKey(currentNGram)) { //if the ngram exists, add one to its frequency
ngram.put(currentNGram, ngram.get(currentNGram) +1);
} else {
ngram.put(currentNGram, 1); //otherwise create a key for this ngram
}
}
//Prints the ngram
/*
Set set = ngram.entrySet();
Iterator iterator = set.iterator();
while(iterator.hasNext()) {
Map.Entry mentry = (Map.Entry)iterator.next();
System.out.print(“key is: “+ mentry.getKey() + ” & Value is: “);
System.out.println(mentry.getValue());
}
*/
//Alphabet size calculation
if (n != 1) {
NgramAnalyser alpha = new NgramAnalyser(inp);
this.alphabetSize = alpha.getDistinctNgramCount(); //find alphabet size by getting number of distinct 1-grams
} else {
this.alphabetSize = this.getDistinctNgramCount(); // 1-grams are simply a list of distinct characters, also bottoms recursion.
}
} else {
throw new IllegalArgumentException(“ngram size must be between 1 and the length of the input string. Input string must not be null or empty.”);
}
}
/**
* Analyses the input text for n-grams of size 1.
*/
public NgramAnalyser(String inp) {
this(1,inp);
}
/**
* @return int the size of the alphabet of a given input
*/
public int getAlphabetSize() {
return alphabetSize;
}
/**
* @return the total number of distinct n-grams appearing
* in the input text.
*/
public int getDistinctNgramCount() {
//TODO replace this line with your code
return ngram.size();
}
/**
* @return Return a set containing all the distinct n-grams
* in the input string.
*/
public Set<String> getDistinctNgrams() {
//TODO replace this line with your code
return ngram.keySet();
}
/**
* @return the total number of n-grams appearing
* in the input text (not requiring them to be distinct)
*/
public int getNgramCount() {
//TODO replace this line with your code
return this.inputLength;
}
/** Return the frequency with which a particular n-gram appears
* in the text. If it does not appear at all, return 0.
*
* @param ngram The n-gram to get the frequency of
* @return The frequency with which the n-gram appears.
*/
public int getNgramFrequency(String ngram) {
//TODO replace this line with your code
return this.ngram.get(ngram);
}
/**
* Generate a summary of the ngrams for this object.
* @return a string representation of the n-grams in the input text
* comprising the ngram size and then each ngram and its frequency
* where ngrams are presented in alphabetical order.
*/
public String toString() {
//TODO replace this line with your code
String[] keys = ngram.keySet().toArray(new String[0]);
Arrays.sort(keys);
Integer a = ngramSize;
String answer = a.toString();
for (int i =0; i < ngram.keySet().size();i++) {
answer = answer.concat(“n” + keys[i] + ” “);
answer = answer.concat(this.getNgramFrequency(keys[i]) + “”);
}
return answer;
}
}