Showing posts with label Class. Show all posts
Showing posts with label Class. Show all posts

Thursday, December 24, 2015

Infix Notation Parser via Shunting-Yard Algorithm





Infix notation is the typical notation for writing equations in algebra.
An example would be: 7 - (2 * 5)

Parsing such an equation is not a trivial task, but I wanted one for my EquationFinder project, as I wanted to respect order of operations.

Strategies include substitution/replacement algorithms, recursion to parse into a tree and then tree traversal, or converting the infix notation to reverse polish notation (RPN), also known as post-fix notation, then using a stack based postfix notation evaluator. I choose the latter, as such algorithms are well defined in many places on the web.

My code consists of 3 classes, all static:
(Links go to the .cs file on GitHub)
  1. InfixNotation - this simply holds a few public variables and calls the public methods on the below two classes.
  2. ShuntingYardAlgorithm - this converts an equation in infix notation into postfix notation (aka RPN).
  3. PostfixNotation - this evaluates the equation in postfix notation and returns a numerical result value.

In order to implement the shunting-yard algorithm and the postfix evaluator, I simply wrote the steps to the algorithms as written on Wikipedia:
(Links go to the Wikipedia article)
Link to the Shunting-Yard Algorithm to convert Infix notation to Postfix notation.
Link to the Postfix Notation Evaluation Algorithm.


The code for this is pretty extensive, but I will prettify it and present it below. Alternatively, you can view and download the code from the MathNotationConverter project on my GitHub.


InfixNotationParser:


public static class InfixNotation
{
   public static string Numbers = "0123456789";
   public static string Operators = "+-*/^";

   public static bool IsNumeric(string text)
   {
      return string.IsNullOrWhiteSpace(text) ? false : text.All(c => Numbers.Contains(c));
   }

  public static int Evaluate(string infixNotationString)
  {
    string postFixNotationString = ShuntingYardConverter.Convert(infixNotationString);
    int result = PostfixNotation.Evaluate(postFixNotationString);
    return result;
  }
}



ShuntingYardConverter 

(converts an equation from infix notation into postfix notation):

public static class ShuntingYardAlgorithm
{
   private static string AllowedCharacters = InfixNotation.Numbers + InfixNotation.Operators + "()";

   private enum Associativity
   {
      Left, Right
   }
   private static Dictionary<char, int> PrecedenceDictionary = new Dictionary<char, int>()
   {
      {'(', 0}, {')', 0},
      {'+', 1}, {'-', 1},
      {'*', 2}, {'/', 2},
      {'^', 3}
   };
   private static Dictionary<char, Associativity> AssociativityDictionary = new Dictionary<char, Associativity>()
   {
      {'+', Associativity.Left},
      {'-', Associativity.Left},
      {'*', Associativity.Left},
      {'/', Associativity.Left},
      {'^', Associativity.Right}
   };

   private static void AddToOutput(List<char> output, params char[] chars)
   {
      if (chars != null && chars.Length > 0)
      {
         foreach (char c in chars)
         {
            output.Add(c);
         }
         output.Add(' ');
      }
   }
   
   public static string Convert(string infixNotationString)
   {
      if (string.IsNullOrWhiteSpace(infixNotationString))
      {
         throw new ArgumentException("Argument infixNotationString must not be null, empty or whitespace.", "infixNotationString");
      }

      List<char> output = new List<char>();
      Stack<char> operatorStack = new Stack<char>();
      string sanitizedString = new string(infixNotationString.Where(c => AllowedCharacters.Contains(c)).ToArray());

      string number = string.Empty;
      List<string> enumerableInfixTokens = new List<string>();
      foreach (char c in sanitizedString)
      {
         if (InfixNotation.Operators.Contains(c) || "()".Contains(c))
         {
            if (number.Length > 0)
            {
               enumerableInfixTokens.Add(number);
               number = string.Empty;
            }
            enumerableInfixTokens.Add(c.ToString());
         }
         else if (InfixNotation.Numbers.Contains(c))
         {
            number += c.ToString();
         }
         else
         {
            throw new Exception(string.Format("Unexpected character '{0}'.", c));
         }
      }

      if (number.Length > 0)
      {
         enumerableInfixTokens.Add(number);
         number = string.Empty;
      }

      foreach (string token in enumerableInfixTokens)
      {
         if (InfixNotation.IsNumeric(token))
         {
            AddToOutput(output, token.ToArray());
         }
         else if (token.Length == 1)
         {
            char c = token[0];

            if (InfixNotation.Numbers.Contains(c)) // Numbers (operands)
            {
               AddToOutput(output, c);
            }
            else if (InfixNotation.Operators.Contains(c)) // Operators
               if (operatorStack.Count > 0)
               {
                  char o = operatorStack.Peek();
                  if ((AssociativityDictionary[c] == Associativity.Left &&
                     PrecedenceDictionary[c] <= PrecedenceDictionary[o])
                        ||
                     (AssociativityDictionary[c] == Associativity.Right &&
                     PrecedenceDictionary[c] < PrecedenceDictionary[o]))
                  {
                     AddToOutput(output, operatorStack.Pop());
                  }
               }
               operatorStack.Push(c);
            }
            else if (c == '(') // open brace
            {
               operatorStack.Push(c);
            }
            else if (c == ')') // close brace
            {
               bool leftParenthesisFound = false;
               while (operatorStack.Count > 0 )
               {
                  char o = operatorStack.Peek();
                  if (o != '(')
                  {
                     AddToOutput(output, operatorStack.Pop());
                  }
                  else
                  {
                     operatorStack.Pop();
                     leftParenthesisFound = true;
                     break;
                  }
               }

               if (!leftParenthesisFound)
               {
                  throw new FormatException("The algebraic string contains mismatched parentheses (missing a left parenthesis).");
               }
            }
            else // wtf?
            {
               throw new Exception(string.Format("Unrecognized character '{0}'.", c));
            }
         }
         else
         {
            throw new Exception(string.Format("String '{0}' is not numeric and has a length greater than 1.", token));
         }
      } // end foreach

      while (operatorStack.Count > 0)
      {
         char o = operatorStack.Pop();
         if (o == '(')
         {
            throw new FormatException("The algebraic string contains mismatched parentheses (extra left parenthesis).");
         }
         else if (o == ')')
         {
            throw new FormatException("The algebraic string contains mismatched parentheses (extra right parenthesis).");
         }
         else
         {
            AddToOutput(output, o);
         }
      }

      return new string(output.ToArray());
   }
}










PostfixNotation

(evaluates the postfix notation and returns a numerical result):

public static class PostfixNotation
{
   private static string AllowedCharacters = InfixNotation.Numbers + InfixNotation.Operators + " ";

   public static int Evaluate(string postfixNotationString)
   {
      if (string.IsNullOrWhiteSpace(postfixNotationString))
      {
         throw new ArgumentException("Argument postfixNotationString must not be null, empty or whitespace.", "postfixNotationString");
      }

      Stack<string> stack = new Stack<string>();
      string sanitizedString = new string(postfixNotationString.Where(c => AllowedCharacters.Contains(c)).ToArray());
      List<string> enumerablePostfixTokens = sanitizedString.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries).ToList();

      foreach (string token in enumerablePostfixTokens)
      {
         if (token.Length > 0)
         {
            if (token.Length > 1)
            {
               if (InfixNotation.IsNumeric(token))
               {
                  stack.Push(token);
               }
               else
               {
                  throw new Exception("Operators and operands must be separated by a space.");
               }
            }
            else
            {
               char tokenChar = token[0];

               if (InfixNotation.Numbers.Contains(tokenChar))
               {
                  stack.Push(tokenChar.ToString());
               }
               else if (InfixNotation.Operators.Contains(tokenChar))
               {
                  if (stack.Count < 2)
                  {
                     throw new FormatException("The algebraic string has not sufficient values in the expression for the number of operators.");
                  }

                  string r = stack.Pop();
                  string l = stack.Pop();

                  int rhs = int.MinValue;
                  int lhs = int.MinValue;

                  bool parseSuccess = int.TryParse(r, out rhs);
                  parseSuccess &= int.TryParse(l, out lhs);
                  parseSuccess &= (rhs != int.MinValue && lhs != int.MinValue);

                  if (!parseSuccess)
                  {
                     throw new Exception("Unable to parse valueStack characters to Int32.");
                  }

                  int value = int.MinValue;
                  if (tokenChar == '+')
                  {
                     value = lhs + rhs;
                  }
                  else if (tokenChar == '-')
                  {
                     value = lhs - rhs;
                  }
                  else if (tokenChar == '*')
                  {
                     value = lhs * rhs;
                  }
                  else if (tokenChar == '/')
                  {
                     value = lhs / rhs;
                  }
                  else if (tokenChar == '^')
                  {
                     value = (int)Math.Pow(lhs, rhs);
                  }

                  if (value != int.MinValue)
                  {
                     stack.Push(value.ToString());
                  }
                  else
                  {
                     throw new Exception("Value never got set.");
                  }
               }
               else
               {
                  throw new Exception(string.Format("Unrecognized character '{0}'.", tokenChar));
               }
            }
         }
         else
         {
            throw new Exception("Token length is less than one.");
         }
      }

      if (stack.Count == 1)
      {
         int result = 0;
         if (!int.TryParse(stack.Pop(), out result))
         {
            throw new Exception("Last value on stack could not be parsed into an integer.");
         }
         else
         {
            return result;
         }
      }
      else
      {
         throw new Exception("The input has too many values for the number of operators.");
      }

   } // method
} // class


Another alternative technique is to using the Shunting-Yard Algorithm to turn infix notation into an abstract syntax tree (Linq.Expressions anyone?). I will likely post this technique later.


Other blog posts by me that are related to this article are the Threaded Equation Finder, a Mixed Radix System Calulator and Drawing Text Along a Bezier Spline.



Wednesday, February 4, 2015

EditLabel UserControl



EditLabel is editable Label control, who's text can be modified at run-time by double-clicking on it. Under the hood, it contains a Label control and TextBox control. It works like a Label control until you double-click it with the mouse. When double-clicked it enters 'edit-mode' where a TextBox overlays the Label. The control will AutoSize to the characters you type in the TextBox, updating the Label's size at the same time. You can leave the control's edit mode by hitting either enter, the escape key, or letting the control lose focus. After editing, the control acts like a label again, with the updated text. You can check out the code for the EditLabel project here on my GitHub.

The EditLabel isn't too interesting by itself, but I wanted to blog about it for all the interesting attributes that I never knew about until I started getting into designing controls, and controlling how controls are displayed in the designer window in visual studio.

The first thing my EditLabel control needed was and event that the parent form could subscribe to know when the text changed. Creating the event simply isn't enough. If you want the event to show up in the Visual Studio's Form Designer when you click on events, you need to add the Browsable and EditorBrowsable attribute. This will make is show up in the designer. You can control the description that appears as well as what category it shows up under when viewing events/properties in the Categorize view as opposed to Alphabetical view by using the Description and Category attributes respectively:

[Category("Property Changed")]
[Description("Event raised when the value of the Text property is changed on Control.")]
[Browsable(true), EditorBrowsable(EditorBrowsableState.Always)]
public new event EventHandler TextChanged;

There is also the notion of default events. That is, the event for which a handler is added when you double click the control in the designer. For example, when you double click a TextBox, it automatically adds the TextChanged event, instead of, say, the KeyDown event. To make an event the default event, add the DefaultEvent attribute at the control's class-level declaration:

[DefaultEvent("TextChanged")]
public partial class EditLabel : UserControl
{
   ...

Another thing I wanted was the Text property. Like a Label or a TextBox control, I wanted a public Text property that could be edited at design time. However, every time I re-built the project, the text would get reset to the default. As it turns out, if you want modifications to properties to be persisted, it must be serialized to the MainForm.Designer.cs, or whatever the auto-generated Designer.cs file is. In order to tell the designer to serialize changes made at design time to Designer.cs file, you have to add the DesignerSerializationVisibility attribute above the property. Here is the attributes and declaration for the Text property:

[Category("Appearance")]
[Description("The text associated with the control.")]
[Browsable(true), EditorBrowsable(EditorBrowsableState.Always)]
[DesignerSerializationVisibility(DesignerSerializationVisibility.Visible)]
public override string Text
{
   get { return ctrlLabel.Text; }
   set { ChangeText(value); }
}

You can view the full EditLabel.cs here, or view the entire EditLabel project here.



Tuesday, January 13, 2015

EntityJustWorks - C# class/object/entity to SQL database/script/command mapper.



Note by author:

   Since writing this, I have expanded on this idea quite a bit. I have written a lightweight ORM class library that I call EntityJustWorks.

   The full project can be found on 
GitHub or CodePlex.


   EntityJustWorks not only goes from a class to DataTable (below), but also provides:


Security Warning:
This library generates dynamic SQL, and has functions that generate SQL and then immediately executes it. While it its true that all strings funnel through the function Helper.EscapeSingleQuotes, this can be defeated in various ways and only parameterized SQL should be considered SAFE. If you have no need for them, I recommend stripping semicolons ; and dashes --. Also there are some Unicode characters that can be interpreted as a single quote or may be converted to one when changing encodings. Additionally, there are Unicode characters that can crash .NET code, but mainly controls (think TextBox). You almost certainly should impose a white list:
string clean = new string(dirty.Where(c => "abcdefghijklmnopqrstuvwxyz0123456789.,\"_ !@".Contains(c)).ToArray());

PLEASE USE the SQLScript.StoredProcedure and DatabaseQuery.StoredProcedure classes to generate SQL for you, as the scripts it produces is parameterized. All of the functions can be altered to generate parameterized instead of sanitized scripts. Ever since people have started using this, I have been maintaining backwards compatibility. However, I may break this in the future, as I do not wish to teach one who is learning dangerous/bad habits. This project is a few years old, and its already showing its age. What is probably needed here is a total re-write, deprecating this version while keep it available for legacy users after slapping big warnings all over the place. This project was designed to generate the SQL scripts for standing up a database for a project, using only MY input as data. This project was never designed to process a USER'S input.! Even if the data isn't coming from an adversary, client/user/manually entered data is notoriously inconsistent. Please do not use this code on any input that did not come from you, without first implementing parameterization. Again, please see the SQLScript.StoredProcedure class for inspiration on how to do that.





    In this post, I bring the wagons 'round full circle and use what I have shown you in older posts to make a class that can create (or insert into) a table in an SQL database from a C# class object at run-time. The only prior knowledge needed would be the connection string. The name of the the table will be the name of the class Type, and the tables's columns are created from the class's public properties, with their names being the property names.

    If you want to take a data-first approach, you can generate the code to C# classes that mirrors a table in a relational database given a connection string and the table name, or use the new Emit class and generate a dynamic assembly at run-time that contains a class with public auto-properties matching the DataColumns that you can use to store DataRows in. Similarly, the name of class name will be the same name as the table, with each of the table's columns being represented by a public auto property on the class of the same name and type. An improvement here would be to create the class inside a dynamic assembly using System.Reflection.Emit. Anywho, I have been holding on to this code for too long I just want to get it out of the door.

    For those of you who just want the code:
      GitHub  (Download zip)
      BitBucket (Download)
      code.msdn.microsoft.com

    Going from a query to a DataTable is as simple as a call to SQLAdapter.Fill(). For populating the public properties of a class from a DataTable, see this post I made. To build a DataTable who's columns match the public properties of a class, check out this post here. Going from a DataTable to a class as a C# code file is covered in this previous post.

    Going from a DataTable to SQL CREATE TABLE or INSERT INTO scripts that can be passed to ExecuteNonQuery() to execute the command, see below.

    Here is the INSERT INTO code. This code takes advantage of some functions in the Helper class that converts a DataTable's ColumnNames and Row's Values as a List of String, which is then passed into a function that takes a list of string and formats it with a delimiter string and prefix and postfix strings so I can easily format the list to look like the COLUMNS and VALUES part of an SQL INSERT INTO statement. For example: INSERT INTO ([FullName], [Gender], [BirthDate]) VALUES ('John Doe', 'M', '10/3/1981''). If you want to view how the functions RowToColumnString(), RowToValueString(), and ListToDelimitedString() work, click here to view Helper.cs.

public static string InsertInto<T>(params T[] ClassObjects) where T : class
{
   DataTable table = Map.ClassToDatatable<T>(ClassObjects);
   return InsertInto(table);   // We don't need to check IsValidDatatable() because InsertInto does
}

public static string InsertInto(DataTable Table)
{
   if (!Helper.IsValidDatatable(Table))
      return string.Empty;

   StringBuilder result = new StringBuilder();
   foreach (DataRow row in Table.Rows)
   {
      if (row == null || row.Table.Columns.Count < 1 || row.ItemArray.Length < 1)
         return string.Empty;

      string columns = Helper.RowToColumnString(row);
      string values = Helper.RowToValueString(row);

      if (string.IsNullOrWhiteSpace(columns) || string.IsNullOrWhiteSpace(values))
         return string.Empty;

      result.AppendFormat("INSERT INTO [{0}] {1} VALUES {2}", Table.TableName, columns, values);
   }

   return result.ToString();
}

    Here is the CREATE TABLE CODE. A very important function here is the GetDataTypeString() member, which converts the Column.DataType to the SQL equivalent as a string, which is used to craft a CREATE TABLE script. I use static strings formatted to work with String.Format to replace the crucial bits in the CREATE TABLE command:

public static string CreateTable<T>(params T[] ClassObjects) where T : class
{
 DataTable table = Map.ClassToDatatable<T>(ClassObjects);
 return Script.CreateTable(table);
}

public static string CreateTable(DataTable Table)
{
   if (Helper.IsValidDatatable(Table, IgnoreRows: true))
      return string.Empty;

   StringBuilder result = new StringBuilder();
   result.AppendFormat("CREATE TABLE [{0}] ({1}", Table.TableName, Environment.NewLine);

   bool FirstTime = true;
   foreach (DataColumn column in Table.Columns.OfType<DataColumn>())
   {
      if (FirstTime) FirstTime = false;
      else
         result.Append(",");

      result.AppendFormat("[{0}] {1} {2}NULL{3}",
         column.ColumnName,
         GetDataTypeString(column.DataType),
         column.AllowDBNull ? "" : "NOT ",
         Environment.NewLine
      );
   }
   result.AppendFormat(") ON [PRIMARY]{0}GO", Environment.NewLine);

   return result.ToString();
}

private static string GetDataTypeString(Type DataType)
{
   switch (DataType.Name)
   {
      case "Boolean": return "[bit]";
      case "Char": return "[char]";
      case "SByte": return "[tinyint]";
      case "Int16": return "[smallint]";
      case "Int32": return "[int]";
      case "Int64": return "[bigint]";
      case "Byte": return "[tinyint] UNSIGNED";
      case "UInt16": return "[smallint] UNSIGNED";
      case "UInt32": return "[int] UNSIGNED";
      case "UInt64": return "[bigint] UNSIGNED";
      case "Single": return "[float]";
      case "Double": return "[double]";
      case "Decimal": return "[decimal]";
      case "DateTime": return "[datetime]";
      case "Guid": return "[uniqueidentifier]";
      case "Object": return "[variant]";
      case "String": return "[nvarchar](250)";
      default: return "[nvarchar](MAX)";
   }
}


...


    Here is definition of all the public members:

namespace EntityJustWorks.SQL
{
   public static class Convert
   {
      public static string SQLTableToCSharp(string ConnectionString, string TableName);
      public static bool ClassToSQLTable(string ConnectionString, params T[] ClassCollection);
   }
   
   public static class Code
   {
      public static string SQLTableToCSharp(string ConnectionString, string TableName);
      public static string DatatableToCSharp(DataTable Table);
   }

   public static class Script
   {
      public static string InsertInto<T>(params T[] ClassObjects);
      public static string InsertInto(DataTable Table);
      public static string CreateTable<T>(params T[] ClassObjects);
      public static string CreateTable(DataTable Table);
   }
   
   public static class Map
   {
      public static IList<T> DatatableToClass<T>(DataTable Table);
      public static DataTable ClassToDatatable<T>(params T[] ClassCollection);
      public static DataTable ClassToDatatable<T>();
   }

   public static class Query
   {
      public static IList<T> QueryToClass<T>(string ConnectionString, string FormatString_Query,
                                                            params object[] FormatString_Parameters);
      public static DataTable QueryToDataTable(string ConnectionString, string FormatString_Query,
                                                            params object[] FormatString_Parameters);
      public static T QueryToScalarType<T>(string ConnectionString, string FormatString_Query,
                                                            params object[] FormatString_Parameters);
      public static int ExecuteNonQuery(string ConnectionString, string FormatString_Command,
                                                            params object[] FormatString_Parameters);
   }
   
   public static class Helper
   {
      public static bool IsValidDatatable(DataTable Table, bool IgnoreRows = false);
      public static bool IsCollectionEmpty<T>(IEnumerable<T> Input);
      public static bool IsNullableType(Type Input);
   }
}

    Please feel free to comment, either on my GitHub or my blog, with criticisms or suggestions.

Wednesday, December 10, 2014

Humorous and unhelpful Exception Class hierarchy



This post covers some of the lesser-known exception classes that exist. It is okay if you have never heard of these exception classes before... in fact, if you haven’t, that is probably a very, very good thing...





WTFException
The exception that is thrown when the code has encountered a genuine ‘WTF’ moment. This is usually only reserved for areas of the code that should never be reached, or indicates a state/condition that is either invalid, unexpected, or simply illogical. It can also be thrown to indicate that the developer has no idea why something is not working, and is at his wits end, or optionally at the end of some code that the developer was absolutely certain would never work.

See Also: CodeUnreachableException, ThisShouldNeverHappenException, and SpaceTimeException.


class WTFException : Exception
{
    public WTFException()
        : base() { }
    
    public WTFException(string message, params object[] messageParams)
        : base(string.Format(message, messageParams)) { }
    
    public WTFException(string message, Exception innerException)
        : base(message, innerException) { }
}




SpaceTimeException
The exception that is thrown when there is a serious problem with the fabric of space/time. Typically, the error message property will be empty or missing, as the actual even that caused this exception to be thrown has not yet occurred in the same timeline that your applications exists in. No attempt should be made to retrieve the InnerException object, as a black hole could result that would be almost certain to annihilate your code base, or possibly all of humanity. The only thing we can be certain of, however, is that your boss will be very cross with you and you will most likely get fired.


class SpaceTimeException : Exception
{
    public SpaceTimeException()
        : base() { }

    public SpaceTimeException(string message, Exception innerException)
        : base(message, innerException) { }
}




ArgumentInvalidException
You argument is invalid. You are wrong, your reasoning makes no logical sense, and you probably failed debate class in school. Even if your argument has some semblance of being based in logic or reality, you cannot argue with the compiler, there is no point, so quit trying.


class ArgumentInvalidException : Exception
{
    public ArgumentInvalidException()
        : base() { }

    public ArgumentInvalidException(string message, Exception innerException)
        : base(message, innerException) { }   
}





CodeUnreachableExeception

This exception is thrown when the assembly executes a block of code that is unreachable. This exception should never get thrown, and is, by definition, impossible. Catching this exception is like catching a leprechaun, and should it ever happen, the CLR will ask you for three wishes.


class CodeUnreachableExeception : Exception
{
    public CodeUnreachableExeception()
        : base() { }

    public CodeUnreachableExeception(string message, Exception innerException)
        : base(message, innerException) { }
}




OutOfCoffeeExecption
This exception is thrown when a developer has ran out of coffee or sufficient motivation to complete the function or feature. The exception is thrown at the exact line that the developer got up for a coffee break and never returned.


class OutOfCoffeeExecption : Exception
{
    public OutOfCoffeeExecption()
        : base() { }

    public OutOfCoffeeExecption(string message, Exception innerException)
        : base(message, innerException) { }
}




UnknownException
This exception is not thrown because a parameter or method specified is unknown, but rather that the implementing developer did not have the knowledge to implement the feature or method you just called, and threw this exception instead as quick and dirty solution.


class UnknownException : Exception
{
    public UnknownException()
        : base() { }

    public UnknownException(string message, Exception innerException)
        : base(message, innerException) { }
}




ThisIsNotTheExceptionYouAreLookingForException
What? Oh this? Oh, this was not meant for you. Now move along...


class ThisIsNotTheExceptionYouAreLookingForException : Exception
{
    /* Move along... */
}



Wednesday, December 3, 2014

Word Frequency Dictionary & Sorted Occurrence Ranking Dictionary for Generic Item Quantification and other Statistical Analyses



Taking a little inspiration from DotNetPerl's MultiMap, I created a generic class that uses a Dictionary under the hood to create a keyed collection that tracks the re-occurrences or frequency matching or duplicate items of arbitrary type. For lack of a better name, I call it a FrequencyDicionary. Instead of Key/Value pairs, the FrequencyDictionary has the notion of a Item/Frequency pair, with the key being the Item. I strayed from Key/Value because I may ultimately end up calling the Item the Value.

I invented the FrequencyDictionary while writing a pisg-like IRC log file statistics generator program. It works well for word frequency analysis. After some pre-processing/cleaning of the text log file, I parse each line by spaces to get an array of words. I then use FrequencyDictionary.AddRange to add the words to my dictionary.

The FrequencyDictionary is essentially a normal Dictionary (with T being type String in this case), however when you attempt to add a key that already exists, it simply increments the Value integer. After processing the file, you essentially have a Dictionary where the Key is a word that was found in the source text, and the Value is the number of occurrences of that word in the source text.

Taking the idea even further, I created complementary Dictionary that essentially a sorted version of FrequencyDictionary with the Key/Values reversed. I call it a RankingDictionary because the Keys represent the number of occurrences of a word, with the Values being a List of words that occurred that many times in the source text. Its called a RankingDictionary because I was using it to produce a top 10 ranking of most popular words, most popular nicks, ect.

The FrequencyDictionary has a GetRankingDictionary() method in it to make the whole thing very easy to use. Typically I don't use the FrequencyDictionary too extensively, but rather as a means to get a RankingDictionary, which I base the majority of my IRC statistics logic on. The RankingDictionary also proved very useful for finding Naked Candidates and Hidden Pairs or Triples in my Sudoku solver application that I will be releasing on Windows, Windows Phone and will be blogging about shortly. Hell, I was even thinking about releasing the source code to my Sudoku App, since its so elegant and a great example of beautiful, readable code to a complex problem.

Anyways, the code for the Frequency and Ranking Dictionary is heavily commented with XML Documentation Comments, so I'm going to go ahead and let the code speak for itself. I will add usage examples later. In fact, I will probably release the pisg-like IRC Stats Prog source code since I don't think I'm going to go any farther with it.

Limitations: I ran into problems trying to parse large text files approaching 3-4 megabytes. Besides taking up a bunch of memory, the Dictionary begins to encounter many hash collisions once the size of the collection gets large enough. This completely kills performance, and eventually most the time is spent trying to resolve collisions, so it grinds to a near halt. You might notice the constructor public FrequencyDictionary(int Capacity), where you can specify a maximum capacity for the Dictionary. A good, safe ceiling is about 10,000. A better implementation of the GetHash() method might be in order, but is not a problem I have felt like solving yet.




/// <summary>
/// A keyed collection of Item/Frequency pairs, (keyed off Item).
/// If a duplicate Item is added to the Dictionary, the Frequency for that Item is incremented.
/// </summary>
public class FrequencyDictionary<ITM>
{
  // The underlying Dictionary
  private Dictionary<ITM, int> _dictionary;
  
  /// <summary>
  /// Initializes a new instance of the FrequencyDictionary that is empty.
  /// </summary>
  public FrequencyDictionary() { _dictionary = new Dictionary<ITM, int>(); }
  
  /// <summary>
  /// Initializes a new instance of the FrequencyDictionary that has a maximum Capacity limit.
  /// </summary>
  public FrequencyDictionary(int Capacity) { _dictionary = new Dictionary<ITM, int>(); }
  
  /// <summary>
  /// Gets a collection containing the Items in the FrequencyDictionary.
  /// </summary>
  public IEnumerable<ITM> ItemCollection { get { return this._dictionary.Keys; } }
  
  /// <summary>
  /// Gets a collection containing the Frequencies in the FrequencyDictionary.
  /// </summary>
  public IEnumerable<int> FrequencyCollection { get { return this._dictionary.Values;} }
  
  /// <summary>
  /// Adds the specified Item to the FrequencyDictionary.
  /// </summary>
  public void Add(ITM Item)
  {
    if  ( this._dictionary.ContainsKey(Item))   { this._dictionary[Item]++; }
    else    { this._dictionary.Add(Item,1); }
  }
  
  /// <summary>
  /// Adds the elements of the specified array to the FrequencyDictionary.
  /// </summary>
  public void AddRange(ITM[] Items)
  {
    foreach(ITM item in Items) { this.Add(item); }
  }
  
  /// <summary>
  /// Gets the Item that occurs most frequently.
  /// </summary>
  /// <returns>A KeyValuePair containing the Item (key) and how many times it has appeard (value).</returns>
  public KeyValuePair<ITM,int> GetMostFrequent()
  {
    int maxValue = this._dictionary.Values.Max();
    return this._dictionary.Where(kvp => kvp.Value == maxValue).FirstOrDefault();
  }
  
  /// <summary>
  /// Gets the number of Item/Frequency pairs contained in the FrequencyDictionary.
  /// </summary>
  public int Count { get { return this._dictionary.Count; } }
  
  /// <summary>
  /// Returns an enumerator that iterates through the FrequencyDictionary.
  /// </summary>
  public IEnumerator<KeyValuePair<ITM,int>> GetEnumerator()
  {
    return this._dictionary.GetEnumerator();
  }
  
  /// <summary>
  /// Gets the Frequency (occurrences) associated with the specified Item.
  /// </summary>
  public int this[ITM Item]
  {
    get { if (this._dictionary.ContainsKey(Item)) { return this._dictionary[Item]; } return 0; }
  }
  
  /// <summary>
  /// Creates a RankingDictionary from the current FrequencyDictionary.
  /// </summary>
  /// <returns>A RankingDictionary of Frequency/ItemCollection pairs ordered by Frequency.</returns>
  public RankingDictionary<ITM> GetRankingDictionary()
  {
    RankingDictionary<ITM> result = new RankingDictionary<ITM>();
    foreach(KeyValuePair<ITM,int> kvp in _dictionary)
    {
      result.Add(kvp.Value,kvp.Key);
    }
    return result;
  }
  
  /// <summary>
  /// Displays usage information for FrequencyDictionary 
  /// </summary>
  public override string ToString()
  {
    return "FrequencyDictionary<Item, Frequency> : Key=\"Item=\", Value=\"Frequency\"\".";
  }
}




And now the RankingDictionary:

/// <summary>
/// A keyed collection of Frequency/ItemCollection pairs that is ordered by Frequency (rank).
/// If an Item is added that has the same Frequency as another Item, that Item is added to the Item collection for that Frequency.
/// </summary>
public class RankingDictionary<ITM>
{
  // Underlying dictionary
  SortedDictionary<int,List<ITM>> _dictionary;
  
  /// <summary>
  /// Initializes a new instance of the FrequencyDictionary that is empty.
  /// </summary>
  public RankingDictionary() { _dictionary = new SortedDictionary<int,List<ITM>>(new FrequencyComparer()); }
  
  /// <summary>
  /// The Comparer used to compare Frequencies.
  /// </summary>
  public class FrequencyComparer : IComparer<int>
  {
    public int Compare(int one,int two) { if(one == two) return 0; else if(one > two) return -1; else return 1; }
  }
  
  /// <summary>
  /// Gets a collection containing the Frequencies in the RankingDictionary.
  /// </summary>
  public IEnumerable<int> FrequencyCollection { get { return this._dictionary.Keys; } }
  
  /// <summary>
  /// Gets a collection containing the ItemCollection in the RankingDictionary.
  /// </summary>
  public IEnumerable<List<ITM>> ItemCollections   { get { return this._dictionary.Values; } }
  
  /// <summary>
  /// Adds the specified Frequency and Item to the RankingDictionary.
  /// </summary>
  public void Add(int Frequency, ITM Item)
  {
    List<ITM> itemCollection = new List<ITM>();
    itemCollection.Add(Item);
    // If the specified Key is not found, a set operation creates a new element with the specified Key
    this._dictionary[Frequency] = itemCollection;
  }
  
  /// <summary>
  ///  Gets the number of Frequency/ItemCollection pairs contained in the RankingDictionary.
  /// </summary>
  public int Count { get { return this._dictionary.Count; } }
  
  /// <summary>
  /// Returns an enumerator that iterates through the RankingDictionary.
  /// </summary>
  public IEnumerator<KeyValuePair<int,List<ITM>>> GetEnumerator()
  {
    return this._dictionary.GetEnumerator();
  }
  
  /// <summary>
  /// Gets the ItemCollection associated with the specified Frequency.
  /// </summary>
  public List<ITM> this[int Frequency]
  {
    get
    {
      List<ITM> itemCollection;
      if (this._dictionary.TryGetValue(Frequency,out itemCollection)) return itemCollection;
      else return new List<ITM>();
    }
  }
  
  /// <summary>
  /// Displays usage information for RankingDictionary 
  /// </summary>
  public override string ToString()
  {
    return "RankingDictionary<Frequency, List<Item>> : Key=\"Frequency\", Value=\"List<Item>\".";
  }
}


Usage examples will be posted later.

Tuesday, October 28, 2014

Create C# Class Code From a DataTable using CodeDOM



Note by author:

   Since writing this, I have expanded on this idea quite a bit. I have written a lightweight ORM class library that I call EntityJustWorks.

   The full project can be found on
GitHub or CodePlex.


   EntityJustWorks not only goes from a class to DataTable (below), but also provides:

Security Warning:
This library generates dynamic SQL, and has functions that generate SQL and then immediately executes it. While it its true that all strings funnel through the function Helper.EscapeSingleQuotes, this can be defeated in various ways and only parameterized SQL should be considered SAFE. If you have no need for them, I recommend stripping semicolons ; and dashes --. Also there are some Unicode characters that can be interpreted as a single quote or may be converted to one when changing encodings. Additionally, there are Unicode characters that can crash .NET code, but mainly controls (think TextBox). You almost certainly should impose a white list: string clean = new string(dirty.Where(c => "abcdefghijklmnopqrstuvwxyz0123456789.,\"_ !@".Contains(c)).ToArray()); 
PLEASE USE the SQLScript.StoredProcedure and DatabaseQuery.StoredProcedure classes to generate SQL for you, as the scripts it produces is parameterized. All of the functions can be altered to generate parameterized instead of sanitized scripts. Ever since people have started using this, I have been maintaining backwards compatibility. However, I may break this in the future, as I do not wish to teach one who is learning dangerous/bad habits. This project is a few years old, and its already showing its age. What is probably needed here is a total re-write, deprecating this version while keep it available for legacy users after slapping big warnings all over the place. This project was designed to generate the SQL scripts for standing up a database for a project, using only MY input as data. This project was never designed to process a USER'S input.! Even if the data isn't coming from an adversary, client/user/manually entered data is notoriously inconsistent. Please do not use this code on any input that did not come from you, without first implementing parameterization. Again, please see the SQLScript.StoredProcedure class for inspiration on how to do that.


So far I have posted several times on the DataTable class. I have shown how to convert a DataTable to CSV or tab-delimited file using the clipboard, how to create a DataTable from a class using reflection, as well as how to populate the public properties of a class from a DataTable using reflection. Continuing along these lines, I decided to bring the DataTable-To-Class wagons around full-circle and introduce a class that will generate the C# code for the class that is used by the DataTableToClass<T> function, so you don't have to create it manually. The only parameter required to generate the C# class code is, of course, a DataTable.

The code below is rather trivial. It uses CodeDOM to build up a class with public properties that match the names and data types of the data columns of the supplied DataTable. I really wanted the output code to use auto properties. This is not supported by CodeDOM, however, so I used a little hack or workaround to accomplish the same thing. I simply added the getter and setter code for the property to the member's field name. CodeDOM adds a semicolon to the end of the CodeMemberField statement, which would cause the code not to compile, so I added two trailing slashes "//" to the end of the field name to comment out the semicolon. The whole point of creating auto properties was to have clean, succinct code, so after I generate the source code file, I clean up the commented-out semicolons by replacing every occurrence with an empty string. The main disadvantage of this 'workaround' is that the code cannot be used to generate a working class in Visual Basic code. I do have proper CodeDOM code that does not employ this workaround, but I prefer the output code to contain auto-properties; auto-generated code is notorious for being messy and hard to read, and I did not want my generated code to feel like generated code.


Below is the DataTableToCode function, its containing class and its supporting functions. The code is short, encapsulated, clean and commented, so I will just let it speak for itself:

public static class DataTableExtensions
{
   public static string DataTableToCode(DataTable Table)
   {
      string className = Table.TableName;
      if(string.IsNullOrWhiteSpace(className))
      {   // Default name
         className = "Unnamed";
      }
      className += "TableAsClass";
      
      // Create the class
      CodeTypeDeclaration codeClass = CreateClass(className);
      
      // Add public properties
      foreach(DataColumn column in Table.Columns)
      {
         codeClass.Members.Add( CreateProperty(column.ColumnName, column.DataType) );
      }
      
      // Add Class to Namespace
      string namespaceName = "AutoGeneratedDomainModels";
      CodeNamespace codeNamespace = new CodeNamespace(namespaceName);
      codeNamespace.Types.Add(codeClass);
      
      // Generate code
      string filename = string.Format("{0}.{1}.cs",namespaceName,className);
      CreateCodeFile(filename, codeNamespace);
      
      // Return filename
      return filename;
   }
   
   static CodeTypeDeclaration CreateClass(string name)
   {
      CodeTypeDeclaration result = new CodeTypeDeclaration(name);
      result.Attributes = MemberAttributes.Public;
      result.Members.Add(CreateConstructor(name)); // Add class constructor
      return result;
   }
   
   static CodeConstructor CreateConstructor(string className)
   {
      CodeConstructor result = new CodeConstructor();
      result.Attributes = MemberAttributes.Public;
      result.Name = className;
      return result;
   }
   
   static CodeMemberField CreateProperty(string name, Type type)
   {
      // This is a little hack. Since you cant create auto properties in CodeDOM,
      //  we make the getter and setter part of the member name.
      // This leaves behind a trailing semicolon that we comment out.
      //  Later, we remove the commented out semicolons.
      string memberName = name + "\t{ get; set; }//";
      
      CodeMemberField result = new CodeMemberField(type,memberName);
      result.Attributes = MemberAttributes.Public | MemberAttributes.Final;
      return result;
   }
   
   static void CreateCodeFile(string filename, CodeNamespace codeNamespace)
   {
      // CodeGeneratorOptions so the output is clean and easy to read
      CodeGeneratorOptions codeOptions = new CodeGeneratorOptions();
      codeOptions.BlankLinesBetweenMembers = false;
      codeOptions.VerbatimOrder = true;
      codeOptions.BracingStyle = "C";
      codeOptions.IndentString = "\t";
      
      // Create the code file
      using(TextWriter textWriter = new StreamWriter(filename))
      {
         CSharpCodeProvider codeProvider = new CSharpCodeProvider();
         codeProvider.GenerateCodeFromNamespace(codeNamespace, textWriter, codeOptions);
      }
      
      // Correct our little auto-property 'hack'
      File.WriteAllText(filename, File.ReadAllText(filename).Replace("//;", ""));
   }
}


An example of the resulting code appears below:

namespace AutoGeneratedDomainModels
{
   public class CustomerTableAsClass
   {
      public CustomerTableAsClass()
      {
      }
      public string FirstName   { get; set; }
      public string LastName    { get; set; }
      public int  Age           { get; set; }
      public char Sex           { get; set; }
      public string Address     { get; set; }
      public string Birthdate   { get; set; }
   }
}

I am satisfied with the results and look of the code. The DataTableToCode() function can be a huge time saver if you have a large number of tables you need to write classes for, or if each DataTable contains a large number of columns.

If you found any of this helpful, or have any comments or suggestions, please feel free to post a comment.

Sunday, August 3, 2014

Set Public Properties of C# class from a DataTable using reflection



Note by author:

   Since writing this, I have expanded on this idea quite a bit. I have written a lightweight ORM class library that I call EntityJustWorks.

   The full project can be found on
GitHub or CodePlex.


   EntityJustWorks not only goes from a class to DataTable (below), but also provides:



Security Warning:
This library generates dynamic SQL, and has functions that generate SQL and then immediately executes it. While it its true that all strings funnel through the function Helper.EscapeSingleQuotes, this can be defeated in various ways and only parameterized SQL should be considered SAFE. If you have no need for them, I recommend stripping semicolons ; and dashes --. Also there are some Unicode characters that can be interpreted as a single quote or may be converted to one when changing encodings. Additionally, there are Unicode characters that can crash .NET code, but mainly controls (think TextBox). You almost certainly should impose a white list:
string clean = new string(dirty.Where(c => "abcdefghijklmnopqrstuvwxyz0123456789.,\"_ !@".Contains(c)).ToArray());

PLEASE USE the SQLScript.StoredProcedure and DatabaseQuery.StoredProcedure classes to generate SQL for you, as the scripts it produces is parameterized. All of the functions can be altered to generate parameterized instead of sanitized scripts. Ever since people have started using this, I have been maintaining backwards compatibility. However, I may break this in the future, as I do not wish to teach one who is learning dangerous/bad habits. This project is a few years old, and its already showing its age. What is probably needed here is a total re-write, deprecating this version while keep it available for legacy users after slapping big warnings all over the place. This project was designed to generate the SQL scripts for standing up a database for a project, using only MY input as data. This project was never designed to process a USER'S input.! Even if the data isn't coming from an adversary, client/user/manually entered data is notoriously inconsistent. Please do not use this code on any input that did not come from you, without first implementing parameterization. Again, please see the SQLScript.StoredProcedure class for inspiration on how to do that.




In this post I showed how to create a DataTable where the column names and types matched the properties of a class. In this post, we work the opposite direction and start with a Data-First approach. Given an SQL Database, we can easily convert a query to a DataTable using System.Data's SqlDataAdapter.Fill method. Now, given a DataTable, I show you here how to use Reflection to populate a class's public properties from a DataRow in a DataTable (or a List<> of classes, one from each DataRow in the DataTable) where the ColumnName matches the name of the public property in the class exactly (case-sensitive).
If the DataTable has extra columns that don't match up to a property in the class, they are ignored. If the DataTable is missing columns to match a class property, that property is ignored and left at the default value for that type (since it is a property). If you desire the ColumnName/PropertyInfo.Name matching behavior to be case insensitive, simply modify the line that compares the two strings (PropertyInfo.Name and DataColumn.ColumnName) to include a call to String.ToUpper() or String.ToLower() for each name.

If you paying close attention, or have ever attempted this kind of thing before, you are probably thinking to yourself that the most laborious (and error-prone) process is going to be creating the C# classes plus their many auto-properties that have to match the columns of a table, all manually. Well, take solace in the fact that I already thought of this and created a solution to generate C# class object code files from a DataTable using CodeDOM. It even implements a little hack to generate the properties as auto-properties (something not supported by CodeDOM) for clean, compact code that isn't bloated with private backing fields, and full getter/setter implementation.
Ultimately, the goal is to have a full, end-to-end, class-to-DataTable-to-SQL and back-again class library solution. Something like a poor-man's Entity Framework, or minimum-viable ORM. So stay alert for the next piece that will bring these wagons 'round full-circle: Automatic generation of SQL CREATE, INSERT INTO, and UPDATE scripts from a DataTable, which was generated from a C# class object, which can be generated from a DataTable, which can be generated by a SQL Database, which can be... well you get the idea.

This code has been tested and is a a little more robust than some of the equivalent samples I have been finding on StackOverflow (such as being able to handle properties of type Nullable<>. However there probably exists some conditions or use cases that I have not thought of, so please feel free to leave a comment if you find a way I can improve this class or have a feature request. In the next paragraph, I describe what the code is doing, or if you don't care, you can jump straight to the code below it. Enjoy.

How it works: Fist we get a list of PropertyInfo from the class. This will effectively be a list of properties in that class that we will want to fill. PropertyInfo exposes the Name property and the SetValue method, which takes an object and a value as parameters.
    We are going to make three nested loops to do this (one for each DataRow, one for each PropertyInfo and one for each DataColumn) and return a List of classes, each one filled out from a single row in the DataTable. It is possible to fill out one class provided a DataTable and row index in only two nested loops, and this post will provide that example too.
    For each row in DataTable.Rows, we will need to loop through each property (to fill them) and then loop through each DataTable's DataColumn and match the PropertyInfo.Name to the DataColumn.ColumnName. We then call the PropertyInfo's SetValue method. This function will take advantage of generics so that we can pass in any class as a parameter.

Here is the code:

public static class Helper
{
   public static class Table
   {
      /// <summary>
      /// Fills the public properties of a class from the first row of a DataTable
      ///  where the name of the property matches the column name from that DataTable.
      /// </summary>
      /// <param name="Table">A DataTable that contains the data.</param>
      /// <returns>A class of type T with its public properties matching column names
      ///      set to the values from the first row in the DataTable.</returns>
      public static T ToClass<T>(DataTable Table) where T : class, new()
      {
          T result = new T();
          if (Validate(Table))
          {  // Because reflection is slow, we will only pass the first row of the DataTable
              result = FillProperties<T>(Table.Rows[0]);
          }
          return result;
      }
       
      /// <summary>
      /// Fills the public properties of a class from each row of a DataTable where the name of
      /// the property matches the column name in the DataTable, returning a List of T.
      /// </summary>
      /// <param name="Table">A DataTable that contains the data.</param>
      /// <returns>A List class T with each class's public properties matching column names
      ///      set to the values of a diffrent row in the DataTable.</returns>
      public static List<T> ToClassList<T>(DataTable Table) where T: class, new()
      {
          List<T> result = new List<T>();
          
          if (Validate(Table))
          {
              foreach(DataRow row in Table.Rows)
              {
                   result.Add(FillProperties<T>(row));
              }
          }
          return result;
      }
       
      /// <summary>
      /// Fills the public properties of a class from a DataRow where the name
      /// of the property matches a column name from that DataRow.
      /// </summary>
      /// <param name="Row">A DataRow that contains the data.</param>
      /// <returns>A class of type T with its public properties set to the
      ///      data from the matching columns in the DataRow.</returns>
      public static T FillProperties<T>(DataRow Row) where T: class, new()
      {
          T result = new T();
          Type classType = typeof(T);
          
          // Defensive programming, make sure there are properties to set,
          //   and columns to set from and values to set from.
          if(    Row.Table.Columns.Count < 1
              || classType.GetProperties().Length < 1
              || Row.ItemArray.Length < 1)
          {
              return result;
          }
          
          foreach (PropertyInfo property in classType.GetProperties())
          {
              foreach(DataColumn column in Row.Table.Columns)
              {
                  // Skip if Property name and ColumnName do not match
                  if(property.Name != column.ColumnName)
                      continue;
                  // This would throw if we tried to convert it below
                  if(Row[column] == DBNull.Value)
                      continue;
                  
                  object newValue;
                  
                  // If type is of type System.Nullable, do not attempt to convert the value
                  if (IsNullable(property.PropertyType))
                  {
                      newValue = Row[property.Name];
                  }
                  else
                  {   // Convert row object to type of property
                      newValue = Convert.ChangeType(Row[column], property.PropertyType);
                  }
                  
                  // This is what sets the class properties of the class
                  property.SetValue(result, newValue, null);
              }
          }
          return result;
      }
       
      /// <summary>
      /// Checks a DataTable for empty rows, columns or null.
      /// </summary>
      /// <param name="DataTable">The DataTable to check.</param>
      /// <returns>True if DataTable has data, false if empty or null.</returns>
      public static bool Validate(DataTable DataTable)
      {
          if (DataTable == null) return false;
          if (DataTable.Rows.Count == 0) return false;
          if (DataTable.Columns.Count == 0) return false;
          return true;
      }
       
      /// <summary>
      /// Checks if type is nullable, Nullable<T> or its reference is nullable.
      /// </summary>
      /// <param name="type">Type to check for nullable.</param>
      /// <returns>True if type is nullable, false if it is not.</returns>
      public static bool IsNullable(Type type)
      {
          if (!type.IsValueType) return true; // ref-type
          if (Nullable.GetUnderlyingType(type) != null) return true; // Nullable<T>
          return false; // value-type
      }
   }
}

Wednesday, April 9, 2014

An Implementation of a One-Way Hashing Algorithm using Substitution and Permutation




        This program is a one-way hashing algorithm that can be used for hashing passwords or verifying a file's integrity. Like other ciphers, it has rounds. While this particular implementation was designed to overwrite some of the information required to reverse the rounds operations to be asymmetric, the same core 'substitution' algorithm could be used to make a symmetric stream cipher by taking a different approach.
        Each round proceeds as follows: First a scrambling or mangling (sounds violent) of a substitution table is performed before substituting the plain-text (or last round's output) with values from the table chosen by the index corresponding to the plain-text character's equivalent byte value. The substituted-text is then converted into a List of bits (bool) and the list then split in half into two lists of bits. One of the lists of bits is optionally flipped (reversed) based on the value of the first bit of that list. The value of the middle bit of the other list is used to determine which list is to lead during the (next step) bit shuffling. Starting with the selected list, the bits of the two lists are interleaved (shuffled) making one list again. The bits are converted back into bytes via an extension method and used as the input for the next round.
        The substitution/cipher block step uses a variation of the RC4 cipher with the key-scheduling algorithm dropped in favor of a pseudo-random-appearing, but evenly distributed table with each number from 0 to 256 appearing exactly once. This table is deterministically populated by an initial value, IV, which is some number that is both greater than 256 and co-prime to it. Starting with zero, set each byte in the table to the value plus IV, modulus 256, in order. Then, to 'prime' the table/block, iterate over the table several thousand times with the RC4 algorithm, discarding the byte stream (in the case of RC4). In the case of my particular RC4 variant implementation, the result byte is put into the table at an index that congruent to 256 modulus the current iteration count multiplied by some co-prime of 256, in this case, 331.
        While many people believe that RC4 is well on its way to being broken, it is my hope that by using a smarter key-scheduling function with a different (larger) block size and maybe some bit country line-dancing (shuffling) will make a variant of RC4 that we all can have confidence in again. Hmm, there is already an RC5 and RC6. Maybe a good name would be RC4.1 or RC4.Awesome. RC4 and seven thirteenths? R2C2 perhaps? No, I got it: The stream cipher formally known as RC4. RC4Realz or RCL33t? Anyways, this article is not about symmetric algorithms, but rather asymmetric ones, so read on...

        Cryptography is one of my interests. I love the math behind it and I think the mechanics behind RSA are just so elegant. I often daydream that one day I will be able to come out with something incredibly clever and useful. For the past several months, I have been working on implementing several different concepts of cryptography in C#. I have wrote several classes that do one specific type of job, such as transposition, permutation or substitution. I have been toying with combinations these different methods into 'rounds', with each round getting its input from the output of the previous round.  In this post, I am proposing a one-way hashing algorithm that uses a combination of bit-shuffling (permutation) and a substitution table with logic that mangle the table based on the current state of the table. It is possible to make an asymmetric algorithm from this idea, but that is not the aim of this particular tool.

        This algorithm still has some areas that need improving. First of all, its rather slow, so hashing large files is not practical. The number of rounds has a large baring on its hash time. A smaller number of rounds is recommended for anything beyond a paragraph. The current default is 18. This is rather arbitrary, as I have not done any testing to find the optimal number of rounds. Anyone who uses less than 4 rounds will start to see some repeating characters, not to mention make their implementation more susceptible to cryptanalysis. Sometimes a lengthy hashing time can be desirable, such as a proof of work concept, or to make it more difficult to brute force. For a password-length input at 1024 rounds, it takes almost a full second.


        Another area for improvement is the permutation step; is basically just shuffles the bits by dividing the bit array in half and interleaving the bits (think a deck of cards). While there is some variation in the order, based off the input, a much better bit shuffling would probably be an expansion or compression style of permutation. See this link to Wikipedia for an visual depiction.


        Well that about sums it up, so now on to the code! Take note at how I used extension methods to take care of converting a BitArray and a Byte array into a string, and back again. This makes the code very readable and not take up nearly as many lines by reusing code. The code is pretty heavily commented, so I will just let it explaining itself:





public class AJRHash
{
 byte[] table;
 // Unsure the optimum setting for rounds.
 // No less that 4. 1024 takes around 1 second for password-length hashes.
 // Use lower setting for large hashes
 static int rounds = 18;
 static int minimumLength = 10;
 static int tableSize = 256; // Because we are using bytes
 // By choosing a co-prime to 256, we ensure we get an evenly distributed table
 static int[] coprimesOf256 = { 4489, 2209, 1369 }; // Changing the co-primes will change the hash
 
 public AJRHash()
 {
  table = new byte[tableSize];
  InitializeTable();
 }
 
 public string Hash(string Input)
 {
  // Pad the input to minimum length
  if(Input.Length<minimumLength)
  {
   List<byte> padding = new List<byte>();
   
   // Copy the input first
   if(Input.Length>0)
    padding.AddRange(Input.GetBytes());
   
   // Make up the diffrence with nulls
   int diffrence = minimumLength - Input.Length;
   while(diffrence>0)
   {
    padding.Add(0);
    diffrence--;
   }
   Input = padding.ToArray().GetString();
  }
  
  byte[] permutation = Input.GetBytes();
  byte[] substitution = new byte[Input.Length+1];
  
  int counter = rounds;
  while(counter>0)
  {
   substitution = Substitution(permutation);
   permutation  = Permutation_Interleave(substitution);
   counter--;
  }

  return Convert.ToBase64String(permutation);
 }
 
 void InitializeTable()
 {
  int prime = coprimesOf256[0];
  
  byte index = 0;
  for (int i = 0; i < tableSize; i++)
  {
   table[index] = (byte)i;
   
   unchecked // The large prime will just roll over. Essentially just modular arithmetic
   { // By choosing a co-prime to 256, we ensure we get an evenly distributed table
    index += (byte)prime;
   }
  }
  
  // Finish initializing the table by shuffling it
  ScrambleTable(prime);
 }
 
 void ScrambleTable(int iterations)
 {
  byte i = 0;
  byte j = 0;
  byte k = 0;
  byte l = 0;
  
  byte iteration = 0;
  int counter = iterations;
  
  // Just roll over on overflow
  unchecked
  {
   // Some initial values
   j=table[table.Length/2];
   i=table[j];

   // Use the current state of the table to determine how it is changed
   while(counter>0)
   {
    i++;
    l=(byte)(table[i]+1);
    j=(byte)(j+l+1);
    
    table[i] = (byte)(j + 3);
    table[j] = (byte)(l - 1);
    
    k = (byte)( (table[i] + table[j]));
    l = (byte)(k % 255);
    
    table[k] = (byte)(table[k]+1);
    
    k = table[l];
    l = table[iteration];
    
    table[(byte)(iteration*331)] = (byte)(k ^ l);
    
    counter--;
    iteration++;
   }
  }
 }
 
 byte[] Substitution(byte[] input)
 {
  // Shuffle the table by an ammount unique to the input
  ScrambleTable(input.Length*input[0]);
  
  // Apply input against the substitution table
  List<byte> result = new List<byte>();
  foreach(byte b in input)
  {
   result.Add( table[(int)b] );
  }
  
  return result.ToArray();
 }
 
 // TODO: Add more robust bit shuffling/permutation such as compression or expansion
 byte[] Permutation_Interleave(byte[] input)
 {
  // Convert input into an array of bits
  BitArray temp = new BitArray(input);
  List<bool> bits = temp.GetList();
  
  // Ensure we have an even number of bits
  if(bits.Count%2 != 0)
   bits.Add(false);
  
  // Split the input up into two arrays of bits
  List<bool[]> split = bits.Split();
  
  // Make sure we have at least two arrays to interleave
  if(split.Count<2)
   throw new Exception("Input is too short.");
  
  bool[] one = split[0];
  bool[] two = split[1];
  
  // Ensure both arrays are the same length
  if(one.Length != two.Length)
   throw new Exception("Lengths must match.");
  
  // If the first bit of two is 1, reverse it.
  //   This makes the deterministic shuffling more difficult to reverse 
  if(two[0])
   Array.Reverse(two);
  
  // If the 'middle' bit of one is 1, take from two first.
  //   This makes the deterministic shuffling more difficult to reverse
  bool SecondFirst = false;
  if(one[one.Length/2])
   SecondFirst = true;
  
  // Interleave the two arrays
  int counter = 0;
  List<bool> result = new List<bool>();
  while(counter<one.Length)
  {
   // Two possible ways to order (decided above)
   if(SecondFirst)
   {
    result.Add(two[counter]);
    result.Add(one[counter]);
   }
   else
   {
    result.Add(one[counter]);
    result.Add(two[counter]);
   }
   counter++;
  }
  
  return new BitArray(result.ToArray()).GetBytes();
 }
}



I also wrote a few extension methods to help with all the tedious converting of data types. Extension methods can help keep you code tidy, readable. Don't forget to add these extension methods, or the above code wont compile:




public static class ExtentionMethods
{
 public static byte[] GetBytes(this string Input)
 {
  if(Input == null) return new byte[0];   
  return Encoding.UTF8.GetBytes(Input);
 }
 
 public static string GetString(this byte[] Input)
 {
  if(Input == null) return string.Empty;   
  return Encoding.UTF8.GetString(Input);
 }
 
 public static string GetString(this BitArray Input)
 {
  if(Input == null) return string.Empty;   
  return Encoding.UTF8.GetString(Input.GetBytes());
 }
 
 public static byte[] GetBytes(this BitArray Input)
 {
  if(Input == null) return new byte[0];   
  int lengthInBytes = (Input.Length%8==0) ? Input.Length/8 : (Input.Length/8)+1;
  
  byte[] result = new byte[lengthInBytes];
  Input.CopyTo(result,0);
  return result;
 }
 
 public static bool[] GetArray(this BitArray Input)
 {
  if(Input == null) return new bool[0];   
  return Input.GetList().ToArray();
 }
 
 public static List<bool> GetList(this BitArray Input)
 {
  if(Input == null) return new List<bool>();
  
  List<bool> result = new List<bool>();
  foreach(bool b in Input)
  {
   result.Add(b);
  }   
  return result;
 }
 
 public static List<bool[]> Split(this List<bool> Input)
 {
  if(Input == null) return new List<bool[]>();
  
  int midpoint = (Input.Count/2);
  List<bool> result1 = new List<bool>();
  List<bool> result2 = new List<bool>();
  
  int counter = 0;
  foreach(bool b in Input)
  {
   if(counter<midpoint)
   {
    result1.Add(b);
   }
   else
   {
    result2.Add(b);
   }    
   counter++;
  }
  
  List<bool[]> result = new List<bool[]>();
  result.Add(result1.ToArray());
  result.Add(result2.ToArray());
  return result;   
 }
}

Sunday, August 4, 2013

Topmost window always on top




This project was inspired by PowerMenu 1.51 by Thong Nyguyen. I wrote a small C# system tray application that provides some of the same functionality as PowerMenu. It can set/unset any application window as always on top or topmost. This is useful if you want to compare or edit two documents at once and you want to make a window float, sticky, or pin a window on top. I wrote this tool in C# and am giving you the source code here so you can see how it works. Since .NET does not support such level of control over other applications, our code must call the native WIN32 APIs SetWindowPos and FindWindow to accomplish this.

Besides being a system tray application (NotifyIcon), the application essentially has two main functions: GetWindowTitles(), which uses the Process class to return a list of windows' titles as an array of strings, and AssignTopmostWindow() which takes a windows title as a parameter.

But first, we must define our WIN32 API imports:

#region WIN32API
static readonly IntPtr HWND_TOPMOST = new IntPtr(-1);
static readonly IntPtr HWND_NOTOPMOST = new IntPtr(-2);
const UInt32 SWP_SHOWWINDOW = 0x0040;

[StructLayout(LayoutKind.Sequential)]
public struct RECT
{
    public int Left, Top, Right, Bottom;
    public RECT(int left,int top,int right,int bottom) {
        Left=left; Top=top; Right=right; Bottom=bottom; }
    public RECT(System.Drawing.Rectangle r) : this(r.Left, r.Top, r.Right, r.Bottom) { }
    public int X { get { return Left; } set { Right -= (Left - value); Left = value; } }
    public int Y { get { return Top; } set { Bottom -= (Top - value); Top = value; } }
    public int Height { get { return Bottom - Top; } set { Bottom = value + Top; } }
    public int Width { get { return Right - Left; } set { Right = value + Left; } }
    public static implicit operator System.Drawing.Rectangle(RECT r) {
        return new System.Drawing.Rectangle(r.Left, r.Top, r.Width, r.Height); }
    public static implicit operator RECT(System.Drawing.Rectangle r) { return new RECT(r); }
}

[DllImport("user32.dll", SetLastError = true)]
private static extern IntPtr FindWindow(String lpClassName, String lpWindowName);

[DllImport("user32.dll")]
[return: MarshalAs(UnmanagedType.Bool)]
public static extern bool GetWindowRect(HandleRef hwnd, out RECT lpRect);

[DllImport("user32.dll")]
[return: MarshalAs(UnmanagedType.Bool)]
private static extern bool SetWindowPos(IntPtr hWnd, IntPtr hWndInsertAfter,
                                        int x, int y, int cx, int cy, uint uFlags );
#endregion

Notice the RECT structure. The WIN32 RECT struct is different than the .NET version, and we need it to store and pass the window position during our call to SetWindowPos() or the window will likely disappear during resizing.

Here you can see in our implementation of AssignTopmostWindow, we get the window rectangle with GetWindowRect() and pass it straight to our call to SetWindowPos():

bool AssignTopmostWindow(string windowTitle,bool makeTopmost)
{
 IntPtr hWnd = FindWindow((string)null,windowTitle);
 
 RECT rect = new RECT();
 GetWindowRect(new HandleRef(this,hWnd),out rect);
 
 return SetWindowPos(hWnd,
              makeTopmost ? HWND_TOPMOST : HWND_NOTOPMOST,
              rect.X, rect.Y, rect.Width, rect.Height,
              SWP_SHOWWINDOW);
}

So your application is first going to want to get the titles of all the forms and windows:

string[] GetWindowTitles()
{
 List<string> winList = new List<string>();
 
 Process[] procArray = Process.GetProcesses();
 foreach(Process proc in procArray)
 {
  if(!string.IsNullOrWhiteSpace(proc.MainWindowTitle))
  {
   winList.Add(proc.MainWindowTitle);
  }
 }
 return winList.ToArray();
}

Then your application must then fill a listbox or somehow display this data to the user for selection. I choose to populate a dynamic context menu for my NotifyIcon application:

private MenuItem[] DynamicMenu()
{
 string[] titleArray = GetWindowTitles();
 
 List<MenuItem> menuList = new List<MenuItem>();
 foreach(string title in titleArray)
 {
  if(changeLog.ContainsKey(title))
  {
   if(changeLog[title])
   {
    menuList.Add(new MenuItem("* " + title, menuWinClick));
   }
   else
   {
    menuList.Add(new MenuItem(title, menuWinClick));
   }
  }
  else
  {
   menuList.Add(new MenuItem(title, menuWinClick));
  }
 }
 menuList.Add(new MenuItem("_____________"));
 menuList.Add(new MenuItem("About", menuAboutClick));
 menuList.Add(new MenuItem("Exit", menuExitClick));
 
 return menuList.ToArray();
}
Once the user has made a selection, a call to AssignTopmostWindow is made.

If your application should happen to exit without another call to AssignTopmostWindow(selectedTitle,false) to undo the topmost property, the window will remain topmost until reboot. As this is likely undesirable behavior, . My solution was to create a dictionary to keep track of the windows whose topmost property was modified, and 'roll back' those changes when your program/form is ending/closing:


private Dictionary<string,bool> changeLog = new Dictionary<string, bool>();

private void menuExitClick(object sender, EventArgs e)
{
 // Roll-back changes by
 //  unsetting every topmost window
 foreach(KeyValuePair<string,bool> entry in changeLog)
 {
  if(entry.Value) // If topmost
  {
   AssignTopmostWindow(entry.Key,false);
  }
 }   
 Application.Exit();
}


Here is the full code.

Thursday, August 1, 2013

Captcha: Drawing Text along a Bézier Spline



The following post/code will achieve something to the effect of:


All this code, and this article was inspired by planetclegg.com/projects/WarpingTextToSplines.html.
Explanation of the math and why it works will come shortly. In the meantime, please see the above link for a detailed explanation.

This code assumes you have already drawn some text on your GraphicsPath. Here is the code to transform your GraphicsPath text to follow a cubic Bézier Spline:

GraphicsPath BezierWarp(GraphicsPath text,Size size)
{
 // Control points for a cubic Bézier spline
 PointF P0 = new PointF();
 PointF P1 = new PointF();
 PointF P2 = new PointF();
 PointF P3 = new PointF();
 
 float shrink = 20;
 float shift = 0;
 
 P0.X = shrink;
 P0.Y = shrink+shift;
 P1.X = size.Width-shrink;
 P1.Y = shrink;
 P2.X = shrink;
 P2.Y = size.Height-shrink;
 P3.X = size.Width-shrink;
 P3.Y = size.Height-shrink-shift;

 // Calculate coefficients A thru H from the control points
 float A = P3.X - 3 * P2.X + 3 * P1.X - P0.X;
 float B = 3 * P2.X - 6 * P1.X + 3 * P0.X;
 float C = 3 * P1.X - 3 * P0.X;
 float D = P0.X;

 float E = P3.Y - 3 * P2.Y + 3 * P1.Y - P0.Y;
 float F = 3 * P2.Y - 6 * P1.Y + 3 * P0.Y;
 float G = 3 * P1.Y - 3 * P0.Y;
 float H = P0.Y;

 PointF[] pathPoints = text.PathPoints;
 RectangleF textBounds = text.GetBounds();
 
 for (int i =0; i  < pathPoints.Length; i++)
 {
  PointF pt = pathPoints[i];
  float textX = pt.X;
  float textY = pt.Y;
  
  // Normalize the x coordinate into the parameterized
  // value with a domain between 0 and 1.
  float t  =  textX / textBounds.Width;
  float t2 = (t * t);
  float t3 = (t * t * t);
  
  // Calculate spline point for parameter t
  float Sx = A * t3 + B * t2 + C * t + D;
  float Sy = E * t3 + F * t2 + G * t + H;
  
  // Calculate the tangent vector for the point
  float Tx = 3 * A * t2 + 2 * B * t + C;
  float Ty = 3 * E * t2 + 2 * F * t + G;

  // Rotate 90 or 270 degrees to make it a perpendicular
  float Px = - Ty;
  float Py =   Tx;
  
  // Normalize the perpendicular into a unit vector
  float magnitude = (float)Math.Sqrt((Px*Px) + (Py*Py));
  Px /= magnitude;
  Py /= magnitude;
  
  // Assume that input text point y coord is the "height" or
  // distance from the spline.  Multiply the perpendicular
  // vector with y. it becomes the new magnitude of the vector
  Px *= textY;
  Py *= textY;
  
  // Translate the spline point using the resultant vector
  float finalX = Px + Sx;
  float finalY = Py + Sy;
  
  pathPoints[i] = new PointF(finalX, finalY);
 }
 
 return new GraphicsPath(pathPoints,text.PathTypes);
}

Tuesday, July 9, 2013

Procedural generation of cave-like maps for rogue-like games



This post is about procedural content generation of cave-like dungeons/maps for rogue-like games using what is known as the Cellular Automata method.

To understand what I mean by cellular automata method, imagine Conway's Game of Life. Many algorithms use what is called the '4-5 method', which means a tile will become a wall if it is a wall and 4 or more of its nine neighbors are walls, or if it is not a wall and 5 or more neighbors are walls. I start by filling the map randomly with walls or space, then visit each x/y position iteratively and apply the 4-5 rule. Usually this is preceded with 'seeding' the map by randomly filling each cell of the map with a wall or space, based on some weight (say, 40% of the time it chooses to place a wall). Then the automata step is applied multiple times over the entire map, precipitating walls and subsequently smoothing them. About 3 rounds is all that is required, with about 4-5 rounds being pretty typical amongst most implementations. Perhaps a picture of the the output will help you understand what I mean.

Using the automata-based method for procedural generation of levels will produce something similar to this:

Sure the dungeon generation by linking rooms approach has its place, but I really like the 'natural' look to the automata inspired method. I first originally discovered this technique on the website called Roguebasin. It is a great resource for information concerning the different problems involved with programming a rogue-like game, such as Nethack or Angband.

One of the major problems developers run into while employing this technique is the formation of isolated caves. Instead of one big cave-like room, you may get section of the map that is inaccessible without digging through walls. Isolated caves can trap key items or (worse) stairs leading to the next level, preventing further progress in the game. I have seen many different approaches proposed to solve this problem. Some suggestions I've seen include: 1) Discarding maps that have isolated caves, filling in the isolated sections, or finely tweaking the variables/rules to reduce occurrences of such maps. None of these are ideal (in my mind), and most require a way to detect isolated sections, which is another non-trivial problem in itself.

Despite this, I consider the problem solved because I have discovered a solution that is dead simple and almost** never fail because of the rules of the automata generation itself dictate such. I call my method 'Horizontal Blanking' because you can probably guess how it works now just from hearing the name. This step comes after the random filling of the map (initialization), but before the cellular automata iterations. After the map is 'seeded' with a random fill of walls, a horizontal strip in the middle of of the map is cleared of all walls. The horizontal strip is about 3 or 4 block tall (depending on rules). Clearing a horizontal strip of sufficient width will prevent a continuous vertical wall from being created and forming isolated caves in your maps. After horizontal blanking, you can begin applying the cellular automata method to your map.

** I say 'almost' because although it it not possible to get whole rooms that are disconnected from each other, it is possible to get tiny squares of blank space in the northern or southern walls that consist of about 4-5 blocks in total area. Often, these little holes will resolve themselves during the rounds of automata rules, but there still exists the possibility that one may persist. My answer to this edge case would be to use some rules around the placement of stairwells (and other must-find items) dictating that such objects must have a 2-3 block radius clear of walls to be placed.


See below for the code that produced the above screenshot, or click here to download the entire project with source code that you can compile yourself.

public class MapHandler
{
 Random rand = new Random();
 
 public int[,] Map;
 
 public int MapWidth   { get; set; }
 public int MapHeight  { get; set; }
 public int PercentAreWalls { get; set; }
 
 public MapHandler()
 {
  MapWidth = 40;
  MapHeight = 21;
  PercentAreWalls = 40;
  
  RandomFillMap();
 }

 public void MakeCaverns()
 {
  // By initilizing column in the outter loop, its only created ONCE
  for(int column=0, row=0; row <= MapHeight-1; row++)
  {
   for(column = 0; column <= MapWidth-1; column++)
   {
    Map[column,row] = PlaceWallLogic(column,row);
   }
  }
 }
 
 public int PlaceWallLogic(int x,int y)
 {
  int numWalls = GetAdjacentWalls(x,y,1,1);

  
  if(Map[x,y]==1)
  {
   if( numWalls >= 4 )
   {
    return 1;
   }
   return 0;   
  }
  else
  {
   if(numWalls>=5)
   {
    return 1;
   }
  }
  return 0;
 }
 
 public int GetAdjacentWalls(int x,int y,int scopeX,int scopeY)
 {
  int startX = x - scopeX;
  int startY = y - scopeY;
  int endX = x + scopeX;
  int endY = y + scopeY;
  
  int iX = startX;
  int iY = startY;
  
  int wallCounter = 0;
  
  for(iY = startY; iY <= endY; iY++) {
   for(iX = startX; iX <= endX; iX++)
   {
    if(!(iX==x && iY==y))
    {
     if(IsWall(iX,iY))
     {
      wallCounter += 1;
     }
    }
   }
  }
  return wallCounter;
 }
 
 bool IsWall(int x,int y)
 {
  // Consider out-of-bound a wall
  if( IsOutOfBounds(x,y) )
  {
   return true;
  }
  
  if( Map[x,y]==1  )
  {
   return true;
  }
  
  if( Map[x,y]==0  )
  {
   return false;
  }
  return false;
 }
 
 bool IsOutOfBounds(int x, int y)
 {
  if( x<0 data-blogger-escaped-else="" data-blogger-escaped-if="" data-blogger-escaped-return="" data-blogger-escaped-true="" data-blogger-escaped-x="" data-blogger-escaped-y="">MapWidth-1 || y>MapHeight-1 )
  {
   return true;
  }
  return false;
 }
Above is the main core of the logic.


Here is the rest of the program, such as filling, printing and blanking:
 
 public void PrintMap()
 {
  Console.Clear();
  Console.Write(MapToString());
 }
 
 string MapToString()
 {
  string returnString = string.Join(" ", // Seperator between each element
                                    "Width:",
                                    MapWidth.ToString(),
                                    "\tHeight:",
                                    MapHeight.ToString(),
                                    "\t% Walls:",
                                    PercentAreWalls.ToString(),
                                    Environment.NewLine
                                   );
  
  List<string> mapSymbols = new List();
  mapSymbols.Add(".");
  mapSymbols.Add("#");
  mapSymbols.Add("+");
  
  for(int column=0,row=0; row < MapHeight; row++ ) {
   for( column = 0; column < MapWidth; column++ )
   {
    returnString += mapSymbols[Map[column,row]];
   }
   returnString += Environment.NewLine;
  }
  return returnString;
 }
 
 public void BlankMap()
 {
  for(int column=0,row=0; row < MapHeight; row++) {
   for(column = 0; column < MapWidth; column++) {
    Map[column,row] = 0;
   }
  }
 }
 
 public void RandomFillMap()
 {
      // New, empty map
      Map = new int[MapWidth,MapHeight];
  
      int mapMiddle = 0; // Temp variable
      for(int column=0,row=0; row < MapHeight; row++) {
         for(column = 0; column < MapWidth; column++)
         {
       // If coordinants lie on the edge of the map
       // (creates a border)
       if(column == 0)
       {
      Map[column,row] = 1;
       }
       else if (row == 0)
       {
      Map[column,row] = 1;
       }
       else if (column == MapWidth-1)
       {
      Map[column,row] = 1;
       }
       else if (row == MapHeight-1)
       {
      Map[column,row] = 1;
       }
       // Else, fill with a wall a random percent of the time
       else
       {
      mapMiddle = (MapHeight / 2);
    
      if(row == mapMiddle)
      {
     Map[column,row] = 0;
      }
      else
      {
     Map[column,row] = RandomPercent(PercentAreWalls);
      }
       }
   }
      }
 }

 int RandomPercent(int percent)
 {
  if(percent>=rand.Next(1,101))
  {
   return 1;
  }
  return 0;
 }
 
 public MapHandler(int mapWidth, int mapHeight, int[,] map, int percentWalls=40)
 {
  this.MapWidth = mapWidth;
  this.MapHeight = mapHeight;
  this.PercentAreWalls = percentWalls;
  this.Map = new int[this.MapWidth,this.MapHeight];
  this.Map = map;
 }
}


And of course, the main function:
 
public static void Main(string[] args)
{
 char key  = new Char();
 MapHandler Map = new MapHandler();
 
 string instructions =
  "[Q]uit [N]ew [+][-]Percent walls [R]andom [B]lank" + Environment.NewLine +
  "Press any other key to smooth/step";

 Map.MakeCaverns();
 Map.PrintMap();
 Console.WriteLine(instructions);
 
 key = Char.ToUpper(Console.ReadKey(true).KeyChar);
 while(!key.Equals('Q'))
 {
  if(key.Equals('+')) {
   Map.PercentAreWalls+=1;
   Map.RandomFillMap();
   Map.MakeCaverns();
   Map.PrintMap();
  } else if(key.Equals('-')) {
   Map.PercentAreWalls-=1;
   Map.RandomFillMap();
   Map.MakeCaverns();
   Map.PrintMap();
  } else if(key.Equals('R')) {
   Map.RandomFillMap();
   Map.PrintMap();
  } else if(key.Equals('N')) {
   Map.RandomFillMap();
   Map.MakeCaverns();
   Map.PrintMap();
  } else if(key.Equals('B')) {
   Map.BlankMap();
   Map.PrintMap();
  } else if(key.Equals('D')) {
   // I set a breakpoint here...
  } else {
   Map.MakeCaverns();
   Map.PrintMap();
  }
  Console.WriteLine(instructions);
  key = Char.ToUpper(Console.ReadKey(true).KeyChar);
 }
 Console.Clear();
 Console.Write(" Thank you for playing!");
 Console.ReadKey(true);
}


See also: Roguebasin - Cellular Automata Method for Generating Random Cave-Like Levels