C# – submitting form programmatically

cscreen-scraping

Im trying to submit a specific form programatically, but I allways get the initial page back.
I must be doing something wrong or missing something here.
Im sending the session cookie and some POST data like viewState (that I parse from the initial request), and SessionID (this is the value i change in the form toget data from other years). But in the second request I allways get data for Session 899, instead of the one i request: 875.

Here is the code used:, any help is greatly apreciated

retrieveEdmIndexForSession(875);

 protected string retrieveEdmIndexForSession(int sessionId) {

    CookieContainer cookies;
    HttpWebRequest oRequest;
    HttpWebResponse oResponse;
    Stream sw;
    StreamReader sr;
    string pageData;
string PathRemote = @"http://edmi.parliament.uk/EDMi/EDMList.aspx";


    /*
     * download the index page so we can get Cookies and ViewState from it.
     */
    oRequest = (HttpWebRequest)WebRequest.Create(PathRemote);
    oRequest.Method = "GET";
    oRequest.AllowAutoRedirect = true;
    oRequest.CookieContainer = new CookieContainer();
    oRequest.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
    oRequest.Referer = "http://edmi.parliament.uk/EDMi/EDMList.aspx";


    oResponse =(HttpWebResponse) oRequest.GetResponse();
    sr = new StreamReader(oResponse.GetResponseStream());

    pageData = sr.ReadToEnd();

    /*
     * extract view state from pageData.
     */
    string viewState = this.ExtractViewState(pageData);


    /*
    * lets submit the form with the parameters we want
    */
    oRequest = (HttpWebRequest)WebRequest.Create(PathRemote);
    oRequest.Method = "POST";
    oRequest.AllowAutoRedirect = true;
    oRequest.ContentType = "application/x-www-form-urlencoded";
    oRequest.CookieContainer = new CookieContainer();
    oRequest.CookieContainer.Add(oResponse.Cookies);
    oRequest.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
    oRequest.Referer = "http://edmi.parliament.uk/EDMi/EDMList.aspx";

    string postdata = "__EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE=" + viewState + "&_MenuCtrl%3AddlSession=" + sessionId + "&_MenuCtrl%3A_GoTo.x=57&_MenuCtrl%3A_GoTo.y=14&ddlStatus=1&ddlSortedBy=1";
    byte[] buffer = Encoding.UTF8.GetBytes(postdata);
    oRequest.ContentLength = buffer.Length;

    /*
     * Send post data into request stream first
     */
    sw = oRequest.GetRequestStream();
    sw.Write(buffer, 0, buffer.Length);
    sw.Flush();
    sw.Close();

    /*
     * Connect, send and get response
     */
    oResponse = (HttpWebResponse)oRequest.GetResponse();
    sr = new StreamReader(oResponse.GetResponseStream());

    //OnLogUpdated(1, "\r\nStatus Code: " + oResponse.StatusCode);
    //OnLogUpdated(1, "\r\nServer: " + oResponse.Server);

    pageData = sr.ReadToEnd();
    string result = getSessionId(pageData);
    //OnLogUpdated(1, "\r\nRestuls: [" + result + "]");
    //OnLogUpdated(1, "\r\nPage: [" + pageData + "]");


    return pageData;


}



private string ExtractViewState(string str)
{
    string viewState = "";
    string pattern = "(?<=__VIEWSTATE\" value=\")(?<val>.*?)(?=\")";

    Match match = Regex.Match(str, pattern);

    if (match.Success)
    {
        viewState = match.Groups["val"].Value;
        viewState = HttpUtility.UrlEncodeUnicode(viewState);

    }

    return viewState;
}

protected string getSessionId(string str)
{
    string sessionId = string.Empty;

    str = str.Trim();

    string pattern = @"&SESSION=([^']+)'";

    Match match = Regex.Match(str, pattern, RegexOptions.IgnoreCase);
    if (match.Success)
    {
        sessionId = match.Groups[1].ToString(); ;

    }

    return sessionId;
}

This is the RAW Request being sent by the .NET script.

POST /EDMi/EDMList.aspx HTTP/1.1
Content-Type:
application/x-www-form-urlencoded
Accept:
text/html,application/xhtml+xml,application/xml;q=0.9,/;q=0.8
Referer:
http://edmi.parliament.uk/EDMi/EDMList.aspx
User-Agent: .NET Framework Client
Host: edmi.parliament.uk Cookie:
ASP.NET_SessionId=k55fqarvx2oszp2wxhtrol45
Content-Length: 2431 Expect:
100-continue

__EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE=dDwxMDgyNzIxNDQ2O3Q8O2w8aTwzPjs%2bO2w8dDw7bDxpPDE%2bO2k8Mz47aTw1PjtpPDExPjs%2bO2w8dDw7bDxpPDEzPjtpPDE3Pjs%2bO2w8dDx0PHA8cDxsPERhdGFWYWx1ZUZpZWxkO0RhdGFUZXh0RmllbGQ7PjtsPFNFU1NJT05JRDtJVEVNX1ZBTFVFOz4%2bOz47dDxpPDIwPjtAPDA4LTA5OzA3LTA4OzA2LTA3OzA1LTA2OzA0LTA1OzAzLTA0OzAyLTAzOzAxLTAyOzAwLTAxOzk5LTAwOzk4LTk5Ozk3LTk4Ozk2LTk3Ozk1LTk2Ozk0LTk1OzkzLTk0OzkyLTkzOzkxLTkyOzkwLTkxOzg5LTkwOz47QDw4OTk7ODkxOzg4NTs4NzU7ODczOzY4Mjs2ODE7NjgwOzY3OTs3MDM7NzAyOzcwMTs3MDA7Njk5OzY5ODs2OTc7Njk2OzY5NTs2OTQ7NjkzOz4%2bOz47Oz47dDxwPGw8VGV4dDs%2bO2w8TGlzdCBPZiBFYXJseSBEYXkgTW90aW9uczs%2bPjs7Pjs%2bPjt0PDtsPGk8MT47aTwzPjs%2bO2w8dDx0PDs7bDxpPDA%2bOz4%2bOzs%2bO3Q8dDw7O2w8aTwwPjs%2bPjs7Pjs%2bPjt0PDtsPGk8MT47aTwzPjs%2bO2w8dDw7bDxpPDE%2bO2k8Mz47aTw1PjtpPDc%2bOz47bDx0PHA8cDxsPENvbW1hbmRBcmd1bWVudDtDc3NDbGFzcztFbmFibGVkO18hU0I7PjtsPDA7UGFnZUZpcnN0RGlzYWJsZWQ7bzxmPjtpPDI%2bOz4%2bOz47Oz47dDxwPHA8bDxDb21tYW5kQXJndW1lbnQ7Q3NzQ2xhc3M7RW5hYmxlZDtfIVNCOz47bDwtMTtQYWdlUHJldkRpc2FibGVkO288Zj47aTwyPjs%2bPjs%2bOzs%2bO3Q8cDxwPGw8Q29tbWFuZEFyZ3VtZW50O0Nzc0NsYXNzO18hU0I7PjtsPDE7UGFnZU5leHRFbmFibGVkO2k8Mj47Pj47Pjs7Pjt0PHA8cDxsPENvbW1hbmRBcmd1bWVudDtDc3NDbGFzcztfIVNCOz47bDw0MjtQYWdlTGFzdEVuYWJsZWQ7aTwyPjs%2bPjs%2bOzs%2bOz4%2bO3Q8O2w8aTwxPjtpPDM%2bO2k8NT47aTw3Pjs%2bO2w8dDxwPHA8bDxUZXh0Oz47bDwyMTA5Oz4%2bOz47Oz47dDxwPHA8bDxUZXh0Oz47bDxFRE1zIGFuZCBBbWVuZG1lbnRzOz4%2bOz47Oz47dDxwPHA8bDxUZXh0Oz47bDwxOz4%2bOz47Oz47dDxwPHA8bDxUZXh0Oz47bDw1MDs%2bPjs%2bOzs%2bOz4%2bOz4%2bO3Q8O2w8aTwxPjtpPDM%2bOz47bDx0PDtsPGk8MT47aTwzPjtpPDU%2bO2k8Nz47PjtsPHQ8cDxwPGw8Q29tbWFuZEFyZ3VtZW50O0Nzc0NsYXNzO0VuYWJsZWQ7XyFTQjs%2bO2w8MDtQYWdlRmlyc3REaXNhYmxlZDtvPGY%2bO2k8Mj47Pj47Pjs7Pjt0PHA8cDxsPENvbW1hbmRBcmd1bWVudDtDc3NDbGFzcztFbmFibGVkO18hU0I7PjtsPC0xO1BhZ2VQcmV2RGlzYWJsZWQ7bzxmPjtpPDI%2bOz4%2bOz47Oz47dDxwPHA8bDxDb21tYW5kQXJndW1lbnQ7Q3NzQ2xhc3M7XyFTQjs%2bO2w8MTtQYWdlTmV4dEVuYWJsZWQ7aTwyPjs%2bPjs%2bOzs%2bO3Q8cDxwPGw8Q29tbWFuZEFyZ3VtZW50O0Nzc0NsYXNzO18hU0I7PjtsPDQyO1BhZ2VMYXN0RW5hYmxlZDtpPDI%2bOz4%2bOz47Oz47Pj47dDxwPHA8bDxWaXNpYmxlOz47bDxvPGY%2bOz4%2bOz47bDxpPDE%2bO2k8Mz47aTw1PjtpPDc%2bOz47bDx0PHA8cDxsPFRleHQ7PjtsPDIxMDk7Pj47Pjs7Pjt0PHA8cDxsPFRleHQ7PjtsPEVETXMgYW5kIEFtZW5kbWVudHM7Pj47Pjs7Pjt0PHA8cDxsPFRleHQ7PjtsPDE7Pj47Pjs7Pjt0PHA8cDxsPFRleHQ7PjtsPDUwOz4%2bOz47Oz47Pj47Pj47Pj47Pj47bDxfTWVudUN0cmw6X0dvVG87Pj5NHcFbPBNzNuwxs7sYLdUE2omkjw%3d%3d&_MenuCtrl%3AddlSession=875&_MenuCtrl%3A_GoTo.x=57&_MenuCtrl%3A_GoTo.y=14&ddlStatus=1&ddlSortedBy=1

This is the RAW request sent by IE:

POST /EDMi/EDMList.aspx HTTP/1.1
Accept: image/gif, image/jpeg,
image/pjpeg, image/pjpeg,
application/x-shockwave-flash,
application/xaml+xml,
application/vnd.ms-xpsdocument,
application/x-ms-xbap,
application/x-ms-application,
application/vnd.ms-excel,
application/vnd.ms-powerpoint,
application/msword, / Referer:
http://edmi.parliament.uk/EDMi/EDMList.aspx
Accept-Language: en-gb User-Agent:
Mozilla/4.0 (compatible; MSIE 8.0;
Windows NT 5.1; Trident/4.0; .NET CLR
1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30; InfoPath.1; .NET CLR 3.0.04506.648; OfficeLiveConnector.1.3;
OfficeLivePatch.0.0; .NET CLR
3.0.4506.2152; .NET CLR 3.5.30729) Content-Type:
application/x-www-form-urlencoded
Accept-Encoding: gzip, deflate Host:
edmi.parliament.uk Content-Length:
2431 Connection: Keep-Alive Pragma:
no-cache Cookie:
WT_FPC=id=83.217.99.254-2364242496.30021299:lv=1249572414567:ss=1249572414567;
ASP.NET_SessionId=vwxgo4rlex1j5m55l0bivrqo

__EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE=dDwxMDgyNzIxNDQ2O3Q8O2w8aTwzPjs%2BO2w8dDw7bDxpPDE%2BO2k8Mz47aTw1PjtpPDExPjs%2BO2w8dDw7bDxpPDEzPjtpPDE3Pjs%2BO2w8dDx0PHA8cDxsPERhdGFWYWx1ZUZpZWxkO0RhdGFUZXh0RmllbGQ7PjtsPFNFU1NJT05JRDtJVEVNX1ZBTFVFOz4%2BOz47dDxpPDIwPjtAPDA4LTA5OzA3LTA4OzA2LTA3OzA1LTA2OzA0LTA1OzAzLTA0OzAyLTAzOzAxLTAyOzAwLTAxOzk5LTAwOzk4LTk5Ozk3LTk4Ozk2LTk3Ozk1LTk2Ozk0LTk1OzkzLTk0OzkyLTkzOzkxLTkyOzkwLTkxOzg5LTkwOz47QDw4OTk7ODkxOzg4NTs4NzU7ODczOzY4Mjs2ODE7NjgwOzY3OTs3MDM7NzAyOzcwMTs3MDA7Njk5OzY5ODs2OTc7Njk2OzY5NTs2OTQ7NjkzOz4%2BOz47Oz47dDxwPGw8VGV4dDs%2BO2w8TGlzdCBPZiBFYXJseSBEYXkgTW90aW9uczs%2BPjs7Pjs%2BPjt0PDtsPGk8MT47aTwzPjs%2BO2w8dDx0PDs7bDxpPDA%2BOz4%2BOzs%2BO3Q8dDw7O2w8aTwwPjs%2BPjs7Pjs%2BPjt0PDtsPGk8MT47aTwzPjs%2BO2w8dDw7bDxpPDE%2BO2k8Mz47aTw1PjtpPDc%2BOz47bDx0PHA8cDxsPENvbW1hbmRBcmd1bWVudDtDc3NDbGFzcztFbmFibGVkO18hU0I7PjtsPDA7UGFnZUZpcnN0RGlzYWJsZWQ7bzxmPjtpPDI%2BOz4%2BOz47Oz47dDxwPHA8bDxDb21tYW5kQXJndW1lbnQ7Q3NzQ2xhc3M7RW5hYmxlZDtfIVNCOz47bDwtMTtQYWdlUHJldkRpc2FibGVkO288Zj47aTwyPjs%2BPjs%2BOzs%2BO3Q8cDxwPGw8Q29tbWFuZEFyZ3VtZW50O0Nzc0NsYXNzO18hU0I7PjtsPDE7UGFnZU5leHRFbmFibGVkO2k8Mj47Pj47Pjs7Pjt0PHA8cDxsPENvbW1hbmRBcmd1bWVudDtDc3NDbGFzcztfIVNCOz47bDw0MjtQYWdlTGFzdEVuYWJsZWQ7aTwyPjs%2BPjs%2BOzs%2BOz4%2BO3Q8O2w8aTwxPjtpPDM%2BO2k8NT47aTw3Pjs%2BO2w8dDxwPHA8bDxUZXh0Oz47bDwyMTA5Oz4%2BOz47Oz47dDxwPHA8bDxUZXh0Oz47bDxFRE1zIGFuZCBBbWVuZG1lbnRzOz4%2BOz47Oz47dDxwPHA8bDxUZXh0Oz47bDwxOz4%2BOz47Oz47dDxwPHA8bDxUZXh0Oz47bDw1MDs%2BPjs%2BOzs%2BOz4%2BOz4%2BO3Q8O2w8aTwxPjtpPDM%2BOz47bDx0PDtsPGk8MT47aTwzPjtpPDU%2BO2k8Nz47PjtsPHQ8cDxwPGw8Q29tbWFuZEFyZ3VtZW50O0Nzc0NsYXNzO0VuYWJsZWQ7XyFTQjs%2BO2w8MDtQYWdlRmlyc3REaXNhYmxlZDtvPGY%2BO2k8Mj47Pj47Pjs7Pjt0PHA8cDxsPENvbW1hbmRBcmd1bWVudDtDc3NDbGFzcztFbmFibGVkO18hU0I7PjtsPC0xO1BhZ2VQcmV2RGlzYWJsZWQ7bzxmPjtpPDI%2BOz4%2BOz47Oz47dDxwPHA8bDxDb21tYW5kQXJndW1lbnQ7Q3NzQ2xhc3M7XyFTQjs%2BO2w8MTtQYWdlTmV4dEVuYWJsZWQ7aTwyPjs%2BPjs%2BOzs%2BO3Q8cDxwPGw8Q29tbWFuZEFyZ3VtZW50O0Nzc0NsYXNzO18hU0I7PjtsPDQyO1BhZ2VMYXN0RW5hYmxlZDtpPDI%2BOz4%2BOz47Oz47Pj47dDxwPHA8bDxWaXNpYmxlOz47bDxvPGY%2BOz4%2BOz47bDxpPDE%2BO2k8Mz47aTw1PjtpPDc%2BOz47bDx0PHA8cDxsPFRleHQ7PjtsPDIxMDk7Pj47Pjs7Pjt0PHA8cDxsPFRleHQ7PjtsPEVETXMgYW5kIEFtZW5kbWVudHM7Pj47Pjs7Pjt0PHA8cDxsPFRleHQ7PjtsPDE7Pj47Pjs7Pjt0PHA8cDxsPFRleHQ7PjtsPDUwOz4%2BOz47Oz47Pj47Pj47Pj47Pj47bDxfTWVudUN0cmw6X0dvVG87Pj5NHcFbPBNzNuwxs7sYLdUE2omkjw%3D%3D&_MenuCtrl%3AddlSession=885&ddlStatus=0&ddlSortedBy=1&_MenuCtrl%3A_GoTo.x=37&_MenuCtrl%3A_GoTo.y=12

The IE Header seems to have an extra cookie(WT_FPC=id=83.217.99.254-2364242496.30021299:lv=1249572414567:ss=1249572414567;) witch appers to track visitors using cookies via the WebTrends Cookie Plug-In. Both POST Requests Return HTTP Status Code 302 and redirect to a GET request that returns status 200.

Any ideas ?

Best Answer

You are creating a new CookieContainer each time. Try using the CookieContainer of the index page request as the container for the form submit, it should contain the extra cookie. Alternatively, something other than the form could be passing the extra cookie. Check when the page loads using IEinspector's HttpAnalyzer or Firefox's Firebug to make sure other form elements like images aren't passing the cookie.

Related Topic