Resolved HttpRequest download PDF files

Gekidow

Member
Joined
Apr 20, 2023
Messages
12
Programming Experience
Beginner
Hello, I have a problem: my program reads an ODT file and downloads the links inside it; these links point to PDF files available on an intranet. The problem is that the program does not produce 128 downloaded PDFs; instead I get 128 files (whose names correspond to what I am supposed to have) without an extension, each 18 KB in size. My question is: why do I get files without an extension, as in the screenshot, instead of PDF files? Is it a redirect problem? I also tried the DownloadFile method and got the same result. The System.Diagnostics.Process.Start(link); method works, but I can't rename the files because the program only opens the links and doesn't download them itself (the browser downloads them). PS: I'm on .NET 3.5 and Visual Studio 2010.

résultatfichiers.png


Here is my code :
C#:
using System;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using ICSharpCode.SharpZipLib.Core;
using ICSharpCode.SharpZipLib.Zip;

namespace PrepareDocForExternalUse
{
    /// <summary>
    /// Console tool that reads an ODT file, downloads every intranet document
    /// linked from its content.xml, rewrites those links to their external
    /// location and saves the result as a new ODT file next to the original.
    /// </summary>
    class Program
    {
        // Local folder that receives the downloaded documents.
        private const string DownloadFolder = "C:/PiecesJointes/";

        // Base address that replaces the intranet download page in the rewritten links.
        private const string OnlineBaseUrl = "https://com.test.fr/files/test/test/";

        // Name of the ZIP entry that holds the document body of an ODT file.
        private const string ContentEntryName = "content.xml";

        // Matches the intranet download links as they appear inside content.xml.
        // The separators are XML-escaped there ("&amp;"), which is why each parameter
        // is preceded by ';'. The values exclude '"' and '<' so that a single match
        // can never run past the end of the attribute/text that contains the link.
        private static readonly Regex LinkRegex = new Regex(
            @"http://applnet\.test\.fr/GetContenu/Download\.aspx\?p1=[^""<]*?;p2=[^""<]*?;p5=[^""<]*?;p6=NOPUB");

        /// <summary>
        /// Entry point: asks for the ODT path, downloads the linked documents,
        /// rewrites the links and writes the updated copy of the ODT file.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Prompts the user for the absolute path to an ODT file
            Console.WriteLine("Please enter the absolute path of an ODT file:");
            string odtFilePath = Console.ReadLine();

            if (string.IsNullOrEmpty(odtFilePath) || !File.Exists(odtFilePath))
            {
                Console.WriteLine("File not found: " + odtFilePath);
                Console.ReadLine();
                return;
            }

            // Work with a full path so that the output directory can always be derived from it.
            odtFilePath = Path.GetFullPath(odtFilePath);

            byte[] odtBytes = File.ReadAllBytes(odtFilePath);
            string contentXml = ReadContentXml(odtBytes);

            // The download target must exist before any FileStream is opened in it.
            Directory.CreateDirectory(DownloadFolder);

            // Process each link found
            foreach (Match match in LinkRegex.Matches(contentXml))
            {
                string link = match.Value;

                // A link that occurs several times is fully rewritten on its first
                // successful pass; skip the remaining occurrences instead of
                // downloading the same document again.
                if (!contentXml.Contains(link))
                {
                    continue;
                }

                string queryString = link.Substring(link.LastIndexOf("aspx?", StringComparison.Ordinal) + "aspx?".Length);

                // NOTE(review): the local name is the raw query string, so the files
                // have no extension. If the published files are expected to end in
                // ".pdf", append it both here and in onlineFilePath - confirm with
                // whoever publishes the files.
                string localFilePath = DownloadFolder + queryString;
                string onlineFilePath = OnlineBaseUrl + queryString;

                // The text taken from content.xml is XML-escaped; the server must
                // receive plain '&' separators, otherwise it sees parameters named
                // "amp;p2", "amp;p5", ...
                string requestUrl = link.Replace("&amp;", "&");

                if (DownloadDocument(requestUrl, localFilePath))
                {
                    // Replace the link with the online location of the downloaded document
                    contentXml = contentXml.Replace(link, onlineFilePath);
                }
            }

            byte[] updatedOdt = BuildUpdatedOdt(odtBytes, contentXml);

            // Renames and saves the updated ODT file
            Guid g = Guid.NewGuid();
            string updatedFilePath = Path.Combine(Path.GetDirectoryName(odtFilePath), g + "_" + Path.GetFileName(odtFilePath));
            File.WriteAllBytes(updatedFilePath, updatedOdt);

            Console.WriteLine("The ODT file has been successfully updated and saved as: " + updatedFilePath);
            Console.ReadLine();
        }

        /// <summary>
        /// Extracts the text of the content.xml entry from an ODT (ZIP) archive.
        /// </summary>
        /// <param name="odtBytes">Raw bytes of the ODT file.</param>
        /// <returns>The content.xml document as a string.</returns>
        /// <exception cref="InvalidDataException">The archive has no content.xml entry.</exception>
        private static string ReadContentXml(byte[] odtBytes)
        {
            using (MemoryStream archive = new MemoryStream(odtBytes))
            {
                ZipFile zf = new ZipFile(archive);
                try
                {
                    ZipEntry entry = zf.GetEntry(ContentEntryName);
                    if (entry == null)
                    {
                        throw new InvalidDataException("The file does not contain a " + ContentEntryName + " entry.");
                    }

                    using (StreamReader reader = new StreamReader(zf.GetInputStream(entry)))
                    {
                        return reader.ReadToEnd();
                    }
                }
                finally
                {
                    zf.Close();
                }
            }
        }

        /// <summary>
        /// Downloads one document to <paramref name="localFilePath"/>.
        /// </summary>
        /// <param name="url">Unescaped address of the document.</param>
        /// <param name="localFilePath">File that receives the response body.</param>
        /// <returns>
        /// true when the document was saved; false when the server answered with
        /// something other than the document (non-200 status or an HTML page), in
        /// which case nothing is written.
        /// </returns>
        /// <exception cref="WebException">The request itself failed (network error, 4xx/5xx status).</exception>
        private static bool DownloadDocument(string url, string localFilePath)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.AllowAutoRedirect = false;
            request.Method = "GET";
            // A GET has no body, so Content-Type is meaningless; Accept is the
            // header that tells the server which representation is wanted.
            request.Accept = "application/pdf, */*;q=0.1";

            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                string contentType = response.ContentType ?? string.Empty;

                // With redirects disabled a 3xx answer is returned as a normal response,
                // and an unauthenticated request typically gets an HTML login/error page.
                // Saving either of those would produce a small bogus "document".
                if (response.StatusCode != HttpStatusCode.OK
                    || contentType.StartsWith("text/html", StringComparison.OrdinalIgnoreCase))
                {
                    Console.WriteLine("Skipped " + url + " (status " + (int)response.StatusCode
                        + ", content type '" + contentType + "').");
                    return false;
                }

                using (Stream body = response.GetResponseStream())
                using (FileStream fileStream = new FileStream(localFilePath, FileMode.Create))
                {
                    byte[] buffer = new byte[4096];
                    int bytesRead;
                    while ((bytesRead = body.Read(buffer, 0, buffer.Length)) > 0)
                    {
                        fileStream.Write(buffer, 0, bytesRead);
                    }
                }
            }

            return true;
        }

        /// <summary>
        /// Builds a copy of the ODT archive in which content.xml is replaced by
        /// <paramref name="contentXml"/> and every other entry is copied unchanged.
        /// </summary>
        /// <param name="odtBytes">Raw bytes of the original ODT file.</param>
        /// <param name="contentXml">New text of the content.xml entry.</param>
        /// <returns>Raw bytes of the updated ODT file.</returns>
        private static byte[] BuildUpdatedOdt(byte[] odtBytes, string contentXml)
        {
            byte[] contentXmlBytes = Encoding.UTF8.GetBytes(contentXml);
            byte[] buffer = new byte[4096];

            using (MemoryStream source = new MemoryStream(odtBytes))
            using (MemoryStream target = new MemoryStream())
            {
                ZipFile zf = new ZipFile(source);
                try
                {
                    ZipOutputStream zos = new ZipOutputStream(target);
                    zos.UseZip64 = UseZip64.Off;
                    zos.IsStreamOwner = false;

                    // Entries are written in their original order and stored entries stay
                    // stored: the ODF package format expects "mimetype" to be the first
                    // entry of the archive and to be uncompressed.
                    foreach (ZipEntry origEntry in zf)
                    {
                        ZipEntry newEntry = new ZipEntry(origEntry.Name);
                        if (origEntry.CompressionMethod == CompressionMethod.Stored)
                        {
                            newEntry.CompressionMethod = CompressionMethod.Stored;
                        }

                        zos.PutNextEntry(newEntry);

                        if (origEntry.Name == ContentEntryName)
                        {
                            zos.Write(contentXmlBytes, 0, contentXmlBytes.Length);
                        }
                        else
                        {
                            using (Stream input = zf.GetInputStream(origEntry))
                            {
                                StreamUtils.Copy(input, zos, buffer);
                            }
                        }

                        zos.CloseEntry();
                    }

                    // Writes the central directory; the target stream stays open
                    // because IsStreamOwner is false.
                    zos.Close();
                }
                finally
                {
                    zf.Close();
                }

                return target.ToArray();
            }
        }
    }
}
 
(Link Before the treatment)
http://applnet.fiducial.fr/GetContenu/Download.aspx?p1=35fa172c-2e3a-449a-a156-d40653952325&p2=4&p5=1&p6=NOPUB
(Link after):
https://com.fiducial.fr/files/fiducial/banque/p1=35fa172c-2e3a-449a-a156-d40653952325&p2=4&p5=1&p6=NOPUB
 
It seems blocked on this link http://applnet.fiducial.fr/GetConte...e3a-449a-a156-d40653952325&p2=4&p5=1&p6=NOPUB

(It's the first of the document, so I think they will all be blocked)
 
Last edited by a moderator:
So you are saying that it is timing out downloading this URL:
http://applnet.fiducial.fr/GetContenu/Download.aspx?p1=35fa172c-2e3a-449a-a156-d40653952325&p2=4&p5=1&p6=NOPUB

or on
http://applnet.fiducial.fr/GetConte...e3a-449a-a156-d40653952325&p2=4&p5=1&p6=NOPUB
 
So essentially the only difference between your original post and the new code in post #11 is that you are passing in cookies that you got back from the login page. It's strange that the newer code would time out. I'm not quite sure how to help you. Have you asked the owner of the site what is the best way to get those files? Perhaps they actually have an API instead of you having to essentially do some screen scraping.
 
Make it simple, as a test. Put 10 valid PDF urls, that you have verified by grabbing from a browser, into an array and loop them downloading one after the other with your cookie submit. Do nothing else. No zips, etc. Does it work?
 
Make it simple, as a test. Put 10 valid PDF urls, that you have verified by grabbing from a browser, into an array and loop them downloading one after the other with your cookie submit. Do nothing else. No zips, etc. Does it work?


Do I have to perform this test on the same intranet (with an authentication system)?
 
Yes, because you won't be doing an apples to apples comparison of your downloading logic if you are testing outside your intranet, but your code is meant to run in your intranet.
 
I tried this. After passing the username and password with the POST method, I displayed the authentication response and got the message "Authentication successful"; I also tried with false credentials and got a failure. So for now all is well. I also tried to show the stored cookies, and none are stored after authentication.
C#:
/// <summary>
/// Posts the credentials to the CAS login page and returns the cookie
/// container that collected the cookies issued during that exchange.
/// </summary>
/// <param name="username">Login name; null is sent as an empty value.</param>
/// <param name="password">Password; null is sent as an empty value.</param>
/// <returns>
/// The container attached to the login request. Assign the same instance to the
/// CookieContainer of later requests so that they carry the collected cookies.
/// </returns>
/// <exception cref="WebException">The login request failed.</exception>
private static CookieContainer Login(string username, string password)
        {
            string uriString = "https://applnet.fiducial.fr/CASServer/login.aspx?";
            CookieContainer cookieContainer = new CookieContainer();

            // Form values must be URL-encoded: a '&', '=', '+' or '%' inside the
            // user name or password would otherwise corrupt the posted form.
            string postData = "username=" + Uri.EscapeDataString(username ?? string.Empty)
                + "&password=" + Uri.EscapeDataString(password ?? string.Empty);
            byte[] postDataBytes = Encoding.UTF8.GetBytes(postData);

            HttpWebRequest httpWebRequest = (HttpWebRequest)WebRequest.Create(uriString);
            httpWebRequest.Method = "POST";
            httpWebRequest.ContentType = "application/x-www-form-urlencoded";
            httpWebRequest.ContentLength = postDataBytes.Length;
            httpWebRequest.CookieContainer = cookieContainer;

            using (Stream requestStream = httpWebRequest.GetRequestStream())
            {
                requestStream.Write(postDataBytes, 0, postDataBytes.Length);
            }

            using (HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse())
            using (Stream responseStream = httpWebResponse.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream, Encoding.UTF8))
            {
                // Read the body to the end so the response is fully consumed
                // before it is closed; the text itself is not needed here.
                reader.ReadToEnd();
            }

            // List the cookies from the container rather than from the response:
            // when the login answers with a redirect, cookies set on the intermediate
            // response are kept by the container but may be absent from the final
            // response's Cookies collection.
            CookieCollection storedCookies = cookieContainer.GetCookies(new Uri(uriString));

            foreach (Cookie cookie in storedCookies)
            {
                Console.WriteLine("Cookie Name: {0}", cookie.Name);
                Console.WriteLine("Value: {0}", cookie.Value);
                Console.WriteLine("Domain: {0}", cookie.Domain);
                Console.WriteLine("Path: {0}", cookie.Path);
                Console.WriteLine("Expires: {0}", cookie.Expires.ToString());
                Console.WriteLine("Secure: {0}", cookie.Secure.ToString());
                Console.WriteLine("HttpOnly: {0}", cookie.HttpOnly.ToString());
            }

            return cookieContainer;

        }
 
You can use the same cookie container that you use for the initial authentication.
 
Hello, finally the problem is solved, it was finally not possible for me to recover the documents without help from the administrator of the intranet. Thanks a lot for your help.
 

Latest posts

Back
Top Bottom