-
Notifications
You must be signed in to change notification settings - Fork 3
/
Program.cs
416 lines (353 loc) · 18.4 KB
/
Program.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
// -------------------------------------------------------------------------------
// Written by Matthew Monroe and Nikša Blonder for the Department of Energy (PNNL, Richland, WA)
// Program started June 14, 2005
//
// E-mail: matthew.monroe@pnnl.gov or proteomics@pnnl.gov
// Website: https://github.com/PNNL-Comp-Mass-Spec/ or https://www.pnnl.gov/integrative-omics
// -------------------------------------------------------------------------------
//
// Licensed under the 2-Clause BSD License; you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
// https://opensource.org/licenses/BSD-2-Clause
//
// Copyright 2018 Battelle Memorial Institute
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Windows.Forms;
using PRISM;
using PRISM.FileProcessor;
using ProteinCoverageSummarizer;
namespace ProteinCoverageSummarizerGUI
{
/// <summary>
/// <para>
/// This program uses clsProteinCoverageSummarizer to read in a file with protein sequences along with
/// an accompanying file with peptide sequences and compute the percent coverage of each of the proteins
/// </para>
/// <para>
/// Example command line:
/// /I:PeptideInputFilePath /R:ProteinInputFilePath /O:OutputDirectoryPath /P:ParameterFilePath
/// </para>
/// </summary>
public static class Program
{
// Ignore Spelling: Nikša
/// <summary>
/// Program date
/// </summary>
public const string PROGRAM_DATE = "July 22, 2024";
// Parameter file path; assigned from the /P switch in SetOptionsUsingCommandLineParameters
// and passed to ProcessFilesWildcard in Main
private static string mParameterFilePath;
// UTC time when progress (a percent value or a dot) was last written to the console
private static DateTime mLastProgressReportTime;
// Percent-complete threshold that must be reached before the next percent value is written;
// set to 0 by the ProgressReset handler and advanced by the ProgressChanged handler
private static int mLastProgressReportValue;
// Win32 function (kernel32.dll); ShowGUI uses the returned handle to hide and re-show the console
[DllImport("kernel32.dll")]
private static extern IntPtr GetConsoleWindow();
// Win32 function (user32.dll); called by ShowGUI with SW_HIDE or SW_SHOW as nCmdShow
[DllImport("user32.dll")]
private static extern bool ShowWindow(IntPtr hWnd, int nCmdShow);
// nCmdShow value passed to ShowWindow to hide the console window
private const int SW_HIDE = 0;
// nCmdShow value passed to ShowWindow to show the console window again
private const int SW_SHOW = 5;
/// <summary>
/// Main entry method
/// </summary>
/// <remarks>
/// <para>
/// Shows the GUI when help was not requested and no protein input file path was specified.
/// Otherwise, validates the command line options and processes the peptide input file(s),
/// writing status, warning, error, and progress messages to the console.
/// </para>
/// <para>
/// Returns -1 when invalid parameters are found, when the help text is shown,
/// when processing fails, or when an exception is caught.
/// </para>
/// </remarks>
/// <returns>0 if no error, error code if an issue</returns>
// Enable single thread apartment (STA) mode
[STAThread]
public static int Main()
{
    var commandLineParser = new clsParseCommandLine();
    mParameterFilePath = string.Empty;

    try
    {
        var options = new ProteinCoverageSummarizerOptions();
        var proceed = false;

        if (commandLineParser.ParseCommandLine())
        {
            if (SetOptionsUsingCommandLineParameters(commandLineParser, options, out var invalidParameters))
            {
                proceed = true;
            }

            if (invalidParameters)
                return -1;
        }

        // No protein input file (and no help request) means the user wants the GUI
        if (!commandLineParser.NeedToShowHelp && string.IsNullOrEmpty(options.ProteinInputFilePath))
        {
            ShowGUI(options);
            return 0;
        }

        // Use string.IsNullOrEmpty (as for ProteinInputFilePath above) so that a null peptide
        // input file path results in the help text instead of a NullReferenceException
        if (!proceed ||
            commandLineParser.NeedToShowHelp ||
            commandLineParser.ParameterCount == 0 ||
            string.IsNullOrEmpty(options.PeptideInputFilePath))
        {
            ShowProgramHelp();
            return -1;
        }

        // Only warn when no parameter file was specified
        if (string.IsNullOrWhiteSpace(mParameterFilePath) &&
            !options.SaveProteinToPeptideMappingFile &&
            options.SearchAllProteinsSkipCoverageComputationSteps)
        {
            ConsoleMsgUtils.ShowWarning(ConsoleMsgUtils.WrapParagraph(
                "You used /K to skip protein coverage computation but didn't specify /M " +
                "to create a protein to peptide mapping file; no results will be saved"));
            Console.WriteLine();
            ConsoleMsgUtils.ShowWarning("It is advised that you use only /M (and don't use /K)");
        }

        try
        {
            var proteinCoverageSummarizer = new clsProteinCoverageSummarizerRunner(options)
            {
                CallingAppHandlesEvents = false
            };

            proteinCoverageSummarizer.StatusEvent += ProteinCoverageSummarizer_StatusEvent;
            proteinCoverageSummarizer.ErrorEvent += ProteinCoverageSummarizer_ErrorEvent;
            proteinCoverageSummarizer.WarningEvent += ProteinCoverageSummarizer_WarningEvent;
            proteinCoverageSummarizer.ProgressUpdate += ProteinCoverageSummarizer_ProgressChanged;
            proteinCoverageSummarizer.ProgressReset += ProteinCoverageSummarizer_ProgressReset;

            var success = proteinCoverageSummarizer.ProcessFilesWildcard(options.PeptideInputFilePath, options.OutputDirectoryPath, mParameterFilePath);

            if (success)
            {
                return 0;
            }

            ConsoleMsgUtils.ShowWarning("Processing failed");
            return -1;
        }
        catch (Exception ex)
        {
            ShowErrorMessage("Error running the protein coverage summarizer: " + ex.Message);
            return -1;
        }
    }
    catch (Exception ex)
    {
        ShowErrorMessage("Error occurred in Program->Main: " + Environment.NewLine + ex.Message);
        return -1;
    }
}
/// <summary>
/// Write the progress to the console, in the form "Processing: N% "
/// </summary>
/// <param name="percentComplete">Percent complete; values over 100 are shown as 100</param>
/// <param name="addCarriageReturn">When true, write a line terminator both before and after the progress text</param>
private static void DisplayProgressPercent(int percentComplete, bool addCarriageReturn)
{
    var percentToShow = Math.Min(percentComplete, 100);

    if (!addCarriageReturn)
    {
        Console.Write("Processing: {0}% ", percentToShow);
        return;
    }

    Console.WriteLine();
    Console.Write("Processing: {0}% ", percentToShow);
    Console.WriteLine();
}
/// <summary>
/// Get the version text that AppUtils.GetAppVersion returns for PROGRAM_DATE
/// </summary>
/// <returns>Value returned by AppUtils.GetAppVersion</returns>
private static string GetAppVersion() => AppUtils.GetAppVersion(PROGRAM_DATE);
/// <summary>
/// Set options using the command line
/// </summary>
/// <remarks>
/// <para>
/// The value for /P is stored in mParameterFilePath instead of in the options object.
/// </para>
/// <para>
/// A value for /F that is not an integer, or that does not match a defined
/// PeptideFileColumnOrderingCode member, is reported with a warning and otherwise ignored
/// (options.PeptideFileFormatCode is left unchanged).
/// </para>
/// </remarks>
/// <param name="commandLineParser">Command line parser to query for switches and values</param>
/// <param name="options">Options object to update</param>
/// <param name="invalidParameters">True if an unrecognized parameter is found</param>
/// <returns>True if no problems; false if an issue</returns>
private static bool SetOptionsUsingCommandLineParameters(
    clsParseCommandLine commandLineParser, ProteinCoverageSummarizerOptions options, out bool invalidParameters)
{
    var validParameters = new List<string>
    {
        "I", "O", "R", "P", "F", "SkipHeader", "SkipHeaders", "G", "H", "M", "K", "D", "Debug", "KeepDB"
    };

    invalidParameters = false;

    try
    {
        // Make sure no invalid parameters are present
        if (commandLineParser.InvalidParametersPresent(validParameters))
        {
            ShowErrorMessage("Invalid command line parameters",
                commandLineParser.InvalidParameters(validParameters).Select(item => "/" + item).ToList());

            invalidParameters = true;
            return false;
        }

        // Query commandLineParser to see if various parameters are present

        // The peptide input file can be specified with /I or as the first non-switch argument
        if (commandLineParser.RetrieveValueForParameter("I", out var inputFilePath))
        {
            options.PeptideInputFilePath = inputFilePath;
        }
        else if (commandLineParser.NonSwitchParameterCount > 0)
        {
            options.PeptideInputFilePath = commandLineParser.RetrieveNonSwitchParameter(0);
        }

        if (commandLineParser.RetrieveValueForParameter("O", out var outputDirectoryPath))
            options.OutputDirectoryPath = outputDirectoryPath;

        if (commandLineParser.RetrieveValueForParameter("R", out var proteinFile))
            options.ProteinInputFilePath = proteinFile;

        if (commandLineParser.RetrieveValueForParameter("P", out var parameterFile))
            mParameterFilePath = parameterFile;

        if (commandLineParser.RetrieveValueForParameter("F", out var inputFileFormatCode))
        {
            // Casting an arbitrary integer to an enum never throws in C#,
            // so the code must be validated explicitly before it is stored
            if (int.TryParse(inputFileFormatCode, out var inputFileFormatCodeValue) &&
                Enum.IsDefined(
                    typeof(ProteinCoverageSummarizerOptions.PeptideFileColumnOrderingCode),
                    (ProteinCoverageSummarizerOptions.PeptideFileColumnOrderingCode)inputFileFormatCodeValue))
            {
                options.PeptideFileFormatCode = (ProteinCoverageSummarizerOptions.PeptideFileColumnOrderingCode)inputFileFormatCodeValue;
            }
            else
            {
                // Leave options.PeptideFileFormatCode unchanged
                ConsoleMsgUtils.ShowWarning("Ignoring unrecognized file format code specified with /F: " + inputFileFormatCode);
            }
        }

        if (commandLineParser.RetrieveValueForParameter("SkipHeader", out _) ||
            commandLineParser.RetrieveValueForParameter("SkipHeaders", out _))
        {
            options.PeptideFileSkipFirstLine = true;
            options.ProteinDataOptions.DelimitedFileSkipFirstLine = true;
        }

        // /H hides the protein sequence in the output
        if (commandLineParser.RetrieveValueForParameter("H", out _))
            options.OutputProteinSequence = false;

        options.IgnoreILDifferences = commandLineParser.IsParameterPresent("G");
        options.SaveProteinToPeptideMappingFile = commandLineParser.IsParameterPresent("M");
        options.SearchAllProteinsSkipCoverageComputationSteps = commandLineParser.IsParameterPresent("K");
        options.SaveSourceDataPlusProteinsFile = commandLineParser.IsParameterPresent("D");
        options.DebugMode = commandLineParser.IsParameterPresent("Debug");
        options.KeepDB = commandLineParser.IsParameterPresent("KeepDB");

        return true;
    }
    catch (Exception ex)
    {
        ShowErrorMessage("Error parsing the command line parameters: " + Environment.NewLine + ex.Message);
    }

    return false;
}
/// <summary>
/// Report a single error message using ConsoleMsgUtils.ShowError
/// </summary>
/// <param name="message">Error message</param>
private static void ShowErrorMessage(string message) => ConsoleMsgUtils.ShowError(message);
/// <summary>
/// Report a titled list of error messages using ConsoleMsgUtils.ShowErrors
/// </summary>
/// <param name="title">Title for the list of errors</param>
/// <param name="errorMessages">Error messages</param>
private static void ShowErrorMessage(string title, IEnumerable<string> errorMessages) =>
    ConsoleMsgUtils.ShowErrors(title, errorMessages);
/// <summary>
/// Show the main GUI form as a modal dialog
/// </summary>
/// <remarks>
/// Unless options.DebugMode is true, the console window is hidden while the form is open.
/// The console is re-shown in a finally block so that it is restored even if
/// creating or showing the form raises an exception.
/// </remarks>
/// <param name="options">Options; DebugMode and KeepDB are read</param>
private static void ShowGUI(ProteinCoverageSummarizerOptions options)
{
    Application.EnableVisualStyles();
    Application.DoEvents();

    var consoleHandle = IntPtr.Zero;
    var consoleHidden = false;

    try
    {
        consoleHandle = GetConsoleWindow();

        // A zero handle means there is no console window to hide
        if (!options.DebugMode && consoleHandle != IntPtr.Zero)
        {
            // Hide the console
            ShowWindow(consoleHandle, SW_HIDE);
            consoleHidden = true;
        }

        // Forms shown with ShowDialog are not disposed automatically when closed
        using (var objFormMain = new GUI { KeepDB = options.KeepDB })
        {
            objFormMain.ShowDialog();
        }
    }
    catch (Exception ex)
    {
        ConsoleMsgUtils.ShowWarning("Error in ShowGUI: " + ex.Message);
        ConsoleMsgUtils.ShowWarning(StackTraceFormatter.GetExceptionStackTraceMultiLine(ex));

        MessageBox.Show("Error in ShowGUI: " + ex.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Exclamation);
    }
    finally
    {
        if (consoleHidden)
        {
            // Show the console
            ShowWindow(consoleHandle, SW_SHOW);
        }
    }
}
/// <summary>
/// Write the program description, syntax, switch descriptions, version, and contact info to the console
/// </summary>
/// <remarks>
/// Any exception raised while writing the help text is reported with ShowErrorMessage
/// </remarks>
private static void ShowProgramHelp()
{
    // Write text to the console after passing it through ConsoleMsgUtils.WrapParagraph
    void WriteWrapped(string paragraph)
    {
        Console.WriteLine(ConsoleMsgUtils.WrapParagraph(paragraph));
    }

    // Write one /F option: a space, the integer value of the format code, then the description
    void WriteFormatCode(ProteinCoverageSummarizerOptions.PeptideFileColumnOrderingCode formatCode, string description)
    {
        Console.WriteLine(" " + (int)formatCode + description);
    }

    try
    {
        // Program description
        WriteWrapped(
            "This program reads in a .fasta or .txt file containing protein names and sequences (and optionally descriptions). " +
            "The program also reads in a .txt file containing peptide sequences and protein names (though protein name is optional) " +
            "then uses this information to compute the sequence coverage percent for each protein. " +
            "Recognizes files where the first line is column headers, reading peptides from the first column that starts with 'Peptide' " +
            "and proteins from the first column that starts with 'Protein'");
        Console.WriteLine();

        // Syntax
        var executableName = Path.GetFileName(AppUtils.GetAppPath());
        Console.WriteLine("Program syntax:" + Environment.NewLine + executableName);
        Console.WriteLine(" /I:PeptideInputFilePath /R:ProteinInputFilePath [/O:OutputDirectoryName]");
        Console.WriteLine(" [/P:ParameterFilePath] [/F:FileFormatCode] [/SkipHeader]");
        Console.WriteLine(" [/G] [/H] [/M] [/K] [/D] [/Debug] [/KeepDB]");
        Console.WriteLine();

        // File and directory arguments
        WriteWrapped(
            "The input file path can contain the wildcard character *. If a wildcard is present, the same protein input file path " +
            "will be used for each of the peptide input files matched.");
        Console.WriteLine();

        WriteWrapped(
            "The output directory name is optional. If omitted, the output files will be created in the same directory as the input file. " +
            "If included, a subdirectory is created with the name OutputDirectoryName.");
        Console.WriteLine();

        WriteWrapped(
            "The parameter file path is optional. If included, it should point to a valid XML parameter file.");
        Console.WriteLine();

        // File format codes
        Console.WriteLine("Use /F to specify the peptide input file format code. Options are:");
        WriteFormatCode(
            ProteinCoverageSummarizerOptions.PeptideFileColumnOrderingCode.SequenceOnly,
            "=Peptide sequence in the 1st column (subsequent columns are ignored)");
        WriteFormatCode(
            ProteinCoverageSummarizerOptions.PeptideFileColumnOrderingCode.ProteinName_PeptideSequence,
            "=Protein name in 1st column and peptide sequence 2nd column");
        WriteFormatCode(
            ProteinCoverageSummarizerOptions.PeptideFileColumnOrderingCode.UseHeaderNames,
            "=Generic tab-delimited text file; will look for column names that start with Peptide, Protein, and Scan");
        Console.WriteLine();

        // Remaining switches
        Console.WriteLine("Use /SkipHeader to skip the first line when the file format is Sequence Only or Protein Name and Sequence");
        WriteWrapped(
            "Use /G to ignore I/L differences when finding peptides in proteins or computing coverage.");
        Console.WriteLine("Use /H to suppress (hide) the protein sequence in the _coverage.txt file.");
        Console.WriteLine("Use /M to enable the creation of a protein to peptide mapping file");
        Console.WriteLine("Use /K to skip protein coverage computation steps");
        WriteWrapped(
            "Use /D to duplicate the input file, but add a new column listing the mapped protein for each peptide. " +
            "If a peptide maps to multiple proteins, multiple lines will be listed");
        Console.WriteLine();

        Console.WriteLine("Use /Debug to keep the console open to see additional debug messages");
        Console.WriteLine("Use /KeepDB to keep the SQLite database after processing (by default it is deleted)");
        Console.WriteLine();

        // Authors, version, and contact info
        Console.WriteLine("Program written by Matthew Monroe and Nikša Blonder for the Department of Energy (PNNL, Richland, WA)");
        Console.WriteLine("Version: " + GetAppVersion());
        Console.WriteLine();

        Console.WriteLine("E-mail: matthew.monroe@pnnl.gov or proteomics@pnnl.gov");
        Console.WriteLine("Website: https://github.com/PNNL-Comp-Mass-Spec/ or https://www.pnnl.gov/integrative-omics");
        Console.WriteLine();
    }
    catch (Exception ex)
    {
        ShowErrorMessage("Error displaying the program syntax: " + ex.Message);
    }
}
/// <summary>
/// Status event handler: write the message to the console, followed by a line terminator
/// </summary>
/// <param name="message">Status message</param>
private static void ProteinCoverageSummarizer_StatusEvent(string message) => Console.WriteLine(message);
/// <summary>
/// Warning event handler: report the message using ConsoleMsgUtils.ShowWarning
/// </summary>
/// <param name="message">Warning message</param>
private static void ProteinCoverageSummarizer_WarningEvent(string message) =>
    ConsoleMsgUtils.ShowWarning(message);
/// <summary>
/// Error event handler: report the message using ShowErrorMessage
/// </summary>
/// <param name="message">Error message</param>
/// <param name="ex">Exception associated with the error; not used by this handler</param>
private static void ProteinCoverageSummarizer_ErrorEvent(string message, Exception ex) =>
    ShowErrorMessage(message);
/// <summary>
/// Progress event handler
/// </summary>
/// <remarks>
/// <para>
/// When percentComplete reaches the current threshold (mLastProgressReportValue), writes that
/// threshold value using DisplayProgressPercent, then raises the threshold by 25.
/// </para>
/// <para>
/// Otherwise, writes a dot if more than 250 milliseconds have elapsed since the last progress output.
/// </para>
/// </remarks>
/// <param name="taskDescription">Task description; not used by this handler</param>
/// <param name="percentComplete">Percent complete</param>
private static void ProteinCoverageSummarizer_ProgressChanged(string taskDescription, float percentComplete)
{
    const int PERCENT_REPORT_INTERVAL = 25;
    const int PROGRESS_DOT_INTERVAL_MSEC = 250;

    var thresholdReached = percentComplete >= mLastProgressReportValue;

    if (!thresholdReached)
    {
        // Still below the next threshold; show activity with a dot, but not too often
        var elapsedMsec = DateTime.UtcNow.Subtract(mLastProgressReportTime).TotalMilliseconds;

        if (elapsedMsec > PROGRESS_DOT_INTERVAL_MSEC)
        {
            mLastProgressReportTime = DateTime.UtcNow;
            Console.Write(".");
        }

        return;
    }

    // End the line holding the previous percent value (and any dots) before writing the next value
    var previousValueShown = mLastProgressReportValue > 0;
    if (previousValueShown)
    {
        Console.WriteLine();
    }

    // Note that the threshold value is displayed, not percentComplete
    DisplayProgressPercent(mLastProgressReportValue, false);

    mLastProgressReportValue += PERCENT_REPORT_INTERVAL;
    mLastProgressReportTime = DateTime.UtcNow;
}
/// <summary>
/// Progress reset event handler: set the percent threshold back to 0
/// and set the last progress report time to the current UTC time
/// </summary>
private static void ProteinCoverageSummarizer_ProgressReset()
{
    mLastProgressReportValue = 0;
    mLastProgressReportTime = DateTime.UtcNow;
}
}
}