-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathsc_readparsebio.m
128 lines (116 loc) · 3.35 KB
/
sc_readparsebio.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
function [X, genelist, celllist, ftdone] = sc_readparsebio(selpath, ~)
%SC_READPARSEBIO Read a count matrix, gene list and cell list from a folder.
%
%   [X, genelist, celllist, ftdone] = sc_readparsebio(selpath)
%
%   The folder SELPATH is searched (via i_guessmtxfile) for a matrix file
%   named '<prefix>DGE.mtx' or 'count_matrix.mtx', plus the companion files
%   '<prefix>all_genes.csv' and '<prefix>cell_metadata.csv'. Each file may
%   also be present only as a '.gz' archive, in which case it is extracted
%   temporarily and the extracted copy is removed again after reading.
%
%   Inputs:
%     selpath - folder to read. When omitted, a folder picker (uigetdir)
%               is shown.
%     (2nd)   - ignored; accepted only so existing two-argument calls
%               keep working.
%
%   Outputs:
%     X        - transpose of the matrix returned by sc_readmtxfile; cast
%                to uint16 when it is not sparse.
%                NOTE(review): presumably genes-by-cells after the
%                transpose - confirm against sc_readmtxfile.
%     genelist - string array taken from the 'gene_name' column of the
%                gene table.
%     celllist - string array taken from the 'bc_wells' column of the cell
%                metadata table, or [] when no metadata file is available.
%     ftdone   - true when the gene table was found and read.
%
%   Errors when the folder is invalid, or when the matrix file or the gene
%   table cannot be found. A missing cell metadata file only raises a
%   warning.

if nargin < 1, selpath = uigetdir; end

% uigetdir returns 0 when the dialog is cancelled; reject that as well as
% empty or non-existent folders before touching the file system.
if isequal(selpath, 0) || isempty(selpath) || exist(char(selpath), 'dir') ~= 7
    error('Need valid folder name.');
end
% Work with a char path so the '.gz' concatenations below stay char vectors
% even when the caller passed a string scalar.
selpath = char(selpath);
fprintf('Processing %s...\n', selpath);

[out, aff] = i_guessmtxfile(selpath);
if isempty(out)
    error('No DGE.mtx file.');
end

% Derive the uncompressed matrix file name. The guessed name may be that of
% a '.gz' archive (e.g. 'DGE.mtx.gz'), so it is rebuilt from the prefix
% instead of being used verbatim.
prefix = char(aff);
if contains(out, 'DGE.mtx')
    mtxname = [prefix, 'DGE.mtx'];
else
    mtxname = char(out);
end
fprintf('Found %s\n', mtxname);

mmfname = fullfile(selpath, mtxname);
zmmfname = [mmfname, '.gz'];
ftfname = fullfile(selpath, [prefix, 'all_genes.csv']);
zftfname = [ftfname, '.gz'];
bcfname = fullfile(selpath, [prefix, 'cell_metadata.csv']);
zbcfname = [bcfname, '.gz'];

[hasmtx, mtxunzipped] = i_gunzipifneeded(mmfname, zmmfname);
if ~hasmtx, error('No matrix file'); end

[ftdone, ftunzipped] = i_gunzipifneeded(ftfname, zftfname);
if ~ftdone, error('No feature file'); end

[hasbc, bcunzipped] = i_gunzipifneeded(bcfname, zbcfname);
if ~hasbc
    warning('No cell_metadata.csv file.');
end

fprintf('Reading matrix file...');
X = sc_readmtxfile(mmfname);
if ~issparse(X)
    X = uint16(X);
end
X = X.';
fprintf('done.\n');

delims = {'\t', ',', ' ', ';', '|'};
T1 = readtable(ftfname, 'ReadVariableNames', true, ...
    'filetype', 'text', 'Delimiter', delims);
genelist = string(T1.gene_name);

% The metadata file is optional: without it the cell list is left empty
% instead of failing inside readtable.
if hasbc
    T2 = readtable(bcfname, 'ReadVariableNames', true, ...
        'filetype', 'text', 'Delimiter', delims);
    celllist = string(T2.bc_wells);
else
    celllist = [];
end

% Remove only the files extracted by this call; uncompressed files that
% were already present in the folder are left alone.
if mtxunzipped && exist(mmfname, 'file') == 2
    delete(mmfname);
end
if ftunzipped && exist(ftfname, 'file') == 2
    delete(ftfname);
end
if bcunzipped && exist(bcfname, 'file') == 2
    delete(bcfname);
end
end

function [available, unzipped] = i_gunzipifneeded(fname, zfname)
% Ensure FNAME exists, extracting it from the archive ZFNAME when only the
% gzipped copy is present. AVAILABLE tells whether FNAME exists afterwards;
% UNZIPPED tells whether this call created it.
unzipped = false;
available = exist(fname, 'file') == 2;
if ~available && exist(zfname, 'file') == 2
    [~, nametxt] = fileparts(zfname);
    fprintf('Unzipping %s.gz...\n', nametxt);
    % Extract next to the archive so the result lands exactly at FNAME.
    gunzip(zfname, fileparts(zfname));
    unzipped = true;
    available = exist(fname, 'file') == 2;
end
end
function [out, aff] = i_guessmtxfile(selpath)
% Guess the name of the matrix file stored in folder SELPATH.
%
%   out - name of the uncompressed matrix file: '<prefix>DGE.mtx' when a
%         file whose name contains 'DGE.mtx' is present, otherwise
%         'count_matrix.mtx' when a file whose name contains that text is
%         present, otherwise [].
%   aff - the text preceding 'DGE.mtx' in the matched file name (may be an
%         empty char); [] when no 'DGE.mtx' file was matched.
%
% The matched file may itself be a '.gz' archive (e.g. 'DGE.mtx.gz'); OUT
% is always the uncompressed name so the caller can append '.gz' to obtain
% the archive name.
out = [];
aff = [];

listing = dir(selpath);
if isempty(listing), return; end

% Only regular files can be matrix files; skip '.', '..' and sub-folders.
names = {listing(~[listing.isdir]).name};
if isempty(names), return; end

% When several names match, the last one in the listing wins.
hit = find(contains(names, 'DGE.mtx'), 1, 'last');
if ~isempty(hit)
    aff = extractBefore(names{hit}, 'DGE.mtx');
    out = [aff, 'DGE.mtx'];
elseif any(contains(names, 'count_matrix.mtx'))
    out = 'count_matrix.mtx';
end
end