""" Functions to generate daily rinex 3.0x files from day-overlapping Emlid rinex files;
Necessary preprocessing for daily file processing with RTKLib
created by: L. Steiner (Orchid ID: 0000-0002-4958-0849)
created on: 17.05.2021
updated on: 10.05.2023
requirements: - install gnssrefl on Linux/Mac (gnssrefl is not working on Windows, see gnssrefl docs)
- gnssrefl (/~https://github.com/kristinemlarson/gnssrefl)
- gfzrnx (https://dataservices.gfz-potsdam.de/panmetaworks/showshort.php?id=escidoc:1577894)
- wget
- 7zip
- path to all programs added to the system environment variables
"""
import subprocess
import os
import glob
import datetime
import shutil
import lzma
import tarfile
import gnsscal
import datetime as dt
import pandas as pd
import numpy as np
import jdcal
import matplotlib.pyplot as plt
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
from matplotlib.ticker import NullFormatter
from termcolor import colored
import requests
import zipfile
import io
from datetime import date
""" Define general functions """
def create_folder(dest_path):
""" create a directory if it is not already existing
:param dest_path: path and name of new directory
"""
# Q: create 'temp' directory if not existing
if not os.path.exists(dest_path):
os.makedirs(dest_path, exist_ok=True)
print(colored("\ntemp dir created: %s" % dest_path, 'yellow'))
else:
print(colored("\ntemp dir already existing: %s" % dest_path, 'blue'))
def remove_folder(dest_path):
""" delete temporary directory
:param dest_path: local temporary directory for preprocessing the GNSS rinex files
"""
shutil.rmtree(dest_path)
print(colored("\n!!! temporary directory removed: %s" % dest_path, 'yellow'))
def get_mjd_int(syyyy, smm, sdd, eyyyy, emm, edd):
""" calculate start/end mjd using a start and end date of format '2022', '10', '01')
:param dest_path: local temporary directory for preprocessing the GNSS rinex files
"""
start_mjd = jdcal.gcal2jd(str(syyyy), str(smm), str(sdd))[1]
end_mjd = jdcal.gcal2jd(str(eyyyy), str(emm), str(edd))[1]
return start_mjd, end_mjd
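# Example usage (commented out; hypothetical dates): get the mjd range of one season,
# e.g. from 2021-11-26 to 2022-12-31, for selecting which files to (re)process.
# start_mjd, end_mjd = get_mjd_int('2021', '11', '26', '2022', '12', '31')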
""" Define preprocessing functions """
def move_files2parentdir(dest_path, f):
""" move files from temporary preprocessing to main processing directory (parent directory)
:param dest_path: local temporary directory for preprocessing the GNSS rinex files
:param f: file in folder
"""
# get parent directory
parent_dir = os.path.dirname(os.path.dirname(dest_path))
# destination file in parent directory
dest_path_file = os.path.join(parent_dir, f.split("\\")[-1])
# move file if it does not already exist
if not os.path.exists(dest_path_file):
shutil.move(f, parent_dir)
print("obs files moved to parent dir %s" % dest_path_file)
else:
print(colored("file in destination already exists, move aborted: %s" % dest_path_file, 'yellow'))
def check_existing_files(dest_path, rover):
""" check if rinex files are already available in the processing directory, otherwise copy & uncompress files from server
:param dest_path: temp directory for preprocessing the GNSS rinex files
:param rover: rover file name prefix
    :return: year_max, doy_max: newest year and doy in existing files; only newer doys should be copied
"""
# get parent directory
parent_dir = os.path.dirname(os.path.dirname(dest_path))
print('parent dir: ', parent_dir)
# get file names
files = sorted(glob.iglob(parent_dir + '/' + rover + '???0.*O', recursive=True), reverse=True)
# get newest year of files in processing folder
year_max = max([os.path.basename(f).split('.')[1][:2] for f in files])
print(colored('newest year in existing files of parent dir: %s' % year_max, 'blue'))
# get newest doy of files in processing folder
doy_max = os.path.basename(sorted(glob.iglob(parent_dir + '/' + rover + '???0.' + year_max + 'O', recursive=True),
reverse=True)[0]).split('.')[0][4:7]
print(colored('newest doy in existing files of parent dir: %s' % doy_max, 'blue'))
return year_max, doy_max
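# Example usage (commented out; hypothetical path): find the newest year/doy already present
# in the processing directory for the Leica rover (file prefix '3393').
# year_max, doy_max = check_existing_files('data/temp_NMLR/', rover='3393')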
def copy_file_no_overwrite(source_path, dest_path, file_name):
""" copy single files without overwriting existing files
:param source_path: source directory
:param dest_path: destination directory
:param file_name: name of file to copy
"""
# construct the src path and file name
source_path_file = os.path.join(source_path, file_name)
# construct the dest path and file name
dest_path_file = os.path.join(dest_path, file_name)
    # only copy if the destination file does not exist yet, otherwise abort the copy operation
if not os.path.exists(dest_path_file):
if not os.path.exists(dest_path):
os.makedirs(dest_path, exist_ok=True)
shutil.copyfile(source_path_file, dest_path_file)
print("\ncopy from %s to %s \nok" % (source_path_file, dest_path_file))
else:
print("\nfile in destination already exists: %s, \ncopy aborted!!!" % dest_path_file)
pass
def copy_solplotsdirs(source_path, dest_path):
""" copy entire solution and plot directories
:param source_path: local directory containing the solution and plot files
:param dest_path: remote directory used for backup
"""
shutil.copytree(source_path + '20_solutions/', dest_path + '20_solutions/', dirs_exist_ok=True)
print('\ncopy directory: ' + source_path + '20_solutions/\nto: ' + dest_path + '20_solutions/')
shutil.copytree(source_path + '30_plots/', dest_path + '30_plots/', dirs_exist_ok=True)
print('copy directory: ' + source_path + '30_plots/\nto: ' + dest_path + '30_plots/')
def copy4backup(source_path, dest_path):
""" copy entire processing directory to server
    :param source_path: local processing directory to back up
:param dest_path: remote directory used for backup
"""
shutil.copytree(source_path, dest_path, dirs_exist_ok=True)
print('\ncopy directory: ' + source_path + '\nto: ' + dest_path)
def copy_rinex_files(source_path, dest_path, receiver=['NMLB', 'NMLR', 'NMER'], copy=[True, False],
parent=[True, False], hatanaka=[True, False], move=[True, False], delete_temp=[True, False]):
""" copy rinex files of remote directory to a local temp directory if it does not already exist
& uncompress files, keep observation (and navigation) files
& delete compressed files and subfolders afterwards
:param source_path: remote directory hosting compressed GNSS rinex files
:param dest_path: local directory for processing the GNSS rinex files
:param receiver: high-end (Leica) or low-cost (Emlid), needs to be treated differently
:param copy: do you want to copy (True) the data or skip copying (False) the data and just decompress and further?
:param move: move renamed files to parent directory (True) or not (False)
:param delete_temp: delete temporary directory (True) or not (False)
"""
# Q: create temp directory if not existing
create_folder(dest_path)
# Q: check which rinex files are already existing in the processing directory
# prepare file prefix
if receiver == 'NMLR':
rover = '3393'
if receiver == 'NMLB':
rover = '3387'
if receiver == 'NMER':
rover = receiver
    # Q: check already existing years and doys of files in processing directory, get newest yeardoy
    year_max, doy_max = check_existing_files(dest_path, rover)
    # initialize doy_file; stays None if no new files are copied (prevents an undefined name below)
    doy_file = None
if receiver == 'NMER':
# Q: copy files from network drive to local temp folder
if copy is True:
            for f in sorted(glob.glob(source_path + '*.zip'), reverse=False):
                # construct the destination filename
                dest_file = os.path.join(dest_path, f.split("\\")[-1])
                # convert datetime to day of year (doy) from filename
                doy_f = datetime.datetime.strptime(os.path.basename(f).split('_')[2], "%Y%m%d%H%M").strftime('%j')
                yy_f = os.path.basename(f).split('_')[2][2:4]
                # Q: only copy files from server which are newer than the already existing doys of year=yy
                if (yy_f == year_max and doy_f > doy_max) or (yy_f > year_max):
                    # remember the newest copied doy (doy_file stays None if no new files are found)
                    doy_file = doy_f
                    # copy file if it does not already exist
                    if not os.path.exists(dest_file):
                        shutil.copy2(f, dest_path)
                        print("\nfile copied from %s to %s" % (f, dest_file))
                    else:
                        print(colored("\nfile in destination already exists: %s, \ncopy aborted!!!" % dest_file,
                                      'yellow'))
                        continue
                    # Q: uncompress file
                    shutil.unpack_archive(dest_file, dest_path)
                    print('file decompressed: %s' % dest_file)
                else:
                    # file already preprocessed and available in the processing folder, skip file
                    pass
if doy_file is None:
print(colored('no new files available in source folder', 'green'))
else:
pass
# Q: delete nav & zipped files
if doy_file is not None:
for f in glob.glob(dest_path + '*.*[BPzip]'):
os.remove(f)
print("nav files deleted %s" % dest_path)
# Q: split & merge day-overlapping Emlid rinex files to daily rinex files (for Emlid files only!)
if doy_file is not None:
dayoverlapping2daily_rinexfiles(dest_path, 'ReachM2_sladina-raw_', receiver, move, delete_temp)
if receiver == 'NMLB' or receiver == 'NMLR':
# Q: copy files from network drive to local temp folder
if copy is True:
            for f in sorted(glob.glob(source_path + '*.tar.xz')):
                # create destination filename
                dest_file = dest_path + f.split("\\")[-1]
                doy_f = os.path.basename(f)[4:7]
                yy_f = os.path.basename(f).split('.')[1][:2]
                # Q: only copy files from server which are newer than the already existing doys of year=yy
                if (yy_f == year_max and doy_f > doy_max) or (yy_f > year_max):
                    # remember the newest copied doy (doy_file stays None if no new files are found)
                    doy_file = doy_f
                    # copy file if it does not already exist
                    if not os.path.exists(dest_file):
                        shutil.copy2(f, dest_path)
                        print("\nfile copied from %s to %s" % (f, dest_file))
                    else:
                        print(colored("\nfile in destination already exists: %s, \ncopy aborted!!!" % dest_file,
                                      'yellow'))
                        continue
                    # Q: uncompress .tar.xz file
                    with tarfile.open(fileobj=lzma.open(dest_file)) as tar:
                        tar.extractall(dest_path)
                    print('file decompressed: %s' % dest_file)
                    # close the underlying xz file object
                    tar.fileobj.close()
                else:
                    # file already preprocessed and available in the processing folder, skip file
                    pass
if doy_file is None:
print(colored('no new files available in source folder:', 'green'))
else:
pass
# Q: move obs (and nav) files to parent dir
if parent is True:
if receiver == 'NMLB':
# copy observation (.yyd) & navigation (.yy[ngl]) files from base receiver
for f in glob.glob(dest_path + 'var/www/upload/' + receiver + '/*.*'):
shutil.move(f, dest_path)
if doy_file is not None:
print(colored("\nobs & nav files moved to parent dir %s" % dest_path, 'blue'))
if receiver == 'NMLR':
# copy only observation (.yyd) files from rover receivers
for f in glob.glob(dest_path + 'var/www/upload/' + receiver + '/*.*d'):
shutil.move(f, dest_path)
if doy_file is not None:
print(colored("\nobs files moved to parent dir %s" % dest_path, 'blue'))
else:
pass
# Q: convert hatanaka compressed rinex (.yyd) to uncompressed rinex observation (.yyo) files
if hatanaka is True:
if doy_file is not None:
print(colored("\ndecompress hatanaka rinex files", 'blue'))
for hatanaka_file in glob.glob(dest_path + '*.*d'):
print('decompress hatanaka file: ', hatanaka_file)
# subprocess.Popen('crx2rnx ' + hatanaka_file)
subprocess.call('crx2rnx ' + hatanaka_file)
print(colored("\nfinished decompressing hatanaka rinex files", 'blue'))
else:
pass
# Q: move all obs (and nav) files from temp to parent directory
if move is True:
if doy_file is not None:
print(colored("\nmove decompressed files to parent dir", 'blue'))
for f in glob.glob(dest_path + '*.*[ongl]'):
move_files2parentdir(dest_path, f)
print(colored("\nfinished moving decompressed files to parent dir", 'blue'))
else:
print('files are NOT moved to parent directory!')
# Q: get the newest year, doy after copying and convert to modified julian date (mjd)
yy_file, doy_file = check_existing_files(dest_path, rover)
date_file = gnsscal.yrdoy2date(int('20' + yy_file), int(doy_file))
mjd_file = jdcal.gcal2jd(date_file.year, date_file.month, date_file.day)[1]
# Q: delete temp directory
if delete_temp is True:
remove_folder(dest_path)
else:
print('temporary directory is NOT deleted!')
return mjd_file
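# Example usage (commented out; hypothetical paths): copy & decompress new Emlid files from a
# network drive into a local temp dir, keeping the temp dir for inspection afterwards.
# mjd_newest = copy_rinex_files('//server/GNSS/NMER/', 'data/temp_NMER/', receiver='NMER',
#                               copy=True, parent=True, hatanaka=True, move=True, delete_temp=False)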
def convert_datetime2doy_rinexfiles(dest_path, rover_prefix, rover_name):
""" convert Emlid file names to match format for 'gfzrnx' rinex conversion tools
:param dest_path: local temporary directory for preprocessing the GNSS rinex files
:param rover_prefix: prefix of rinex files in temp directory
:param rover_name: name of rover receiver
input filename: 'ReachM2_sladina-raw_202111251100.21O' [rover_prefix + datetime + '.' + yy + 'O']
output filename: 'NMER329[a..d].21o' [rover_prefix + doy + '0.' + yy + 'o']
"""
# Q: get doy from rinex filenames in temp dir with name structure: 'ReachM2_sladina-raw_202112041058.21O' [rover_prefix + datetime + '.' + yy + 'O']
print(colored('\nrenaming all files', 'blue'))
for f in glob.iglob(dest_path + rover_prefix + '*.*O', recursive=True):
# get rinex filename and year
rover_file = os.path.basename(f)
yy = rover_file.split('.')[-1][:2]
# convert datetime to day of year (doy)
doy = datetime.datetime.strptime(rover_file.split('.')[0].split('_')[2], "%Y%m%d%H%M").strftime('%j')
        # create new filename with doy; if it already exists, try the next suffix [a..d]
        # (max. four subdaily files per doy)
        for suffix in 'abcd':
            new_filename = dest_path + rover_name + doy + suffix + '.' + yy + 'o'
            if not os.path.exists(new_filename):
                break
            print('\nNew filename already existing: ', new_filename)
        os.rename(f, new_filename)
        print('\nRover file: ' + rover_file, '\ndoy: ', doy, '\nNew filename: ', new_filename)
print(colored('\nfinished renaming all files', 'blue'))
def split_rinex(dest_path, rover_name):
""" split day-overlapping rinex files at midnight --> get multiple subdaily files
:param dest_path: local temporary directory for preprocessing the GNSS rinex files
:param rover_name: name of rover receiver
gfzrnx split input: 'NMER329[a..d].21o' [rover + doy + '.' + yy + 'o']
gfzrnx split output: ' 00XXX_R_20213291100_01D_30S_MO.rnx'
"""
print(colored('\nstart splitting day-overlapping rinex files', 'blue'))
for f in glob.iglob(dest_path + rover_name + '*.*o', recursive=True):
# get filename
rover_file = os.path.basename(f)
print('\nstart splitting day-overlapping rinex file: %s' % rover_file)
# split rinex file at midnight with command: 'gfzrnx -finp NMER345.21o -fout ::RX3:: -split 86400'
process1 = subprocess.Popen('cd ' + dest_path + ' && '
'gfzrnx -finp ' + rover_file + ' -fout ::RX3:: -split 86400',
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stdout1, stderr1 = process1.communicate()
print(stdout1)
print(stderr1)
print(colored('\nfinished splitting all day-overlapping rinex files at: %s' % dest_path, 'blue'))
def rename_splitted_rinexfiles(dest_path, rover_name):
""" rename gfzrnx splitted rinex files output names to match input for gfzrnx merge files:
:param dest_path: local temporary directory for preprocessing the GNSS rinex files
:param rover_name: name of rover receiver
    gfzrnx split output: ' 00XXX_R_20213291100_01D_30S_MO.rnx'
    gfzrnx merge input: 'NMER00XXX_R_20213291100_01D_30S_MO.yyo'
"""
# Q: rename all .rnx files (gfzrnx split output --> gfzrnx merge input)
print(colored('\nrenaming all splitted rinex files', 'blue'))
for f in glob.iglob(dest_path + '*.rnx', recursive=True):
rover_file = os.path.basename(f)
yy = rover_file.split('_')[2][2:4]
new_filename = dest_path + rover_name + rover_file.split('.')[0][4:] + '.' + yy + 'o'
print('\nRover file: ' + rover_file, '\nNew filename: ', new_filename)
os.rename(f, new_filename)
print(colored('\nfinished renaming all splitted rinex files', 'blue'))
def merge_rinex(dest_path):
""" merge rinex files together per day --> get daily rinex files
:param dest_path: local temporary directory for preprocessing the GNSS rinex files
gfzrnx merge input: 'NMER00XXX_R_2021329????_01D_30S_MO.yyo'
gfzrnx merge output: 'NMER00XXX_R_2021330????_01D_30S_MO.rnx'
"""
print(colored('\nmerging all rinex files per day at: %s' % dest_path, 'blue'))
for f in glob.iglob(dest_path + 'NMER00XXX_R_20' + '*.*O', recursive=True):
# get filename
rover_file = os.path.basename(f)
yy = rover_file.split('_')[2][2:4]
# extract doy
doy = rover_file.split('.')[0][16:19]
print('\nRover file: ' + rover_file, '\ndoy: ', doy)
        # merge rinex files per day with command: 'gfzrnx -finp NMER00XXX_R_2021330????_01D_30S_MO.21o -fout ::RX3D:: -d 86400'
process1 = subprocess.Popen('cd ' + dest_path + ' && '
'gfzrnx -finp NMER00XXX_R_20' + yy + doy + '????_01D_30S_MO.' + yy + 'o -fout ::RX3D:: -d 86400',
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stdout1, stderr1 = process1.communicate()
print(stdout1)
print(stderr1)
print(colored('\nfinished merging all rinex files per day at: %s' % dest_path, 'blue'))
def rename_merged_rinexfiles(dest_path, rover_name, move=[True, False]):
""" rename gfzrnx merge files output names to match rtklib and leica rinex file names:
:param dest_path: local temporary directory for preprocessing the GNSS rinex files
:param rover_name: name of rover receiver
:param move: move renamed files to parent directory (True) or not (False)
:param delete_temp: delete temporary directory (True) or not (False)
gfzrnx merge output: 'NMER00XXX_R_2021330????_01D_30S_MO.rnx'
rtklib input: 'NMERdoy0.yyo' [rover_prefix + doy + '0.' + yy + 'o']
"""
for f in glob.iglob(dest_path + '*.rnx', recursive=True):
rover_file = os.path.basename(f)
yy = rover_file.split('_')[2][2:4]
new_filename = dest_path + rover_name + rover_file.split('.')[0][16:19] + '0.' + yy + 'o'
print('\nRover file: ' + rover_file, '\nNew filename: ', new_filename)
os.rename(f, new_filename)
print(colored('\nfinished renaming all merged rinex files', 'blue'))
# move renamed files to parent directory
if move is True:
for f in glob.iglob(dest_path + 'NMER???0.*O', recursive=True):
move_files2parentdir(dest_path, f)
else:
print('renamed merged daily files are NOT moved to parent directory!')
def dayoverlapping2daily_rinexfiles(dest_path, rover_prefix, receiver, move=[True, False], delete_temp=[True, False]):
""" convert day-overlapping Emlid rinex files to daily rinex files names to match rtklib input and leica files
:param dest_path: local temporary directory for preprocessing the GNSS rinex files
:param rover_prefix: prefix of rinex files in temp directory
:param receiver: name of rover receiver ['NMER']
"""
# create temporary directory if not already existing
create_folder(dest_path)
# convert Emlid files [rover_prefix + datetime + '.' + yy + 'O'] to format for 'gfzrnx' rinex conversion [rover_prefix + doy + '0.' + yy + 'o']
convert_datetime2doy_rinexfiles(dest_path, rover_prefix, receiver)
# split rinex files at midnight for day-overlapping files --> get subdaily rinex files
split_rinex(dest_path, receiver)
# rename splitted (subdaily) rinex files to match input for 'gfzrnx -merge'
rename_splitted_rinexfiles(dest_path, receiver)
# merge rinex files together per day --> get daily rinex files
merge_rinex(dest_path)
# rename merged (daily) rinex files to match rtklib input format [rover_prefix + doy + '0.' + yy + 'o'] & move to parent directory & delete temp dir
rename_merged_rinexfiles(dest_path, receiver, move)
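# Example usage (commented out; hypothetical path): convert day-overlapping Emlid files already
# present in the temp dir to daily rinex files and move them to the parent dir.
# dayoverlapping2daily_rinexfiles('data/temp_NMER/', 'ReachM2_sladina-raw_', 'NMER', move=True, delete_temp=False)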
def get_sol_yeardoy(dest_path, resolution):
""" get the newest solution file year, doy, mjd, date for only further process new available data.
:param resolution: processing time interval (minutes) for naming of output folder
:return: start_yy, start_mjd, start_mjd_emlid
"""
print(colored("\nget start year and mjd for further processing", 'blue'))
# TODO: remove _events from sol files
# get the newest solution file name
name_max = os.path.basename(
sorted(glob.iglob(dest_path + '20_solutions/NMLR/' + resolution + '/*.POS', recursive=True), reverse=True)[
0]).split('.')[0]
start_yy = name_max[2:4]
start_doy = int(name_max[-3:]) + 1
start_date = gnsscal.yrdoy2date(int('20' + start_yy), start_doy)
start_mjd = jdcal.gcal2jd(start_date.year, start_date.month, start_date.day)[1]
print(colored('start year %s, doy %s, mjd %s, date %s for further processing of Leica Rover' % (
start_yy, start_doy, start_mjd, start_date), 'blue'))
# get the newest emlid solution file name
name_max_emlid = os.path.basename(
sorted(glob.iglob(dest_path + '20_solutions/NMER/' + resolution + '/*.POS', recursive=True), reverse=True)[
0]).split('.')[0]
start_yy_emlid = name_max_emlid[2:4]
start_doy_emlid = int(name_max_emlid[-3:]) + 1
start_date_emlid = gnsscal.yrdoy2date(int('20' + start_yy_emlid), start_doy_emlid)
start_mjd_emlid = jdcal.gcal2jd(start_date_emlid.year, start_date_emlid.month, start_date_emlid.day)[1]
print(colored('start year %s, doy %s, mjd %s, date %s for further processing of Emlid Rover' % (
start_yy_emlid, start_doy_emlid, start_mjd_emlid, start_date_emlid), 'blue'))
return start_yy, start_mjd, start_mjd_emlid
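# Example usage (commented out; hypothetical path/resolution): get the start dates for appending
# only new data to the existing Leica and Emlid solution time series.
# start_yy, start_mjd, start_mjd_emlid = get_sol_yeardoy('data/', resolution='15min')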
""" Define RTKLIB functions """
def automate_rtklib_pp(dest_path, rover_prefix, mjd_start, mjd_end, ti_int, base_prefix, brdc_nav_prefix,
precise_nav_prefix, resolution, ending, rover_name=['NMER_original', 'NMER', 'NMLR'],
options=['options_Emlid', 'options_Leica']):
""" create input and output files for running RTKLib post processing automatically
for all rover rinex observation files (. yyo) available in the data path directory
get doy from rover file names with name structure:
Leica Rover: '33933650.21o' [rover + doy + '0.' + yy + 'o']
Emlid Rover (pre-processed): 'NMER3650.21o' [rover + doy + '0.' + yy + 'o']
Emlid Rover (original): 'ReachM2_sladina-raw_202112041058.21O' [rover + datetime + '.' + yy + 'O']
    :param dest_path: path to GNSS rinex observation and navigation data, and rtkpost configuration file (all data needs to be in one folder)
    :param rover_prefix: prefix of rover rinex filename
    :param mjd_start: start modified julian date (mjd) for processing files
    :param mjd_end: end mjd for processing files
    :param ti_int: processing time interval (in seconds)
    :param base_prefix: prefix of base rinex filename
    :param brdc_nav_prefix: prefix of broadcast navigation filename
    :param precise_nav_prefix: prefix of precise orbit filename
    :param resolution: processing time interval (in minutes) for naming of output folder
    :param ending: suffix of solution file names (e.g. a variant of processing options: '_noglonass')
    :param rover_name: name of rover
    :param options: rtkpost configuration file
    """
# Q: run rtklib for all rover files in directory
print(colored('\n\nstart processing files with RTKLIB from receiver: %s' % rover_name, 'blue'))
for file in glob.iglob(dest_path + rover_prefix + '*.*O', recursive=True):
# Q: get doy from rover filenames
rover_file = os.path.basename(file)
if rover_name == 'NMER_original':
# get date, year, modified julian date (mjd), doy, converted from datetime in Emlid original filename format (output from receiver, non-daily files)
date = dt.datetime.strptime(rover_file.split('.')[0].split('_')[2], "%Y%m%d%H%M")
year = str(date.year)[-2:]
mjd = jdcal.gcal2jd(date.year, date.month, date.day)[1]
doy = date.strftime('%j')
if rover_name == 'NMER' or rover_name == 'NMLR':
# get year, doy, date, modified julian date (mjd) directly from filename from Emlid pre-processed or Leica file name format (daily files)
year = rover_file.split('.')[1][:2]
doy = rover_file.split('.')[0][4:7]
date = gnsscal.yrdoy2date(int('20' + year), int(doy))
mjd = jdcal.gcal2jd(date.year, date.month, date.day)[1]
# Q: only process files inbetween the selected mjd range
if mjd_start <= mjd <= mjd_end:
print('\nProcessing rover file: ' + rover_file, '; year: ', year, '; doy: ', doy)
# convert doy to gpsweek and day of week (needed for precise orbit file names)
(gpsweek, dow) = gnsscal.yrdoy2gpswd(int('20' + year), int(doy))
# define input and output filenames (for some reason it's not working when input files are stored in subfolders!)
base_file = base_prefix + doy + '*.' + year + 'O'
broadcast_orbit_gps = brdc_nav_prefix + doy + '0.' + year + 'n'
broadcast_orbit_glonass = brdc_nav_prefix + doy + '0.' + year + 'g'
broadcast_orbit_galileo = brdc_nav_prefix + doy + '0.' + year + 'l'
precise_orbit = precise_nav_prefix + str(gpsweek) + str(dow) + '.EPH_M'
# create a solution directory if not existing
sol_dir = '20_solutions/' + rover_name + '/' + resolution + '/temp_' + rover_name + '/'
os.makedirs(dest_path + sol_dir, exist_ok=True)
output_file = sol_dir + '20' + year + '_' + rover_name + doy + ending + '.pos'
# Q: change directory to data directory & run RTKLib post processing command
run_rtklib_pp(dest_path, options, ti_int, output_file, rover_file, base_file,
broadcast_orbit_gps, broadcast_orbit_glonass, broadcast_orbit_galileo, precise_orbit)
print(colored('\n\nfinished processing all files with RTKLIB from receiver: %s' % rover_name, 'blue'))
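# Example usage (commented out; hypothetical paths and file prefixes, assuming the mjd range from
# get_mjd_int): process all daily Leica rover files with a 15 min (900 s) solution interval.
# automate_rtklib_pp('data/', '3393', start_mjd, end_mjd, ti_int='900', base_prefix='3387',
#                    brdc_nav_prefix='3387', precise_nav_prefix='COD', resolution='15min',
#                    ending='', rover_name='NMLR', options='options_Leica')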
def run_rtklib_pp(dest_path, options, ti_int, output_file, rover_file, base_file, brdc_orbit_gps, brdc_orbit_glonass,
brdc_orbit_galileo, precise_orbit):
""" run RTKLib post processing command (rnx2rtkp) as a subprocess (instead of manual RTKPost GUI)
example: 'rnx2rtkp -k rtkpost_options.conf -ti 900 -o 20_solutions/NMLR/15min/NMLRdoy.pos NMLR0040.17O NMLB0040.17O NMLB0040.17n NMLB0040.17g NMLB0040.17e COD17004.eph'
:param dest_path: path to GNSS rinex observation and navigation data, and rtkpost configuration file (all data needs to be in one folder)
:param options: rtkpost configuration file
:param ti_int: processing time interval (in seconds)
:param output_file: rtkpost solution file
:param rover_file: GNSS observation file (rinex) from the rover receiver
:param base_file: GNSS observation file (rinex) from the base receiver
:param brdc_orbit_gps: GNSS broadcast (predicted) orbit for GPS satellites
:param brdc_orbit_glonass: GNSS broadcast (predicted) orbit for GLONASS satellites
:param brdc_orbit_galileo: GNSS broadcast (predicted) orbit for GALILEO satellites
:param precise_orbit: GNSS precise (post processed) orbit for multi-GNSS (GPS, GLONASS, GALILEO, BEIDOU)
"""
# change directory & run RTKLIB post processing command 'rnx2rtkp'
process = subprocess.Popen('cd ' + dest_path + ' && rnx2rtkp '
'-k ' + options + '.conf '
'-ti ' + ti_int + ' '
'-o ' + output_file + ' '
+ rover_file + ' ' + base_file + ' ' + brdc_orbit_gps + ' ' + brdc_orbit_glonass + ' ' + brdc_orbit_galileo + ' ' + precise_orbit,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stdout, stderr = process.communicate()
# print(stdout) # print processing output
print(stderr) # print processing errors
def get_rtklib_solutions(dest_path, rover_name, resolution, ending, header_length):
""" get daily rtklib ENU solution files from solution directory and store all solutions in one dataframe and pickle
:param header_length: length of header in solution files (dependent on processing parameters)
:param dest_path: path to GNSS rinex observation and navigation data, and rtkpost configuration file
:param rover_name: name of rover
:param resolution: processing time interval (in minutes)
    :param ending: suffix of solution file names (e.g. a variant of processing options: '_noglonass')
:return: df_enu (pandas dataframe containing all seasons solution data columns ['date', 'time', 'U', 'amb_state', 'nr_sat', 'std_u'])
"""
# Q: read all existing ENU solution data from .pkl if already exists, else create empty dataframe
path_to_oldpickle = dest_path + '20_solutions/' + rover_name + '_' + resolution + ending + '.pkl'
if os.path.exists(path_to_oldpickle):
print(colored('\nReading already existing ENU solutions from pickle: %s' % path_to_oldpickle, 'yellow'))
df_enu_old = pd.read_pickle(path_to_oldpickle)
else:
print(colored('\nNo existing ENU solution pickle: %s' % path_to_oldpickle, 'yellow'))
df_enu_old = pd.DataFrame()
# Q: read all newly available .ENU files in solution directory, parse date and time columns to datetimeindex and add them to the dataframe
df_enu_new = pd.DataFrame(columns=['U', 'amb_state', 'nr_sat', 'std_u', 'date', 'time'])
path = dest_path + '20_solutions/' + rover_name + '/' + resolution + '/temp_' + rover_name
print(colored('\nReading all newly available ENU solution files from receiver: %s' % rover_name, 'blue'))
for file in glob.iglob(path + '/*' + ending + '.pos', recursive=True):
print('reading ENU solution file: %s' % file)
enu = pd.read_csv(file, header=header_length, delimiter=' ', skipinitialspace=True, index_col=['date_time'],
na_values=["NaN"],
usecols=[0, 1, 4, 5, 6, 9], names=['date', 'time', 'U', 'amb_state', 'nr_sat', 'std_u'],
parse_dates=[['date', 'time']])
# add new enu data to df enu
df_enu_new = pd.concat([df_enu_new, enu], axis=0)
# move file from temp directory to solutions directory after reading
shutil.move(file, path + '/../' + os.path.basename(file))
# remove date and time columns
df_enu_new = df_enu_new.drop(columns=['date', 'time'])
# concatenate existing solutions with new solutions
df_enu_total = pd.concat([df_enu_old, df_enu_new], axis=0)
    # detect all duplicates and only keep the last duplicated entries
    df_enu = df_enu_total[~df_enu_total.index.duplicated(keep='last')]
    print(colored('\nstored all old and new ENU solution data (without duplicates) in dataframe df_enu:', 'blue'))
print(df_enu)
# store dataframe as binary pickle format
df_enu.to_pickle(dest_path + '20_solutions/' + rover_name + '_' + resolution + ending + '.pkl')
    print(colored(
        '\nstored all old and new ENU solution data (without duplicates) in pickle: ' + '20_solutions/' + rover_name + '_' + resolution + ending + '.pkl',
        'blue'))
# delete temporary solution directory
if os.path.exists(path):
shutil.rmtree(path)
print(colored('\nAll new ENU solution files are moved to solutions dir and temp solutions directory is removed',
'blue'))
return df_enu
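# Example usage (commented out; hypothetical arguments): collect all new Emlid solution files into
# the cumulative dataframe/pickle (header_length depends on the rtkpost processing parameters used).
# df_enu = get_rtklib_solutions('data/', rover_name='NMER', resolution='15min', ending='', header_length=26)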
def filter_rtklib_solutions(dest_path, rover_name, resolution, df_enu, ambiguity=[1, 2, 5], threshold=2, window='D', ending=''):
""" filter and clean ENU solution data (outlier filtering, median filtering, adjustments for observation mast heightening)
:param dest_path: path to GNSS rinex observation and navigation data, and rtkpost configuration file
:param df_enu: pandas dataframe containing all seasons solution data columns ['date', 'time', 'U (m)', 'amb_state', 'nr_sat', 'std_u (m)']
:param rover_name: name of rover
:param resolution: processing time interval (in minutes)
:param ambiguity: ambiguity resolution state [1: fixed, 2: float, 5: standalone]
:param threshold: set threshold for outlier removing using the standard deviation (default=2 sigma)
:param window: window for median filter (default='D')
    :param ending: suffix of solution file names (e.g. a variant of processing options: '_noglonass')
    :return: fil_df, u, u_clean, swe_gnss, std_gnss, swe_gnss_daily, std_gnss_daily
"""
print(colored('\nFiltering data', 'blue'))
# Q: select only data where ambiguities are fixed (amb_state==1) or float (amb_state==2) and sort datetime index
print('\nselect data with ambiguity solution state: %s' % ambiguity)
fil_df = pd.DataFrame(df_enu[(df_enu.amb_state == ambiguity)])
fil_df.index = pd.DatetimeIndex(fil_df.index)
fil_df = fil_df.sort_index()
u = fil_df.U * 1000 # convert up (swe) component to mm
# Q: adjust for snow mast heightening (approx. 3m elevated several times a year)
print('\ndata is corrected for snow mast heightening events (remove sudden jumps > 1m)')
    jump = u[(u.diff() < -1000)]
    while jump.empty is False:
        # get value of jump difference (values directly after minus directly before the jump)
        jump_ind = jump.index.format()[0]
        jump_val = u[jump_ind] - u[:jump_ind][-2]
        print('\njump of height %s is detected! at %s' % (jump_val, jump_ind))
        adj = u[(u.index >= jump_ind)] - jump_val  # correct all observations after the jump
        u = pd.concat([u[~(u.index >= jump_ind)],
                       adj])  # concatenate all original obs before the jump with adjusted values after the jump
        jump = u[(u.diff() < -1000)]
    print('\nno (further) jump detected!')
# Q: remove outliers based on x*sigma threshold
print('\nremove outliers based on %s * sigma threshold' % threshold)
upper_limit = u.rolling('3D').median() + threshold * u.rolling('3D').std()
lower_limit = u.rolling('3D').median() - threshold * u.rolling('3D').std()
u_clean = u[(u > lower_limit) & (u < upper_limit)]
# Q: filter data with a rolling median
print('\ndata is median filtered with window length: %s' % window)
swe_gnss = u_clean.rolling(window).median()
std_gnss = u_clean.rolling(window).std()
    # Q: shift values to be positive (subtract the minimum value, e.g. -3258.5 on '2021-12-13 20:29:42')
    swe_gnss = swe_gnss - swe_gnss.min()
    # shift the time index by 18 s (GPS-UTC leap second offset)
    swe_gnss.index = swe_gnss.index + pd.Timedelta(seconds=18)
# resample data per day, calculate median and standard deviation (noise) per day to fit manual reference data
swe_gnss_daily = swe_gnss.resample('D').median()
std_gnss_daily = swe_gnss.resample('D').std()
# Q: store swe results to pickle
print(colored(
'\ndata is filtered, cleaned, and corrected and SWE results are stored to pickle and .csv: %s' % '20_solutions/SWE_results/swe_gnss_' + rover_name + '_' + resolution + ending + '.pkl',
'blue'))
os.makedirs(dest_path + '20_solutions/SWE_results/', exist_ok=True)
swe_gnss.to_pickle(
dest_path + '20_solutions/SWE_results/swe_gnss_' + rover_name + '_' + resolution + ending + '.pkl')
swe_gnss.to_csv(dest_path + '20_solutions/SWE_results/swe_gnss_' + rover_name + '_' + resolution + ending + '.csv')
return fil_df, u, u_clean, swe_gnss, std_gnss, swe_gnss_daily, std_gnss_daily
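# Example usage (commented out): filter the fixed-ambiguity solutions (amb_state == 1) with a
# 2-sigma outlier threshold and a daily median filter window.
# fil_df, u, u_clean, swe_gnss, std_gnss, swe_gnss_daily, std_gnss_daily = filter_rtklib_solutions(
#     'data/', 'NMLR', '15min', df_enu, ambiguity=1, threshold=2, window='D', ending='')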
def read_swe_gnss(dest_path, swe_gnss, rover_name, resolution, ending):
    """ read gnss swe results from pickle if they are not already available in memory
    :param dest_path: path to GNSS rinex observation and navigation data, and rtkpost configuration file
    :param swe_gnss: gnss swe results (pandas series) or None
    :param rover_name: name of rover
    :param resolution: processing time interval (in minutes)
    :param ending: suffix of solution file names
    :return: swe_gnss
    """
    if swe_gnss is None:
        # note: 'orange' is not a valid termcolor color, use 'yellow' instead
        print(colored(
            '\nSWE results are NOT available, reading from pickle: %s' % '20_solutions/SWE_results/swe_gnss_' + rover_name + '_' + resolution + ending + '.pkl',
            'yellow'))
swe_gnss = pd.read_pickle(
dest_path + '20_solutions/SWE_results/swe_gnss_' + rover_name + '_' + resolution + ending + '.pkl')
return swe_gnss
""" Define reference sensors functions """
def read_manual_observations(dest_path):
""" read and interpolate manual accumulation (cm), density (kg/m^3), SWE (mm w.e.) data
:param dest_path: path to GNSS rinex observation and navigation data, and rtkpost configuration file
:return: manual2, ipol
"""
# create local directory for reference observations
loc_ref_dir = dest_path + '00_reference_data/'
os.makedirs(loc_ref_dir, exist_ok=True)
# read data
print('\nread manual observations')
manual = pd.read_csv(loc_ref_dir + 'Manual_Spuso.csv', header=1, skipinitialspace=True,
delimiter=';', index_col=0, skiprows=0, na_values=["NaN"], parse_dates=[0], dayfirst=True,
names=['Acc', 'Density', 'SWE', 'Density_aboveAnt', 'SWE_aboveAnt'])
    # work on a copy so the density interpolation below uses the original timestamps
    manual2 = manual.copy()
    manual2.index = manual2.index + pd.Timedelta(days=0.2)
# fix dtype of column "Acc" and convert to mm
manual2.Acc = manual2.Acc.astype('float64') * 10
# interpolate manual data
print('\n-- interpolate manual reference observations')
ipol = manual.Density_aboveAnt.resample('min').interpolate(method='linear', limit_direction='backward')
return manual2, ipol
def read_snowbuoy_observations(dest_path, url, ipol_density=None):
""" read snow buoy accumulation data from four sensors and convert to SWE & pressure, airtemp
:param ipol_density: interpolated density data from manual reference observations
:param url: webpage url where daily updated snow buoy data can be downloaded
:param dest_path: path to GNSS rinex observation and navigation data, and rtkpost configuration file
:return: buoy
"""
# create local directory for snow buoy observations
loc_buoy_dir = dest_path + '00_reference_data/Snowbuoy/'
os.makedirs(loc_buoy_dir, exist_ok=True)
# Q: download newest snow buoy data from url
# get data from url
r = requests.get(url, allow_redirects=True)
# decompress file
z = zipfile.ZipFile(io.BytesIO(r.content))
    # store selected file from decompressed folder to working directory subfolder
z.extract(z.filelist[2], path=loc_buoy_dir)
# Q: read snow buoy data
print('\nread snow buoy observations')
buoy_all = pd.read_csv(loc_buoy_dir + '2017S54_300234011695900_proc.csv', header=0,
skipinitialspace=True, delimiter=',', index_col=0, skiprows=0, na_values=["NaN"],
parse_dates=[0],
names=['lat', 'lon', 'sh1', 'sh2', 'sh3', 'sh4', 'pressure', 'airtemp', 'bodytemp',
'gpstime'])
# select only accumulation data from season 21/22 & convert to mm
buoy = buoy_all[['sh1', 'sh2', 'sh3', 'sh4']]['2021-11-26':] * 1000
# Q: adjust for snow mast heightening (approx. 1m elevated); value of jump difference (of values directly after - before jump): 2023-01-24 21:01:00 1036.0
print('\ndata is corrected for snow buoy heightening events (remove sudden jumps > 1m)')
jump_ind = '2023-01-24 21:01:00'
jump_val = 1036
print('\ncorrect jump of height %s: at %s' % (jump_val, jump_ind))
adj = buoy[['sh1', 'sh2', 'sh3', 'sh4']][
(buoy.index >= jump_ind)] + jump_val # correct all observations after jump [0]
buoy_corr = pd.concat([buoy[['sh1', 'sh2', 'sh3', 'sh4']][~(buoy.index >= jump_ind)],
adj]) # concatenate all original obs before jump with adjusted values after jump
# Q: Differences in accumulation & conversion to SWE
# calculate change in accumulation (in mm) for each buoy sensor add it as an additional column to the dataframe buoy
buoy_change = (buoy_corr[['sh1', 'sh2', 'sh3', 'sh4']] - buoy_corr[['sh1', 'sh2', 'sh3', 'sh4']].min())
buoy_change.columns = ['dsh1', 'dsh2', 'dsh3', 'dsh4']
# convert snow accumulation to SWE (with interpolated and constant density values)
print('\n-- convert buoy observations to SWE')
buoy_swe = convert_sh2swe(buoy_change, ipol_density)
buoy_swe.columns = ['dswe1', 'dswe2', 'dswe3', 'dswe4']
buoy_swe_constant = convert_sh2swe(buoy_change)
buoy_swe_constant.columns = ['dswe_const1', 'dswe_const2', 'dswe_const3', 'dswe_const4']
# append new columns to existing buoy dataframe
buoy = pd.concat([buoy_corr, buoy_change, buoy_swe, buoy_swe_constant], axis=1)
return buoy
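# Example usage (commented out; placeholder url, assuming the interpolated manual density from
# read_manual_observations): download the daily updated snow buoy data and convert the four
# accumulation sensors to SWE.
# buoy = read_snowbuoy_observations('data/', url='<snow_buoy_zip_url>', ipol_density=ipol)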
def read_pole_observations(dest_path, ipol_density=None):
""" read Pegelfeld Spuso accumulation data from 16 poles and convert to SWE
:param ipol_density: interpolated density data from manual reference observations
:param dest_path: path to GNSS rinex observation and navigation data, and rtkpost configuration file
:return: poles
"""
# create local directory for reference observations
loc_ref_dir = dest_path + '00_reference_data/'
os.makedirs(loc_ref_dir, exist_ok=True)
# Q: read Pegelfeld Spuso pole observations
print('\nread Pegelfeld Spuso pole observations')
poles = pd.read_csv(loc_ref_dir + 'Pegelfeld_Spuso_Akkumulation.csv', header=0, delimiter=';',
index_col=0, skiprows=0, na_values=["NaN"], parse_dates=[0], dayfirst=True)
# convert to non-negative values
poles_corr = poles - poles.min().min()
# Q: convert snow accumulation to SWE (with interpolated and constant density values)
print('\n-- convert Pegelfeld Spuso pole observations to SWE')
poles_swe = convert_sh2swe(poles_corr, ipol_density)
poles_swe.columns = ['dswe'] + poles_swe.columns
poles_swe_constant = convert_sh2swe(poles_corr)
poles_swe_constant.columns = ['dswe_const'] + poles_swe_constant.columns
# append new columns to existing poles dataframe
poles = pd.concat([poles_corr, poles_swe, poles_swe_constant], axis=1)
return poles
def read_laser_observations(dest_path, laser_path, yy, ipol, laser_pickle='nm_laser'):
""" read snow accumulation observations (minute resolution) from laser distance sensor data
:param ipol: interpolated density data from manual reference observations
:param laser_pickle: read laser pickle (e.g., '00_reference_data/Laser/nm_shm.pkl') and logfiles creating/containing snow accumulation observations from laser distance sensor
:param dest_path: path to GNSS rinex observation and navigation data, and rtkpost configuration file
:return: df_shm, h, fil_h_clean, h_resampled, h_std_resampled, sh, sh_std
"""
# create local directory for laser observations
loc_laser_dir = dest_path + '00_reference_data/Laser/'
os.makedirs(loc_laser_dir, exist_ok=True)
# Q: copy laser observations (*.log/shm = *.[ls]??) from AWI server if not already existing
print(colored("\ncopy new laser files", 'blue'))
# get list of yearly directories newer than first year
for year in os.listdir(laser_path)[:-1]:
if int(year) >= int('20' + yy):
# copy missing laser observation files
for f in glob.glob(laser_path + year + '/*.[ls]??'):
file = os.path.basename(f)
# skip files of 2021 before 26th nov (no gps data before installation)
if int(file[2:8]) > 211125:
if not os.path.exists(loc_laser_dir + file):
shutil.copy2(f, loc_laser_dir)
print("file copied from %s to %s" % (f, loc_laser_dir))
else:
# print(colored("\nfile in destination already exists: %s, \ncopy aborted!!!" % dest_path, 'yellow'))
pass
else:
pass
else:
pass
print(colored("\nnew laser files copied", 'blue'))
# Q: read all existing laser observations from .pkl if already exists, else create empty dataframe
path_to_oldpickle = loc_laser_dir + laser_pickle + '.pkl'
if os.path.exists(path_to_oldpickle):
print(colored('\nReading already existing laser observations from pickle: %s' % path_to_oldpickle, 'yellow'))
laser = pd.read_pickle(path_to_oldpickle)
old_idx = laser.index[-1].date().strftime("%y%m%d")
else:
print(colored('\nNo existing laser observations pickle!', 'yellow'))
laser = pd.DataFrame()
old_idx = '211125'
# Q: read new snow accumulation files *.[log/shm] (minute resolution) from laser distance sensor data, parse date and time columns to datetimeindex and add them to the dataframe
print(colored('\nReading all new logfiles from: %s' % loc_laser_dir + 'nm*.[log/shm]', 'blue'))
for file in glob.iglob(loc_laser_dir + 'nm*.[ls]??', recursive=True):
# read accumulation files newer than last entry in laser pickle
if int(os.path.basename(file)[2:8]) > int(old_idx):
print(file)
# check if old or new type laser data format to read due to the installation of a new sensor on 22-12-2022
if int(os.path.basename(file)[2:8]) <= 221222:
# read all old-type snow accumulation.log files
# header: 'date', 'time', 'snow level (m)', 'signal(-)', 'temp (°C)', 'error (-)', 'checksum (-)'
shm = pd.read_csv(file, header=0, delimiter=r'[ >]', skipinitialspace=True, na_values=["NaN"],
names=['date', 'time', 'none', 'sh', 'signal', 'temp', 'error', 'check'],
usecols=[0, 1, 3, 5, 6],
encoding='latin1', parse_dates=[['date', 'time']], index_col=['date_time'],
engine='python', dayfirst=True)
else:
# read all new-type snow accumulation.shm files
# header: Year Month Day Hour Minute Second Command TelegramNumber SerialNumber SnowLevel SnowSignal Temperature TiltAngle Error UmbStatus Checksum DistanceRaw Unknown Checksum660
shm = pd.read_csv(file, header=0, delimiter=' |;', na_values=["NaN"],
names=['datetime', 'Command', 'TelegramNumber', 'SerialNumber', 'sh', 'signal',
'temp', 'TiltAngle', 'error', 'check'],
usecols=[0, 4, 6, 8],
encoding='latin1', parse_dates=['datetime'], index_col=0,
engine='python')
# only select error infos in 'error' (first column)
shm.error = shm.error.str.split(':', expand=True)[0]
# change outlier values ('///////') to NaN
shm.sh = pd.to_numeric(shm.sh, errors='coerce')
shm.error = pd.to_numeric(shm.error, errors='coerce')
# add loaded file to existing laser df
laser = pd.concat([laser, shm], axis=0)
else:
continue
# calculate change in accumulation (in mm) and add it as an additional column to the dataframe
laser['dsh'] = (laser['sh'] - laser['sh'][0]) * 1000
    # detect all duplicates and only keep the last duplicated entries
    laser = laser[~laser.index.duplicated(keep='last')]
    # store as .pkl
    laser.to_pickle(loc_laser_dir + laser_pickle + '.pkl')
    print(colored(
        '\nstored all old and new laser observations (without duplicates) to pickle: ' + loc_laser_dir + laser_pickle + '.pkl',
        'blue'))
return laser
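# Example usage (commented out; hypothetical paths): read new laser (log/shm) files from the AWI
# server and append them to the cumulative pickle of minute-resolution accumulation data.
# laser = read_laser_observations('data/', '//awi-server/laser/', yy='21', ipol=ipol, laser_pickle='nm_laser')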
def filter_laser_observations(ipol, laser, threshold=1):
""" filter snow accumulation observations (minute resolution) from laser distance sensor data
:param threshold: threshold for removing outliers (default=1)
:param ipol: interpolated density data from manual reference observations
:param laser: laser data
:return: laser_filtered
"""
# Q: remove outliers in laser observations
print('\n-- filtering laser observations')
# 0. select only observations without errors
dsh = laser[(laser.error == 0)].dsh