Commit 1f944af9 authored by simon-ast

MORE STATS

parent 65bb5d85
@@ -9,21 +9,22 @@ os.environ["CDF_LIB"] = "/usr/local/cdf/lib"
 def cdf_slice(cdf_file, key: str):
     """
-    DOC
+    Simple call to a specific slice of CDF data.
-    :param cdf_file:
-    :param key:
-    :return:
+    :param cdf_file: Name of CDF file
+    :param key: Name of desired key from CDF file
+    :return: Data slice
     """
     return cdf_file[key][...]


 def data_generation(cdf_file) -> tp.Dict:
     """
-    DOC
+    Generate dictionary of measurement data from CDF file
-    :param cdf_file:
-    :return:
+    :param cdf_file: CDF file
+    :return: DICT,
+        Data dictionary
     """
     data = {
         "dqf": cdf_slice(cdf_file, key="general_flag"),
@@ -61,11 +62,14 @@ def array_reduction(data_array: np.ndarray,
 def full_reduction(data_dict: tp.Dict,
                    bad_indices: np.array) -> tp.Dict:
     """
-    DOC
+    Fully reduce data dictionary by all determined "bad" indices
-    :param data_dict:
-    :param bad_indices:
-    :return:
+    :param data_dict: DICT,
+        Data dictionary (for PSP data)
+    :param bad_indices: NDARRAY,
+        Array of "bad" indices
+    :return: DICT,
+        Same as input, but with reduced value arrays
     """
     for key in data_dict.keys():
         data_dict[key] = array_reduction(data_dict[key], bad_indices)
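array_reduction itself sits above this hunk and is not shown in the diff. A plausible minimal form, consistent with how full_reduction calls it; this is an assumption, not the repository's actual code:

import numpy as np

def array_reduction(data_array: np.ndarray,
                    bad_indices: np.ndarray) -> np.ndarray:
    # Assumed behaviour: drop all entries flagged as "bad"
    return np.delete(data_array, bad_indices)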
@@ -40,9 +40,13 @@ def meas_failed(meas_array: np.ndarray) -> np.ndarray:
 def full_meas_eval(data_dict: tp.Dict):
     """
     Assess additional failure indices by evaluating all desired
     measurement values.
-    :param data_dict:
-    :return:
+    :param data_dict: DICT,
+        Dictionary of PSP measurement data
+    :return: LIST,
+        List of "bad" indices
     """
     col_ind = []
(binary file changed; no preview available)
@@ -27,6 +27,10 @@ def stat_ana(value_sample):
     median = np.median(value_sample)
     std = np.std(value_sample)
-    return mean, median, std
+    return {
+        "mean": mean,
+        "median": median,
+        "stddev": std
+    }
\ No newline at end of file
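With this change, stat_ana returns a labelled dictionary instead of a positional tuple, so call sites can no longer silently swap mean and median. Reconstructed in full for reference (the mean line sits above the visible hunk and is assumed):

import numpy as np

def stat_ana(value_sample):
    mean = np.mean(value_sample)        # assumed, just above this hunk
    median = np.median(value_sample)
    std = np.std(value_sample)
    return {
        "mean": mean,
        "median": median,
        "stddev": std
    }

print(stat_ana([1.0, 2.0, 4.0]))
# {'mean': 2.333..., 'median': 2.0, 'stddev': 1.247...}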
import os
import sys
from unipath import Path
import scipy.stats as st
import numpy as np
import matplotlib.pyplot as plt
from spacepy import pycdf
@@ -48,14 +47,6 @@ data["dTlo"] = dt.wp_to_temp(data["dwplo"])
 distance_bins = db.create_bins(0, 100, .5)
 bin_indices = db.sort_bins(distance_bins, data["r"] * 1e3 / R_sun.value)

-for key in bin_indices.keys():
-    print(key)
-    test = key.strip("()").strip().split(",")
-    print(test)
-    print("".join(test))
-exit()
-
-test_r = st.slice_index_list(data["r"], bin_indices["(46.5, 47.0)"]) * 1e3 / R_sun.value
-test_v = st.slice_index_list(data["vr"], bin_indices["(46.5, 47.0)"])
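The deleted scratch loop above was probing how to turn a bin key such as "(46.5, 47.0)" back into numbers. For the record, a compact way to do that:

def bin_bounds(key: str):
    # "(46.5, 47.0)" -> (46.5, 47.0)
    lower, upper = (float(part) for part in key.strip("()").split(","))
    return lower, upper

print(bin_bounds("(46.5, 47.0)"))   # (46.5, 47.0)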
import sys
import os
import numpy as np
import matplotlib.pyplot as plt
from astropy.constants import R_sun
from PSPops import plot_settings as ps
from Statistics import stats as st

# DESIGNATE BINNED DATA LOCATION
BIN_DATA_LOCATION = sys.path[0]+"/BINNED_DATA"

# SANITY CHECK: Does the data directory even exist?
if not os.path.isdir(BIN_DATA_LOCATION):
    print(f"\n{BIN_DATA_LOCATION} IS NOT A VALID DIRECTORY!\n")
    sys.exit(0)


def main():
    # Per-bin mean values and standard deviations, collected file by file
    test_r = test_v = np.array([])
    std_r = std_v = np.array([])

    # Loop over all files in the binned data directory
    for name in sorted(os.listdir(BIN_DATA_LOCATION)):
        # Generate correct pointer to data file
        file = BIN_DATA_LOCATION + f"/{name}"

        # Generate numpy array from data files (multi-dim)
        all_data = np.loadtxt(file, skiprows=1)

        # Generate sub-arrays from all data
        r = all_data[:, 0]
        vr = all_data[:, 1]

        stat_r = st.stat_ana(r * 1e3 / R_sun.value)
        stat_vr = st.stat_ana(vr)

        test_r = np.append(test_r, stat_r["mean"])
        test_v = np.append(test_v, stat_vr["mean"])
        std_r = np.append(std_r, stat_r["stddev"])
        std_v = np.append(std_v, stat_vr["stddev"])

    plt.plot(test_r, test_v)
    plt.fill_between(test_r, test_v + std_v, test_v - std_v,
                     color="grey", alpha=0.35, zorder=1)


if __name__ == "__main__":
    ps.rc_setup()

    main()
    plt.show()
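This script expects BINNED_DATA to hold files in the format written by the binning routine further down (a header line, then tab-separated r and vr columns). A throwaway input file for a quick smoke test; the values are made up, roughly 46 solar radii and a plausible solar-wind speed, not actual PSP data:

import os

os.makedirs("BINNED_DATA", exist_ok=True)
with open("BINNED_DATA/PSP-RBIN-test.dat", "w") as f:
    f.write("r [km] \t vr [km/s] \n")
    f.write("32000000.0 \t 250.0\n")   # dummy values, not PSP data
    f.write("32400000.0 \t 260.0\n")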
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
from spacepy import pycdf
from PSPops import data_quality as dq
@@ -18,7 +17,7 @@ os.environ["CDF_LIB"] = "/usr/local/cdf/lib"
 ENCOUNTER_NUM = "encounter_5"
 DATA_LOCATION = sys.path[0]+"/DATA/"+ENCOUNTER_NUM

-# SANITY CHECK IF DATA LOCATION EXISTS
+# SANITY CHECK: Does the data directory even exist?
 if not os.path.isdir(DATA_LOCATION):
     print(f"\n{DATA_LOCATION} IS NOT A VALID DIRECTORY!\n")
     sys.exit(0)
@@ -53,15 +52,6 @@ def main():
         r_tot = np.append(r_tot, data["r"])
         vr_tot = np.append(vr_tot, data["vr"])

-    """
-    PSEUDOCODE
-    Here - total arrays of distance, vr etc.
-    Create distance bins for whole distance range
-        - Sort data into sub-arrays based on distance bins
-        - Save files for each distance bin
-    This way, data can be handled by a different routine
-    """
     # Create distance bins and determine indices of data arrays that
     # correspond to the respective distance bins. Some of these
     # sub-arrays might be empty and have to be handled accordingly
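The binning helpers themselves (db.create_bins / db.sort_bins in the hunk further up) are not part of this diff. A sketch of behaviour consistent with how they are used here, i.e. "(lo, hi)" keys mapping to index arrays; these are assumptions, not the repository's code:

import numpy as np

def create_bins(low, high, width):
    # Assumed: consecutive (lo, hi) pairs of the given width
    edges = np.arange(low, high + width, width)
    return list(zip(edges[:-1], edges[1:]))

def sort_bins(bins, positions):
    # Assumed: dictionary mapping a "(lo, hi)" key to the indices
    # of all positions that fall into that bin
    return {
        f"({lo}, {hi})": np.where((positions >= lo) & (positions < hi))[0]
        for lo, hi in bins
    }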
@@ -71,7 +61,6 @@ def main():
     # Create a loop for all sub-arrays, and "continue" if the index
     # array is empty
     for key in bin_indices:
-        # Skip if the index array is empty
         if not np.size(bin_indices[key]):
             continue
@@ -84,29 +73,16 @@ def main():
         binned_r = st.slice_index_list(r_tot, bin_indices[key])
         binned_vr = st.slice_index_list(vr_tot, bin_indices[key])

-        ###
-        # EMPTY DIRECTORY BEFORE RUNNING
-        ###
+        file_name = f"BINNED_DATA/PSP-RBIN-{name_append}.dat"
+        with open(file_name, "w") as f:
+            f.write("r [km] \t vr [km/s] \n")
+            for i in range(sub_len):
+                f.write(f"{binned_r[i]} \t {binned_vr[i]}\n")
-    exit()
-
-    # Find the turn-around index for the encounter
-    ta_index = np.argmin(r_tot)
-
-    # Create "approach" and "recession" values
-    r_app = r_tot[:ta_index]
-    v_app = vr_tot[:ta_index]
-    r_rec = r_tot[ta_index + 1:]
-    v_rec = vr_tot[ta_index + 1:]
-
-    plt.plot(r_app, v_app)
-    plt.plot(r_rec, v_rec)
-    plt.show()


 if __name__ == "__main__":
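st.slice_index_list (from the project's Statistics.stats module) is also outside this diff. Given how it is called above, it presumably reduces an array to the entries at the given per-bin indices, something like the following; an assumption, not the actual implementation:

import numpy as np

def slice_index_list(array, index_list):
    # Assumed behaviour: keep only the entries at the given indices
    return np.asarray(array)[np.asarray(index_list, dtype=int)]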