Compare commits

..

11 Commits

6 changed files with 121 additions and 123 deletions

3
.gitignore vendored
View File

@@ -1,2 +1,5 @@
*~ *~
*.log *.log
flycheck_*.py
\#*#
test.pdf

View File

@@ -1,3 +1,5 @@
# ad-calc # ad-calc
Tools to help calculating values for Axiomatic Design analysis Tools to help calculating values for Axiomatic Design analysis
`infocalc.py` calculates information content from either a CSV file or statistical parameters (sample size, mean, and standard deviation), together with optional upper/lower tolerance limits.

View File

@@ -9,8 +9,10 @@ import logging
import argparse import argparse
from pathlib import PurePath##https://docs.python.org/3/library/pathlib.html#module-pathlib from pathlib import PurePath##https://docs.python.org/3/library/pathlib.html#module-pathlib
import numpy as np import numpy as np
import matplotlib
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from scipy.stats import norm from scipy.stats import norm,t
import scipy.stats
import pandas as pd import pandas as pd
#Main program loop #Main program loop
@@ -18,20 +20,43 @@ print("""Axiomatic Design Information Calculator by Joseph. T. Foley<foley AT ru
From https://gitea.cs.ru.is/AxiomaticDesign/adcalc/""") From https://gitea.cs.ru.is/AxiomaticDesign/adcalc/""")
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Axiomatic Design Information Calculator.") description="Axiomatic Design Information Calculator.")
parser.add_argument('csvfile', subparsers = parser.add_subparsers(dest='mode')
subparsers.required = True
### MODE DATA
parser_data = subparsers.add_parser("DATA")
parser_data.add_argument('csvfile',
help="CSV file with data and headers") help="CSV file with data and headers")
parser.add_argument('column', parser_data.add_argument('column',
help='Which column header to take data from') help='Which column header to take data from')
parser.add_argument('minvalue', type=float, ## MODE SIM
parser_sim = subparsers.add_parser("SIM")
parser_sim.add_argument('samplesize', type=int,
help="sample size")
parser_sim.add_argument('mean', type=float,
help="mean(average) value")
parser_sim.add_argument('stddev', type=float,
help="sample standard deviation")
## General Arguments
parser.add_argument('--lowerbound', type=float,
help='Tolerance low limit') help='Tolerance low limit')
parser.add_argument('maxvalue', type=float, parser.add_argument('--upperbound', type=float,
help='Tolerance high limit') help='Tolerance high limit')
parser.add_argument('--normalizey', action="store_true", parser.add_argument('--normalizey', action="store_true",
help='Set y-axis to normalized probability density') help='Set y-axis to normalized probability density')
parser.add_argument('--log', default="INFO", parser.add_argument('--log', default="INFO",
help='Console log level: Number or DEBUG, INFO, WARNING, ERROR') help='Console log level: Number or DEBUG, INFO, WARNING, ERROR')
parser.add_argument('--graphinfo', parser.add_argument('--legend', action="store_true",
help='Put legend on the PDF graph')
parser.add_argument('--graphinfo', action="store_true",
help='Put information on the PDF graph') help='Put information on the PDF graph')
parser.add_argument('--xlabel',
help='X-axis label, if needed')
parser.add_argument('--outfile',
help="output graph to PDF file")
parser.add_argument('--fontsize', default=14, type=int,
help="Adjust font size")
args = parser.parse_args() args = parser.parse_args()
## Set up logging ## Set up logging
@@ -42,7 +67,7 @@ if not isinstance(numeric_level, int):
logger = logging.getLogger("app") logger = logging.getLogger("app")
logger.setLevel(numeric_level) logger.setLevel(numeric_level)
# log everything to file # log everything to file
logpath = os.path.splitext(args.csvfile)[0]+".log" logpath = "infocalc.log"
fh = logging.FileHandler(logpath) fh = logging.FileHandler(logpath)
fh.setLevel(logging.DEBUG) fh.setLevel(logging.DEBUG)
# log to console # log to console
@@ -57,57 +82,99 @@ fh.setFormatter(spamformatter)
logger.addHandler(ch) logger.addHandler(ch)
logger.addHandler(fh) logger.addHandler(fh)
logger.info("Creating infocalc log file %s", logpath) logger.debug("Creating infocalc log file %s", logpath)
# filename pre-processing for output # seed values for variable scoping
inpath = PurePath(args.csvfile) mean = 0
print(f"Input: {inpath}") stddev = 1
samplesize =1
# grab the data and process if args.mode == "DATA":
data = np.array(pd.read_csv(inpath)[args.column]) # filename pre-processing for output
lowerbound = args.minvalue inpath = PurePath(args.csvfile)
upperbound = args.maxvalue print(f"Input: {inpath}")
logger.debug(f"data:{data}, lower:{lowerbound}, upper:{upperbound}") # grab the data and process
data = np.array(pd.read_csv(inpath)[args.column])
mean = data.mean()
stddev = data.std(ddof=1)
# Delta Degrees of Freedom: ddof=0 for population, ddof=1 for sample std dev
samplesize = len(data)
elif args.mode == "SIM":
mean = args.mean
stddev = args.stddev
samplesize = args.samplesize
df = samplesize - 1
mean = data.mean() prob = 0
stddev = data.std(ddof=1) if args.upperbound and args.lowerbound:
# Delta Degrees of Freedom: ddof=0 for population, ddof=1 for sample std dev prob = t.cdf(df,args.upperbound, mean, stddev) - t.cdf(df,args.lowerbound, mean, stddev)
prob = norm.cdf(upperbound, mean, stddev) - norm.cdf(lowerbound, mean, stddev) elif args.upperbound:
prob = t.cdf(df,args.upperbound, mean, stddev)
elif args.lowerbound:
prob = 1 - t.cdf(df,args.lowerbound, mean, stddev)
else:
prob = 1# no bounds set!
#print("probability: %f", prob) #print("probability: %f", prob)
info = -np.emath.log2(prob) info = -np.emath.log2(prob)
#print("information content: %f bits", info) #print("information content: %f bits", info)
## set default fontsize
matplotlib.rcParams['font.size']=args.fontsize
## place text on plot: https://matplotlib.org/3.3.4/gallery/recipes/placing_text_boxes.html ## place text on plot: https://matplotlib.org/3.3.4/gallery/recipes/placing_text_boxes.html
fig, ax = plt.subplots() fig, ax = plt.subplots()
textstr = '\n'.join(( if args.graphinfo:#put info on corner of graph
r'$n=%d$' % (len(data)), textstr = '\n'.join((
r'$\mu=%.2f$' % (mean, ), r'$n=%d$' % (samplesize),
r'$\sigma=%.2f$' % (stddev, ), r'$\mu=%.2f$' % (mean, ),
r'$P=%.2f$' % (prob, ), r'$\sigma=%.2f$' % (stddev, ),
r'$I=%.2f$ bits' % (info, ))) r'$P=%.2f$' % (prob, ),
# these are matplotlib.patch.Patch properties r'$I=%.2f$ bits' % (info, )))
props = dict(boxstyle='round', facecolor='wheat', alpha=0.5) # these are matplotlib.patch.Patch properties
props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
# place a text box in upper left in axes coords # place a text box in upper left in axes coords
ax.text(0.05, 0.95, textstr, transform=ax.transAxes, fontsize=14, ax.text(0.05, 0.95, textstr, transform=ax.transAxes, fontsize=args.fontsize,
verticalalignment='top', bbox=props) verticalalignment='top', bbox=props)
xgraphlimits = {"min": mean-3*stddev, "max": mean+3*stddev}
if args.lowerbound and xgraphlimits["min"] > args.lowerbound:
xgraphlimits["min"] = args.lowerbound
if args.upperbound and xgraphlimits["max"] < args.upperbound:
xgraphlimits["max"] = args.upperbound
x = np.linspace(mean-3*stddev, mean+3*stddev, 500) x = np.linspace(xgraphlimits["min"], xgraphlimits["max"], 500)
y = norm.pdf(x, loc=mean, scale=stddev) y = norm.pdf(x, loc=mean, scale=stddev)
if args.normalizey: if args.normalizey:
y = y * stddev#rescale back to unity area y = y * stddev#rescale back to unity area
plt.axvline(x=mean, color="green", linestyle="dashed", label="mean") plt.axvline(x=mean, color="green", linestyle="dashed", label="mean")
plt.axvline(lowerbound, color="red") if args.lowerbound:
plt.axvline(upperbound, color="red") plt.axvline(args.lowerbound, color="red")
if args.upperbound:
plt.axvline(args.upperbound, color="red")
plt.plot(x, y, 'b-', label='Normal distribution') plt.plot(x, y, 'b-', label='Normal distribution')
#yt = scipy.stats.t.pdf(x, len(data)-1, mean, stddev) #yt = scipy.stats.t.pdf(x, len(data)-1, mean, stddev)
#plt.plot(x, yt, 'g-', label='T Distribution') #plt.plot(x, yt, 'g-', label='T Distribution')
coloredregion = (x >= lowerbound) & ( x <= upperbound ) #select x values
plt.fill_between(x, 0, y, where=coloredregion, color="grey", alpha=0.5, label="Design range")
plt.xlabel('X')
plt.ylabel('Probability density')
plt.legend()
plt.grid(True)
# Filter for which region to fill
coloredregion = x#default fill all
if args.lowerbound and args.upperbound:
coloredregion = (x >= args.lowerbound) & ( x <= args.upperbound )
elif args.upperbound:
coloredregion = x <= args.upperbound
elif args.lowerbound:
coloredregion = x >= args.lowerbound
plt.fill_between(x, 0, y, where=coloredregion, color="grey", alpha=0.5, label="Design range",)
if args.xlabel:
plt.xlabel(args.xlabel)
plt.ylabel('Probability density')
if args.legend:
plt.legend()
#plt.grid(True)
top = plt.ylim()[1] top = plt.ylim()[1]
plt.show()
if args.outfile:
logger.info(f"Graph output to {args.outfile}")
plt.savefig(args.outfile,bbox_inches='tight')
else:
plt.show()

View File

@@ -1,57 +0,0 @@
#!/usr/bin/env python
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
## Placeholder measurements and tolerance limits (hard-coded for now --foley)
data = np.array([1, 1.1, 0.9, 1, 1, 0.9, 0.9])
lowerbound, upperbound = 0.9, 1.0

## Sample statistics
mean = data.mean()
# ddof (Delta Degrees of Freedom): 0 gives population, 1 gives sample std dev
stddev = data.std(ddof=1)

## Probability that a normal variable with these parameters lands inside
## the limits, and the matching information content in bits
cdf_upper = norm.cdf(upperbound, mean, stddev)
cdf_lower = norm.cdf(lowerbound, mean, stddev)
prob = cdf_upper - cdf_lower
info = -np.emath.log2(prob)

## Curve samples across mean +/- 3 standard deviations
halfwidth = 3*stddev
x = np.linspace(mean - halfwidth, mean + halfwidth, 500)
# density multiplied by stddev so the y-axis is in normalized units
y = norm.pdf(x, loc=mean, scale=stddev) * stddev
# boolean mask of the x samples that fall between the two limits
coloredregion = np.logical_and(x >= lowerbound, x <= upperbound)

## Text box recipe:
## https://matplotlib.org/3.3.4/gallery/recipes/placing_text_boxes.html
fig, ax = plt.subplots()
summary_lines = [
    r'$n=%d$' % (len(data)),
    r'$\mu=%.2f$' % (mean, ),
    r'$\sigma=%.2f$' % (stddev, ),
    r'$P=%.2f$' % (prob, ),
    r'$I=%.2f$ bits' % (info, ),
]
textstr = '\n'.join(summary_lines)
# matplotlib.patch.Patch properties for the box drawn behind the text
props = {'boxstyle': 'round', 'facecolor': 'wheat', 'alpha': 0.5}
# upper-left corner, positioned in axes coordinates
ax.text(0.05, 0.95, textstr,
        transform=ax.transAxes,
        fontsize=14,
        verticalalignment='top',
        bbox=props)

## Draw order is kept (mean line, limit lines, curve, shaded band) because
## it determines the order of the legend entries
ax.axvline(x=mean, color="green", linestyle="dashed", label="mean")
for limit in (lowerbound, upperbound):
    ax.axvline(limit, color="red")
ax.plot(x, y, 'b-', label='Normal distribution')
ax.fill_between(x, 0, y, where=coloredregion, color="grey", alpha=0.5,
                label="Design range")

ax.set_xlabel('X')
ax.set_ylabel('Probability density')
ax.legend()
ax.grid(True)
top = ax.get_ylim()[1]
plt.show()
# annotate values on X after drawing the graphs

View File

@@ -1,23 +0,0 @@
#!/usr/bin/env python
import pandas as pd
# Build a small in-memory table and print its describe() summary.
df = pd.DataFrame(
    {
        "Name": [
            "Braund, Mr. Owen Harris",
            "Allen, Mr. William Hentry",
            "Bonnell, Miss. Elizabeth",
        ],
        "Age": [22, 35, 58],
        "Sex": ["male", "male", "female"],
    }
)
print(df.describe())

# Load the passenger data (expects titanic.csv in the working directory)
# and show its first 8 rows.
titanic = pd.read_csv("titanic.csv")
print(titanic.head(8))

# Round-trip through Excel: write the table out, then read it back in.
titanic.to_excel("titanic.xlsx", sheet_name="passengers", index=False)
titanic_xltest = pd.read_excel("titanic.xlsx", sheet_name="passengers")

print("INFO")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() emitted a stray "None" line after the report.
titanic.info()

6
tests.sh Executable file
View File

@@ -0,0 +1,6 @@
#!/bin/bash
# Smoke tests for infocalc.py. Run from the directory that contains
# infocalc.py and testdata.csv.
# Get infocalc.py from https://gitea.cs.ru.is/AxiomaticDesign/adcalc

# Stop at the first failing command; otherwise the script's exit status is
# only that of the last invocation and an earlier failure goes unnoticed.
set -e

echo "Loading data from file"
# DATA mode: positional arguments are the CSV file and the column header.
./infocalc.py --lowerbound 0.9 --upperbound 1.1 --graphinfo DATA testdata.csv data1
echo "Creating simulated curve from parameters"
# SIM mode: positional arguments are sample size, mean, standard deviation.
./infocalc.py --lowerbound 0.9 --upperbound 1.1 SIM 8 1.0 0.5